From 69cba230f5cb43a1a6f5d8f650c8bbbead535f98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Wed, 13 Dec 2023 11:07:23 +0100 Subject: [PATCH 001/161] test(ir): reorganize ibis/tests/expr to enable running the tests without a functional backend --- ibis/backends/base/__init__.py | 1 + ibis/backends/base/sql/__init__.py | 7 +- ibis/backends/pandas/__init__.py | 2 + ibis/backends/pandas/core.py | 1 + .../tests}/test_interactive.py | 6 - ibis/expr/tests/conftest.py | 7 ++ ibis/tests/benchmarks/test_benchmarks.py | 3 + ibis/tests/expr/mocks.py | 115 +++--------------- ibis/tests/expr/test_table.py | 29 ++--- 9 files changed, 48 insertions(+), 123 deletions(-) rename ibis/{tests/expr => backends/tests}/test_interactive.py (96%) diff --git a/ibis/backends/base/__init__.py b/ibis/backends/base/__init__.py index 7190d831ba68..a208a8f00bc1 100644 --- a/ibis/backends/base/__init__.py +++ b/ibis/backends/base/__init__.py @@ -31,6 +31,7 @@ import pyarrow as pa import torch + __all__ = ("BaseBackend", "Database", "connect") _IBIS_TO_SQLGLOT_DIALECT = { diff --git a/ibis/backends/base/sql/__init__.py b/ibis/backends/base/sql/__init__.py index c0345780fc3f..8d31f56979cc 100644 --- a/ibis/backends/base/sql/__init__.py +++ b/ibis/backends/base/sql/__init__.py @@ -22,6 +22,8 @@ import pandas as pd import pyarrow as pa +raise RuntimeError("Temporarily make the SQL backends dysfunctional") + __all__ = ["BaseSQLBackend"] @@ -90,9 +92,8 @@ def table(self, name: str, database: str | None = None) -> ir.Table: ) qualified_name = self._fully_qualified_name(name, database) schema = self.get_schema(qualified_name) - node = ops.DatabaseTable( - name, schema, self, namespace=ops.Namespace(database=database) - ) + namespace = ops.Namespace(database=database) + node = ops.DatabaseTable(name, schema, self, namespace=namespace) return node.to_expr() def _fully_qualified_name(self, name, database): diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py index c4de4af3f8fa..4349400c50ab 100644 --- a/ibis/backends/pandas/__init__.py +++ b/ibis/backends/pandas/__init__.py @@ -22,6 +22,8 @@ import pathlib from collections.abc import Mapping, MutableMapping +raise RuntimeError("Temporarily make the pandas backend dysfunctional") + class BasePandasBackend(BaseBackend): """Base class for backends based on pandas.""" diff --git a/ibis/backends/pandas/core.py b/ibis/backends/pandas/core.py index ef29b2bb29cc..50ae12d30b2e 100644 --- a/ibis/backends/pandas/core.py +++ b/ibis/backends/pandas/core.py @@ -135,6 +135,7 @@ if TYPE_CHECKING: from collections.abc import Iterable, Mapping + integer_types = np.integer, int floating_types = (numbers.Real,) numeric_types = integer_types + floating_types diff --git a/ibis/tests/expr/test_interactive.py b/ibis/backends/tests/test_interactive.py similarity index 96% rename from ibis/tests/expr/test_interactive.py rename to ibis/backends/tests/test_interactive.py index 5f6d3725025b..111be16de5ee 100644 --- a/ibis/tests/expr/test_interactive.py +++ b/ibis/backends/tests/test_interactive.py @@ -16,12 +16,6 @@ import pytest from ibis import config -from ibis.tests.expr.mocks import MockBackend - - -@pytest.fixture -def con(): - return MockBackend() def test_interactive_execute_on_repr(con): diff --git a/ibis/expr/tests/conftest.py b/ibis/expr/tests/conftest.py index 24105a2cb1bd..c571947a5a45 100644 --- a/ibis/expr/tests/conftest.py +++ b/ibis/expr/tests/conftest.py @@ -45,6 +45,13 @@ "star3": [("bar_id", "string"), ("value2", "double")], "test1": 
[("c", "int32"), ("f", "double"), ("g", "string")], "test2": [("key", "string"), ("value", "double")], + "geo": [ + ("id", "int32"), + ("geo_point", "point"), + ("geo_linestring", "linestring"), + ("geo_polygon", "polygon"), + ("geo_multipolygon", "multipolygon"), + ], "tpch_region": [ ("r_regionkey", "int16"), ("r_name", "string"), diff --git a/ibis/tests/benchmarks/test_benchmarks.py b/ibis/tests/benchmarks/test_benchmarks.py index 4977e1bc18f2..debc26942ebc 100644 --- a/ibis/tests/benchmarks/test_benchmarks.py +++ b/ibis/tests/benchmarks/test_benchmarks.py @@ -22,6 +22,9 @@ pytestmark = pytest.mark.benchmark +# TODO(kszucs): this should be moved under the backends since the benchmarks are +# backend specific + def make_t(): return ibis.table( diff --git a/ibis/tests/expr/mocks.py b/ibis/tests/expr/mocks.py index db1ba44b6750..cfb7e7e4aa5c 100644 --- a/ibis/tests/expr/mocks.py +++ b/ibis/tests/expr/mocks.py @@ -16,19 +16,14 @@ import contextlib -import pytest -import sqlalchemy as sa - import ibis.expr.operations as ops import ibis.expr.types as ir -from ibis.backends.base.sql import BaseSQLBackend -from ibis.backends.base.sql.alchemy import AlchemyCompiler -from ibis.backends.base.sql.alchemy.datatypes import AlchemyType +from ibis.backends.base import BaseBackend from ibis.expr.schema import Schema from ibis.expr.tests.conftest import MOCK_TABLES -class MockBackend(BaseSQLBackend): +class MockBackend(BaseBackend): name = "mock" version = "1.0" current_database = "mockdb" @@ -41,6 +36,11 @@ def __init__(self): def do_connect(self): pass + def table(self, name, **kwargs): + schema = self.get_schema(name) + node = ops.DatabaseTable(source=self, name=name, schema=schema) + return node.to_expr() + def list_tables(self): return list(MOCK_TABLES) @@ -54,33 +54,14 @@ def get_schema(self, name): name = name.replace("`", "") return Schema.from_tuples(MOCK_TABLES[name]) - def to_pyarrow(self, expr, limit=None, params=None, **kwargs): - ast = self.compiler.to_ast_ensure_limit(expr, limit, params=params) - for query in ast.queries: - self.executed_queries.append(query.compile()) - - if isinstance(expr, ir.Scalar): - return None - elif isinstance(expr, ir.Column): - schema = expr.as_table().schema() - return schema.to_pyarrow().empty_table()[0] - else: - return expr.schema().to_pyarrow().empty_table() - - def execute(self, expr, limit=None, params=None, **kwargs): - out = self.to_pyarrow(expr, limit=limit, params=params, **kwargs) - return None if out is None else out.to_pandas() - - def compile( - self, - expr, - limit=None, - params=None, - timecontext=None, - ): - ast = self.compiler.to_ast_ensure_limit(expr, limit, params=params) - queries = [q.compile() for q in ast.queries] - return queries[0] if len(queries) == 1 else queries + def to_pyarrow(self, *_, **__): + raise NotImplementedError(self.name) + + def execute(self, *_, **__): + raise NotImplementedError(self.name) + + def compile(self, *_, **__): + raise NotImplementedError(self.name) def create_table(self, *_, **__) -> ir.Table: raise NotImplementedError(self.name) @@ -108,69 +89,3 @@ def set_query_schema(self, query, schema): self.sql_query_schemas[query] = schema yield self.sql_query_schemas.pop(query, None) - - -def table_from_schema(name, meta, schema, *, database: str | None = None): - # Convert Ibis schema to SQLA table - columns = [] - - for colname, dtype in zip(schema.names, schema.types): - satype = AlchemyType.from_ibis(dtype) - column = sa.Column(colname, satype, nullable=dtype.nullable) - columns.append(column) - - return 
sa.Table(name, meta, *columns, schema=database) - - -class MockAlchemyBackend(MockBackend): - compiler = AlchemyCompiler - - def __init__(self): - super().__init__() - pytest.importorskip("sqlalchemy") - self.tables = {} - - def table(self, name, **_): - schema = self.get_schema(name) - return self._inject_table(name, schema) - - def _inject_table(self, name, schema): - if name not in self.tables: - self.tables[name] = table_from_schema(name, sa.MetaData(), schema) - return ops.DatabaseTable(source=self, name=name, schema=schema).to_expr() - - def _get_sqla_table(self, name, **_): - return self.tables[name] - - -GEO_TABLE = { - "geo": [ - ("id", "int32"), - ("geo_point", "point"), - ("geo_linestring", "linestring"), - ("geo_polygon", "polygon"), - ("geo_multipolygon", "multipolygon"), - ] -} - - -class GeoMockConnectionPostGIS(MockAlchemyBackend): - _tables = GEO_TABLE - - def __init__(self): - super().__init__() - self.executed_queries = [] - - def get_schema(self, name): - return Schema.from_tuples(self._tables[name]) - - -class GeoMockConnectionOmniSciDB(MockBackend): - _tables = GEO_TABLE - - def __init__(self): - super().__init__() - self.executed_queries = [] - - def get_schema(self, name): - return Schema.from_tuples(self._tables[name]) diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index 58220df7af61..dd873f95bf99 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -23,7 +23,6 @@ from ibis.common.exceptions import RelationError from ibis.expr import api from ibis.expr.types import Column, Table -from ibis.tests.expr.mocks import MockAlchemyBackend, MockBackend from ibis.tests.util import assert_equal, assert_pickle_roundtrip @@ -1509,24 +1508,26 @@ def test_mutate_chain(): # TODO(kszucs): move this test case to ibis/tests/sql since it requires the # sql backend to be executed -def test_multiple_dbcon(): - """Expr from multiple connections to same DB should be compatible.""" - con1 = MockBackend() - con2 = MockBackend() +# def test_multiple_dbcon(): +# """Expr from multiple connections to same DB should be compatible.""" +# con1 = MockBackend() +# con2 = MockBackend() - con1.table("alltypes").union(con2.table("alltypes")).execute() +# con1.table("alltypes").union(con2.table("alltypes")).execute() -def test_multiple_db_different_backends(): - con1 = MockBackend() - con2 = MockAlchemyBackend() +# TODO(kszucs): move this test to ibis/tests/sql since it requires the +# sql backend to be executed +# def test_multiple_db_different_backends(): +# con1 = MockBackend() +# con2 = MockAlchemyBackend() - backend1_table = con1.table("alltypes") - backend2_table = con2.table("alltypes") +# backend1_table = con1.table("alltypes") +# backend2_table = con2.table("alltypes") - expr = backend1_table.union(backend2_table) - with pytest.raises(com.IbisError, match="Multiple backends"): - expr.compile() +# expr = backend1_table.union(backend2_table) +# with pytest.raises(com.IbisError, match="Multiple backends"): +# expr.compile() def test_merge_as_of_allows_overlapping_columns(): From 4986c91706283c0fdb936df2ce02ba711dd6f4b0 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 5 Dec 2023 14:20:38 -0500 Subject: [PATCH 002/161] test(sql): move sql tests requiring a functional backend from `ibis/tests/sql` to `ibis/backends/tests/sql` --- ibis/{ => backends}/tests/sql/__init__.py | 0 ibis/{ => backends}/tests/sql/conftest.py | 7 +- .../test_agg_and_non_agg_filter/out.sql | 0 
.../test_compiler/test_agg_filter/out.sql | 0 .../test_agg_filter_with_alias/out.sql | 0 .../test_column_distinct/decompiled.py | 0 .../test_column_distinct/out.sql | 0 .../decompiled.py | 0 .../test_column_expr_default_name/out.sql | 0 .../decompiled.py | 0 .../test_column_expr_retains_name/out.sql | 0 .../test_count_distinct/decompiled.py | 0 .../test_compiler/test_count_distinct/out.sql | 0 .../decompiled.py | 0 .../test_difference_project_column/out.sql | 0 .../test_having_from_filter/decompiled.py | 0 .../test_having_from_filter/out.sql | 0 .../test_compiler/test_having_size/out.sql | 0 .../decompiled.py | 0 .../test_intersect_project_column/out.sql | 0 .../decompiled.py | 0 .../test_multiple_count_distinct/out.sql | 0 .../test_pushdown_with_or/out.sql | 0 .../test_simple_agg_filter/out.sql | 0 .../decompiled.py | 0 .../test_subquery_where_location/out.sql | 0 .../test_table_difference/decompiled.py | 0 .../test_table_difference/out.sql | 0 .../test_table_distinct/decompiled.py | 0 .../test_compiler/test_table_distinct/out.sql | 0 .../test_table_drop_with_filter/decompiled.py | 0 .../test_table_drop_with_filter/out.sql | 0 .../test_table_intersect/decompiled.py | 0 .../test_table_intersect/out.sql | 0 .../test_compiler/test_union/decompiled.py | 0 .../test_compiler/test_union/out.sql | 0 .../test_union_order_by/decompiled.py | 0 .../test_compiler/test_union_order_by/out.sql | 0 .../test_union_project_column/decompiled.py | 0 .../test_union_project_column/out.sql | 0 .../test_aggregate_count_joined/decompiled.py | 0 .../test_aggregate_count_joined/out.sql | 0 .../test_aggregate_having/explicit.sql | 0 .../test_aggregate_having/inline.sql | 0 .../out.sql | 0 .../agg_filtered.sql | 0 .../agg_filtered2.sql | 0 .../filtered.sql | 0 .../proj.sql | 0 .../test_anti_join/decompiled.py | 0 .../test_select_sql/test_anti_join/out.sql | 0 .../test_bool_bool/decompiled.py | 0 .../test_select_sql/test_bool_bool/out.sql | 0 .../test_bug_duplicated_where/out.sql | 0 .../test_bug_project_multiple_times/out.sql | 0 .../test_case_in_projection/decompiled.py | 0 .../test_case_in_projection/out.sql | 0 .../result.sql | 0 .../test_complex_union/result.sql | 0 .../out.sql | 0 .../test_endswith/decompiled.py | 0 .../test_select_sql/test_endswith/out.sql | 0 .../test_filter_inside_exists/out.sql | 0 .../test_filter_predicates/out.sql | 0 .../result.sql | 0 .../expr3.sql | 0 .../expr4.sql | 0 .../test_fuse_projections/decompiled.py | 0 .../test_fuse_projections/project.sql | 0 .../test_fuse_projections/project_filter.sql | 0 .../test_identifier_quoting/out.sql | 0 .../result.sql | 0 .../result.sql | 0 .../test_join_between_joins/decompiled.py | 0 .../test_join_between_joins/out.sql | 0 .../out.sql | 0 .../test_join_just_materialized/decompiled.py | 0 .../test_join_just_materialized/out.sql | 0 .../test_join_projection_subquery_bug/out.sql | 0 .../test_join_with_limited_table/out.sql | 0 .../test_limit_cte_extract/out.sql | 0 .../test_limit_with_self_join/decompiled.py | 0 .../test_limit_with_self_join/out.sql | 0 .../test_loj_subquery_filter_handling/out.sql | 0 .../test_multiple_joins/decompiled.py | 0 .../test_multiple_joins/out.sql | 0 .../test_multiple_limits/decompiled.py | 0 .../test_multiple_limits/out.sql | 0 .../test_nameless_table/decompiled.py | 0 .../test_nameless_table/out.sql | 0 .../decompiled.py | 0 .../out.sql | 0 .../test_projection_filter_fuse/out.sql | 0 .../out.sql | 0 .../agg_explicit_column/decompiled.py | 0 .../agg_explicit_column/out.sql | 0 .../agg_string_columns/decompiled.py | 0 
.../agg_string_columns/out.sql | 0 .../decompiled.py | 0 .../aggregate_table_count_metric/out.sql | 0 .../filter_then_limit/decompiled.py | 0 .../test_select_sql/filter_then_limit/out.sql | 0 .../limit_simple/decompiled.py | 0 .../test_select_sql/limit_simple/out.sql | 0 .../limit_then_filter/decompiled.py | 0 .../test_select_sql/limit_then_filter/out.sql | 0 .../limit_with_offset/decompiled.py | 0 .../test_select_sql/limit_with_offset/out.sql | 0 .../mixed_columns_ascending/decompiled.py | 0 .../mixed_columns_ascending/out.sql | 0 .../self_reference_simple/decompiled.py | 0 .../self_reference_simple/out.sql | 0 .../single_column/decompiled.py | 0 .../test_select_sql/single_column/out.sql | 0 .../decompiled.py | 0 .../out.sql | 0 .../out.sql | 0 .../test_semi_join/decompiled.py | 0 .../test_select_sql/test_semi_join/out.sql | 0 .../test_simple_joins/decompiled.py | 0 .../test_simple_joins/inner.sql | 0 .../test_simple_joins/inner_two_preds.sql | 0 .../test_simple_joins/left.sql | 0 .../test_simple_joins/outer.sql | 0 .../result1.sql | 0 .../result2.sql | 0 .../test_startswith/decompiled.py | 0 .../test_select_sql/test_startswith/out.sql | 0 .../out.sql | 0 .../expr.sql | 0 .../expr2.sql | 0 .../test_subquery_in_union/decompiled.py | 0 .../test_subquery_in_union/out.sql | 0 .../test_subquery_used_for_self_join/out.sql | 0 .../test_topk_analysis_bug/out.sql | 0 .../test_topk_operation/e1.sql | 0 .../test_topk_operation/e2.sql | 0 .../test_topk_predicate_pushdown_bug/out.sql | 0 .../test_topk_to_aggregate/out.sql | 0 .../test_tpch_self_join_failure/out.sql | 0 .../decompiled.py | 0 .../test_where_analyze_scalar_op/out.sql | 0 .../decompiled.py | 0 .../test_where_no_pushdown_possible/out.sql | 0 .../test_where_with_between/decompiled.py | 0 .../test_where_with_between/out.sql | 0 .../test_where_with_join/decompiled.py | 0 .../test_where_with_join/out.sql | 0 .../test_aggregate/having_count/out.sql | 0 .../test_aggregate/having_sum/out.sql | 0 .../test_aggregate/single/out.sql | 0 .../test_aggregate/two/out.sql | 0 .../test_sqlalchemy/test_between/out.sql | 0 .../test_boolean_conjunction/and/out.sql | 0 .../test_boolean_conjunction/or/out.sql | 0 .../test_sqlalchemy/test_coalesce/out.sql | 0 .../test_comparisons/eq/out.sql | 0 .../test_comparisons/ge/out.sql | 0 .../test_comparisons/gt/out.sql | 0 .../test_comparisons/le/out.sql | 0 .../test_comparisons/lt/out.sql | 0 .../test_comparisons/ne/out.sql | 0 .../out.sql | 0 .../test_distinct/count_distinct/out.sql | 0 .../group_by_count_distinct/out.sql | 0 .../test_distinct/projection_distinct/out.sql | 0 .../single_column_projection_distinct/out.sql | 0 .../test_distinct/table_distinct/out.sql | 0 .../test_sqlalchemy/test_exists/e1.sql | 0 .../test_sqlalchemy/test_exists/e2.sql | 0 .../out.sql | 0 .../test_sqlalchemy/test_gh_1045/out.sql | 0 .../test_isnull_notnull/isnull/out.sql | 0 .../test_isnull_notnull/notnull/out.sql | 0 .../test_join_just_materialized/out.sql | 0 .../test_sqlalchemy/test_joins/inner/out.sql | 0 .../test_joins/inner_select/out.sql | 0 .../test_sqlalchemy/test_joins/left/out.sql | 0 .../test_joins/left_select/out.sql | 0 .../test_sqlalchemy/test_joins/outer/out.sql | 0 .../test_joins/outer_select/out.sql | 0 .../test_limit/expr_fn0/out.sql | 0 .../test_limit/expr_fn1/out.sql | 0 .../test_sqlalchemy/test_limit_filter/out.sql | 0 .../test_limit_subquery/out.sql | 0 .../decompiled.py | 0 .../test_lower_projection_sort_key/out.sql | 0 .../test_sqlalchemy/test_multi_join/out.sql | 0 .../out.sql | 0 
.../test_sqlalchemy/test_named_expr/out.sql | 0 .../test_sqlalchemy/test_negate/out.sql | 0 .../test_sqlalchemy/test_no_cart_join/out.sql | 0 .../test_no_cross_join/out.sql | 0 .../test_sqlalchemy/test_not_exists/out.sql | 0 .../test_order_by/column/out.sql | 0 .../test_order_by/random/out.sql | 0 .../test_order_by_expr/out.sql | 0 .../test_searched_case/out.sql | 0 .../anti.sql | 0 .../semi.sql | 0 .../test_self_reference_join/out.sql | 0 .../test_sqlalchemy/test_simple_case/out.sql | 0 .../out.sql | 0 .../test_subquery_aliased/out.sql | 0 .../test_sqlalchemy/test_tpc_h11/out.sql | 0 .../test_sqlalchemy/test_tpc_h17/out.sql | 0 .../test_where_correlated_subquery/out.sql | 0 .../out.sql | 0 .../decompiled.py | 0 .../test_where_simple_comparisons/out.sql | 0 .../test_where_uncorrelated_subquery/out.sql | 0 .../{ => backends}/tests/sql/test_compiler.py | 6 +- .../tests/sql/test_select_sql.py | 4 +- .../tests/sql/test_sql.py} | 213 +++++------------- ibis/tests/sql/test_ast_builder.py | 80 ------- 215 files changed, 74 insertions(+), 236 deletions(-) rename ibis/{ => backends}/tests/sql/__init__.py (100%) rename ibis/{ => backends}/tests/sql/conftest.py (96%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_column_distinct/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_column_expr_default_name/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_count_distinct/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_difference_project_column/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_having_size/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_intersect_project_column/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql (100%) rename ibis/{ => 
backends}/tests/sql/snapshots/test_compiler/test_subquery_where_location/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_table_difference/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_table_difference/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_table_distinct/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_table_intersect/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_union/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_union/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_union_order_by/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_union_project_column/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_bool_bool/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_case_in_projection/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql (100%) rename ibis/{ 
=> backends}/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_endswith/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_endswith/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_fuse_projections/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_multiple_limits/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_nameless_table/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_nameless_table/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/decompiled.py (100%) rename ibis/{ => 
backends}/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/mixed_columns_ascending/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/mixed_columns_ascending/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql (100%) rename ibis/{ 
=> backends}/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_startswith/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_startswith/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_subquery_in_union/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_where_with_between/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_aggregate/having_count/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_aggregate/having_sum/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_aggregate/single/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_aggregate/two/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_between/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_boolean_conjunction/and/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_boolean_conjunction/or/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_coalesce/out.sql 
(100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_comparisons/eq/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_comparisons/ge/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_comparisons/gt/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_comparisons/le/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_comparisons/lt/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_comparisons/ne/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_cte_factor_distinct_but_equal/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_distinct/count_distinct/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_distinct/group_by_count_distinct/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_distinct/projection_distinct/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_distinct/single_column_projection_distinct/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_distinct/table_distinct/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_exists/e1.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_exists/e2.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_filter_group_by_agg_with_same_name/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_gh_1045/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_isnull_notnull/isnull/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_isnull_notnull/notnull/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_join_just_materialized/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_joins/inner/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_joins/inner_select/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_joins/left/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_joins/left_select/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_joins/outer/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_joins/outer_select/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_limit/expr_fn0/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_limit/expr_fn1/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_limit_filter/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_limit_subquery/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_lower_projection_sort_key/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_lower_projection_sort_key/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_multi_join/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_mutate_filter_join_no_cross_join/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_named_expr/out.sql (100%) rename ibis/{ => 
backends}/tests/sql/snapshots/test_sqlalchemy/test_negate/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_no_cart_join/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_no_cross_join/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_not_exists/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_order_by/column/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_order_by/random/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_order_by_expr/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_searched_case/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_self_reference_in_not_exists/anti.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_self_reference_in_not_exists/semi.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_self_reference_join/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_simple_case/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_sort_aggregation_translation_failure/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_subquery_aliased/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_tpc_h11/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_tpc_h17/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_where_correlated_subquery/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_where_correlated_subquery_with_join/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_where_simple_comparisons/decompiled.py (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_where_simple_comparisons/out.sql (100%) rename ibis/{ => backends}/tests/sql/snapshots/test_sqlalchemy/test_where_uncorrelated_subquery/out.sql (100%) rename ibis/{ => backends}/tests/sql/test_compiler.py (98%) rename ibis/{ => backends}/tests/sql/test_select_sql.py (99%) rename ibis/{tests/sql/test_sqlalchemy.py => backends/tests/sql/test_sql.py} (73%) delete mode 100644 ibis/tests/sql/test_ast_builder.py diff --git a/ibis/tests/sql/__init__.py b/ibis/backends/tests/sql/__init__.py similarity index 100% rename from ibis/tests/sql/__init__.py rename to ibis/backends/tests/sql/__init__.py diff --git a/ibis/tests/sql/conftest.py b/ibis/backends/tests/sql/conftest.py similarity index 96% rename from ibis/tests/sql/conftest.py rename to ibis/backends/tests/sql/conftest.py index 6121fa68fb41..5d2c66b89679 100644 --- a/ibis/tests/sql/conftest.py +++ b/ibis/backends/tests/sql/conftest.py @@ -3,8 +3,11 @@ import pytest import ibis -from ibis.backends.base.sql.compiler import Compiler, QueryContext -from ibis.tests.expr.mocks import MockBackend + +pytest.importorskip("duckdb") + +from ibis.backends.duckdb import Backend as DuckDBBackend # noqa: E402 +from ibis.tests.expr.mocks import MockBackend # noqa: E402 @pytest.fixture(scope="module") diff --git a/ibis/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql rename to 
ibis/backends/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_column_distinct/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_column_distinct/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_column_expr_default_name/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_column_expr_default_name/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_count_distinct/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_count_distinct/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql 
b/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_difference_project_column/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_difference_project_column/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_having_size/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_having_size/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_intersect_project_column/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_intersect_project_column/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql similarity index 100% rename from 
ibis/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_subquery_where_location/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_subquery_where_location/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_table_difference/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_table_difference/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_compiler/test_table_difference/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_table_difference/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_table_distinct/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_table_distinct/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py diff --git 
a/ibis/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_table_intersect/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_table_intersect/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_union/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_union/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_union/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_union/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_compiler/test_union/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_union/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_union_order_by/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_union_order_by/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql diff --git a/ibis/tests/sql/snapshots/test_compiler/test_union_project_column/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_union_project_column/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql rename to ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py rename to 
ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_bool_bool/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_bool_bool/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_case_in_projection/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_case_in_projection/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql rename to 
ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_endswith/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_endswith/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_endswith/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_endswith/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_fuse_projections/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_fuse_projections/decompiled.py rename to 
ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py similarity index 100% rename from 
ibis/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql similarity index 100% rename from 
ibis/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_multiple_limits/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_multiple_limits/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_nameless_table/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_nameless_table/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_nameless_table/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_nameless_table/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_nameless_table/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_nameless_table/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_nameless_table/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_nameless_table/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/decompiled.py similarity index 100% rename from 
ibis/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/decompiled.py 
rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/mixed_columns_ascending/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/mixed_columns_ascending/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/mixed_columns_ascending/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/mixed_columns_ascending/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/mixed_columns_ascending/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/mixed_columns_ascending/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/mixed_columns_ascending/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/mixed_columns_ascending/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/decompiled.py diff --git 
a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py similarity index 100% 
rename from ibis/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_startswith/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_startswith/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_startswith/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_startswith/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql 
rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_subquery_in_union/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_subquery_in_union/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql rename to 
ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_where_with_between/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_where_with_between/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py rename to 
ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql rename to ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_aggregate/having_count/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_aggregate/having_count/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_aggregate/having_count/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_aggregate/having_count/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_aggregate/having_sum/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_aggregate/having_sum/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_aggregate/having_sum/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_aggregate/having_sum/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_aggregate/single/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_aggregate/single/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_aggregate/single/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_aggregate/single/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_aggregate/two/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_aggregate/two/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_aggregate/two/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_aggregate/two/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_between/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_between/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_between/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_between/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_boolean_conjunction/and/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_boolean_conjunction/and/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_boolean_conjunction/and/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_boolean_conjunction/and/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_boolean_conjunction/or/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_boolean_conjunction/or/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_boolean_conjunction/or/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_boolean_conjunction/or/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_coalesce/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_coalesce/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_coalesce/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_coalesce/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_comparisons/eq/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_comparisons/eq/out.sql similarity index 100% rename from 
ibis/tests/sql/snapshots/test_sqlalchemy/test_comparisons/eq/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_comparisons/eq/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_comparisons/ge/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_comparisons/ge/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_comparisons/ge/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_comparisons/ge/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_comparisons/gt/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_comparisons/gt/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_comparisons/gt/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_comparisons/gt/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_comparisons/le/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_comparisons/le/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_comparisons/le/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_comparisons/le/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_comparisons/lt/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_comparisons/lt/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_comparisons/lt/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_comparisons/lt/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_comparisons/ne/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_comparisons/ne/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_comparisons/ne/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_comparisons/ne/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_cte_factor_distinct_but_equal/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_cte_factor_distinct_but_equal/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_cte_factor_distinct_but_equal/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_cte_factor_distinct_but_equal/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_distinct/count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_distinct/count_distinct/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_distinct/count_distinct/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_distinct/count_distinct/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_distinct/group_by_count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_distinct/group_by_count_distinct/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_distinct/group_by_count_distinct/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_distinct/group_by_count_distinct/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_distinct/projection_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_distinct/projection_distinct/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_distinct/projection_distinct/out.sql rename to 
ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_distinct/projection_distinct/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_distinct/single_column_projection_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_distinct/single_column_projection_distinct/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_distinct/single_column_projection_distinct/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_distinct/single_column_projection_distinct/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_distinct/table_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_distinct/table_distinct/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_distinct/table_distinct/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_distinct/table_distinct/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_exists/e1.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_exists/e1.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_exists/e1.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_exists/e1.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_exists/e2.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_exists/e2.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_exists/e2.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_exists/e2.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_filter_group_by_agg_with_same_name/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_filter_group_by_agg_with_same_name/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_filter_group_by_agg_with_same_name/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_filter_group_by_agg_with_same_name/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_gh_1045/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_gh_1045/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_gh_1045/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_gh_1045/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_isnull_notnull/isnull/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_isnull_notnull/isnull/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_isnull_notnull/isnull/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_isnull_notnull/isnull/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_isnull_notnull/notnull/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_isnull_notnull/notnull/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_isnull_notnull/notnull/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_isnull_notnull/notnull/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_join_just_materialized/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_join_just_materialized/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_join_just_materialized/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_join_just_materialized/out.sql diff --git 
a/ibis/tests/sql/snapshots/test_sqlalchemy/test_joins/inner/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_joins/inner/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_joins/inner/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_joins/inner/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_joins/inner_select/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_joins/inner_select/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_joins/inner_select/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_joins/inner_select/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_joins/left/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_joins/left/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_joins/left/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_joins/left/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_joins/left_select/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_joins/left_select/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_joins/left_select/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_joins/left_select/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_joins/outer/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_joins/outer/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_joins/outer/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_joins/outer/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_joins/outer_select/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_joins/outer_select/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_joins/outer_select/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_joins/outer_select/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_limit/expr_fn0/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_limit/expr_fn0/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_limit/expr_fn0/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_limit/expr_fn0/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_limit/expr_fn1/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_limit/expr_fn1/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_limit/expr_fn1/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_limit/expr_fn1/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_limit_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_limit_filter/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_limit_filter/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_limit_filter/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_limit_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_limit_subquery/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_limit_subquery/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_limit_subquery/out.sql diff --git 
a/ibis/tests/sql/snapshots/test_sqlalchemy/test_lower_projection_sort_key/decompiled.py b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_lower_projection_sort_key/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_lower_projection_sort_key/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_lower_projection_sort_key/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_lower_projection_sort_key/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_lower_projection_sort_key/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_lower_projection_sort_key/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_lower_projection_sort_key/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_multi_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_multi_join/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_multi_join/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_multi_join/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_mutate_filter_join_no_cross_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_mutate_filter_join_no_cross_join/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_mutate_filter_join_no_cross_join/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_mutate_filter_join_no_cross_join/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_named_expr/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_named_expr/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_named_expr/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_named_expr/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_negate/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_negate/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_negate/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_negate/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_no_cart_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_no_cart_join/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_no_cart_join/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_no_cart_join/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_no_cross_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_no_cross_join/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_no_cross_join/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_no_cross_join/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_not_exists/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_not_exists/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_not_exists/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_not_exists/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_order_by/column/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_order_by/column/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_order_by/column/out.sql rename to 
ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_order_by/column/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_order_by/random/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_order_by/random/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_order_by/random/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_order_by/random/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_order_by_expr/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_order_by_expr/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_order_by_expr/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_order_by_expr/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_searched_case/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_searched_case/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_searched_case/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_searched_case/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_self_reference_in_not_exists/anti.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_self_reference_in_not_exists/anti.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_self_reference_in_not_exists/anti.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_self_reference_in_not_exists/anti.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_self_reference_in_not_exists/semi.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_self_reference_in_not_exists/semi.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_self_reference_in_not_exists/semi.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_self_reference_in_not_exists/semi.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_self_reference_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_self_reference_join/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_self_reference_join/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_self_reference_join/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_simple_case/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_simple_case/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_simple_case/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_simple_case/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_sort_aggregation_translation_failure/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_sort_aggregation_translation_failure/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_sort_aggregation_translation_failure/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_sort_aggregation_translation_failure/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_subquery_aliased/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_subquery_aliased/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_subquery_aliased/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_subquery_aliased/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_tpc_h11/out.sql 
b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_tpc_h11/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_tpc_h11/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_tpc_h11/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_tpc_h17/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_tpc_h17/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_tpc_h17/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_tpc_h17/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_where_correlated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_where_correlated_subquery/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_where_correlated_subquery/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_where_correlated_subquery/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_where_correlated_subquery_with_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_where_correlated_subquery_with_join/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_where_correlated_subquery_with_join/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_where_correlated_subquery_with_join/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_where_simple_comparisons/decompiled.py b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_where_simple_comparisons/decompiled.py similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_where_simple_comparisons/decompiled.py rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_where_simple_comparisons/decompiled.py diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_where_simple_comparisons/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_where_simple_comparisons/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_where_simple_comparisons/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_where_simple_comparisons/out.sql diff --git a/ibis/tests/sql/snapshots/test_sqlalchemy/test_where_uncorrelated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_where_uncorrelated_subquery/out.sql similarity index 100% rename from ibis/tests/sql/snapshots/test_sqlalchemy/test_where_uncorrelated_subquery/out.sql rename to ibis/backends/tests/sql/snapshots/test_sqlalchemy/test_where_uncorrelated_subquery/out.sql diff --git a/ibis/tests/sql/test_compiler.py b/ibis/backends/tests/sql/test_compiler.py similarity index 98% rename from ibis/tests/sql/test_compiler.py rename to ibis/backends/tests/sql/test_compiler.py index e78db023542e..c331c6069ccb 100644 --- a/ibis/tests/sql/test_compiler.py +++ b/ibis/backends/tests/sql/test_compiler.py @@ -2,11 +2,15 @@ import datetime +import pytest + import ibis from ibis.backends.base.sql.compiler import Compiler -from ibis.tests.sql.conftest import to_sql +from ibis.backends.tests.sql.conftest import to_sql from ibis.tests.util import assert_decompile_roundtrip +pytestmark = pytest.mark.duckdb + def test_union(union, snapshot): snapshot.assert_match(to_sql(union), "out.sql") diff --git a/ibis/tests/sql/test_select_sql.py b/ibis/backends/tests/sql/test_select_sql.py similarity index 99% rename from ibis/tests/sql/test_select_sql.py rename to ibis/backends/tests/sql/test_select_sql.py index 
517714c5226a..c736c4f5f178 100644 --- a/ibis/tests/sql/test_select_sql.py +++ b/ibis/backends/tests/sql/test_select_sql.py @@ -6,9 +6,11 @@ import ibis from ibis import _ from ibis.backends.base.sql.compiler import Compiler -from ibis.tests.sql.conftest import get_query, to_sql +from ibis.backends.tests.sql.conftest import get_query, to_sql from ibis.tests.util import assert_decompile_roundtrip +pytestmark = pytest.mark.duckdb + @pytest.mark.parametrize( "expr_fn", diff --git a/ibis/tests/sql/test_sqlalchemy.py b/ibis/backends/tests/sql/test_sql.py similarity index 73% rename from ibis/tests/sql/test_sqlalchemy.py rename to ibis/backends/tests/sql/test_sql.py index 43aa962d5ca2..38a966a2e2fc 100644 --- a/ibis/tests/sql/test_sqlalchemy.py +++ b/ibis/backends/tests/sql/test_sql.py @@ -17,78 +17,68 @@ from operator import methodcaller import pytest -import sqlglot as sg from pytest import param -from sqlalchemy import types as sat import ibis -import ibis.expr.datatypes as dt -from ibis.backends.base.sql.alchemy import AlchemyCompiler, BaseAlchemyBackend -from ibis.backends.base.sql.alchemy.datatypes import AlchemyType, ArrayType -from ibis.tests.expr.mocks import MockAlchemyBackend -from ibis.tests.util import assert_decompile_roundtrip, assert_equal +from ibis.backends.tests.sql.conftest import to_sql +from ibis.tests.util import assert_decompile_roundtrip -sa = pytest.importorskip("sqlalchemy") - - -L = sa.literal - - -def to_sql(expr, *args, **kwargs) -> str: - compiled = AlchemyCompiler.to_sql(expr, *args, **kwargs) - sqlstring = str(compiled.compile(compile_kwargs=dict(literal_binds=True))) - return sg.parse_one(sqlstring).sql(pretty=True, dialect="duckdb") - - -@pytest.fixture(scope="module") -def con(): - return MockAlchemyBackend() +pytestmark = pytest.mark.duckdb @pytest.fixture(scope="module") -def star1(con): - return con.table("star1") +def star1(): + return ibis.table( + [ + ("c", "int32"), + ("f", "double"), + ("foo_id", "string"), + ("bar_id", "string"), + ], + name="star1", + ) @pytest.fixture(scope="module") -def functional_alltypes(con): - return con.table("functional_alltypes") +def functional_alltypes(): + return ibis.table( + { + "id": "int32", + "bool_col": "boolean", + "tinyint_col": "int8", + "smallint_col": "int16", + "int_col": "int32", + "bigint_col": "int64", + "float_col": "float32", + "double_col": "float64", + "date_string_col": "string", + "string_col": "string", + "timestamp_col": "timestamp", + "year": "int32", + "month": "int32", + }, + name="functional_alltypes", + ) @pytest.fixture(scope="module") -def alltypes(con): - return con.table("alltypes") - - -def test_sqla_schema_conversion(): - typespec = [ - # name, type, nullable - ("smallint", sat.SMALLINT, False, dt.int16), - ("smallint_", sat.SmallInteger, False, dt.int16), - ("int", sat.INTEGER, True, dt.int32), - ("integer", sat.INTEGER, True, dt.int32), - ("integer_", sat.Integer, True, dt.int32), - ("bigint", sat.BIGINT, False, dt.int64), - ("bigint_", sat.BigInteger, False, dt.int64), - ("real", sat.REAL, True, dt.float32), - ("bool", sat.BOOLEAN, True, dt.bool), - ("bool_", sat.Boolean, True, dt.bool), - ("timestamp", sat.DATETIME, True, dt.timestamp), - ("timestamp_", sat.DateTime, True, dt.timestamp), - ] - - sqla_types = [] - ibis_types = [] - for name, t, nullable, ibis_type in typespec: - sqla_types.append(sa.Column(name, t, nullable=nullable)) - ibis_types.append((name, ibis_type(nullable=nullable))) - - table = sa.Table("tname", sa.MetaData(), *sqla_types) - - schema = 
BaseAlchemyBackend._schema_from_sqla_table(table) - expected = ibis.schema(ibis_types) - - assert_equal(schema, expected) +def alltypes(): + return ibis.table( + [ + ("a", "int8"), + ("b", "int16"), + ("c", "int32"), + ("d", "int64"), + ("e", "float32"), + ("f", "float64"), + ("g", "string"), + ("h", "boolean"), + ("i", "timestamp"), + ("j", "date"), + ("k", "time"), + ], + name="alltypes", + ) @pytest.mark.parametrize("opname", ["ge", "gt", "lt", "le", "eq", "ne"]) @@ -151,9 +141,9 @@ def test_named_expr(functional_alltypes, snapshot): ], ids=["inner", "left", "outer", "inner_select", "left_select", "outer_select"], ) -def test_joins(con, expr_fn, snapshot): - region = con.table("tpch_region") - nation = con.table("tpch_nation") +def test_joins(tpch_region, tpch_nation, expr_fn, snapshot): + region = tpch_region + nation = tpch_nation expr = expr_fn(region, nation) snapshot.assert_match(to_sql(expr), "out.sql") @@ -170,11 +160,11 @@ def test_join_just_materialized(nation, region, customer, snapshot): snapshot.assert_match(to_sql(joined), "out.sql") -def test_full_outer_join(con): +def test_full_outer_join(tpch_region, tpch_nation): """Testing full outer join separately due to previous issue with outer join resulting in left outer join (issue #1773)""" - region = con.table("tpch_region") - nation = con.table("tpch_nation") + region = tpch_region + nation = tpch_nation predicate = region.r_regionkey == nation.n_regionkey joined = region.outer_join(nation, predicate) @@ -247,9 +237,9 @@ def test_limit_subquery(star1, snapshot): snapshot.assert_match(to_sql(expr), "out.sql") -def test_cte_factor_distinct_but_equal(con, snapshot): - t = con.table("alltypes") - tt = con.table("alltypes") +def test_cte_factor_distinct_but_equal(alltypes, snapshot): + t = alltypes + tt = alltypes.view() expr1 = t.group_by("g").aggregate(t.f.sum().name("metric")) expr2 = tt.group_by("g").aggregate(tt.f.sum().name("metric")).view() @@ -267,8 +257,8 @@ def test_self_reference_join(star1, snapshot): snapshot.assert_match(to_sql(expr), "out.sql") -def test_self_reference_in_not_exists(con, snapshot): - t = con.table("functional_alltypes") +def test_self_reference_in_not_exists(functional_alltypes, snapshot): + t = functional_alltypes t2 = t.view() cond = (t.string_col == t2.string_col).any() @@ -318,7 +308,7 @@ def test_lower_projection_sort_key(star1, star2, snapshot): assert_decompile_roundtrip(expr2, snapshot) -def test_exists(con, foo_t, bar_t, snapshot): +def test_exists(foo_t, bar_t, snapshot): t1 = foo_t t2 = bar_t cond = (t1.key1 == t2.key1).any() @@ -500,57 +490,6 @@ def test_multi_join(snapshot): snapshot.assert_match(to_sql(expr), "out.sql") -def test_tpc_h11(snapshot): - NATION = "GERMANY" - FRACTION = 0.0001 - - partsupp = ibis.table( - dict( - ps_partkey="int32", - ps_suppkey="int32", - ps_availqty="int32", - ps_supplycost="decimal(15, 2)", - ), - name="partsupp", - ) - supplier = ibis.table( - dict(s_suppkey="int32", s_nationkey="int32"), - name="supplier", - ) - nation = ibis.table( - dict(n_nationkey="int32", n_name="string"), - name="nation", - ) - - q = partsupp - q = q.join(supplier, partsupp.ps_suppkey == supplier.s_suppkey) - q = q.join(nation, nation.n_nationkey == supplier.s_nationkey) - - q = q.filter([q.n_name == NATION]) - - innerq = partsupp - innerq = innerq.join(supplier, partsupp.ps_suppkey == supplier.s_suppkey) - innerq = innerq.join(nation, nation.n_nationkey == supplier.s_nationkey) - innerq = innerq.filter([innerq.n_name == NATION]) - innerq = 
innerq.aggregate(total=(innerq.ps_supplycost * innerq.ps_availqty).sum()) - - gq = q.group_by([q.ps_partkey]) - q = gq.aggregate(value=(q.ps_supplycost * q.ps_availqty).sum()) - q = q.filter([q.value > innerq.total * FRACTION]) - q = q.order_by(ibis.desc(q.value)) - - snapshot.assert_match(to_sql(q), "out.sql") - - -def test_to_sqla_type_array_of_non_primitive(): - result = AlchemyType.from_ibis(dt.Array(dt.Struct(dict(a="int")))) - [(result_name, result_type)] = result.value_type.fields.items() - expected_name = "a" - assert result_name == expected_name - assert type(result_type) == sat.BigInteger - assert isinstance(result, ArrayType) - - def test_no_cart_join(snapshot): facts = ibis.table(dict(product_id="!int32"), name="facts") products = ibis.table( @@ -583,33 +522,3 @@ def test_order_by_expr(snapshot): t = ibis.table(dict(a="int", b="string"), name="t") expr = t[lambda t: t.a == 1].order_by(lambda t: t.b + "a") snapshot.assert_match(to_sql(expr), "out.sql") - - -def test_tpc_h17(snapshot): - BRAND = "Brand#23" - CONTAINER = "MED BOX" - - lineitem = ibis.table( - dict( - l_partkey="!int32", l_quantity="!int32", l_extendedprice="!decimal(15, 2)" - ), - name="lineitem", - ) - part = ibis.table( - dict(p_partkey="!int32", p_brand="!string", p_container="!string"), name="part" - ) - - q = lineitem.join(part, part.p_partkey == lineitem.l_partkey) - innerq = lineitem.filter([lineitem.l_partkey == q.p_partkey]) - q = q.filter( - [ - q.p_brand == BRAND, - q.p_container == CONTAINER, - q.l_quantity < (0.2 * innerq.l_quantity.mean()), - ] - ) - q = q.aggregate( - avg_yearly=q.l_extendedprice.sum() / ibis.literal(7.0, type="decimal(15, 2)") - ) - - snapshot.assert_match(to_sql(q), "out.sql") diff --git a/ibis/tests/sql/test_ast_builder.py b/ibis/tests/sql/test_ast_builder.py deleted file mode 100644 index f3ac5b00e4c8..000000000000 --- a/ibis/tests/sql/test_ast_builder.py +++ /dev/null @@ -1,80 +0,0 @@ -from __future__ import annotations - -import pytest - -import ibis.expr.operations as ops -from ibis.tests.sql.conftest import get_query - -pytest.importorskip("sqlalchemy") - - -def test_ast_with_projection_join_filter(con): - table = con.table("test1") - table2 = con.table("test2") - - filter_pred = table["f"] > 0 - - table3 = table[filter_pred] - - join_pred = table3["g"] == table2["key"] - - joined = table2.inner_join(table3, [join_pred]) - result = joined[[table3, table2["value"]]] - - stmt = get_query(result) - - def foo(): - table3 = table[filter_pred] - joined = table2.inner_join(table3, [join_pred]) - result = joined[[table3, table2["value"]]] - return result - - assert len(stmt.select_set) == 2 - - # #790, make sure the filter stays put - assert len(stmt.where) == 0 - - # Check that the joined tables are not altered - tbl_node = stmt.table_set - assert isinstance(tbl_node, ops.InnerJoin) - assert tbl_node.left == table2.op() - assert tbl_node.right == table3.op() - - -def test_ast_with_aggregation_join_filter(con): - table = con.table("test1") - table2 = con.table("test2") - - filter_pred = table["f"] > 0 - table3 = table[filter_pred] - join_pred = table3["g"] == table2["key"] - - joined = table2.inner_join(table3, [join_pred]) - - met1 = (table3["f"] - table2["value"]).mean().name("foo") - result = joined.aggregate( - [met1, table3["f"].sum().name("bar")], - by=[table3["g"], table2["key"]], - ) - - stmt = get_query(result) - - # #790, this behavior was different before - ex_pred = [table3["g"] == table2["key"]] - expected_table_set = table2.inner_join(table3, ex_pred) - assert 
stmt.table_set == expected_table_set.op() - - # Check various exprs - ex_metrics = [ - (table3["f"] - table2["value"]).mean().name("foo"), - table3["f"].sum().name("bar"), - ] - ex_by = [table3["g"], table2["key"]] - for res, ex in zip(stmt.select_set, ex_by + ex_metrics): - assert res == ex.op() - - for res, ex in zip(stmt.group_by, ex_by): - assert stmt.select_set[res] == ex.op() - - # The filter is in the joined subtable - assert len(stmt.where) == 0 From 4eba86014ca0f53269ff3061905e26b39bc9ee3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Wed, 13 Dec 2023 12:13:41 +0100 Subject: [PATCH 003/161] test(backends): move backends dependent benchmarks to ibis/backends/tests/ --- ibis/backends/base/sql/__init__.py | 1 - ibis/backends/pandas/__init__.py | 2 -- ibis/{tests/benchmarks => backends/tests}/test_benchmarks.py | 0 ibis/tests/benchmarks/__init__.py | 0 4 files changed, 3 deletions(-) rename ibis/{tests/benchmarks => backends/tests}/test_benchmarks.py (100%) delete mode 100644 ibis/tests/benchmarks/__init__.py diff --git a/ibis/backends/base/sql/__init__.py b/ibis/backends/base/sql/__init__.py index 8d31f56979cc..3b19a843de25 100644 --- a/ibis/backends/base/sql/__init__.py +++ b/ibis/backends/base/sql/__init__.py @@ -22,7 +22,6 @@ import pandas as pd import pyarrow as pa -raise RuntimeError("Temporarily make the SQL backends dysfunctional") __all__ = ["BaseSQLBackend"] diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py index 4349400c50ab..c4de4af3f8fa 100644 --- a/ibis/backends/pandas/__init__.py +++ b/ibis/backends/pandas/__init__.py @@ -22,8 +22,6 @@ import pathlib from collections.abc import Mapping, MutableMapping -raise RuntimeError("Temporarily make the pandas backend dysfunctional") - class BasePandasBackend(BaseBackend): """Base class for backends based on pandas.""" diff --git a/ibis/tests/benchmarks/test_benchmarks.py b/ibis/backends/tests/test_benchmarks.py similarity index 100% rename from ibis/tests/benchmarks/test_benchmarks.py rename to ibis/backends/tests/test_benchmarks.py diff --git a/ibis/tests/benchmarks/__init__.py b/ibis/tests/benchmarks/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 From a38aae85060ce7772a06d5e165012535124839cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Wed, 13 Dec 2023 12:26:42 +0100 Subject: [PATCH 004/161] test(ir): ensure that no backends are required to run the core tests --- ibis/backends/base/sql/__init__.py | 1 + ibis/backends/dask/__init__.py | 3 +++ ibis/backends/pandas/__init__.py | 2 ++ ibis/backends/tests/sql/conftest.py | 2 +- ibis/backends/tests/sql/test_compiler.py | 3 ++- ibis/backends/tests/sql/test_select_sql.py | 3 ++- ibis/backends/tests/test_benchmarks.py | 25 ++++++++++--------- ibis/backends/tests/test_temporal.py | 14 +++++++++++ ibis/tests/test_api.py | 28 +++++++++++----------- 9 files changed, 51 insertions(+), 30 deletions(-) diff --git a/ibis/backends/base/sql/__init__.py b/ibis/backends/base/sql/__init__.py index 3b19a843de25..8d31f56979cc 100644 --- a/ibis/backends/base/sql/__init__.py +++ b/ibis/backends/base/sql/__init__.py @@ -22,6 +22,7 @@ import pandas as pd import pyarrow as pa +raise RuntimeError("Temporarily make the SQL backends dysfunctional") __all__ = ["BaseSQLBackend"] diff --git a/ibis/backends/dask/__init__.py b/ibis/backends/dask/__init__.py index 80aba0be36ad..86a4f1055d2f 100644 --- a/ibis/backends/dask/__init__.py +++ b/ibis/backends/dask/__init__.py @@ -24,6 +24,9 @@ import 
pathlib from collections.abc import Mapping, MutableMapping + +raise RuntimeError("Temporarily make the dask backend dysfunctional") + # Make sure that the pandas backend options have been loaded ibis.pandas # noqa: B018 diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py index c4de4af3f8fa..4349400c50ab 100644 --- a/ibis/backends/pandas/__init__.py +++ b/ibis/backends/pandas/__init__.py @@ -22,6 +22,8 @@ import pathlib from collections.abc import Mapping, MutableMapping +raise RuntimeError("Temporarily make the pandas backend dysfunctional") + class BasePandasBackend(BaseBackend): """Base class for backends based on pandas.""" diff --git a/ibis/backends/tests/sql/conftest.py b/ibis/backends/tests/sql/conftest.py index 5d2c66b89679..b16b9fa48147 100644 --- a/ibis/backends/tests/sql/conftest.py +++ b/ibis/backends/tests/sql/conftest.py @@ -6,7 +6,7 @@ pytest.importorskip("duckdb") -from ibis.backends.duckdb import Backend as DuckDBBackend # noqa: E402 + from ibis.tests.expr.mocks import MockBackend # noqa: E402 diff --git a/ibis/backends/tests/sql/test_compiler.py b/ibis/backends/tests/sql/test_compiler.py index c331c6069ccb..8e66b358fca0 100644 --- a/ibis/backends/tests/sql/test_compiler.py +++ b/ibis/backends/tests/sql/test_compiler.py @@ -5,7 +5,8 @@ import pytest import ibis -from ibis.backends.base.sql.compiler import Compiler + +# from ibis.backends.base.sql.compiler import Compiler from ibis.backends.tests.sql.conftest import to_sql from ibis.tests.util import assert_decompile_roundtrip diff --git a/ibis/backends/tests/sql/test_select_sql.py b/ibis/backends/tests/sql/test_select_sql.py index c736c4f5f178..24e2a65468d4 100644 --- a/ibis/backends/tests/sql/test_select_sql.py +++ b/ibis/backends/tests/sql/test_select_sql.py @@ -5,7 +5,8 @@ import ibis from ibis import _ -from ibis.backends.base.sql.compiler import Compiler + +# from ibis.backends.base.sql.compiler import Compiler from ibis.backends.tests.sql.conftest import get_query, to_sql from ibis.tests.util import assert_decompile_roundtrip diff --git a/ibis/backends/tests/test_benchmarks.py b/ibis/backends/tests/test_benchmarks.py index debc26942ebc..a38760628e5a 100644 --- a/ibis/backends/tests/test_benchmarks.py +++ b/ibis/backends/tests/test_benchmarks.py @@ -18,12 +18,11 @@ import ibis.expr.operations as ops import ibis.expr.types as ir from ibis.backends.base import _get_backend_names -from ibis.backends.pandas.udf import udf -pytestmark = pytest.mark.benchmark +# from ibis.backends.pandas.udf import udf -# TODO(kszucs): this should be moved under the backends since the benchmarks are -# backend specific +# FIXME(kszucs): pytestmark = pytest.mark.benchmark +pytestmark = pytest.mark.skip(reason="the backends must be rewritten first") def make_t(): @@ -283,9 +282,9 @@ def high_card_grouped_rolling(t): return t.value.mean().over(high_card_rolling_window(t)) -@udf.reduction(["double"], "double") -def my_mean(series): - return series.mean() +# @udf.reduction(["double"], "double") +# def my_mean(series): +# return series.mean() def low_card_grouped_rolling_udf_mean(t): @@ -296,9 +295,9 @@ def high_card_grouped_rolling_udf_mean(t): return my_mean(t.value).over(high_card_rolling_window(t)) -@udf.analytic(["double"], "double") -def my_zscore(series): - return (series - series.mean()) / series.std() +# @udf.analytic(["double"], "double") +# def my_zscore(series): +# return (series - series.mean()) / series.std() def low_card_window(t): @@ -317,9 +316,9 @@ def high_card_window_analytics_udf(t): return 
my_zscore(t.value).over(high_card_window(t)) -@udf.reduction(["double", "double"], "double") -def my_wm(v, w): - return np.average(v, weights=w) +# @udf.reduction(["double", "double"], "double") +# def my_wm(v, w): +# return np.average(v, weights=w) def low_card_grouped_rolling_udf_wm(t): diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 93799226abb2..a29a65129fb9 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -33,6 +33,20 @@ from ibis.common.annotations import ValidationError +def day_name(obj: pd.core.indexes.accessors.DatetimeProperties | pd.Timestamp) -> str: + """Backwards compatible name-of-day getting function. + + Returns + ------- + str + The name of the day corresponding to `obj` + """ + try: + return obj.day_name() + except AttributeError: + return obj.weekday_name + + @pytest.mark.parametrize("attr", ["year", "month", "day"]) @pytest.mark.parametrize( "expr_fn", diff --git a/ibis/tests/test_api.py b/ibis/tests/test_api.py index 1481c46080df..83a2090188ea 100644 --- a/ibis/tests/test_api.py +++ b/ibis/tests/test_api.py @@ -9,24 +9,24 @@ import ibis +# FIXME(kszucs): the following backends require the sqlite backend loaded +# def test_backends_are_cached(): +# assert ibis.sqlite is ibis.sqlite +# del ibis.sqlite # delete to force recreation +# assert ibis.sqlite is ibis.sqlite -def test_backends_are_cached(): - assert ibis.sqlite is ibis.sqlite - del ibis.sqlite # delete to force recreation - assert ibis.sqlite is ibis.sqlite +# def test_backends_tab_completion(): +# assert hasattr(ibis, "sqlite") +# del ibis.sqlite # delete to ensure not real attr +# assert "sqlite" in dir(ibis) +# assert ibis.sqlite is ibis.sqlite +# assert "sqlite" in dir(ibis) # in dir even if already created -def test_backends_tab_completion(): - assert hasattr(ibis, "sqlite") - del ibis.sqlite # delete to ensure not real attr - assert "sqlite" in dir(ibis) - assert ibis.sqlite is ibis.sqlite - assert "sqlite" in dir(ibis) # in dir even if already created - -def test_public_backend_methods(): - public = {m for m in dir(ibis.sqlite) if not m.startswith("_")} - assert public == {"connect", "compile", "has_operation", "add_operation", "name"} +# def test_public_backend_methods(): +# public = {m for m in dir(ibis.sqlite) if not m.startswith("_")} +# assert public == {"connect", "compile", "has_operation", "add_operation", "name"} def test_missing_backend(): From 2927d8f1e574eb74df51896d47fcf507477e1268 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Wed, 13 Dec 2023 10:39:45 +0100 Subject: [PATCH 005/161] chore(ci): skip running backend tests on the-epic-split branch --- .github/workflows/ibis-backends-skip-helper.yml | 2 ++ .github/workflows/ibis-backends.yml | 2 ++ .github/workflows/ibis-main.yml | 2 ++ .github/workflows/nix-skip-helper.yml | 2 ++ .github/workflows/nix.yml | 2 ++ 5 files changed, 10 insertions(+) diff --git a/.github/workflows/ibis-backends-skip-helper.yml b/.github/workflows/ibis-backends-skip-helper.yml index 9676a0be42a5..d889b16d8d45 100644 --- a/.github/workflows/ibis-backends-skip-helper.yml +++ b/.github/workflows/ibis-backends-skip-helper.yml @@ -13,6 +13,7 @@ on: branches: - main - "*.x.x" + - "!the-epic-split" pull_request: paths: - "docs/**" @@ -23,6 +24,7 @@ on: branches: - main - "*.x.x" + - "!the-epic-split" merge_group: jobs: test_backends: diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index b6e1bd1d37ff..e42589ab004c 
100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -12,6 +12,7 @@ on: branches: - main - "*.x.x" + - "!the-epic-split" pull_request: # Skip the backend suite if all changes are docs paths-ignore: @@ -23,6 +24,7 @@ on: branches: - main - "*.x.x" + - "!the-epic-split" merge_group: permissions: diff --git a/.github/workflows/ibis-main.yml b/.github/workflows/ibis-main.yml index 9e1f54426d0b..d9f409ea03a0 100644 --- a/.github/workflows/ibis-main.yml +++ b/.github/workflows/ibis-main.yml @@ -11,6 +11,7 @@ on: branches: - main - "*.x.x" + - "the-epic-split" pull_request: # Skip the test suite if all changes are in the docs directory paths-ignore: @@ -21,6 +22,7 @@ on: branches: - main - "*.x.x" + - "the-epic-split" merge_group: permissions: diff --git a/.github/workflows/nix-skip-helper.yml b/.github/workflows/nix-skip-helper.yml index a63e5680320b..1b82f7334796 100644 --- a/.github/workflows/nix-skip-helper.yml +++ b/.github/workflows/nix-skip-helper.yml @@ -13,6 +13,7 @@ on: branches: - main - "*.x.x" + - "the-epic-split" pull_request: paths: - "docs/**" @@ -22,6 +23,7 @@ on: branches: - main - "*.x.x" + - "the-epic-split" merge_group: jobs: diff --git a/.github/workflows/nix.yml b/.github/workflows/nix.yml index e434878d348b..6bf1cd1412de 100644 --- a/.github/workflows/nix.yml +++ b/.github/workflows/nix.yml @@ -10,6 +10,7 @@ on: branches: - main - "*.x.x" + - "the-epic-split" pull_request: paths-ignore: - "docs/**" @@ -19,6 +20,7 @@ on: branches: - main - "*.x.x" + - "the-epic-split" merge_group: concurrency: From 6b225ee52a21b3e9817154970a667088fe60d27d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Wed, 13 Dec 2023 13:34:41 +0100 Subject: [PATCH 006/161] chore(ci): change the core testing command since the core marker is completely broken without the backend tests --- .github/workflows/ibis-main.yml | 4 ++-- .github/workflows/nix.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ibis-main.yml b/.github/workflows/ibis-main.yml index d9f409ea03a0..3e8d8d6472b9 100644 --- a/.github/workflows/ibis-main.yml +++ b/.github/workflows/ibis-main.yml @@ -86,11 +86,11 @@ jobs: - name: run all core tests and run benchmarks once parallel if: matrix.os != 'windows-latest' - run: just ci-check -m "'core or benchmark'" -n auto + run: pytest -v -n auto ibis/common ibis/expr ibis/tests ibis/formats - name: run all core tests and run benchmarks once serial if: matrix.os == 'windows-latest' - run: just ci-check -m "'core or benchmark'" + run: pytest -v ibis/common ibis/expr ibis/tests ibis/formats - name: upload code coverage if: success() diff --git a/.github/workflows/nix.yml b/.github/workflows/nix.yml index 6bf1cd1412de..305b5f9981c9 100644 --- a/.github/workflows/nix.yml +++ b/.github/workflows/nix.yml @@ -10,7 +10,7 @@ on: branches: - main - "*.x.x" - - "the-epic-split" + - "!the-epic-split" pull_request: paths-ignore: - "docs/**" @@ -20,7 +20,7 @@ on: branches: - main - "*.x.x" - - "the-epic-split" + - "!the-epic-split" merge_group: concurrency: From b90f2c4183b7741e55c830b2e43e77405242a869 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Wed, 13 Dec 2023 14:08:31 +0100 Subject: [PATCH 007/161] chore(ci): temporarily disable test_doctests job --- .github/workflows/ibis-main.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ibis-main.yml b/.github/workflows/ibis-main.yml index 3e8d8d6472b9..6d059487c116 100644 --- 
a/.github/workflows/ibis-main.yml +++ b/.github/workflows/ibis-main.yml @@ -86,11 +86,11 @@ jobs: - name: run all core tests and run benchmarks once parallel if: matrix.os != 'windows-latest' - run: pytest -v -n auto ibis/common ibis/expr ibis/tests ibis/formats + run: poetry run pytest -v -n auto ibis/common ibis/expr ibis/tests ibis/formats - name: run all core tests and run benchmarks once serial if: matrix.os == 'windows-latest' - run: pytest -v ibis/common ibis/expr ibis/tests ibis/formats + run: poetry run pytest -v ibis/common ibis/expr ibis/tests ibis/formats - name: upload code coverage if: success() @@ -137,6 +137,8 @@ jobs: run: poetry run python -c 'import shapely.geometry, duckdb' test_doctests: + # FIXME(kszucs): re-enable this build + if: false name: Doctests runs-on: ${{ matrix.os }} strategy: From 26c1321c309ac2a3db32e7f2c76a01571b4800e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 14 Dec 2023 14:52:54 +0100 Subject: [PATCH 008/161] chore(ci): add todo note about restoring the previous ci-check command --- .github/workflows/ibis-main.yml | 1 + ibis/backends/base/sql/__init__.py | 5 +++-- ibis/backends/pandas/core.py | 1 - 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ibis-main.yml b/.github/workflows/ibis-main.yml index 6d059487c116..9b69c1132ee4 100644 --- a/.github/workflows/ibis-main.yml +++ b/.github/workflows/ibis-main.yml @@ -86,6 +86,7 @@ jobs: - name: run all core tests and run benchmarks once parallel if: matrix.os != 'windows-latest' + # TODO(kszucs): restore "just ci-check -m "'core or benchmark'" -n auto" run: poetry run pytest -v -n auto ibis/common ibis/expr ibis/tests ibis/formats - name: run all core tests and run benchmarks once serial diff --git a/ibis/backends/base/sql/__init__.py b/ibis/backends/base/sql/__init__.py index 8d31f56979cc..1fd6528170c4 100644 --- a/ibis/backends/base/sql/__init__.py +++ b/ibis/backends/base/sql/__init__.py @@ -92,8 +92,9 @@ def table(self, name: str, database: str | None = None) -> ir.Table: ) qualified_name = self._fully_qualified_name(name, database) schema = self.get_schema(qualified_name) - namespace = ops.Namespace(database=database) - node = ops.DatabaseTable(name, schema, self, namespace=namespace) + node = ops.DatabaseTable( + name, schema, self, namespace=ops.Namespace(database=database) + ) return node.to_expr() def _fully_qualified_name(self, name, database): diff --git a/ibis/backends/pandas/core.py b/ibis/backends/pandas/core.py index 50ae12d30b2e..ef29b2bb29cc 100644 --- a/ibis/backends/pandas/core.py +++ b/ibis/backends/pandas/core.py @@ -135,7 +135,6 @@ if TYPE_CHECKING: from collections.abc import Iterable, Mapping - integer_types = np.integer, int floating_types = (numbers.Real,) numeric_types = integer_types + floating_types From 631349af21b835f4926aa619e11bcd7f7c5709a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Wed, 13 Dec 2023 12:26:42 +0100 Subject: [PATCH 009/161] test(ir): ensure that no backends are required to run the core tests --- ibis/backends/tests/test_temporal.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index a29a65129fb9..9d1ec03b6fe9 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -33,6 +33,20 @@ from ibis.common.annotations import ValidationError +def day_name(obj: pd.core.indexes.accessors.DatetimeProperties | pd.Timestamp) -> str: + 
"""Backwards compatible name-of-day getting function. + + Returns + ------- + str + The name of the day corresponding to `obj` + """ + try: + return obj.day_name() + except AttributeError: + return obj.weekday_name + + def day_name(obj: pd.core.indexes.accessors.DatetimeProperties | pd.Timestamp) -> str: """Backwards compatible name-of-day getting function. From c805eb906a33ee8b71d7cd2018874ff1e77405fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Wed, 8 Nov 2023 11:01:33 +0100 Subject: [PATCH 010/161] refactor(ir): split the relational operations Rationale and history --------------------- In the last couple of years we have been constantly refactoring the internals to make it easier to work with. Although we have made great progress, the current codebase is still hard to maintain and extend. One example of that complexity is the try to remove the `Projector` class in #7430. I had to realize that we are unable to improve the internals in smaller incremental steps, we need to make a big leap forward to make the codebase maintainable in the long run. One of the hotspots of problems is the `analysis.py` module which tries to bridge the gap between the user-facing API and the internal representation. Part of its complexity is caused by loose integrity checks in the internal representation, allowing various ways to represent the same operation. This makes it hard to inspect, reason about and optimize the relational operations. In addition to that, it makes much harder to implement the backends since more branching is required to cover all the variations. We have always been aware of these problems, and actually we had several attempts to solve them the same way this PR does. However, we never managed to actually split the relational operations, we always hit roadblocks to maintain compatibility with the current test suite. Actually we were unable to even understand those issues because of the complexity of the codebase and number of indirections between the API, analysis functions and the internal representation. But(!) finally we managed to prototype a new IR in #7580 along with implementations for the majority of the backends, including `various SQL backends` and `pandas`. After successfully validating the viability of the new IR, we split the PR into smaller pieces which can be individually reviewed. This PR is the first step of that process, it introduces the new IR and the new API. The next steps will be to implement the remaining backends on top of the new IR. Changes in this commit ---------------------- - Split the `ops.Selection` and `ops.Aggregration` nodes into proper relational algebra operations. - Almost entirely remove `analysis.py` with the technical debt accumulated over the years. - More flexible window frame binding: if an unbound analytical function is used with a window containing references to a relation then `.over()` is now able to bind the window frame to the relation. - Introduce a new API-level technique to dereference columns to the target relation(s). - Revamp the subquery handling to be more robust and to support more use cases with strict validation, now we have `ScalarSubquery`, `ExistsSubquery`, and `InSubquery` nodes which can only be used in the appropriate context. - Use way stricter integrity checks for all the relational operations, most of the time enforcing that all the value inputs of the node must originate from the parent relation the node depends on. 
- Introduce a new `JoinChain` operation to represent multiple joins in a single operation followed by a projection attached to the same relation. This made it possible to solve several outstanding issues with the join handling (including the notorious chain join issue). - Use straightforward rewrite rules collected in `rewrites.py` to reinterpret user input so that the new operations can be constructed, even with the strict integrity checks. - Provide a set of simplification rules to reorder and squash the relational operations into a more compact form. - Use mappings to represent projections, eliminating the need to internally store `ops.Alias` nodes. In addition, table nodes are no longer allowed in projections; the columns are expanded into the same mapping, making the semantics clear. - Uniform handling of the various kinds of inputs for all the API methods using a generic `bind()` function. Advantages of the new IR ------------------------ - The operations are much simpler with clear semantics. - The operations are easier to reason about and to optimize. - The backends can easily lower the internal representation to a backend-specific form before compilation/execution, so the lowered form can be easily inspected, debugged, and optimized. - The API is much closer to the users' mental model, thanks to the dereferencing technique. - The backend implementation can be greatly simplified due to the simpler internal representation and strict integrity checks. As an example, the pandas backend can be slimmed down by 4k lines of code while being more robust and easier to maintain. Disadvantages of the new IR --------------------------- - The backends must be rewritten to support the new internal representation. --- ibis/expr/analysis.py | 442 +----- ibis/expr/api.py | 13 +- ibis/expr/builders.py | 43 +- ibis/expr/decompile.py | 141 +- ibis/expr/format.py | 104 +- ibis/expr/operations/core.py | 27 +- ibis/expr/operations/generic.py | 49 +- ibis/expr/operations/geospatial.py | 6 +- ibis/expr/operations/logical.py | 77 +- ibis/expr/operations/reductions.py | 9 + ibis/expr/operations/relations.py | 784 ++++------- ibis/expr/operations/sortkeys.py | 1 + ibis/expr/operations/strings.py | 7 +- ibis/expr/operations/temporal_windows.py | 8 + ibis/expr/operations/tests/test_structs.py | 2 +- ibis/expr/operations/window.py | 7 +- ibis/expr/rewrites.py | 202 ++- ibis/expr/sql.py | 28 +- .../test_aggregate_arg_names/repr.txt | 10 +- .../test_format/test_asof_join/repr.txt | 30 +- .../test_format/test_complex_repr/repr.txt | 20 +- .../test_destruct_selection/repr.txt | 2 +- .../test_fillna/fillna_int_repr.txt | 5 +- .../test_fillna/fillna_str_repr.txt | 5 +- .../test_format_dummy_table/repr.txt | 2 +- .../repr.txt | 45 +- .../repr.txt | 44 +- .../test_format_projection/repr.txt | 9 +- .../test_memoize_filtered_table/repr.txt | 20 +- .../repr.txt | 35 +- .../test_format/test_repr_exact/repr.txt | 9 +- .../repr.txt | 7 +- .../test_table_count_expr/cnt_repr.txt | 2 +- .../test_table_count_expr/join_repr.txt | 11 +- .../test_table_count_expr/union_repr.txt | 7 +- .../test_format/test_two_inner_joins/repr.txt | 36 +- .../decompiled.py | 30 +- .../decompiled.py | 4 +- .../decompiled.py | 6 +- .../inner/decompiled.py | 38 +- .../left/decompiled.py | 36 +- .../right/decompiled.py | 38 +- .../decompiled.py | 7 +- .../test_parse_sql_in_clause/decompiled.py | 5 +- .../decompiled.py | 36 +- .../decompiled.py | 22 +- .../decompiled.py | 4 +- .../decompiled.py | 2 +- .../decompiled.py | 2 +-
.../test_parse_sql_table_alias/decompiled.py | 4 +- ibis/expr/tests/test_format.py | 32 +- ibis/expr/tests/test_newrels.py | 1193 +++++++++++++++++ ibis/expr/tests/test_rewrites.py | 104 ++ ibis/expr/types/__init__.py | 1 + ibis/expr/types/core.py | 3 +- ibis/expr/types/generic.py | 165 +-- ibis/expr/types/geospatial.py | 11 +- ibis/expr/types/groupby.py | 134 +- ibis/expr/types/joins.py | 247 ++++ ibis/expr/types/logical.py | 63 +- ibis/expr/types/relations.py | 570 ++++---- ibis/expr/types/temporal_windows.py | 30 +- ibis/expr/visualize.py | 6 +- ibis/selectors.py | 2 +- .../test_format_sql_query_result/repr.txt | 9 +- .../test_memoize_database_table/repr.txt | 29 +- .../test_memoize_insert_sort_key/repr.txt | 27 +- ibis/tests/expr/test_analysis.py | 191 ++- ibis/tests/expr/test_selectors.py | 4 +- ibis/tests/expr/test_set_operations.py | 4 +- ibis/tests/expr/test_struct.py | 12 +- ibis/tests/expr/test_table.py | 559 +++++--- ibis/tests/expr/test_value_exprs.py | 13 +- ibis/tests/expr/test_window_frames.py | 26 +- ibis/tests/expr/test_window_functions.py | 9 +- 75 files changed, 3543 insertions(+), 2384 deletions(-) create mode 100644 ibis/expr/tests/test_newrels.py create mode 100644 ibis/expr/tests/test_rewrites.py create mode 100644 ibis/expr/types/joins.py diff --git a/ibis/expr/analysis.py b/ibis/expr/analysis.py index 386ede17090f..e210c99f1c28 100644 --- a/ibis/expr/analysis.py +++ b/ibis/expr/analysis.py @@ -1,379 +1,22 @@ from __future__ import annotations -from collections import defaultdict -from typing import TYPE_CHECKING - -import toolz - import ibis.common.graph as g import ibis.expr.operations as ops -import ibis.expr.operations.relations as rels -import ibis.expr.types as ir -from ibis import util from ibis.common.deferred import deferred, var -from ibis.common.exceptions import ExpressionError, IbisTypeError, IntegrityError -from ibis.common.patterns import Eq, In, pattern, replace +from ibis.common.patterns import pattern from ibis.util import Namespace -if TYPE_CHECKING: - from collections.abc import Iterable, Iterator - p = Namespace(pattern, module=ops) c = Namespace(deferred, module=ops) x = var("x") y = var("y") -# --------------------------------------------------------------------- -# Some expression metaprogramming / graph transformations to support -# compilation later - - -def sub_immediate_parents(node: ops.Node, table: ops.TableNode) -> ops.Node: - """Replace immediate parent tables in `op` with `table`.""" - parents = find_immediate_parent_tables(node) - return node.replace(In(parents) >> table) - - -def find_immediate_parent_tables(input_node, keep_input=True): - """Find every first occurrence of a `ir.Table` object in `input_node`. - - This function does not traverse into `Table` objects. For example, the - underlying `PhysicalTable` of a `Selection` will not be yielded. 
- - Parameters - ---------- - input_node - Input node - keep_input - Whether to keep the input when traversing - - Yields - ------ - ir.Expr - Parent table expression - - Examples - -------- - >>> import ibis, toolz - >>> t = ibis.table([("a", "int64")], name="t") - >>> expr = t.mutate(foo=t.a + 1) - >>> (result,) = find_immediate_parent_tables(expr.op()) - >>> result.equals(expr.op()) - True - >>> (result,) = find_immediate_parent_tables(expr.op(), keep_input=False) - >>> result.equals(t.op()) - True - """ - assert all(isinstance(arg, ops.Node) for arg in util.promote_list(input_node)) - - def finder(node): - if isinstance(node, ops.TableNode): - if keep_input or node != input_node: - return g.halt, node - else: - return g.proceed, None - - # HACK: special case ops.Contains to only consider the needle's base - # table, since that's the only expression that matters for determining - # cardinality - elif isinstance(node, ops.InColumn): - # we allow InColumn.options to be a column from a foreign table - return [node.value], None - else: - return g.proceed, None - - return list(toolz.unique(g.traverse(finder, input_node))) - - -def get_mutation_exprs(exprs: list[ir.Expr], table: ir.Table) -> list[ir.Expr | None]: - """Return the exprs to use to instantiate the mutation.""" - # The below logic computes the mutation node exprs by splitting the - # assignment exprs into two disjoint sets: - # 1) overwriting_cols_to_expr, which maps a column name to its expr - # if the expr contains a column that overwrites an existing table column. - # All keys in this dict are columns in the original table that are being - # overwritten by an assignment expr. - # 2) non_overwriting_exprs, which is a list of all exprs that do not do - # any overwriting. That is, if an expr is in this list, then its column - # name does not exist in the original table. - # Given these two data structures, we can compute the mutation node exprs - # based on whether any columns are being overwritten. 
- overwriting_cols_to_expr: dict[str, ir.Expr | None] = {} - non_overwriting_exprs: list[ir.Expr] = [] - table_schema = table.schema() - for expr in exprs: - expr_contains_overwrite = False - if isinstance(expr, ir.Value) and expr.get_name() in table_schema: - overwriting_cols_to_expr[expr.get_name()] = expr - expr_contains_overwrite = True - - if not expr_contains_overwrite: - non_overwriting_exprs.append(expr) - - columns = table.columns - if overwriting_cols_to_expr: - return [ - overwriting_cols_to_expr.get(column, table[column]) - for column in columns - if overwriting_cols_to_expr.get(column, table[column]) is not None - ] + non_overwriting_exprs - - table_expr: ir.Expr = table - return [table_expr] + exprs - - -def pushdown_selection_filters(parent, predicates): - if not predicates: - return parent - - default = ops.Selection(parent, selections=[], predicates=predicates) - if not isinstance(parent, (ops.Selection, ops.Aggregation)): - return default - - projected_column_names = set() - for value in parent._projection.selections: - if isinstance(value, (ops.Relation, ops.TableColumn)): - # we are only interested in projected value expressions, not tables - # nor column references which are not changing the projection - continue - elif value.find((ops.WindowFunction, ops.ExistsSubquery), filter=ops.Value): - # the parent has analytic projections like window functions so we - # can't push down filters to that level - return default - else: - # otherwise collect the names of newly projected value expressions - # which are not just plain column references - projected_column_names.add(value.name) - - conflicting_projection = p.TableColumn(parent, In(projected_column_names)) - pushdown_pattern = Eq(parent) >> parent.table - - simplified = [] - for pred in predicates: - if pred.find(conflicting_projection, filter=p.Value): - return default - try: - simplified.append(pred.replace(pushdown_pattern)) - except (IntegrityError, IbisTypeError): - # former happens when there is a duplicate column name in the parent - # which is a join, the latter happens for semi/anti joins - return default - - return parent.copy(predicates=parent.predicates + tuple(simplified)) - - -@replace(p.Analytic | p.Reduction) -def wrap_analytic(_, default_frame): - return ops.WindowFunction(_, default_frame) - - -@replace(p.WindowFunction) -def merge_windows(_, default_frame): - if _.frame.start and default_frame.start and _.frame.start != default_frame.start: - raise ExpressionError( - "Unable to merge windows with conflicting `start` boundary" - ) - if _.frame.end and default_frame.end and _.frame.end != default_frame.end: - raise ExpressionError("Unable to merge windows with conflicting `end` boundary") - - start = _.frame.start or default_frame.start - end = _.frame.end or default_frame.end - group_by = tuple(toolz.unique(_.frame.group_by + default_frame.group_by)) - - order_by = {} - # iterate in the order of the existing keys followed by the new keys - # - # this allows duplicates to be overridden with no effect on the original - # position - # - # see https://github.com/ibis-project/ibis/issues/7940 for how this - # originally manifested - for sort_key in default_frame.order_by + _.frame.order_by: - order_by[sort_key.expr] = sort_key.ascending - order_by = tuple(ops.SortKey(k, v) for k, v in order_by.items()) - - frame = _.frame.copy(start=start, end=end, group_by=group_by, order_by=order_by) - return ops.WindowFunction(_.func, frame) - - -def windowize_function(expr, default_frame): - ctx = {"default_frame": 
default_frame} - node = expr.op() - node = node.replace(merge_windows, filter=p.Value, context=ctx) - node = node.replace(wrap_analytic, filter=p.Value & ~p.WindowFunction, context=ctx) - return node.to_expr() - - -def contains_first_or_last_agg(exprs): - def fn(node: ops.Node) -> tuple[bool, bool | None]: - if not isinstance(node, ops.Value): - return g.halt, None - return g.proceed, isinstance(node, (ops.First, ops.Last)) - - return any(g.traverse(fn, exprs)) - - -def simplify_aggregation(agg): - def _pushdown(nodes): - subbed = [] - for node in nodes: - new_node = node.replace(Eq(agg.table) >> agg.table.table) - subbed.append(new_node) - - # TODO(kszucs): perhaps this validation could be omitted - if subbed: - valid = shares_all_roots(subbed, agg.table.table) - else: - valid = True - - return valid, subbed - - table = agg.table - if ( - isinstance(table, ops.Selection) - and not table.selections - # more aggressive than necessary, a better solution would be to check - # whether the selections have any order sensitive aggregates that - # *depend on* the sort_keys - and not (table.sort_keys or contains_first_or_last_agg(table.selections)) - ): - metrics_valid, lowered_metrics = _pushdown(agg.metrics) - by_valid, lowered_by = _pushdown(agg.by) - having_valid, lowered_having = _pushdown(agg.having) - - if metrics_valid and by_valid and having_valid: - valid_lowered_sort_keys = frozenset(lowered_metrics).union(lowered_by) - return ops.Aggregation( - table.table, - lowered_metrics, - by=lowered_by, - having=lowered_having, - predicates=agg.table.predicates, - # only the sort keys that exist as grouping keys or metrics can - # be included - sort_keys=[ - key - for key in agg.table.sort_keys - if key.expr in valid_lowered_sort_keys - ], - ) - - return agg - - -class Projector: - """Analysis and validation of projection operation. - - This pass tries to take advantage of projection fusion opportunities where - they exist, i.e. combining compatible projections together rather than - nesting them. - - Translation / evaluation later will not attempt to do any further fusion / - simplification. 
- """ - - def __init__(self, parent, proj_exprs): - # TODO(kszucs): rewrite projector to work with operations exclusively - proj_exprs = util.promote_list(proj_exprs) - self.parent = parent - self.input_exprs = proj_exprs - self.resolved_exprs = [parent._ensure_expr(e) for e in proj_exprs] - - default_frame = ops.RowsWindowFrame(table=parent) - self.clean_exprs = [ - windowize_function(expr, default_frame) for expr in self.resolved_exprs - ] - - def get_result(self): - roots = find_immediate_parent_tables(self.parent.op()) - first_root = roots[0] - parent_op = self.parent.op() - - # reprojection of the same selections - if len(self.clean_exprs) == 1: - first = self.clean_exprs[0].op() - if isinstance(first, ops.Selection): - if first.selections == parent_op.selections: - return parent_op - - if len(roots) == 1 and isinstance(first_root, ops.Selection): - fused_op = self.try_fusion(first_root) - if fused_op is not None: - return fused_op - - return ops.Selection(self.parent, self.clean_exprs) - - def try_fusion(self, root): - assert self.parent.op() == root - - root_table = root.table - root_table_expr = root_table.to_expr() - roots = find_immediate_parent_tables(root_table) - fused_exprs = [] - clean_exprs = self.clean_exprs - - if not isinstance(root_table, ops.Join): - try: - resolved = [ - root_table_expr._ensure_expr(expr) for expr in self.input_exprs - ] - except (AttributeError, IbisTypeError): - resolved = clean_exprs - else: - # if any expressions aren't exactly equivalent then don't try - # to fuse them - if any( - not res_root_root.equals(res_root) - for res_root_root, res_root in zip(resolved, clean_exprs) - ): - return None - else: - # joins cannot be used to resolve expressions, but we still may be - # able to fuse columns from a projection off of a join. In that - # case, use the projection's input expressions as the columns with - # which to attempt fusion - resolved = clean_exprs - - root_selections = root.selections - parent_op = self.parent.op() - for val in resolved: - # a * projection - if isinstance(val, ir.Table) and ( - parent_op.equals(val.op()) - # gross we share the same table root. Better way to - # detect? 
- or len(roots) == 1 - and find_immediate_parent_tables(val.op())[0] == roots[0] - ): - have_root = False - for root_sel in root_selections: - # Don't add the * projection twice - if root_sel.equals(root_table): - fused_exprs.append(root_table) - have_root = True - continue - fused_exprs.append(root_sel) - - # This was a filter, so implicitly a select * - if not have_root and not root_selections: - fused_exprs = [root_table, *fused_exprs] - elif shares_all_roots(val.op(), root_table): - fused_exprs.append(val) - else: - return None - - return ops.Selection( - root_table, - fused_exprs, - predicates=root.predicates, - sort_keys=root.sort_keys, - ) - +# TODO(kszucs): should be removed def find_first_base_table(node): def predicate(node): - if isinstance(node, ops.TableNode): + if isinstance(node, ops.Relation): return g.halt, node else: return g.proceed, None @@ -384,42 +27,7 @@ def predicate(node): return None -def _find_projections(node): - if isinstance(node, ops.Selection): - # remove predicates and sort_keys, so that child tables are considered - # equivalent even if their predicates and sort_keys are not - return g.proceed, node._projection - elif isinstance(node, ops.SelfReference): - return g.proceed, node - elif isinstance(node, ops.Aggregation): - return g.proceed, node._projection - elif isinstance(node, ops.Join): - return g.proceed, None - elif isinstance(node, ops.TableNode): - return g.halt, node - elif isinstance(node, ops.InColumn): - # we allow InColumn.options to be a column from a foreign table - return [node.value], None - else: - return g.proceed, None - - -def shares_all_roots(exprs, parents): - # unique table dependencies of exprs and parents - exprs_deps = set(g.traverse(_find_projections, exprs)) - parents_deps = set(g.traverse(_find_projections, parents)) - return exprs_deps <= parents_deps - - -def shares_some_roots(exprs, parents): - # unique table dependencies of exprs and parents - exprs_deps = set(g.traverse(_find_projections, exprs)) - parents_deps = set(g.traverse(_find_projections, parents)) - # Also return True if exprs has no roots (e.g. literal-only expressions) - return bool(exprs_deps & parents_deps) or not exprs_deps - - -def flatten_predicate(node): +def flatten_predicates(node): """Yield the expressions corresponding to the `And` nodes of a predicate. 
Examples @@ -449,45 +57,3 @@ def predicate(node): return g.halt, node return list(g.traverse(predicate, node)) - - -def find_predicates(node, flatten=True): - # TODO(kszucs): consider to remove flatten argument and compose with - # flatten_predicates instead - def predicate(node): - assert isinstance(node, ops.Node), type(node) - if isinstance(node, ops.Value) and node.dtype.is_boolean(): - if flatten and isinstance(node, ops.And): - return g.proceed, None - else: - return g.halt, node - return g.proceed, None - - return list(g.traverse(predicate, node)) - - -def find_subqueries(node: ops.Node, min_dependents=1) -> tuple[ops.Node, ...]: - subquery_dependents = defaultdict(set) - for n in filter(None, util.promote_list(node)): - dependents = g.Graph.from_dfs(n).invert() - for u, vs in dependents.toposort().items(): - # count the number of table-node dependents on the current node - # but only if the current node is a selection or aggregation - if isinstance(u, (rels.Projection, rels.Aggregation, rels.Limit)): - subquery_dependents[u].update(vs) - - return tuple( - node - for node, dependents in reversed(subquery_dependents.items()) - if len(dependents) >= min_dependents - ) - - -def find_toplevel_unnest_children(nodes: Iterable[ops.Node]) -> Iterator[ops.Table]: - def finder(node): - return ( - isinstance(node, ops.Value), - find_first_base_table(node) if isinstance(node, ops.Unnest) else None, - ) - - return g.traverse(finder, nodes) diff --git a/ibis/expr/api.py b/ibis/expr/api.py index 56ee85d1c005..85015156e8d9 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -5,6 +5,7 @@ import builtins import datetime import functools +import itertools import numbers import operator from collections import Counter @@ -303,6 +304,9 @@ def schema( return sch.Schema.from_tuples(zip(names, types)) +_table_names = (f"unbound_table_{i:d}" for i in itertools.count()) + + def table( schema: SupportsSchema | None = None, name: str | None = None, @@ -333,9 +337,12 @@ def table( a int64 b string """ - if isinstance(schema, type) and name is None: - name = schema.__name__ - return ops.UnboundTable(schema=schema, name=name).to_expr() + if name is None: + if isinstance(schema, type): + name = schema.__name__ + else: + name = next(_table_names) + return ops.UnboundTable(name=name, schema=schema).to_expr() @lazy_singledispatch diff --git a/ibis/expr/builders.py b/ibis/expr/builders.py index 9009518a19ea..333d3456bf43 100644 --- a/ibis/expr/builders.py +++ b/ibis/expr/builders.py @@ -9,13 +9,11 @@ import ibis.expr.rules as rlz import ibis.expr.types as ir from ibis import util -from ibis.common.annotations import annotated +from ibis.common.annotations import annotated, attribute from ibis.common.deferred import Deferred, Resolver, deferrable from ibis.common.exceptions import IbisInputError from ibis.common.grounds import Concrete from ibis.common.typing import VarTuple # noqa: TCH001 -from ibis.expr.operations.relations import Relation # noqa: TCH001 -from ibis.expr.types.relations import bind_expr if TYPE_CHECKING: from typing_extensions import Self @@ -146,6 +144,25 @@ class WindowBuilder(Builder): orderings: VarTuple[Union[str, Resolver, ops.Value]] = () max_lookback: Optional[ops.Value[dt.Interval]] = None + @attribute + def _table(self): + inputs = ( + self.start, + self.end, + *self.groupings, + *self.orderings, + self.max_lookback, + ) + valuerels = (v.relations for v in inputs if isinstance(v, ops.Value)) + relations = frozenset().union(*valuerels) + if len(relations) == 0: + return None + elif 
len(relations) == 1: + (table,) = relations + return table + else: + raise IbisInputError("Window frame can only depend on a single relation") + def _maybe_cast_boundary(self, boundary, dtype): if boundary.dtype == dtype: return boundary @@ -214,9 +231,23 @@ def lookback(self, value) -> Self: return self.copy(max_lookback=value) @annotated - def bind(self, table: Relation): - groupings = bind_expr(table.to_expr(), self.groupings) - orderings = bind_expr(table.to_expr(), self.orderings) + def bind(self, table: Optional[ops.Relation]): + table = table or self._table + if table is None: + raise IbisInputError("Unable to bind window frame to a table") + + table = table.to_expr() + + def bind_value(value): + if isinstance(value, str): + return table._get_column(value) + elif isinstance(value, Resolver): + return value.resolve({"_": table}) + else: + return value + + groupings = map(bind_value, self.groupings) + orderings = map(bind_value, self.orderings) if self.how == "rows": return ops.RowsWindowFrame( table=table, diff --git a/ibis/expr/decompile.py b/ibis/expr/decompile.py index a6cbf56de716..af279447bd76 100644 --- a/ibis/expr/decompile.py +++ b/ibis/expr/decompile.py @@ -10,6 +10,7 @@ import ibis.expr.operations as ops import ibis.expr.types as ir from ibis.common.graph import Graph +from ibis.expr.rewrites import simplify from ibis.util import experimental _method_overrides = { @@ -31,11 +32,8 @@ ops.ExtractYear: "year", ops.Intersection: "intersect", ops.IsNull: "isnull", - ops.LeftAntiJoin: "anti_join", - ops.LeftSemiJoin: "semi_join", ops.Lowercase: "lower", ops.RegexSearch: "re_search", - ops.SelfReference: "view", ops.StartsWith: "startswith", ops.StringContains: "contains", ops.StringSQLILike: "ilike", @@ -87,7 +85,6 @@ def translate(op, *args, **kwargs): @translate.register(ops.Value) -@translate.register(ops.TableNode) def value(op, *args, **kwargs): method = _get_method_name(op) kwargs = [(k, v) for k, v in kwargs.items() if v is not None] @@ -125,44 +122,80 @@ def _try_unwrap(stmt): if len(stmt) == 1: return stmt[0] else: - return f"[{', '.join(stmt)}]" + stmt = map(str, stmt) + values = ", ".join(stmt) + return f"[{values}]" + + +def _wrap_alias(values, rendered): + result = [] + for k, v in values.items(): + text = rendered[k] + if v.name != k: + text = f"{text}.name({k!r})" + result.append(text) + return result + + +def _inline(args): + return ", ".join(map(str, args)) + +@translate.register(ops.Project) +def project(op, parent, values): + out = f"{parent}" + if not values: + return out -@translate.register(ops.Selection) -def selection(op, table, selections, predicates, sort_keys): - out = f"{table}" - if selections: - out = f"{out}.select({_try_unwrap(selections)})" + values = _wrap_alias(op.values, values) + return f"{out}.select({_inline(values)})" + + +@translate.register(ops.Filter) +def filter_(op, parent, predicates): + out = f"{parent}" if predicates: - out = f"{out}.filter({_try_unwrap(predicates)})" - if sort_keys: - out = f"{out}.order_by({_try_unwrap(sort_keys)})" + out = f"{out}.filter({_inline(predicates)})" return out -@translate.register(ops.Aggregation) -def aggregation(op, table, by, metrics, predicates, having, sort_keys): - out = f"{table}" - if predicates: - out = f"{out}.filter({_try_unwrap(predicates)})" - if by: - out = f"{out}.group_by({_try_unwrap(by)})" - if having: - out = f"{out}.having({_try_unwrap(having)})" - if metrics: - out = f"{out}.aggregate({_try_unwrap(metrics)})" - if sort_keys: - out = f"{out}.order_by({_try_unwrap(sort_keys)})" 
+@translate.register(ops.Sort) +def sort(op, parent, keys): + out = f"{parent}" + if keys: + out = f"{out}.order_by({_inline(keys)})" return out -@translate.register(ops.Join) -def join(op, left, right, predicates): - method = _get_method_name(op) - return f"{left}.{method}({right}, {_try_unwrap(predicates)})" +@translate.register(ops.Aggregate) +def aggregation(op, parent, groups, metrics): + groups = _wrap_alias(op.groups, groups) + metrics = _wrap_alias(op.metrics, metrics) + if groups and metrics: + return f"{parent}.aggregate([{_inline(metrics)}], by=[{_inline(groups)}])" + elif metrics: + return f"{parent}.aggregate([{_inline(metrics)}])" + else: + raise ValueError("No metrics to aggregate") + + +@translate.register(ops.SelfReference) +def self_reference(op, parent, identifier): + return parent + + +@translate.register(ops.JoinLink) +def join_link(op, table, predicates, how): + return f".{how}_join({table}, {_try_unwrap(predicates)})" + + +@translate.register(ops.JoinChain) +def join(op, first, rest, values): + calls = "".join(rest) + return f"{first}{calls}" -@translate.register(ops.SetOp) +@translate.register(ops.Set) def union(op, left, right, distinct): method = _get_method_name(op) if distinct: @@ -172,16 +205,16 @@ def union(op, left, right, distinct): @translate.register(ops.Limit) -def limit(op, table, n, offset): +def limit(op, parent, n, offset): if offset: - return f"{table}.limit({n}, {offset})" + return f"{parent}.limit({n}, {offset})" else: - return f"{table}.limit({n})" + return f"{parent}.limit({n})" -@translate.register(ops.TableColumn) -def table_column(op, table, name): - return f"{table}.{name}" +@translate.register(ops.Field) +def table_column(op, rel, name): + return f"{rel}.{name}" @translate.register(ops.SortKey) @@ -292,14 +325,22 @@ def isin(op, value, options): class CodeContext: - always_assign = (ops.ScalarParameter, ops.UnboundTable, ops.Aggregation) - always_ignore = (ops.TableColumn, dt.Primitive, dt.Variadic, dt.Temporal) + always_assign = (ops.ScalarParameter, ops.UnboundTable, ops.Aggregate) + always_ignore = ( + ops.SelfReference, + ops.Field, + dt.Primitive, + dt.Variadic, + dt.Temporal, + ) shorthands = { - ops.Aggregation: "agg", + ops.Aggregate: "agg", ops.Literal: "lit", ops.ScalarParameter: "param", - ops.Selection: "proj", - ops.TableNode: "t", + ops.Project: "p", + ops.Relation: "r", + ops.Filter: "f", + ops.Sort: "s", } def __init__(self, assign_result_to="result"): @@ -308,7 +349,7 @@ def __init__(self, assign_result_to="result"): def variable_for(self, node): klass = type(node) - if isinstance(node, ops.TableNode) and isinstance(node, ops.Named): + if isinstance(node, ops.Relation) and hasattr(node, "name"): name = node.name elif klass in self.shorthands: name = self.shorthands[klass] @@ -345,7 +386,7 @@ def render(self, node, code, n_dependents): @experimental def decompile( - node: ops.Node | ir.Expr, + expr: ir.Expr, render_import: bool = True, assign_result_to: str = "result", format: bool = False, @@ -354,7 +395,7 @@ def decompile( Parameters ---------- - node + expr node or expression to decompile render_import Whether to add `import ibis` to the result. @@ -368,13 +409,11 @@ def decompile( str Equivalent Python source code for `node`. 
""" - if isinstance(node, ir.Expr): - node = node.op() - elif not isinstance(node, ops.Node): - raise TypeError( - f"Expected ibis expression or operation, got {type(node).__name__}" - ) + if not isinstance(expr, ir.Expr): + raise TypeError(f"Expected ibis expression, got {type(expr).__name__}") + node = expr.op() + node = simplify(node) out = io.StringIO() ctx = CodeContext(assign_result_to=assign_result_to) dependents = Graph(node).invert() diff --git a/ibis/expr/format.py b/ibis/expr/format.py index c0a244287e14..6ac9dfeb7b8a 100644 --- a/ibis/expr/format.py +++ b/ibis/expr/format.py @@ -192,11 +192,14 @@ def fmt(op, **kwargs): @fmt.register(ops.Relation) -@fmt.register(ops.DummyTable) @fmt.register(ops.WindowingTVF) -def _relation(op, **kwargs): - schema = render_schema(op.schema, indent_level=1) - return f"{op.__class__.__name__}\n{schema}" +def _relation(op, parent=None, **kwargs): + if parent is None: + top = f"{op.__class__.__name__}\n" + else: + top = f"{op.__class__.__name__}[{parent}]\n" + kwargs["schema"] = render_schema(op.schema) + return top + render_fields(kwargs, 1) @fmt.register(ops.PhysicalTable) @@ -218,6 +221,7 @@ def _in_memory_table(op, data, **kwargs): @fmt.register(ops.SQLStringView) def _sql_query_result(op, query, **kwargs): clsname = op.__class__.__name__ + if isinstance(op, ops.SQLStringView): child, name = kwargs["child"], kwargs["name"] top = f"{clsname}[{child}]: {name}\n" @@ -235,38 +239,54 @@ def _sql_query_result(op, query, **kwargs): @fmt.register(ops.FillNa) @fmt.register(ops.DropNa) -def _fill_na(op, table, **kwargs): - name = f"{op.__class__.__name__}[{table}]\n" +def _fill_na(op, parent, **kwargs): + name = f"{op.__class__.__name__}[{parent}]\n" return name + render_fields(kwargs, 1) -@fmt.register(ops.Aggregation) -def _aggregation(op, table, **kwargs): - name = f"{op.__class__.__name__}[{table}]\n" - kwargs["by"] = {node.name: r for node, r in zip(op.by, kwargs["by"])} - kwargs["metrics"] = {node.name: r for node, r in zip(op.metrics, kwargs["metrics"])} +@fmt.register(ops.Aggregate) +def _aggregate(op, parent, **kwargs): + name = f"{op.__class__.__name__}[{parent}]\n" return name + render_fields(kwargs, 1) -@fmt.register(ops.Selection) -def _selection(op, table, selections, **kwargs): - name = f"{op.__class__.__name__}[{table}]\n" +@fmt.register(ops.Project) +def _project(op, parent, values): + name = f"{op.__class__.__name__}[{parent}]\n" - # special handling required to support both relation and value selections - rels, values = [], {} - for node, rendered in zip(op.selections, selections): - if isinstance(node, ops.Relation): - rels.append(rendered) - else: - values[node.name] = f"{rendered}{type_info(node.dtype)}" + fields = {} + for k, v in values.items(): + node = op.values[k] + fields[f"{k}:"] = f"{v}{type_info(node.dtype)}" - segments = filter(None, [render(rels), render(values)]) - kwargs["selections"] = "\n".join(segments) + return name + render_schema(fields, 1) + + +@fmt.register(ops.DummyTable) +def _dummy_table(op, values): + name = op.__class__.__name__ + "\n" + + fields = {} + for k, v in values.items(): + node = op.values[k] + fields[f"{k}:"] = f"{v}{type_info(node.dtype)}" + + return name + render_schema(fields, 1) - return name + render_fields(kwargs, 1) +@fmt.register(ops.Filter) +def _project(op, parent, predicates): + name = f"{op.__class__.__name__}[{parent}]\n" + return name + render(predicates, 1) -@fmt.register(ops.SetOp) + +@fmt.register(ops.Sort) +def _sort(op, parent, keys): + name = 
f"{op.__class__.__name__}[{parent}]\n" + return name + render(keys, 1) + + +@fmt.register(ops.Set) def _set_op(op, left, right, distinct): args = [str(left), str(right)] if op.distinct is not None: @@ -274,7 +294,7 @@ def _set_op(op, left, right, distinct): return f"{op.__class__.__name__}[{', '.join(args)}]" -@fmt.register(ops.Join) +@fmt.register(ops.JoinChain) def _join(op, left, right, predicates, **kwargs): args = [str(left), str(right)] name = f"{op.__class__.__name__}[{', '.join(args)}]" @@ -291,30 +311,48 @@ def _join(op, left, right, predicates, **kwargs): return f"{top}\n{fields}" if fields else top +@fmt.register(ops.JoinLink) +def _join(op, how, table, predicates): + args = [str(how), str(table)] + name = f"{op.__class__.__name__}[{', '.join(args)}]" + return f"{name}\n{render(predicates, 1)}" + + +@fmt.register(ops.JoinChain) +def _join_project(op, first, rest, **kwargs): + name = f"{op.__class__.__name__}[{first}]\n" + return name + render(rest, 1) + "\n" + render_fields(kwargs, 1) + + @fmt.register(ops.Limit) @fmt.register(ops.Sample) -def _limit(op, table, **kwargs): +def _limit(op, parent, **kwargs): params = inline_args(kwargs) - return f"{op.__class__.__name__}[{table}, {params}]" + return f"{op.__class__.__name__}[{parent}, {params}]" @fmt.register(ops.SelfReference) @fmt.register(ops.Distinct) -def _self_reference(op, table, **kwargs): - return f"{op.__class__.__name__}[{table}]" +def _self_reference(op, parent, **kwargs): + return f"{op.__class__.__name__}[{parent}]" @fmt.register(ops.Literal) def _literal(op, value, **kwargs): if op.dtype.is_interval(): return f"{value!r} {op.dtype.unit.short}" + elif op.dtype.is_array(): + return f"{list(value)!r}" else: return f"{value!r}" -@fmt.register(ops.TableColumn) -def _table_column(op, table, name): - return f"{table}.{name}" +@fmt.register(ops.Field) +def _relation_field(op, rel, name): + if name.isidentifier(): + return f"{rel}.{name}" + else: + return f"{rel}[{name!r}]" @fmt.register(ops.Value) diff --git a/ibis/expr/operations/core.py b/ibis/expr/operations/core.py index c7b4e0a1dc75..5db1e2c2f17a 100644 --- a/ibis/expr/operations/core.py +++ b/ibis/expr/operations/core.py @@ -32,13 +32,14 @@ def op(self) -> Self: """Make `Node` backwards compatible with code that uses `Expr.op()`.""" return self - @abstractmethod - def to_expr(self): - ... 
- # Avoid custom repr for performance reasons __repr__ = object.__repr__ + # TODO(kszucs): hidrate the __children__ traversable attribute + # @attribute + # def __children__(self): + # return super().__children__ + # TODO(kszucs): remove this mixin @public @@ -126,6 +127,12 @@ def shape(self) -> S: ds.Shape """ + @attribute + def relations(self): + """Set of relations the value node depends on.""" + children = (n.relations for n in self.__children__ if isinstance(n, Value)) + return frozenset().union(*children) + @property @util.deprecated(as_of="7.0", instead="use .dtype property instead") def output_dtype(self): @@ -167,10 +174,14 @@ class Unary(Value): arg: Value - @property + @attribute def shape(self) -> ds.DataShape: return self.arg.shape + @attribute + def relations(self): + return self.arg.relations + @public class Binary(Value): @@ -179,10 +190,14 @@ class Binary(Value): left: Value right: Value - @property + @attribute def shape(self) -> ds.DataShape: return max(self.left.shape, self.right.shape) + @attribute + def relations(self): + return self.left.relations | self.right.relations + @public class Argument(Value): diff --git a/ibis/expr/operations/generic.py b/ibis/expr/operations/generic.py index be349cd45777..15cbd5a5d345 100644 --- a/ibis/expr/operations/generic.py +++ b/ibis/expr/operations/generic.py @@ -1,13 +1,12 @@ from __future__ import annotations import itertools -from typing import Annotated, Any, Optional, Union +from typing import Annotated, Any, Optional from typing import Literal as LiteralType from public import public from typing_extensions import TypeVar -import ibis.common.exceptions as com import ibis.expr.datashape as ds import ibis.expr.datatypes as dt import ibis.expr.rules as rlz @@ -20,33 +19,6 @@ from ibis.expr.operations.relations import Relation # noqa: TCH001 -@public -class TableColumn(Value, Named): - """Selects a column from a `Table`.""" - - table: Relation - name: Union[str, int] - - shape = ds.columnar - - def __init__(self, table, name): - if isinstance(name, int): - name = table.schema.name_at_position(name) - - if name not in table.schema: - columns_formatted = ", ".join(map(repr, table.schema.names)) - raise com.IbisTypeError( - f"Column {name!r} is not found in table. " - f"Existing columns: {columns_formatted}." 
- ) - - super().__init__(table=table, name=name) - - @property - def dtype(self): - return self.table.schema[self.name] - - @public class RowID(Value, Named): """The row number (an autonumeric) of the returned result.""" @@ -57,22 +29,9 @@ class RowID(Value, Named): shape = ds.columnar dtype = dt.int64 - -@public -class TableArrayView(Value, Named): - """Helper operation class for creating scalar subqueries.""" - - table: Relation - - shape = ds.columnar - - @property - def dtype(self): - return self.table.schema[self.name] - - @property - def name(self): - return self.table.schema.names[0] + @attribute + def relations(self): + return frozenset({self.table}) @public diff --git a/ibis/expr/operations/geospatial.py b/ibis/expr/operations/geospatial.py index 21067dc1d161..ba0d6d93a01c 100644 --- a/ibis/expr/operations/geospatial.py +++ b/ibis/expr/operations/geospatial.py @@ -4,7 +4,7 @@ import ibis.expr.datatypes as dt from ibis.expr.operations.core import Binary, Unary, Value -from ibis.expr.operations.reductions import Reduction +from ibis.expr.operations.reductions import Filterable, Reduction @public @@ -181,9 +181,11 @@ class GeoTouches(GeoSpatialBinOp): @public -class GeoUnaryUnion(Reduction, GeoSpatialUnOp): +class GeoUnaryUnion(Filterable, Reduction): """Returns the pointwise union of the geometries in the column.""" + arg: Value[dt.GeoSpatial] + dtype = dt.geometry diff --git a/ibis/expr/operations/logical.py b/ibis/expr/operations/logical.py index 3ca8675d49c2..78a33de77e1c 100644 --- a/ibis/expr/operations/logical.py +++ b/ibis/expr/operations/logical.py @@ -2,14 +2,12 @@ from public import public -import ibis.expr.datashape as ds import ibis.expr.datatypes as dt import ibis.expr.rules as rlz from ibis.common.annotations import ValidationError, attribute from ibis.common.exceptions import IbisTypeError from ibis.common.typing import VarTuple # noqa: TCH001 -from ibis.expr.operations.core import Binary, Column, Unary, Value -from ibis.expr.operations.relations import Relation # noqa: TCH001 +from ibis.expr.operations.core import Binary, Unary, Value @public @@ -137,15 +135,6 @@ def shape(self): return rlz.highest_precedence_shape(args) -@public -class InColumn(Value): - value: Value - options: Column[dt.Any] - - dtype = dt.boolean - shape = rlz.shape_like("args") - - @public class IfElse(Value): """Ternary case expression, equivalent to. @@ -164,67 +153,3 @@ class IfElse(Value): @attribute def dtype(self): return rlz.highest_precedence_dtype([self.true_expr, self.false_null_expr]) - - -@public -class ExistsSubquery(Value): - foreign_table: Relation - predicates: VarTuple[Value[dt.Boolean]] - - dtype = dt.boolean - shape = ds.columnar - - -@public -class UnresolvedExistsSubquery(Value): - """An exists subquery whose outer leaf table is unknown. - - Notes - ----- - Consider the following ibis expressions - - ```python - import ibis - - t = ibis.table(dict(a="string")) - s = ibis.table(dict(a="string")) - - cond = (t.a == s.a).any() - ``` - - Without knowing the table to use as the outer query there are two ways to - turn this expression into a SQL `EXISTS` predicate, depending on which of - `t` or `s` is filtered on. - - Filtering from `t`: - - ```sql - SELECT * - FROM t - WHERE EXISTS (SELECT 1 FROM s WHERE t.a = s.a) - ``` - - Filtering from `s`: - - ```sql - SELECT * - FROM s - WHERE EXISTS (SELECT 1 FROM t WHERE t.a = s.a) - ``` - - Notably the correlated subquery cannot stand on its own. 
- - The purpose of `UnresolvedExistsSubquery` is to capture enough information - about an exists predicate such that it can be resolved when predicates are - resolved against the outer leaf table when `Selection`s are constructed. - """ - - tables: VarTuple[Relation] - predicates: VarTuple[Value[dt.Boolean]] - - dtype = dt.boolean - shape = ds.columnar - - def resolve(self, table) -> ExistsSubquery: - (foreign_table,) = (t for t in self.tables if t != table) - return ExistsSubquery(foreign_table, self.predicates) diff --git a/ibis/expr/operations/reductions.py b/ibis/expr/operations/reductions.py index e0b63041395e..2a85dbfcbab5 100644 --- a/ibis/expr/operations/reductions.py +++ b/ibis/expr/operations/reductions.py @@ -22,6 +22,7 @@ def __window_op__(self): return self +# TODO(kszucs): all reductions all filterable so we could remove Filterable class Filterable(Value): where: Optional[Value[dt.Boolean]] = None @@ -39,6 +40,10 @@ class CountStar(Filterable, Reduction): dtype = dt.int64 + @attribute + def relations(self): + return frozenset({self.arg}) + @public class CountDistinctStar(Filterable, Reduction): @@ -46,6 +51,10 @@ class CountDistinctStar(Filterable, Reduction): dtype = dt.int64 + @attribute + def relations(self): + return frozenset({self.arg}) + @public class Arbitrary(Filterable, Reduction): diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index 47412e556cd2..d42637013730 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -1,671 +1,457 @@ from __future__ import annotations import itertools +import typing from abc import abstractmethod -from typing import TYPE_CHECKING, Annotated, Any, Literal, Optional -from typing import Union as UnionType +from typing import Annotated, Any, Literal, Optional, TypeVar from public import public -import ibis.common.exceptions as com +import ibis.expr.datashape as ds import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -from ibis import util -from ibis.common.annotations import annotated, attribute -from ibis.common.collections import FrozenDict # noqa: TCH001 -from ibis.common.deferred import Deferred +import ibis.expr.rules as rlz +from ibis.common.annotations import attribute +from ibis.common.collections import FrozenDict +from ibis.common.exceptions import IbisTypeError, IntegrityError, RelationError from ibis.common.grounds import Concrete -from ibis.common.patterns import Between, Coercible, Eq -from ibis.common.typing import VarTuple # noqa: TCH001 -from ibis.expr.operations.core import Column, Named, Node, Scalar, Value +from ibis.common.patterns import Between, InstanceOf +from ibis.common.typing import Coercible, VarTuple +from ibis.expr.operations.core import Alias, Column, Node, Scalar, Value from ibis.expr.operations.sortkeys import SortKey # noqa: TCH001 from ibis.expr.schema import Schema from ibis.formats import TableProxy # noqa: TCH001 +from ibis.util import gen_name -if TYPE_CHECKING: - import ibis.expr.types as ir +T = TypeVar("T") - -_table_names = (f"unbound_table_{i:d}" for i in itertools.count()) - - -@public -def genname(): - return next(_table_names) +Unaliased = Annotated[T, ~InstanceOf(Alias)] @public class Relation(Node, Coercible): @classmethod def __coerce__(cls, value): - import pandas as pd + from ibis.expr.types import TableExpr - import ibis - import ibis.expr.types as ir - - if isinstance(value, pd.DataFrame): - return ibis.memtable(value).op() - elif isinstance(value, ir.Expr): + if isinstance(value, Relation): + return 
value + elif isinstance(value, TableExpr): return value.op() else: - return value + raise TypeError(f"Cannot coerce {value!r} to a Relation") + + @property + @abstractmethod + def values(self) -> FrozenDict[str, Value]: + """A mapping of column names to expressions which build up the relation. - def order_by(self, sort_exprs): - return Selection(self, [], sort_keys=sort_exprs) + This attribute is heavily used in rewrites as well as during field + dereferencing in the API layer. The returned expressions must only + originate from parent relations, depending on the relation type. + """ @property @abstractmethod def schema(self) -> Schema: + """The schema of the relation. + + All relations must have a well-defined schema. + """ ... - def to_expr(self): - import ibis.expr.types as ir + @property + def fields(self) -> FrozenDict[str, Column]: + """A mapping of column names to fields of the relation. - return ir.Table(self) + This calculated property shouldn't be overridden in subclasses since it + is mostly used for convenience. + """ + return FrozenDict({k: Field(self, k) for k in self.schema}) + def to_expr(self): + from ibis.expr.types import TableExpr -TableNode = Relation + return TableExpr(self) @public -class Namespace(Concrete): - database: Optional[str] = None - schema: Optional[str] = None +class Field(Value): + rel: Relation + name: str + shape = ds.columnar -@public -class PhysicalTable(Relation, Named): - pass + def __init__(self, rel, name): + if name not in rel.schema: + columns_formatted = ", ".join(map(repr, rel.schema.names)) + raise IbisTypeError( + f"Column {name!r} is not found in table. " + f"Existing columns: {columns_formatted}." + ) + super().__init__(rel=rel, name=name) + + @attribute + def dtype(self): + return self.rel.schema[self.name] + + @attribute + def relations(self): + return frozenset({self.rel}) -# TODO(kszucs): PhysicalTable should have a source attribute and UnbountTable -# should just extend TableNode @public -class UnboundTable(PhysicalTable): - schema: Schema - name: Optional[str] = None - namespace: Namespace = Namespace() +class Subquery(Value): + rel: Relation + shape = ds.columnar - def __init__(self, schema, name, namespace) -> None: - if name is None: - name = genname() - super().__init__(schema=schema, name=name, namespace=namespace) + def __init__(self, rel, **kwargs): + if len(rel.schema) != 1: + raise IntegrityError( + f"Subquery must have exactly one column, got {len(rel.schema)}" + ) + super().__init__(rel=rel, **kwargs) + @attribute + def name(self): + return self.rel.schema.names[0] -@public -class DatabaseTable(PhysicalTable): - name: str - schema: Schema - source: Any - namespace: Namespace = Namespace() + @attribute + def value(self): + return self.rel.values[self.name] + + @attribute + def relations(self): + return frozenset() + + @property + def dtype(self): + return self.value.dtype @public -class SQLQueryResult(TableNode): - """A table sourced from the result set of a select query.""" +class ScalarSubquery(Subquery): + def __init__(self, rel): + from ibis.expr.rewrites import ReductionValue - query: str - schema: Schema - source: Any + super().__init__(rel=rel) + if not self.value.find(ReductionValue, filter=Value): + raise IntegrityError( + f"Subquery {self.value!r} is not scalar, it must be turned into a scalar subquery first" + ) @public -class InMemoryTable(PhysicalTable): - name: str - schema: Schema - data: TableProxy +class ExistsSubquery(Subquery): + dtype = dt.boolean + +@public +class InSubquery(Subquery): + needle: Value 
+ dtype = dt.boolean -# TODO(kszucs): desperately need to clean this up, the majority of this -# functionality should be handled by input rules for the Join class -def _clean_join_predicates(left, right, predicates): - import ibis.expr.analysis as an - import ibis.expr.types as ir - from ibis.expr.analysis import shares_all_roots - - result = [] - - for pred in predicates: - if isinstance(pred, tuple): - if len(pred) != 2: - raise com.ExpressionError("Join key tuple must be length 2") - lk, rk = pred - lk = left.to_expr()._ensure_expr(lk) - rk = right.to_expr()._ensure_expr(rk) - pred = lk == rk - elif isinstance(pred, str): - pred = left.to_expr()[pred] == right.to_expr()[pred] - elif pred is True or pred is False: - pred = ops.Literal(pred, dtype="bool").to_expr() - elif isinstance(pred, Value): - pred = pred.to_expr() - elif isinstance(pred, Deferred): - # resolve deferred expressions on the left table - pred = pred.resolve(left.to_expr()) - elif not isinstance(pred, ir.Expr): - raise NotImplementedError - - if not isinstance(pred, ir.BooleanValue): - raise com.ExpressionError("Join predicate must be a boolean expression") - - preds = an.flatten_predicate(pred.op()) - result.extend(preds) - - # Validate join predicates. Each predicate must be valid jointly when - # considering the roots of each input table - for predicate in result: - if not shares_all_roots(predicate, [left, right]): - raise com.RelationError( - f"The expression {predicate!r} does not fully " - "originate from dependencies of the table " - "expression." + def __init__(self, **kwargs): + super().__init__(**kwargs) + if not rlz.comparable(self.value, self.needle): + raise IntegrityError( + f"Subquery {self.needle!r} is not comparable to {self.value!r}" ) - assert all(isinstance(pred, ops.Node) for pred in result) + @attribute + def relations(self): + return self.needle.relations - return tuple(result) +def _check_integrity(values, allowed_parents): + for value in values: + for rel in value.relations: + if rel not in allowed_parents: + raise IntegrityError( + f"Cannot add {value!r} to projection, they belong to another relation" + ) -@public -class Join(Relation): - left: Relation - right: Relation - predicates: Any = () - def __init__(self, left, right, predicates, **kwargs): - # TODO(kszucs): predicates should be already a list of operations, need - # to update the validation rule for the Join classes which is a noop - # currently - import ibis.expr.operations as ops - import ibis.expr.types as ir +@public +class Project(Relation): + parent: Relation + values: FrozenDict[str, Unaliased[Value]] - # TODO(kszucs): need to factor this out to appropriate join predicate - # rules - predicates = [ - pred.op() if isinstance(pred, ir.Expr) else pred - for pred in util.promote_list(predicates) - ] - - if left.equals(right): - # GH #667: If left and right table have a common parent expression, - # e.g. 
they have different filters, we need to add a self-reference - # and make the appropriate substitution in the join predicates - right = ops.SelfReference(right) - elif isinstance(right, Join): - # for joins with joins on the right side we turn the right side - # into a view, otherwise the join tree is incorrectly flattened - # and tables on the right are incorrectly scoped - old = right - new = right = ops.SelfReference(right) - rule = Eq(old) >> new - predicates = [pred.replace(rule) for pred in predicates] - - predicates = _clean_join_predicates(left, right, predicates) - - super().__init__(left=left, right=right, predicates=predicates, **kwargs) + def __init__(self, parent, values): + _check_integrity(values.values(), {parent}) + super().__init__(parent=parent, values=values) - @property + @attribute def schema(self): - # TODO(kszucs): use `return self.left.schema | self.right.schema` instead which - # eliminates unnecessary projection over the join, but currently breaks the - # pandas backend - left, right = self.left.schema, self.right.schema - if duplicates := left.keys() & right.keys(): - raise com.IntegrityError(f"Duplicate column name(s): {duplicates}") - return Schema( - { - name: typ.copy(nullable=True) - for name, typ in itertools.chain(left.items(), right.items()) - } - ) + return Schema({k: v.dtype for k, v in self.values.items()}) -@public -class InnerJoin(Join): - pass +class Simple(Relation): + parent: Relation + @attribute + def values(self): + return self.parent.fields -@public -class LeftJoin(Join): - pass + @attribute + def schema(self): + return self.parent.schema @public -class RightJoin(Join): - pass +class SelfReference(Simple): + _uid_counter = itertools.count() + identifier: Optional[int] = None -@public -class OuterJoin(Join): - pass + def __init__(self, parent, identifier): + if identifier is None: + identifier = next(self._uid_counter) + super().__init__(parent=parent, identifier=identifier) + @attribute + def name(self) -> str: + if (name := getattr(self.parent, "name", None)) is not None: + return f"{name}_ref" + return gen_name("self_ref") -@public -class AnyInnerJoin(Join): - pass + +JoinKind = Literal[ + "inner", + "left", + "right", + "outer", + "asof", + "semi", + "anti", + "any_inner", + "any_left", + "cross", +] @public -class AnyLeftJoin(Join): - pass +class JoinLink(Node): + how: JoinKind + table: SelfReference + predicates: VarTuple[Value[dt.Boolean]] @public -class LeftSemiJoin(Join): +class JoinChain(Relation): + first: Relation + rest: VarTuple[JoinLink] + values: FrozenDict[str, Unaliased[Value]] + + def __init__(self, first, rest, values): + allowed_parents = {first} + for join in rest: + allowed_parents.add(join.table) + _check_integrity(join.predicates, allowed_parents) + _check_integrity(values.values(), allowed_parents) + super().__init__(first=first, rest=rest, values=values) + @attribute def schema(self): - return self.left.schema + return Schema({k: v.dtype.copy(nullable=True) for k, v in self.values.items()}) + + def to_expr(self): + import ibis.expr.types as ir + + return ir.JoinExpr(self) @public -class LeftAntiJoin(Join): - @attribute - def schema(self): - return self.left.schema +class Sort(Simple): + keys: VarTuple[SortKey] + + def __init__(self, parent, keys): + _check_integrity(keys, {parent}) + super().__init__(parent=parent, keys=keys) @public -class CrossJoin(Join): - pass +class Filter(Simple): + predicates: VarTuple[Value[dt.Boolean]] + + def __init__(self, parent, predicates): + from ibis.expr.rewrites import 
ReductionValue + + for pred in predicates: + if pred.find(ReductionValue, filter=Value): + raise IntegrityError( + f"Cannot add {pred!r} to filter, it is a reduction" + ) + if pred.relations and parent not in pred.relations: + raise IntegrityError( + f"Cannot add {pred!r} to filter, they belong to another relation" + ) + super().__init__(parent=parent, predicates=predicates) @public -class AsOfJoin(Join): - # TODO(kszucs): convert to proper predicate rules - by: Any = () - tolerance: Optional[Value[dt.Interval]] = None +class Limit(Simple): + n: typing.Union[int, Scalar[dt.Integer], None] = None + offset: typing.Union[int, Scalar[dt.Integer]] = 0 + + +@public +class Aggregate(Relation): + parent: Relation + groups: FrozenDict[str, Unaliased[Column]] + metrics: FrozenDict[str, Unaliased[Scalar]] + + def __init__(self, parent, groups, metrics): + _check_integrity(groups.values(), {parent}) + _check_integrity(metrics.values(), {parent}) + if duplicates := groups.keys() & metrics.keys(): + raise RelationError( + f"Cannot add {duplicates} to aggregate, they are already in the groupby" + ) + super().__init__(parent=parent, groups=groups, metrics=metrics) + + @attribute + def values(self): + return FrozenDict({**self.groups, **self.metrics}) - def __init__(self, left, right, by, predicates, **kwargs): - by = _clean_join_predicates(left, right, util.promote_list(by)) - super().__init__(left=left, right=right, by=by, predicates=predicates, **kwargs) + @attribute + def schema(self): + return Schema({k: v.dtype for k, v in self.values.items()}) @public -class SetOp(Relation): +class Set(Relation): left: Relation right: Relation distinct: bool = False def __init__(self, left, right, **kwargs): - # convert to dictionary first, to get key-unordered comparison - # semantics + # convert to dictionary first, to get key-unordered comparison semantics if dict(left.schema) != dict(right.schema): - raise com.RelationError("Table schemas must be equal for set operations") + raise RelationError("Table schemas must be equal for set operations") elif left.schema.names != right.schema.names: # rewrite so that both sides have the columns in the same order making it # easier for the backends to implement set operations - cols = [ops.TableColumn(right, name) for name in left.schema.names] - right = Selection(right, cols) + cols = {name: Field(right, name) for name in left.schema.names} + right = Project(right, cols) super().__init__(left=left, right=right, **kwargs) + @attribute + def values(self): + return FrozenDict() + @attribute def schema(self): return self.left.schema @public -class Union(SetOp): +class Union(Set): pass @public -class Intersection(SetOp): +class Intersection(Set): pass @public -class Difference(SetOp): +class Difference(Set): pass @public -class Limit(Relation): - table: Relation - n: UnionType[int, Scalar[dt.Integer], None] = None - offset: UnionType[int, Scalar[dt.Integer]] = 0 +class PhysicalTable(Relation): + name: str @attribute - def schema(self): - return self.table.schema + def values(self): + return FrozenDict() @public -class SelfReference(Relation): - table: Relation - - @attribute - def name(self) -> str: - if (name := getattr(self.table, "name", None)) is not None: - return f"{name}_ref" - return util.gen_name("self_ref") - - @attribute - def schema(self): - return self.table.schema - - -class Projection(Relation): - table: Relation - selections: VarTuple[Relation | Value] - - @attribute - def schema(self): - # Resolve schema and initialize - if not self.selections: - return 
self.table.schema - - types, names = [], [] - for projection in self.selections: - if isinstance(projection, Value): - names.append(projection.name) - types.append(projection.dtype) - elif isinstance(projection, TableNode): - schema = projection.schema - names.extend(schema.names) - types.extend(schema.types) - - return Schema.from_tuples(zip(names, types)) - - -def _add_alias(op: ops.Value | ops.TableNode): - """Add a name to a projected column if necessary.""" - if isinstance(op, ops.Value) and not isinstance(op, (ops.Alias, ops.TableColumn)): - return ops.Alias(op, op.name) - else: - return op +class UnboundTable(PhysicalTable): + schema: Schema @public -class Selection(Projection): - predicates: VarTuple[Value[dt.Boolean]] = () - sort_keys: VarTuple[SortKey] = () - - def __init__(self, table, selections, predicates, sort_keys, **kwargs): - from ibis.expr.analysis import shares_all_roots, shares_some_roots - - if not shares_all_roots(selections + sort_keys, table): - raise com.RelationError( - "Selection expressions don't fully originate from " - "dependencies of the table expression." - ) - - for predicate in predicates: - if isinstance(predicate, ops.Literal): - if not (dtype := predicate.dtype).is_boolean(): - raise com.IbisTypeError(f"Invalid predicate dtype: {dtype}") - elif not shares_some_roots(predicate, table): - raise com.RelationError("Predicate doesn't share any roots with table") - - super().__init__( - table=table, - selections=tuple(map(_add_alias, selections)), - predicates=predicates, - sort_keys=sort_keys, - **kwargs, - ) - - @annotated - def order_by(self, keys: VarTuple[SortKey]): - from ibis.expr.analysis import shares_all_roots, sub_immediate_parents - - if not self.selections: - if shares_all_roots(keys, table := self.table): - sort_keys = tuple(self.sort_keys) + tuple( - sub_immediate_parents(key, table) for key in keys - ) - - return Selection( - table, - self.selections, - predicates=self.predicates, - sort_keys=sort_keys, - ) - - return Selection(self, [], sort_keys=keys) - - @attribute - def _projection(self): - return Projection(self.table, self.selections) +class Namespace(Concrete): + database: Optional[str] = None + schema: Optional[str] = None @public -class DummyTable(Relation): - # TODO(kszucs): verify that it has at least one element: Length(at_least=1) - values: VarTuple[Value[dt.Any]] - - @attribute - def schema(self): - return Schema({op.name: op.dtype for op in self.values}) +class DatabaseTable(PhysicalTable): + schema: Schema + source: Any + namespace: Namespace = Namespace() @public -class Aggregation(Relation): - table: Relation - metrics: VarTuple[Scalar] = () - by: VarTuple[Column] = () - having: VarTuple[Scalar[dt.Boolean]] = () - predicates: VarTuple[Value[dt.Boolean]] = () - sort_keys: VarTuple[SortKey] = () - - def __init__(self, table, metrics, by, having, predicates, sort_keys): - from ibis.expr.analysis import shares_all_roots, shares_some_roots - - # All non-scalar refs originate from the input table - if not shares_all_roots(metrics + by + having + sort_keys, table): - raise com.RelationError( - "Selection expressions don't fully originate from " - "dependencies of the table expression." 
- ) - - # invariant due to Aggregation and AggregateSelection requiring a valid - # Selection - assert all(shares_some_roots(predicate, table) for predicate in predicates) - - if not by: - sort_keys = tuple() - - super().__init__( - table=table, - metrics=tuple(map(_add_alias, metrics)), - by=tuple(map(_add_alias, by)), - having=having, - predicates=predicates, - sort_keys=sort_keys, - ) - - @attribute - def _projection(self): - return Projection(self.table, self.metrics + self.by) - - @attribute - def schema(self): - names, types = [], [] - for value in self.by + self.metrics: - names.append(value.name) - types.append(value.dtype) - return Schema.from_tuples(zip(names, types)) - - @annotated - def order_by(self, keys: VarTuple[SortKey]): - from ibis.expr.analysis import shares_all_roots, sub_immediate_parents - - if shares_all_roots(keys, table := self.table): - sort_keys = tuple(self.sort_keys) + tuple( - sub_immediate_parents(key, table) for key in keys - ) - return Aggregation( - table, - metrics=self.metrics, - by=self.by, - having=self.having, - predicates=self.predicates, - sort_keys=sort_keys, - ) - - return Selection(self, [], sort_keys=keys) +class InMemoryTable(PhysicalTable): + schema: Schema + data: TableProxy @public -class Distinct(Relation): - """Distinct is a table-level unique-ing operation. +class SQLQueryResult(Relation): + """A table sourced from the result set of a select query.""" - In SQL, you might have: + query: str + schema: Schema + source: Any + values = FrozenDict() - SELECT DISTINCT foo - FROM table - SELECT DISTINCT foo, bar - FROM table - """ +@public +class SQLStringView(PhysicalTable): + """A view created from a SQL string.""" - table: Relation + child: Relation + query: str @attribute def schema(self): - return self.table.schema + # TODO(kszucs): avoid converting to expression + backend = self.child.to_expr()._find_backend() + return backend._get_schema_using_query(self.query) @public -class Sample(Relation): - """Sample performs random sampling of records in a table.""" - - table: Relation - fraction: Annotated[float, Between(0, 1)] - method: Literal["row", "block"] - seed: UnionType[int, None] = None +class DummyTable(Relation): + values: FrozenDict[str, Value] @attribute def schema(self): - return self.table.schema + return Schema({k: v.dtype for k, v in self.values.items()}) -# TODO(kszucs): split it into two operations, one working with a single replacement -# value and the other with a mapping -# TODO(kszucs): the single value case was limited to numeric and string types @public -class FillNa(Relation): +class FillNa(Simple): """Fill null values in the table.""" - table: Relation - replacements: UnionType[Value[dt.Numeric | dt.String], FrozenDict[str, Any]] - - @attribute - def schema(self): - return self.table.schema + replacements: typing.Union[Value[dt.Numeric | dt.String], FrozenDict[str, Any]] @public -class DropNa(Relation): +class DropNa(Simple): """Drop null values in the table.""" - table: Relation - how: Literal["any", "all"] - subset: Optional[VarTuple[Column[dt.Any]]] = None - - @attribute - def schema(self): - return self.table.schema + how: typing.Literal["any", "all"] + subset: Optional[VarTuple[Column]] = None @public -class View(PhysicalTable): - """A view created from an expression.""" - - child: Relation - name: str +class Sample(Simple): + """Sample performs random sampling of records in a table.""" - @attribute - def schema(self): - return self.child.schema + fraction: Annotated[float, Between(0, 1)] + method: 
typing.Literal["row", "block"] + seed: typing.Union[int, None] = None @public -class SQLStringView(PhysicalTable): - """A view created from a SQL string.""" - - child: Relation - name: str - query: str - - @attribute - def schema(self): - # TODO(kszucs): avoid converting to expression - backend = self.child.to_expr()._find_backend() - return backend._get_schema_using_query(self.query) +class Distinct(Simple): + """Distinct is a table-level unique-ing operation.""" -def _dedup_join_columns(expr: ir.Table, lname: str, rname: str): - from ibis.expr.operations.generic import TableColumn - from ibis.expr.operations.logical import Equals - - op = expr.op() - left = op.left.to_expr() - right = op.right.to_expr() - - right_columns = frozenset(right.columns) - overlap = frozenset(column for column in left.columns if column in right_columns) - equal = set() - - if isinstance(op, InnerJoin) and util.all_of(op.predicates, Equals): - # For inner joins composed exclusively of equality predicates, we can - # avoid renaming columns with colliding names if their values are - # guaranteed to be equal due to the predicate. Here we collect a set of - # colliding column names that are known to have equal values between - # the left and right tables in the join. - tables = {op.left, op.right} - for pred in op.predicates: - if ( - isinstance(pred.left, TableColumn) - and isinstance(pred.right, TableColumn) - and {pred.left.table, pred.right.table} == tables - and pred.left.name == pred.right.name - ): - equal.add(pred.left.name) - - if not overlap: - return expr - - # Rename columns in the left table that overlap, unless they're known to be - # equal to a column in the right - left_projections = [ - left[column] - .cast(left[column].type().copy(nullable=True)) - .name(lname.format(name=column) if lname else column) - if column in overlap and column not in equal - else left[column].cast(left[column].type().copy(nullable=True)).name(column) - for column in left.columns - ] - - # Rename columns in the right table that overlap, dropping any columns that - # are known to be equal to those in the left table - right_projections = [ - right[column] - .cast(right[column].type().copy(nullable=True)) - .name(rname.format(name=column) if rname else column) - if column in overlap - else right[column].cast(right[column].type().copy(nullable=True)).name(column) - for column in right.columns - if column not in equal - ] - projections = left_projections + right_projections - - # Certain configurations can result in the renamed columns still colliding, - # here we check for duplicates again, and raise a nicer error message if - # any exist. - seen = set() - collisions = set() - for column in projections: - name = column.get_name() - if name in seen: - collisions.add(name) - seen.add(name) - if collisions: - raise com.IntegrityError( - f"Joining with `lname={lname!r}, rname={rname!r}` resulted in multiple " - f"columns mapping to the following names `{sorted(collisions)}`. 
Please " - f"adjust `lname` and/or `rname` accordingly" - ) - return expr.select(projections) - - -public(TableNode=Relation) +# TODO(kszucs): support t.select(*t) syntax by implementing TableExpr.__iter__() diff --git a/ibis/expr/operations/sortkeys.py b/ibis/expr/operations/sortkeys.py index 643427bf93d3..4b65b5e7adfb 100644 --- a/ibis/expr/operations/sortkeys.py +++ b/ibis/expr/operations/sortkeys.py @@ -28,6 +28,7 @@ class SortKey(Value): """A sort operation.""" + # TODO(kszucs): rename expr to arg or something else except expr expr: Value ascending: bool = True diff --git a/ibis/expr/operations/strings.py b/ibis/expr/operations/strings.py index d17f22e412a1..9b40261b9d2a 100644 --- a/ibis/expr/operations/strings.py +++ b/ibis/expr/operations/strings.py @@ -4,7 +4,6 @@ from public import public -import ibis.expr.datashape as ds import ibis.expr.datatypes as dt import ibis.expr.rules as rlz from ibis.common.annotations import attribute @@ -78,7 +77,7 @@ class Repeat(Value): arg: Value[dt.String] times: Value[dt.Integer] - shape = rlz.shape_like("arg") + shape = rlz.shape_like("args") dtype = dt.string @@ -156,7 +155,7 @@ class ArrayStringJoin(Value): @public class StartsWith(Value): arg: Value[dt.String] - start: Value[dt.String, ds.Scalar] + start: Value[dt.String] dtype = dt.boolean shape = rlz.shape_like("arg") @@ -165,7 +164,7 @@ class StartsWith(Value): @public class EndsWith(Value): arg: Value[dt.String] - end: Value[dt.String, ds.Scalar] + end: Value[dt.String] dtype = dt.boolean shape = rlz.shape_like("arg") diff --git a/ibis/expr/operations/temporal_windows.py b/ibis/expr/operations/temporal_windows.py index 415f0b026fd2..8eec01e25713 100644 --- a/ibis/expr/operations/temporal_windows.py +++ b/ibis/expr/operations/temporal_windows.py @@ -5,6 +5,7 @@ from public import public import ibis.expr.datatypes as dt +from ibis.common.annotations import attribute from ibis.expr.operations.core import Column, Scalar # noqa: TCH001 from ibis.expr.operations.relations import Relation from ibis.expr.schema import Schema @@ -14,9 +15,14 @@ class WindowingTVF(Relation): """Generic windowing table-valued function.""" + # TODO(kszucs): rename to `parent` table: Relation time_col: Column[dt.Timestamp] # enforce timestamp column type here + @attribute + def values(self): + return self.table.fields + @property def schema(self): names = list(self.table.schema.names) @@ -26,6 +32,8 @@ def schema(self): # of original relation as well as additional 3 columns named “window_start”, # “window_end”, “window_time” to indicate the assigned window + # TODO(kszucs): this looks like an implementation detail leaked from the + # flink backend names.extend(["window_start", "window_end", "window_time"]) # window_start, window_end, window_time have type TIMESTAMP(3) in Flink types.extend([dt.timestamp(scale=3)] * 3) diff --git a/ibis/expr/operations/tests/test_structs.py b/ibis/expr/operations/tests/test_structs.py index 7b7c36fae402..efded74516df 100644 --- a/ibis/expr/operations/tests/test_structs.py +++ b/ibis/expr/operations/tests/test_structs.py @@ -15,7 +15,7 @@ def test_struct_column_shape(): assert op.shape == ds.scalar - col = ops.TableColumn( + col = ops.Field( ops.UnboundTable(schema=ibis.schema(dict(a="int64")), name="t"), "a" ) op = ops.StructColumn(names=("a",), values=(col,)) diff --git a/ibis/expr/operations/window.py b/ibis/expr/operations/window.py index c724615708f9..b87686853032 100644 --- a/ibis/expr/operations/window.py +++ b/ibis/expr/operations/window.py @@ -129,12 +129,9 @@ class 
WindowFunction(Value): shape = ds.columnar def __init__(self, func, frame): - from ibis.expr.analysis import shares_all_roots - - if not shares_all_roots(func, frame): + if func.relations and frame.table not in func.relations: raise com.RelationError( - "Window function expressions doesn't fully originate from the " - "dependencies of the window expression." + "The reduction has different parent relation than the window" ) super().__init__(func=func, frame=frame) diff --git a/ibis/expr/rewrites.py b/ibis/expr/rewrites.py index 4ae694f0120c..74e2294ec3db 100644 --- a/ibis/expr/rewrites.py +++ b/ibis/expr/rewrites.py @@ -1,81 +1,161 @@ """Some common rewrite functions to be shared between backends.""" from __future__ import annotations -import functools -from collections.abc import Mapping +import toolz -import ibis.expr.datatypes as dt import ibis.expr.operations as ops -from ibis.common.exceptions import UnsupportedOperationError -from ibis.common.patterns import pattern, replace +from ibis.common.deferred import Item, _, deferred, var +from ibis.common.exceptions import ExpressionError +from ibis.common.patterns import Check, pattern, replace from ibis.util import Namespace p = Namespace(pattern, module=ops) +d = Namespace(deferred, module=ops) -@replace(p.FillNa) -def rewrite_fillna(_): - """Rewrite FillNa expressions to use more common operations.""" - if isinstance(_.replacements, Mapping): - mapping = _.replacements - else: - mapping = { - name: _.replacements - for name, type in _.table.schema.items() - if type.nullable - } - - if not mapping: - return _.table - - selections = [] - for name in _.table.schema.names: - col = ops.TableColumn(_.table, name) - if (value := mapping.get(name)) is not None: - col = ops.Alias(ops.Coalesce((col, value)), name) - selections.append(col) - - return ops.Selection(_.table, selections, (), ()) - - -@replace(p.DropNa) -def rewrite_dropna(_): - """Rewrite DropNa expressions to use more common operations.""" - if _.subset is None: - columns = [ops.TableColumn(_.table, name) for name in _.table.schema.names] +y = var("y") +name = var("name") + + +@replace(ops.Analytic) +def project_wrap_analytic(_, rel): + # Wrap analytic functions in a window function + return ops.WindowFunction(_, ops.RowsWindowFrame(rel)) + + +@replace(ops.Reduction) +def project_wrap_reduction(_, rel): + # Query all the tables that the reduction depends on + if _.relations == {rel}: + # The reduction is fully originating from the `rel`, so turn + # it into a window function of `rel` + return ops.WindowFunction(_, ops.RowsWindowFrame(rel)) else: - columns = _.subset - - if columns: - preds = [ - functools.reduce( - ops.And if _.how == "any" else ops.Or, - [ops.NotNull(c) for c in columns], - ) - ] - elif _.how == "all": - preds = [ops.Literal(False, dtype=dt.bool)] + # 1. The reduction doesn't depend on any table, constructed from + # scalar values, so turn it into a scalar subquery. + # 2. The reduction is originating from `rel` and other tables, + # so this is a correlated scalar subquery. + # 3. The reduction is originating entirely from other tables, + # so this is an uncorrelated scalar subquery. 
+ return ops.ScalarSubquery(_.to_expr().as_table()) + + +def rewrite_project_input(value, relation): + # we need to detect reductions which are either turned into window functions + # or scalar subqueries depending on whether they are originating from the + # relation + return value.replace( + project_wrap_analytic | project_wrap_reduction, + filter=p.Value & ~p.WindowFunction, + context={"rel": relation}, + ) + + +ReductionValue = p.Reduction | p.Field(p.Aggregate(groups={})) + + +@replace(ReductionValue) +def filter_wrap_reduction(_): + # Wrap reductions or fields referencing an aggregation without a group by - + # which are scalar fields - in a scalar subquery. In the latter case we + # use the reduction value from the aggregation. + if isinstance(_, ops.Field): + value = _.rel.values[_.name] else: - return _.table + value = _ + return ops.ScalarSubquery(value.to_expr().as_table()) + - return ops.Selection(_.table, (), preds, ()) +def rewrite_filter_input(value): + return value.replace(filter_wrap_reduction, filter=p.Value & ~p.WindowFunction) -@replace(p.Sample) -def rewrite_sample(_): - """Rewrite Sample as `t.filter(random() <= fraction)`. +@replace(p.Analytic | p.Reduction) +def window_wrap_reduction(_, frame): + # Wrap analytic and reduction functions in a window function. Used in the + # value.over() API. + return ops.WindowFunction(_, frame) - Errors as unsupported if a `seed` is specified. - """ - if _.seed is not None: - raise UnsupportedOperationError( - "`Table.sample` with a random seed is unsupported" +@replace(p.WindowFunction) +def window_merge_frames(_, frame): + # Merge window frames, used in the value.over() and groupby.select() APIs. + if _.frame.start and frame.start and _.frame.start != frame.start: + raise ExpressionError( + "Unable to merge windows with conflicting `start` boundary" ) + if _.frame.end and frame.end and _.frame.end != frame.end: + raise ExpressionError("Unable to merge windows with conflicting `end` boundary") + + start = _.frame.start or frame.start + end = _.frame.end or frame.end + group_by = tuple(toolz.unique(_.frame.group_by + frame.group_by)) + + order_by = {} + for sort_key in _.frame.order_by + frame.order_by: + order_by[sort_key.expr] = sort_key.ascending + order_by = tuple(ops.SortKey(k, v) for k, v in order_by.items()) + + frame = _.frame.copy(start=start, end=end, group_by=group_by, order_by=order_by) + return ops.WindowFunction(_.func, frame) - return ops.Selection( - _.table, - (), - (ops.LessEqual(ops.RandomScalar(), _.fraction),), - (), + +def rewrite_window_input(value, frame): + context = {"frame": frame} + # if self is a reduction or analytic function, wrap it in a window function + node = value.replace( + window_wrap_reduction, + filter=p.Value & ~p.WindowFunction, + context=context, ) + # if self is already a window function, merge the existing window frame + # with the requested window frame + return node.replace(window_merge_frames, filter=p.Value, context=context) + + +# TODO(kszucs): schema comparison should be updated to not distinguish between +# different column order +@replace(p.Project(y @ p.Relation) & Check(_.schema == y.schema)) +def complete_reprojection(_, y): + # TODO(kszucs): this could be moved to the pattern itself but not sure how + # to express it, especially in a shorter way then the following check + for name in _.schema: + if _.values[name] != ops.Field(y, name): + return _ + return y + + +@replace(p.Project(y @ p.Project)) +def subsequent_projects(_, y): + rule = p.Field(y, name) >> Item(y.values, name) 
+ values = {k: v.replace(rule) for k, v in _.values.items()} + return ops.Project(y.parent, values) + + +@replace(p.Filter(y @ p.Filter)) +def subsequent_filters(_, y): + rule = p.Field(y, name) >> d.Field(y.parent, name) + preds = tuple(v.replace(rule) for v in _.predicates) + return ops.Filter(y.parent, y.predicates + preds) + + +@replace(p.Filter(y @ p.Project)) +def reorder_filter_project(_, y): + rule = p.Field(y, name) >> Item(y.values, name) + preds = tuple(v.replace(rule) for v in _.predicates) + + inner = ops.Filter(y.parent, preds) + rule = p.Field(y.parent, name) >> d.Field(inner, name) + projs = {k: v.replace(rule) for k, v in y.values.items()} + + return ops.Project(inner, projs) + + +def simplify(node): + # TODO(kszucs): add a utility to the graph module to do rewrites in multiple + # passes after each other + node = node.replace(reorder_filter_project) + node = node.replace(reorder_filter_project) + node = node.replace(subsequent_projects | subsequent_filters) + node = node.replace(complete_reprojection) + return node diff --git a/ibis/expr/sql.py b/ibis/expr/sql.py index 29509cd2629f..1e3a805e4b2f 100644 --- a/ibis/expr/sql.py +++ b/ibis/expr/sql.py @@ -125,18 +125,23 @@ def convert_join(join, catalog): left_name = join.name left_table = catalog[left_name] + for right_name, desc in join.joins.items(): right_table = catalog[right_name] join_kind = _join_types[desc["side"]] - predicate = None - for left_key, right_key in zip(desc["source_key"], desc["join_key"]): - left_key = convert(left_key, catalog=catalog) - right_key = convert(right_key, catalog=catalog) - if predicate is None: - predicate = left_key == right_key - else: - predicate &= left_key == right_key + if desc["join_key"]: + predicate = None + for left_key, right_key in zip(desc["source_key"], desc["join_key"]): + left_key = convert(left_key, catalog=catalog) + right_key = convert(right_key, catalog=catalog) + if predicate is None: + predicate = left_key == right_key + else: + predicate &= left_key == right_key + else: + condition = desc["condition"] + predicate = convert(condition, catalog=catalog) left_table = left_table.join(right_table, predicates=predicate, how=join_kind) @@ -179,6 +184,11 @@ def convert_literal(literal, catalog): return ibis.literal(value) +@convert.register(sge.Boolean) +def convert_boolean(boolean, catalog): + return ibis.literal(boolean.this) + + @convert.register(sge.Alias) def convert_alias(alias, catalog): this = convert(alias.this, catalog=catalog) @@ -367,6 +377,6 @@ def to_sql(expr: ir.Expr, dialect: str | None = None, **kwargs) -> SQLString: else: read = write = getattr(backend, "_sqlglot_dialect", dialect) - sql = backend._to_sql(expr, **kwargs) + sql = backend._to_sql(expr.unbind(), **kwargs) (pretty,) = sg.transpile(sql, read=read, write=write, pretty=True) return SQLString(pretty) diff --git a/ibis/expr/tests/snapshots/test_format/test_aggregate_arg_names/repr.txt b/ibis/expr/tests/snapshots/test_format/test_aggregate_arg_names/repr.txt index 44b15ca820f4..23cc70e5b6ac 100644 --- a/ibis/expr/tests/snapshots/test_format/test_aggregate_arg_names/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_aggregate_arg_names/repr.txt @@ -11,10 +11,10 @@ r0 := UnboundTable: alltypes j date k time -Aggregation[r0] +Aggregate[r0] + groups: + key1: r0.g + key2: Round(r0.f) metrics: c: Sum(r0.c) - d: Mean(r0.d) - by: - key1: r0.g - key2: Round(r0.f) \ No newline at end of file + d: Mean(r0.d) \ No newline at end of file diff --git 
a/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt b/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt index aeaba71bcc34..263a594f7ef7 100644 --- a/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt @@ -1,20 +1,24 @@ -r0 := UnboundTable: right - time2 int32 - value2 float64 - -r1 := UnboundTable: left +r0 := UnboundTable: left time1 int32 value float64 -r2 := AsOfJoin[r1, r0] r1.time1 == r0.time2 +r1 := UnboundTable: right + time2 int32 + value2 float64 + +r2 := SelfReference[r1] -r3 := InnerJoin[r2, r0] r1.value == r0.value2 +r3 := SelfReference[r1] -Selection[r3] - selections: - time1: r2.time1 - value: r2.value +JoinChain[r0] + JoinLink[asof, r2] + r0.time1 == r2.time2 + JoinLink[inner, r3] + r0.value == r3.value2 + values: + time1: r0.time1 + value: r0.value time2: r2.time2 value2: r2.value2 - time2_right: r0.time2 - value2_right: r0.value2 \ No newline at end of file + time2_right: r3.time2 + value2_right: r3.value2 \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_complex_repr/repr.txt b/ibis/expr/tests/snapshots/test_format/test_complex_repr/repr.txt index 0f9d5621fa4b..9334f1c07925 100644 --- a/ibis/expr/tests/snapshots/test_format/test_complex_repr/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_complex_repr/repr.txt @@ -1,20 +1,18 @@ r0 := UnboundTable: t a int64 -r1 := Selection[r0] - predicates: - r0.a < 42 - r0.a >= 42 +r1 := Filter[r0] + r0.a < 42 + r0.a >= 42 -r2 := Selection[r1] - selections: - r1 - x: r1.a + 42 +r2 := Project[r1] + a: r1.a + x: r1.a + 42 -r3 := Aggregation[r2] +r3 := Aggregate[r2] + groups: + x: r2.x metrics: y: Sum(r2.a) - by: - x: r2.x Limit[r3, n=10] \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_destruct_selection/repr.txt b/ibis/expr/tests/snapshots/test_format/test_destruct_selection/repr.txt index 013871ecfb27..05087799363d 100644 --- a/ibis/expr/tests/snapshots/test_format/test_destruct_selection/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_destruct_selection/repr.txt @@ -1,7 +1,7 @@ r0 := UnboundTable: t col int64 -Aggregation[r0] +Aggregate[r0] metrics: sum: StructField(ReductionVectorizedUDF(func=multi_output_udf, func_args=[r0.col], input_type=[int64], return_type={'sum': int64, 'mean': float64}), field='sum') mean: StructField(ReductionVectorizedUDF(func=multi_output_udf, func_args=[r0.col], input_type=[int64], return_type={'sum': int64, 'mean': float64}), field='mean') \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_fillna/fillna_int_repr.txt b/ibis/expr/tests/snapshots/test_format/test_fillna/fillna_int_repr.txt index d7aa4f2ee692..7ffb48f8a9f9 100644 --- a/ibis/expr/tests/snapshots/test_format/test_fillna/fillna_int_repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_fillna/fillna_int_repr.txt @@ -2,9 +2,8 @@ r0 := UnboundTable: t a int64 b string -r1 := Selection[r0] - selections: - a: r0.a +r1 := Project[r0] + a: r0.a FillNa[r1] replacements: diff --git a/ibis/expr/tests/snapshots/test_format/test_fillna/fillna_str_repr.txt b/ibis/expr/tests/snapshots/test_format/test_fillna/fillna_str_repr.txt index 887edd9ee5b9..e23131448904 100644 --- a/ibis/expr/tests/snapshots/test_format/test_fillna/fillna_str_repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_fillna/fillna_str_repr.txt @@ -2,9 +2,8 @@ r0 := UnboundTable: t a int64 b string -r1 := Selection[r0] - selections: - b: r0.b +r1 := Project[r0] + 
b: r0.b FillNa[r1] replacements: diff --git a/ibis/expr/tests/snapshots/test_format/test_format_dummy_table/repr.txt b/ibis/expr/tests/snapshots/test_format/test_format_dummy_table/repr.txt index 168803538ebe..0563c0ba6211 100644 --- a/ibis/expr/tests/snapshots/test_format/test_format_dummy_table/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_format_dummy_table/repr.txt @@ -1,2 +1,2 @@ DummyTable - foo array \ No newline at end of file + foo: [1] \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_format_multiple_join_with_projection/repr.txt b/ibis/expr/tests/snapshots/test_format/test_format_multiple_join_with_projection/repr.txt index 057e2d8c8966..d1ed4735f67a 100644 --- a/ibis/expr/tests/snapshots/test_format/test_format_multiple_join_with_projection/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_format_multiple_join_with_projection/repr.txt @@ -1,36 +1,33 @@ -r0 := UnboundTable: three - bar_id string - value2 float64 - -r1 := UnboundTable: one +r0 := UnboundTable: one c int32 f float64 foo_id string bar_id string -r2 := UnboundTable: two +r1 := UnboundTable: two foo_id string value1 float64 -r3 := Selection[r1] - predicates: - r1.f > 0 +r2 := UnboundTable: three + bar_id string + value2 float64 -r4 := LeftJoin[r3, r2] r3.foo_id == r2.foo_id +r3 := SelfReference[r1] -r5 := Selection[r4] - selections: - c: r3.c - f: r3.f - foo_id: r3.foo_id - bar_id: r3.bar_id - foo_id_right: r2.foo_id - value1: r2.value1 +r4 := SelfReference[r2] -r6 := InnerJoin[r5, r0] r3.bar_id == r0.bar_id +r5 := Filter[r0] + r0.f > 0 -Selection[r6] - selections: - r3 - value1: r2.value1 - value2: r0.value2 \ No newline at end of file +JoinChain[r5] + JoinLink[left, r3] + r5.foo_id == r3.foo_id + JoinLink[inner, r4] + r5.bar_id == r4.bar_id + values: + c: r5.c + f: r5.f + foo_id: r5.foo_id + bar_id: r5.bar_id + value1: r3.value1 + value2: r4.value2 \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_format_new_relational_operation/repr.txt b/ibis/expr/tests/snapshots/test_format/test_format_new_relational_operation/repr.txt index f058f8b462d4..3169cef6f734 100644 --- a/ibis/expr/tests/snapshots/test_format/test_format_new_relational_operation/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_format_new_relational_operation/repr.txt @@ -11,20 +11,32 @@ r0 := UnboundTable: alltypes j date k time -r1 := MyRelation - a int8 - b int16 - c int32 - d int64 - e float32 - f float64 - g string - h boolean - i timestamp - j date - k time +r1 := MyRelation[r0] + kind: + foo + schema: + a int8 + b int16 + c int32 + d int64 + e float32 + f float64 + g string + h boolean + i timestamp + j date + k time -Selection[r1] - selections: - r1 - a2: r1.a \ No newline at end of file +Project[r1] + a: r1.a + b: r1.b + c: r1.c + d: r1.d + e: r1.e + f: r1.f + g: r1.g + h: r1.h + i: r1.i + j: r1.j + k: r1.k + a2: r1.a \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_format_projection/repr.txt b/ibis/expr/tests/snapshots/test_format/test_format_projection/repr.txt index c982128f1c0e..aff4c167e81f 100644 --- a/ibis/expr/tests/snapshots/test_format/test_format_projection/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_format_projection/repr.txt @@ -11,10 +11,9 @@ r0 := UnboundTable: alltypes j date k time -r1 := Selection[r0] - selections: - c: r0.c - a: r0.a - f: r0.f +r1 := Project[r0] + c: r0.c + a: r0.a + f: r0.f a: r1.a \ No newline at end of file diff --git 
a/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_table/repr.txt b/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_table/repr.txt index cfd72d2fff7c..2e6f5c480f1a 100644 --- a/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_table/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_table/repr.txt @@ -3,16 +3,16 @@ r0 := UnboundTable: airlines origin string arrdelay int32 -r1 := Aggregation[r0] +r1 := Filter[r0] + InValues(value=r0.dest, options=['ORD', 'JFK', 'SFO']) + +r2 := Aggregate[r1] + groups: + dest: r1.dest metrics: - Mean(arrdelay): Mean(r0.arrdelay) - by: - dest: r0.dest - predicates: - InValues(value=r0.dest, options=['ORD', 'JFK', 'SFO']) + Mean(arrdelay): Mean(r1.arrdelay) -r2 := Selection[r1] - sort_keys: - desc r1.Mean(arrdelay) +r3 := Sort[r2] + desc r2['Mean(arrdelay)'] -Limit[r2, n=10] \ No newline at end of file +Limit[r3, n=10] \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_tables_in_join/repr.txt b/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_tables_in_join/repr.txt index 69c7d1add031..95a08486a774 100644 --- a/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_tables_in_join/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_tables_in_join/repr.txt @@ -4,27 +4,26 @@ r0 := UnboundTable: purchases user int64 amount float64 -r1 := Aggregation[r0] - metrics: - total: Sum(r0.amount) - by: +r1 := Aggregate[r0] + groups: region: r0.region kind: r0.kind - predicates: - r0.kind == 'foo' - -r2 := Aggregation[r0] metrics: total: Sum(r0.amount) - by: - region: r0.region - kind: r0.kind - predicates: - r0.kind == 'bar' -r3 := InnerJoin[r1, r2] r1.region == r2.region +r2 := Filter[r1] + r1.kind == 'foo' + +r3 := Filter[r1] + r1.kind == 'bar' + +r4 := SelfReference[r3] -Selection[r3] - selections: - r1 - right_total: r2.total \ No newline at end of file +JoinChain[r2] + JoinLink[inner, r4] + r2.region == r4.region + values: + region: r2.region + kind: r2.kind + total: r2.total + right_total: r4.total \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_repr_exact/repr.txt b/ibis/expr/tests/snapshots/test_format/test_repr_exact/repr.txt index 38e341469f0c..ae0745d7299f 100644 --- a/ibis/expr/tests/snapshots/test_format/test_repr_exact/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_repr_exact/repr.txt @@ -3,7 +3,8 @@ r0 := UnboundTable: t col2 string col3 float64 -Selection[r0] - selections: - r0 - col4: StringLength(r0.col2) \ No newline at end of file +Project[r0] + col: r0.col + col2: r0.col2 + col3: r0.col3 + col4: StringLength(r0.col2) \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_same_column_multiple_aliases/repr.txt b/ibis/expr/tests/snapshots/test_format/test_same_column_multiple_aliases/repr.txt index 1826aa9d8567..b3505df638d6 100644 --- a/ibis/expr/tests/snapshots/test_format/test_same_column_multiple_aliases/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_same_column_multiple_aliases/repr.txt @@ -1,7 +1,6 @@ r0 := UnboundTable: t col int64 -Selection[r0] - selections: - fakealias1: r0.col - fakealias2: r0.col \ No newline at end of file +Project[r0] + fakealias1: r0.col + fakealias2: r0.col \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_table_count_expr/cnt_repr.txt b/ibis/expr/tests/snapshots/test_format/test_table_count_expr/cnt_repr.txt index a85e2bdb5dbb..fffb76933234 100644 --- 
a/ibis/expr/tests/snapshots/test_format/test_table_count_expr/cnt_repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_table_count_expr/cnt_repr.txt @@ -2,4 +2,4 @@ r0 := UnboundTable: t1 a int64 b float64 -CountStar(t1): CountStar(r0) \ No newline at end of file +CountStar(): CountStar(r0) \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt b/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt index e63b05c8c635..96aa59a58a31 100644 --- a/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt @@ -6,12 +6,15 @@ r1 := UnboundTable: t2 a int64 b float64 -r2 := InnerJoin[r0, r1] r0.a == r1.a +r2 := SelfReference[r1] -r3 := Selection[r2] - selections: +r3 := JoinChain[r0] + JoinLink[inner, r2] + r0.a == r2.a + values: a: r0.a b: r0.b - b_right: r1.b + a_right: r2.a + b_right: r2.b CountStar(): CountStar(r3) \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_table_count_expr/union_repr.txt b/ibis/expr/tests/snapshots/test_format/test_table_count_expr/union_repr.txt index caab7a357ba4..39d67ba6a7a6 100644 --- a/ibis/expr/tests/snapshots/test_format/test_table_count_expr/union_repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_table_count_expr/union_repr.txt @@ -8,9 +8,8 @@ r1 := UnboundTable: t2 r2 := Union[r0, r1, distinct=False] -r3 := Selection[r2] - selections: - a: r2.a - b: r2.b +r3 := Project[r2] + a: r2.a + b: r2.b CountStar(): CountStar(r3) \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_two_inner_joins/repr.txt b/ibis/expr/tests/snapshots/test_format/test_two_inner_joins/repr.txt index 959d15672b18..aa61982fec8f 100644 --- a/ibis/expr/tests/snapshots/test_format/test_two_inner_joins/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_two_inner_joins/repr.txt @@ -1,25 +1,29 @@ -r0 := UnboundTable: right - time2 int32 - value2 float64 - b string - -r1 := UnboundTable: left +r0 := UnboundTable: left time1 int32 value float64 a string -r2 := InnerJoin[r1, r0] r1.a == r0.b +r1 := UnboundTable: right + time2 int32 + value2 float64 + b string + +r2 := SelfReference[r1] -r3 := InnerJoin[r2, r0] r1.value == r0.value2 +r3 := SelfReference[r1] -Selection[r3] - selections: - time1: r2.time1 - value: r2.value - a: r2.a +JoinChain[r0] + JoinLink[inner, r2] + r0.a == r2.b + JoinLink[inner, r3] + r0.value == r3.value2 + values: + time1: r0.time1 + value: r0.value + a: r0.a time2: r2.time2 value2: r2.value2 b: r2.b - time2_right: r0.time2 - value2_right: r0.value2 - b_right: r0.b \ No newline at end of file + time2_right: r3.time2 + value2_right: r3.value2 + b_right: r3.b \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_aggregation_with_multiple_joins/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_aggregation_with_multiple_joins/decompiled.py index 03fcc9f2791f..499385aab514 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_aggregation_with_multiple_joins/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_aggregation_with_multiple_joins/decompiled.py @@ -1,6 +1,10 @@ import ibis +employee = ibis.table( + name="employee", + schema={"first_name": "string", "last_name": "string", "id": "int64"}, +) call = ibis.table( name="call", schema={ @@ -14,28 +18,10 @@ call_outcome = ibis.table( name="call_outcome", schema={"outcome_text": "string", "id": 
"int64"} ) -employee = ibis.table( - name="employee", - schema={"first_name": "string", "last_name": "string", "id": "int64"}, +joinchain = employee.inner_join(call, employee.id == call.employee_id).inner_join( + call_outcome, call.call_outcome_id == call_outcome.id ) -innerjoin = employee.inner_join(call, employee.id == call.employee_id) -result = ( - innerjoin.inner_join(call_outcome, call.call_outcome_id == call_outcome.id) - .select( - [ - innerjoin.first_name, - innerjoin.last_name, - innerjoin.id, - innerjoin.start_time, - innerjoin.end_time, - innerjoin.employee_id, - innerjoin.call_outcome_id, - innerjoin.call_attempts, - call_outcome.outcome_text, - call_outcome.id.name("id_right"), - ] - ) - .group_by(call.employee_id) - .aggregate(call.call_attempts.mean().name("avg_attempts")) +result = joinchain.aggregate( + [joinchain.call_attempts.mean().name("avg_attempts")], by=[joinchain.employee_id] ) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation/decompiled.py index b5bd4842d48b..85221e0535fa 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation/decompiled.py @@ -12,6 +12,6 @@ }, ) -result = call.group_by(call.employee_id).aggregate( - call.call_attempts.sum().name("attempts") +result = call.aggregate( + [call.call_attempts.sum().name("attempts")], by=[call.employee_id] ) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation_with_join/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation_with_join/decompiled.py index 392f50271b0b..0b23d1687445 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation_with_join/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation_with_join/decompiled.py @@ -15,8 +15,8 @@ "call_attempts": "int64", }, ) -leftjoin = employee.left_join(call, employee.id == call.employee_id) +joinchain = employee.left_join(call, employee.id == call.employee_id) -result = leftjoin.group_by(leftjoin.id).aggregate( - call.call_attempts.sum().name("attempts") +result = joinchain.aggregate( + [joinchain.call_attempts.sum().name("attempts")], by=[joinchain.id] ) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/inner/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/inner/decompiled.py index 05f419d668db..8439fd762875 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/inner/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/inner/decompiled.py @@ -1,6 +1,10 @@ import ibis +employee = ibis.table( + name="employee", + schema={"first_name": "string", "last_name": "string", "id": "int64"}, +) call = ibis.table( name="call", schema={ @@ -11,24 +15,18 @@ "call_attempts": "int64", }, ) -employee = ibis.table( - name="employee", - schema={"first_name": "string", "last_name": "string", "id": "int64"}, -) -proj = employee.inner_join(call, employee.id == call.employee_id).filter( - employee.id < 5 -) +joinchain = employee.inner_join(call, employee.id == call.employee_id) +f = joinchain.filter(joinchain.id < 5) +s = f.order_by(f.id.desc()) -result = proj.select( - [ - proj.first_name, - proj.last_name, - proj.id, - call.start_time, - call.end_time, - call.employee_id, - call.call_outcome_id, - call.call_attempts, - proj.first_name.name("first"), - ] 
-).order_by(proj.id.desc()) +result = s.select( + s.first_name, + s.last_name, + s.id, + s.start_time, + s.end_time, + s.employee_id, + s.call_outcome_id, + s.call_attempts, + s.first_name.name("first"), +) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/left/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/left/decompiled.py index 2ed2c808d726..3e375cd052d2 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/left/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/left/decompiled.py @@ -1,6 +1,10 @@ import ibis +employee = ibis.table( + name="employee", + schema={"first_name": "string", "last_name": "string", "id": "int64"}, +) call = ibis.table( name="call", schema={ @@ -11,22 +15,18 @@ "call_attempts": "int64", }, ) -employee = ibis.table( - name="employee", - schema={"first_name": "string", "last_name": "string", "id": "int64"}, -) -proj = employee.left_join(call, employee.id == call.employee_id).filter(employee.id < 5) +joinchain = employee.left_join(call, employee.id == call.employee_id) +f = joinchain.filter(joinchain.id < 5) +s = f.order_by(f.id.desc()) -result = proj.select( - [ - proj.first_name, - proj.last_name, - proj.id, - call.start_time, - call.end_time, - call.employee_id, - call.call_outcome_id, - call.call_attempts, - proj.first_name.name("first"), - ] -).order_by(proj.id.desc()) +result = s.select( + s.first_name, + s.last_name, + s.id, + s.start_time, + s.end_time, + s.employee_id, + s.call_outcome_id, + s.call_attempts, + s.first_name.name("first"), +) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/right/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/right/decompiled.py index 0f31dffd1532..e9a8b2082dc1 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/right/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/right/decompiled.py @@ -1,6 +1,10 @@ import ibis +employee = ibis.table( + name="employee", + schema={"first_name": "string", "last_name": "string", "id": "int64"}, +) call = ibis.table( name="call", schema={ @@ -11,24 +15,18 @@ "call_attempts": "int64", }, ) -employee = ibis.table( - name="employee", - schema={"first_name": "string", "last_name": "string", "id": "int64"}, -) -proj = employee.right_join(call, employee.id == call.employee_id).filter( - employee.id < 5 -) +joinchain = employee.right_join(call, employee.id == call.employee_id) +f = joinchain.filter(joinchain.id < 5) +s = f.order_by(f.id.desc()) -result = proj.select( - [ - proj.first_name, - proj.last_name, - proj.id, - call.start_time, - call.end_time, - call.employee_id, - call.call_outcome_id, - call.call_attempts, - proj.first_name.name("first"), - ] -).order_by(proj.id.desc()) +result = s.select( + s.first_name, + s.last_name, + s.id, + s.start_time, + s.end_time, + s.employee_id, + s.call_outcome_id, + s.call_attempts, + s.first_name.name("first"), +) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_projection/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_projection/decompiled.py index b6e37f2ab518..404a75f95cfc 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_projection/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_projection/decompiled.py @@ -5,8 +5,7 @@ name="employee", schema={"first_name": "string", "last_name": "string", "id": "int64"}, ) -proj = employee.filter(employee.id < 5) +f = 
employee.filter(employee.id < 5) +s = f.order_by(f.id.desc()) -result = proj.select( - [proj.first_name, proj.last_name, proj.id, proj.first_name.name("first")] -).order_by(proj.id.desc()) +result = s.select(s.first_name, s.last_name, s.id, s.first_name.name("first")) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_in_clause/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_in_clause/decompiled.py index cc4993250d02..b29504c90709 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_in_clause/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_in_clause/decompiled.py @@ -5,8 +5,7 @@ name="employee", schema={"first_name": "string", "last_name": "string", "id": "int64"}, ) - -result = employee.select(employee.first_name).filter( +f = employee.filter( employee.first_name.isin( ( ibis.literal("Graham"), @@ -17,3 +16,5 @@ ) ) ) + +result = f.select(f.first_name) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_join_with_filter/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_join_with_filter/decompiled.py index 2ed2c808d726..3e375cd052d2 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_join_with_filter/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_join_with_filter/decompiled.py @@ -1,6 +1,10 @@ import ibis +employee = ibis.table( + name="employee", + schema={"first_name": "string", "last_name": "string", "id": "int64"}, +) call = ibis.table( name="call", schema={ @@ -11,22 +15,18 @@ "call_attempts": "int64", }, ) -employee = ibis.table( - name="employee", - schema={"first_name": "string", "last_name": "string", "id": "int64"}, -) -proj = employee.left_join(call, employee.id == call.employee_id).filter(employee.id < 5) +joinchain = employee.left_join(call, employee.id == call.employee_id) +f = joinchain.filter(joinchain.id < 5) +s = f.order_by(f.id.desc()) -result = proj.select( - [ - proj.first_name, - proj.last_name, - proj.id, - call.start_time, - call.end_time, - call.employee_id, - call.call_outcome_id, - call.call_attempts, - proj.first_name.name("first"), - ] -).order_by(proj.id.desc()) +result = s.select( + s.first_name, + s.last_name, + s.id, + s.start_time, + s.end_time, + s.employee_id, + s.call_outcome_id, + s.call_attempts, + s.first_name.name("first"), +) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_multiple_joins/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_multiple_joins/decompiled.py index ae6bfd9788f7..d6df17717b27 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_multiple_joins/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_multiple_joins/decompiled.py @@ -1,9 +1,6 @@ import ibis -call_outcome = ibis.table( - name="call_outcome", schema={"outcome_text": "string", "id": "int64"} -) employee = ibis.table( name="employee", schema={"first_name": "string", "last_name": "string", "id": "int64"}, @@ -18,21 +15,10 @@ "call_attempts": "int64", }, ) -innerjoin = employee.inner_join(call, employee.id == call.employee_id) +call_outcome = ibis.table( + name="call_outcome", schema={"outcome_text": "string", "id": "int64"} +) -result = innerjoin.inner_join( +result = employee.inner_join(call, employee.id == call.employee_id).inner_join( call_outcome, call.call_outcome_id == call_outcome.id -).select( - [ - innerjoin.first_name, - innerjoin.last_name, - innerjoin.id, - innerjoin.start_time, - innerjoin.end_time, - innerjoin.employee_id, - innerjoin.call_outcome_id, - innerjoin.call_attempts, - 
call_outcome.outcome_text, - call_outcome.id.name("id_right"), - ] ) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_scalar_subquery/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_scalar_subquery/decompiled.py index 81f627719b17..e651a29b1ad9 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_scalar_subquery/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_scalar_subquery/decompiled.py @@ -11,6 +11,6 @@ "call_attempts": "int64", }, ) -agg = call.aggregate(call.call_attempts.mean().name("mean")) +agg = call.aggregate([call.call_attempts.mean().name("mean")]) -result = call.inner_join(agg, []) +result = call.inner_join(agg, [agg.mean < call.call_attempts, ibis.literal(True)]) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_simple_reduction/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_simple_reduction/decompiled.py index 8fa935b7c5e4..3e4aaaf12b42 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_simple_reduction/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_simple_reduction/decompiled.py @@ -12,4 +12,4 @@ }, ) -result = call.aggregate(call.call_attempts.mean().name("mean")) +result = call.aggregate([call.call_attempts.mean().name("mean")]) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_simple_select_count/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_simple_select_count/decompiled.py index d993ac2ac040..8466d6aeb4ca 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_simple_select_count/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_simple_select_count/decompiled.py @@ -6,4 +6,4 @@ schema={"first_name": "string", "last_name": "string", "id": "int64"}, ) -result = employee.aggregate(employee.first_name.count().name("_col_0")) +result = employee.aggregate([employee.first_name.count().name("_col_0")]) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_table_alias/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_table_alias/decompiled.py index ec5df1972413..05aff9c5b4ee 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_table_alias/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_table_alias/decompiled.py @@ -1,9 +1,7 @@ import ibis -employee = ibis.table( +result = ibis.table( name="employee", schema={"first_name": "string", "last_name": "string", "id": "int64"}, ) - -result = employee.select([employee.first_name, employee.last_name, employee.id]) diff --git a/ibis/expr/tests/test_format.py b/ibis/expr/tests/test_format.py index 6ee6dbc42514..87186e24728e 100644 --- a/ibis/expr/tests/test_format.py +++ b/ibis/expr/tests/test_format.py @@ -11,17 +11,11 @@ import ibis.expr.operations as ops import ibis.legacy.udf.vectorized as udf from ibis import util -from ibis.expr.operations.relations import Projection # easier to switch implementation if needed fmt = repr -@pytest.mark.parametrize("cls", set(ops.Relation.__subclasses__()) - {Projection}) -def test_tables_have_format_rules(cls): - assert cls in ibis.expr.format.fmt.registry - - @pytest.mark.parametrize("cls", [ops.PhysicalTable, ops.Relation]) def test_tables_have_format_value_rules(cls): assert cls in ibis.expr.format.fmt.registry @@ -62,7 +56,6 @@ def test_table_type_output(snapshot): expr = foo.dept_id == foo.view().dept_id result = fmt(expr) - assert "SelfReference[r0]" in result assert "UnboundTable: foo" in result snapshot.assert_match(result, "repr.txt") @@ -77,7 +70,7 
@@ def test_aggregate_arg_names(alltypes, snapshot): expr = t.group_by(by_exprs).aggregate(metrics) result = fmt(expr) assert "metrics" in result - assert "by" in result + assert "groups" in result snapshot.assert_match(result, "repr.txt") @@ -125,8 +118,6 @@ def test_memoize_filtered_table(snapshot): delay_filter = t.dest.topk(10, by=t.arrdelay.mean()) result = fmt(delay_filter) - assert result.count("Selection") == 1 - snapshot.assert_match(result, "repr.txt") @@ -167,12 +158,6 @@ def test_memoize_filtered_tables_in_join(snapshot): joined = left.join(right, cond)[left, right.total.name("right_total")] result = fmt(joined) - - # one for each aggregation - # joins are shown without the word `predicates` above them - # since joins only have predicates as arguments - assert result.count("predicates") == 2 - snapshot.assert_match(result, "repr.txt") @@ -331,9 +316,6 @@ def test_asof_join(snapshot): ) result = fmt(joined) - assert result.count("InnerJoin") == 1 - assert result.count("AsOfJoin") == 1 - snapshot.assert_match(result, "repr.txt") @@ -349,8 +331,6 @@ def test_two_inner_joins(snapshot): ) result = fmt(joined) - assert result.count("InnerJoin") == 2 - snapshot.assert_match(result, "repr.txt") @@ -382,11 +362,13 @@ def test_format_literal(literal, typ, output): def test_format_dummy_table(snapshot): +<<<<<<< HEAD t = ops.DummyTable([ibis.array([1]).cast("array").name("foo")]).to_expr() +======= + t = ops.DummyTable({"foo": ibis.array([1], type="array")}).to_expr() +>>>>>>> 2189ab71b (refactor(ir): split the relational operations) result = fmt(t) - assert "DummyTable" in result - assert "foo array" in result snapshot.assert_match(result, "repr.txt") @@ -408,6 +390,10 @@ class MyRelation(ops.Relation): def schema(self): return self.parent.schema + @property + def values(self): + return {} + table = MyRelation(alltypes, kind="foo").to_expr() expr = table[table, table.a.name("a2")] result = fmt(expr) diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py new file mode 100644 index 000000000000..1f0737c3897c --- /dev/null +++ b/ibis/expr/tests/test_newrels.py @@ -0,0 +1,1193 @@ +from __future__ import annotations + +import pytest + +import ibis +import ibis.expr.datashape as ds +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +import ibis.expr.types as ir +from ibis import _ +from ibis.common.annotations import ValidationError +from ibis.common.exceptions import IbisInputError, IntegrityError +from ibis.expr.operations import ( + Aggregate, + Field, + Filter, + JoinChain, + JoinLink, + Project, + UnboundTable, +) +from ibis.expr.schema import Schema + +t = ibis.table( + name="t", + schema={ + "bool_col": "boolean", + "int_col": "int64", + "float_col": "float64", + "string_col": "string", + }, +) + + +def test_field(): + f = Field(t, "bool_col") + assert f.rel == t.op() + assert f.name == "bool_col" + assert f.shape == ds.columnar + assert f.dtype == dt.boolean + assert f.to_expr().equals(t.bool_col) + assert f.relations == frozenset([t.op()]) + + +def test_relation_coercion(): + assert ops.Relation.__coerce__(t) == t.op() + assert ops.Relation.__coerce__(t.op()) == t.op() + with pytest.raises(TypeError): + assert ops.Relation.__coerce__("invalid") + + +def test_unbound_table(): + node = t.op() + assert isinstance(t, ir.TableExpr) + assert isinstance(node, UnboundTable) + assert node.name == "t" + assert node.schema == Schema( + { + "bool_col": dt.boolean, + "int_col": dt.int64, + "float_col": dt.float64, + "string_col": dt.string, + } + ) + 
assert node.fields == { + "bool_col": ops.Field(node, "bool_col"), + "int_col": ops.Field(node, "int_col"), + "float_col": ops.Field(node, "float_col"), + "string_col": ops.Field(node, "string_col"), + } + assert node.values == {} + + +def test_select_fields(): + proj = t.select("int_col") + expected = Project(parent=t, values={"int_col": t.int_col}) + assert proj.op() == expected + assert proj.op().schema == Schema({"int_col": dt.int64}) + + proj = t.select(myint=t.int_col) + expected = Project(parent=t, values={"myint": t.int_col}) + assert proj.op() == expected + assert proj.op().schema == Schema({"myint": dt.int64}) + + proj = t.select(t.int_col, myint=t.int_col) + expected = Project(parent=t, values={"int_col": t.int_col, "myint": t.int_col}) + assert proj.op() == expected + assert proj.op().schema == Schema({"int_col": dt.int64, "myint": dt.int64}) + + proj = t.select(_.int_col, myint=_.int_col) + expected = Project(parent=t, values={"int_col": t.int_col, "myint": t.int_col}) + assert proj.op() == expected + + +def test_select_values(): + proj = t.select((1 + t.int_col).name("incremented")) + expected = Project(parent=t, values={"incremented": (1 + t.int_col)}) + assert proj.op() == expected + assert proj.op().schema == Schema({"incremented": dt.int64}) + + proj = t.select(ibis.literal(1), "float_col", length=t.string_col.length()) + expected = Project( + parent=t, + values={"1": 1, "float_col": t.float_col, "length": t.string_col.length()}, + ) + assert proj.op() == expected + assert proj.op().schema == Schema( + {"1": dt.int8, "float_col": dt.float64, "length": dt.int32} + ) + + assert expected.fields == { + "1": ops.Field(proj, "1"), + "float_col": ops.Field(proj, "float_col"), + "length": ops.Field(proj, "length"), + } + assert expected.values == { + "1": ibis.literal(1).op(), + "float_col": t.float_col.op(), + "length": t.string_col.length().op(), + } + + +def test_select_windowing_local_reduction(): + t1 = t.select(res=t.int_col.sum()) + assert t1.op() == Project(parent=t, values={"res": t.int_col.sum().over()}) + + +def test_select_windowizing_analytic_function(): + t1 = t.select(res=t.int_col.lag()) + assert t1.op() == Project(parent=t, values={"res": t.int_col.lag().over()}) + + +def test_subquery_integrity_check(): + t = ibis.table(name="t", schema={"a": "int64", "b": "string"}) + + msg = "Subquery must have exactly one column, got 2" + with pytest.raises(IntegrityError, match=msg): + ops.ScalarSubquery(t) + + +def test_select_turns_scalar_reduction_into_subquery(): + arr = ibis.literal([1, 2, 3]) + res = arr.unnest().sum() + t1 = t.select(res) + subquery = ops.ScalarSubquery(res.as_table()) + expected = Project(parent=t, values={"Sum((1, 2, 3))": subquery}) + assert t1.op() == expected + + +def test_select_scalar_foreign_scalar_reduction_into_subquery(): + t1 = t.filter(t.bool_col) + t2 = t.select(summary=t1.int_col.sum()) + subquery = ops.ScalarSubquery(t1.int_col.sum().as_table()) + expected = Project(parent=t, values={"summary": subquery}) + assert t2.op() == expected + + +def test_select_turns_value_with_multiple_parents_into_subquery(): + v = ibis.table(name="v", schema={"a": "int64", "b": "string"}) + v_filt = v.filter(v.a == t.int_col) + + t1 = t.select(t.int_col, max=v_filt.a.max()) + subquery = ops.ScalarSubquery(v_filt.a.max().as_table()) + expected = Project(parent=t, values={"int_col": t.int_col, "max": subquery}) + assert t1.op() == expected + + +def test_mutate(): + proj = t.select(t, other=t.int_col + 1) + expected = Project( + parent=t, + values={ + 
"bool_col": t.bool_col, + "int_col": t.int_col, + "float_col": t.float_col, + "string_col": t.string_col, + "other": t.int_col + 1, + }, + ) + assert proj.op() == expected + + +def test_mutate_overwrites_existing_column(): + t = ibis.table(dict(a="string", b="string")) + + mut = t.mutate(a=42) + assert mut.op() == Project(parent=t, values={"a": ibis.literal(42), "b": t.b}) + + sel = mut.select("a") + assert sel.op() == Project(parent=mut, values={"a": mut.a}) + + +def test_select_full_reprojection(): + t1 = t.select(t) + assert t1.op() == Project( + t, + { + "bool_col": t.bool_col, + "int_col": t.int_col, + "float_col": t.float_col, + "string_col": t.string_col, + }, + ) + + +def test_subsequent_selections_with_field_names(): + t1 = t.select("bool_col", "int_col", "float_col") + assert t1.op() == Project( + parent=t, + values={ + "bool_col": t.bool_col, + "int_col": t.int_col, + "float_col": t.float_col, + }, + ) + t2 = t1.select("bool_col", "int_col") + assert t2.op() == Project( + parent=t1, + values={ + "bool_col": t1.bool_col, + "int_col": t1.int_col, + }, + ) + t3 = t2.select("bool_col") + assert t3.op() == Project( + parent=t2, + values={ + "bool_col": t2.bool_col, + }, + ) + + +def test_subsequent_selections_field_dereferencing(): + t1 = t.select(t.bool_col, t.int_col, t.float_col) + assert t1.op() == Project( + parent=t, + values={ + "bool_col": t.bool_col, + "int_col": t.int_col, + "float_col": t.float_col, + }, + ) + + t2 = t1.select(t1.bool_col, t1.int_col) + assert t1.select(t1.bool_col, t.int_col).equals(t2) + assert t1.select(t.bool_col, t.int_col).equals(t2) + assert t2.op() == Project( + parent=t1, + values={ + "bool_col": t1.bool_col, + "int_col": t1.int_col, + }, + ) + + t3 = t2.select(t2.bool_col) + assert t2.select(t1.bool_col).equals(t3) + assert t2.select(t.bool_col).equals(t3) + assert t3.op() == Project( + parent=t2, + values={ + "bool_col": t2.bool_col, + }, + ) + + u1 = t.select(t.bool_col, t.int_col, t.float_col) + assert u1.op() == Project( + parent=t, + values={ + "bool_col": t.bool_col, + "int_col": t.int_col, + "float_col": t.float_col, + }, + ) + + u2 = u1.select(u1.bool_col, u1.int_col, u1.float_col) + assert u1.select(t.bool_col, u1.int_col, u1.float_col).equals(u2) + assert u1.select(t.bool_col, t.int_col, t.float_col).equals(u2) + assert u2.op() == Project( + parent=u1, + values={ + "bool_col": u1.bool_col, + "int_col": u1.int_col, + "float_col": u1.float_col, + }, + ) + + u3 = u2.select(u2.bool_col, u2.int_col, u2.float_col) + assert u2.select(u2.bool_col, u1.int_col, u2.float_col).equals(u3) + assert u2.select(u2.bool_col, u1.int_col, t.float_col).equals(u3) + assert u3.op() == Project( + parent=u2, + values={ + "bool_col": u2.bool_col, + "int_col": u2.int_col, + "float_col": u2.float_col, + }, + ) + + +def test_subsequent_selections_value_dereferencing(): + t1 = t.select( + bool_col=~t.bool_col, int_col=t.int_col + 1, float_col=t.float_col * 3 + ) + assert t1.op() == Project( + parent=t, + values={ + "bool_col": ~t.bool_col, + "int_col": t.int_col + 1, + "float_col": t.float_col * 3, + }, + ) + + t2 = t1.select(t1.bool_col, t1.int_col, t1.float_col) + assert t2.op() == Project( + parent=t1, + values={ + "bool_col": t1.bool_col, + "int_col": t1.int_col, + "float_col": t1.float_col, + }, + ) + + t3 = t2.select( + t2.bool_col, + t2.int_col, + float_col=t2.float_col * 2, + another_col=t1.float_col - 1, + ) + assert t3.op() == Project( + parent=t2, + values={ + "bool_col": t2.bool_col, + "int_col": t2.int_col, + "float_col": t2.float_col * 2, + 
"another_col": t2.float_col - 1, + }, + ) + + +def test_where(): + filt = t.filter(t.bool_col) + expected = Filter(parent=t, predicates=[t.bool_col]) + assert filt.op() == expected + + filt = t.filter(t.bool_col, t.int_col > 0) + expected = Filter(parent=t, predicates=[t.bool_col, t.int_col > 0]) + assert filt.op() == expected + + filt = t.filter(_.bool_col) + expected = Filter(parent=t, predicates=[t.bool_col]) + assert filt.op() == expected + + assert expected.fields == { + "bool_col": ops.Field(expected, "bool_col"), + "int_col": ops.Field(expected, "int_col"), + "float_col": ops.Field(expected, "float_col"), + "string_col": ops.Field(expected, "string_col"), + } + assert expected.values == { + "bool_col": t.bool_col.op(), + "int_col": t.int_col.op(), + "float_col": t.float_col.op(), + "string_col": t.string_col.op(), + } + + +def test_where_raies_for_empty_predicate_list(): + t = ibis.table(dict(a="string")) + with pytest.raises(IbisInputError): + t.filter() + + +def test_where_after_select(): + t1 = t.select(t.bool_col) + t2 = t1.filter(t.bool_col) + expected = Filter(parent=t1, predicates=[t1.bool_col]) + assert t2.op() == expected + + t1 = t.select(int_col=t.bool_col) + t2 = t1.filter(t.bool_col) + expected = Filter(parent=t1, predicates=[t1.int_col]) + assert t2.op() == expected + + +def test_where_with_reduction(): + with pytest.raises(IntegrityError): + Filter(t, predicates=[t.int_col.sum() > 1]) + + t1 = t.filter(t.int_col.sum() > 0) + subquery = ops.ScalarSubquery(t.int_col.sum().as_table()) + expected = Filter(parent=t, predicates=[ops.Greater(subquery, 0)]) + assert t1.op() == expected + + +def test_where_flattens_predicates(): + t1 = t.filter(t.bool_col & ((t.int_col > 0) & (t.float_col < 0))) + expected = Filter( + parent=t, + predicates=[ + t.bool_col, + t.int_col > 0, + t.float_col < 0, + ], + ) + assert t1.op() == expected + + +def test_project_filter_sort(): + expr = t.select(t.bool_col, t.int_col).filter(t.bool_col).order_by(t.int_col) + expected = ops.Sort( + parent=( + filt := ops.Filter( + parent=( + proj := ops.Project( + parent=t, + values={ + "bool_col": t.bool_col, + "int_col": t.int_col, + }, + ) + ), + predicates=[ops.Field(proj, "bool_col")], + ) + ), + keys=[ops.SortKey(ops.Field(filt, "int_col"), ascending=True)], + ) + assert expr.op() == expected + + +def test_subsequent_filter(): + f1 = t.filter(t.bool_col) + f2 = f1.filter(t.int_col > 0) + expected = Filter(f1, predicates=[f1.int_col > 0]) + assert f2.op() == expected + + +def test_project_before_and_after_filter(): + t1 = t.select( + bool_col=~t.bool_col, int_col=t.int_col + 1, float_col=t.float_col * 3 + ) + assert t1.op() == Project( + parent=t, + values={ + "bool_col": ~t.bool_col, + "int_col": t.int_col + 1, + "float_col": t.float_col * 3, + }, + ) + + t2 = t1.filter(t1.bool_col) + assert t2.op() == Filter(parent=t1, predicates=[t1.bool_col]) + + t3 = t2.filter(t2.int_col > 0) + assert t3.op() == Filter(parent=t2, predicates=[t2.int_col > 0]) + + t3_ = t2.filter(t1.int_col > 0) + assert t3_.op() == Filter(parent=t2, predicates=[t2.int_col > 0]) + + t4 = t3.select(t3.bool_col, t3.int_col) + assert t4.op() == Project( + parent=t3, + values={ + "bool_col": t3.bool_col, + "int_col": t3.int_col, + }, + ) + + +# TODO(kszucs): add test for failing integrity checks +def test_join(): + t1 = ibis.table(name="t1", schema={"a": "int64", "b": "string"}) + t2 = ibis.table(name="t2", schema={"c": "int64", "d": "string"}) + + joined = t1.join(t2, [t1.a == t2.c]) + assert isinstance(joined, ir.JoinExpr) + assert 
isinstance(joined.op(), JoinChain) + assert isinstance(joined.op().to_expr(), ir.JoinExpr) + + result = joined._finish() + assert isinstance(joined, ir.TableExpr) + assert isinstance(joined.op(), JoinChain) + assert isinstance(joined.op().to_expr(), ir.JoinExpr) + + t2_ = joined.op().rest[0].table.to_expr() + assert result.op() == JoinChain( + first=t1, + rest=[ + JoinLink("inner", t2_, [t1.a == t2_.c]), + ], + values={ + "a": t1.a, + "b": t1.b, + "c": t2_.c, + "d": t2_.d, + }, + ) + + +def test_join_unambiguous_select(): + a = ibis.table(name="a", schema={"a_int": "int64", "a_str": "string"}) + b = ibis.table(name="b", schema={"b_int": "int64", "b_str": "string"}) + + join = a.join(b, a.a_int == b.b_int) + expr1 = join["a_int", "b_int"] + expr2 = join.select("a_int", "b_int") + assert expr1.equals(expr2) + + b_ = join.op().rest[0].table.to_expr() + assert expr1.op() == JoinChain( + first=a, + rest=[JoinLink("inner", b_, [a.a_int == b_.b_int])], + values={ + "a_int": a.a_int, + "b_int": b_.b_int, + }, + ) + + +def test_join_with_subsequent_projection(): + t1 = ibis.table(name="t1", schema={"a": "int64", "b": "string"}) + t2 = ibis.table(name="t2", schema={"c": "int64", "d": "string"}) + + # a single computed value is pulled to a subsequent projection + joined = t1.join(t2, [t1.a == t2.c]) + expr = joined.select(t1.a, t1.b, col=t2.c + 1) + t2_ = joined.op().rest[0].table.to_expr() + expected = JoinChain( + first=t1, + rest=[JoinLink("inner", t2_, [t1.a == t2_.c])], + values={"a": t1.a, "b": t1.b, "col": t2_.c + 1}, + ) + assert expr.op() == expected + + # multiple computed values + joined = t1.join(t2, [t1.a == t2.c]) + expr = joined.select( + t1.a, + t1.b, + foo=t2.c + 1, + bar=t2.c + 2, + baz=t2.d.name("bar") + "3", + baz2=(t2.c + t1.a).name("foo"), + ) + t2_ = joined.op().rest[0].table.to_expr() + expected = JoinChain( + first=t1, + rest=[JoinLink("inner", t2_, [t1.a == t2_.c])], + values={ + "a": t1.a, + "b": t1.b, + "foo": t2_.c + 1, + "bar": t2_.c + 2, + "baz": t2_.d.name("bar") + "3", + "baz2": t2_.c + t1.a, + }, + ) + assert expr.op() == expected + + +def test_join_with_subsequent_projection_colliding_names(): + t1 = ibis.table(name="t1", schema={"a": "int64", "b": "string"}) + t2 = ibis.table( + name="t2", schema={"a": "int64", "b": "string", "c": "float", "d": "string"} + ) + + joined = t1.join(t2, [t1.a == t2.a]) + expr = joined.select( + t1.a, + t1.b, + foo=t2.a + 1, + bar=t1.a + t2.a, + ) + t2_ = joined.op().rest[0].table.to_expr() + expected = JoinChain( + first=t1, + rest=[JoinLink("inner", t2_, [t1.a == t2_.a])], + values={ + "a": t1.a, + "b": t1.b, + "foo": t2_.a + 1, + "bar": t1.a + t2_.a, + }, + ) + assert expr.op() == expected + + +def test_chained_join(): + a = ibis.table(name="a", schema={"a": "int64", "b": "string"}) + b = ibis.table(name="b", schema={"c": "int64", "d": "string"}) + c = ibis.table(name="c", schema={"e": "int64", "f": "string"}) + + joined = a.join(b, [a.a == b.c]).join(c, [a.a == c.e]) + result = joined._finish() + + b_ = joined.op().rest[0].table.to_expr() + c_ = joined.op().rest[1].table.to_expr() + assert result.op() == JoinChain( + first=a, + rest=[ + JoinLink("inner", b_, [a.a == b_.c]), + JoinLink("inner", c_, [a.a == c_.e]), + ], + values={ + "a": a.a, + "b": a.b, + "c": b_.c, + "d": b_.d, + "e": c_.e, + "f": c_.f, + }, + ) + + joined = a.join(b, [a.a == b.c]).join(c, [b.c == c.e]) + result = joined.select(a.a, b.d, c.f) + + b_ = joined.op().rest[0].table.to_expr() + c_ = joined.op().rest[1].table.to_expr() + assert result.op() == JoinChain( + 
first=a, + rest=[ + JoinLink("inner", b_, [a.a == b_.c]), + JoinLink("inner", c_, [b_.c == c_.e]), + ], + values={ + "a": a.a, + "d": b_.d, + "f": c_.f, + }, + ) + + +def test_chained_join_referencing_intermediate_table(): + a = ibis.table(name="a", schema={"a": "int64", "b": "string"}) + b = ibis.table(name="b", schema={"c": "int64", "d": "string"}) + c = ibis.table(name="c", schema={"e": "int64", "f": "string"}) + + ab = a.join(b, [a.a == b.c]) + assert isinstance(ab, ir.JoinExpr) + + # assert ab.a.op() == Field(ab, "a") + abc = ab.join(c, [ab.a == c.e]) + assert isinstance(abc, ir.JoinExpr) + + result = abc._finish() + + b_ = abc.op().rest[0].table.to_expr() + c_ = abc.op().rest[1].table.to_expr() + assert result.op() == JoinChain( + first=a, + rest=[ + JoinLink("inner", b_, [a.a == b_.c]), + JoinLink("inner", c_, [a.a == c_.e]), + ], + values={"a": a.a, "b": a.b, "c": b_.c, "d": b_.d, "e": c_.e, "f": c_.f}, + ) + + +def test_join_predicate_dereferencing(): + # See #790, predicate pushdown in joins not supported + + # Star schema with fact table + table = ibis.table({"c": int, "f": float, "foo_id": str, "bar_id": str}) + table2 = ibis.table({"foo_id": str, "value1": float, "value3": float}) + table3 = ibis.table({"bar_id": str, "value2": float}) + + filtered = table[table["f"] > 0] + + # dereference table.foo_id to filtered.foo_id + j1 = filtered.left_join(table2, table["foo_id"] == table2["foo_id"]) + + table2_ = j1.op().rest[0].table.to_expr() + expected = ops.JoinChain( + first=filtered, + rest=[ + ops.JoinLink("left", table2_, [filtered.foo_id == table2_.foo_id]), + ], + values={ + "c": filtered.c, + "f": filtered.f, + "foo_id": filtered.foo_id, + "bar_id": filtered.bar_id, + "foo_id_right": table2_.foo_id, + "value1": table2_.value1, + "value3": table2_.value3, + }, + ) + assert j1.op() == expected + + j2 = j1.inner_join(table3, filtered["bar_id"] == table3["bar_id"]) + + table2_ = j2.op().rest[0].table.to_expr() + table3_ = j2.op().rest[1].table.to_expr() + expected = ops.JoinChain( + first=filtered, + rest=[ + ops.JoinLink("left", table2_, [filtered.foo_id == table2_.foo_id]), + ops.JoinLink("inner", table3_, [filtered.bar_id == table3_.bar_id]), + ], + values={ + "c": filtered.c, + "f": filtered.f, + "foo_id": filtered.foo_id, + "bar_id": filtered.bar_id, + "foo_id_right": table2_.foo_id, + "value1": table2_.value1, + "value3": table2_.value3, + "bar_id_right": table3_.bar_id, + "value2": table3_.value2, + }, + ) + assert j2.op() == expected + + # Project out the desired fields + view = j2[[filtered, table2["value1"], table3["value2"]]] + expected = ops.JoinChain( + first=filtered, + rest=[ + ops.JoinLink("left", table2_, [filtered.foo_id == table2_.foo_id]), + ops.JoinLink("inner", table3_, [filtered.bar_id == table3_.bar_id]), + ], + values={ + "c": filtered.c, + "f": filtered.f, + "foo_id": filtered.foo_id, + "bar_id": filtered.bar_id, + "value1": table2_.value1, + "value2": table3_.value2, + }, + ) + assert view.op() == expected + + +def test_aggregate(): + agg = t.aggregate(by=[t.bool_col], metrics=[t.int_col.sum()]) + expected = Aggregate( + parent=t, + groups={ + "bool_col": t.bool_col, + }, + metrics={ + "Sum(int_col)": t.int_col.sum(), + }, + ) + assert agg.op() == expected + + +def test_aggregate_having(): + table = ibis.table(name="table", schema={"g": "string", "f": "double"}) + + metrics = [table.f.sum().name("total")] + by = ["g"] + + expr = table.aggregate(metrics, by=by, having=(table.f.sum() > 0).name("cond")) + expected = table.aggregate(metrics, 
by=by).filter(_.total > 0) + assert expr.equals(expected) + + with pytest.raises(ValidationError): + # non boolean + table.aggregate(metrics, by=by, having=table.f.sum()) + + with pytest.raises(IntegrityError): + # non scalar + table.aggregate(metrics, by=by, having=table.f > 2) + + +def test_select_with_uncorrelated_scalar_subquery(): + t1 = ibis.table(name="t1", schema={"a": "int64", "b": "string"}) + t2 = ibis.table(name="t2", schema={"c": "int64", "d": "string"}) + + # Create a subquery + t2_filt = t2.filter(t2.d == "value") + + # Non-reduction won't be turned into a subquery + with pytest.raises(IntegrityError): + t1.select(t2_filt.c) + + # Construct the projection using the subquery + sub = t1.select(t1.a, summary=t2_filt.c.sum()) + expected = Project( + parent=t1, + values={ + "a": t1.a, + "summary": ops.ScalarSubquery(t2_filt.c.sum().as_table()), + }, + ) + assert sub.op() == expected + + +def test_select_with_reduction_turns_into_window_function(): + # Define your tables + employees = ibis.table( + name="employees", schema={"name": "string", "salary": "double"} + ) + + # Use the subquery in a select operation + expr = employees.select(employees.name, average_salary=employees.salary.mean()) + expected = Project( + parent=employees, + values={ + "name": employees.name, + "average_salary": employees.salary.mean().over(), + }, + ) + assert expr.op() == expected + + +def test_select_with_correlated_scalar_subquery(): + # Define your tables + t1 = ibis.table(name="t1", schema={"a": "int64", "b": "string"}) + t2 = ibis.table(name="t2", schema={"c": "int64", "d": "string"}) + + # Create a subquery + filt = t2.filter(t2.d == t1.b) + summary = filt.c.sum().name("summary") + + # Use the subquery in a select operation + expr = t1.select(t1.a, summary) + expected = Project( + parent=t1, + values={ + "a": t1.a, + "summary": ops.ScalarSubquery(filt.c.sum().as_table()), + }, + ) + assert expr.op() == expected + + +def test_aggregate_field_dereferencing(): + t = ibis.table( + { + "l_orderkey": "int32", + "l_partkey": "int32", + "l_suppkey": "int32", + "l_linenumber": "int32", + "l_quantity": "decimal(15, 2)", + "l_extendedprice": "decimal(15, 2)", + "l_discount": "decimal(15, 2)", + "l_tax": "decimal(15, 2)", + "l_returnflag": "string", + "l_linestatus": "string", + "l_shipdate": "date", + "l_commitdate": "date", + "l_receiptdate": "date", + "l_shipinstruct": "string", + "l_shipmode": "string", + "l_comment": "string", + } + ) + + f = t.filter(t.l_shipdate <= ibis.date("1998-09-01")) + assert f.op() == Filter( + parent=t, predicates=[t.l_shipdate <= ibis.date("1998-09-01")] + ) + + discount_price = t.l_extendedprice * (1 - t.l_discount) + charge = discount_price * (1 + t.l_tax) + a = f.group_by(["l_returnflag", "l_linestatus"]).aggregate( + sum_qty=t.l_quantity.sum(), + sum_base_price=t.l_extendedprice.sum(), + sum_disc_price=discount_price.sum(), + sum_charge=charge.sum(), + avg_qty=t.l_quantity.mean(), + avg_price=t.l_extendedprice.mean(), + avg_disc=t.l_discount.mean(), + count_order=f.count(), # note that this is f.count() not t.count() + ) + + discount_price_ = f.l_extendedprice * (1 - f.l_discount) + charge_ = discount_price_ * (1 + f.l_tax) + assert a.op() == Aggregate( + parent=f, + groups={ + "l_returnflag": f.l_returnflag, + "l_linestatus": f.l_linestatus, + }, + metrics={ + "sum_qty": f.l_quantity.sum(), + "sum_base_price": f.l_extendedprice.sum(), + "sum_disc_price": discount_price_.sum(), + "sum_charge": charge_.sum(), + "avg_qty": f.l_quantity.mean(), + "avg_price": 
f.l_extendedprice.mean(), + "avg_disc": f.l_discount.mean(), + "count_order": f.count(), + }, + ) + + s = a.order_by(["l_returnflag", "l_linestatus"]) + assert s.op() == ops.Sort( + parent=a, + keys=[a.l_returnflag, a.l_linestatus], + ) + + +def test_isin_subquery(): + t1 = ibis.table(name="t1", schema={"a": "int64", "b": "string"}) + t2 = ibis.table(name="t2", schema={"c": "int64", "d": "string"}) + + t2_filt = t2.filter(t2.d == "value") + + expr = t1.filter(t1.a.isin(t2_filt.c)) + subquery = Project(t2_filt, values={"c": t2_filt.c}) + expected = Filter(parent=t1, predicates=[ops.InSubquery(rel=subquery, needle=t1.a)]) + assert expr.op() == expected + + +def test_filter_condition_referencing_agg_without_groupby_turns_it_into_a_subquery(): + r1 = ibis.table( + name="r3", schema={"name": str, "key": str, "int_col": int, "float_col": float} + ) + r2 = r1.filter(r1.name == "GERMANY") + r3 = r2.aggregate(by=[r2.key], value=(r2.float_col * r2.int_col).sum()) + r4 = r2.aggregate(total=(r2.float_col * r2.int_col).sum()) + r5 = r3.filter(r3.value > r4.total * 0.0001) + + total = (r2.float_col * r2.int_col).sum() + subquery = ops.ScalarSubquery( + ops.Aggregate(r2, groups={}, metrics={total.get_name(): total}) + ).to_expr() + expected = Filter(parent=r3, predicates=[r3.value > subquery * 0.0001]) + + assert r5.op() == expected + + +def test_self_join(): + t0 = ibis.table(schema=ibis.schema(dict(key="int")), name="leaf") + t1 = t0.filter(ibis.literal(True)) + t2 = t1[["key"]] + + t3 = t2.join(t2, ["key"]) + t2_ = t3.op().rest[0].table.to_expr() + expected = ops.JoinChain( + first=t2, + rest=[ + ops.JoinLink("inner", t2_, [t2.key == t2_.key]), + ], + values={"key": t2.key, "key_right": t2_.key}, + ) + assert t3.op() == expected + + t4 = t3.join(t3, ["key"]) + t3_ = t4.op().rest[1].table.to_expr() + + expected = ops.JoinChain( + first=t2, + rest=[ + ops.JoinLink("inner", t2_, [t2.key == t2_.key]), + ops.JoinLink("inner", t3_, [t2.key == t3_.key]), + ], + values={ + "key": t2.key, + "key_right": t2_.key, + "key_right_right": t3_.key_right, + }, + ) + assert t4.op() == expected + + +def test_self_join_view(): + t = ibis.memtable({"x": [1, 2], "y": [2, 1], "z": ["a", "b"]}) + t_view = t.view() + expr = t.join(t_view, t.x == t_view.y).select("x", "y", "z", "z_right") + + t_view_ = expr.op().rest[0].table.to_expr() + expected = ops.JoinChain( + first=t, + rest=[ + ops.JoinLink("inner", t_view_, [t.x == t_view_.y]), + ], + values={"x": t.x, "y": t.y, "z": t.z, "z_right": t_view_.z}, + ) + assert expr.op() == expected + + +def test_self_join_with_view_projection(): + t1 = ibis.memtable({"x": [1, 2], "y": [2, 1], "z": ["a", "b"]}) + t2 = t1.view() + expr = t1.inner_join(t2, ["x"])[[t1]] + + t2_ = expr.op().rest[0].table.to_expr() + expected = ops.JoinChain( + first=t1, + rest=[ + ops.JoinLink("inner", t2_, [t1.x == t2_.x]), + ], + values={"x": t1.x, "y": t1.y, "z": t1.z}, + ) + assert expr.op() == expected + + +def test_joining_same_table_twice(): + left = ibis.table(name="left", schema={"time1": int, "value": float, "a": str}) + right = ibis.table(name="right", schema={"time2": int, "value2": float, "b": str}) + + joined = left.inner_join(right, left.a == right.b).inner_join( + right, left.value == right.value2 + ) + + right_ = joined.op().rest[0].table.to_expr() + right__ = joined.op().rest[1].table.to_expr() + expected = ops.JoinChain( + first=left, + rest=[ + ops.JoinLink("inner", right_, [left.a == right_.b]), + ops.JoinLink("inner", right__, [left.value == right__.value2]), + ], + values={ + "time1": 
left.time1, + "value": left.value, + "a": left.a, + "time2": right_.time2, + "value2": right_.value2, + "b": right_.b, + "time2_right": right__.time2, + "value2_right": right__.value2, + "b_right": right__.b, + }, + ) + assert joined.op() == expected + + +def test_join_chain_gets_reused_and_continued_after_a_select(): + a = ibis.table(name="a", schema={"a": "int64", "b": "string"}) + b = ibis.table(name="b", schema={"c": "int64", "d": "string"}) + c = ibis.table(name="c", schema={"e": "int64", "f": "string"}) + + ab = a.join(b, [a.a == b.c]) + abc = ab[a.b, b.d].join(c, [a.a == c.e]) + + b_ = abc.op().rest[0].table.to_expr() + c_ = abc.op().rest[1].table.to_expr() + expected = ops.JoinChain( + first=a, + rest=[ + ops.JoinLink("inner", b_, [a.a == b_.c]), + ops.JoinLink("inner", c_, [a.a == c_.e]), + ], + values={ + "b": a.b, + "d": b_.d, + "e": c_.e, + "f": c_.f, + }, + ) + assert abc.op() == expected + assert abc._finish().op() == expected + + +def test_self_join_extensive(): + a = ibis.table(name="a", schema={"a": "int64", "b": "string"}) + + aa = a.join(a, [a.a == a.a]) + aa_ = a.join(a, "a") + aa__ = a.join(a, [("a", "a")]) + for join in [aa, aa_, aa__]: + a1 = join.op().rest[0].table.to_expr() + expected = ops.JoinChain( + first=a, + rest=[ + ops.JoinLink("inner", a1, [a.a == a1.a]), + ], + values={ + "a": a.a, + "b": a.b, + "a_right": a1.a, + "b_right": a1.b, + }, + ) + assert join.op() == expected + + aaa = a.join(a, [a.a == a.a]).join(a, [a.a == a.a]) + a0 = a + a1 = aaa.op().rest[0].table.to_expr() + a2 = aaa.op().rest[1].table.to_expr() + expected = ops.JoinChain( + first=a0, + rest=[ + ops.JoinLink("inner", a1, [a0.a == a1.a]), + ops.JoinLink("inner", a2, [a0.a == a2.a]), + ], + values={ + "a": a0.a, + "b": a0.b, + "a_right": a1.a, + "b_right": a1.b, + }, + ) + + aaa = aa.join(a, [aa.a == a.a]) + aaa_ = aa.join(a, "a") + aaa__ = aa.join(a, [("a", "a")]) + for join in [aaa, aaa_, aaa__]: + a1 = join.op().rest[0].table.to_expr() + a2 = join.op().rest[1].table.to_expr() + expected = ops.JoinChain( + first=a, + rest=[ + ops.JoinLink("inner", a1, [a.a == a1.a]), + ops.JoinLink("inner", a2, [a.a == a2.a]), + ], + values={ + "a": a.a, + "b": a.b, + "a_right": a1.a, + "b_right": a1.b, + }, + ) + assert join.op() == expected + + +def test_self_join_with_intermediate_selection(): + a = ibis.table(name="a", schema={"a": "int64", "b": "string"}) + + join = a[["b", "a"]].join(a, [a.a == a.a]) + a0 = a[["b", "a"]] + a1 = join.op().rest[0].table.to_expr() + expected = ops.JoinChain( + first=a0, + rest=[ + ops.JoinLink("inner", a1, [a0.a == a1.a]), + ], + values={ + "b": a0.b, + "a": a0.a, + "a_right": a1.a, + "b_right": a1.b, + }, + ) + assert join.op() == expected + + aa_ = a.join(a, [a.a == a.a])["a", "b_right"] + aaa_ = aa_.join(a, [aa_.a == a.a]) + a0 = a + a1 = aaa_.op().rest[0].table.to_expr() + a2 = aaa_.op().rest[1].table.to_expr() + expected = ops.JoinChain( + first=a0, + rest=[ + ops.JoinLink("inner", a1, [a0.a == a1.a]), + ops.JoinLink("inner", a2, [a0.a == a2.a]), + ], + values={ + "a": a0.a, + "b_right": a1.b, + "a_right": a2.a, + "b": a2.b, + }, + ) + assert aaa_.op() == expected + + # TODO(kszucs): this use case could be supported if `_get_column` gets + # overridden to return underlying column reference, but that would mean + # that `aa.a` returns with `a.a` instead of `aa.a` which breaks other + # things + # aa = a.join(a, [a.a == a.a]) + # aaa = aa["a", "b_right"].join(a, [aa.a == a.a]) + # a0 = a + # a1 = aaa.op().rest[0].table.to_expr() + # a2 = 
aaa.op().rest[1].table.to_expr() + # expected = ops.JoinChain( + # first=a0, + # rest=[ + # ops.JoinLink("inner", a1, [a0.a == a1.a]), + # ops.JoinLink("inner", a2, [a0.a == a2.a]), + # ], + # values={ + # "a": a0.a, + # "b_right": a1.b, + # "a_right": a2.a, + # "b": a2.b, + # }, + # ) + # assert aaa.op() == expected + + +def test_name_collisions_raise(): + a = ibis.table(name="a", schema={"a": "int64", "b": "string"}) + b = ibis.table(name="b", schema={"a": "int64", "b": "string"}) + c = ibis.table(name="c", schema={"a": "int64", "b": "string"}) + + ab = a.join(b, [a.a == b.a]) + filt = ab.filter(ab.a < 1) + expected = ops.Filter( + parent=ab, + predicates=[ + ops.Less(ops.Field(ab, "a"), 1), + ], + ) + assert filt.op() == expected + + abc = a.join(b, [a.a == b.a]).join(c, [a.a == c.a]) + with pytest.raises(IntegrityError): + abc.filter(abc.a < 1) diff --git a/ibis/expr/tests/test_rewrites.py b/ibis/expr/tests/test_rewrites.py new file mode 100644 index 000000000000..ca54f2216006 --- /dev/null +++ b/ibis/expr/tests/test_rewrites.py @@ -0,0 +1,104 @@ +from __future__ import annotations + +import ibis +import ibis.expr.operations as ops +from ibis.expr.rewrites import simplify + +t = ibis.table( + name="t", + schema={ + "bool_col": "boolean", + "int_col": "int64", + "float_col": "float64", + "string_col": "string", + }, +) + + +def test_simplify_full_reprojection(): + t1 = t.select(t) + t1_opt = simplify(t1.op()) + assert t1_opt == t.op() + + +def test_simplify_subsequent_field_selections(): + t1 = t.select(t.bool_col, t.int_col, t.float_col) + assert t1.op() == ops.Project( + parent=t, + values={ + "bool_col": t.bool_col, + "int_col": t.int_col, + "float_col": t.float_col, + }, + ) + + t2 = t1.select(t1.bool_col, t1.int_col) + t2_opt = simplify(t2.op()) + assert t2_opt == ops.Project( + parent=t, + values={ + "bool_col": t.bool_col, + "int_col": t.int_col, + }, + ) + + t3 = t2.select(t2.bool_col) + t3_opt = simplify(t3.op()) + assert t3_opt == ops.Project(parent=t, values={"bool_col": t.bool_col}) + + +def test_simplify_subsequent_value_selections(): + t1 = t.select( + bool_col=~t.bool_col, int_col=t.int_col + 1, float_col=t.float_col * 3 + ) + t2 = t1.select(t1.bool_col, t1.int_col, t1.float_col) + t2_opt = simplify(t2.op()) + assert t2_opt == ops.Project( + parent=t, + values={ + "bool_col": ~t.bool_col, + "int_col": t.int_col + 1, + "float_col": t.float_col * 3, + }, + ) + + t3 = t2.select( + t2.bool_col, + t2.int_col, + float_col=t2.float_col * 2, + another_col=t1.float_col - 1, + ) + t3_opt = simplify(t3.op()) + assert t3_opt == ops.Project( + parent=t, + values={ + "bool_col": ~t.bool_col, + "int_col": t.int_col + 1, + "float_col": (t.float_col * 3) * 2, + "another_col": (t.float_col * 3) - 1, + }, + ) + + +def test_simplify_subsequent_filters(): + f1 = t.filter(t.bool_col) + f2 = f1.filter(t.int_col > 0) + f2_opt = simplify(f2.op()) + assert f2_opt == ops.Filter(t, predicates=[t.bool_col, t.int_col > 0]) + + +def test_simplify_project_filter_project(): + t1 = t.select( + bool_col=~t.bool_col, int_col=t.int_col + 1, float_col=t.float_col * 3 + ) + t2 = t1.filter(t1.bool_col) + t3 = t2.filter(t2.int_col > 0) + t4 = t3.select(t3.bool_col, t3.int_col) + + filt = ops.Filter(parent=t, predicates=[~t.bool_col, t.int_col + 1 > 0]).to_expr() + proj = ops.Project( + parent=filt, values={"bool_col": ~filt.bool_col, "int_col": filt.int_col + 1} + ).to_expr() + + t4_opt = simplify(t4.op()) + assert t4_opt == proj.op() diff --git a/ibis/expr/types/__init__.py b/ibis/expr/types/__init__.py index 
610504d43989..99bd54d2f6e4 100644 --- a/ibis/expr/types/__init__.py +++ b/ibis/expr/types/__init__.py @@ -12,6 +12,7 @@ from ibis.expr.types.maps import * # noqa: F403 from ibis.expr.types.numeric import * # noqa: F403 from ibis.expr.types.relations import * # noqa: F403 +from ibis.expr.types.joins import * # noqa: F403 from ibis.expr.types.strings import * # noqa: F403 from ibis.expr.types.structs import * # noqa: F403 from ibis.expr.types.temporal import * # noqa: F403 diff --git a/ibis/expr/types/core.py b/ibis/expr/types/core.py index 3167cbee373a..be7944a912f1 100644 --- a/ibis/expr/types/core.py +++ b/ibis/expr/types/core.py @@ -243,9 +243,10 @@ def _find_backends(self) -> tuple[list[BaseBackend], bool]: list[BaseBackend] A list of the backends found. """ + backends = set() has_unbound = False - node_types = (ops.DatabaseTable, ops.SQLQueryResult, ops.UnboundTable) + node_types = (ops.UnboundTable, ops.DatabaseTable, ops.SQLQueryResult) for table in self.op().find(node_types): if isinstance(table, ops.UnboundTable): has_unbound = True diff --git a/ibis/expr/types/generic.py b/ibis/expr/types/generic.py index 22f1f1b65029..10c487475d11 100644 --- a/ibis/expr/types/generic.py +++ b/ibis/expr/types/generic.py @@ -6,19 +6,22 @@ from public import public import ibis + import ibis.common.exceptions as com import ibis.expr.datatypes as dt +import ibis.expr.builders as bl import ibis.expr.operations as ops -from ibis.common.deferred import Deferred +from ibis.common.deferred import Deferred, deferrable, _ from ibis.common.grounds import Singleton +from ibis.expr.rewrites import rewrite_window_input from ibis.expr.types.core import Expr, _binop, _FixedTextJupyterMixin from ibis.util import deprecated + if TYPE_CHECKING: import pandas as pd import pyarrow as pa - import ibis.expr.builders as bl import ibis.expr.types as ir from ibis.formats.pyarrow import PyArrowData @@ -573,7 +576,7 @@ def isin(self, values: Value | Sequence[Value]) -> ir.BooleanValue: if isinstance(values, ArrayValue): return ops.ArrayContains(values, self).to_expr() elif isinstance(values, Column): - return ops.InColumn(self, values).to_expr() + return ops.InSubquery(values.as_table(), needle=self).to_expr() else: return ops.InValues(self, values).to_expr() @@ -722,11 +725,7 @@ def over( Value A window function expression """ - import ibis.expr.analysis as an - import ibis.expr.builders as bl - from ibis.common.deferred import Call - from ibis import _ - + node = self.op() if window is None: window = ibis.window( rows=rows, @@ -734,23 +733,30 @@ def over( group_by=group_by, order_by=order_by, ) + elif not isinstance(window, bl.WindowBuilder): + raise com.IbisTypeError("Unexpected window type: {window!r}") + + if len(node.relations) == 0: + table = None + elif len(node.relations) == 1: + (table,) = node.relations + else: + raise com.RelationError("Cannot use window with multiple tables") + @deferrable def bind(table): frame = window.bind(table) - expr = an.windowize_function(self, frame) - if expr.equals(self): + winfunc = rewrite_window_input(node, frame) + if winfunc == node: raise com.IbisTypeError( "No reduction or analytic function found to construct a window expression" ) - return expr + return winfunc.to_expr() - if isinstance(window, bl.WindowBuilder): - if table := an.find_first_base_table(self.op()): - return bind(table) - else: - return Deferred(Call(bind, _)) - else: - raise com.IbisTypeError("Unexpected window type: {window!r}") + try: + return bind(table) + except com.IbisInputError: + return bind(_) def 
isnull(self) -> ir.BooleanValue: """Return whether this expression is NULL. @@ -1119,9 +1125,13 @@ def __hash__(self) -> int: return super().__hash__() def __eq__(self, other: Value) -> ir.BooleanValue: + if other is None: + return _binop(ops.IdenticalTo, self, other) return _binop(ops.Equals, self, other) def __ne__(self, other: Value) -> ir.BooleanValue: + if other is None: + return ~self.__eq__(other) return _binop(ops.NotEquals, self, other) def __ge__(self, other: Value) -> ir.BooleanValue: @@ -1161,22 +1171,20 @@ def as_table(self) -> ir.Table: >>> expr.equals(expected) True """ - from ibis.expr.analysis import find_immediate_parent_tables - - roots = find_immediate_parent_tables(self.op()) - if len(roots) > 1: + parents = self.op().relations + values = {self.get_name(): self} + + if len(parents) == 0: + return ops.DummyTable(values).to_expr() + elif len(parents) == 1: + (parent,) = parents + return parent.to_expr().select(self) + else: raise com.RelationError( - f"Cannot convert {type(self)} expression " - "involving multiple base table references " - "to a projection" + f"Cannot convert {type(self)} expression involving multiple " + "base table references to a projection" ) - if roots: - return roots[0].to_expr().select(self) - - # no child table to select from - return ops.DummyTable(values=(self,)).to_expr() - def to_pandas(self, **kwargs) -> pd.Series: """Convert a column expression to a pandas Series or scalar object. @@ -1257,20 +1265,19 @@ def as_table(self) -> ir.Table: >>> isinstance(lit, ir.Table) True """ - from ibis.expr.analysis import find_first_base_table + parents = self.op().relations - op = self.op() - table = find_first_base_table(op) - if table is not None: - return table.to_expr().aggregate(**{self.get_name(): self}) + if len(parents) == 0: + return ops.DummyTable({self.get_name(): self}).to_expr() + elif len(parents) == 1: + (parent,) = parents + return parent.to_expr().aggregate(self) else: - if isinstance(op, ops.Alias): - value = op - assert value.name == self.get_name() - else: - value = ops.Alias(op, self.get_name()) - - return ops.DummyTable(values=(value,)).to_expr() + raise com.RelationError( + f"The scalar expression {self} cannot be converted to a " + "table expression because it involves multiple base table " + "references" + ) def __deferred_repr__(self): return f"" @@ -1326,14 +1333,23 @@ def __pandas_result__(self, df: pd.DataFrame) -> pd.Series: return PandasData.convert_column(df.loc[:, column], self.type()) def _bind_reduction_filter(self, where): - import ibis.expr.analysis as an - - if where is None or not isinstance(where, Deferred): + if isinstance(where, Deferred): + if len(node.relations) == 0: + raise com.IbisInputError( + "Unable to bind deferred expression to a table because " + "the expression doesn't depend on any tables" + ) + elif len(node.relations) == 1: + (table,) = node.relations + return where.resolve(table) + else: + raise com.RelationError( + "Cannot bind deferred expression to a table because the " + "expression depends on multiple tables" + ) + else: return where - table = an.find_first_base_table(self.op()).to_expr() - return where.resolve(table) - def __deferred_repr__(self): return f"" @@ -1830,16 +1846,9 @@ def value_counts(self) -> ir.Table: │ d │ 3 │ └────────┴─────────────┘ """ - from ibis.expr.analysis import find_first_base_table - name = self.get_name() - return ( - find_first_base_table(self.op()) - .to_expr() - .select(self) - .group_by(name) - .agg(**{f"{name}_count": lambda t: t.count()}) - ) + metric = 
_.count().name(f"{name}_count") + return self.as_table().group_by(name).aggregate(metric) def first(self, where: ir.BooleanValue | None = None) -> Value: """Return the first value of a column. @@ -1921,13 +1930,7 @@ def rank(self) -> ir.IntegerColumn: │ 3 │ 5 │ └────────┴───────┘ """ - import ibis.expr.analysis as an - - return ( - ibis.rank() - .over(order_by=self) - .resolve(an.find_first_base_table(self.op()).to_expr()) - ) + return ibis.rank().over(order_by=self) def dense_rank(self) -> ir.IntegerColumn: """Position of first element within each group of equal values. @@ -1960,33 +1963,15 @@ def dense_rank(self) -> ir.IntegerColumn: │ 3 │ 2 │ └────────┴───────┘ """ - import ibis.expr.analysis as an - - return ( - ibis.dense_rank() - .over(order_by=self) - .resolve(an.find_first_base_table(self.op()).to_expr()) - ) + return ibis.dense_rank().over(order_by=self) def percent_rank(self) -> Column: """Return the relative rank of the values in the column.""" - import ibis.expr.analysis as an - - return ( - ibis.percent_rank() - .over(order_by=self) - .resolve(an.find_first_base_table(self.op()).to_expr()) - ) + return ibis.percent_rank().over(order_by=self) def cume_dist(self) -> Column: """Return the cumulative distribution over a window.""" - import ibis.expr.analysis as an - - return ( - ibis.cume_dist() - .over(order_by=self) - .resolve(an.find_first_base_table(self.op()).to_expr()) - ) + return ibis.cume_dist().over(order_by=self) def ntile(self, buckets: int | ir.IntegerValue) -> ir.IntegerColumn: """Return the integer number of a partitioning of the column values. @@ -1996,13 +1981,7 @@ def ntile(self, buckets: int | ir.IntegerValue) -> ir.IntegerColumn: buckets Number of buckets to partition into """ - import ibis.expr.analysis as an - - return ( - ibis.ntile(buckets) - .over(order_by=self) - .resolve(an.find_first_base_table(self.op()).to_expr()) - ) + return ibis.ntile(buckets).over(order_by=self) def cummin(self, *, where=None, group_by=None, order_by=None) -> Column: """Return the cumulative min over a window.""" diff --git a/ibis/expr/types/geospatial.py b/ibis/expr/types/geospatial.py index db61e1d216f1..edfc2d1bfe97 100644 --- a/ibis/expr/types/geospatial.py +++ b/ibis/expr/types/geospatial.py @@ -1622,13 +1622,20 @@ class GeoSpatialScalar(NumericScalar, GeoSpatialValue): @public class GeoSpatialColumn(NumericColumn, GeoSpatialValue): - def unary_union(self) -> ir.GeoSpatialScalar: + def unary_union( + self, where: bool | ir.BooleanValue | None = None + ) -> ir.GeoSpatialScalar: """Aggregate a set of geometries into a union. This corresponds to the aggregate version of the union. We give it a different name (following the corresponding method in GeoPandas) to avoid name conflicts with the non-aggregate version. 
+ Parameters + ---------- + where + Filter expression + Returns ------- GeoSpatialScalar @@ -1642,7 +1649,7 @@ def unary_union(self) -> ir.GeoSpatialScalar: >>> t.geom.unary_union() """ - return ops.GeoUnaryUnion(self).to_expr().name("union") + return ops.GeoUnaryUnion(self, where=where).to_expr() @public diff --git a/ibis/expr/types/groupby.py b/ibis/expr/types/groupby.py index 123b1ca8b71f..b95effa93df5 100644 --- a/ibis/expr/types/groupby.py +++ b/ibis/expr/types/groupby.py @@ -21,91 +21,55 @@ from typing import Iterable, Sequence import ibis -import ibis.expr.analysis as an +from ibis.expr.rewrites import rewrite_window_input import ibis.expr.operations as ops import ibis.expr.types as ir from ibis import util +from ibis.common.grounds import Concrete from ibis.common.deferred import Deferred from ibis.selectors import Selector -from ibis.expr.types.relations import bind_expr + import ibis.common.exceptions as com from public import public - -_function_types = tuple( - filter( - None, - ( - types.BuiltinFunctionType, - types.BuiltinMethodType, - types.FunctionType, - types.LambdaType, - types.MethodType, - getattr(types, "UnboundMethodType", None), - ), - ) -) - - -def _get_group_by_key(table, value): - if isinstance(value, str): - yield table[value] - elif isinstance(value, _function_types): - yield value(table) - elif isinstance(value, Deferred): - yield value.resolve(table) - elif isinstance(value, Selector): - yield from value.expand(table) - elif isinstance(value, ir.Expr): - yield an.sub_immediate_parents(value.op(), table.op()).to_expr() - else: - yield value +from ibis.expr.types.relations import bind +from ibis.common.typing import VarTuple +import ibis.expr.datatypes as dt @public -class GroupedTable: +class GroupedTable(Concrete): """An intermediate table expression to hold grouping information.""" - def __init__(self, table, by, having=None, order_by=None, **expressions): - self.table = table - self.by = list( - itertools.chain( - itertools.chain.from_iterable( - _get_group_by_key(table, v) for v in util.promote_list(by) - ), - ( - expr.name(k) - for k, v in expressions.items() - for expr in _get_group_by_key(table, v) - ), - ) - ) - - if not self.by: - raise com.IbisInputError("The grouping keys list is empty") + table: ops.Relation + groupings: VarTuple[ops.Column] + orderings: VarTuple[ops.SortKey] = () + havings: VarTuple[ops.Value[dt.Boolean]] = () - self._order_by = order_by or [] - self._having = having or [] + def __init__(self, groupings, **kwargs): + if not groupings: + raise com.IbisInputError("No group keys provided") + super().__init__(groupings=groupings, **kwargs) def __getitem__(self, args): # Shortcut for projection with window functions return self.select(*args) def __getattr__(self, attr): - if hasattr(self.table, attr): - return self._column_wrapper(attr) + try: + field = getattr(self.table.to_expr(), attr) + except AttributeError as e: + raise AttributeError(f"GroupedTable has no attribute {attr}") from e - raise AttributeError("GroupBy has no attribute %r" % attr) - - def _column_wrapper(self, attr): - col = self.table[attr] - if isinstance(col, ir.NumericValue): - return GroupedNumbers(col, self) + if isinstance(field, ir.NumericValue): + return GroupedNumbers(field, self) else: - return GroupedArray(col, self) + return GroupedArray(field, self) - def aggregate(self, metrics=None, **kwds) -> ir.Table: + def aggregate(self, metrics=(), **kwds) -> ir.Table: """Compute aggregates over a group by.""" - return self.table.aggregate(metrics, by=self.by, 
having=self._having, **kwds) + return self.table.to_expr().aggregate( + metrics, by=self.groupings, having=self.havings, **kwds + ) agg = aggregate @@ -126,12 +90,9 @@ def having(self, expr: ir.BooleanScalar) -> GroupedTable: GroupedTable A grouped table expression """ - return self.__class__( - self.table, - self.by, - having=self._having + util.promote_list(expr), - order_by=self._order_by, - ) + table = self.table.to_expr() + havings = tuple(bind(table, expr)) + return self.copy(havings=self.havings + havings) def order_by(self, expr: ir.Value | Iterable[ir.Value]) -> GroupedTable: """Sort a grouped table expression by `expr`. @@ -150,12 +111,9 @@ def order_by(self, expr: ir.Value | Iterable[ir.Value]) -> GroupedTable: GroupedTable A sorted grouped GroupedTable """ - return self.__class__( - self.table, - self.by, - having=self._having, - order_by=self._order_by + util.promote_list(expr), - ) + table = self.table.to_expr() + orderings = tuple(bind(table, expr)) + return self.copy(orderings=self.orderings + orderings) def mutate( self, *exprs: ir.Value | Sequence[ir.Value], **kwexprs: ir.Value @@ -228,7 +186,7 @@ def mutate( A table expression with window functions applied """ exprs = self._selectables(*exprs, **kwexprs) - return self.table.mutate(exprs) + return self.table.to_expr().mutate(exprs) def select(self, *exprs, **kwexprs) -> ir.Table: """Project new columns out of the grouped table. @@ -238,7 +196,7 @@ def select(self, *exprs, **kwexprs) -> ir.Table: [`GroupedTable.mutate`](#ibis.expr.types.groupby.GroupedTable.mutate) """ exprs = self._selectables(*exprs, **kwexprs) - return self.table.select(exprs) + return self.table.to_expr().select(exprs) def _selectables(self, *exprs, **kwexprs): """Project new columns out of the grouped table. @@ -247,22 +205,14 @@ def _selectables(self, *exprs, **kwexprs): -------- [`GroupedTable.mutate`](#ibis.expr.types.groupby.GroupedTable.mutate) """ - table = self.table - default_frame = ops.RowsWindowFrame( + table = self.table.to_expr() + frame = ops.RowsWindowFrame( table=self.table, - group_by=bind_expr(self.table, self.by), - order_by=bind_expr(self.table, self._order_by), + group_by=self.groupings, + order_by=self.orderings, ) - return [ - an.windowize_function(e2, default_frame) - for expr in exprs - for e1 in util.promote_list(expr) - for e2 in util.promote_list(table._ensure_expr(e1)) - ] + [ - an.windowize_function(e, default_frame).name(k) - for k, expr in kwexprs.items() - for e in util.promote_list(table._ensure_expr(expr)) - ] + values = bind(table, (exprs, kwexprs)) + return [rewrite_window_input(expr.op(), frame).to_expr() for expr in values] projection = select @@ -319,8 +269,8 @@ def count(self) -> ir.Table: Table The aggregated table """ - metric = self.table.count() - return self.table.aggregate([metric], by=self.by, having=self._having) + table = self.table.to_expr() + return table.aggregate([table.count()], by=self.groupings, having=self.havings) size = count diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py new file mode 100644 index 000000000000..bde607b1d3ee --- /dev/null +++ b/ibis/expr/types/joins.py @@ -0,0 +1,247 @@ +from ibis.expr.types.relations import ( + bind, + dereference_values, + unwrap_aliases, +) +from public import public +import ibis.expr.operations as ops +from ibis.expr.types import Table, ValueExpr +from typing import Any, Optional +from collections.abc import Iterator, Mapping +from ibis.common.deferred import Deferred +from ibis.expr.analysis import flatten_predicates +from 
ibis.expr.operations.relations import JoinKind +from ibis.common.exceptions import ExpressionError, IntegrityError +from ibis import util +import functools +from ibis.expr.types.relations import dereference_mapping +import ibis + + +def disambiguate_fields(how, left_fields, right_fields, lname, rname): + collisions = set() + + if how in ("semi", "anti"): + # discard the right fields per left semi and left anty join semantics + return left_fields, collisions + + lname = lname or "{name}" + rname = rname or "{name}" + overlap = left_fields.keys() & right_fields.keys() + + fields = {} + for name, field in left_fields.items(): + if name in overlap: + name = lname.format(name=name) + fields[name] = field + for name, field in right_fields.items(): + if name in overlap: + name = rname.format(name=name) + # only add if there is no collision + if name in fields: + collisions.add(name) + else: + fields[name] = field + + return fields, collisions + + +def dereference_targets(chain): + yield chain.first + for join in chain.rest: + if join.how not in ("semi", "anti"): + yield join.table + + +def dereference_mapping_left(chain): + rels = dereference_targets(chain) + subs = dereference_mapping(rels) + # join chain fields => link table fields + for k, v in chain.values.items(): + subs[ops.Field(chain, k)] = v + return subs + + +def dereference_mapping_right(right): + if isinstance(right, ops.SelfReference): + # no support for dereferencing, the user must use the right table + # directly in the predicates + return {}, right + + # wrap the right table in a self reference to ensure its uniqueness in the + # join chain which requires dereferencing the predicates from + # right => SelfReference(right) + right = ops.SelfReference(right) + subs = {v: ops.Field(right, k) for k, v in right.values.items()} + return subs, right + + +def dereference_sides(left, right, deref_left, deref_right): + left = left.replace(deref_left, filter=ops.Value) + right = right.replace(deref_right, filter=ops.Value) + return left, right + + +def dereference_binop(pred, deref_left, deref_right): + left, right = dereference_sides(pred.left, pred.right, deref_left, deref_right) + return pred.copy(left=left, right=right) + + +def dereference_value(pred, deref_left, deref_right): + deref_both = {**deref_left, **deref_right} + if isinstance(pred, ops.Binary) and pred.left == pred.right: + return dereference_binop(pred, deref_left, deref_right) + else: + return pred.replace(deref_both, filter=ops.Value) + + +def prepare_predicates(left, right, predicates, deref_left, deref_right, deref_both): + """Bind and dereference predicates to the left and right tables.""" + + for pred in util.promote_list(predicates): + if pred is True or pred is False: + yield ops.Literal(pred, dtype="bool") + elif isinstance(pred, ValueExpr): + node = pred.op() + yield dereference_value(node, deref_left, deref_right) + # yield node.replace(deref_both, filter=ops.Value) + elif isinstance(pred, Deferred): + # resolve deferred expressions on the left table + node = pred.resolve(left).op() + yield dereference_value(node, deref_left, deref_right) + # yield node.replace(deref_both, filter=ops.Value) + else: + if isinstance(pred, tuple): + if len(pred) != 2: + raise ExpressionError("Join key tuple must be length 2") + lk, rk = pred + else: + lk = rk = pred + + # bind the predicates to the join chain + (left_value,) = bind(left, lk) + (right_value,) = bind(right, rk) + + # dereference the left value to one of the relations in the join chain + left_value, right_value = 
dereference_sides( + left_value.op(), right_value.op(), deref_left, deref_right + ) + yield ops.Equals(left_value, right_value).to_expr() + + +def finished(method): + """Decorator to ensure the join chain is finished before calling a method.""" + + @functools.wraps(method) + def wrapper(self, *args, **kwargs): + return method(self._finish(), *args, **kwargs) + + return wrapper + + +@public +class JoinExpr(Table): + __slots__ = ("_collisions",) + + def __init__(self, arg, collisions=None): + super().__init__(arg) + object.__setattr__(self, "_collisions", collisions or set()) + + def _finish(self) -> Table: + """Construct a valid table expression from this join expression.""" + if self._collisions: + raise IntegrityError(f"Name collisions: {self._collisions}") + return Table(self.op()) + + def join( + self, + right, + predicates: Any, + how: JoinKind = "inner", + *, + lname: str = "", + rname: str = "{name}_right", + ): + """Join with another table.""" + import pyarrow as pa + import pandas as pd + + if isinstance(right, (pd.DataFrame, pa.Table)): + right = ibis.memtable(right) + elif not isinstance(right, Table): + raise TypeError( + f"right operand must be a Table, got {type(right).__name__}" + ) + + if how == "left_semi": + how = "semi" + + left = self.op() + right = right.op() + subs_left = dereference_mapping_left(left) + subs_right, right = dereference_mapping_right(right) + subs_both = {**subs_left, **subs_right} + + # bind and dereference the predicates + preds = prepare_predicates( + left.to_expr(), + right.to_expr(), + predicates, + deref_left=subs_left, + deref_right=subs_right, + deref_both=subs_both, + ) + preds = flatten_predicates(list(preds)) + + # calculate the fields based in lname and rname, this should be a best + # effort to avoid collisions, but does not raise if there are any + # if no disambiaution happens using a final .select() call, then + # the finish() method will raise due to the name collisions + values, collisions = disambiguate_fields( + how, left.values, right.fields, lname, rname + ) + + # construct a new join link and add it to the join chain + link = ops.JoinLink(how, table=right, predicates=preds) + left = left.copy(rest=left.rest + (link,), values=values) + + # return with a new JoinExpr wrapping the new join chain + return self.__class__(left, collisions=collisions) + + def select(self, *args, **kwargs): + """Select expressions.""" + chain = self.op() + values = bind(self, (args, kwargs)) + values = unwrap_aliases(values) + + # if there are values referencing fields from the join chain constructed + # so far, we need to replace them the fields from one of the join links + subs = dereference_mapping_left(chain) + values = {k: v.replace(subs, filter=ops.Value) for k, v in values.items()} + + node = chain.copy(values=values) + return Table(node) + + aggregate = finished(Table.aggregate) + alias = finished(Table.alias) + cast = finished(Table.cast) + compile = finished(Table.compile) + count = finished(Table.count) + difference = finished(Table.difference) + distinct = finished(Table.distinct) + drop = finished(Table.drop) + dropna = finished(Table.dropna) + execute = finished(Table.execute) + fillna = finished(Table.fillna) + filter = finished(Table.filter) + group_by = finished(Table.group_by) + intersect = finished(Table.intersect) + limit = finished(Table.limit) + mutate = finished(Table.mutate) + nunique = finished(Table.nunique) + order_by = finished(Table.order_by) + sample = finished(Table.sample) + sql = finished(Table.sql) + unbind = 
finished(Table.unbind) + union = finished(Table.union) + view = finished(Table.view) diff --git a/ibis/expr/types/logical.py b/ibis/expr/types/logical.py index ab0574546a6a..09927223f2ac 100644 --- a/ibis/expr/types/logical.py +++ b/ibis/expr/types/logical.py @@ -244,6 +244,10 @@ class BooleanColumn(NumericColumn, BooleanValue): def any(self, where: BooleanValue | None = None) -> BooleanValue: """Return whether at least one element is `True`. + If the expression does not reference any foreign tables, the result + will be a scalar reduction, otherwise it will be a deferred expression + constructing an exists subquery when passed to a table method. + Parameters ---------- where @@ -254,6 +258,41 @@ def any(self, where: BooleanValue | None = None) -> BooleanValue: BooleanValue Whether at least one element is `True`. + Notes + ----- + Consider the following ibis expressions + + ```python + import ibis + + t = ibis.table(dict(a="string")) + s = ibis.table(dict(a="string")) + + cond = (t.a == s.a).any() + ``` + + Without knowing the table to use as the outer query there are two ways to + turn this expression into a SQL `EXISTS` predicate, depending on which of + `t` or `s` is filtered on. + + Filtering from `t`: + + ```sql + SELECT * + FROM t + WHERE EXISTS (SELECT 1 FROM s WHERE t.a = s.a) + ``` + + Filtering from `s`: + + ```sql + SELECT * + FROM s + WHERE EXISTS (SELECT 1 FROM t WHERE t.a = s.a) + ``` + + Notably the correlated subquery cannot stand on its own. + Examples -------- >>> import ibis @@ -267,17 +306,25 @@ def any(self, where: BooleanValue | None = None) -> BooleanValue: >>> (t.arr == None).any(where=t.arr != None) False """ - import ibis.expr.analysis as an + from ibis.common.deferred import Call, _, Deferred - tables = an.find_immediate_parent_tables(self.op()) + parents = self.op().relations - if len(tables) > 1: - op = ops.UnresolvedExistsSubquery( - tables=[t.to_expr() for t in tables], - predicates=an.find_predicates(self.op(), flatten=True), - ) - else: + def resolve_exists_subquery(outer): + """An exists subquery whose outer leaf table is unknown.""" + (inner,) = (t for t in parents if t != outer.op()) + relation = ops.Project(ops.Filter(inner, [self]), {"1": 1}) + return ops.ExistsSubquery(relation).to_expr() + + if len(parents) == 2: + return Deferred(Call(resolve_exists_subquery, _)) + elif len(parents) == 1: op = ops.Any(self, where=self._bind_reduction_filter(where)) + else: + raise NotImplementedError( + f'Cannot compute "any" for expression of type {type(self)} ' + f"with multiple foreign tables" + ) return op.to_expr() diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index 96cfe2e68535..75dad930e4d2 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -1,13 +1,22 @@ from __future__ import annotations -import collections -import contextlib -import functools + import itertools import operator import re +from collections.abc import Mapping from keyword import iskeyword -from typing import TYPE_CHECKING, Callable, Iterable, Literal, Mapping, Sequence + +from typing import ( + TYPE_CHECKING, + Callable, + Iterable, + Literal, + Mapping, + Sequence, + Any, + Iterator, +) import toolz from public import public @@ -18,9 +27,14 @@ import ibis.expr.operations as ops import ibis.expr.schema as sch from ibis import util -from ibis.common.deferred import Deferred, Resolver +from ibis.common.annotations import annotated + +from ibis.common.deferred import Deferred from ibis.expr.types.core import Expr, _FixedTextJupyterMixin 
from ibis.expr.types.generic import literal +from ibis.expr.types.generic import ValueExpr +from ibis.expr.operations.relations import JoinKind +from ibis.selectors import Selector if TYPE_CHECKING: import pandas as pd @@ -29,6 +43,7 @@ import ibis.expr.types as ir import ibis.selectors as s from ibis.common.typing import SupportsSchema + from ibis.expr.types import Table from ibis.expr.types.groupby import GroupedTable from ibis.expr.types.tvf import WindowedTable from ibis.selectors import IfAnyAll, Selector @@ -37,23 +52,6 @@ _ALIASES = (f"_ibis_view_{n:d}" for n in itertools.count()) -def _ensure_expr(table, expr): - from ibis.selectors import Selector - - # This is different than self._ensure_expr, since we don't want to - # treat `str` or `int` values as column indices - if isinstance(expr, Expr): - return expr - elif util.is_function(expr): - return expr(table) - elif isinstance(expr, Deferred): - return expr.resolve(table) - elif isinstance(expr, Selector): - return expr.expand(table) - else: - return literal(expr) - - def _regular_join_method( name: str, how: Literal[ @@ -68,8 +66,8 @@ def _regular_join_method( ], ): def f( # noqa: D417 - self: Table, - right: Table, + self: ir.Table, + right: ir.Table, predicates: str | Sequence[ str | tuple[str | ir.Column, str | ir.Column] | ir.BooleanValue @@ -77,7 +75,7 @@ def f( # noqa: D417 *, lname: str = "", rname: str = "{name}_right", - ) -> Table: + ) -> ir.Table: """Perform a join between two tables. Parameters @@ -104,6 +102,120 @@ def f( # noqa: D417 return f +# TODO(kszucs): should use (table, *args, **kwargs) instead to avoid interpreting +# nested inputs +def bind(table: TableExpr, value: Any, prefer_column=True) -> Iterator[ir.Value]: + """Bind a value to a table expression.""" + if prefer_column and isinstance(value, (str, int)): + yield table._get_column(value) + elif isinstance(value, ValueExpr): + yield value + elif isinstance(value, TableExpr): + for name in value.columns: + yield value._get_column(name) + elif isinstance(value, Deferred): + yield value.resolve(table) + elif isinstance(value, Selector): + yield from value.expand(table) + elif isinstance(value, Mapping): + for k, v in value.items(): + for val in bind(table, v, prefer_column=prefer_column): + yield val.name(k) + elif util.is_iterable(value): + for v in value: + yield from bind(table, v, prefer_column=prefer_column) + elif isinstance(value, ops.Value): + # TODO(kszucs): from certain builders, like ir.GroupedTable we pass + # operation nodes instead of expressions to table methods, it would + # be better to convert them to expressions before passing them to + # this function + yield value.to_expr() + elif callable(value): + yield value(table) + else: + yield literal(value) + + +def unwrap_aliases(values: Iterator[ir.Value]) -> Mapping[str, ir.Value]: + """ + Unwrap aliases into a mapping of {name: expression}. 
+ """ + result = {} + for value in values: + node = value.op() + if node.name in result: + raise com.IntegrityError( + f"Duplicate column name {node.name!r} in result set" + ) + if isinstance(node, ops.Alias): + result[node.name] = node.arg + else: + result[node.name] = node + return result + + +def dereference_mapping(parents): + mapping = {} + parents = util.promote_list(parents) + for parent in parents: + for k, v in parent.values.items(): + if isinstance(v, ops.Field): + # track down the field in the hierarchy until no modification + # is made so only follow ops.Field nodes not arbitrary values; + # also stop tracking if the field belongs to a parent which + # we want to dereference to, see the docstring of + # `dereference_values()` for more details + while isinstance(v, ops.Field) and v.rel not in parents: + mapping[v] = ops.Field(parent, k) + v = v.rel.values.get(v.name) + elif v.relations: + # do not dereference literal expressions + mapping[v] = ops.Field(parent, k) + return mapping + + +def dereference_values( + parents: Iterable[ops.Parents], values: Mapping[str, ops.Value] +) -> Mapping[str, ops.Value]: + """Trace and replace fields from earlier relations in the hierarchy. + + In order to provide a nice user experience, we need to allow expressions + from earlier relations in the hierarchy. Consider the following example: + + t = ibis.table([('a', 'int64'), ('b', 'string')], name='t') + t1 = t.select([t.a, t.b]) + t2 = t1.filter(t.a > 0) # note that not t1.a is referenced here + t3 = t2.select(t.a) # note that not t2.a is referenced here + + However the relational operations in the IR are strictly enforcing that + the expressions are referencing the immediate parent only. So we need to + track fields upwards the hierarchy to replace `t.a` with `t1.a` and `t2.a` + in the example above. This is called dereferencing. + + Whether we can treat or not a field of a relation semantically equivalent + with a field of an earlier relation in the hierarchy depends on the + `.values` mapping of the relation. Leaf relations, like `t` in the example + above, have an empty `.values` mapping, so we cannot dereference fields + from them. On the other hand a projection, like `t1` in the example above, + has a `.values` mapping like `{'a': t.a, 'b': t.b}`, so we can deduce that + `t1.a` is semantically equivalent with `t.a` and so on. + + Parameters + ---------- + parents + The relations we want the values to point to. + values + The values to dereference. + + Returns + ------- + The same mapping as `values` but with all the dereferenceable fields + replaced with the fields from the parents. + """ + subs = dereference_mapping(parents) + return {k: v.replace(subs, filter=ops.Value) for k, v in values.items()} + + @public class Table(Expr, _FixedTextJupyterMixin): """An immutable and lazy dataframe. @@ -386,6 +498,13 @@ def __interactive_rich_console__(self, console, options): raise e return console.render(table, options=options) + # TODO(kszucs): expose this method in the public API + def _get_column(self, name: str | int) -> ir.Column: + """Get a column from the table.""" + if isinstance(name, int): + name = self.schema().name_at_position(name) + return ops.Field(self, name).to_expr() + def __getitem__(self, what): """Select items from a table expression. 
@@ -630,33 +749,24 @@ def __getitem__(self, what): │ 36.7 │ 19.3 │ 193 │ 3450 │ └────────────────┴───────────────┴───────────────────┴─────────────┘ """ - from ibis.expr.types.generic import Column from ibis.expr.types.logical import BooleanValue if isinstance(what, (str, int)): - return ops.TableColumn(self, what).to_expr() - - if isinstance(what, slice): + return self._get_column(what) + elif isinstance(what, slice): limit, offset = util.slice_to_limit_offset(what, self.count()) return self.limit(limit, offset=offset) - - what = bind_expr(self, what) - - if isinstance(what, (list, tuple, Table)): + elif isinstance(what, (list, tuple, Table)): # Projection case return self.select(what) - elif isinstance(what, BooleanValue): - # Boolean predicate + + (what,) = bind(self, what) + if isinstance(what, BooleanValue): + # TODO(kszucs): this branch should be removed, .filter should be + # used instead return self.filter([what]) - elif isinstance(what, Column): - # Projection convenience - return self.select(what) else: - raise NotImplementedError( - "Selection rows or columns with {} objects is not supported".format( - type(what).__name__ - ) - ) + return self.select(what) def __len__(self): raise com.ExpressionError("Use .count() instead") @@ -698,8 +808,10 @@ def __getattr__(self, key: str) -> ir.Column: │ … │ └───────────┘ """ - with contextlib.suppress(com.IbisTypeError): - return ops.TableColumn(self, key).to_expr() + try: + return self._get_column(key) + except com.IbisTypeError: + pass # A mapping of common attribute typos, mapping them to the proper name common_typos = { @@ -714,6 +826,7 @@ def __getattr__(self, key: str) -> ir.Column: raise AttributeError( f"{type(self).__name__} object has no attribute {key!r}, did you mean {hint!r}" ) + raise AttributeError(f"'Table' object has no attribute {key!r}") def __dir__(self) -> list[str]: @@ -724,28 +837,6 @@ def __dir__(self) -> list[str]: def _ipython_key_completions_(self) -> list[str]: return self.columns - def _ensure_expr(self, expr): - import numpy as np - - from ibis.selectors import Selector - - if isinstance(expr, str): - # treat strings as column names - return self[expr] - elif isinstance(expr, (int, np.integer)): - # treat Python integers as a column index - return self[self.schema().name_at_position(expr)] - elif isinstance(expr, Deferred): - return expr.resolve(self) - elif isinstance(expr, Resolver): - return expr.resolve({"_": self}) - elif isinstance(expr, Selector): - return expr.expand(self) - elif callable(expr): - return expr(self) - else: - return expr - @property def columns(self) -> list[str]: """The list of column names in this table. @@ -796,7 +887,7 @@ def schema(self) -> sch.Schema: def group_by( self, - by: str | ir.Value | Iterable[str] | Iterable[ir.Value] | None = None, + by: str | ir.Value | Iterable[str] | Iterable[ir.Value] | None = (), **key_exprs: str | ir.Value | Iterable[str] | Iterable[ir.Value], ) -> GroupedTable: """Create a grouped table expression. @@ -852,8 +943,13 @@ def group_by( """ from ibis.expr.types.groupby import GroupedTable - return GroupedTable(self, by, **key_exprs) + if by is None: + by = () + groups = bind(self, (by, key_exprs)) + return GroupedTable(self, groups) + + # TODO(kszucs): shouldn't this be ibis.rowid() instead not bound to a specific table? def rowid(self) -> ir.IntegerValue: """A unique integer per row. 
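Since `select()`, `filter()`, `group_by()` and the other table methods now funnel their arguments through the internal `bind()` helper, the accepted input shapes are uniform across them. A short sketch of the kinds of values that get normalised, going through the public API since `bind()` itself is private:

```python
# a short sketch of the input shapes normalised by the internal bind() helper
import ibis
import ibis.selectors as s
from ibis import _

t = ibis.table(name="t", schema={"a": "int64", "b": "string", "c": "float64"})

expr = t.select(
    "a",                        # string column name
    s.of_type("string"),        # selector, expanded to the matching columns
    doubled=_.c * 2,            # deferred expression, resolved against t
    total=lambda t: t.a + t.c,  # callable, invoked with the table
)

# group keys and aggregate metrics go through the same binding logic
agg = t.group_by("b").aggregate(n=_.count())
```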
@@ -889,7 +985,10 @@ def view(self) -> Table: Table Table expression """ - return ops.SelfReference(self).to_expr() + if isinstance(self.op(), ops.SelfReference): + return self + else: + return ops.SelfReference(self).to_expr() def difference(self, table: Table, *rest: Table, distinct: bool = True) -> Table: """Compute the set difference of multiple table expressions. @@ -954,9 +1053,9 @@ def difference(self, table: Table, *rest: Table, distinct: bool = True) -> Table def aggregate( self, - metrics: Sequence[ir.Scalar] | None = None, - by: Sequence[ir.Value] | None = None, - having: Sequence[ir.BooleanValue] | None = None, + metrics: Sequence[ir.Scalar] | None = (), + by: Sequence[ir.Value] | None = (), + having: Sequence[ir.BooleanValue] | None = (), **kwargs: ir.Value, ) -> Table: """Aggregate a table with a given set of reductions grouping by `by`. @@ -1021,33 +1120,46 @@ def aggregate( │ orange │ 0.33 │ 0.33 │ └────────┴────────────┴──────────┘ """ - import ibis.expr.analysis as an + from ibis.expr.rewrites import p + from ibis.common.patterns import Contains, In + + node = self.op() + + groups = bind(self, by) + metrics = bind(self, (metrics, kwargs)) + having = bind(self, having) + + groups = unwrap_aliases(groups) + metrics = unwrap_aliases(metrics) + having = unwrap_aliases(having) + + groups = dereference_values(self.op(), groups) + metrics = dereference_values(self.op(), metrics) + having = dereference_values(self.op(), having) + + # the user doesn't need to specify the metrics used in the having clause + # explicitly, we implicitly add them to the metrics list by looking for + # any metrics depending on self which are not specified explicitly + pattern = p.Reduction(relations=Contains(node)) & ~In(set(metrics.values())) + original_metrics = metrics.copy() + for pred in having.values(): + for metric in pred.find_topmost(pattern): + if metric.name in metrics: + metrics[util.get_name("metric")] = metric + else: + metrics[metric.name] = metric - metrics = itertools.chain( - itertools.chain.from_iterable( - ( - (_ensure_expr(self, m) for m in metric) - if isinstance(metric, (list, tuple)) - else util.promote_list(_ensure_expr(self, metric)) - ) - for metric in util.promote_list(metrics) - ), - ( - e.name(name) - for name, expr in kwargs.items() - for e in util.promote_list(_ensure_expr(self, expr)) - ), - ) + # construct the aggregate node + agg = ops.Aggregate(node, groups, metrics).to_expr() - agg = ops.Aggregation( - self, - metrics=list(metrics), - by=bind_expr(self, util.promote_list(by)), - having=bind_expr(self, util.promote_list(having)), - ) - agg = an.simplify_aggregation(agg) + if having: + # apply the having clause + agg = agg.filter(*having.values()) + # remove any metrics that were only used in the having clause + if metrics != original_metrics: + agg = agg.select(*groups.keys(), *original_metrics.keys()) - return agg.to_expr() + return agg agg = aggregate @@ -1556,22 +1668,14 @@ def order_by( │ 2 │ B │ 6 │ └───────┴────────┴───────┘ """ - import ibis.selectors as s - - sort_keys = [] - for item in util.promote_list(by): - if isinstance(item, tuple): - if len(item) != 2: - raise ValueError(f"Tuple must be of length 2, got {len(item):d}") - sort_keys.append(bind_expr(self, item[0]), item[1]) - elif isinstance(item, s.Selector): - sort_keys.extend(item.expand(self)) - else: - sort_keys.append(bind_expr(self, item)) - - if not sort_keys: + keys = bind(self, by) + keys = unwrap_aliases(keys) + keys = dereference_values(self.op(), keys) + if not keys: raise com.IbisError("At 
least one sort key must be provided") - return self.op().order_by(sort_keys).to_expr() + + node = ops.Sort(self, keys.values()) + return node.to_expr() def union(self, table: Table, *rest: Table, distinct: bool = False) -> Table: """Compute the set union of multiple table expressions. @@ -1708,25 +1812,7 @@ def intersect(self, table: Table, *rest: Table, distinct: bool = True) -> Table: node = ops.Intersection(node, table, distinct=distinct) return node.to_expr().select(self.columns) - def to_array(self) -> ir.Column: - """View a single column table as an array. - - Returns - ------- - Value - A single column view of a table - """ - schema = self.schema() - if len(schema) != 1: - raise com.ExpressionError( - "Table must have exactly one column when viewed as array" - ) - - return ops.TableArrayView(self).to_expr() - - def mutate( - self, exprs: Sequence[ir.Expr] | None = None, **mutations: ir.Value - ) -> Table: + def mutate(self, *exprs: Sequence[ir.Expr] | None, **mutations: ir.Value) -> Table: """Add columns to a table expression. Parameters @@ -1816,28 +1902,14 @@ def mutate( │ Adelie │ 2007 │ -7.22193 │ └─────────┴───────┴────────────────┘ """ - import ibis.expr.analysis as an - - exprs = [] if exprs is None else util.promote_list(exprs) - - new_exprs = [] - - for expr in exprs: - if isinstance(expr, Mapping): - new_exprs.extend( - _ensure_expr(self, val).name(name) for name, val in expr.items() - ) - else: - new_exprs.extend(util.promote_list(_ensure_expr(self, expr))) - - new_exprs.extend( - e.name(name) - for name, expr in mutations.items() - for e in util.promote_list(_ensure_expr(self, expr)) - ) - - mutation_exprs = an.get_mutation_exprs(new_exprs, self) - return self.select(mutation_exprs) + # string and integer inputs are going to be coerced to literals instead + # of interpreted as column references like in select + node = self.op() + values = bind(self, (exprs, mutations), prefer_column=False) + values = unwrap_aliases(values) + # allow overriding of fields, hence the mutation behavior + values = {**node.fields, **values} + return self.select(**values) def select( self, @@ -2016,39 +2088,22 @@ def select( │ 43.92193 │ 17.15117 │ 200.915205 │ 4201.754386 │ └────────────────┴───────────────┴───────────────────┴─────────────┘ """ - import ibis.expr.analysis as an - from ibis.selectors import Selector - - new_exprs = [] + from ibis.expr.rewrites import rewrite_project_input - for expr in exprs: - if isinstance(expr, Selector): - new_exprs.extend(expr.expand(self)) - elif isinstance(expr, Mapping): - new_exprs.extend( - self._ensure_expr(value).name(name) for name, value in expr.items() - ) - else: - new_exprs.extend(map(self._ensure_expr, util.promote_list(expr))) - - new_exprs.extend( - self._ensure_expr(expr).name(name) for name, expr in named_exprs.items() - ) - - if not new_exprs: + values = bind(self, (exprs, named_exprs)) + values = unwrap_aliases(values) + values = dereference_values(self.op(), values) + if not values: raise com.IbisTypeError( "You must select at least one column for a valid projection" ) - for ex in new_exprs: - if not isinstance(ex, Expr): - raise com.IbisTypeError( - "All arguments to `.select` must be coerceable to " - f"expressions - got {type(ex)!r}" - ) - - op = an.Projector(self, new_exprs).get_result() - return op.to_expr() + # we need to detect reductions which are either turned into window functions + # or scalar subqueries depending on whether they are originating from self + values = { + k: rewrite_project_input(v, relation=self.op()) for k, 
v in values.items() + } + return ops.Project(self, values).to_expr() projection = select @@ -2359,7 +2414,7 @@ def drop(self, *fields: str | Selector) -> Table: def filter( self, - predicates: ir.BooleanValue | Sequence[ir.BooleanValue] | IfAnyAll, + *predicates: ir.BooleanValue | Sequence[ir.BooleanValue] | IfAnyAll, ) -> Table: """Select rows from `table` based on `predicates`. @@ -2408,11 +2463,17 @@ def filter( │ male │ 68 │ └────────┴───────────┘ """ - import ibis.expr.analysis as an - - resolved_predicates = _resolve_predicates(self, predicates) - relation = an.pushdown_selection_filters(self.op(), resolved_predicates) - return relation.to_expr() + from ibis.expr.analysis import flatten_predicates + from ibis.expr.rewrites import rewrite_filter_input + + preds = bind(self, predicates) + preds = unwrap_aliases(preds) + preds = dereference_values(self.op(), preds) + preds = flatten_predicates(list(preds.values())) + preds = list(map(rewrite_filter_input, preds)) + if not preds: + raise com.IbisInputError("You must pass at least one predicate to filter") + return ops.Filter(self, preds).to_expr() def nunique(self, where: ir.BooleanValue | None = None) -> ir.IntegerScalar: """Compute the number of unique rows in the table. @@ -2541,7 +2602,7 @@ def dropna( 344 """ if subset is not None: - subset = bind_expr(self, util.promote_list(subset)) + subset = bind(self, subset) return ops.DropNa(self, how, subset).to_expr() def fillna( @@ -2613,7 +2674,7 @@ def fillna( """ schema = self.schema() - if isinstance(replacements, collections.abc.Mapping): + if isinstance(replacements, Mapping): for col, val in replacements.items(): if col not in schema: columns_formatted = ", ".join(map(repr, schema.names)) @@ -2771,17 +2832,7 @@ def join( str | ir.Column | ir.Deferred, ] ] = (), - how: Literal[ - "inner", - "left", - "outer", - "right", - "semi", - "anti", - "any_inner", - "any_left", - "left_semi", - ] = "inner", + how: JoinKind = "inner", *, lname: str = "", rname: str = "{name}_right", @@ -2940,29 +2991,26 @@ def join( │ 106782 │ Leonardo DiCaprio │ 5989 │ Leonardo DiCaprio │ └─────────┴───────────────────┴───────────────┴───────────────────┘ """ + from ibis.expr.types.joins import JoinExpr + + # the first participant of the join can be any Relation, but the rest + # must be wrapped in SelfReferences so that we can join the same table + # with itself multiple times and to enable optimization passes later on + left = left.op() + if isinstance(left, ops.JoinChain): + # if the left side is already a join chain, we can reuse it, for + # example in the `a.join(b)[fields].join(c)` expression the first + # join followed by a projection `a.join(b)[...]` constructs a + # `ir.Table(ops.JoinChain())` expression, which we can reuse here + expr = left.to_expr() + else: + if isinstance(left, ops.SelfReference): + left = left.parent + # construct an empty join chain and wrap it with a JoinExpr, the + # projected fields are the fields of the starting table + expr = ops.JoinChain(left, rest=(), values=left.fields).to_expr() - _join_classes = { - "inner": ops.InnerJoin, - "left": ops.LeftJoin, - "any_inner": ops.AnyInnerJoin, - "any_left": ops.AnyLeftJoin, - "outer": ops.OuterJoin, - "right": ops.RightJoin, - "left_semi": ops.LeftSemiJoin, - "semi": ops.LeftSemiJoin, - "anti": ops.LeftAntiJoin, - "cross": ops.CrossJoin, - } - - klass = _join_classes[how.lower()] - expr = klass(left, right, predicates).to_expr() - - # semi/anti join only give access to the left table's fields, so - # there's never overlap - if how 
in ("left_semi", "semi", "anti"): - return expr - - return ops.relations._dedup_join_columns(expr, lname=lname, rname=rname) + return expr.join(right, predicates, how=how, lname=lname, rname=rname) def asof_join( left: Table, @@ -3005,14 +3053,22 @@ def asof_join( Table Table expression """ - op = ops.AsOfJoin( - left=left, - right=right, - predicates=predicates, - by=by, - tolerance=tolerance, - ) - return ops.relations._dedup_join_columns(op.to_expr(), lname=lname, rname=rname) + if by: + # `by` is an argument that comes from pandas, which for pandas was + # a convenient and fast way to perform a standard join before the + # asof join, so we implement the equivalent behavior here for + # consistency across backends. + left = left.join(right, by, lname=lname, rname=rname) + + if tolerance is not None: + if not isinstance(predicates, str): + raise TypeError( + "tolerance can only be specified when predicates is a string" + ) + left_key, right_key = left[predicates], right[predicates] + predicates = [left_key == right_key, left_key - right_key <= tolerance] + + return left.join(right, predicates, how="asof", lname=lname, rname=rname) def cross_join( left: Table, @@ -3088,12 +3144,12 @@ def cross_join( >>> expr.count() 344 """ - op = ops.CrossJoin( - left, - functools.reduce(Table.cross_join, rest, right), - [], - ) - return ops.relations._dedup_join_columns(op.to_expr(), lname=lname, rname=rname) + left = left.join(right, how="cross", predicates=(), lname=lname, rname=rname) + for right in rest: + left = left.join( + right, how="cross", predicates=(), lname=lname, rname=rname + ) + return left inner_join = _regular_join_method("inner_join", "inner") left_join = _regular_join_method("left_join", "left") @@ -4361,44 +4417,4 @@ def release(self): return current_backend._release_cached(self) -# TODO(kszucs): used at a single place along with an.apply_filter(), should be -# consolidated into a single function -def _resolve_predicates( - table: Table, predicates -) -> tuple[list[ir.BooleanValue], list[tuple[ir.BooleanValue, ir.Table]]]: - import ibis.expr.types as ir - from ibis.common.deferred import _ - from ibis.expr.analysis import flatten_predicate, p - - # TODO(kszucs): clean this up, too much flattening and resolving happens here - predicates = [ - pred.op() - for preds in map( - functools.partial(ir.relations.bind_expr, table), - util.promote_list(predicates), - ) - for pred in util.promote_list(preds) - ] - predicates = flatten_predicate(predicates) - - rules = ( - # turn reductions into table array views so that they can be used as - # WHERE t1.`a` = (SELECT max(t1.`a`) AS `Max(a)` - p.Reduction >> (lambda _: ops.TableArrayView(_.to_expr().as_table())) - | - # resolve unresolved exists subqueries to IN subqueries - p.UnresolvedExistsSubquery >> (lambda _: _.resolve(table.op())) - ) - # do not apply the rules below the following nodes - until = p.Value & ~p.WindowFunction & ~p.TableArrayView & ~p.ExistsSubquery - return [pred.replace(rules, filter=until) for pred in predicates] - - -def bind_expr(table, expr): - if util.is_iterable(expr): - return [bind_expr(table, x) for x in expr] - - return table._ensure_expr(expr) - - -public(TableExpr=Table) +public(TableExpr=Table, CachedTableExpr=CachedTable) diff --git a/ibis/expr/types/temporal_windows.py b/ibis/expr/types/temporal_windows.py index d357c127b15a..865a9922e6a2 100644 --- a/ibis/expr/types/temporal_windows.py +++ b/ibis/expr/types/temporal_windows.py @@ -10,36 +10,19 @@ import ibis.expr.types as ir from ibis.common.deferred import 
Deferred from ibis.selectors import Selector +from ibis.expr.types.relations import bind if TYPE_CHECKING: from ibis.expr.types import Table -def _get_window_by_key(table, value): - if isinstance(value, str): - return table[value] - elif isinstance(value, Deferred): - return value.resolve(table) - elif isinstance(value, Selector): - matches = value.expand(table) - if len(matches) != 1: - raise com.IbisInputError( - "Multiple columns match the selector; only 1 is expected" - ) - return next(iter(matches)) - elif isinstance(value, ir.Expr): - return an.sub_immediate_parents(value.op(), table.op()).to_expr() - else: - return value - - @public class WindowedTable: """An intermediate table expression to hold windowing information.""" def __init__(self, table: ir.Table, time_col: ir.Value): self.table = table - self.time_col = _get_window_by_key(table, time_col) + self.time_col = next(bind(table, time_col)) if self.time_col is None: raise com.IbisInputError( @@ -68,9 +51,10 @@ def tumble( Table Table expression after applying tumbling table-valued function. """ + time_col = next(bind(self.table, self.time_col)) return ops.TumbleWindowingTVF( table=self.table, - time_col=_get_window_by_key(self.table, self.time_col), + time_col=time_col, window_size=window_size, offset=offset, ).to_expr() @@ -106,9 +90,10 @@ def hop( Table Table expression after applying hopping table-valued function. """ + time_col = next(bind(self.table, self.time_col)) return ops.HopWindowingTVF( table=self.table, - time_col=_get_window_by_key(self.table, self.time_col), + time_col=time_col, window_size=window_size, window_slide=window_slide, offset=offset, @@ -143,9 +128,10 @@ def cumulate( Table Table expression after applying cumulate table-valued function. """ + time_col = next(bind(self.table, self.time_col)) return ops.CumulateWindowingTVF( table=self.table, - time_col=_get_window_by_key(self.table, self.time_col), + time_col=time_col, window_size=window_size, window_step=window_step, offset=offset, diff --git a/ibis/expr/visualize.py b/ibis/expr/visualize.py index 0af8f3118336..ef16463251ce 100644 --- a/ibis/expr/visualize.py +++ b/ibis/expr/visualize.py @@ -56,7 +56,7 @@ def get_label(node): node, ( ops.Literal, - ops.TableColumn, + ops.Field, ops.Alias, ops.PhysicalTable, ops.window.RangeWindowFrame, @@ -70,14 +70,14 @@ def get_label(node): label_fmt = "<{}>" label = label_fmt.format(escape(name)) else: - if isinstance(node, ops.TableNode): + if isinstance(node, ops.Relation): label_fmt = "<{}: {}{}>" else: label_fmt = '<{}: {}
:: {}>' # typename is already escaped label = label_fmt.format(escape(nodename), escape(name), typename) else: - if isinstance(node, ops.TableNode): + if isinstance(node, ops.Relation): label_fmt = "<{}{}>" else: label_fmt = '<{}
:: {}>' diff --git a/ibis/selectors.py b/ibis/selectors.py index 9bc5f1a9e654..b094b74839df 100644 --- a/ibis/selectors.py +++ b/ibis/selectors.py @@ -393,7 +393,7 @@ def c(*names: str | ir.Column) -> Predicate: names = frozenset(col if isinstance(col, str) else col.get_name() for col in names) def func(col: ir.Value) -> bool: - schema = col.op().table.schema + schema = col.op().rel.schema if extra_cols := (names - schema.keys()): raise exc.IbisInputError( f"Columns {extra_cols} are not present in {schema.names}" diff --git a/ibis/tests/expr/snapshots/test_format_sql_operations/test_format_sql_query_result/repr.txt b/ibis/tests/expr/snapshots/test_format_sql_operations/test_format_sql_query_result/repr.txt index 1cd75a4812d8..9589a01e618b 100644 --- a/ibis/tests/expr/snapshots/test_format_sql_operations/test_format_sql_query_result/repr.txt +++ b/ibis/tests/expr/snapshots/test_format_sql_operations/test_format_sql_query_result/repr.txt @@ -36,8 +36,7 @@ r1 := SQLStringView[r0]: foo carrier string avg_arrdelay float64 -Selection[r1] - selections: - carrier: r1.carrier - avg_arrdelay: Round(r1.avg_arrdelay, digits=1) - island: Lowercase(r1.carrier) \ No newline at end of file +Project[r1] + carrier: r1.carrier + avg_arrdelay: Round(r1.avg_arrdelay, digits=1) + island: Lowercase(r1.carrier) \ No newline at end of file diff --git a/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_database_table/repr.txt b/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_database_table/repr.txt index 6266bb50b1cc..b67141c7beda 100644 --- a/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_database_table/repr.txt +++ b/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_database_table/repr.txt @@ -7,16 +7,25 @@ r1 := DatabaseTable: test1 f float64 g string -r2 := Selection[r1] - predicates: - r1.f > 0 +r2 := Filter[r1] + r1.f > 0 -r3 := InnerJoin[r0, r2] r2.g == r0.key +r3 := SelfReference[r2] -Aggregation[r3] +r4 := JoinChain[r0] + JoinLink[inner, r3] + r3.g == r0.key + values: + key: r0.key + value: r0.value + c: r3.c + f: r3.f + g: r3.g + +Aggregate[r4] + groups: + g: r4.g + key: r4.key metrics: - foo: Mean(r2.f - r0.value) - bar: Sum(r2.f) - by: - g: r2.g - key: r0.key \ No newline at end of file + foo: Mean(r4.f - r4.value) + bar: Sum(r4.f) \ No newline at end of file diff --git a/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_insert_sort_key/repr.txt b/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_insert_sort_key/repr.txt index d5e678285698..3514fa501a73 100644 --- a/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_insert_sort_key/repr.txt +++ b/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_insert_sort_key/repr.txt @@ -29,23 +29,20 @@ r0 := DatabaseTable: airlines security_delay int32 late_aircraft_delay int32 -r1 := Selection[r0] - selections: - arrdelay: r0.arrdelay - dest: r0.dest +r1 := Project[r0] + arrdelay: r0.arrdelay + dest: r0.dest -r2 := Selection[r1] - selections: - r1 - dest_avg: WindowFunction(func=Mean(r1.arrdelay), frame=RowsWindowFrame(table=r1, group_by=[r1.dest])) - dev: r1.arrdelay - WindowFunction(func=Mean(r1.arrdelay), frame=RowsWindowFrame(table=r1, group_by=[r1.dest])) +r2 := Project[r1] + arrdelay: r1.arrdelay + dest: r1.dest + dest_avg: WindowFunction(func=Mean(r1.arrdelay), frame=RowsWindowFrame(table=r1, group_by=[r1.dest])) + dev: r1.arrdelay - WindowFunction(func=Mean(r1.arrdelay), frame=RowsWindowFrame(table=r1, group_by=[r1.dest])) -r3 
:= Selection[r2] - predicates: - NotNull(r2.dev) +r3 := Filter[r2] + NotNull(r2.dev) -r4 := Selection[r3] - sort_keys: - desc r3.dev +r4 := Sort[r3] + desc r3.dev Limit[r4, n=10] \ No newline at end of file diff --git a/ibis/tests/expr/test_analysis.py b/ibis/tests/expr/test_analysis.py index fa430a6d7fbe..a0b52b84cd6f 100644 --- a/ibis/tests/expr/test_analysis.py +++ b/ibis/tests/expr/test_analysis.py @@ -5,16 +5,12 @@ import ibis import ibis.common.exceptions as com import ibis.expr.operations as ops -from ibis.tests.util import assert_equal +from ibis.expr.rewrites import simplify # Place to collect esoteric expression analysis bugs and tests -# TODO(kszucs): not directly using an analysis function anymore, move to a -# more appropriate test module def test_rewrite_join_projection_without_other_ops(con): - # See #790, predicate pushdown in joins not supported - # Star schema with fact table table = con.table("star1") table2 = con.table("star2") @@ -32,10 +28,32 @@ def test_rewrite_join_projection_without_other_ops(con): view = j2[[filtered, table2["value1"], table3["value2"]]] # Construct the thing we expect to obtain - ex_pred2 = table["bar_id"] == table3["bar_id"] - ex_expr = table.left_join(table2, [pred1]).inner_join(table3, [ex_pred2]) - - assert view.op().table != ex_expr.op() + table2_ref = j2.op().rest[0].table.to_expr() + table3_ref = j2.op().rest[1].table.to_expr() + expected = ops.JoinChain( + first=filtered, + rest=[ + ops.JoinLink( + how="left", + table=table2_ref, + predicates=[filtered["foo_id"] == table2_ref["foo_id"]], + ), + ops.JoinLink( + how="inner", + table=table3_ref, + predicates=[filtered["bar_id"] == table3_ref["bar_id"]], + ), + ], + values={ + "c": filtered.c, + "f": filtered.f, + "foo_id": filtered.foo_id, + "bar_id": filtered.bar_id, + "value1": table2_ref.value1, + "value2": table3_ref.value2, + }, + ) + assert view.op() == expected def test_multiple_join_deeper_reference(): @@ -86,8 +104,8 @@ def test_filter_on_projected_field(con): # Now then! Predicate pushdown here is inappropriate, so we check that # it didn't occur. 
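# A minimal sketch of the behaviour the assertions below encode, assuming the
# new Filter/Project relational IR used throughout this series; the table name
# and schema here are hypothetical, not taken from the surrounding test:
import ibis
import ibis.expr.operations as ops

t = ibis.table({"a": "int64", "b": "float64"}, name="t")
proj = t.select("a", "b")          # builds an ops.Project node
filt = proj.filter(proj.a > 0)     # builds an ops.Filter on top of it
assert isinstance(filt.op(), ops.Filter)
# the predicate is not pushed down below the projection
assert filt.op().parent == proj.op()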
- assert isinstance(result.op(), ops.Selection) - assert result.op().table == tpch.op() + assert isinstance(result.op(), ops.Filter) + assert result.op().parent == tpch.op() def test_join_predicate_from_derived_raises(): @@ -101,18 +119,18 @@ def test_join_predicate_from_derived_raises(): filter_pred = table["f"] > 0 table3 = table[filter_pred] - with pytest.raises(com.ExpressionError): + with pytest.raises(com.IntegrityError, match="they belong to another relation"): + # TODO(kszucs): could be smarter actually and rewrite the predicate + # to contain the conditions from the filter table.inner_join(table2, [table3["g"] == table2["key"]]) def test_bad_join_predicate_raises(): table = ibis.table([("c", "int32"), ("f", "double"), ("g", "string")], "foo_table") - table2 = ibis.table([("key", "string"), ("value", "double")], "bar_table") - table3 = ibis.table([("key", "string"), ("value", "double")], "baz_table") - with pytest.raises(com.ExpressionError): + with pytest.raises(com.IntegrityError): table.inner_join(table2, [table["g"] == table3["key"]]) @@ -130,9 +148,22 @@ def test_filter_self_join(): metric = purchases.amount.sum().name("total") agged = purchases.group_by(["region", "kind"]).aggregate(metric) + assert agged.op() == ops.Aggregate( + parent=purchases, + groups={"region": purchases.region, "kind": purchases.kind}, + metrics={"total": purchases.amount.sum()}, + ) left = agged[agged.kind == "foo"] right = agged[agged.kind == "bar"] + assert left.op() == ops.Filter( + parent=agged, + predicates=[agged.kind == "foo"], + ) + assert right.op() == ops.Filter( + parent=agged, + predicates=[agged.kind == "bar"], + ) cond = left.region == right.region joined = left.join(right, cond) @@ -141,11 +172,18 @@ def test_filter_self_join(): what = [left.region, metric] projected = joined.select(what) - proj_exprs = projected.op().selections - - # proj exprs unaffected by analysis - assert_equal(proj_exprs[0], left.region.op()) - assert_equal(proj_exprs[1], metric.op()) + right_ = joined.op().rest[0].table.to_expr() + join = ops.JoinChain( + first=left, + rest=[ + ops.JoinLink("inner", right_, [left.region == right_.region]), + ], + values={ + "region": left.region, + "diff": left.total - right_.total, + }, + ) + assert projected.op() == join def test_is_ancestor_analytic(): @@ -169,20 +207,17 @@ def test_mutation_fusion_no_overwrite(): result = result.mutate(col1=t["col"] + 1) result = result.mutate(col2=t["col"] + 2) result = result.mutate(col3=t["col"] + 3) - result = result.op() - - first_selection = result - - assert len(result.selections) == 4 - - col1 = (t["col"] + 1).name("col1") - assert first_selection.selections[1] == col1.op() - col2 = (t["col"] + 2).name("col2") - assert first_selection.selections[2] == col2.op() - - col3 = (t["col"] + 3).name("col3") - assert first_selection.selections[3] == col3.op() + simplified = simplify(result.op()) + assert simplified == ops.Project( + parent=t, + values={ + "col": t["col"], + "col1": t["col"] + 1, + "col2": t["col"] + 2, + "col3": t["col"] + 3, + }, + ) # Pr 2635 @@ -196,39 +231,21 @@ def test_mutation_fusion_overwrite(): result = result.mutate(col2=t["col"] + 2) result = result.mutate(col3=t["col"] + 3) result = result.mutate(col=t["col"] - 1) - result = result.mutate(col4=t["col"] + 4) - - second_selection = result.op() - first_selection = second_selection.table - - assert len(first_selection.selections) == 4 - col1 = (t["col"] + 1).name("col1").op() - assert first_selection.selections[1] == col1 - - col2 = (t["col"] + 2).name("col2").op() - 
assert first_selection.selections[2] == col2 - - col3 = (t["col"] + 3).name("col3").op() - assert first_selection.selections[3] == col3 - - # Since the second selection overwrites existing columns, it will - # not have the Table as the first selection - assert len(second_selection.selections) == 5 - - col = (t["col"] - 1).name("col").op() - assert second_selection.selections[0] == col - col1 = first_selection.to_expr()["col1"].op() - assert second_selection.selections[1] == col1 - - col2 = first_selection.to_expr()["col2"].op() - assert second_selection.selections[2] == col2 - - col3 = first_selection.to_expr()["col3"].op() - assert second_selection.selections[3] == col3 - - col4 = (t["col"] + 4).name("col4").op() - assert second_selection.selections[4] == col4 + with pytest.raises(com.IntegrityError): + # unable to dereference the column since result doesn't contain it anymore + result.mutate(col4=t["col"] + 4) + + simplified = simplify(result.op()) + assert simplified == ops.Project( + parent=t, + values={ + "col": t["col"] - 1, + "col1": t["col"] + 1, + "col2": t["col"] + 2, + "col3": t["col"] + 3, + }, + ) # Pr 2635 @@ -237,41 +254,21 @@ def test_select_filter_mutate_fusion(): t = ibis.table(ibis.schema([("col", "float32")]), "t") - result = t[["col"]] - result = result[result["col"].isnan()] - result = result.mutate(col=result["col"].cast("int32")) - - second_selection = result.op() - first_selection = second_selection.table - assert len(second_selection.selections) == 1 - - col = first_selection.to_expr()["col"].cast("int32").name("col").op() - assert second_selection.selections[0] == col - - # we don't look past the projection when a filter is encountered, so the - # number of selections in the first projection (`first_selection`) is 0 - # - # previously we did, but this was buggy when executing against the pandas - # backend - # - # eventually we will bring this back, but we're trading off the ability - # to remove materialize for some performance in the short term - assert len(first_selection.selections) == 1 - assert len(first_selection.predicates) == 1 + t1 = t[["col"]] + assert t1.op() == ops.Project(parent=t, values={"col": t.col}) + t2 = t1[t1["col"].isnan()] + assert t2.op() == ops.Filter(parent=t1, predicates=[t1.col.isnan()]) -def test_no_filter_means_no_selection(): - t = ibis.table(dict(a="string")) - proj = t.filter([]) - assert proj.equals(t) + t3 = t2.mutate(col=t2["col"].cast("int32")) + assert t3.op() == ops.Project(parent=t2, values={"col": t2.col.cast("int32")}) + # create the expected expression + filt = ops.Filter(parent=t, predicates=[t.col.isnan()]).to_expr() + proj = ops.Project(parent=filt, values={"col": filt.col.cast("int32")}).to_expr() -def test_mutate_overwrites_existing_column(): - t = ibis.table(dict(a="string")) - mut = t.mutate(a=42).select(["a"]) - sel = mut.op().selections[0].table.selections[0].arg - assert isinstance(sel, ops.Literal) - assert sel.value == 42 + t3_opt = simplify(t3.op()).to_expr() + assert t3_opt.equals(proj) def test_agg_selection_does_not_share_roots(): @@ -280,5 +277,5 @@ def test_agg_selection_does_not_share_roots(): gb = t.group_by("a") n = s.count() - with pytest.raises(com.RelationError, match="Selection expressions"): + with pytest.raises(com.IntegrityError, match=" they belong to another relation"): gb.aggregate(n=n) diff --git a/ibis/tests/expr/test_selectors.py b/ibis/tests/expr/test_selectors.py index 656e02e38324..a39b773fcebf 100644 --- a/ibis/tests/expr/test_selectors.py +++ b/ibis/tests/expr/test_selectors.py @@ 
-479,14 +479,14 @@ def test_c_error_on_misspelled_column(penguins): def test_order_by_with_selectors(penguins): expr = penguins.order_by(s.of_type("string")) - assert tuple(key.name for key in expr.op().sort_keys) == ( + assert tuple(key.name for key in expr.op().keys) == ( "species", "island", "sex", ) expr = penguins.order_by(s.all()) - assert tuple(key.name for key in expr.op().sort_keys) == tuple(expr.columns) + assert tuple(key.name for key in expr.op().keys) == tuple(expr.columns) with pytest.raises(exc.IbisError): penguins.order_by(~s.all()) diff --git a/ibis/tests/expr/test_set_operations.py b/ibis/tests/expr/test_set_operations.py index 5299852cfd60..b872520ea403 100644 --- a/ibis/tests/expr/test_set_operations.py +++ b/ibis/tests/expr/test_set_operations.py @@ -51,13 +51,13 @@ def test_operation_supports_schemas_with_different_field_order(method): assert u1.schema() == a.schema() - u1 = u1.op().table + u1 = u1.op().parent assert u1.left == a.op() assert u1.right == b.op() # a selection is added to ensure that the field order of the right table # matches the field order of the left table - u2 = u2.op().table + u2 = u2.op().parent assert u2.schema == a.schema() assert u2.left == a.op() diff --git a/ibis/tests/expr/test_struct.py b/ibis/tests/expr/test_struct.py index 6911f0d0765f..c960fefbd126 100644 --- a/ibis/tests/expr/test_struct.py +++ b/ibis/tests/expr/test_struct.py @@ -71,8 +71,16 @@ def test_unpack_from_table(t): def test_lift_join(t, s): join = t.join(s, t.d == s.a.g) result = join.a_right.lift() - expected = join[_.a_right.f, _.a_right.g] - assert result.equals(expected) + + s_ = join.op().rest[0].table.to_expr() + join = ops.JoinChain( + first=t, + rest=[ + ops.JoinLink("inner", s_, [t.d == s_.a.g]), + ], + values={"f": s_.a.f, "g": s_.a.g}, + ) + assert result.op() == join def test_unpack_join_from_table(t, s): diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index dd873f95bf99..85bffdfc5668 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -11,17 +11,17 @@ import ibis import ibis.common.exceptions as com -import ibis.expr.analysis as an import ibis.expr.datatypes as dt import ibis.expr.operations as ops import ibis.expr.schema as sch import ibis.expr.types as ir import ibis.selectors as s from ibis import _ -from ibis import literal as L from ibis.common.annotations import ValidationError -from ibis.common.exceptions import RelationError +from ibis.common.deferred import Deferred +from ibis.common.exceptions import ExpressionError, IntegrityError, RelationError from ibis.expr import api +from ibis.expr.rewrites import simplify from ibis.expr.types import Column, Table from ibis.tests.util import assert_equal, assert_pickle_roundtrip @@ -75,11 +75,19 @@ def test_view_new_relation(table): # # This thing is not exactly a projection, since it has no semantic # meaning when it comes to execution - tview = table.view() + tview1 = table.view() + tview2 = table.view() + tview2_ = tview2.view() - roots = an.find_immediate_parent_tables(tview.op()) - assert len(roots) == 1 - assert roots[0] is tview.op() + node1 = tview1.op() + node2 = tview2.op() + node2_ = tview2_.op() + + assert isinstance(node1, ops.SelfReference) + assert isinstance(node2, ops.SelfReference) + assert node1.parent is node2.parent + assert node1 != node2 + assert node2_ is node2 def test_getitem_column_select(table): @@ -136,7 +144,7 @@ def test_projection(table): proj = table[cols] assert isinstance(proj, Table) - assert isinstance(proj.op(), 
ops.Selection) + assert isinstance(proj.op(), ops.Project) assert proj.schema().names == tuple(cols) for c in cols: @@ -181,7 +189,7 @@ def test_projection_invalid_root(table): right = api.table(schema1, name="bar") exprs = [right["foo"], right["bar"]] - with pytest.raises(RelationError): + with pytest.raises(IntegrityError): left.select(exprs) @@ -199,7 +207,7 @@ def test_projection_with_star_expr(table): # cannot pass an invalid table expression t2 = t.aggregate([t["a"].sum().name("sum(a)")], by=["g"]) - with pytest.raises(RelationError): + with pytest.raises(IntegrityError): t[[t2]] # TODO: there may be some ways this can be invalid @@ -242,14 +250,16 @@ def test_projection_no_expr(table, empty): table.select(empty) -def test_projection_invalid_nested_list(table): - errmsg = "must be coerceable to expressions" - with pytest.raises(com.IbisTypeError, match=errmsg): - table.select(["a", ["b"]]) - with pytest.raises(com.IbisTypeError, match=errmsg): - table[["a", ["b"]]] - with pytest.raises(com.IbisTypeError, match=errmsg): - table["a", ["b"]] +# FIXME(kszucs): currently bind() flattens the list of expressions, so arbitrary +# nesting is allowed, need to revisit +# def test_projection_invalid_nested_list(table): +# errmsg = "must be coerceable to expressions" +# with pytest.raises(com.IbisTypeError, match=errmsg): +# table.select(["a", ["b"]]) +# with pytest.raises(com.IbisTypeError, match=errmsg): +# table[["a", ["b"]]] +# with pytest.raises(com.IbisTypeError, match=errmsg): +# table["a", ["b"]] def test_mutate(table): @@ -331,14 +341,14 @@ def test_filter_no_list(table): def test_add_predicate(table): pred = table["a"] > 5 result = table[pred] - assert isinstance(result.op(), ops.Selection) + assert isinstance(result.op(), ops.Filter) def test_invalid_predicate(table, schema): # a lookalike table2 = api.table(schema, name="bar") predicate = table2.a > 5 - with pytest.raises(RelationError): + with pytest.raises(IntegrityError): table.filter(predicate) @@ -349,13 +359,13 @@ def test_add_predicate_coalesce(table): pred1 = table["a"] > 5 pred2 = table["b"] > 0 - result = table[pred1][pred2] + result = simplify(table[pred1][pred2].op()).to_expr() expected = table.filter([pred1, pred2]) assert_equal(result, expected) # 59, if we are not careful, we can obtain broken refs subset = table[pred1] - result = subset.filter([subset["b"] > 0]) + result = simplify(subset.filter([subset["b"] > 0]).op()).to_expr() assert_equal(result, expected) @@ -496,7 +506,7 @@ def test_limit(table): def test_order_by(table): result = table.order_by(["f"]).op() - sort_key = result.sort_keys[0] + sort_key = result.keys[0] assert_equal(sort_key.expr, table.f.op()) assert sort_key.ascending @@ -505,7 +515,7 @@ def test_order_by(table): result2 = table.order_by("f").op() assert_equal(result, result2) - key2 = result2.sort_keys[0] + key2 = result2.keys[0] assert key2.descending is False @@ -534,24 +544,24 @@ def test_order_by_asc_deferred_sort_key(table): [ param(ibis.NA, ibis.NA.op(), id="na"), param(ibis.random(), ibis.random().op(), id="random"), - param(1.0, L(1.0).op(), id="float"), - param(L("a"), L("a").op(), id="string"), - param(L([1, 2, 3]), L([1, 2, 3]).op(), id="array"), + param(1.0, ibis.literal(1.0).op(), id="float"), + param(ibis.literal("a"), ibis.literal("a").op(), id="string"), + param(ibis.literal([1, 2, 3]), ibis.literal([1, 2, 3]).op(), id="array"), ], ) def test_order_by_scalar(table, key, expected): result = table.order_by(key) - assert result.op().sort_keys == (ops.SortKey(expected),) + assert 
result.op().keys == (ops.SortKey(expected),) @pytest.mark.parametrize( ("key", "exc_type"), [ ("bogus", com.IbisTypeError), - (("bogus", False), com.IbisTypeError), + # (("bogus", False), com.IbisTypeError), (ibis.desc("bogus"), com.IbisTypeError), (1000, IndexError), - ((1000, False), IndexError), + # ((1000, False), IndexError), (_.bogus, AttributeError), (_.bogus.desc(), AttributeError), ], @@ -652,15 +662,51 @@ def test_aggregate_keys_basic(table): repr(result) -def test_aggregate_non_list_inputs(table): - # per #150 +def test_aggregate_having_implicit_metric(table): metric = table.f.sum().name("total") by = "g" having = table.c.sum() > 10 - result = table.aggregate(metric, by=by, having=having) - expected = table.aggregate([metric], by=[by], having=[having]) - assert_equal(result, expected) + implicit_having_metric = table.aggregate(metric, by=by, having=having) + expected_aggregate = ops.Aggregate( + parent=table, + groups={"g": table.g}, + metrics={"total": table.f.sum(), table.c.sum().get_name(): table.c.sum()}, + ) + expected_filter = ops.Filter( + parent=expected_aggregate, + predicates=[ + ops.Greater(ops.Field(expected_aggregate, table.c.sum().get_name()), 10) + ], + ) + expected_project = ops.Project( + parent=expected_filter, + values={ + "g": ops.Field(expected_filter, "g"), + "total": ops.Field(expected_filter, "total"), + }, + ) + assert implicit_having_metric.op() == expected_project + + +def test_agg_having_explicit_metric(table): + metric = table.f.sum().name("total") + by = "g" + having = table.c.sum() > 10 + + explicit_having_metric = table.aggregate( + [metric, table.c.sum().name("sum")], by=by, having=having + ) + expected_aggregate = ops.Aggregate( + parent=table, + groups={"g": table.g}, + metrics={"total": table.f.sum(), "sum": table.c.sum()}, + ) + expected_filter = ops.Filter( + parent=expected_aggregate, + predicates=[ops.Greater(ops.Field(expected_aggregate, "sum"), 10)], + ) + assert explicit_having_metric.op() == expected_filter def test_aggregate_keywords(table): @@ -674,56 +720,32 @@ def test_aggregate_keywords(table): assert_equal(expr2, expected) -def test_filter_aggregate_pushdown_predicate(table): - # In the case where we want to add a predicate to an aggregate - # expression after the fact, rather than having to backpedal and add it - # before calling aggregate. 
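# The `having=` tests above spell out how a post-aggregation predicate now
# lowers to an explicit Aggregate -> Filter -> Project chain. A compact,
# self-contained sketch of that shape, assuming the new relational ops; the
# schema and column names here are hypothetical:
import ibis
import ibis.expr.operations as ops

t = ibis.table({"g": "string", "f": "float64", "c": "int64"}, name="t")
agg = ops.Aggregate(
    parent=t,
    groups={"g": t.g},
    # the implicit having metric is carried along under its generated name
    metrics={"total": t.f.sum(), t.c.sum().get_name(): t.c.sum()},
).to_expr()
filt = ops.Filter(parent=agg, predicates=[agg[t.c.sum().get_name()] > 10]).to_expr()
proj = ops.Project(parent=filt, values={"g": filt.g, "total": filt.total})
# t.aggregate(t.f.sum().name("total"), by="g", having=t.c.sum() > 10) is
# expected to produce the same node as `proj`.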
- # - # TODO (design decision): This could happen automatically when adding a - # predicate originating from the same root table; if an expression is - # created from field references from the aggregated table then it - # becomes a filter predicate applied on top of a view - - pred = table.f > 0 - metrics = [table.a.sum().name("total")] - agged = table.aggregate(metrics, by=["g"]) - filtered = agged.filter([pred]) - expected = table[pred].aggregate(metrics, by=["g"]) - assert_equal(filtered, expected) - - def test_filter_on_literal_then_aggregate(table): # Mostly just a smoketest, this used to error on construction expr = table.filter(ibis.literal(True)).agg(lambda t: t.a.sum().name("total")) assert expr.columns == ["total"] -@pytest.mark.parametrize( - "case_fn", - [ - param(lambda t: t.f.sum(), id="non_boolean"), - param(lambda t: t.f > 2, id="non_scalar"), - ], -) -def test_aggregate_post_predicate(table, case_fn): - # Test invalid having clause - metrics = [table.f.sum().name("total")] - by = ["g"] - having = [case_fn(table)] - - with pytest.raises(ValidationError): - table.aggregate(metrics, by=by, having=having) - - def test_group_by_having_api(table): # #154, add a HAVING post-predicate in a composable way metric = table.f.sum().name("foo") postp = table.d.mean() > 1 - expr = table.group_by("g").having(postp).aggregate(metric) - expected = table.aggregate(metric, by="g", having=postp) - assert_equal(expr, expected) + agg = ops.Aggregate( + parent=table, + groups={"g": table.g}, + metrics={"foo": table.f.sum(), "Mean(d)": table.d.mean()}, + ).to_expr() + filt = ops.Filter( + parent=agg, + predicates=[agg["Mean(d)"] > 1], + ).to_expr() + proj = ops.Project( + parent=filt, + values={"g": filt.g, "foo": filt.foo}, + ) + assert expr.op() == proj def test_group_by_kwargs(table): @@ -756,6 +778,12 @@ def test_groupby_convenience(table): assert_equal(expr, expected) +@pytest.mark.parametrize("group", [[], (), None]) +def test_group_by_nothing(table, group): + with pytest.raises(com.IbisInputError): + table.group_by(group) + + def test_group_by_count_size(table): # #148, convenience for interactive use, and so forth result1 = table.group_by("g").size() @@ -820,16 +848,56 @@ def test_join_no_predicate_list(con): pred = region.r_regionkey == nation.n_regionkey joined = region.inner_join(nation, pred) - expected = region.inner_join(nation, [pred]) - assert_equal(joined, expected) + + nation_ = joined.op().rest[0].table.to_expr() + expected = ops.JoinChain( + first=region, + rest=[ + ops.JoinLink("inner", nation_, [region.r_regionkey == nation_.n_regionkey]) + ], + values={ + "r_regionkey": region.r_regionkey, + "r_name": region.r_name, + "r_comment": region.r_comment, + "n_nationkey": nation_.n_nationkey, + "n_name": nation_.n_name, + "n_regionkey": nation_.n_regionkey, + "n_comment": nation_.n_comment, + }, + ) + assert joined.op() == expected def test_join_deferred(con): region = con.table("tpch_region") nation = con.table("tpch_nation") res = region.join(nation, _.r_regionkey == nation.n_regionkey) - exp = region.join(nation, region.r_regionkey == nation.n_regionkey) - assert_equal(res, exp) + + nation_ = res.op().rest[0].table.to_expr() + expected = ops.JoinChain( + first=region, + rest=[ + ops.JoinLink("inner", nation_, [region.r_regionkey == nation_.n_regionkey]) + ], + values={ + "r_regionkey": region.r_regionkey, + "r_name": region.r_name, + "r_comment": region.r_comment, + "n_nationkey": nation_.n_nationkey, + "n_name": nation_.n_name, + "n_regionkey": nation_.n_regionkey, + 
"n_comment": nation_.n_comment, + }, + ) + assert res.op() == expected + + +def test_join_invalid_predicate(con): + region = con.table("tpch_region") + nation = con.table("tpch_nation") + + with pytest.raises(com.InputTypeError): + region.inner_join(nation, object()) def test_asof_join(): @@ -843,24 +911,51 @@ def test_asof_join(): "time_right", "value2", ] - pred = joined.op().table.predicates[0] + pred = joined.op().rest[0].predicates[0] assert pred.left.name == pred.right.name == "time" +# TODO(kszucs): ensure the correctness of the pd.merge_asof(by=...) argument emulation def test_asof_join_with_by(): left = ibis.table([("time", "int32"), ("key", "int32"), ("value", "double")]) right = ibis.table([("time", "int32"), ("key", "int32"), ("value2", "double")]) - joined = api.asof_join(left, right, "time", by="key") - assert joined.columns == [ - "time", - "key", - "value", - "time_right", - "key_right", - "value2", - ] - by = joined.op().table.by[0] - assert by.left.name == by.right.name == "key" + + join_without_by = api.asof_join(left, right, "time") + right_ = join_without_by.op().rest[0].table.to_expr() + expected = ops.JoinChain( + first=left, + rest=[ops.JoinLink("asof", right_, [left.time == right_.time])], + values={ + "time": left.time, + "key": left.key, + "value": left.value, + "time_right": right_.time, + "key_right": right_.key, + "value2": right_.value2, + }, + ) + assert join_without_by.op() == expected + + join_with_by = api.asof_join(left, right, "time", by="key") + right_ = join_with_by.op().rest[0].table.to_expr() + right__ = join_with_by.op().rest[1].table.to_expr() + expected = ops.JoinChain( + first=left, + rest=[ + ops.JoinLink("inner", right_, [left.key == right_.key]), + ops.JoinLink("asof", right__, [left.time == right__.time]), + ], + values={ + "time": left.time, + "key": left.key, + "value": left.value, + "time_right": right_.time, + "key_right": right_.key, + "value2": right_.value2, + "value2_right": right__.value2, + }, + ) + assert join_with_by.op() == expected @pytest.mark.parametrize( @@ -885,14 +980,28 @@ def test_asof_join_with_tolerance(ibis_interval, timedelta_interval): left = ibis.table([("time", "int32"), ("key", "int32"), ("value", "double")]) right = ibis.table([("time", "int32"), ("key", "int32"), ("value2", "double")]) - joined = api.asof_join(left, right, "time", tolerance=ibis_interval).op() - tolerance = joined.table.tolerance - assert_equal(tolerance, ibis_interval.op()) - - joined = api.asof_join(left, right, "time", tolerance=timedelta_interval).op() - tolerance = joined.table.tolerance - assert isinstance(tolerance.to_expr(), ir.IntervalScalar) - assert isinstance(tolerance, ops.Literal) + for interval in [ibis_interval, timedelta_interval]: + joined = api.asof_join(left, right, "time", tolerance=interval) + right_ = joined.op().rest[0].table.to_expr() + expected = ops.JoinChain( + first=left, + rest=[ + ops.JoinLink( + "asof", + right_, + [left.time == right_.time, (left.time - right_.time) <= interval], + ) + ], + values={ + "time": left.time, + "key": left.key, + "value": left.value, + "time_right": right_.time, + "key_right": right_.key, + "value2": right_.value2, + }, + ) + assert joined.op() == expected def test_equijoin_schema_merge(): @@ -976,7 +1085,9 @@ def test_self_join_no_view_convenience(table): result = table.join(table, [("g", "g")]) expected_cols = list(table.columns) - expected_cols.extend(f"{c}_right" for c in table.columns if c != "g") + # TODO(kszucs): the inner join convenience to don't duplicate the + # equivalent 
columns from the right table is not implemented yet + expected_cols.extend(f"{c}_right" for c in table.columns) # if c != "g") assert result.columns == expected_cols @@ -1050,8 +1161,26 @@ def test_cross_join_multiple(table): c = table["f", "h"] joined = ibis.cross_join(a, b, c) - expected = a.cross_join(b.cross_join(c)) - assert joined.equals(expected) + b_ = joined.op().rest[0].table.to_expr() + c_ = joined.op().rest[1].table.to_expr() + assert joined.op() == ops.JoinChain( + first=a, + rest=[ + ops.JoinLink("cross", b_, []), + ops.JoinLink("cross", c_, []), + ], + values={ + "a": a.a, + "b": a.b, + "c": a.c, + "d": b_.d, + "e": b_.e, + "f": c_.f, + "h": c_.h, + }, + ) + # TODO(kszucs): it must be simplified first using an appropriate rewrite rule + assert not joined.equals(a.cross_join(b.cross_join(c))) def test_filter_join(): @@ -1064,41 +1193,43 @@ def test_filter_join(): repr(filtered) -def test_inner_join_overlapping_column_names(): - t1 = ibis.table([("foo", "string"), ("bar", "string"), ("value1", "double")]) - t2 = ibis.table([("foo", "string"), ("bar", "string"), ("value2", "double")]) - - joined = t1.join(t2, "foo") - expected = t1.join(t2, t1.foo == t2.foo) - assert_equal(joined, expected) - assert joined.columns == ["foo", "bar", "value1", "bar_right", "value2"] - - joined = t1.join(t2, ["foo", "bar"]) - expected = t1.join(t2, [t1.foo == t2.foo, t1.bar == t2.bar]) - assert_equal(joined, expected) - assert joined.columns == ["foo", "bar", "value1", "value2"] - - # Equality predicates don't have same name, need to rename - joined = t1.join(t2, t1.foo == t2.bar) - assert joined.columns == [ - "foo", - "bar", - "value1", - "foo_right", - "bar_right", - "value2", - ] - - # Not all predicates are equality, still need to rename - joined = t1.join(t2, ["foo", t1.value1 < t2.value2]) - assert joined.columns == [ - "foo", - "bar", - "value1", - "foo_right", - "bar_right", - "value2", - ] +# TODO(kszucs): the inner join convenience to don't duplicate the equivalent +# columns from the right table is not implemented yet +# def test_inner_join_overlapping_column_names(): +# t1 = ibis.table([("foo", "string"), ("bar", "string"), ("value1", "double")]) +# t2 = ibis.table([("foo", "string"), ("bar", "string"), ("value2", "double")]) + +# joined = t1.join(t2, "foo") +# expected = t1.join(t2, t1.foo == t2.foo) +# assert_equal(joined, expected) +# assert joined.columns == ["foo", "bar", "value1", "bar_right", "value2"] + +# joined = t1.join(t2, ["foo", "bar"]) +# expected = t1.join(t2, [t1.foo == t2.foo, t1.bar == t2.bar]) +# assert_equal(joined, expected) +# assert joined.columns == ["foo", "bar", "value1", "value2"] + +# # Equality predicates don't have same name, need to rename +# joined = t1.join(t2, t1.foo == t2.bar) +# assert joined.columns == [ +# "foo", +# "bar", +# "value1", +# "foo_right", +# "bar_right", +# "value2", +# ] + +# # Not all predicates are equality, still need to rename +# joined = t1.join(t2, ["foo", t1.value1 < t2.value2]) +# assert joined.columns == [ +# "foo", +# "bar", +# "value1", +# "foo_right", +# "bar_right", +# "value2", +# ] @pytest.mark.parametrize( @@ -1116,24 +1247,38 @@ def test_inner_join_overlapping_column_names(): def test_join_key_alternatives(con, key_maker): t1 = con.table("star1") t2 = con.table("star2") - expected = t1.inner_join(t2, [t1.foo_id == t2.foo_id]) key = key_maker(t1, t2) + joined = t1.inner_join(t2, key) - assert_equal(joined, expected) + t2_ = joined.op().rest[0].table.to_expr() + expected = ops.JoinChain( + first=t1, + rest=[ + 
ops.JoinLink("inner", t2_, [t1.foo_id == t2_.foo_id]), + ], + values={ + "c": t1.c, + "f": t1.f, + "foo_id": t1.foo_id, + "bar_id": t1.bar_id, + "foo_id_right": t2_.foo_id, + "value1": t2_.value1, + "value3": t2_.value3, + }, + ) + assert joined.op() == expected -@pytest.mark.parametrize( - "key,error", - [ - ([("foo_id", "foo_id", "foo_id")], com.ExpressionError), - ([(s.c("foo_id"), s.c("foo_id"))], ValueError), - ], -) -def test_join_key_invalid(con, key, error): + +def test_join_key_invalid(con): t1 = con.table("star1") t2 = con.table("star2") - with pytest.raises(error): - t1.inner_join(t2, key) + + with pytest.raises(ExpressionError): + t1.inner_join(t2, [("foo_id", "foo_id", "foo_id")]) + + # it is working now + t1.inner_join(t2, [(s.c("foo_id"), s.c("foo_id"))]) def test_join_invalid_refs(con): @@ -1142,7 +1287,7 @@ def test_join_invalid_refs(con): t3 = con.table("star3") predicate = t1.bar_id == t3.bar_id - with pytest.raises(com.RelationError): + with pytest.raises(com.IntegrityError): t1.inner_join(t2, [predicate]) @@ -1151,7 +1296,7 @@ def test_join_invalid_expr_type(con): invalid_right = left.foo_id join_key = ["bar_id"] - with pytest.raises(ValidationError): + with pytest.raises(TypeError): left.inner_join(invalid_right, join_key) @@ -1161,7 +1306,7 @@ def test_join_non_boolean_expr(con): # oops predicate = t1.f * t2.value1 - with pytest.raises(com.ExpressionError): + with pytest.raises(ValidationError): t1.inner_join(t2, [predicate]) @@ -1191,8 +1336,28 @@ def test_unravel_compound_equijoin(table): p3 = t1.key3 == t2.key3 joined = t1.inner_join(t2, [p1 & p2 & p3]) - expected = t1.inner_join(t2, [p1, p2, p3]) - assert_equal(joined, expected) + t2_ = joined.op().rest[0].table.to_expr() + expected = ops.JoinChain( + first=t1, + rest=[ + ops.JoinLink( + "inner", + t2_, + [t1.key1 == t2_.key1, t1.key2 == t2_.key2, t1.key3 == t2_.key3], + ) + ], + values={ + "key1": t1.key1, + "key2": t1.key2, + "key3": t1.key3, + "value1": t1.value1, + "key1_right": t2_.key1, + "key2_right": t2_.key2, + "key3_right": t2_.key3, + "value2": t2_.value2, + }, + ) + assert joined.op() == expected def test_union( @@ -1202,11 +1367,11 @@ def test_union( setops_relation_error_message, ): result = setops_table_foo.union(setops_table_bar) - assert isinstance(result.op().table, ops.Union) - assert not result.op().table.distinct + assert isinstance(result.op().parent, ops.Union) + assert not result.op().parent.distinct result = setops_table_foo.union(setops_table_bar, distinct=True) - assert result.op().table.distinct + assert result.op().parent.distinct with pytest.raises(RelationError, match=setops_relation_error_message): setops_table_foo.union(setops_table_baz) @@ -1219,7 +1384,7 @@ def test_intersection( setops_relation_error_message, ): result = setops_table_foo.intersect(setops_table_bar) - assert isinstance(result.op().table, ops.Intersection) + assert isinstance(result.op().parent, ops.Intersection) with pytest.raises(RelationError, match=setops_relation_error_message): setops_table_foo.intersect(setops_table_baz) @@ -1232,7 +1397,7 @@ def test_difference( setops_relation_error_message, ): result = setops_table_foo.difference(setops_table_bar) - assert isinstance(result.op().table, ops.Difference) + assert isinstance(result.op().parent, ops.Difference) with pytest.raises(RelationError, match=setops_relation_error_message): setops_table_foo.difference(setops_table_baz) @@ -1274,14 +1439,23 @@ def t2(): def test_unresolved_existence_predicate(t1, t2): expr = (t1.key1 == t2.key1).any() - assert 
isinstance(expr, ir.BooleanColumn) - assert isinstance(expr.op(), ops.UnresolvedExistsSubquery) + assert isinstance(expr, Deferred) + + filtered = t2.filter(t1.key1 == t2.key1).select(ibis.literal(1)) + subquery = ops.ExistsSubquery(filtered) + expected = ops.Filter(parent=t1, predicates=[subquery]) + assert t1[expr].op() == expected + + filtered = t1.filter(t1.key1 == t2.key1).select(ibis.literal(1)) + subquery = ops.ExistsSubquery(filtered) + expected = ops.Filter(parent=t2, predicates=[subquery]) + assert t2[expr].op() == expected def test_resolve_existence_predicate(t1, t2): expr = t1[(t1.key1 == t2.key1).any()] op = expr.op() - assert isinstance(op, ops.Selection) + assert isinstance(op, ops.Filter) pred = op.predicates[0].to_expr() assert isinstance(pred.op(), ops.ExistsSubquery) @@ -1317,11 +1491,23 @@ def test_group_by_keys(table): def test_having(table): m = table.mutate(foo=table.f * 2, bar=table.e / 2) - expr = m.group_by("foo").having(lambda x: x.foo.sum() > 10).size() - expected = m.group_by("foo").having(m.foo.sum() > 10).size() - assert_equal(expr, expected) + agg = ops.Aggregate( + parent=m, + groups={"foo": m.foo}, + metrics={"CountStar()": ops.CountStar(m), "Sum(foo)": ops.Sum(m.foo)}, + ).to_expr() + filt = ops.Filter( + parent=agg, + predicates=[agg["Sum(foo)"] > 10], + ).to_expr() + proj = ops.Project( + parent=filt, + values={"foo": filt.foo, "CountStar()": filt["CountStar()"]}, + ).to_expr() + + assert expr.equals(proj) def test_filter(table): @@ -1494,16 +1680,20 @@ def test_mutate_chain(): one = ibis.table([("a", "string"), ("b", "string")], name="t") two = one.mutate(b=lambda t: t.b.fillna("Short Term")) three = two.mutate(a=lambda t: t.a.fillna("Short Term")) - a, b = three.op().selections - # we can't fuse these correctly yet - assert isinstance(a, ops.Alias) - assert isinstance(a.arg, ops.Coalesce) - assert isinstance(b, ops.TableColumn) - - expr = b.table.selections[1] - assert isinstance(expr, ops.Alias) - assert isinstance(expr.arg, ops.Coalesce) + values = three.op().values + assert isinstance(values["a"], ops.Coalesce) + assert isinstance(values["b"], ops.Field) + assert values["b"].rel == two.op() + + three_opt = simplify(three.op()) + assert three_opt == ops.Project( + parent=one, + values={ + "a": one.a.fillna("Short Term"), + "b": one.b.fillna("Short Term"), + }, + ) # TODO(kszucs): move this test case to ibis/tests/sql since it requires the @@ -1613,11 +1803,11 @@ def test_join_lname_rname_still_collide(): t2 = ibis.table({"id": "int64", "col1": "int64", "col2": "int64"}) t3 = ibis.table({"id": "int64", "col1": "int64", "col2": "int64"}) - with pytest.raises(com.IntegrityError) as rec: - t1.left_join(t2, "id").left_join(t3, "id") + with pytest.raises(com.IntegrityError): + t1.left_join(t2, "id").left_join(t3, "id")._finish() - assert "`['col1_right', 'col2_right', 'id_right']`" in str(rec.value) - assert "`lname='', rname='{name}_right'`" in str(rec.value) + # assert "`['col1_right', 'col2_right', 'id_right']`" in str(rec.value) + # assert "`lname='', rname='{name}_right'`" in str(rec.value) def test_drop(): @@ -1690,22 +1880,15 @@ def test_array_string_compare(): @pytest.mark.parametrize("value", [True, False]) -@pytest.mark.parametrize( - "api", - [ - param(lambda t, value: t[value], id="getitem"), - param(lambda t, value: t.filter(value), id="filter"), - ], -) -def test_filter_with_literal(value, api): +def test_filter_with_literal(value): t = ibis.table(dict(a="string")) - filt = api(t, ibis.literal(value)) - assert filt is not None + filt = 
t.filter(ibis.literal(value)) + assert filt.op() == ops.Filter(parent=t, predicates=[ibis.literal(value)]) # ints are invalid predicates int_val = ibis.literal(int(value)) - with pytest.raises((NotImplementedError, ValidationError, com.IbisTypeError)): - api(t, int_val) + with pytest.raises(ValidationError): + t.filter(int_val) def test_cast(): diff --git a/ibis/tests/expr/test_value_exprs.py b/ibis/tests/expr/test_value_exprs.py index 4e597f435834..bb9e3e8ba8f6 100644 --- a/ibis/tests/expr/test_value_exprs.py +++ b/ibis/tests/expr/test_value_exprs.py @@ -308,7 +308,7 @@ def test_distinct_table(functional_alltypes): expr = functional_alltypes.distinct() assert isinstance(expr.op(), ops.Distinct) assert isinstance(expr, ir.Table) - assert expr.op().table == functional_alltypes.op() + assert expr.op().parent == functional_alltypes.op() def test_nunique(functional_alltypes): @@ -1465,10 +1465,9 @@ def test_deferred_r_ops(op_name, expected_left, expected_right): op = getattr(operator, op_name) expr = t[op(left, right).name("b")] - - op = expr.op().selections[0].arg - assert op.left.equals(expected_left(t).op()) - assert op.right.equals(expected_right(t).op()) + node = expr.op().values["b"] + assert node.left.equals(expected_left(t).op()) + assert node.right.equals(expected_right(t).op()) @pytest.mark.parametrize( @@ -1671,9 +1670,9 @@ def test_quantile_shape(): projs = [b1] expr = t.select(projs) - (b1,) = expr.op().selections + b1 = expr.br2 - assert b1.shape.is_columnar() + assert b1.op().shape.is_columnar() def test_sample(): diff --git a/ibis/tests/expr/test_window_frames.py b/ibis/tests/expr/test_window_frames.py index 1b0d8cd7268f..31585fc28ef5 100644 --- a/ibis/tests/expr/test_window_frames.py +++ b/ibis/tests/expr/test_window_frames.py @@ -502,8 +502,7 @@ def test_window_analysis_combine_preserves_existing_window(): ) w = ibis.cumulative_window(order_by=t.one) mut = t.group_by(t.three).mutate(four=t.two.sum().over(w)) - - assert mut.op().selections[1].arg.frame.start is None + assert mut.op().values["four"].frame.start is None def test_window_analysis_auto_windowize_bug(): @@ -552,19 +551,18 @@ def test_group_by_with_window_function_preserves_range(alltypes): w = ibis.cumulative_window(order_by=t.one) expr = t.group_by(t.three).mutate(four=t.two.sum().over(w)) - expected = ops.Selection( - t, - [ - t, - ops.Alias( - ops.WindowFunction( - func=ops.Sum(t.two), - frame=ops.RowsWindowFrame( - table=t, end=0, group_by=[t.three], order_by=[t.one] - ), + expected = ops.Project( + parent=t, + values={ + "one": t.one, + "two": t.two, + "three": t.three, + "four": ops.WindowFunction( + func=ops.Sum(t.two), + frame=ops.RowsWindowFrame( + table=t, end=0, group_by=[t.three], order_by=[t.one] ), - name="four", ), - ], + }, ) assert expr.op() == expected diff --git a/ibis/tests/expr/test_window_functions.py b/ibis/tests/expr/test_window_functions.py index 1c2fd6110468..fdc7be22f385 100644 --- a/ibis/tests/expr/test_window_functions.py +++ b/ibis/tests/expr/test_window_functions.py @@ -40,9 +40,10 @@ def test_mutate_with_analytic_functions(alltypes): exprs = [expr.name("e%d" % i) for i, expr in enumerate(exprs)] proj = g.mutate(exprs) - for field in proj.op().selections[1:]: - assert isinstance(field, ops.Alias) - assert isinstance(field.arg, ops.WindowFunction) + + values = list(proj.op().values.values()) + for field in values[len(t.schema()) :]: + assert isinstance(field, ops.WindowFunction) def test_value_over_api(alltypes): @@ -70,5 +71,5 @@ def test_conflicting_window_boundaries(alltypes): def 
test_rank_followed_by_over_call_merge_frames(alltypes): t = alltypes expr1 = t.f.percent_rank().over(ibis.window(group_by=t.f.notnull())) - expr2 = ibis.percent_rank().over(group_by=t.f.notnull(), order_by=t.f).resolve(t) + expr2 = ibis.percent_rank().over(group_by=t.f.notnull(), order_by=t.f) assert expr1.equals(expr2) From 0ecc96f2f70f8adef32a90d900d67aa5d7a34bc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 21 Dec 2023 16:40:45 +0100 Subject: [PATCH 011/161] refactor(ir): wrap `JoinChain.first` in `ops.SelfReference` similar to the rest of the join tables --- ibis/expr/operations/relations.py | 2 +- .../test_format/test_asof_join/repr.txt | 26 +- .../repr.txt | 16 +- .../repr.txt | 18 +- .../test_table_count_expr/join_repr.txt | 20 +- .../test_format/test_two_inner_joins/repr.txt | 30 +- ibis/expr/tests/test_newrels.py | 783 ++++++++++-------- ibis/expr/types/joins.py | 6 +- ibis/expr/types/relations.py | 14 +- .../test_memoize_database_table/repr.txt | 32 +- ibis/tests/expr/test_analysis.py | 101 ++- ibis/tests/expr/test_struct.py | 25 +- ibis/tests/expr/test_table.py | 319 +++---- 13 files changed, 730 insertions(+), 662 deletions(-) diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index d42637013730..9a2ecef54622 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -238,7 +238,7 @@ class JoinLink(Node): @public class JoinChain(Relation): - first: Relation + first: SelfReference rest: VarTuple[JoinLink] values: FrozenDict[str, Unaliased[Value]] diff --git a/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt b/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt index 263a594f7ef7..e28f3c5bb0df 100644 --- a/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt @@ -6,19 +6,21 @@ r1 := UnboundTable: right time2 int32 value2 float64 -r2 := SelfReference[r1] +r2 := SelfReference[r0] r3 := SelfReference[r1] -JoinChain[r0] - JoinLink[asof, r2] - r0.time1 == r2.time2 - JoinLink[inner, r3] - r0.value == r3.value2 +r4 := SelfReference[r1] + +JoinChain[r2] + JoinLink[asof, r3] + r2.time1 == r3.time2 + JoinLink[inner, r4] + r2.value == r4.value2 values: - time1: r0.time1 - value: r0.value - time2: r2.time2 - value2: r2.value2 - time2_right: r3.time2 - value2_right: r3.value2 \ No newline at end of file + time1: r2.time1 + value: r2.value + time2: r3.time2 + value2: r3.value2 + time2_right: r4.time2 + value2_right: r4.value2 \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_format_multiple_join_with_projection/repr.txt b/ibis/expr/tests/snapshots/test_format/test_format_multiple_join_with_projection/repr.txt index d1ed4735f67a..2e5cd4a00c70 100644 --- a/ibis/expr/tests/snapshots/test_format/test_format_multiple_join_with_projection/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_format_multiple_join_with_projection/repr.txt @@ -19,15 +19,17 @@ r4 := SelfReference[r2] r5 := Filter[r0] r0.f > 0 -JoinChain[r5] +r6 := SelfReference[r5] + +JoinChain[r6] JoinLink[left, r3] - r5.foo_id == r3.foo_id + r6.foo_id == r3.foo_id JoinLink[inner, r4] - r5.bar_id == r4.bar_id + r6.bar_id == r4.bar_id values: - c: r5.c - f: r5.f - foo_id: r5.foo_id - bar_id: r5.bar_id + c: r6.c + f: r6.f + foo_id: r6.foo_id + bar_id: r6.bar_id value1: r3.value1 value2: r4.value2 \ No newline at end of file diff --git 
a/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_tables_in_join/repr.txt b/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_tables_in_join/repr.txt index 95a08486a774..758b88722b59 100644 --- a/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_tables_in_join/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_tables_in_join/repr.txt @@ -17,13 +17,15 @@ r2 := Filter[r1] r3 := Filter[r1] r1.kind == 'bar' -r4 := SelfReference[r3] +r4 := SelfReference[r2] -JoinChain[r2] - JoinLink[inner, r4] - r2.region == r4.region +r5 := SelfReference[r3] + +JoinChain[r4] + JoinLink[inner, r5] + r4.region == r5.region values: - region: r2.region - kind: r2.kind - total: r2.total - right_total: r4.total \ No newline at end of file + region: r4.region + kind: r4.kind + total: r4.total + right_total: r5.total \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt b/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt index 96aa59a58a31..999c2664f114 100644 --- a/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt @@ -6,15 +6,17 @@ r1 := UnboundTable: t2 a int64 b float64 -r2 := SelfReference[r1] +r2 := SelfReference[r0] -r3 := JoinChain[r0] - JoinLink[inner, r2] - r0.a == r2.a +r3 := SelfReference[r1] + +r4 := JoinChain[r2] + JoinLink[inner, r3] + r2.a == r3.a values: - a: r0.a - b: r0.b - a_right: r2.a - b_right: r2.b + a: r2.a + b: r2.b + a_right: r3.a + b_right: r3.b -CountStar(): CountStar(r3) \ No newline at end of file +CountStar(): CountStar(r4) \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_two_inner_joins/repr.txt b/ibis/expr/tests/snapshots/test_format/test_two_inner_joins/repr.txt index aa61982fec8f..37d25bcc6b54 100644 --- a/ibis/expr/tests/snapshots/test_format/test_two_inner_joins/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_two_inner_joins/repr.txt @@ -8,22 +8,24 @@ r1 := UnboundTable: right value2 float64 b string -r2 := SelfReference[r1] +r2 := SelfReference[r0] r3 := SelfReference[r1] -JoinChain[r0] - JoinLink[inner, r2] - r0.a == r2.b +r4 := SelfReference[r1] + +JoinChain[r2] JoinLink[inner, r3] - r0.value == r3.value2 + r2.a == r3.b + JoinLink[inner, r4] + r2.value == r4.value2 values: - time1: r0.time1 - value: r0.value - a: r0.a - time2: r2.time2 - value2: r2.value2 - b: r2.b - time2_right: r3.time2 - value2_right: r3.value2 - b_right: r3.b \ No newline at end of file + time1: r2.time1 + value: r2.value + a: r2.a + time2: r3.time2 + value2: r3.value2 + b: r3.b + time2_right: r4.time2 + value2_right: r4.value2 + b_right: r4.b \ No newline at end of file diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py index 1f0737c3897c..d3821cecd73f 100644 --- a/ibis/expr/tests/test_newrels.py +++ b/ibis/expr/tests/test_newrels.py @@ -1,5 +1,8 @@ from __future__ import annotations +import contextlib +import itertools + import pytest import ibis @@ -32,6 +35,19 @@ ) +@contextlib.contextmanager +def self_references(*tables): + old_counter = ops.SelfReference._uid_counter + # set a new counter with 1000 to avoid colliding with manually created + # self-references using t.view() + new_counter = itertools.count(1000) + try: + ops.SelfReference._uid_counter = new_counter + yield tuple(ops.SelfReference(t).to_expr() for t in tables) + finally: + ops.SelfReference._uid_counter = old_counter + + def 
test_field(): f = Field(t, "bool_col") assert f.rel == t.op() @@ -471,7 +487,9 @@ def test_join(): t1 = ibis.table(name="t1", schema={"a": "int64", "b": "string"}) t2 = ibis.table(name="t2", schema={"c": "int64", "d": "string"}) - joined = t1.join(t2, [t1.a == t2.c]) + with self_references(): + joined = t1.join(t2, [t1.a == t2.c]) + assert isinstance(joined, ir.JoinExpr) assert isinstance(joined.op(), JoinChain) assert isinstance(joined.op().to_expr(), ir.JoinExpr) @@ -481,39 +499,39 @@ def test_join(): assert isinstance(joined.op(), JoinChain) assert isinstance(joined.op().to_expr(), ir.JoinExpr) - t2_ = joined.op().rest[0].table.to_expr() - assert result.op() == JoinChain( - first=t1, - rest=[ - JoinLink("inner", t2_, [t1.a == t2_.c]), - ], - values={ - "a": t1.a, - "b": t1.b, - "c": t2_.c, - "d": t2_.d, - }, - ) + with self_references(t1, t2) as (t1, t2): + assert result.op() == JoinChain( + first=t1, + rest=[ + JoinLink("inner", t2, [t1.a == t2.c]), + ], + values={ + "a": t1.a, + "b": t1.b, + "c": t2.c, + "d": t2.d, + }, + ) def test_join_unambiguous_select(): a = ibis.table(name="a", schema={"a_int": "int64", "a_str": "string"}) b = ibis.table(name="b", schema={"b_int": "int64", "b_str": "string"}) - join = a.join(b, a.a_int == b.b_int) - expr1 = join["a_int", "b_int"] - expr2 = join.select("a_int", "b_int") - assert expr1.equals(expr2) - - b_ = join.op().rest[0].table.to_expr() - assert expr1.op() == JoinChain( - first=a, - rest=[JoinLink("inner", b_, [a.a_int == b_.b_int])], - values={ - "a_int": a.a_int, - "b_int": b_.b_int, - }, - ) + with self_references(): + join = a.join(b, a.a_int == b.b_int) + expr1 = join["a_int", "b_int"] + expr2 = join.select("a_int", "b_int") + assert expr1.equals(expr2) + with self_references(a, b) as (r1, r2): + assert expr1.op() == JoinChain( + first=r1, + rest=[JoinLink("inner", r2, [r1.a_int == r2.b_int])], + values={ + "a_int": r1.a_int, + "b_int": r2.b_int, + }, + ) def test_join_with_subsequent_projection(): @@ -521,40 +539,42 @@ def test_join_with_subsequent_projection(): t2 = ibis.table(name="t2", schema={"c": "int64", "d": "string"}) # a single computed value is pulled to a subsequent projection - joined = t1.join(t2, [t1.a == t2.c]) - expr = joined.select(t1.a, t1.b, col=t2.c + 1) - t2_ = joined.op().rest[0].table.to_expr() - expected = JoinChain( - first=t1, - rest=[JoinLink("inner", t2_, [t1.a == t2_.c])], - values={"a": t1.a, "b": t1.b, "col": t2_.c + 1}, - ) - assert expr.op() == expected + with self_references(): + joined = t1.join(t2, [t1.a == t2.c]) + expr = joined.select(t1.a, t1.b, col=t2.c + 1) + with self_references(t1, t2) as (r1, r2): + expected = JoinChain( + first=r1, + rest=[JoinLink("inner", r2, [r1.a == r2.c])], + values={"a": r1.a, "b": r1.b, "col": r2.c + 1}, + ) + assert expr.op() == expected # multiple computed values - joined = t1.join(t2, [t1.a == t2.c]) - expr = joined.select( - t1.a, - t1.b, - foo=t2.c + 1, - bar=t2.c + 2, - baz=t2.d.name("bar") + "3", - baz2=(t2.c + t1.a).name("foo"), - ) - t2_ = joined.op().rest[0].table.to_expr() - expected = JoinChain( - first=t1, - rest=[JoinLink("inner", t2_, [t1.a == t2_.c])], - values={ - "a": t1.a, - "b": t1.b, - "foo": t2_.c + 1, - "bar": t2_.c + 2, - "baz": t2_.d.name("bar") + "3", - "baz2": t2_.c + t1.a, - }, - ) - assert expr.op() == expected + with self_references(): + joined = t1.join(t2, [t1.a == t2.c]) + expr = joined.select( + t1.a, + t1.b, + foo=t2.c + 1, + bar=t2.c + 2, + baz=t2.d.name("bar") + "3", + baz2=(t2.c + t1.a).name("foo"), + ) + with self_references(t1, 
t2) as (r1, r2): + expected = JoinChain( + first=r1, + rest=[JoinLink("inner", r2, [r1.a == r2.c])], + values={ + "a": r1.a, + "b": r1.b, + "foo": r2.c + 1, + "bar": r2.c + 2, + "baz": r2.d.name("bar") + "3", + "baz2": r2.c + r1.a, + }, + ) + assert expr.op() == expected def test_join_with_subsequent_projection_colliding_names(): @@ -563,25 +583,26 @@ def test_join_with_subsequent_projection_colliding_names(): name="t2", schema={"a": "int64", "b": "string", "c": "float", "d": "string"} ) - joined = t1.join(t2, [t1.a == t2.a]) - expr = joined.select( - t1.a, - t1.b, - foo=t2.a + 1, - bar=t1.a + t2.a, - ) - t2_ = joined.op().rest[0].table.to_expr() - expected = JoinChain( - first=t1, - rest=[JoinLink("inner", t2_, [t1.a == t2_.a])], - values={ - "a": t1.a, - "b": t1.b, - "foo": t2_.a + 1, - "bar": t1.a + t2_.a, - }, - ) - assert expr.op() == expected + with self_references(): + joined = t1.join(t2, [t1.a == t2.a]) + expr = joined.select( + t1.a, + t1.b, + foo=t2.a + 1, + bar=t1.a + t2.a, + ) + with self_references(t1, t2) as (r1, r2): + expected = JoinChain( + first=r1, + rest=[JoinLink("inner", r2, [r1.a == r2.a])], + values={ + "a": r1.a, + "b": r1.b, + "foo": r2.a + 1, + "bar": r1.a + r2.a, + }, + ) + assert expr.op() == expected def test_chained_join(): @@ -589,44 +610,44 @@ def test_chained_join(): b = ibis.table(name="b", schema={"c": "int64", "d": "string"}) c = ibis.table(name="c", schema={"e": "int64", "f": "string"}) - joined = a.join(b, [a.a == b.c]).join(c, [a.a == c.e]) - result = joined._finish() + with self_references(): + joined = a.join(b, [a.a == b.c]).join(c, [a.a == c.e]) + result = joined._finish() - b_ = joined.op().rest[0].table.to_expr() - c_ = joined.op().rest[1].table.to_expr() - assert result.op() == JoinChain( - first=a, - rest=[ - JoinLink("inner", b_, [a.a == b_.c]), - JoinLink("inner", c_, [a.a == c_.e]), - ], - values={ - "a": a.a, - "b": a.b, - "c": b_.c, - "d": b_.d, - "e": c_.e, - "f": c_.f, - }, - ) + with self_references(a, b, c) as (r1, r2, r3): + assert result.op() == JoinChain( + first=r1, + rest=[ + JoinLink("inner", r2, [r1.a == r2.c]), + JoinLink("inner", r3, [r1.a == r3.e]), + ], + values={ + "a": r1.a, + "b": r1.b, + "c": r2.c, + "d": r2.d, + "e": r3.e, + "f": r3.f, + }, + ) - joined = a.join(b, [a.a == b.c]).join(c, [b.c == c.e]) - result = joined.select(a.a, b.d, c.f) + with self_references(): + joined = a.join(b, [a.a == b.c]).join(c, [b.c == c.e]) + result = joined.select(a.a, b.d, c.f) - b_ = joined.op().rest[0].table.to_expr() - c_ = joined.op().rest[1].table.to_expr() - assert result.op() == JoinChain( - first=a, - rest=[ - JoinLink("inner", b_, [a.a == b_.c]), - JoinLink("inner", c_, [b_.c == c_.e]), - ], - values={ - "a": a.a, - "d": b_.d, - "f": c_.f, - }, - ) + with self_references(a, b, c) as (r1, r2, r3): + assert result.op() == JoinChain( + first=r1, + rest=[ + JoinLink("inner", r2, [r1.a == r2.c]), + JoinLink("inner", r3, [r2.c == r3.e]), + ], + values={ + "a": r1.a, + "d": r2.d, + "f": r3.f, + }, + ) def test_chained_join_referencing_intermediate_table(): @@ -634,26 +655,30 @@ def test_chained_join_referencing_intermediate_table(): b = ibis.table(name="b", schema={"c": "int64", "d": "string"}) c = ibis.table(name="c", schema={"e": "int64", "f": "string"}) - ab = a.join(b, [a.a == b.c]) - assert isinstance(ab, ir.JoinExpr) + with self_references(): + ab = a.join(b, [a.a == b.c]) + abc = ab.join(c, [ab.a == c.e]) + result = abc._finish() + with self_references(a, b, c) as (r1, r2, r3): + assert result.op() == JoinChain( + first=r1, + 
rest=[ + JoinLink("inner", r2, [r1.a == r2.c]), + JoinLink("inner", r3, [r1.a == r3.e]), + ], + values={ + "a": r1.a, + "b": r1.b, + "c": r2.c, + "d": r2.d, + "e": r3.e, + "f": r3.f, + }, + ) - # assert ab.a.op() == Field(ab, "a") - abc = ab.join(c, [ab.a == c.e]) + assert isinstance(ab, ir.JoinExpr) assert isinstance(abc, ir.JoinExpr) - result = abc._finish() - - b_ = abc.op().rest[0].table.to_expr() - c_ = abc.op().rest[1].table.to_expr() - assert result.op() == JoinChain( - first=a, - rest=[ - JoinLink("inner", b_, [a.a == b_.c]), - JoinLink("inner", c_, [a.a == c_.e]), - ], - values={"a": a.a, "b": a.b, "c": b_.c, "d": b_.d, "e": c_.e, "f": c_.f}, - ) - def test_join_predicate_dereferencing(): # See #790, predicate pushdown in joins not supported @@ -666,68 +691,60 @@ def test_join_predicate_dereferencing(): filtered = table[table["f"] > 0] # dereference table.foo_id to filtered.foo_id - j1 = filtered.left_join(table2, table["foo_id"] == table2["foo_id"]) - - table2_ = j1.op().rest[0].table.to_expr() - expected = ops.JoinChain( - first=filtered, - rest=[ - ops.JoinLink("left", table2_, [filtered.foo_id == table2_.foo_id]), - ], - values={ - "c": filtered.c, - "f": filtered.f, - "foo_id": filtered.foo_id, - "bar_id": filtered.bar_id, - "foo_id_right": table2_.foo_id, - "value1": table2_.value1, - "value3": table2_.value3, - }, - ) - assert j1.op() == expected - - j2 = j1.inner_join(table3, filtered["bar_id"] == table3["bar_id"]) + with self_references(): + j1 = filtered.left_join(table2, table["foo_id"] == table2["foo_id"]) + with self_references(filtered, table2) as (r1, r2): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("left", r2, [r1.foo_id == r2.foo_id]), + ], + values={ + "c": r1.c, + "f": r1.f, + "foo_id": r1.foo_id, + "bar_id": r1.bar_id, + "foo_id_right": r2.foo_id, + "value1": r2.value1, + "value3": r2.value3, + }, + ) + assert j1.op() == expected - table2_ = j2.op().rest[0].table.to_expr() - table3_ = j2.op().rest[1].table.to_expr() - expected = ops.JoinChain( - first=filtered, - rest=[ - ops.JoinLink("left", table2_, [filtered.foo_id == table2_.foo_id]), - ops.JoinLink("inner", table3_, [filtered.bar_id == table3_.bar_id]), - ], - values={ - "c": filtered.c, - "f": filtered.f, - "foo_id": filtered.foo_id, - "bar_id": filtered.bar_id, - "foo_id_right": table2_.foo_id, - "value1": table2_.value1, - "value3": table2_.value3, - "bar_id_right": table3_.bar_id, - "value2": table3_.value2, - }, - ) - assert j2.op() == expected - - # Project out the desired fields - view = j2[[filtered, table2["value1"], table3["value2"]]] - expected = ops.JoinChain( - first=filtered, - rest=[ - ops.JoinLink("left", table2_, [filtered.foo_id == table2_.foo_id]), - ops.JoinLink("inner", table3_, [filtered.bar_id == table3_.bar_id]), - ], - values={ - "c": filtered.c, - "f": filtered.f, - "foo_id": filtered.foo_id, - "bar_id": filtered.bar_id, - "value1": table2_.value1, - "value2": table3_.value2, - }, - ) - assert view.op() == expected + with self_references(): + j1 = filtered.left_join(table2, table["foo_id"] == table2["foo_id"]) + j2 = j1.inner_join(table3, filtered["bar_id"] == table3["bar_id"]) + view = j2[[filtered, table2["value1"], table3["value2"]]] + with self_references(filtered, table2, table3) as (r1, r2, r3): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("left", r2, [r1.foo_id == r2.foo_id]), + ops.JoinLink("inner", r3, [r1.bar_id == r3.bar_id]), + ], + values={ + "c": r1.c, + "f": r1.f, + "foo_id": r1.foo_id, + "bar_id": r1.bar_id, + "foo_id_right": 
r2.foo_id, + "value1": r2.value1, + "value3": r2.value3, + "bar_id_right": r3.bar_id, + "value2": r3.value2, + }, + ) + assert j2.op() == expected + assert view.op() == expected.copy( + values={ + "c": r1.c, + "f": r1.f, + "foo_id": r1.foo_id, + "bar_id": r1.bar_id, + "value1": r2.value1, + "value2": r3.value2, + } + ) def test_aggregate(): @@ -927,96 +944,100 @@ def test_self_join(): t1 = t0.filter(ibis.literal(True)) t2 = t1[["key"]] - t3 = t2.join(t2, ["key"]) - t2_ = t3.op().rest[0].table.to_expr() - expected = ops.JoinChain( - first=t2, - rest=[ - ops.JoinLink("inner", t2_, [t2.key == t2_.key]), - ], - values={"key": t2.key, "key_right": t2_.key}, - ) - assert t3.op() == expected + with self_references(): + t3 = t2.join(t2, ["key"]) + t4 = t3.join(t3, ["key"]) - t4 = t3.join(t3, ["key"]) - t3_ = t4.op().rest[1].table.to_expr() + with self_references(t2, t2, t3) as (r1, r2, r3): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.key == r2.key]), + ], + values={"key": r1.key, "key_right": r2.key}, + ) + assert t3.op() == expected - expected = ops.JoinChain( - first=t2, - rest=[ - ops.JoinLink("inner", t2_, [t2.key == t2_.key]), - ops.JoinLink("inner", t3_, [t2.key == t3_.key]), - ], - values={ - "key": t2.key, - "key_right": t2_.key, - "key_right_right": t3_.key_right, - }, - ) - assert t4.op() == expected + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.key == r2.key]), + ops.JoinLink("inner", r3, [r1.key == r3.key]), + ], + values={ + "key": r1.key, + "key_right": r2.key, + "key_right_right": r3.key_right, + }, + ) + assert t4.op() == expected def test_self_join_view(): t = ibis.memtable({"x": [1, 2], "y": [2, 1], "z": ["a", "b"]}) t_view = t.view() - expr = t.join(t_view, t.x == t_view.y).select("x", "y", "z", "z_right") - t_view_ = expr.op().rest[0].table.to_expr() - expected = ops.JoinChain( - first=t, - rest=[ - ops.JoinLink("inner", t_view_, [t.x == t_view_.y]), - ], - values={"x": t.x, "y": t.y, "z": t.z, "z_right": t_view_.z}, - ) - assert expr.op() == expected + with self_references(): + expr = t.join(t_view, t.x == t_view.y).select("x", "y", "z", "z_right") + + with self_references(t) as (r1,): + r2 = t_view + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.x == r2.y]), + ], + values={"x": r1.x, "y": r1.y, "z": r1.z, "z_right": r2.z}, + ) + assert expr.op() == expected def test_self_join_with_view_projection(): t1 = ibis.memtable({"x": [1, 2], "y": [2, 1], "z": ["a", "b"]}) t2 = t1.view() - expr = t1.inner_join(t2, ["x"])[[t1]] - t2_ = expr.op().rest[0].table.to_expr() - expected = ops.JoinChain( - first=t1, - rest=[ - ops.JoinLink("inner", t2_, [t1.x == t2_.x]), - ], - values={"x": t1.x, "y": t1.y, "z": t1.z}, - ) - assert expr.op() == expected + with self_references(): + expr = t1.inner_join(t2, ["x"])[[t1]] + with self_references(t1) as (r1,): + r2 = t2 + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.x == r2.x]), + ], + values={"x": r1.x, "y": r1.y, "z": r1.z}, + ) + assert expr.op() == expected def test_joining_same_table_twice(): left = ibis.table(name="left", schema={"time1": int, "value": float, "a": str}) right = ibis.table(name="right", schema={"time2": int, "value2": float, "b": str}) - joined = left.inner_join(right, left.a == right.b).inner_join( - right, left.value == right.value2 - ) - - right_ = joined.op().rest[0].table.to_expr() - right__ = joined.op().rest[1].table.to_expr() - expected = ops.JoinChain( - first=left, - rest=[ - 
ops.JoinLink("inner", right_, [left.a == right_.b]), - ops.JoinLink("inner", right__, [left.value == right__.value2]), - ], - values={ - "time1": left.time1, - "value": left.value, - "a": left.a, - "time2": right_.time2, - "value2": right_.value2, - "b": right_.b, - "time2_right": right__.time2, - "value2_right": right__.value2, - "b_right": right__.b, - }, - ) - assert joined.op() == expected + with self_references(): + joined = left.inner_join(right, left.a == right.b).inner_join( + right, left.value == right.value2 + ) + with self_references(left, right, right) as (r1, r2, r3): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.a == r2.b]), + ops.JoinLink("inner", r3, [r1.value == r3.value2]), + ], + values={ + "time1": r1.time1, + "value": r1.value, + "a": r1.a, + "time2": r2.time2, + "value2": r2.value2, + "b": r2.b, + "time2_right": r3.time2, + "value2_right": r3.value2, + "b_right": r3.b, + }, + ) + assert joined.op() == expected def test_join_chain_gets_reused_and_continued_after_a_select(): @@ -1024,129 +1045,122 @@ def test_join_chain_gets_reused_and_continued_after_a_select(): b = ibis.table(name="b", schema={"c": "int64", "d": "string"}) c = ibis.table(name="c", schema={"e": "int64", "f": "string"}) - ab = a.join(b, [a.a == b.c]) - abc = ab[a.b, b.d].join(c, [a.a == c.e]) - - b_ = abc.op().rest[0].table.to_expr() - c_ = abc.op().rest[1].table.to_expr() - expected = ops.JoinChain( - first=a, - rest=[ - ops.JoinLink("inner", b_, [a.a == b_.c]), - ops.JoinLink("inner", c_, [a.a == c_.e]), - ], - values={ - "b": a.b, - "d": b_.d, - "e": c_.e, - "f": c_.f, - }, - ) - assert abc.op() == expected - assert abc._finish().op() == expected + with self_references(): + ab = a.join(b, [a.a == b.c]) + abc = ab[a.b, b.d].join(c, [a.a == c.e]) + with self_references(a, b, c) as (r1, r2, r3): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.a == r2.c]), + ops.JoinLink("inner", r3, [r1.a == r3.e]), + ], + values={ + "b": r1.b, + "d": r2.d, + "e": r3.e, + "f": r3.f, + }, + ) + assert abc.op() == expected def test_self_join_extensive(): a = ibis.table(name="a", schema={"a": "int64", "b": "string"}) - aa = a.join(a, [a.a == a.a]) - aa_ = a.join(a, "a") - aa__ = a.join(a, [("a", "a")]) - for join in [aa, aa_, aa__]: - a1 = join.op().rest[0].table.to_expr() + with self_references(): + aa = a.join(a, [a.a == a.a]) + with self_references(): + aa1 = a.join(a, "a") + with self_references(): + aa2 = a.join(a, [("a", "a")]) + with self_references(a, a) as (r1, r2): expected = ops.JoinChain( - first=a, + first=r1, rest=[ - ops.JoinLink("inner", a1, [a.a == a1.a]), + ops.JoinLink("inner", r2, [r1.a == r2.a]), ], values={ - "a": a.a, - "b": a.b, - "a_right": a1.a, - "b_right": a1.b, + "a": r1.a, + "b": r1.b, + "a_right": r2.a, + "b_right": r2.b, }, ) - assert join.op() == expected - - aaa = a.join(a, [a.a == a.a]).join(a, [a.a == a.a]) - a0 = a - a1 = aaa.op().rest[0].table.to_expr() - a2 = aaa.op().rest[1].table.to_expr() - expected = ops.JoinChain( - first=a0, - rest=[ - ops.JoinLink("inner", a1, [a0.a == a1.a]), - ops.JoinLink("inner", a2, [a0.a == a2.a]), - ], - values={ - "a": a0.a, - "b": a0.b, - "a_right": a1.a, - "b_right": a1.b, - }, - ) - - aaa = aa.join(a, [aa.a == a.a]) - aaa_ = aa.join(a, "a") - aaa__ = aa.join(a, [("a", "a")]) - for join in [aaa, aaa_, aaa__]: - a1 = join.op().rest[0].table.to_expr() - a2 = join.op().rest[1].table.to_expr() + assert aa.op() == expected + assert aa1.op() == expected + assert aa2.op() == expected + + 
with self_references(): + aaa = a.join(a, [a.a == a.a]).join(a, [a.a == a.a]) + with self_references(): + aa = a.join(a, [a.a == a.a]) + aaa1 = aa.join(a, [aa.a == a.a]) + with self_references(): + aa = a.join(a, [a.a == a.a]) + aaa2 = aa.join(a, "a") + with self_references(): + aa = a.join(a, [a.a == a.a]) + aaa3 = aa.join(a, [("a", "a")]) + with self_references(a, a, a) as (r1, r2, r3): expected = ops.JoinChain( - first=a, + first=r1, rest=[ - ops.JoinLink("inner", a1, [a.a == a1.a]), - ops.JoinLink("inner", a2, [a.a == a2.a]), + ops.JoinLink("inner", r2, [r1.a == r2.a]), + ops.JoinLink("inner", r3, [r1.a == r3.a]), ], values={ - "a": a.a, - "b": a.b, - "a_right": a1.a, - "b_right": a1.b, + "a": r1.a, + "b": r1.b, + "a_right": r2.a, + "b_right": r2.b, }, ) - assert join.op() == expected + assert aaa.op() == expected + assert aaa1.op() == expected + assert aaa2.op() == expected + assert aaa3.op() == expected def test_self_join_with_intermediate_selection(): a = ibis.table(name="a", schema={"a": "int64", "b": "string"}) - join = a[["b", "a"]].join(a, [a.a == a.a]) - a0 = a[["b", "a"]] - a1 = join.op().rest[0].table.to_expr() - expected = ops.JoinChain( - first=a0, - rest=[ - ops.JoinLink("inner", a1, [a0.a == a1.a]), - ], - values={ - "b": a0.b, - "a": a0.a, - "a_right": a1.a, - "b_right": a1.b, - }, - ) - assert join.op() == expected - - aa_ = a.join(a, [a.a == a.a])["a", "b_right"] - aaa_ = aa_.join(a, [aa_.a == a.a]) - a0 = a - a1 = aaa_.op().rest[0].table.to_expr() - a2 = aaa_.op().rest[1].table.to_expr() - expected = ops.JoinChain( - first=a0, - rest=[ - ops.JoinLink("inner", a1, [a0.a == a1.a]), - ops.JoinLink("inner", a2, [a0.a == a2.a]), - ], - values={ - "a": a0.a, - "b_right": a1.b, - "a_right": a2.a, - "b": a2.b, - }, - ) - assert aaa_.op() == expected + with self_references(): + proj = a[["b", "a"]] + join = proj.join(a, [a.a == a.a]) + with self_references(proj, a) as (r1, r2): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.a == r2.a]), + ], + values={ + "b": r1.b, + "a": r1.a, + "a_right": r2.a, + "b_right": r2.b, + }, + ) + assert join.op() == expected + + with self_references(): + aa = a.join(a, [a.a == a.a])["a", "b_right"] + aaa = aa.join(a, [aa.a == a.a]) + with self_references(a, a, a) as (r1, r2, r3): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.a == r2.a]), + ops.JoinLink("inner", r3, [r1.a == r3.a]), + ], + values={ + "a": r1.a, + "b_right": r2.b, + "a_right": r3.a, + "b": r3.b, + }, + ) + assert aaa.op() == expected # TODO(kszucs): this use case could be supported if `_get_column` gets # overridden to return underlying column reference, but that would mean @@ -1191,3 +1205,46 @@ def test_name_collisions_raise(): abc = a.join(b, [a.a == b.a]).join(c, [a.a == c.a]) with pytest.raises(IntegrityError): abc.filter(abc.a < 1) + + +def test_self_view_join_followed_by_aggregate_correctly_dereference_fields(): + t = ibis.table( + name="t", schema={"a": "int64", "b": "int64", "f": "int64", "g": "string"} + ) + + agged = t.aggregate([t.f.sum().name("total")], by=["g", "a", "b"]) + view = agged.view() + metrics = [(agged.total - view.total).max().name("metric")] + + with self_references(): + join = agged.inner_join(view, [agged.a == view.b]) + agg = join.aggregate(metrics, by=[agged.g]) + with self_references(agged) as (r1,): + r2 = view + expected_join = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.a == r2.b]), + ], + values={ + "g": r1.g, + "a": r1.a, + "b": r1.b, + "total": r1.total, 
+ "g_right": r2.g, + "a_right": r2.a, + "b_right": r2.b, + "total_right": r2.total, + }, + ).to_expr() + expected_agg = ops.Aggregate( + parent=join, + groups={ + "g": join.g, + }, + metrics={ + "metric": (join.total - join.total_right).max(), + }, + ).to_expr() + assert join.equals(expected_join) + assert agg.equals(expected_agg) diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py index bde607b1d3ee..7568f05cb236 100644 --- a/ibis/expr/types/joins.py +++ b/ibis/expr/types/joins.py @@ -95,7 +95,7 @@ def dereference_value(pred, deref_left, deref_right): return pred.replace(deref_both, filter=ops.Value) -def prepare_predicates(left, right, predicates, deref_left, deref_right, deref_both): +def prepare_predicates(left, right, predicates, deref_left, deref_right): """Bind and dereference predicates to the left and right tables.""" for pred in util.promote_list(predicates): @@ -104,12 +104,10 @@ def prepare_predicates(left, right, predicates, deref_left, deref_right, deref_b elif isinstance(pred, ValueExpr): node = pred.op() yield dereference_value(node, deref_left, deref_right) - # yield node.replace(deref_both, filter=ops.Value) elif isinstance(pred, Deferred): # resolve deferred expressions on the left table node = pred.resolve(left).op() yield dereference_value(node, deref_left, deref_right) - # yield node.replace(deref_both, filter=ops.Value) else: if isinstance(pred, tuple): if len(pred) != 2: @@ -180,7 +178,6 @@ def join( right = right.op() subs_left = dereference_mapping_left(left) subs_right, right = dereference_mapping_right(right) - subs_both = {**subs_left, **subs_right} # bind and dereference the predicates preds = prepare_predicates( @@ -189,7 +186,6 @@ def join( predicates, deref_left=subs_left, deref_right=subs_right, - deref_both=subs_both, ) preds = flatten_predicates(list(preds)) diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index 75dad930e4d2..94542884ccd6 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -165,10 +165,10 @@ def dereference_mapping(parents): # also stop tracking if the field belongs to a parent which # we want to dereference to, see the docstring of # `dereference_values()` for more details - while isinstance(v, ops.Field) and v.rel not in parents: + while isinstance(v, ops.Field) and v not in mapping: mapping[v] = ops.Field(parent, k) v = v.rel.values.get(v.name) - elif v.relations: + elif v.relations and v not in mapping: # do not dereference literal expressions mapping[v] = ops.Field(parent, k) return mapping @@ -2993,9 +2993,6 @@ def join( """ from ibis.expr.types.joins import JoinExpr - # the first participant of the join can be any Relation, but the rest - # must be wrapped in SelfReferences so that we can join the same table - # with itself multiple times and to enable optimization passes later on left = left.op() if isinstance(left, ops.JoinChain): # if the left side is already a join chain, we can reuse it, for @@ -3004,8 +3001,11 @@ def join( # `ir.Table(ops.JoinChain())` expression, which we can reuse here expr = left.to_expr() else: - if isinstance(left, ops.SelfReference): - left = left.parent + # all participants of the join must be wrapped in SelfReferences so + # that we can join the same table with itself multiple times and to + # enable optimization passes later on + if not isinstance(left, ops.SelfReference): + left = ops.SelfReference(left) # construct an empty join chain and wrap it with a JoinExpr, the # projected fields are the fields of the starting table expr = 
ops.JoinChain(left, rest=(), values=left.fields).to_expr() diff --git a/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_database_table/repr.txt b/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_database_table/repr.txt index b67141c7beda..b4761984f092 100644 --- a/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_database_table/repr.txt +++ b/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_database_table/repr.txt @@ -7,25 +7,27 @@ r1 := DatabaseTable: test1 f float64 g string -r2 := Filter[r1] +r2 := SelfReference[r0] + +r3 := Filter[r1] r1.f > 0 -r3 := SelfReference[r2] +r4 := SelfReference[r3] -r4 := JoinChain[r0] - JoinLink[inner, r3] - r3.g == r0.key +r5 := JoinChain[r2] + JoinLink[inner, r4] + r4.g == r2.key values: - key: r0.key - value: r0.value - c: r3.c - f: r3.f - g: r3.g + key: r2.key + value: r2.value + c: r4.c + f: r4.f + g: r4.g -Aggregate[r4] +Aggregate[r5] groups: - g: r4.g - key: r4.key + g: r5.g + key: r5.key metrics: - foo: Mean(r4.f - r4.value) - bar: Sum(r4.f) \ No newline at end of file + foo: Mean(r5.f - r5.value) + bar: Sum(r5.f) \ No newline at end of file diff --git a/ibis/tests/expr/test_analysis.py b/ibis/tests/expr/test_analysis.py index a0b52b84cd6f..7db0ae06c67a 100644 --- a/ibis/tests/expr/test_analysis.py +++ b/ibis/tests/expr/test_analysis.py @@ -6,6 +6,7 @@ import ibis.common.exceptions as com import ibis.expr.operations as ops from ibis.expr.rewrites import simplify +from ibis.expr.tests.test_newrels import self_references # Place to collect esoteric expression analysis bugs and tests @@ -21,39 +22,37 @@ def test_rewrite_join_projection_without_other_ops(con): pred1 = table["foo_id"] == table2["foo_id"] pred2 = filtered["bar_id"] == table3["bar_id"] - j1 = filtered.left_join(table2, [pred1]) - j2 = j1.inner_join(table3, [pred2]) - - # Project out the desired fields - view = j2[[filtered, table2["value1"], table3["value2"]]] - - # Construct the thing we expect to obtain - table2_ref = j2.op().rest[0].table.to_expr() - table3_ref = j2.op().rest[1].table.to_expr() - expected = ops.JoinChain( - first=filtered, - rest=[ - ops.JoinLink( - how="left", - table=table2_ref, - predicates=[filtered["foo_id"] == table2_ref["foo_id"]], - ), - ops.JoinLink( - how="inner", - table=table3_ref, - predicates=[filtered["bar_id"] == table3_ref["bar_id"]], - ), - ], - values={ - "c": filtered.c, - "f": filtered.f, - "foo_id": filtered.foo_id, - "bar_id": filtered.bar_id, - "value1": table2_ref.value1, - "value2": table3_ref.value2, - }, - ) - assert view.op() == expected + with self_references(): + j1 = filtered.left_join(table2, [pred1]) + j2 = j1.inner_join(table3, [pred2]) + # Project out the desired fields + view = j2[[filtered, table2["value1"], table3["value2"]]] + with self_references(filtered, table2, table3) as (r1, r2, r3): + # Construct the thing we expect to obtain + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink( + how="left", + table=r2, + predicates=[r1["foo_id"] == r2["foo_id"]], + ), + ops.JoinLink( + how="inner", + table=r3, + predicates=[r1["bar_id"] == r3["bar_id"]], + ), + ], + values={ + "c": r1.c, + "f": r1.f, + "foo_id": r1.foo_id, + "bar_id": r1.bar_id, + "value1": r2.value1, + "value2": r3.value2, + }, + ) + assert view.op() == expected def test_multiple_join_deeper_reference(): @@ -166,24 +165,24 @@ def test_filter_self_join(): ) cond = left.region == right.region - joined = left.join(right, cond) - - metric = (left.total - right.total).name("diff") - what = 
[left.region, metric] - projected = joined.select(what) - - right_ = joined.op().rest[0].table.to_expr() - join = ops.JoinChain( - first=left, - rest=[ - ops.JoinLink("inner", right_, [left.region == right_.region]), - ], - values={ - "region": left.region, - "diff": left.total - right_.total, - }, - ) - assert projected.op() == join + with self_references(): + joined = left.join(right, cond) + metric = (left.total - right.total).name("diff") + what = [left.region, metric] + projected = joined.select(what) + + with self_references(left, right) as (r1, r2): + join = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.region == r2.region]), + ], + values={ + "region": r1.region, + "diff": r1.total - r2.total, + }, + ) + assert projected.op() == join def test_is_ancestor_analytic(): diff --git a/ibis/tests/expr/test_struct.py b/ibis/tests/expr/test_struct.py index c960fefbd126..d7e5b05ddd5a 100644 --- a/ibis/tests/expr/test_struct.py +++ b/ibis/tests/expr/test_struct.py @@ -8,6 +8,7 @@ import ibis.expr.operations as ops import ibis.expr.types as ir from ibis import _ +from ibis.expr.tests.test_newrels import self_references from ibis.tests.util import assert_pickle_roundtrip @@ -69,18 +70,18 @@ def test_unpack_from_table(t): def test_lift_join(t, s): - join = t.join(s, t.d == s.a.g) - result = join.a_right.lift() - - s_ = join.op().rest[0].table.to_expr() - join = ops.JoinChain( - first=t, - rest=[ - ops.JoinLink("inner", s_, [t.d == s_.a.g]), - ], - values={"f": s_.a.f, "g": s_.a.g}, - ) - assert result.op() == join + with self_references(): + join = t.join(s, t.d == s.a.g) + result = join.a_right.lift() + with self_references(t, s) as (r1, r2): + join = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.d == r2.a.g]), + ], + values={"f": r2.a.f, "g": r2.a.g}, + ) + assert result.op() == join def test_unpack_join_from_table(t, s): diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index 85bffdfc5668..9f26fb41d205 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -22,6 +22,7 @@ from ibis.common.exceptions import ExpressionError, IntegrityError, RelationError from ibis.expr import api from ibis.expr.rewrites import simplify +from ibis.expr.tests.test_newrels import self_references from ibis.expr.types import Column, Table from ibis.tests.util import assert_equal, assert_pickle_roundtrip @@ -846,50 +847,47 @@ def test_join_no_predicate_list(con): region = con.table("tpch_region") nation = con.table("tpch_nation") - pred = region.r_regionkey == nation.n_regionkey - joined = region.inner_join(nation, pred) - - nation_ = joined.op().rest[0].table.to_expr() - expected = ops.JoinChain( - first=region, - rest=[ - ops.JoinLink("inner", nation_, [region.r_regionkey == nation_.n_regionkey]) - ], - values={ - "r_regionkey": region.r_regionkey, - "r_name": region.r_name, - "r_comment": region.r_comment, - "n_nationkey": nation_.n_nationkey, - "n_name": nation_.n_name, - "n_regionkey": nation_.n_regionkey, - "n_comment": nation_.n_comment, - }, - ) - assert joined.op() == expected + with self_references(): + pred = region.r_regionkey == nation.n_regionkey + joined = region.inner_join(nation, pred) + with self_references(region, nation) as (r1, r2): + expected = ops.JoinChain( + first=r1, + rest=[ops.JoinLink("inner", r2, [r1.r_regionkey == r2.n_regionkey])], + values={ + "r_regionkey": r1.r_regionkey, + "r_name": r1.r_name, + "r_comment": r1.r_comment, + "n_nationkey": r2.n_nationkey, + "n_name": r2.n_name, + 
"n_regionkey": r2.n_regionkey, + "n_comment": r2.n_comment, + }, + ) + assert joined.op() == expected def test_join_deferred(con): region = con.table("tpch_region") nation = con.table("tpch_nation") - res = region.join(nation, _.r_regionkey == nation.n_regionkey) - nation_ = res.op().rest[0].table.to_expr() - expected = ops.JoinChain( - first=region, - rest=[ - ops.JoinLink("inner", nation_, [region.r_regionkey == nation_.n_regionkey]) - ], - values={ - "r_regionkey": region.r_regionkey, - "r_name": region.r_name, - "r_comment": region.r_comment, - "n_nationkey": nation_.n_nationkey, - "n_name": nation_.n_name, - "n_regionkey": nation_.n_regionkey, - "n_comment": nation_.n_comment, - }, - ) - assert res.op() == expected + with self_references(): + res = region.join(nation, _.r_regionkey == nation.n_regionkey) + with self_references(region, nation) as (r1, r2): + expected = ops.JoinChain( + first=r1, + rest=[ops.JoinLink("inner", r2, [r1.r_regionkey == r2.n_regionkey])], + values={ + "r_regionkey": r1.r_regionkey, + "r_name": r1.r_name, + "r_comment": r1.r_comment, + "n_nationkey": r2.n_nationkey, + "n_name": r2.n_name, + "n_regionkey": r2.n_regionkey, + "n_comment": r2.n_comment, + }, + ) + assert res.op() == expected def test_join_invalid_predicate(con): @@ -920,42 +918,44 @@ def test_asof_join_with_by(): left = ibis.table([("time", "int32"), ("key", "int32"), ("value", "double")]) right = ibis.table([("time", "int32"), ("key", "int32"), ("value2", "double")]) - join_without_by = api.asof_join(left, right, "time") - right_ = join_without_by.op().rest[0].table.to_expr() - expected = ops.JoinChain( - first=left, - rest=[ops.JoinLink("asof", right_, [left.time == right_.time])], - values={ - "time": left.time, - "key": left.key, - "value": left.value, - "time_right": right_.time, - "key_right": right_.key, - "value2": right_.value2, - }, - ) - assert join_without_by.op() == expected - - join_with_by = api.asof_join(left, right, "time", by="key") - right_ = join_with_by.op().rest[0].table.to_expr() - right__ = join_with_by.op().rest[1].table.to_expr() - expected = ops.JoinChain( - first=left, - rest=[ - ops.JoinLink("inner", right_, [left.key == right_.key]), - ops.JoinLink("asof", right__, [left.time == right__.time]), - ], - values={ - "time": left.time, - "key": left.key, - "value": left.value, - "time_right": right_.time, - "key_right": right_.key, - "value2": right_.value2, - "value2_right": right__.value2, - }, - ) - assert join_with_by.op() == expected + with self_references(): + join_without_by = api.asof_join(left, right, "time") + with self_references(left, right) as (r1, r2): + r2 = join_without_by.op().rest[0].table.to_expr() + expected = ops.JoinChain( + first=r1, + rest=[ops.JoinLink("asof", r2, [r1.time == r2.time])], + values={ + "time": r1.time, + "key": r1.key, + "value": r1.value, + "time_right": r2.time, + "key_right": r2.key, + "value2": r2.value2, + }, + ) + assert join_without_by.op() == expected + + with self_references(): + join_with_by = api.asof_join(left, right, "time", by="key") + with self_references(left, right, right) as (r1, r2, r3): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.key == r2.key]), + ops.JoinLink("asof", r3, [r1.time == r3.time]), + ], + values={ + "time": r1.time, + "key": r1.key, + "value": r1.value, + "time_right": r2.time, + "key_right": r2.key, + "value2": r2.value2, + "value2_right": r3.value2, + }, + ) + assert join_with_by.op() == expected @pytest.mark.parametrize( @@ -981,27 +981,28 @@ def 
test_asof_join_with_tolerance(ibis_interval, timedelta_interval): right = ibis.table([("time", "int32"), ("key", "int32"), ("value2", "double")]) for interval in [ibis_interval, timedelta_interval]: - joined = api.asof_join(left, right, "time", tolerance=interval) - right_ = joined.op().rest[0].table.to_expr() - expected = ops.JoinChain( - first=left, - rest=[ - ops.JoinLink( - "asof", - right_, - [left.time == right_.time, (left.time - right_.time) <= interval], - ) - ], - values={ - "time": left.time, - "key": left.key, - "value": left.value, - "time_right": right_.time, - "key_right": right_.key, - "value2": right_.value2, - }, - ) - assert joined.op() == expected + with self_references(): + joined = api.asof_join(left, right, "time", tolerance=interval) + with self_references(left, right) as (r1, r2): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink( + "asof", + r2, + [r1.time == r2.time, (r1.time - r2.time) <= interval], + ) + ], + values={ + "time": r1.time, + "key": r1.key, + "value": r1.value, + "time_right": r2.time, + "key_right": r2.key, + "value2": r2.value2, + }, + ) + assert joined.op() == expected def test_equijoin_schema_merge(): @@ -1160,27 +1161,28 @@ def test_cross_join_multiple(table): b = table["d", "e"] c = table["f", "h"] - joined = ibis.cross_join(a, b, c) - b_ = joined.op().rest[0].table.to_expr() - c_ = joined.op().rest[1].table.to_expr() - assert joined.op() == ops.JoinChain( - first=a, - rest=[ - ops.JoinLink("cross", b_, []), - ops.JoinLink("cross", c_, []), - ], - values={ - "a": a.a, - "b": a.b, - "c": a.c, - "d": b_.d, - "e": b_.e, - "f": c_.f, - "h": c_.h, - }, - ) - # TODO(kszucs): it must be simplified first using an appropriate rewrite rule - assert not joined.equals(a.cross_join(b.cross_join(c))) + with self_references(): + joined = ibis.cross_join(a, b, c) + with self_references(a, b, c) as (r1, r2, r3): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("cross", r2, []), + ops.JoinLink("cross", r3, []), + ], + values={ + "a": r1.a, + "b": r1.b, + "c": r1.c, + "d": r2.d, + "e": r2.e, + "f": r3.f, + "h": r3.h, + }, + ) + assert joined.op() == expected + # TODO(kszucs): it must be simplified first using an appropriate rewrite rule + assert not joined.equals(a.cross_join(b.cross_join(c))) def test_filter_join(): @@ -1249,25 +1251,25 @@ def test_join_key_alternatives(con, key_maker): t2 = con.table("star2") key = key_maker(t1, t2) - joined = t1.inner_join(t2, key) - - t2_ = joined.op().rest[0].table.to_expr() - expected = ops.JoinChain( - first=t1, - rest=[ - ops.JoinLink("inner", t2_, [t1.foo_id == t2_.foo_id]), - ], - values={ - "c": t1.c, - "f": t1.f, - "foo_id": t1.foo_id, - "bar_id": t1.bar_id, - "foo_id_right": t2_.foo_id, - "value1": t2_.value1, - "value3": t2_.value3, - }, - ) - assert joined.op() == expected + with self_references(): + joined = t1.inner_join(t2, key) + with self_references(t1, t2) as (r1, r2): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.foo_id == r2.foo_id]), + ], + values={ + "c": r1.c, + "f": r1.f, + "foo_id": r1.foo_id, + "bar_id": r1.bar_id, + "foo_id_right": r2.foo_id, + "value1": r2.value1, + "value3": r2.value3, + }, + ) + assert joined.op() == expected def test_join_key_invalid(con): @@ -1335,29 +1337,30 @@ def test_unravel_compound_equijoin(table): p2 = t1.key2 == t2.key2 p3 = t1.key3 == t2.key3 - joined = t1.inner_join(t2, [p1 & p2 & p3]) - t2_ = joined.op().rest[0].table.to_expr() - expected = ops.JoinChain( - first=t1, - rest=[ - ops.JoinLink( - "inner", 
- t2_, - [t1.key1 == t2_.key1, t1.key2 == t2_.key2, t1.key3 == t2_.key3], - ) - ], - values={ - "key1": t1.key1, - "key2": t1.key2, - "key3": t1.key3, - "value1": t1.value1, - "key1_right": t2_.key1, - "key2_right": t2_.key2, - "key3_right": t2_.key3, - "value2": t2_.value2, - }, - ) - assert joined.op() == expected + with self_references(): + joined = t1.inner_join(t2, [p1 & p2 & p3]) + with self_references(t1, t2) as (r1, r2): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink( + "inner", + r2, + [r1.key1 == r2.key1, r1.key2 == r2.key2, r1.key3 == r2.key3], + ) + ], + values={ + "key1": r1.key1, + "key2": r1.key2, + "key3": r1.key3, + "value1": r1.value1, + "key1_right": r2.key1, + "key2_right": r2.key2, + "key3_right": r2.key3, + "value2": r2.value2, + }, + ) + assert joined.op() == expected def test_union( From a1627184b304e964899db2b03af8fc924b8aedcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 21 Dec 2023 17:37:38 +0100 Subject: [PATCH 012/161] test(ir): cover constructing reductions in the core test suite --- ibis/expr/tests/test_reductions.py | 101 +++++++++++++++++++++++++++++ ibis/expr/types/generic.py | 9 +-- 2 files changed, 106 insertions(+), 4 deletions(-) create mode 100644 ibis/expr/tests/test_reductions.py diff --git a/ibis/expr/tests/test_reductions.py b/ibis/expr/tests/test_reductions.py new file mode 100644 index 000000000000..0427aea40d24 --- /dev/null +++ b/ibis/expr/tests/test_reductions.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import pytest +from pytest import param + +import ibis +import ibis.expr.operations as ops +from ibis import _ +from ibis.common.deferred import Deferred + + +@pytest.mark.parametrize( + ("fn", "operation"), + [ + param( + lambda t, where: t.int_col.nunique(where=where), + ops.CountDistinct, + id="nunique", + ), + param(lambda t, where: t.bool_col.any(where=where), ops.Any, id="any"), + param(lambda t, where: t.bool_col.all(where=where), ops.All, id="all"), + param(lambda t, where: t.int_col.sum(where=where), ops.Sum, id="sum"), + param(lambda t, where: t.int_col.mean(where=where), ops.Mean, id="mean"), + param(lambda t, where: t.int_col.min(where=where), ops.Min, id="min"), + param(lambda t, where: t.int_col.max(where=where), ops.Max, id="max"), + param( + lambda t, where: t.int_col.argmin(t.string_col, where=where), + ops.ArgMin, + id="argmin", + ), + param( + lambda t, where: t.int_col.argmax(t.string_col, where=where), + ops.ArgMax, + id="argmax", + ), + param( + lambda t, where: t.int_col.std(how="pop", where=where), + ops.StandardDev, + id="std", + ), + param(lambda t, where: t.int_col.var(where=where), ops.Variance, id="var"), + param( + lambda t, where: t.int_col.approx_nunique(where=where), + ops.ApproxCountDistinct, + id="approx_nunique", + ), + param( + lambda t, where: t.int_col.arbitrary(where=where), + ops.Arbitrary, + id="arbitrary", + ), + param(lambda t, where: t.int_col.first(where=where), ops.First, id="first"), + param(lambda t, where: t.int_col.last(where=where), ops.Last, id="last"), + param( + lambda t, where: t.int_col.bit_and(where=where), ops.BitAnd, id="bit_and" + ), + param(lambda t, where: t.int_col.bit_or(where=where), ops.BitOr, id="bit_or"), + param( + lambda t, where: t.int_col.bit_xor(where=where), ops.BitXor, id="bit_xor" + ), + param( + lambda t, where: t.int_col.collect(where=where), + ops.ArrayCollect, + id="collect", + ), + ], +) +@pytest.mark.parametrize( + "cond", + [ + pytest.param(lambda t: None, id="no_cond"), + pytest.param( + 
lambda t: t.string_col.isin(["1", "7"]), + id="is_in", + ), + pytest.param( + lambda t: _.string_col.isin(["1", "7"]), + id="is_in_deferred", + ), + ], +) +def test_reduction_methods(fn, operation, cond): + t = ibis.table( + name="t", + schema={ + "string_col": "string", + "int_col": "int64", + "bool_col": "boolean", + }, + ) + where = cond(t) + expr = fn(t, where) + node = expr.op() + assert isinstance(node, operation) + if where is None: + assert node.where is None + elif isinstance(where, Deferred): + resolved = where.resolve(t).op() + assert node.where == resolved + else: + assert node.where == where.op() diff --git a/ibis/expr/types/generic.py b/ibis/expr/types/generic.py index 10c487475d11..24add1bff5e3 100644 --- a/ibis/expr/types/generic.py +++ b/ibis/expr/types/generic.py @@ -1333,15 +1333,16 @@ def __pandas_result__(self, df: pd.DataFrame) -> pd.Series: return PandasData.convert_column(df.loc[:, column], self.type()) def _bind_reduction_filter(self, where): + rels = self.op().relations if isinstance(where, Deferred): - if len(node.relations) == 0: + if len(rels) == 0: raise com.IbisInputError( "Unable to bind deferred expression to a table because " "the expression doesn't depend on any tables" ) - elif len(node.relations) == 1: - (table,) = node.relations - return where.resolve(table) + elif len(rels) == 1: + (table,) = rels + return where.resolve(table.to_expr()) else: raise com.RelationError( "Cannot bind deferred expression to a table because the " From c988ab9b6f821fe875a0bcbfc9910e985e37b01a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 22 Dec 2023 18:42:30 +0100 Subject: [PATCH 013/161] refactor(ir): add `JoinTable` operation unique to `JoinChain` instead of using the globally unique `SelfReference` This enables us to maintain join expression equality: `a.join(b).equals(a.join(b))` So far we have been using SelfReference to make join tables unique, but it was globally unique which broke the equality check above. Therefore we need to restrict the uniqueness to the scope of the join chain. The simplest solution for that is to simply enumerate the join tables in the join chain, hence now all join participants must be `ops.JoinTable(rel, index)` instances. `ops.SelfReference` is still required to distinguish between two identical tables at the API level, but it is now decoupled from the join internal representation. 
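A rough sketch of the behaviour this change is after (not part of the patch; the
throwaway tables below are illustrative only and mirror the new
`test_join_expressions_are_equal` case added in this commit):

    import ibis

    t1 = ibis.table(name="t1", schema={"a": "int64", "b": "int64"})
    t2 = ibis.table(name="t2", schema={"a": "int64", "b": "int64"})

    # building the same join twice now yields structurally equal expressions,
    # because the participants are JoinTable nodes indexed per chain rather
    # than globally unique SelfReference nodes
    join1 = t1.inner_join(t2, [t1.a == t2.a])
    join2 = t1.inner_join(t2, [t1.a == t2.a])
    assert join1.equals(join2)

    # the index is scoped to the chain: the first table is 0 and each joined
    # table takes the next position
    chain = join1.op()
    assert chain.first.index == 0
    assert chain.rest[0].table.index == 1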
--- ibis/expr/decompile.py | 7 +- ibis/expr/format.py | 7 +- ibis/expr/operations/relations.py | 15 +- .../test_format/test_asof_join/repr.txt | 28 +-- .../repr.txt | 30 +-- .../repr.txt | 18 +- .../test_table_count_expr/join_repr.txt | 20 +- .../test_format/test_two_inner_joins/repr.txt | 34 ++- ibis/expr/tests/test_newrels.py | 213 ++++++++---------- ibis/expr/types/joins.py | 38 ++-- ibis/expr/types/relations.py | 11 +- .../test_memoize_database_table/repr.txt | 32 ++- ibis/tests/expr/test_analysis.py | 25 +- ibis/tests/expr/test_struct.py | 10 +- ibis/tests/expr/test_table.py | 46 ++-- 15 files changed, 238 insertions(+), 296 deletions(-) diff --git a/ibis/expr/decompile.py b/ibis/expr/decompile.py index af279447bd76..79b53dbe4ba2 100644 --- a/ibis/expr/decompile.py +++ b/ibis/expr/decompile.py @@ -184,6 +184,11 @@ def self_reference(op, parent, identifier): return parent +@translate.register(ops.JoinTable) +def join_table(op, parent, index): + return parent + + @translate.register(ops.JoinLink) def join_link(op, table, predicates, how): return f".{how}_join({table}, {_try_unwrap(predicates)})" @@ -327,7 +332,7 @@ def isin(op, value, options): class CodeContext: always_assign = (ops.ScalarParameter, ops.UnboundTable, ops.Aggregate) always_ignore = ( - ops.SelfReference, + ops.JoinTable, ops.Field, dt.Primitive, dt.Variadic, diff --git a/ibis/expr/format.py b/ibis/expr/format.py index 6ac9dfeb7b8a..4a904a99462b 100644 --- a/ibis/expr/format.py +++ b/ibis/expr/format.py @@ -162,7 +162,7 @@ def pretty(node): def mapper(op, _, **kwargs): result = fmt(op, **kwargs) - if isinstance(op, ops.Relation): + if isinstance(op, ops.Relation) and not isinstance(op, ops.JoinTable): tables[op] = result result = f"r{next(refcnt)}" return Rendered(result) @@ -337,6 +337,11 @@ def _self_reference(op, parent, **kwargs): return f"{op.__class__.__name__}[{parent}]" +@fmt.register(ops.JoinTable) +def _join_table(op, parent, index): + return parent + + @fmt.register(ops.Literal) def _literal(op, value, **kwargs): if op.dtype.is_interval(): diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index 9a2ecef54622..a4c37ce2912b 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -229,27 +229,38 @@ def name(self) -> str: ] +@public +class JoinTable(Simple): + index: int + + @public class JoinLink(Node): how: JoinKind - table: SelfReference + table: JoinTable predicates: VarTuple[Value[dt.Boolean]] @public class JoinChain(Relation): - first: SelfReference + first: JoinTable rest: VarTuple[JoinLink] values: FrozenDict[str, Unaliased[Value]] def __init__(self, first, rest, values): allowed_parents = {first} + assert first.index == 0 for join in rest: + assert join.table.index == len(allowed_parents) allowed_parents.add(join.table) _check_integrity(join.predicates, allowed_parents) _check_integrity(values.values(), allowed_parents) super().__init__(first=first, rest=rest, values=values) + @property + def length(self): + return len(self.rest) + 1 + @attribute def schema(self): return Schema({k: v.dtype.copy(nullable=True) for k, v in self.values.items()}) diff --git a/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt b/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt index e28f3c5bb0df..6c43f0adfc6b 100644 --- a/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt @@ -6,21 +6,15 @@ r1 := UnboundTable: right time2 int32 value2 float64 -r2 := 
SelfReference[r0] - -r3 := SelfReference[r1] - -r4 := SelfReference[r1] - -JoinChain[r2] - JoinLink[asof, r3] - r2.time1 == r3.time2 - JoinLink[inner, r4] - r2.value == r4.value2 +JoinChain[r0] + JoinLink[asof, r1] + r0.time1 == r1.time2 + JoinLink[inner, r1] + r0.value == r1.value2 values: - time1: r2.time1 - value: r2.value - time2: r3.time2 - value2: r3.value2 - time2_right: r4.time2 - value2_right: r4.value2 \ No newline at end of file + time1: r0.time1 + value: r0.value + time2: r1.time2 + value2: r1.value2 + time2_right: r1.time2 + value2_right: r1.value2 \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_format_multiple_join_with_projection/repr.txt b/ibis/expr/tests/snapshots/test_format/test_format_multiple_join_with_projection/repr.txt index 2e5cd4a00c70..8879597be115 100644 --- a/ibis/expr/tests/snapshots/test_format/test_format_multiple_join_with_projection/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_format_multiple_join_with_projection/repr.txt @@ -12,24 +12,18 @@ r2 := UnboundTable: three bar_id string value2 float64 -r3 := SelfReference[r1] - -r4 := SelfReference[r2] - -r5 := Filter[r0] +r3 := Filter[r0] r0.f > 0 -r6 := SelfReference[r5] - -JoinChain[r6] - JoinLink[left, r3] - r6.foo_id == r3.foo_id - JoinLink[inner, r4] - r6.bar_id == r4.bar_id +JoinChain[r3] + JoinLink[left, r1] + r3.foo_id == r1.foo_id + JoinLink[inner, r2] + r3.bar_id == r2.bar_id values: - c: r6.c - f: r6.f - foo_id: r6.foo_id - bar_id: r6.bar_id - value1: r3.value1 - value2: r4.value2 \ No newline at end of file + c: r3.c + f: r3.f + foo_id: r3.foo_id + bar_id: r3.bar_id + value1: r1.value1 + value2: r2.value2 \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_tables_in_join/repr.txt b/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_tables_in_join/repr.txt index 758b88722b59..128ffd518dd6 100644 --- a/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_tables_in_join/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_memoize_filtered_tables_in_join/repr.txt @@ -17,15 +17,11 @@ r2 := Filter[r1] r3 := Filter[r1] r1.kind == 'bar' -r4 := SelfReference[r2] - -r5 := SelfReference[r3] - -JoinChain[r4] - JoinLink[inner, r5] - r4.region == r5.region +JoinChain[r2] + JoinLink[inner, r3] + r2.region == r3.region values: - region: r4.region - kind: r4.kind - total: r4.total - right_total: r5.total \ No newline at end of file + region: r2.region + kind: r2.kind + total: r2.total + right_total: r3.total \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt b/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt index 999c2664f114..6f7009dc8056 100644 --- a/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt @@ -6,17 +6,13 @@ r1 := UnboundTable: t2 a int64 b float64 -r2 := SelfReference[r0] - -r3 := SelfReference[r1] - -r4 := JoinChain[r2] - JoinLink[inner, r3] - r2.a == r3.a +r2 := JoinChain[r0] + JoinLink[inner, r1] + r0.a == r1.a values: - a: r2.a - b: r2.b - a_right: r3.a - b_right: r3.b + a: r0.a + b: r0.b + a_right: r1.a + b_right: r1.b -CountStar(): CountStar(r4) \ No newline at end of file +CountStar(): CountStar(r2) \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_two_inner_joins/repr.txt b/ibis/expr/tests/snapshots/test_format/test_two_inner_joins/repr.txt index 
37d25bcc6b54..672faadf9ba2 100644 --- a/ibis/expr/tests/snapshots/test_format/test_two_inner_joins/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_two_inner_joins/repr.txt @@ -8,24 +8,18 @@ r1 := UnboundTable: right value2 float64 b string -r2 := SelfReference[r0] - -r3 := SelfReference[r1] - -r4 := SelfReference[r1] - -JoinChain[r2] - JoinLink[inner, r3] - r2.a == r3.b - JoinLink[inner, r4] - r2.value == r4.value2 +JoinChain[r0] + JoinLink[inner, r1] + r0.a == r1.b + JoinLink[inner, r1] + r0.value == r1.value2 values: - time1: r2.time1 - value: r2.value - a: r2.a - time2: r3.time2 - value2: r3.value2 - b: r3.b - time2_right: r4.time2 - value2_right: r4.value2 - b_right: r4.b \ No newline at end of file + time1: r0.time1 + value: r0.value + a: r0.a + time2: r1.time2 + value2: r1.value2 + b: r1.b + time2_right: r1.time2 + value2_right: r1.value2 + b_right: r1.b \ No newline at end of file diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py index d3821cecd73f..4735596a970d 100644 --- a/ibis/expr/tests/test_newrels.py +++ b/ibis/expr/tests/test_newrels.py @@ -1,7 +1,6 @@ from __future__ import annotations import contextlib -import itertools import pytest @@ -36,16 +35,8 @@ @contextlib.contextmanager -def self_references(*tables): - old_counter = ops.SelfReference._uid_counter - # set a new counter with 1000 to avoid colliding with manually created - # self-references using t.view() - new_counter = itertools.count(1000) - try: - ops.SelfReference._uid_counter = new_counter - yield tuple(ops.SelfReference(t).to_expr() for t in tables) - finally: - ops.SelfReference._uid_counter = old_counter +def join_tables(*tables): + yield tuple(ops.JoinTable(t, i).to_expr() for i, t in enumerate(tables)) def test_field(): @@ -486,9 +477,7 @@ def test_project_before_and_after_filter(): def test_join(): t1 = ibis.table(name="t1", schema={"a": "int64", "b": "string"}) t2 = ibis.table(name="t2", schema={"c": "int64", "d": "string"}) - - with self_references(): - joined = t1.join(t2, [t1.a == t2.c]) + joined = t1.join(t2, [t1.a == t2.c]) assert isinstance(joined, ir.JoinExpr) assert isinstance(joined.op(), JoinChain) @@ -499,7 +488,7 @@ def test_join(): assert isinstance(joined.op(), JoinChain) assert isinstance(joined.op().to_expr(), ir.JoinExpr) - with self_references(t1, t2) as (t1, t2): + with join_tables(t1, t2) as (t1, t2): assert result.op() == JoinChain( first=t1, rest=[ @@ -518,12 +507,12 @@ def test_join_unambiguous_select(): a = ibis.table(name="a", schema={"a_int": "int64", "a_str": "string"}) b = ibis.table(name="b", schema={"b_int": "int64", "b_str": "string"}) - with self_references(): - join = a.join(b, a.a_int == b.b_int) - expr1 = join["a_int", "b_int"] - expr2 = join.select("a_int", "b_int") - assert expr1.equals(expr2) - with self_references(a, b) as (r1, r2): + join = a.join(b, a.a_int == b.b_int) + expr1 = join["a_int", "b_int"] + expr2 = join.select("a_int", "b_int") + assert expr1.equals(expr2) + + with join_tables(a, b) as (r1, r2): assert expr1.op() == JoinChain( first=r1, rest=[JoinLink("inner", r2, [r1.a_int == r2.b_int])], @@ -539,10 +528,9 @@ def test_join_with_subsequent_projection(): t2 = ibis.table(name="t2", schema={"c": "int64", "d": "string"}) # a single computed value is pulled to a subsequent projection - with self_references(): - joined = t1.join(t2, [t1.a == t2.c]) - expr = joined.select(t1.a, t1.b, col=t2.c + 1) - with self_references(t1, t2) as (r1, r2): + joined = t1.join(t2, [t1.a == t2.c]) + expr = joined.select(t1.a, t1.b, 
col=t2.c + 1) + with join_tables(t1, t2) as (r1, r2): expected = JoinChain( first=r1, rest=[JoinLink("inner", r2, [r1.a == r2.c])], @@ -551,17 +539,16 @@ def test_join_with_subsequent_projection(): assert expr.op() == expected # multiple computed values - with self_references(): - joined = t1.join(t2, [t1.a == t2.c]) - expr = joined.select( - t1.a, - t1.b, - foo=t2.c + 1, - bar=t2.c + 2, - baz=t2.d.name("bar") + "3", - baz2=(t2.c + t1.a).name("foo"), - ) - with self_references(t1, t2) as (r1, r2): + joined = t1.join(t2, [t1.a == t2.c]) + expr = joined.select( + t1.a, + t1.b, + foo=t2.c + 1, + bar=t2.c + 2, + baz=t2.d.name("bar") + "3", + baz2=(t2.c + t1.a).name("foo"), + ) + with join_tables(t1, t2) as (r1, r2): expected = JoinChain( first=r1, rest=[JoinLink("inner", r2, [r1.a == r2.c])], @@ -583,15 +570,14 @@ def test_join_with_subsequent_projection_colliding_names(): name="t2", schema={"a": "int64", "b": "string", "c": "float", "d": "string"} ) - with self_references(): - joined = t1.join(t2, [t1.a == t2.a]) - expr = joined.select( - t1.a, - t1.b, - foo=t2.a + 1, - bar=t1.a + t2.a, - ) - with self_references(t1, t2) as (r1, r2): + joined = t1.join(t2, [t1.a == t2.a]) + expr = joined.select( + t1.a, + t1.b, + foo=t2.a + 1, + bar=t1.a + t2.a, + ) + with join_tables(t1, t2) as (r1, r2): expected = JoinChain( first=r1, rest=[JoinLink("inner", r2, [r1.a == r2.a])], @@ -609,12 +595,10 @@ def test_chained_join(): a = ibis.table(name="a", schema={"a": "int64", "b": "string"}) b = ibis.table(name="b", schema={"c": "int64", "d": "string"}) c = ibis.table(name="c", schema={"e": "int64", "f": "string"}) + joined = a.join(b, [a.a == b.c]).join(c, [a.a == c.e]) + result = joined._finish() - with self_references(): - joined = a.join(b, [a.a == b.c]).join(c, [a.a == c.e]) - result = joined._finish() - - with self_references(a, b, c) as (r1, r2, r3): + with join_tables(a, b, c) as (r1, r2, r3): assert result.op() == JoinChain( first=r1, rest=[ @@ -631,11 +615,10 @@ def test_chained_join(): }, ) - with self_references(): - joined = a.join(b, [a.a == b.c]).join(c, [b.c == c.e]) - result = joined.select(a.a, b.d, c.f) + joined = a.join(b, [a.a == b.c]).join(c, [b.c == c.e]) + result = joined.select(a.a, b.d, c.f) - with self_references(a, b, c) as (r1, r2, r3): + with join_tables(a, b, c) as (r1, r2, r3): assert result.op() == JoinChain( first=r1, rest=[ @@ -655,11 +638,11 @@ def test_chained_join_referencing_intermediate_table(): b = ibis.table(name="b", schema={"c": "int64", "d": "string"}) c = ibis.table(name="c", schema={"e": "int64", "f": "string"}) - with self_references(): - ab = a.join(b, [a.a == b.c]) - abc = ab.join(c, [ab.a == c.e]) - result = abc._finish() - with self_references(a, b, c) as (r1, r2, r3): + ab = a.join(b, [a.a == b.c]) + abc = ab.join(c, [ab.a == c.e]) + result = abc._finish() + + with join_tables(a, b, c) as (r1, r2, r3): assert result.op() == JoinChain( first=r1, rest=[ @@ -691,9 +674,8 @@ def test_join_predicate_dereferencing(): filtered = table[table["f"] > 0] # dereference table.foo_id to filtered.foo_id - with self_references(): - j1 = filtered.left_join(table2, table["foo_id"] == table2["foo_id"]) - with self_references(filtered, table2) as (r1, r2): + j1 = filtered.left_join(table2, table["foo_id"] == table2["foo_id"]) + with join_tables(filtered, table2) as (r1, r2): expected = ops.JoinChain( first=r1, rest=[ @@ -711,11 +693,10 @@ def test_join_predicate_dereferencing(): ) assert j1.op() == expected - with self_references(): - j1 = filtered.left_join(table2, 
table["foo_id"] == table2["foo_id"]) - j2 = j1.inner_join(table3, filtered["bar_id"] == table3["bar_id"]) - view = j2[[filtered, table2["value1"], table3["value2"]]] - with self_references(filtered, table2, table3) as (r1, r2, r3): + j1 = filtered.left_join(table2, table["foo_id"] == table2["foo_id"]) + j2 = j1.inner_join(table3, filtered["bar_id"] == table3["bar_id"]) + view = j2[[filtered, table2["value1"], table3["value2"]]] + with join_tables(filtered, table2, table3) as (r1, r2, r3): expected = ops.JoinChain( first=r1, rest=[ @@ -943,12 +924,10 @@ def test_self_join(): t0 = ibis.table(schema=ibis.schema(dict(key="int")), name="leaf") t1 = t0.filter(ibis.literal(True)) t2 = t1[["key"]] + t3 = t2.join(t2, ["key"]) + t4 = t3.join(t3, ["key"]) - with self_references(): - t3 = t2.join(t2, ["key"]) - t4 = t3.join(t3, ["key"]) - - with self_references(t2, t2, t3) as (r1, r2, r3): + with join_tables(t2, t2, t3) as (r1, r2, r3): expected = ops.JoinChain( first=r1, rest=[ @@ -976,12 +955,9 @@ def test_self_join(): def test_self_join_view(): t = ibis.memtable({"x": [1, 2], "y": [2, 1], "z": ["a", "b"]}) t_view = t.view() + expr = t.join(t_view, t.x == t_view.y).select("x", "y", "z", "z_right") - with self_references(): - expr = t.join(t_view, t.x == t_view.y).select("x", "y", "z", "z_right") - - with self_references(t) as (r1,): - r2 = t_view + with join_tables(t, t_view) as (r1, r2): expected = ops.JoinChain( first=r1, rest=[ @@ -995,11 +971,9 @@ def test_self_join_view(): def test_self_join_with_view_projection(): t1 = ibis.memtable({"x": [1, 2], "y": [2, 1], "z": ["a", "b"]}) t2 = t1.view() + expr = t1.inner_join(t2, ["x"])[[t1]] - with self_references(): - expr = t1.inner_join(t2, ["x"])[[t1]] - with self_references(t1) as (r1,): - r2 = t2 + with join_tables(t1, t2) as (r1, r2): expected = ops.JoinChain( first=r1, rest=[ @@ -1014,11 +988,10 @@ def test_joining_same_table_twice(): left = ibis.table(name="left", schema={"time1": int, "value": float, "a": str}) right = ibis.table(name="right", schema={"time2": int, "value2": float, "b": str}) - with self_references(): - joined = left.inner_join(right, left.a == right.b).inner_join( - right, left.value == right.value2 - ) - with self_references(left, right, right) as (r1, r2, r3): + joined = left.inner_join(right, left.a == right.b).inner_join( + right, left.value == right.value2 + ) + with join_tables(left, right, right) as (r1, r2, r3): expected = ops.JoinChain( first=r1, rest=[ @@ -1045,10 +1018,10 @@ def test_join_chain_gets_reused_and_continued_after_a_select(): b = ibis.table(name="b", schema={"c": "int64", "d": "string"}) c = ibis.table(name="c", schema={"e": "int64", "f": "string"}) - with self_references(): - ab = a.join(b, [a.a == b.c]) - abc = ab[a.b, b.d].join(c, [a.a == c.e]) - with self_references(a, b, c) as (r1, r2, r3): + ab = a.join(b, [a.a == b.c]) + abc = ab[a.b, b.d].join(c, [a.a == c.e]) + + with join_tables(a, b, c) as (r1, r2, r3): expected = ops.JoinChain( first=r1, rest=[ @@ -1068,13 +1041,10 @@ def test_join_chain_gets_reused_and_continued_after_a_select(): def test_self_join_extensive(): a = ibis.table(name="a", schema={"a": "int64", "b": "string"}) - with self_references(): - aa = a.join(a, [a.a == a.a]) - with self_references(): - aa1 = a.join(a, "a") - with self_references(): - aa2 = a.join(a, [("a", "a")]) - with self_references(a, a) as (r1, r2): + aa = a.join(a, [a.a == a.a]) + aa1 = a.join(a, "a") + aa2 = a.join(a, [("a", "a")]) + with join_tables(a, a) as (r1, r2): expected = ops.JoinChain( first=r1, rest=[ @@ 
-1091,18 +1061,11 @@ def test_self_join_extensive(): assert aa1.op() == expected assert aa2.op() == expected - with self_references(): - aaa = a.join(a, [a.a == a.a]).join(a, [a.a == a.a]) - with self_references(): - aa = a.join(a, [a.a == a.a]) - aaa1 = aa.join(a, [aa.a == a.a]) - with self_references(): - aa = a.join(a, [a.a == a.a]) - aaa2 = aa.join(a, "a") - with self_references(): - aa = a.join(a, [a.a == a.a]) - aaa3 = aa.join(a, [("a", "a")]) - with self_references(a, a, a) as (r1, r2, r3): + aaa = a.join(a, [a.a == a.a]).join(a, [a.a == a.a]) + aaa1 = aa.join(a, [aa.a == a.a]) + aaa2 = aa.join(a, "a") + aaa3 = aa.join(a, [("a", "a")]) + with join_tables(a, a, a) as (r1, r2, r3): expected = ops.JoinChain( first=r1, rest=[ @@ -1124,11 +1087,9 @@ def test_self_join_extensive(): def test_self_join_with_intermediate_selection(): a = ibis.table(name="a", schema={"a": "int64", "b": "string"}) - - with self_references(): - proj = a[["b", "a"]] - join = proj.join(a, [a.a == a.a]) - with self_references(proj, a) as (r1, r2): + proj = a[["b", "a"]] + join = proj.join(a, [a.a == a.a]) + with join_tables(proj, a) as (r1, r2): expected = ops.JoinChain( first=r1, rest=[ @@ -1143,10 +1104,9 @@ def test_self_join_with_intermediate_selection(): ) assert join.op() == expected - with self_references(): - aa = a.join(a, [a.a == a.a])["a", "b_right"] - aaa = aa.join(a, [aa.a == a.a]) - with self_references(a, a, a) as (r1, r2, r3): + aa = a.join(a, [a.a == a.a])["a", "b_right"] + aaa = aa.join(a, [aa.a == a.a]) + with join_tables(a, a, a) as (r1, r2, r3): expected = ops.JoinChain( first=r1, rest=[ @@ -1215,12 +1175,10 @@ def test_self_view_join_followed_by_aggregate_correctly_dereference_fields(): agged = t.aggregate([t.f.sum().name("total")], by=["g", "a", "b"]) view = agged.view() metrics = [(agged.total - view.total).max().name("metric")] + join = agged.inner_join(view, [agged.a == view.b]) + agg = join.aggregate(metrics, by=[agged.g]) - with self_references(): - join = agged.inner_join(view, [agged.a == view.b]) - agg = join.aggregate(metrics, by=[agged.g]) - with self_references(agged) as (r1,): - r2 = view + with join_tables(agged, view) as (r1, r2): expected_join = ops.JoinChain( first=r1, rest=[ @@ -1248,3 +1206,12 @@ def test_self_view_join_followed_by_aggregate_correctly_dereference_fields(): ).to_expr() assert join.equals(expected_join) assert agg.equals(expected_agg) + + +def test_join_expressions_are_equal(): + t1 = ibis.table(name="t1", schema={"a": "int64", "b": "int64"}) + t2 = ibis.table(name="t2", schema={"a": "int64", "b": "int64"}) + + join1 = t1.inner_join(t2, [t1.a == t2.a]) + join2 = t1.inner_join(t2, [t1.a == t2.a]) + assert join1.equals(join2) diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py index 7568f05cb236..e66b97732cd0 100644 --- a/ibis/expr/types/joins.py +++ b/ibis/expr/types/joins.py @@ -46,34 +46,28 @@ def disambiguate_fields(how, left_fields, right_fields, lname, rname): return fields, collisions -def dereference_targets(chain): - yield chain.first - for join in chain.rest: - if join.how not in ("semi", "anti"): - yield join.table - - def dereference_mapping_left(chain): - rels = dereference_targets(chain) + # construct the list of join table we wish to dereference fields to + rels = [chain.first] + for link in chain.rest: + if link.how not in ("semi", "anti"): + rels.append(link.table) + + # create the dereference mapping suitable to disambiguate field references + # from earlier in the relation hierarchy to one of the join tables subs = 
dereference_mapping(rels) - # join chain fields => link table fields + + # also allow to dereference fields of the join chain itself for k, v in chain.values.items(): subs[ops.Field(chain, k)] = v + return subs def dereference_mapping_right(right): - if isinstance(right, ops.SelfReference): - # no support for dereferencing, the user must use the right table - # directly in the predicates - return {}, right - - # wrap the right table in a self reference to ensure its uniqueness in the - # join chain which requires dereferencing the predicates from - # right => SelfReference(right) - right = ops.SelfReference(right) - subs = {v: ops.Field(right, k) for k, v in right.values.items()} - return subs, right + # the right table is wrapped in a JoinTable the uniqueness of the underlying + # table which requires the predicates to be dereferenced to the wrapped + return {v: ops.Field(right, k) for k, v in right.values.items()} def dereference_sides(left, right, deref_left, deref_right): @@ -175,9 +169,9 @@ def join( how = "semi" left = self.op() - right = right.op() + right = ops.JoinTable(right, index=left.length) subs_left = dereference_mapping_left(left) - subs_right, right = dereference_mapping_right(right) + subs_right = dereference_mapping_right(right) # bind and dereference the predicates preds = prepare_predicates( diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index 94542884ccd6..1058d12941f6 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -3001,13 +3001,10 @@ def join( # `ir.Table(ops.JoinChain())` expression, which we can reuse here expr = left.to_expr() else: - # all participants of the join must be wrapped in SelfReferences so - # that we can join the same table with itself multiple times and to - # enable optimization passes later on - if not isinstance(left, ops.SelfReference): - left = ops.SelfReference(left) - # construct an empty join chain and wrap it with a JoinExpr, the - # projected fields are the fields of the starting table + # all participants of the join must be wrapped in JoinTable nodes + # so that we can join the same table with itself multiple times and + # to enable optimization passes later on + left = ops.JoinTable(left, index=0) expr = ops.JoinChain(left, rest=(), values=left.fields).to_expr() return expr.join(right, predicates, how=how, lname=lname, rname=rname) diff --git a/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_database_table/repr.txt b/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_database_table/repr.txt index b4761984f092..afa7b6662e2f 100644 --- a/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_database_table/repr.txt +++ b/ibis/tests/expr/snapshots/test_format_sql_operations/test_memoize_database_table/repr.txt @@ -7,27 +7,23 @@ r1 := DatabaseTable: test1 f float64 g string -r2 := SelfReference[r0] - -r3 := Filter[r1] +r2 := Filter[r1] r1.f > 0 -r4 := SelfReference[r3] - -r5 := JoinChain[r2] - JoinLink[inner, r4] - r4.g == r2.key +r3 := JoinChain[r0] + JoinLink[inner, r2] + r2.g == r0.key values: - key: r2.key - value: r2.value - c: r4.c - f: r4.f - g: r4.g + key: r0.key + value: r0.value + c: r2.c + f: r2.f + g: r2.g -Aggregate[r5] +Aggregate[r3] groups: - g: r5.g - key: r5.key + g: r3.g + key: r3.key metrics: - foo: Mean(r5.f - r5.value) - bar: Sum(r5.f) \ No newline at end of file + foo: Mean(r3.f - r3.value) + bar: Sum(r3.f) \ No newline at end of file diff --git a/ibis/tests/expr/test_analysis.py b/ibis/tests/expr/test_analysis.py 
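The expected-value blocks above, and the test modules that follow, build their `r1`, `r2`, ... references with a `join_tables` helper imported from `ibis/expr/tests/test_newrels.py`; its definition is not part of these hunks. A minimal sketch of what it presumably looks like, assuming it only wraps each argument in an `ops.JoinTable` keyed by position, mirroring the wrapping that `Table.join` now performs (index 0 for the starting table, `left.length` for each subsequently joined table):

    import contextlib

    import ibis.expr.operations as ops


    @contextlib.contextmanager
    def join_tables(*tables):
        # assumed shape of the helper: one JoinTable wrapper per argument,
        # yielded as expressions so tests can spell out the expected JoinChain
        yield tuple(
            ops.JoinTable(t, index=i).to_expr() for i, t in enumerate(tables)
        )
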
index 7db0ae06c67a..aeacf249edf0 100644 --- a/ibis/tests/expr/test_analysis.py +++ b/ibis/tests/expr/test_analysis.py @@ -6,7 +6,7 @@ import ibis.common.exceptions as com import ibis.expr.operations as ops from ibis.expr.rewrites import simplify -from ibis.expr.tests.test_newrels import self_references +from ibis.expr.tests.test_newrels import join_tables # Place to collect esoteric expression analysis bugs and tests @@ -22,12 +22,12 @@ def test_rewrite_join_projection_without_other_ops(con): pred1 = table["foo_id"] == table2["foo_id"] pred2 = filtered["bar_id"] == table3["bar_id"] - with self_references(): - j1 = filtered.left_join(table2, [pred1]) - j2 = j1.inner_join(table3, [pred2]) - # Project out the desired fields - view = j2[[filtered, table2["value1"], table3["value2"]]] - with self_references(filtered, table2, table3) as (r1, r2, r3): + j1 = filtered.left_join(table2, [pred1]) + j2 = j1.inner_join(table3, [pred2]) + # Project out the desired fields + view = j2[[filtered, table2["value1"], table3["value2"]]] + + with join_tables(filtered, table2, table3) as (r1, r2, r3): # Construct the thing we expect to obtain expected = ops.JoinChain( first=r1, @@ -165,13 +165,12 @@ def test_filter_self_join(): ) cond = left.region == right.region - with self_references(): - joined = left.join(right, cond) - metric = (left.total - right.total).name("diff") - what = [left.region, metric] - projected = joined.select(what) + joined = left.join(right, cond) + metric = (left.total - right.total).name("diff") + what = [left.region, metric] + projected = joined.select(what) - with self_references(left, right) as (r1, r2): + with join_tables(left, right) as (r1, r2): join = ops.JoinChain( first=r1, rest=[ diff --git a/ibis/tests/expr/test_struct.py b/ibis/tests/expr/test_struct.py index d7e5b05ddd5a..75707c650f2c 100644 --- a/ibis/tests/expr/test_struct.py +++ b/ibis/tests/expr/test_struct.py @@ -8,7 +8,7 @@ import ibis.expr.operations as ops import ibis.expr.types as ir from ibis import _ -from ibis.expr.tests.test_newrels import self_references +from ibis.expr.tests.test_newrels import join_tables from ibis.tests.util import assert_pickle_roundtrip @@ -70,10 +70,10 @@ def test_unpack_from_table(t): def test_lift_join(t, s): - with self_references(): - join = t.join(s, t.d == s.a.g) - result = join.a_right.lift() - with self_references(t, s) as (r1, r2): + join = t.join(s, t.d == s.a.g) + result = join.a_right.lift() + + with join_tables(t, s) as (r1, r2): join = ops.JoinChain( first=r1, rest=[ diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index 9f26fb41d205..b2ee00a2ec27 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -22,7 +22,7 @@ from ibis.common.exceptions import ExpressionError, IntegrityError, RelationError from ibis.expr import api from ibis.expr.rewrites import simplify -from ibis.expr.tests.test_newrels import self_references +from ibis.expr.tests.test_newrels import join_tables from ibis.expr.types import Column, Table from ibis.tests.util import assert_equal, assert_pickle_roundtrip @@ -847,10 +847,10 @@ def test_join_no_predicate_list(con): region = con.table("tpch_region") nation = con.table("tpch_nation") - with self_references(): - pred = region.r_regionkey == nation.n_regionkey - joined = region.inner_join(nation, pred) - with self_references(region, nation) as (r1, r2): + pred = region.r_regionkey == nation.n_regionkey + joined = region.inner_join(nation, pred) + + with join_tables(region, nation) as (r1, r2): expected = 
ops.JoinChain( first=r1, rest=[ops.JoinLink("inner", r2, [r1.r_regionkey == r2.n_regionkey])], @@ -871,9 +871,9 @@ def test_join_deferred(con): region = con.table("tpch_region") nation = con.table("tpch_nation") - with self_references(): - res = region.join(nation, _.r_regionkey == nation.n_regionkey) - with self_references(region, nation) as (r1, r2): + res = region.join(nation, _.r_regionkey == nation.n_regionkey) + + with join_tables(region, nation) as (r1, r2): expected = ops.JoinChain( first=r1, rest=[ops.JoinLink("inner", r2, [r1.r_regionkey == r2.n_regionkey])], @@ -918,9 +918,8 @@ def test_asof_join_with_by(): left = ibis.table([("time", "int32"), ("key", "int32"), ("value", "double")]) right = ibis.table([("time", "int32"), ("key", "int32"), ("value2", "double")]) - with self_references(): - join_without_by = api.asof_join(left, right, "time") - with self_references(left, right) as (r1, r2): + join_without_by = api.asof_join(left, right, "time") + with join_tables(left, right) as (r1, r2): r2 = join_without_by.op().rest[0].table.to_expr() expected = ops.JoinChain( first=r1, @@ -936,9 +935,8 @@ def test_asof_join_with_by(): ) assert join_without_by.op() == expected - with self_references(): - join_with_by = api.asof_join(left, right, "time", by="key") - with self_references(left, right, right) as (r1, r2, r3): + join_with_by = api.asof_join(left, right, "time", by="key") + with join_tables(left, right, right) as (r1, r2, r3): expected = ops.JoinChain( first=r1, rest=[ @@ -981,9 +979,8 @@ def test_asof_join_with_tolerance(ibis_interval, timedelta_interval): right = ibis.table([("time", "int32"), ("key", "int32"), ("value2", "double")]) for interval in [ibis_interval, timedelta_interval]: - with self_references(): - joined = api.asof_join(left, right, "time", tolerance=interval) - with self_references(left, right) as (r1, r2): + joined = api.asof_join(left, right, "time", tolerance=interval) + with join_tables(left, right) as (r1, r2): expected = ops.JoinChain( first=r1, rest=[ @@ -1161,9 +1158,8 @@ def test_cross_join_multiple(table): b = table["d", "e"] c = table["f", "h"] - with self_references(): - joined = ibis.cross_join(a, b, c) - with self_references(a, b, c) as (r1, r2, r3): + joined = ibis.cross_join(a, b, c) + with join_tables(a, b, c) as (r1, r2, r3): expected = ops.JoinChain( first=r1, rest=[ @@ -1251,9 +1247,8 @@ def test_join_key_alternatives(con, key_maker): t2 = con.table("star2") key = key_maker(t1, t2) - with self_references(): - joined = t1.inner_join(t2, key) - with self_references(t1, t2) as (r1, r2): + joined = t1.inner_join(t2, key) + with join_tables(t1, t2) as (r1, r2): expected = ops.JoinChain( first=r1, rest=[ @@ -1337,9 +1332,8 @@ def test_unravel_compound_equijoin(table): p2 = t1.key2 == t2.key2 p3 = t1.key3 == t2.key3 - with self_references(): - joined = t1.inner_join(t2, [p1 & p2 & p3]) - with self_references(t1, t2) as (r1, r2): + joined = t1.inner_join(t2, [p1 & p2 & p3]) + with join_tables(t1, t2) as (r1, r2): expected = ops.JoinChain( first=r1, rest=[ From 881a93825a8bc11c1f7718d0d05ff6dbe05043e6 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 23 Dec 2023 07:59:15 -0500 Subject: [PATCH 014/161] fix(decompile): ensure that `SelfReference` is decompiled with a call to `.view()` --- ibis/backends/tests/sql/test_compiler.py | 22 +++++++-------- ibis/backends/tests/sql/test_select_sql.py | 14 ++++----- ibis/backends/tests/sql/test_sql.py | 4 +-- ibis/expr/decompile.py | 14 +++++++-- 
ibis/expr/tests/test_decompile.py | 10 +++++++ ibis/tests/util.py | 33 ++++++++++++++++++---- 6 files changed, 69 insertions(+), 28 deletions(-) diff --git a/ibis/backends/tests/sql/test_compiler.py b/ibis/backends/tests/sql/test_compiler.py index 8e66b358fca0..646b9ad9b652 100644 --- a/ibis/backends/tests/sql/test_compiler.py +++ b/ibis/backends/tests/sql/test_compiler.py @@ -8,45 +8,45 @@ # from ibis.backends.base.sql.compiler import Compiler from ibis.backends.tests.sql.conftest import to_sql -from ibis.tests.util import assert_decompile_roundtrip +from ibis.tests.util import assert_decompile_roundtrip, schemas_eq pytestmark = pytest.mark.duckdb def test_union(union, snapshot): snapshot.assert_match(to_sql(union), "out.sql") - assert_decompile_roundtrip(union, snapshot, check_equality=False) + assert_decompile_roundtrip(union, snapshot, eq=schemas_eq) def test_union_project_column(union_all, snapshot): # select a column, get a subquery expr = union_all[[union_all.key]] snapshot.assert_match(to_sql(expr), "out.sql") - assert_decompile_roundtrip(expr, snapshot, check_equality=False) + assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) def test_table_intersect(intersect, snapshot): snapshot.assert_match(to_sql(intersect), "out.sql") - assert_decompile_roundtrip(intersect, snapshot, check_equality=False) + assert_decompile_roundtrip(intersect, snapshot, eq=schemas_eq) def test_table_difference(difference, snapshot): snapshot.assert_match(to_sql(difference), "out.sql") - assert_decompile_roundtrip(difference, snapshot, check_equality=False) + assert_decompile_roundtrip(difference, snapshot, eq=schemas_eq) def test_intersect_project_column(intersect, snapshot): # select a column, get a subquery expr = intersect[[intersect.key]] snapshot.assert_match(to_sql(expr), "out.sql") - assert_decompile_roundtrip(expr, snapshot, check_equality=False) + assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) def test_difference_project_column(difference, snapshot): # select a column, get a subquery expr = difference[[difference.key]] snapshot.assert_match(to_sql(expr), "out.sql") - assert_decompile_roundtrip(expr, snapshot, check_equality=False) + assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) def test_table_distinct(con, snapshot): @@ -125,7 +125,7 @@ def test_having_from_filter(snapshot): expr = having.aggregate(filt.a.sum().name("sum")) snapshot.assert_match(to_sql(expr), "out.sql") # params get different auto incremented counter identifiers - assert_decompile_roundtrip(expr, snapshot, check_equality=False) + assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) def test_simple_agg_filter(snapshot): @@ -174,7 +174,7 @@ def test_table_drop_with_filter(snapshot): joined = joined[left.a] expr = joined.filter(joined.a < 1.0) snapshot.assert_match(to_sql(expr), "out.sql") - assert_decompile_roundtrip(expr, snapshot, check_equality=False) + assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) def test_table_drop_consistency(): @@ -210,7 +210,7 @@ def test_subquery_where_location(snapshot): out = Compiler.to_sql(expr, params={param: "20140101"}) snapshot.assert_match(out, "out.sql") # params get different auto incremented counter identifiers - assert_decompile_roundtrip(expr, snapshot, check_equality=False) + assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) def test_column_expr_retains_name(snapshot): @@ -231,4 +231,4 @@ def test_union_order_by(snapshot): t = ibis.table(dict(a="int", b="string"), name="t") expr = t.order_by("b").union(t.order_by("b")) 
snapshot.assert_match(to_sql(expr), "out.sql") - assert_decompile_roundtrip(expr, snapshot, check_equality=False) + assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) diff --git a/ibis/backends/tests/sql/test_select_sql.py b/ibis/backends/tests/sql/test_select_sql.py index 24e2a65468d4..78eee22af88c 100644 --- a/ibis/backends/tests/sql/test_select_sql.py +++ b/ibis/backends/tests/sql/test_select_sql.py @@ -8,7 +8,7 @@ # from ibis.backends.base.sql.compiler import Compiler from ibis.backends.tests.sql.conftest import get_query, to_sql -from ibis.tests.util import assert_decompile_roundtrip +from ibis.tests.util import assert_decompile_roundtrip, schemas_eq pytestmark = pytest.mark.duckdb @@ -117,7 +117,7 @@ def test_join_between_joins(snapshot): exprs = [left, right.value3, right.value4] expr = joined.select(exprs) snapshot.assert_match(to_sql(expr), "out.sql") - assert_decompile_roundtrip(expr, snapshot, check_equality=False) + assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) def test_join_just_materialized(nation, region, customer, snapshot): @@ -178,7 +178,7 @@ def test_where_analyze_scalar_op(functional_alltypes, snapshot): ] ).count() snapshot.assert_match(to_sql(expr), "out.sql") - assert_decompile_roundtrip(expr, snapshot, check_equality=False) + assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) def test_bug_duplicated_where(airlines, snapshot): @@ -244,7 +244,7 @@ def test_fuse_projections(snapshot): # fusion works even if there's a filter table3_filtered = table2_filtered.select([table2, f2]) snapshot.assert_match(to_sql(table3_filtered), "project_filter.sql") - assert_decompile_roundtrip(table3_filtered, snapshot, check_equality=False) + assert_decompile_roundtrip(table3_filtered, snapshot, eq=schemas_eq) def test_projection_filter_fuse(projection_fuse_filter, snapshot): @@ -342,7 +342,7 @@ def test_subquery_in_union(alltypes, snapshot): expr = join1.union(join2) snapshot.assert_match(to_sql(expr), "out.sql") - assert_decompile_roundtrip(expr, snapshot, check_equality=False) + assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) def test_limit_with_self_join(functional_alltypes, snapshot): @@ -351,7 +351,7 @@ def test_limit_with_self_join(functional_alltypes, snapshot): expr = t.join(t2, t.tinyint_col < t2.timestamp_col.minute()).count() snapshot.assert_match(to_sql(expr), "out.sql") - assert_decompile_roundtrip(expr, snapshot) + assert_decompile_roundtrip(expr, snapshot, eq=lambda x, y: repr(x) == repr(y)) def test_topk_predicate_pushdown_bug(nation, customer, region, snapshot): @@ -414,7 +414,7 @@ def test_case_in_projection(alltypes, snapshot): expr = t[expr.name("col1"), expr2.name("col2"), t] snapshot.assert_match(to_sql(expr), "out.sql") - assert_decompile_roundtrip(expr, snapshot, check_equality=False) + assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) def test_identifier_quoting(snapshot): diff --git a/ibis/backends/tests/sql/test_sql.py b/ibis/backends/tests/sql/test_sql.py index 38a966a2e2fc..d91d1aff6683 100644 --- a/ibis/backends/tests/sql/test_sql.py +++ b/ibis/backends/tests/sql/test_sql.py @@ -465,9 +465,9 @@ def test_gh_1045(test1, test2, test3, snapshot): t2 = test2 t3 = test3 - t3 = t3[[c for c in t3.columns if c != "id3"]].mutate(id3=t3.id3.cast("int64")) + t3 = t3.mutate(id3=t3.id3.cast("int64")) - t3 = t3[[c for c in t3.columns if c != "val2"]].mutate(t3_val2=t3.id3) + t3 = t3.mutate(t3_val2=t3.id3) t4 = t3.join(t2, t2.id2b == t3.id3) t1 = t1[[t1[c].name(f"t1_{c}") for c in t1.columns]] diff --git a/ibis/expr/decompile.py 
b/ibis/expr/decompile.py index 79b53dbe4ba2..3b27c166852a 100644 --- a/ibis/expr/decompile.py +++ b/ibis/expr/decompile.py @@ -179,9 +179,14 @@ def aggregation(op, parent, groups, metrics): raise ValueError("No metrics to aggregate") +@translate.register(ops.Distinct) +def distinct(op, parent): + return f"{parent}.distinct()" + + @translate.register(ops.SelfReference) def self_reference(op, parent, identifier): - return parent + return f"{parent}.view()" @translate.register(ops.JoinTable) @@ -330,7 +335,12 @@ def isin(op, value, options): class CodeContext: - always_assign = (ops.ScalarParameter, ops.UnboundTable, ops.Aggregate) + always_assign = ( + ops.ScalarParameter, + ops.UnboundTable, + ops.Aggregate, + ops.SelfReference, + ) always_ignore = ( ops.JoinTable, ops.Field, diff --git a/ibis/expr/tests/test_decompile.py b/ibis/expr/tests/test_decompile.py index 23fe6a41645a..35186311742e 100644 --- a/ibis/expr/tests/test_decompile.py +++ b/ibis/expr/tests/test_decompile.py @@ -76,3 +76,13 @@ def test_basic(expr, expected): assert restored.equals(expected) else: assert restored == expected + + +def test_view(): + expr = ibis.table({"x": "int"}, name="t").view() + assert "t.view()" in decompile(expr) + + +def test_distinct(): + expr = ibis.table({"x": "int"}, name="t").distinct() + assert "t.distinct()" in decompile(expr) diff --git a/ibis/tests/util.py b/ibis/tests/util.py index 72a3f09c2827..f51dfca04ab7 100644 --- a/ibis/tests/util.py +++ b/ibis/tests/util.py @@ -3,8 +3,10 @@ from __future__ import annotations import pickle +from typing import Callable import ibis +import ibis.expr.types as ir from ibis import util @@ -27,8 +29,30 @@ def assert_pickle_roundtrip(obj): assert obj == loaded -def assert_decompile_roundtrip(expr, snapshot=None, check_equality=True): - """Assert that an ibis expression remains the same after decompilation.""" +def schemas_eq(left: ir.Expr, right: ir.Expr) -> bool: + assert left.as_table().schema().equals(right.as_table().schema()) + + +def assert_decompile_roundtrip( + expr: ir.Expr, + snapshot=None, + eq: Callable[[ir.Expr, ir.Expr], bool] = ir.Expr.equals, +): + """Assert that an ibis expression remains the same after decompilation. + + Parameters + ---------- + expr + The expression to decompile. + snapshot + A snapshot fixture. + eq + A callable that returns whether two Ibis expressions are equal. + Defaults to `ibis.expr.types.Expr.equals`. Use this to adjust + comparison behavior for expressions that contain `SelfReference` + operations from table.view() calls, or other relations whose equality + is difficult to roundtrip. 
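The `eq` escape hatch documented above exists because `table.view()` now decompiles to an explicit `.view()` call (see the `ibis/expr/decompile.py` hunk earlier in this patch), and the resulting `SelfReference` relations are difficult to compare for strict equality after a roundtrip. A minimal demonstration, mirroring the new tests in `ibis/expr/tests/test_decompile.py`:

    import ibis

    t = ibis.table({"x": "int"}, name="t")

    # SelfReference now decompiles to an explicit call to .view()
    assert "t.view()" in ibis.decompile(t.view())

    # Distinct gains a decompile rule as well
    assert "t.distinct()" in ibis.decompile(t.distinct())
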
+ """ rendered = ibis.decompile(expr, format=True) if snapshot is not None: snapshot.assert_match(rendered, "decompiled.py") @@ -38,7 +62,4 @@ def assert_decompile_roundtrip(expr, snapshot=None, check_equality=True): exec(rendered, {}, locals_) restored = locals_["result"] - if check_equality: - assert expr.unbind().equals(restored) - else: - assert expr.as_table().schema().equals(restored.as_table().schema()) + assert eq(expr.unbind(), restored) From 59808956b90d2b828bcd7481fe1059c7a063aaaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Sat, 23 Dec 2023 02:27:58 +0100 Subject: [PATCH 015/161] refactor(ir): support join of joins while avoiding nesting --- ibis/expr/rewrites.py | 5 ++++ ibis/expr/tests/test_newrels.py | 41 +++++++++++++++++++++++++++++++++ ibis/expr/types/joins.py | 5 ++++ 3 files changed, 51 insertions(+) diff --git a/ibis/expr/rewrites.py b/ibis/expr/rewrites.py index 74e2294ec3db..12a314379d9a 100644 --- a/ibis/expr/rewrites.py +++ b/ibis/expr/rewrites.py @@ -17,6 +17,11 @@ name = var("name") +@replace(p.Field(p.JoinChain)) +def peel_join_field(_): + return _.rel.values[_.name] + + @replace(ops.Analytic) def project_wrap_analytic(_, rel): # Wrap analytic functions in a window function diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py index 4735596a970d..906284c1a32c 100644 --- a/ibis/expr/tests/test_newrels.py +++ b/ibis/expr/tests/test_newrels.py @@ -1215,3 +1215,44 @@ def test_join_expressions_are_equal(): join1 = t1.inner_join(t2, [t1.a == t2.a]) join2 = t1.inner_join(t2, [t1.a == t2.a]) assert join1.equals(join2) + + +def test_join_between_joins(): + t1 = ibis.table( + [("key1", "string"), ("key2", "string"), ("value1", "double")], + "first", + ) + t2 = ibis.table([("key1", "string"), ("value2", "double")], "second") + t3 = ibis.table( + [("key2", "string"), ("key3", "string"), ("value3", "double")], + "third", + ) + t4 = ibis.table([("key3", "string"), ("value4", "double")], "fourth") + + left = t1.inner_join(t2, [("key1", "key1")])[t1, t2.value2] + right = t3.inner_join(t4, [("key3", "key3")])[t3, t4.value4] + + joined = left.inner_join(right, left.key2 == right.key2) + + # At one point, the expression simplification was resulting in bad refs + # here (right.value3 referencing the table inside the right join) + exprs = [left, right.value3, right.value4] + expr = joined.select(exprs) + + with join_tables(t1, t2, right) as (r1, r2, r3): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.key1 == r2.key1]), + ops.JoinLink("inner", r3, [r1.key2 == r3.key2]), + ], + values={ + "key1": r1.key1, + "key2": r1.key2, + "value1": r1.value1, + "value2": r2.value2, + "value3": r3.value3, + "value4": r3.value4, + }, + ) + assert expr.op() == expected diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py index e66b97732cd0..4afaca288980 100644 --- a/ibis/expr/types/joins.py +++ b/ibis/expr/types/joins.py @@ -17,6 +17,8 @@ from ibis.expr.types.relations import dereference_mapping import ibis +from ibis.expr.rewrites import peel_join_field + def disambiguate_fields(how, left_fields, right_fields, lname, rname): collisions = set() @@ -207,6 +209,9 @@ def select(self, *args, **kwargs): # if there are values referencing fields from the join chain constructed # so far, we need to replace them the fields from one of the join links subs = dereference_mapping_left(chain) + values = { + k: v.replace(peel_join_field, filter=ops.Value) for k, v in values.items() + } values = {k: v.replace(subs, 
filter=ops.Value) for k, v in values.items()} node = chain.copy(values=values) From 405f1bac99e70ae0df4eadf5b3a9a552ae76c69a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Sun, 24 Dec 2023 13:40:59 +0100 Subject: [PATCH 016/161] feat(sql): lower expressions to SQL-like relational operations --- ibis/backends/base/sqlglot/rewrites.py | 131 ++++++++++++++++++ .../base/sqlglot/tests/test_rewrites.py | 69 +++++++++ 2 files changed, 200 insertions(+) create mode 100644 ibis/backends/base/sqlglot/rewrites.py create mode 100644 ibis/backends/base/sqlglot/tests/test_rewrites.py diff --git a/ibis/backends/base/sqlglot/rewrites.py b/ibis/backends/base/sqlglot/rewrites.py new file mode 100644 index 000000000000..fcaed94d78dd --- /dev/null +++ b/ibis/backends/base/sqlglot/rewrites.py @@ -0,0 +1,131 @@ +"""Lower the ibis expression graph to a SQL-like relational algebra.""" + + +from __future__ import annotations + +from typing import Literal, Optional + +from public import public + +import ibis.expr.datashape as ds +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis.common.annotations import attribute +from ibis.common.collections import FrozenDict # noqa: TCH001 +from ibis.common.patterns import Object, replace +from ibis.common.typing import VarTuple # noqa: TCH001 +from ibis.expr.rewrites import p +from ibis.expr.schema import Schema + + +@public +class Select(ops.Relation): + """Relation modelled after SQL's SELECT statement.""" + + parent: ops.Relation + selections: FrozenDict[str, ops.Value] = {} + predicates: VarTuple[ops.Value[dt.Boolean]] = () + sort_keys: VarTuple[ops.SortKey] = () + + @attribute + def values(self): + return self.selections + + @attribute + def schema(self): + return Schema({k: v.dtype for k, v in self.selections.items()}) + + +@public +class Window(ops.Value): + """Window modelled after SQL's window statements.""" + + how: Literal["rows", "range"] + func: ops.Reduction | ops.Analytic + start: Optional[ops.WindowBoundary] = None + end: Optional[ops.WindowBoundary] = None + group_by: VarTuple[ops.Column] = () + order_by: VarTuple[ops.SortKey] = () + + shape = ds.columnar + + @attribute + def dtype(self): + return self.func.dtype + + +@replace(p.Project) +def project_to_select(_): + """Convert a Project node to a Select node.""" + return Select(_.parent, selections=_.values) + + +@replace(p.Filter) +def filter_to_select(_): + """Convert a Filter node to a Select node.""" + return Select(_.parent, selections=_.values, predicates=_.predicates) + + +@replace(p.Sort) +def sort_to_select(_): + """Convert a Sort node to a Select node.""" + return Select(_.parent, selections=_.values, sort_keys=_.keys) + + +@replace(p.WindowFunction) +def window_function_to_window(_): + """Convert a WindowFunction node to a Window node.""" + if isinstance(_.frame, ops.RowsWindowFrame) and _.frame.max_lookback is not None: + raise NotImplementedError("max_lookback is not supported for SQL backends") + return Window( + how=_.frame.how, + func=_.func, + start=_.frame.start, + end=_.frame.end, + group_by=_.frame.group_by, + order_by=_.frame.order_by, + ) + + +@replace(Object(Select, Object(Select))) +def merge_select_select(_): + """Merge subsequent Select relations into one. + + This rewrites eliminates `_.parent` by merging the outer and the inner + `predicates`, `sort_keys` and keeping the outer `selections`. All selections + from the inner Select are inlined into the outer Select. 
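As a concrete illustration of the merge described above (the table and column names here are only illustrative), a projection over a filter collapses into a single `Select`, keeping the outer selections and merging the predicates:

    import ibis
    from ibis.backends.base.sqlglot.rewrites import Select, sqlize

    t = ibis.table({"a": "int64", "b": "string"}, name="t")
    expr = t.filter(t.a > 0).select("a")

    # sqlize (defined later in this module) first rewrites both the Filter
    # and the Project into Select nodes, then merge_select_select flattens
    # the pair into one Select whose parent is the original table
    node = sqlize(expr.op())
    assert isinstance(node, Select)
    assert node.parent == t.op()
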
+ """ + # don't merge if either the outer or the inner select has window functions + for v in _.selections.values(): + if v.find(Window, filter=ops.Value): + return _ + for v in _.parent.selections.values(): + if v.find((Window, ops.Unnest), filter=ops.Value): + return _ + for v in _.predicates: + if v.find(ops.ExistsSubquery, filter=ops.Value): + return _ + + subs = {ops.Field(_.parent, k): v for k, v in _.parent.values.items()} + selections = {k: v.replace(subs) for k, v in _.selections.items()} + predicates = tuple(p.replace(subs, filter=ops.Value) for p in _.predicates) + sort_keys = tuple(s.replace(subs) for s in _.sort_keys) + + return Select( + _.parent.parent, + selections=selections, + predicates=_.parent.predicates + predicates, + sort_keys=_.parent.sort_keys + sort_keys, + ) + + +def sqlize(node): + """Lower the ibis expression graph to a SQL-like relational algebra.""" + step1 = node.replace( + window_function_to_window + | project_to_select + | filter_to_select + | sort_to_select + ) + step2 = step1.replace(merge_select_select) + return step2 diff --git a/ibis/backends/base/sqlglot/tests/test_rewrites.py b/ibis/backends/base/sqlglot/tests/test_rewrites.py new file mode 100644 index 000000000000..5bebca86bcea --- /dev/null +++ b/ibis/backends/base/sqlglot/tests/test_rewrites.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import ibis +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis.backends.base.sqlglot.rewrites import Select, Window, sqlize + +t = ibis.table( + name="t", + schema={ + "a": dt.int64, + "b": dt.string, + "c": dt.double, + "d": dt.boolean, + }, +) + + +def test_sqlize(): + expr = t.mutate(e=t.a.fillna(0)).filter(t.a > 0).order_by(t.b).mutate(f=t.a + 1) + + result = sqlize(expr.op()) + expected = Select( + parent=t, + selections={ + "a": t.a, + "b": t.b, + "c": t.c, + "d": t.d, + "e": ops.Coalesce([t.a, 0]), + "f": t.a + 1, + }, + predicates=(t.a > 0,), + sort_keys=(t.b.asc(),), + ) + assert result == expected + + +def test_sqlize_dont_merge_windows(): + g = t.a.sum().name("g") + h = t.a.cumsum().name("h") + expr = t.mutate(g, h).filter(t.a > 0).select("a", "g", "h") + + result = sqlize(expr.op()) + sel1 = Select( + parent=t, + selections={ + "a": t.a, + "b": t.b, + "c": t.c, + "d": t.d, + "g": Window(how="rows", func=t.a.sum()), + "h": Window( + how="rows", func=t.a.sum(), end=ops.WindowBoundary(0, preceding=False) + ), + }, + ).to_expr() + + sel2 = Select( + parent=sel1, + selections={ + "a": sel1.a, + "g": sel1.g, + "h": sel1.h, + }, + predicates=(sel1.a > 0,), + ) + + assert result == sel2 From 413543dbb68895202164af948e7613b50e3ec7ba Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Mon, 10 Jul 2023 16:58:53 -0400 Subject: [PATCH 017/161] refactor(duckdb): initial cut of sqlglot DuckDB compiler it's alive! 
tests run (and fail) chore(duckdb): naive port of clickhouse compiler fix(duckdb): hacky fix for output shape feat(duckdb): bitwise ops (most of them) feat(duckdb): handle pandas dtype mapping in execute feat(duckdb): handle decimal types feat(duckdb): add euler's number test(duckdb): remove duckdb from alchemycon feat(duckdb): get _most_ of string ops working still some failures in re_exract feat(duckdb): add hash feat(duckdb): add CAST feat(duckdb): add cot and strright chore(duckdb): mark all the targets that still need attention (at least) feat(duckdb): combine binary bitwise ops chore(datestuff): some datetime ops feat(duckdb): add levenshtein, use op.dtype instead of output_dtype feat(duckdb): add blank list_schemas, use old current_database for now feat(duckdb): basic interval ops feat(duckdb): timestamp and temporal ops feat(duckdb): use pyarrow for fetching execute results feat(duckdb): handle interval casts, broken for columns feat(duckdb): shove literal handling up top feat(duckdb): more timestamp ops feat(duckdb): back to pandas output in execute feat(duckdb): timezone handling in cast feat(duckdb): ms and us epoch timestamp support chore(duckdb): misc cleanup feat(duckdb): initial create table feat(duckdb): add _from_url feat(duckdb): add read_parquet feat(duckdb): add persistent cache fix(duckdb): actually insert data if present in create_table feat(duckdb): use duckdb API read_parquet feat(duckdb): add read_csv This, frustratingly, cannot use the Python API for `read_csv` since that does not support list of files, for some reason. fix(duckdb): dont fully qualify the table names chore(duckdb): cleanup chore(duckdb): mark broken test broken fix(duckdb): fix read_parquet so it works feat(duckdb): add to_pyarrow, to_pyarrow_batches, sql() feat(duckdb): null checking feat(duckdb): translate uints fix(duckdb): fix file outputs and torch output fix(duckdb): add rest of integer types fix(duckdb): ops.InValues feat(duckdb): use sqlglot expressions (maybe a big mistake) fix(duckdb): don't stringify strings feat(duckdb): use sqlglot expr instead of strings for count fix(duckdb): fix isin fix(duckdb): fix some agg variance functions fix(duckdb): for logical equals, use sqlglot not operator fix(duckdb): struct not tuple for struct type --- ibis/backends/base/__init__.py | 4 +- ibis/backends/base/df/timecontext.py | 20 +- ibis/backends/base/sql/alchemy/registry.py | 3 +- ibis/backends/base/sql/registry/main.py | 3 +- ibis/backends/base/sqlglot/__init__.py | 328 +++-- ibis/backends/base/sqlglot/compiler.py | 1283 +++++++++++++++++ ibis/backends/base/sqlglot/datatypes.py | 21 - ibis/backends/base/sqlglot/rewrites.py | 10 + ibis/backends/clickhouse/__init__.py | 235 +-- ibis/backends/clickhouse/compiler.py | 686 +++++++++ ibis/backends/clickhouse/compiler/__init__.py | 13 - ibis/backends/clickhouse/compiler/core.py | 133 -- .../backends/clickhouse/compiler/relations.py | 215 --- ibis/backends/clickhouse/compiler/values.py | 1059 -------------- .../test_cast_double_col/float/out.sql | 2 +- .../test_noop_cast/bigint_col/out.sql | 2 +- .../test_noop_cast/bool_col/out.sql | 2 +- .../test_noop_cast/date_string_col/out.sql | 2 +- .../test_noop_cast/double_col/out.sql | 2 +- .../test_noop_cast/float_col/out.sql | 2 +- .../test_functions/test_noop_cast/id/out.sql | 2 +- .../test_noop_cast/int_col/out.sql | 2 +- .../test_noop_cast/month/out.sql | 2 +- .../test_noop_cast/smallint_col/out.sql | 2 +- .../test_noop_cast/string_col/out.sql | 2 +- .../test_noop_cast/timestamp_col/out.sql | 2 +- 
.../test_noop_cast/tinyint_col/out.sql | 2 +- .../test_noop_cast/year/out.sql | 2 +- .../test_string_column_like/out2.sql | 6 +- .../ceil/out.sql | 2 +- .../lambda1/out.sql | 4 +- .../lambda2/out.sql | 6 +- .../test_array_join_in_subquery/out.sql | 8 +- .../out.sql | 2 +- .../test_select/test_complex_join/out.sql | 11 + .../test_select/test_count_name/out.sql | 2 +- .../test_isin_notin_in_select/out1.sql | 14 +- .../test_isin_notin_in_select/out2.sql | 14 +- .../test_join_self_reference/out.sql | 14 +- .../test_named_from_filter_groupby/out1.sql | 17 +- .../test_named_from_filter_groupby/out2.sql | 17 +- .../playerID-awardID-any_inner_join/out.sql | 27 +- .../playerID-awardID-any_left_join/out.sql | 27 +- .../playerID-awardID-inner_join/out.sql | 27 +- .../playerID-awardID-left_join/out.sql | 27 +- .../playerID-playerID-any_inner_join/out.sql | 27 +- .../playerID-playerID-any_left_join/out.sql | 27 +- .../playerID-playerID-inner_join/out.sql | 27 +- .../playerID-playerID-left_join/out.sql | 27 +- .../test_simple_scalar_aggregates/out.sql | 24 +- .../test_table_column_unbox/out.sql | 32 +- .../test_where_simple_comparisons/out.sql | 14 +- .../test_where_with_between/out.sql | 14 +- .../test_where_with_timestamp/out.sql | 2 +- ibis/backends/clickhouse/tests/test_select.py | 9 + ibis/backends/duckdb/__init__.py | 829 +++++++---- ibis/backends/duckdb/compiler.py | 426 +++++- ibis/backends/duckdb/datatypes.py | 75 +- ibis/backends/duckdb/tests/conftest.py | 11 +- .../test_client/test_to_other_sql/out.sql | 3 + .../test_geospatial_dwithin/out.sql | 4 +- .../as_text/out.sql | 2 +- .../n_points/out.sql | 2 +- ibis/backends/duckdb/tests/test_client.py | 68 +- ibis/backends/duckdb/tests/test_datatypes.py | 14 +- ibis/backends/duckdb/tests/test_geospatial.py | 28 +- ibis/backends/duckdb/tests/test_register.py | 12 +- ibis/backends/duckdb/tests/test_udf.py | 13 +- .../test_many_subqueries/clickhouse/out.sql | 71 +- .../test_many_subqueries/datafusion/out.sql | 55 + .../test_many_subqueries/duckdb/out.sql | 83 +- .../test_many_subqueries/snowflake/out.sql | 83 +- .../test_join/test_complex_join_agg/out.sql | 17 + .../clickhouse/out.sql | 63 +- .../datafusion/out.sql | 48 + .../duckdb/out.sql | 52 +- .../snowflake/out.sql | 52 +- .../datafusion/out.sql | 22 + .../test_group_by_has_index/snowflake/out.sql | 6 +- .../test_sql/test_isin_bug/clickhouse/out.sql | 16 +- .../test_sql/test_isin_bug/datafusion/out.sql | 15 + .../test_sql/test_isin_bug/duckdb/out.sql | 16 +- .../test_sql/test_isin_bug/snowflake/out.sql | 18 +- .../test_union_aliasing/clickhouse/out.sql | 136 +- .../test_union_aliasing/duckdb/out.sql | 153 +- ibis/backends/tests/sql/conftest.py | 17 +- .../test_column_distinct/out.sql | 9 +- .../test_column_expr_default_name/out.sql | 3 +- .../test_column_expr_retains_name/out.sql | 3 +- .../test_count_distinct/decompiled.py | 7 +- .../test_compiler/test_count_distinct/out.sql | 18 +- .../decompiled.py | 21 +- .../test_difference_project_column/out.sql | 46 +- .../test_having_from_filter/decompiled.py | 10 +- .../test_having_from_filter/out.sql | 33 +- .../test_compiler/test_having_size/out.sql | 24 +- .../decompiled.py | 21 +- .../test_intersect_project_column/out.sql | 46 +- .../decompiled.py | 5 +- .../test_multiple_count_distinct/out.sql | 11 +- .../test_pushdown_with_or/out.sql | 23 +- .../test_table_difference/decompiled.py | 21 +- .../test_table_difference/out.sql | 41 +- .../test_compiler/test_table_distinct/out.sql | 10 +- .../test_table_drop_with_filter/decompiled.py | 11 +- 
.../test_table_drop_with_filter/out.sql | 46 +- .../test_table_intersect/decompiled.py | 21 +- .../test_table_intersect/out.sql | 41 +- .../test_compiler/test_union/decompiled.py | 21 +- .../test_compiler/test_union/out.sql | 41 +- .../test_union_order_by/decompiled.py | 5 +- .../test_compiler/test_union_order_by/out.sql | 24 +- .../test_union_project_column/decompiled.py | 20 +- .../test_union_project_column/out.sql | 46 +- .../test_aggregate_count_joined/decompiled.py | 18 +- .../test_aggregate_count_joined/out.sql | 8 +- .../test_aggregate_having/explicit.sql | 18 +- .../test_aggregate_having/inline.sql | 24 +- .../out.sql | 22 +- .../agg_filtered.sql | 37 +- .../agg_filtered2.sql | 39 +- .../filtered.sql | 28 +- .../proj.sql | 25 +- .../test_anti_join/decompiled.py | 2 +- .../test_select_sql/test_anti_join/out.sql | 12 +- .../test_select_sql/test_bool_bool/out.sql | 12 +- .../test_bug_project_multiple_times/out.sql | 77 +- .../test_case_in_projection/decompiled.py | 38 +- .../test_case_in_projection/out.sql | 29 +- .../result.sql | 51 +- .../test_complex_union/result.sql | 57 +- .../out.sql | 36 +- .../test_select_sql/test_endswith/out.sql | 3 +- .../test_filter_inside_exists/out.sql | 33 +- .../test_filter_predicates/out.sql | 12 +- .../result.sql | 56 +- .../expr3.sql | 28 +- .../expr4.sql | 30 +- .../test_fuse_projections/project.sql | 12 +- .../test_fuse_projections/project_filter.sql | 19 +- .../test_identifier_quoting/out.sql | 6 +- .../result.sql | 17 +- .../result.sql | 15 +- .../test_join_between_joins/decompiled.py | 14 +- .../test_join_between_joins/out.sql | 44 +- .../test_join_just_materialized/decompiled.py | 26 +- .../test_join_just_materialized/out.sql | 27 +- .../test_join_projection_subquery_bug/out.sql | 33 +- .../result.sql | 23 + .../test_join_with_limited_table/out.sql | 20 +- .../test_limit_with_self_join/out.sql | 22 +- .../test_loj_subquery_filter_handling/out.sql | 40 +- .../test_multiple_joins/decompiled.py | 19 +- .../test_multiple_joins/out.sql | 22 +- .../test_multiple_limits/out.sql | 12 +- .../decompiled.py | 4 +- .../out.sql | 17 +- .../out.sql | 17 +- .../agg_explicit_column/decompiled.py | 2 +- .../agg_explicit_column/out.sql | 9 +- .../agg_string_columns/decompiled.py | 4 +- .../agg_string_columns/out.sql | 11 +- .../aggregate_table_count_metric/out.sql | 3 +- .../test_select_sql/filter_then_limit/out.sql | 10 +- .../test_select_sql/limit_simple/out.sql | 5 +- .../test_select_sql/limit_then_filter/out.sql | 15 +- .../test_select_sql/limit_with_offset/out.sql | 8 +- .../self_reference_simple/out.sql | 5 +- .../test_select_sql/single_column/out.sql | 8 +- .../out.sql | 5 +- .../out.sql | 40 +- .../test_semi_join/decompiled.py | 2 +- .../test_select_sql/test_semi_join/out.sql | 12 +- .../test_simple_joins/decompiled.py | 2 +- .../test_simple_joins/inner.sql | 12 +- .../test_simple_joins/inner_two_preds.sql | 13 +- .../test_simple_joins/left.sql | 12 +- .../test_simple_joins/outer.sql | 12 +- .../result1.sql | 25 +- .../result2.sql | 25 +- .../test_select_sql/test_startswith/out.sql | 3 +- .../out.sql | 77 +- .../expr.sql | 17 +- .../expr2.sql | 26 +- .../test_subquery_in_union/out.sql | 81 +- .../test_subquery_used_for_self_join/out.sql | 54 +- .../test_topk_analysis_bug/out.sql | 62 +- .../test_topk_operation/e1.sql | 36 +- .../test_topk_operation/e2.sql | 36 +- .../test_topk_predicate_pushdown_bug/out.sql | 83 +- .../test_topk_to_aggregate/out.sql | 17 +- .../test_tpch_self_join_failure/out.sql | 75 +- .../decompiled.py | 8 +- 
.../test_where_analyze_scalar_op/out.sql | 5 +- .../decompiled.py | 6 +- .../test_where_no_pushdown_possible/out.sql | 23 +- .../test_where_with_between/decompiled.py | 2 +- .../test_where_with_between/out.sql | 12 +- .../test_where_with_join/decompiled.py | 7 +- .../test_where_with_join/out.sql | 26 +- .../test_aggregate/having_count/out.sql | 20 + .../test_aggregate/having_sum/out.sql | 14 + .../test_sql/test_aggregate/single/out.sql | 6 + .../test_sql/test_aggregate/two/out.sql | 8 + .../snapshots/test_sql/test_between/out.sql | 1 + .../test_boolean_conjunction/and/out.sql | 5 + .../test_boolean_conjunction/or/out.sql | 5 + .../snapshots/test_sql/test_coalesce/out.sql | 5 + .../test_sql/test_comparisons/eq/out.sql | 1 + .../test_sql/test_comparisons/ge/out.sql | 1 + .../test_sql/test_comparisons/gt/out.sql | 1 + .../test_sql/test_comparisons/le/out.sql | 1 + .../test_sql/test_comparisons/lt/out.sql | 1 + .../test_sql/test_comparisons/ne/out.sql | 1 + .../out.sql | 20 + .../test_distinct/count_distinct/out.sql | 1 + .../group_by_count_distinct/out.sql | 6 + .../test_distinct/projection_distinct/out.sql | 8 + .../single_column_projection_distinct/out.sql | 7 + .../test_distinct/table_distinct/out.sql | 3 + .../sql/snapshots/test_sql/test_exists/e1.sql | 19 + .../sql/snapshots/test_sql/test_exists/e2.sql | 23 + .../out.sql | 14 + .../test_isnull_notnull/isnull/out.sql | 1 + .../test_isnull_notnull/notnull/out.sql | 1 + .../test_join_just_materialized/out.sql | 21 + .../test_sql/test_limit/expr_fn0/out.sql | 4 + .../test_sql/test_limit/expr_fn1/out.sql | 5 + .../test_sql/test_limit_filter/out.sql | 8 + .../test_sql/test_limit_subquery/out.sql | 12 + .../decompiled.py | 15 + .../test_lower_projection_sort_key/out.sql | 28 + .../test_sql/test_multi_join/out.sql | 20 + .../out.sql | 11 + .../test_sql/test_named_expr/out.sql | 3 + .../snapshots/test_sql/test_negate/out.sql | 3 + .../test_sql/test_no_cart_join/out.sql | 36 + .../test_sql/test_no_cartesian_join/out.sql | 51 + .../test_sql/test_no_cross_join/out.sql | 16 + .../test_sql/test_not_exists/out.sql | 21 + .../test_sql/test_order_by/column/out.sql | 5 + .../test_sql/test_order_by/random/out.sql | 5 + .../test_sql/test_order_by_expr/out.sql | 13 + .../test_sql/test_searched_case/out.sql | 7 + .../anti.sql | 21 + .../semi.sql | 19 + .../test_sql/test_self_reference_join/out.sql | 8 + .../test_sql/test_simple_case/out.sql | 1 + .../out.sql | 12 + .../test_sql/test_subquery_aliased/out.sql | 14 + .../test_where_correlated_subquery/out.sql | 19 + .../out.sql | 33 + .../decompiled.py | 9 + .../test_where_simple_comparisons/out.sql | 11 + .../test_where_uncorrelated_subquery/out.sql | 9 + ibis/backends/tests/sql/test_compiler.py | 1 - ibis/backends/tests/sql/test_select_sql.py | 32 +- ibis/backends/tests/sql/test_sql.py | 47 + ibis/backends/tests/test_aggregation.py | 55 +- ibis/backends/tests/test_array.py | 4 +- ibis/backends/tests/test_client.py | 14 +- ibis/backends/tests/test_dot_sql.py | 12 +- ibis/backends/tests/test_examples.py | 2 +- ibis/backends/tests/test_export.py | 19 +- ibis/backends/tests/test_generic.py | 56 +- ibis/backends/tests/test_join.py | 12 +- ibis/backends/tests/test_numeric.py | 107 +- ibis/backends/tests/test_param.py | 4 +- ibis/backends/tests/test_register.py | 6 +- ibis/backends/tests/test_sql.py | 19 +- ibis/backends/tests/test_string.py | 31 +- ibis/backends/tests/test_struct.py | 39 +- ibis/backends/tests/test_temporal.py | 124 +- ibis/backends/tests/test_window.py | 41 +- ibis/backends/tests/tpch/conftest.py | 
22 +- .../test_h01/test_tpc_h01/duckdb/h01.sql | 57 +- .../test_h01/test_tpc_h01/snowflake/h01.sql | 103 +- .../test_h02/test_tpc_h02/duckdb/h02.sql | 186 ++- .../test_h02/test_tpc_h02/snowflake/h02.sql | 307 ++-- .../test_h03/test_tpc_h03/duckdb/h03.sql | 126 +- .../test_h03/test_tpc_h03/snowflake/h03.sql | 210 ++- .../test_h04/test_tpc_h04/duckdb/h04.sql | 57 +- .../test_h04/test_tpc_h04/snowflake/h04.sql | 90 +- .../test_h05/test_tpc_h05/duckdb/h05.sql | 144 +- .../test_h05/test_tpc_h05/snowflake/h05.sql | 276 ++-- .../test_h06/test_tpc_h06/duckdb/h06.sql | 33 +- .../test_h06/test_tpc_h06/snowflake/h06.sql | 53 +- .../test_h07/test_tpc_h07/snowflake/h07.sql | 222 +-- .../test_h08/test_tpc_h08/duckdb/h08.sql | 104 +- .../test_h08/test_tpc_h08/snowflake/h08.sql | 276 ++-- .../test_h09/test_tpc_h09/duckdb/h09.sql | 74 +- .../test_h09/test_tpc_h09/snowflake/h09.sql | 210 +-- .../test_h10/test_tpc_h10/duckdb/h10.sql | 156 +- .../test_h10/test_tpc_h10/snowflake/h10.sql | 254 ++-- .../test_h11/test_tpc_h11/duckdb/h11.sql | 132 +- .../test_h11/test_tpc_h11/snowflake/h11.sql | 159 ++ .../test_h12/test_tpc_h12/duckdb/h12.sql | 90 +- .../test_h12/test_tpc_h12/snowflake/h12.sql | 157 +- .../test_h13/test_tpc_h13/duckdb/h13.sql | 55 +- .../test_h13/test_tpc_h13/snowflake/h13.sql | 102 +- .../test_h14/test_tpc_h14/duckdb/h14.sql | 78 +- .../test_h14/test_tpc_h14/snowflake/h14.sql | 146 +- .../test_h15/test_tpc_h15/duckdb/h15.sql | 150 +- .../test_h15/test_tpc_h15/snowflake/h15.sql | 202 ++- .../test_h16/test_tpc_h16/duckdb/h16.sql | 98 +- .../test_h16/test_tpc_h16/snowflake/h16.sql | 147 +- .../test_h17/test_tpc_h17/duckdb/h17.sql | 103 +- .../test_h17/test_tpc_h17/snowflake/h17.sql | 154 +- .../test_h18/test_tpc_h18/duckdb/h18.sql | 142 +- .../test_h18/test_tpc_h18/snowflake/h18.sql | 243 +++- .../test_h19/test_tpc_h19/duckdb/h19.sql | 172 ++- .../test_h19/test_tpc_h19/snowflake/h19.sql | 235 ++- .../test_h20/test_tpc_h20/duckdb/h20.sql | 129 +- .../test_h20/test_tpc_h20/snowflake/h20.sql | 202 +-- .../test_h21/test_tpc_h21/duckdb/h21.sql | 114 +- .../test_h21/test_tpc_h21/snowflake/h21.sql | 166 +++ .../test_h22/test_tpc_h22/duckdb/h22.sql | 112 +- .../test_h22/test_tpc_h22/snowflake/h22.sql | 52 + ibis/backends/tests/tpch/test_h01.py | 2 +- ibis/backends/tests/tpch/test_h04.py | 2 +- ibis/backends/tests/tpch/test_h08.py | 6 - ibis/backends/tests/tpch/test_h11.py | 7 - ibis/backends/tests/tpch/test_h14.py | 6 - ibis/backends/tests/tpch/test_h17.py | 6 - ibis/backends/tests/tpch/test_h21.py | 8 - ibis/backends/tests/tpch/test_h22.py | 7 - 326 files changed, 10943 insertions(+), 6146 deletions(-) create mode 100644 ibis/backends/base/sqlglot/compiler.py create mode 100644 ibis/backends/clickhouse/compiler.py delete mode 100644 ibis/backends/clickhouse/compiler/__init__.py delete mode 100644 ibis/backends/clickhouse/compiler/core.py delete mode 100644 ibis/backends/clickhouse/compiler/relations.py delete mode 100644 ibis/backends/clickhouse/compiler/values.py create mode 100644 ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql create mode 100644 ibis/backends/duckdb/tests/snapshots/test_client/test_to_other_sql/out.sql create mode 100644 ibis/backends/tests/snapshots/test_generic/test_many_subqueries/datafusion/out.sql create mode 100644 ibis/backends/tests/snapshots/test_join/test_complex_join_agg/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/datafusion/out.sql create mode 100644 
ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/datafusion/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_isin_bug/datafusion/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_count/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_sum/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/single/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/two/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_between/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/and/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/or/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_coalesce/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/eq/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ge/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/gt/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/le/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/lt/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ne/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_cte_factor_distinct_but_equal/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_distinct/count_distinct/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_distinct/group_by_count_distinct/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_distinct/projection_distinct/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_distinct/single_column_projection_distinct/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_distinct/table_distinct/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_exists/e1.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_exists/e2.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_filter_group_by_agg_with_same_name/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/isnull/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_join_just_materialized/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn0/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn1/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_limit_filter/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_limit_subquery/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/decompiled.py create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_mutate_filter_join_no_cross_join/out.sql create mode 100644 
ibis/backends/tests/sql/snapshots/test_sql/test_named_expr/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_negate/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_no_cross_join/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_not_exists/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_order_by/column/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_order_by/random/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_order_by_expr/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_searched_case/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/anti.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/semi.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_join/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_simple_case/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_sort_aggregation_translation_failure/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_subquery_aliased/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/decompiled.py create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_where_uncorrelated_subquery/out.sql create mode 100644 ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql create mode 100644 ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql create mode 100644 ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql diff --git a/ibis/backends/base/__init__.py b/ibis/backends/base/__init__.py index a208a8f00bc1..a5a0cd0d2106 100644 --- a/ibis/backends/base/__init__.py +++ b/ibis/backends/base/__init__.py @@ -1217,9 +1217,9 @@ def _cached(self, expr: ir.Table): if (result := self._query_cache.get(op)) is None: self._query_cache.store(expr) result = self._query_cache[op] - return ir.CachedTable(result) + return ir.CachedTableExpr(result) - def _release_cached(self, expr: ir.CachedTable) -> None: + def _release_cached(self, expr: ir.CachedTableExpr) -> None: """Releases the provided cached expression. 
Parameters diff --git a/ibis/backends/base/df/timecontext.py b/ibis/backends/base/df/timecontext.py index f84dd473bc4c..a04f905ce0c5 100644 --- a/ibis/backends/base/df/timecontext.py +++ b/ibis/backends/base/df/timecontext.py @@ -265,19 +265,19 @@ def adjust_context_alias( return adjust_context(op.arg, scope, timecontext) -@adjust_context.register(ops.AsOfJoin) -def adjust_context_asof_join( - op: ops.AsOfJoin, scope: Scope, timecontext: TimeContext -) -> TimeContext: - begin, end = timecontext +# @adjust_context.register(ops.AsOfJoin) +# def adjust_context_asof_join( +# op: ops.AsOfJoin, scope: Scope, timecontext: TimeContext +# ) -> TimeContext: +# begin, end = timecontext - if op.tolerance is not None: - from ibis.backends.pandas.execution import execute +# if op.tolerance is not None: +# from ibis.backends.pandas.execution import execute - timedelta = execute(op.tolerance) - return (begin - timedelta, end) +# timedelta = execute(op.tolerance) +# return (begin - timedelta, end) - return timecontext +# return timecontext @adjust_context.register(ops.WindowFunction) diff --git a/ibis/backends/base/sql/alchemy/registry.py b/ibis/backends/base/sql/alchemy/registry.py index f4a809c364e1..5ff058f72c8c 100644 --- a/ibis/backends/base/sql/alchemy/registry.py +++ b/ibis/backends/base/sql/alchemy/registry.py @@ -629,8 +629,7 @@ class array_filter(FunctionElement): ops.Literal: _literal, ops.SimpleCase: _simple_case, ops.SearchedCase: _searched_case, - ops.TableColumn: _table_column, - ops.TableArrayView: _table_array_view, + ops.Field: _table_column, ops.ExistsSubquery: _exists_subquery, # miscellaneous varargs ops.Least: varargs(sa.func.least), diff --git a/ibis/backends/base/sql/registry/main.py b/ibis/backends/base/sql/registry/main.py index 1f1d6cc99db3..8a2d8bcbb5b9 100644 --- a/ibis/backends/base/sql/registry/main.py +++ b/ibis/backends/base/sql/registry/main.py @@ -358,8 +358,7 @@ def _floor(t, op): ops.InColumn: binary_infix.in_column, ops.SimpleCase: case.simple_case, ops.SearchedCase: case.searched_case, - ops.TableColumn: table_column, - ops.TableArrayView: table_array_view, + ops.Field: table_column, ops.DateAdd: timestamp.timestamp_op("date_add"), ops.DateSub: timestamp.timestamp_op("date_sub"), ops.DateDiff: timestamp.timestamp_op("datediff"), diff --git a/ibis/backends/base/sqlglot/__init__.py b/ibis/backends/base/sqlglot/__init__.py index c3e2a942068f..50cb23ec9a71 100644 --- a/ibis/backends/base/sqlglot/__init__.py +++ b/ibis/backends/base/sqlglot/__init__.py @@ -1,97 +1,245 @@ from __future__ import annotations -from functools import partial -from typing import TYPE_CHECKING, Any, Callable +import abc +from typing import TYPE_CHECKING, Any, ClassVar import sqlglot as sg +import sqlglot.expressions as sge -if TYPE_CHECKING: - import ibis.expr.datatypes as dt - from ibis.backends.base.sqlglot.datatypes import SqlglotType - - -class AggGen: - __slots__ = ("aggfunc",) - - def __init__(self, *, aggfunc: Callable) -> None: - self.aggfunc = aggfunc - - def __getattr__(self, name: str) -> partial: - return partial(self.aggfunc, name) - - def __getitem__(self, key: str) -> partial: - return getattr(self, key) - - -def _func(name: str, *args: Any, **kwargs: Any): - return sg.func(name, *map(sg.exp.convert, args), **kwargs) - - -class FuncGen: - __slots__ = () - - def __getattr__(self, name: str) -> partial: - return partial(_func, name) - - def __getitem__(self, key: str) -> partial: - return getattr(self, key) - - def array(self, *args): - return 
sg.exp.Array.from_arg_list(list(map(sg.exp.convert, args))) - - def tuple(self, *args): - return sg.func("tuple", *map(sg.exp.convert, args)) - - def exists(self, query): - return sg.exp.Exists(this=query) - - def concat(self, *args): - return sg.exp.Concat(expressions=list(map(sg.exp.convert, args))) +import ibis +import ibis.expr.operations as ops +import ibis.expr.schema as sch +from ibis.backends.base import BaseBackend +from ibis.backends.base.sqlglot.compiler import STAR - def map(self, keys, values): - return sg.exp.Map(keys=keys, values=values) - - -class ColGen: - __slots__ = () - - def __getattr__(self, name: str) -> sg.exp.Column: - return sg.column(name) - - def __getitem__(self, key: str) -> sg.exp.Column: - return sg.column(key) - - -def paren(expr): - """Wrap a sqlglot expression in parentheses.""" - return sg.exp.Paren(this=expr) - - -def parenthesize(op, arg): - import ibis.expr.operations as ops - - if isinstance(op, (ops.Binary, ops.Unary)): - return paren(arg) - # function calls don't need parens - return arg - - -def interval(value, *, unit): - return sg.exp.Interval(this=sg.exp.convert(value), unit=sg.exp.var(unit)) - - -C = ColGen() -F = FuncGen() -NULL = sg.exp.Null() -FALSE = sg.exp.false() -TRUE = sg.exp.true() -STAR = sg.exp.Star() - - -def make_cast( - converter: SqlglotType, -) -> Callable[[sg.exp.Expression, dt.DataType], sg.exp.Cast]: - def cast(arg: sg.exp.Expression, to: dt.DataType) -> sg.exp.Cast: - return sg.cast(arg, to=converter.from_ibis(to)) +if TYPE_CHECKING: + from collections.abc import Iterator - return cast + import ibis.expr.datatypes as dt + import ibis.expr.types as ir + from ibis.backends.base.sqlglot.compiler import SQLGlotCompiler + from ibis.common.typing import SupportsSchema + + +class SQLGlotBackend(BaseBackend): + compiler: ClassVar[SQLGlotCompiler] + name: ClassVar[str] + + @classmethod + def has_operation(cls, operation: type[ops.Value]) -> bool: + # singledispatchmethod overrides `__get__` so we can't directly access + # the dispatcher + dispatcher = cls.compiler.visit_node.register.__self__.dispatcher + return dispatcher.dispatch(operation) is not dispatcher.dispatch(object) + + def _transform( + self, sql: sge.Expression, table_expr: ir.TableExpr + ) -> sge.Expression: + return sql + + def table( + self, name: str, schema: str | None = None, database: str | None = None + ) -> ir.Table: + """Construct a table expression. 
+ + Parameters + ---------- + name + Table name + schema + Schema name + database + Database name + + Returns + ------- + Table + Table expression + """ + table_schema = self.get_schema(name, schema=schema, database=database) + return ops.DatabaseTable( + name, + schema=table_schema, + source=self, + namespace=ops.Namespace(database=database, schema=schema), + ).to_expr() + + def _to_sqlglot( + self, expr: ir.Expr, limit: str | None = None, params=None, **_: Any + ): + """Compile an Ibis expression to a sqlglot object.""" + table_expr = expr.as_table() + + if limit == "default": + limit = ibis.options.sql.default_limit + if limit is not None: + table_expr = table_expr.limit(limit) + + if params is None: + params = {} + + sql = self.compiler.translate(table_expr.op(), params=params) + assert not isinstance(sql, sge.Subquery) + + if isinstance(sql, sge.Table): + sql = sg.select(STAR).from_(sql) + + assert not isinstance(sql, sge.Subquery) + return [self._transform(sql, table_expr)] + + def compile( + self, expr: ir.Expr, limit: str | None = None, params=None, **kwargs: Any + ): + """Compile an Ibis expression to a ClickHouse SQL string.""" + queries = self._to_sqlglot(expr, limit=limit, params=params, **kwargs) + + return ";\n\n".join( + query.sql(dialect=self.name, pretty=True) for query in queries + ) + + def _to_sql(self, expr: ir.Expr, **kwargs) -> str: + return self.compile(expr, **kwargs) + + def _log(self, sql: str) -> None: + """Log `sql`. + + This method can be implemented by subclasses. Logging occurs when + `ibis.options.verbose` is `True`. + """ + from ibis import util + + util.log(sql) + + def sql( + self, + query: str, + schema: SupportsSchema | None = None, + dialect: str | None = None, + ) -> ir.Table: + query = self._transpile_sql(query, dialect=dialect) + if schema is None: + schema = self._get_schema_using_query(query) + return ops.SQLQueryResult(query, ibis.schema(schema), self).to_expr() + + @abc.abstractmethod + def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: + """Return the metadata of a SQL query.""" + + def _get_schema_using_query(self, query: str) -> sch.Schema: + """Return an ibis Schema from a backend-specific SQL string.""" + return sch.Schema.from_tuples(self._metadata(query)) + + def create_view( + self, + name: str, + obj: ir.Table, + *, + database: str | None = None, + schema: str | None = None, + overwrite: bool = False, + ) -> ir.Table: + src = sge.Create( + this=sg.table( + name, db=schema, catalog=database, quoted=self.compiler.quoted + ), + kind="VIEW", + replace=overwrite, + expression=self.compile(obj), + ) + self._register_in_memory_tables(obj) + with self._safe_raw_sql(src): + pass + return self.table(name, database=database) + + def _register_in_memory_tables(self, expr: ir.Expr) -> None: + for memtable in expr.op().find(ops.InMemoryTable): + self._register_in_memory_table(memtable) + + def drop_view( + self, + name: str, + *, + database: str | None = None, + schema: str | None = None, + force: bool = False, + ) -> None: + src = sge.Drop( + this=sg.table( + name, db=schema, catalog=database, quoted=self.compiler.quoted + ), + kind="VIEW", + exists=force, + ) + with self._safe_raw_sql(src): + pass + + def _get_temp_view_definition(self, name: str, definition: str) -> str: + return sge.Create( + this=sg.to_identifier(name, quoted=self.compiler.quoted), + kind="VIEW", + expression=definition, + replace=True, + properties=sge.Properties(expressions=[sge.TemporaryProperty()]), + ) + + def _create_temp_view(self, table_name, source): 
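+        # Sketch of the flow below (descriptive only, not part of the original
+        # change): refuse to shadow an existing permanent table, create the view
+        # from the backend-specific `_get_temp_view_definition`, then track the
+        # name so `_register_temp_view_cleanup` (a no-op by default) can drop it
+        # later, e.g. via an atexit hook in a concrete backend.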
+ if table_name not in self._temp_views and table_name in self.list_tables(): + raise ValueError( + f"{table_name} already exists as a non-temporary table or view" + ) + + with self._safe_raw_sql(self._get_temp_view_definition(table_name, source)): + pass + + self._temp_views.add(table_name) + self._register_temp_view_cleanup(table_name) + + def _register_temp_view_cleanup(self, name: str) -> None: + """Register a clean up function for a temporary view. + + No-op by default. + + Parameters + ---------- + name + The temporary view to register for clean up. + """ + + def _load_into_cache(self, name, expr): + self.create_table(name, expr, schema=expr.schema(), temp=True) + + def _clean_up_cached_table(self, op): + self.drop_table(op.name) + + def execute( + self, expr: ir.Expr, limit: str | None = "default", **kwargs: Any + ) -> Any: + """Execute an expression.""" + + self._run_pre_execute_hooks(expr) + table = expr.as_table() + sql = self.compile(table, limit=limit, **kwargs) + + schema = table.schema() + self._log(sql) + + with self._safe_raw_sql(sql) as cur: + result = self.fetch_from_cursor(cur, schema) + return expr.__pandas_result__(result) + + def drop_table( + self, + name: str, + database: str | None = None, + schema: str | None = None, + force: bool = False, + ) -> None: + drop_stmt = sg.exp.Drop( + kind="TABLE", + this=sg.table( + name, db=schema, catalog=database, quoted=self.compiler.quoted + ), + exists=force, + ) + with self._safe_raw_sql(drop_stmt): + pass diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py new file mode 100644 index 000000000000..c7e5e092778a --- /dev/null +++ b/ibis/backends/base/sqlglot/compiler.py @@ -0,0 +1,1283 @@ +from __future__ import annotations + +import abc +import calendar +import functools +import itertools +import math +import operator +import string +from collections.abc import Mapping +from functools import partial, singledispatchmethod +from typing import TYPE_CHECKING, Any, Callable + +import sqlglot as sg +import sqlglot.expressions as sge +from public import public + +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis.backends.base.sqlglot.rewrites import Select, Window, sqlize +from ibis.common.deferred import _ +from ibis.common.patterns import replace +from ibis.expr.analysis import p, x + +if TYPE_CHECKING: + import ibis.expr.schema as sch + import ibis.expr.types as ir + from ibis.backends.base.sqlglot.datatypes import SqlglotType + + +class AggGen: + __slots__ = ("aggfunc",) + + def __init__(self, *, aggfunc: Callable) -> None: + self.aggfunc = aggfunc + + def __getattr__(self, name: str) -> partial: + return partial(self.aggfunc, name) + + def __getitem__(self, key: str) -> partial: + return getattr(self, key) + + +class FuncGen: + __slots__ = () + + def __getattr__(self, name: str) -> partial: + return lambda *args, **kwargs: sg.func(name, *map(sge.convert, args), **kwargs) + + def __getitem__(self, key: str) -> partial: + return getattr(self, key) + + def array(self, *args): + return sge.Array.from_arg_list(list(map(sge.convert, args))) + + def tuple(self, *args): + return sg.func("tuple", *map(sge.convert, args)) + + def exists(self, query): + return sge.Exists(this=query) + + def concat(self, *args): + return sge.Concat(expressions=list(map(sge.convert, args))) + + def map(self, keys, values): + return sge.Map(keys=keys, values=values) + + +class ColGen: + __slots__ = () + + def __getattr__(self, name: str) -> sge.Column: 
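+        # Illustrative note (assumed usage, not from this patch): attribute and
+        # item access both produce an unqualified sqlglot column, e.g. C.t0 and
+        # C["user id"] each return a `sge.Column`; the item form exists for
+        # names that are not valid Python identifiers.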
+ return sg.column(name) + + def __getitem__(self, key: str) -> sge.Column: + return sg.column(key) + + +def paren(expr): + """Wrap a sqlglot expression in parentheses.""" + return sge.Paren(this=expr) + + +def parenthesize(op, arg): + if isinstance(op, (ops.Binary, ops.Unary)): + return paren(arg) + # function calls don't need parens + return arg + + +C = ColGen() +F = FuncGen() +NULL = sge.NULL +FALSE = sge.FALSE +TRUE = sge.TRUE +STAR = sge.Star() + + +@replace(p.InValues(..., ())) +def empty_in_values_right_side(_): + """Replace checks against an empty right side with `False`.""" + return ops.Literal(False, dtype=dt.bool) + + +@replace( + p.WindowFunction( + p.PercentRank(x) | p.RankBase(x) | p.CumeDist(x) | p.NTile(x), + p.WindowFrame(..., order_by=()) >> _.copy(order_by=(x,)), + ) +) +def add_order_by_to_empty_ranking_window_functions(_): + """Add an ORDER BY clause to rank window functions that don't have one.""" + return _ + + +@replace( + p.WindowFunction(p.RankBase | p.NTile) + | p.StringFind + | p.FindInSet + | p.ArrayPosition +) +def one_to_zero_index(_, **__): + """Subtract one from one-index functions.""" + return ops.Subtract(_, 1) + + +@replace(ops.NthValue) +def add_one_to_nth_value_input(_, **__): + if isinstance(_.nth, ops.Literal): + nth = ops.Literal(_.nth.value + 1, dtype=_.nth.dtype) + else: + nth = ops.Add(_.nth, 1) + return _.copy(nth=nth) + + +@public +class SQLGlotCompiler(abc.ABC): + __slots__ = "agg", "f" + + rewrites: tuple = ( + empty_in_values_right_side, + add_order_by_to_empty_ranking_window_functions, + one_to_zero_index, + add_one_to_nth_value_input, + ) + """A sequence of rewrites to apply to the expression tree before compilation.""" + + no_limit_value: sge.Null | None = None + """The value to use to indicate no limit.""" + + quoted: bool | None = None + """Whether to always quote identifiers.""" + + NAN = sge.Literal.number("'NaN'::double") + """Backend's NaN literal.""" + + POS_INF = sge.Literal.number("'Inf'::double") + """Backend's positive infinity literal.""" + + NEG_INF = sge.Literal.number("'-Inf'::double") + """Backend's negative infinity literal.""" + + def __init__(self) -> None: + self.agg = AggGen(aggfunc=self._aggregate) + self.f = FuncGen() + + @property + @abc.abstractmethod + def dialect(self) -> str: + """Backend dialect.""" + + @property + @abc.abstractmethod + def type_mapper(self) -> type[SqlglotType]: + """The type mapper for the backend.""" + + @abc.abstractmethod + def _aggregate(self, funcname, *args, where): + """Translate an aggregate function. + + Three flavors of filtering aggregate function inputs: + + 1. supports filter (duckdb, postgres, others) + e.g.: sum(x) filter (where predicate) + 2. use null to filter out + e.g.: sum(if(predicate, x, NULL)) + 3. clickhouse's ${func}If implementation, e.g.: + sumIf(predicate, x) + """ + + # Concrete API + + def if_(self, condition, true, false: sge.Expression | None = None) -> sge.If: + return sge.If( + this=sge.convert(condition), + true=sge.convert(true), + false=false if false is None else sge.convert(false), + ) + + def cast(self, arg, to: dt.DataType) -> sge.Cast: + return sg.cast(sge.convert(arg), to=self.type_mapper.from_ibis(to)) + + def translate(self, op, *, params: Mapping[ir.Value, Any]) -> sge.Expression: + """Translate an ibis operation to a sqlglot expression. 
+ + Parameters + ---------- + op + An ibis operation + params + A mapping of expressions to concrete values + compiler + An instance of SQLGlotCompiler + translate_rel + Relation node translator + translate_val + Value node translator + + Returns + ------- + sqlglot.expressions.Expression + A sqlglot expression + """ + + gen_alias_index = itertools.count() + quoted = self.quoted + + def fn(node, _, **kwargs): + result = self.visit_node(node, **kwargs) + + # don't alias root nodes or value ops + if node is op or isinstance(node, ops.Value): + return result + + alias_index = next(gen_alias_index) + alias = sg.to_identifier(f"t{alias_index:d}", quoted=quoted) + + try: + return result.subquery(alias) + except AttributeError: + return result.as_(alias, quoted=quoted) + + # substitute parameters immediately to avoid having to define a + # ScalarParameter translation rule + # + # this lets us avoid threading `params` through every `translate_val` call + # only to be used in the one place it would be needed: the ScalarParameter + # `translate_val` rule + params = {param.op(): value for param, value in (params or {}).items()} + replace_literals = p.ScalarParameter >> ( + lambda _: ops.Literal(value=params[_], dtype=_.dtype) + ) + + op = op.replace( + replace_literals | functools.reduce(operator.or_, self.rewrites) + ) + op = sqlize(op) + # apply translate rules in topological order + results = op.map(fn) + node = results[op] + return node.this if isinstance(node, sge.Subquery) else node + + @singledispatchmethod + def visit_node(self, op: ops.Node, **_): + raise com.OperationNotDefinedError( + f"No translation rule for {type(op).__name__}" + ) + + @visit_node.register(ops.Field) + def visit_Field(self, op, *, rel, name): + return sg.column( + self._gen_valid_name(name), table=rel.alias_or_name, quoted=self.quoted + ) + + @visit_node.register(ops.ScalarSubquery) + def visit_ScalarSubquery(self, op, *, rel): + return rel.this.subquery() + + @visit_node.register(ops.Alias) + def visit_Alias(self, op, *, arg, name): + return arg.as_(self._gen_valid_name(name), quoted=self.quoted) + + @visit_node.register(ops.Literal) + def visit_Literal(self, op, *, value, dtype, **kw): + if value is None: + if dtype.nullable: + return NULL if dtype.is_null() else self.cast(NULL, dtype) + raise com.UnsupportedOperationError( + f"Unsupported NULL for non-nullable type: {dtype!r}" + ) + elif dtype.is_integer(): + return sge.convert(value) + elif dtype.is_floating(): + if math.isnan(value): + return self.NAN + elif math.isinf(value): + return self.POS_INF if value < 0 else self.NEG_INF + return sge.convert(value) + elif dtype.is_decimal(): + return self.cast(sge.convert(str(value)), dtype) + elif dtype.is_interval(): + return sge.Interval( + this=sge.convert(str(value)), unit=dtype.resolution.upper() + ) + elif dtype.is_boolean(): + return sge.Boolean(this=bool(value)) + elif dtype.is_string(): + return sge.convert(value) + elif dtype.is_inet() or dtype.is_macaddr(): + return sge.convert(str(value)) + elif dtype.is_timestamp() or dtype.is_time(): + return self.cast(value.isoformat(), dtype) + elif dtype.is_date(): + return self.f.datefromparts(value.year, value.month, value.day) + elif dtype.is_array(): + value_type = dtype.value_type + return self.f.array( + *( + self.visit_Literal( + ops.Literal(v, value_type), value=v, dtype=value_type + ) + for v in value + ) + ) + elif dtype.is_map(): + key_type = dtype.key_type + keys = self.f.array( + *( + self.visit_Literal( + ops.Literal(k, key_type), value=k, dtype=key_type, 
**kw + ) + for k in value.keys() + ) + ) + + value_type = dtype.value_type + values = self.f.array( + *( + self.visit_Literal( + ops.Literal(v, value_type), value=v, dtype=value_type, **kw + ) + for v in value.values() + ) + ) + + return self.f.map(keys, values) + elif dtype.is_struct(): + items = [ + sge.Slice( + this=sge.convert(k), + expression=self.visit_Literal( + ops.Literal(v, field_dtype), value=v, dtype=field_dtype, **kw + ), + ) + for field_dtype, (k, v) in zip(dtype.types, value.items()) + ] + return sge.Struct.from_arg_list(items) + else: + raise NotImplementedError(f"Unsupported type: {dtype!r}") + + @visit_node.register(ops.BitwiseNot) + def visit_BitwiseNot(self, op, *, arg): + return sge.BitwiseNot(this=arg) + + ### Mathematical Calisthenics + + @visit_node.register(ops.E) + def visit_E(self, op): + return self.f.exp(1) + + @visit_node.register(ops.Log) + def visit_Log(self, op, *, arg, base): + if base is None: + return self.f.ln(arg) + elif str(base) in ("2", "10"): + return self.f[f"log{base}"](arg) + else: + return self.f.ln(arg) / self.f.ln(base) + + @visit_node.register(ops.Clip) + def visit_Clip(self, op, *, arg, lower, upper): + if upper is not None: + arg = self.if_(arg.is_(NULL), arg, self.f.least(upper, arg)) + + if lower is not None: + arg = self.if_(arg.is_(NULL), arg, self.f.greatest(lower, arg)) + + return arg + + @visit_node.register(ops.FloorDivide) + def visit_FloorDivide(self, op, *, left, right): + return self.cast(self.f.floor(left / right), op.dtype) + + @visit_node.register(ops.Ceil) + @visit_node.register(ops.Floor) + def visit_CeilFloor(self, op, *, arg): + return self.cast(self.f[type(op).__name__.lower()](arg), op.dtype) + + @visit_node.register(ops.Round) + def visit_Round(self, op, *, arg, digits): + if digits is not None: + return sge.Round(this=arg, decimals=digits) + return sge.Round(this=arg) + + ### Dtype Dysmorphia + + @visit_node.register(ops.TryCast) + def visit_TryCast(self, op, *, arg, to): + return sge.TryCast(this=arg, to=self.type_mapper.from_ibis(to)) + + ### Comparator Conundrums + + @visit_node.register(ops.Between) + def visit_Between(self, op, *, arg, lower_bound, upper_bound): + return sge.Between(this=arg, low=lower_bound, high=upper_bound) + + @visit_node.register(ops.Negate) + def visit_Negate(self, op, *, arg): + return -paren(arg) + + @visit_node.register(ops.Not) + def visit_Not(self, op, *, arg): + if isinstance(arg, sge.Filter): + return sge.Filter( + this=self._de_morgan_law(arg.this), expression=arg.expression + ) # transform the not expression using _de_morgan_law + return sg.not_(paren(arg)) + + @staticmethod + def _de_morgan_law(logical_op: sge.Expression): + if isinstance(logical_op, sge.LogicalAnd): + return sge.LogicalOr(this=sg.not_(paren(logical_op.this))) + if isinstance(logical_op, sge.LogicalOr): + return sge.LogicalAnd(this=sg.not_(paren(logical_op.this))) + return None + + ### Timey McTimeFace + + @visit_node.register(ops.Date) + def visit_Date(self, op, *, arg): + return sge.Date(this=arg) + + @visit_node.register(ops.DateFromYMD) + def visit_DateFromYMD(self, op, *, year, month, day): + return sge.DateFromParts(year=year, month=month, day=day) + + @visit_node.register(ops.Time) + def visit_Time(self, op, *, arg): + return self.cast(arg, to=dt.time) + + @visit_node.register(ops.TimestampNow) + def visit_TimestampNow(self, op): + """DuckDB current timestamp defaults to timestamp + tz.""" + return self.cast(sge.CurrentTimestamp(), dt.timestamp) + + @visit_node.register(ops.Strftime) + def 
visit_Strftime(self, op, *, arg, format_str): + if not isinstance(op.format_str, ops.Literal): + raise com.UnsupportedOperationError( + f"{self.dialect} `format_str` must be a literal `str`; got {type(op.format_str)}" + ) + return sge.TimeToStr(this=arg, format=format_str) + + @visit_node.register(ops.ExtractEpochSeconds) + def visit_ExtractEpochSeconds(self, op, *, arg): + return self.f.epoch(self.cast(arg, dt.timestamp)) + + @visit_node.register(ops.ExtractYear) + def visit_ExtractYear(self, op, *, arg): + return self.f.extract("year", arg) + + @visit_node.register(ops.ExtractMonth) + def visit_ExtractMonth(self, op, *, arg): + return self.f.extract("month", arg) + + @visit_node.register(ops.ExtractDay) + def visit_ExtractDay(self, op, *, arg): + return self.f.extract("day", arg) + + @visit_node.register(ops.ExtractDayOfYear) + def visit_ExtractDayOfYear(self, op, *, arg): + return self.f.extract("dayofyear", arg) + + @visit_node.register(ops.ExtractQuarter) + def visit_ExtractQuarter(self, op, *, arg): + return self.f.extract("quarter", arg) + + @visit_node.register(ops.ExtractWeekOfYear) + def visit_ExtractWeekOfYear(self, op, *, arg): + return self.f.extract("week", arg) + + @visit_node.register(ops.ExtractHour) + def visit_ExtractHour(self, op, *, arg): + return self.f.extract("hour", arg) + + @visit_node.register(ops.ExtractMinute) + def visit_ExtractMinute(self, op, *, arg): + return self.f.extract("minute", arg) + + @visit_node.register(ops.ExtractSecond) + def visit_ExtractSecond(self, op, *, arg): + return self.f.extract("second", arg) + + @visit_node.register(ops.TimestampTruncate) + @visit_node.register(ops.DateTruncate) + @visit_node.register(ops.TimeTruncate) + def visit_TimestampTruncate(self, op, *, arg, unit): + unit_mapping = { + "Y": "year", + "M": "month", + "W": "week", + "D": "day", + "h": "hour", + "m": "minute", + "s": "second", + "ms": "ms", + "us": "us", + } + + unit = unit.short + if (duckunit := unit_mapping.get(unit)) is None: + raise com.UnsupportedOperationError(f"Unsupported truncate unit {unit}") + + return self.f.date_trunc(duckunit, arg) + + @visit_node.register(ops.DayOfWeekIndex) + def visit_DayOfWeekIndex(self, op, *, arg): + return (self.f.dayofweek(arg) + 6) % 7 + + @visit_node.register(ops.DayOfWeekName) + def visit_DayOfWeekName(self, op, *, arg): + # day of week number is 0-indexed + # Sunday == 0 + # Saturday == 6 + return sge.Case( + this=(self.f.dayofweek(arg) + 6) % 7, + ifs=list(itertools.starmap(self.if_, enumerate(calendar.day_name))), + ) + + @visit_node.register(ops.IntervalFromInteger) + def visit_IntervalFromInteger(self, op, *, arg, unit): + return sge.Interval(this=sge.convert(arg), unit=unit.singular.upper()) + + ### String Instruments + + @visit_node.register(ops.Strip) + def visit_Strip(self, op, *, arg): + return self.f.trim(arg, string.whitespace) + + @visit_node.register(ops.RStrip) + def visit_RStrip(self, op, *, arg): + return self.f.rtrim(arg, string.whitespace) + + @visit_node.register(ops.LStrip) + def visit_LStrip(self, op, *, arg): + return self.f.ltrim(arg, string.whitespace) + + @visit_node.register(ops.Substring) + def visit_Substring(self, op, *, arg, start, length): + if_pos = sge.Substring(this=arg, start=start + 1, length=length) + if_neg = sge.Substring(this=arg, start=start, length=length) + + return self.if_(start >= 0, if_pos, if_neg) + + @visit_node.register(ops.StringFind) + def visit_StringFind(self, op, *, arg, substr, start, end): + if end is not None: + raise com.UnsupportedOperationError( + "String find 
doesn't support `end` argument" + ) + + if start is not None: + arg = self.f.substr(arg, start + 1) + pos = self.f.strpos(arg, substr) + return self.if_(pos > 0, pos + start, 0) + + return self.f.strpos(arg, substr) + + @visit_node.register(ops.RegexSearch) + def visit_RegexSearch(self, op, *, arg, pattern): + return self.f.regexp_matches(arg, pattern, "s") + + @visit_node.register(ops.RegexReplace) + def visit_RegexReplace(self, op, *, arg, pattern, replacement): + return self.f.regexp_replace(arg, pattern, replacement, "g") + + @visit_node.register(ops.RegexExtract) + def visit_RegexExtract(self, op, *, arg, pattern, index): + return self.f.regexp_extract(arg, pattern, index, dialect=self.dialect) + + @visit_node.register(ops.StringConcat) + def visit_StringConcat(self, op, *, arg): + return self.f.concat(*arg) + + @visit_node.register(ops.StringSQLLike) + def visit_StringSQLLike(self, op, *, arg, pattern, escape): + return arg.like(pattern) + + @visit_node.register(ops.StringSQLILike) + def visit_StringSQLILike(self, op, *, arg, pattern, escape): + return arg.ilike(pattern) + + @visit_node.register(ops.StringToTimestamp) + def visit_StringToTimestamp(self, op, *, arg, format_str): + return sge.StrToTime(this=arg, format=format_str) + + ### NULL PLAYER CHARACTER + @visit_node.register(ops.IsNull) + def visit_IsNull(self, op, *, arg): + return arg.is_(NULL) + + @visit_node.register(ops.NotNull) + def visit_NotNull(self, op, *, arg): + return arg.is_(sg.not_(NULL)) + + @visit_node.register(ops.InValues) + def visit_InValues(self, op, *, value, options): + return value.isin(*options) + + ### Definitely Not Tensors + + @visit_node.register(ops.ArrayStringJoin) + def visit_ArrayStringJoin(self, op, *, sep, arg): + return self.f.array_to_string(arg, sep) + + ### Counting + + @visit_node.register(ops.CountDistinct) + def visit_CountDistinct(self, op, *, arg, where): + return self.agg.count(sge.Distinct(expressions=[arg]), where=where) + + @visit_node.register(ops.CountDistinctStar) + def visit_CountDistinctStar(self, op, *, arg, where): + return self.agg.count(sge.Distinct(expressions=[STAR]), where=where) + + @visit_node.register(ops.CountStar) + def visit_CountStar(self, op, *, arg, where): + return self.agg.count(STAR, where=where) + + @visit_node.register(ops.Sum) + def visit_Sum(self, op, *, arg, where): + arg = self.cast(arg, op.dtype) if op.arg.dtype.is_boolean() else arg + return self.agg.sum(arg, where=where) + + ### Stats + + @visit_node.register(ops.Quantile) + @visit_node.register(ops.MultiQuantile) + def visit_Quantile(self, op, *, arg, quantile, where): + return self.agg.quantile_cont(arg, quantile, where=where) + + @visit_node.register(ops.Variance) + @visit_node.register(ops.StandardDev) + @visit_node.register(ops.Covariance) + def visit_VarianceStandardDevCovariance(self, op, *, how, where, **kw): + hows = {"sample": "samp", "pop": "pop"} + funcs = { + ops.Variance: "var", + ops.StandardDev: "stddev", + ops.Covariance: "covar", + } + + args = [] + + for oparg, arg in zip(op.args, kw.values()): + if (arg_dtype := oparg.dtype).is_boolean(): + arg = self.cast(arg, dt.Int32(nullable=arg_dtype.nullable)) + args.append(arg) + + funcname = f"{funcs[type(op)]}_{hows[how]}" + return self.agg[funcname](*args, where=where) + + @visit_node.register(ops.Arbitrary) + def visit_Arbitrary(self, op, *, arg, how, where): + if how == "heavy": + raise com.UnsupportedOperationError( + f"how='heavy' not supported in the {self.dialect} backend" + ) + return self.agg[how](arg, where=where) + + 
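+    # Rough illustration (not taken from this patch): both case flavors funnel
+    # into a single sqlglot Case node. A searched case with one branch becomes
+    # roughly sge.Case(ifs=[If(this=cond, true=result)], default=default),
+    # rendering as CASE WHEN cond THEN result ELSE default END, while a simple
+    # case additionally carries the base expression in `this`.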
@visit_node.register(ops.SimpleCase) + @visit_node.register(ops.SearchedCase) + def visit_SimpleCase(self, op, *, base=None, cases, results, default): + return sge.Case( + this=base, ifs=list(map(self.if_, cases, results)), default=default + ) + + @visit_node.register(ops.ExistsSubquery) + def visit_ExistsSubquery(self, op, *, rel): + return self.f.exists(rel.this.subquery()) + + @visit_node.register(ops.InSubquery) + def visit_InSubquery(self, op, *, rel, needle): + return needle.isin(rel.this.subquery()) + + @visit_node.register(ops.Array) + def visit_Array(self, op, *, exprs): + return self.f.array(*exprs) + + @visit_node.register(ops.StructColumn) + def visit_StructColumn(self, op, *, names, values): + return sge.Struct.from_arg_list( + [ + sge.Slice(this=sge.convert(name), expression=value) + for name, value in zip(names, values) + ] + ) + + @visit_node.register(ops.StructField) + def visit_StructField(self, op, *, arg, field): + val = arg.this if isinstance(op.arg, ops.Alias) else arg + return val[sge.convert(field)] + + @visit_node.register(ops.IdenticalTo) + def visit_IdenticalTo(self, op, *, left, right): + return sge.NullSafeEQ(this=left, expression=right) + + @visit_node.register(ops.Greatest) + def visit_Greatest(self, op, *, arg): + return self.f.greatest(*arg) + + @visit_node.register(ops.Least) + def visit_Least(self, op, *, arg): + return self.f.least(*arg) + + @visit_node.register(ops.Coalesce) + def visit_Coalesce(self, op, *, arg): + return self.f.coalesce(*arg) + + ### Ordering and window functions + + @visit_node.register(ops.RowNumber) + def visit_RowNumber(self, op): + return sge.RowNumber() + + @visit_node.register(ops.SortKey) + def visit_SortKey(self, op, *, expr, ascending: bool): + return sge.Ordered(this=expr, desc=not ascending) + + @visit_node.register(ops.ApproxMedian) + def visit_ApproxMedian(self, op, *, arg, where): + return self.agg.approx_quantile(arg, 0.5, where=where) + + @visit_node.register(ops.WindowBoundary) + def visit_WindowBoundary(self, op, *, value, preceding): + # TODO: bit of a hack to return a dict, but there's no sqlglot expression + # that corresponds to _only_ this information + return {"value": value, "side": "preceding" if preceding else "following"} + + @visit_node.register(Window) + def visit_Window(self, op, *, how, func, start, end, group_by, order_by): + if start is None: + start = {} + if end is None: + end = {} + + start_value = start.get("value", "UNBOUNDED") + start_side = start.get("side", "PRECEDING") + end_value = end.get("value", "UNBOUNDED") + end_side = end.get("side", "FOLLOWING") + + spec = sge.WindowSpec( + kind=op.how.upper(), + start=start_value, + start_side=start_side, + end=end_value, + end_side=end_side, + over="OVER", + ) + order = sge.Order(expressions=order_by) if order_by else None + + return sge.Window(this=func, partition_by=group_by, order=order, spec=spec) + + @visit_node.register(ops.Lag) + @visit_node.register(ops.Lead) + def visit_LagLead(self, op, *, arg, offset, default): + args = [arg] + + if default is not None: + if offset is None: + offset = 1 + + args.append(offset) + args.append(default) + elif offset is not None: + args.append(offset) + + return self.f[type(op).__name__.lower()](*args) + + @visit_node.register(ops.Argument) + def visit_Argument(self, op, *, name: str, shape, dtype): + return sg.to_identifier(name) + + @visit_node.register(ops.RowID) + def visit_RowID(self, op, *, table): + return sg.column(op.name, table=table.alias_or_name, quoted=self.quoted) + + 
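+    # Descriptive note: UDF nodes compile to ordinary function calls named after
+    # the UDF's fully qualified name (`op.__full_name__`); aggregate UDFs also
+    # route through `self.agg`, so the backend's `where`-filtering strategy from
+    # `_aggregate` applies to them as well.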
@visit_node.register(ops.ScalarUDF) + def visit_ScalarUDF(self, op, **kw): + return self.f[op.__full_name__](*kw.values()) + + @visit_node.register(ops.AggUDF) + def visit_AggUDF(self, op, *, where, **kw): + return self.agg[op.__full_name__](*kw.values(), where=where) + + @visit_node.register(ops.TimeDelta) + @visit_node.register(ops.DateDelta) + @visit_node.register(ops.TimestampDelta) + def visit_TimestampDelta(self, op, *, part, left, right): + # dialect is necessary due to sqlglot's default behavior + # of `part` coming last + return sge.DateDiff( + this=left, expression=right, unit=part, dialect=self.dialect + ) + + @visit_node.register(ops.TimestampBucket) + def visit_TimestampBucket(self, op, *, arg, interval, offset): + origin = self.f.cast("epoch", self.type_mapper.from_ibis(dt.timestamp)) + if offset is not None: + origin += offset + return self.f.time_bucket(interval, arg, origin) + + @visit_node.register(ops.ArrayConcat) + def visit_ArrayConcat(self, op, *, arg): + return sge.ArrayConcat(this=arg[0], expressions=list(arg[1:])) + + @visit_node.register(ops.ArrayContains) + def visit_ArrayContains(self, op, *, arg, other): + return sge.ArrayContains(this=arg, expression=other) + + ## relations + + @visit_node.register(Select) + def visit_Select(self, op, *, parent, selections, predicates, sort_keys): + # if we've constructed a useless projection return the parent relation + if not selections and not predicates and not sort_keys: + return parent + + result = parent + + if selections: + result = sg.select( + *(sel.as_(name, quoted=self.quoted) for name, sel in selections.items()) + ).from_(result) + + if predicates: + result = result.where(*predicates) + + if sort_keys: + result = result.order_by(*sort_keys) + + return result + + @visit_node.register(ops.DummyTable) + def visit_DummyTable(self, op, *, values): + return sg.select( + *(value.as_(key, quoted=self.quoted) for key, value in values.items()) + ) + + @visit_node.register(ops.UnboundTable) + def visit_UnboundTable(self, op, *, name: str, schema: sch.Schema): + return sg.table(name, quoted=self.quoted) + + @visit_node.register(ops.InMemoryTable) + def visit_InMemoryTable(self, op, *, name: str, schema: sch.Schema, data): + return sg.table(name, quoted=self.quoted) + + @visit_node.register(ops.DatabaseTable) + def visit_DatabaseTable(self, op, *, name, namespace, schema, source): + return sg.table( + name, db=namespace.schema, catalog=namespace.database, quoted=self.quoted + ) + + @visit_node.register(ops.SelfReference) + def visit_SelfReference(self, op, *, parent, identifier): + return parent.as_(op.name, quoted=self.quoted) + + @visit_node.register(ops.JoinChain) + def visit_JoinChain(self, op, *, first, rest, values): + result = sg.select( + *(value.as_(key, quoted=self.quoted) for key, value in values.items()) + ).from_(first) + + for link in rest: + if isinstance(link, sge.Alias): + link = link.this + result = result.join(link) + return result + + @visit_node.register(ops.JoinLink) + def visit_JoinLink(self, op, *, how, table, predicates): + sides = { + "inner": None, + "left": "left", + "right": "right", + "semi": "left", + "anti": "left", + "cross": None, + "outer": "full", + "asof": "left", + "any_left": "left", + "any_inner": None, + } + kinds = { + "any_left": "any", + "any_inner": "any", + "asof": "asof", + "inner": "inner", + "left": "outer", + "right": "outer", + "semi": "semi", + "anti": "anti", + "cross": "cross", + "outer": "outer", + } + return sge.Join( + this=table, side=sides[how], kind=kinds[how], 
on=sg.and_(*predicates) + ) + + @staticmethod + def _gen_valid_name(name: str) -> str: + return name + + @visit_node.register(ops.Project) + def visit_Project(self, op, *, parent, values): + # needs_alias should never be true here in explicitly, but it may get + # passed via a (recursive) call to translate_val + return sg.select( + *( + value.as_(self._gen_valid_name(key), quoted=self.quoted) + for key, value in values.items() + ) + ).from_(parent) + + @staticmethod + def _generate_groups(groups): + return map(sge.convert, range(1, len(groups) + 1)) + + @visit_node.register(ops.Aggregate) + def visit_Aggregate(self, op, *, parent, groups, metrics): + sel = sg.select( + *( + value.as_(self._gen_valid_name(key), quoted=self.quoted) + for key, value in groups.items() + ), + *( + value.as_(self._gen_valid_name(key), quoted=self.quoted) + for key, value in metrics.items() + ), + ).from_(parent) + + if groups: + sel = sel.group_by(*self._generate_groups(groups.values())) + + return sel + + def _add_parens(self, op, sg_expr): + if type(op) in _BINARY_INFIX_OPS: + return paren(sg_expr) + return sg_expr + + @visit_node.register(ops.Filter) + def visit_Filter(self, op, *, parent, predicates): + predicates = ( + self._add_parens(raw_predicate, predicate) + for raw_predicate, predicate in zip(op.predicates, predicates) + ) + try: + return parent.where(*predicates) + except AttributeError: + return sg.select(STAR).from_(parent).where(*predicates) + + @visit_node.register(ops.Sort) + def visit_Sort(self, op, *, parent, keys): + try: + return parent.order_by(*keys) + except AttributeError: + return sg.select(STAR).from_(parent).order_by(*keys) + + @visit_node.register(ops.Union) + def visit_Union(self, op, *, left, right, distinct): + if isinstance(left, sge.Table): + left = sg.select(STAR).from_(left) + + if isinstance(right, sge.Table): + right = sg.select(STAR).from_(right) + + return sg.union( + left.args.get("this", left), + right.args.get("this", right), + distinct=distinct, + ) + + @visit_node.register(ops.Intersection) + def visit_Intersection(self, op, *, left, right, distinct): + if isinstance(left, sge.Table): + left = sg.select(STAR).from_(left) + + if isinstance(right, sge.Table): + right = sg.select(STAR).from_(right) + + return sg.intersect( + left.args.get("this", left), + right.args.get("this", right), + distinct=distinct, + ) + + @visit_node.register(ops.Difference) + def visit_Difference(self, op, *, left, right, distinct): + if isinstance(left, sge.Table): + left = sg.select(STAR).from_(left) + + if isinstance(right, sge.Table): + right = sg.select(STAR).from_(right) + + return sg.except_( + left.args.get("this", left), + right.args.get("this", right), + distinct=distinct, + ) + + @visit_node.register(ops.Limit) + def visit_Limit(self, op, *, parent, n, offset): + # push limit/offset into subqueries + if isinstance(parent, sge.Subquery) and parent.this.args.get("limit") is None: + result = parent.this + alias = parent.alias + else: + result = sg.select(STAR).from_(parent) + alias = None + + if isinstance(n, int): + result = result.limit(n) + elif n is not None: + result = result.limit(sg.select(n).from_(parent).subquery()) + else: + assert n is None, n + if self.no_limit_value is not None: + result = result.limit(self.no_limit_value) + + assert offset is not None, "offset is None" + + if not isinstance(offset, int): + skip = offset + skip = sg.select(skip).from_(parent).subquery() + elif not offset: + if alias is not None: + return result.subquery(alias) + return result + else: + skip 
= offset + + result = result.offset(skip) + if alias is not None: + return result.subquery(alias) + return result + + @visit_node.register(ops.Distinct) + def visit_Distinct(self, op, *, parent): + return sg.select(STAR).distinct().from_(parent) + + @visit_node.register(ops.DropNa) + def visit_DropNa(self, op, *, parent, how, subset): + if subset is None: + subset = [ + sg.column(name, table=parent.alias_or_name, quoted=self.quoted) + for name in op.schema.names + ] + + if subset: + predicate = functools.reduce( + sg.and_ if how == "any" else sg.or_, + (sg.not_(col.is_(NULL)) for col in subset), + ) + elif how == "all": + predicate = FALSE + else: + predicate = None + + if predicate is None: + return parent + + try: + return parent.where(predicate) + except AttributeError: + return sg.select(STAR).from_(parent).where(predicate) + + @visit_node.register(ops.FillNa) + def visit_FillNa(self, op, *, parent, replacements): + if isinstance(replacements, Mapping): + mapping = replacements + else: + mapping = { + name: replacements + for name, dtype in op.schema.items() + if dtype.nullable + } + exprs = [ + ( + sg.alias( + sge.Coalesce( + this=sg.column(col, quoted=self.quoted), + expressions=[sge.convert(alt)], + ), + col, + ) + if (alt := mapping.get(col)) is not None + else sg.column(col, quoted=self.quoted) + ) + for col in op.schema.keys() + ] + return sg.select(*exprs).from_(parent) + + # @visit_node.register(ops.View) + # def visit_View(self, op, *, child, name: str): + # # TODO: find a way to do this without creating a temporary view + # backend = op.child.to_expr()._find_backend() + # backend._create_temp_view(table_name=name, source=sg.select(STAR).from_(child)) + # return sg.table(name, quoted=self.quoted) + + @visit_node.register(ops.SQLStringView) + def visit_SQLStringView(self, op, *, query: str, name: str, child): + table = sg.table(name, quoted=self.quoted) + return ( + sg.select(STAR).from_(table).with_(table, as_=query, dialect=self.dialect) + ) + + @visit_node.register(ops.SQLQueryResult) + def visit_SQLQueryResult(self, op, *, query, schema, source): + return sg.parse_one(query, read=self.dialect).subquery() + + @visit_node.register(ops.Unnest) + def visit_Unnest(self, op, *, arg): + return sge.Explode(this=arg) + + +_SIMPLE_OPS = { + ops.All: "bool_and", + ops.Any: "bool_or", + ops.ArgMax: "max_by", + ops.ArgMin: "min_by", + ops.Power: "pow", + # Unary operations + ops.IsNan: "isnan", + ops.IsInf: "isinf", + ops.Abs: "abs", + ops.Exp: "exp", + ops.Sqrt: "sqrt", + ops.Ln: "ln", + ops.Log2: "log2", + ops.Log10: "log", + ops.Acos: "acos", + ops.Asin: "asin", + ops.Atan: "atan", + ops.Atan2: "atan2", + ops.Cos: "cos", + ops.Sin: "sin", + ops.Tan: "tan", + ops.Cot: "cot", + ops.Pi: "pi", + ops.RandomScalar: "random", + ops.Sign: "sign", + # Unary aggregates + ops.ApproxCountDistinct: "approx_distinct", + ops.Median: "median", + ops.Mean: "avg", + ops.Max: "max", + ops.Min: "min", + ops.ArgMin: "argmin", + ops.ArgMax: "argmax", + ops.First: "first", + ops.Last: "last", + ops.Count: "count", + ops.All: "bool_and", + ops.Any: "bool_or", + ops.ArrayCollect: "array_agg", + ops.GroupConcat: "group_concat", + # string operations + ops.StringContains: "contains", + ops.StringLength: "length", + ops.Lowercase: "lower", + ops.Uppercase: "upper", + ops.StartsWith: "starts_with", + ops.StrRight: "right", + # Other operations + ops.IfElse: "if", + ops.ArrayLength: "length", + ops.NullIf: "nullif", + ops.Repeat: "repeat", + ops.Map: "map", + ops.JSONGetItem: "json_extract", + ops.ArrayFlatten: 
"flatten", + # common enough to be in the base, but not modeled in sqlglot + ops.NTile: "ntile", + ops.Degrees: "degrees", + ops.Radians: "radians", + ops.FirstValue: "first_value", + ops.LastValue: "last_value", + ops.NthValue: "nth_value", + ops.MinRank: "rank", + ops.DenseRank: "dense_rank", + ops.PercentRank: "percent_rank", + ops.CumeDist: "cume_dist", + ops.ArrayLength: "array_size", + ops.ArraySort: "array_sort", + ops.Capitalize: "initcap", + ops.Translate: "translate", + ops.StringReplace: "replace", + ops.Reverse: "reverse", + ops.StringSplit: "split", +} + +_BINARY_INFIX_OPS = { + # Binary operations + ops.Add: sge.Add, + ops.Subtract: sge.Sub, + ops.Multiply: sge.Mul, + ops.Divide: sge.Div, + ops.Modulus: sge.Mod, + # Comparisons + ops.GreaterEqual: sge.GTE, + ops.Greater: sge.GT, + ops.LessEqual: sge.LTE, + ops.Less: sge.LT, + ops.Equals: sge.EQ, + ops.NotEquals: sge.NEQ, + # Boolean comparisons + ops.And: sge.And, + ops.Or: sge.Or, + ops.Xor: sge.Xor, + # Bitwise business + ops.BitwiseLeftShift: sge.BitwiseLeftShift, + ops.BitwiseRightShift: sge.BitwiseRightShift, + ops.BitwiseAnd: sge.BitwiseAnd, + ops.BitwiseOr: sge.BitwiseOr, + ops.BitwiseXor: sge.BitwiseXor, + # Time arithmetic + ops.DateAdd: sge.Add, + ops.DateSub: sge.Sub, + ops.DateDiff: sge.Sub, + ops.TimestampAdd: sge.Add, + ops.TimestampSub: sge.Sub, + ops.TimestampDiff: sge.Sub, + # Interval Marginalia + ops.IntervalAdd: sge.Add, + ops.IntervalMultiply: sge.Mul, + ops.IntervalSubtract: sge.Sub, +} + +for _op, _sym in _BINARY_INFIX_OPS.items(): + + @SQLGlotCompiler.visit_node.register(_op) + def _fmt(self, op, *, _sym: sge.Expression = _sym, left, right): + return _sym( + this=self._add_parens(op.left, left), + expression=self._add_parens(op.right, right), + ) + + setattr(SQLGlotCompiler, f"visit_{_op.__name__}", _fmt) + + +del _op, _sym, _fmt + + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @SQLGlotCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) + + else: + + @SQLGlotCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) + + setattr(SQLGlotCompiler, f"visit_{_op.__name__}", _fmt) + + +del _op, _name, _fmt diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index 37e2bc6926ce..eeff6692258a 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -444,26 +444,5 @@ class OracleType(SqlglotType): dialect = "oracle" -class SnowflakeType(SqlglotType): - dialect = "snowflake" - default_temporal_scale = 9 - - @classmethod - def _from_sqlglot_FLOAT(cls) -> dt.Float64: - return dt.Float64(nullable=cls.default_nullable) - - @classmethod - def _from_sqlglot_DECIMAL(cls, precision=None, scale=None) -> dt.Decimal: - if scale is None or int(scale.this.this) == 0: - return dt.Int64(nullable=cls.default_nullable) - else: - return super()._from_sqlglot_DECIMAL(precision, scale) - - @classmethod - def _from_sqlglot_ARRAY(cls, value_type=None) -> dt.Array: - assert value_type is None - return dt.Array(dt.json, nullable=cls.default_nullable) - - class SQLiteType(SqlglotType): dialect = "sqlite" diff --git a/ibis/backends/base/sqlglot/rewrites.py b/ibis/backends/base/sqlglot/rewrites.py index fcaed94d78dd..37c32e038530 100644 --- a/ibis/backends/base/sqlglot/rewrites.py +++ 
b/ibis/backends/base/sqlglot/rewrites.py @@ -3,6 +3,7 @@ from __future__ import annotations +import os from typing import Literal, Optional from public import public @@ -119,6 +120,9 @@ def merge_select_select(_): ) +DEBUG = os.environ.get("IBIS_SQL_DEBUG", False) + + def sqlize(node): """Lower the ibis expression graph to a SQL-like relational algebra.""" step1 = node.replace( @@ -127,5 +131,11 @@ def sqlize(node): | filter_to_select | sort_to_select ) + if DEBUG: + print("--------- STEP 1 ---------") + print(step1.to_expr()) step2 = step1.replace(merge_select_select) + if DEBUG: + print("--------- STEP 2 ---------") + print(step2.to_expr()) return step2 diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index b5f90b64f47d..9668f47ca49f 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -2,8 +2,9 @@ import ast import atexit +import contextlib import glob -from contextlib import closing, suppress +from contextlib import closing from functools import partial from typing import TYPE_CHECKING, Any, Literal @@ -12,6 +13,7 @@ import pyarrow_hotfix # noqa: F401 import sqlalchemy as sa import sqlglot as sg +import sqlglot.expressions as sge import toolz from clickhouse_connect.driver.external import ExternalData @@ -23,8 +25,9 @@ import ibis.expr.types as ir from ibis import util from ibis.backends.base import BaseBackend, CanCreateDatabase -from ibis.backends.base.sqlglot import STAR, C, F -from ibis.backends.clickhouse.compiler import translate +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.base.sqlglot.compiler import C, F +from ibis.backends.clickhouse.compiler import ClickHouseCompiler from ibis.backends.clickhouse.datatypes import ClickhouseType if TYPE_CHECKING: @@ -33,15 +36,14 @@ import pandas as pd - from ibis.common.typing import SupportsSchema - def _to_memtable(v): return ibis.memtable(v).op() if not isinstance(v, ops.InMemoryTable) else v -class Backend(BaseBackend, CanCreateDatabase): +class Backend(SQLGlotBackend, CanCreateDatabase): name = "clickhouse" + compiler = ClickHouseCompiler() # ClickHouse itself does, but the client driver does not supports_temporary_tables = False @@ -57,25 +59,6 @@ class Options(ibis.config.Config): bool_type: Literal["Bool", "UInt8", "Int8"] = "Bool" - def _log(self, sql: str) -> None: - """Log `sql`. - - This method can be implemented by subclasses. Logging occurs when - `ibis.options.verbose` is `True`. - """ - util.log(sql) - - def sql( - self, - query: str, - schema: SupportsSchema | None = None, - dialect: str | None = None, - ) -> ir.Table: - query = self._transpile_sql(query, dialect=dialect) - if schema is None: - schema = self._get_schema_using_query(query) - return ops.SQLQueryResult(query, ibis.schema(schema), self).to_expr() - def _from_url(self, url: str, **kwargs) -> BaseBackend: """Connect to a backend using a URL `url`. 
@@ -109,7 +92,7 @@ def _from_url(self, url: str, **kwargs) -> BaseBackend: return self.connect(**kwargs) def _convert_kwargs(self, kwargs): - with suppress(KeyError): + with contextlib.suppress(KeyError): kwargs["secure"] = bool(ast.literal_eval(kwargs["secure"])) def do_connect( @@ -182,15 +165,20 @@ def do_connect( def version(self) -> str: return self.con.server_version + @contextlib.contextmanager + def _safe_raw_sql(self, *args, **kwargs): + with contextlib.closing(self.raw_sql(*args, **kwargs)) as result: + yield result + @property def current_database(self) -> str: - with closing(self.raw_sql(sg.select(F.currentDatabase()))) as result: + with self._safe_raw_sql(sg.select(F.currentDatabase())) as result: [(db,)] = result.result_rows return db def list_databases(self, like: str | None = None) -> list[str]: - with closing( - self.raw_sql(sg.select(C.name).from_(sg.table("databases", db="system"))) + with self._safe_raw_sql( + sg.select(C.name).from_(sg.table("databases", db="system")) ) as result: results = result.result_columns @@ -208,11 +196,11 @@ def list_tables( if database is None: database = F.currentDatabase() else: - database = sg.exp.convert(database) + database = sge.convert(database) query = query.where(C.database.eq(database).or_(C.is_temporary)) - with closing(self.raw_sql(query)) as result: + with self._safe_raw_sql(query) as result: results = result.result_columns if results: @@ -380,6 +368,8 @@ def execute( if df.empty: df = pd.DataFrame(columns=schema.names) + else: + df.columns = list(schema.names) # TODO: remove the extra conversion # @@ -387,64 +377,6 @@ def execute( # in single column conversion and whole table conversion return expr.__pandas_result__(table.__pandas_result__(df)) - def _to_sqlglot( - self, expr: ir.Expr, limit: str | None = None, params=None, **_: Any - ): - """Compile an Ibis expression to a sqlglot object.""" - table_expr = expr.as_table() - - if limit == "default": - limit = ibis.options.sql.default_limit - if limit is not None: - table_expr = table_expr.limit(limit) - - if params is None: - params = {} - - sql = translate(table_expr.op(), params=params) - assert not isinstance(sql, sg.exp.Subquery) - - if isinstance(sql, sg.exp.Table): - sql = sg.select(STAR).from_(sql) - - assert not isinstance(sql, sg.exp.Subquery) - return sql - - def compile( - self, expr: ir.Expr, limit: str | None = None, params=None, **kwargs: Any - ): - """Compile an Ibis expression to a ClickHouse SQL string.""" - return self._to_sqlglot(expr, limit=limit, params=params, **kwargs).sql( - dialect=self.name, pretty=True - ) - - def _to_sql(self, expr: ir.Expr, **kwargs) -> str: - return self.compile(expr, **kwargs) - - def table(self, name: str, database: str | None = None) -> ir.Table: - """Construct a table expression. 
- - Parameters - ---------- - name - Table name - database - Database name - - Returns - ------- - Table - Table expression - """ - schema = self.get_schema(name, database=database) - op = ops.DatabaseTable( - name=name, - schema=schema, - source=self, - namespace=ops.Namespace(database=database), - ) - return op.to_expr() - def insert( self, name: str, @@ -468,7 +400,7 @@ def insert( def raw_sql( self, - query: str | sg.exp.Expression, + query: str | sge.Expression, external_tables: Mapping[str, pd.DataFrame] | None = None, **kwargs, ) -> Any: @@ -491,7 +423,7 @@ def raw_sql( """ external_tables = toolz.valmap(_to_memtable, external_tables or {}) external_data = self._normalize_external_tables(external_tables) - with suppress(AttributeError): + with contextlib.suppress(AttributeError): query = query.sql(dialect=self.name, pretty=True) self._log(query) return self.con.query(query, external_data=external_data, **kwargs) @@ -500,7 +432,9 @@ def close(self) -> None: """Close ClickHouse connection.""" self.con.close() - def get_schema(self, table_name: str, database: str | None = None) -> sch.Schema: + def get_schema( + self, table_name: str, database: str | None = None, schema: str | None = None + ) -> sch.Schema: """Return a Schema object for the indicated table and database. Parameters @@ -510,19 +444,25 @@ def get_schema(self, table_name: str, database: str | None = None) -> sch.Schema qualify the identifier. database Database name + schema + Schema name, not supported by ClickHouse Returns ------- sch.Schema Ibis schema """ - query = sg.exp.Describe(this=sg.table(table_name, db=database)) - with closing(self.raw_sql(query)) as results: + if schema is not None: + raise com.UnsupportedBackendFeatureError( + "`schema` namespaces are not supported by clickhouse" + ) + query = sge.Describe(this=sg.table(table_name, db=database)) + with self._safe_raw_sql(query) as results: names, types, *_ = results.result_columns return sch.Schema(dict(zip(names, map(ClickhouseType.from_string, types)))) - def _get_schema_using_query(self, query: str) -> sch.Schema: - name = util.gen_name("get_schema_using_query") + def _metadata(self, query: str) -> sch.Schema: + name = util.gen_name("clickhouse_metadata") with closing(self.raw_sql(f"CREATE VIEW {name} AS {query}")): pass try: @@ -531,43 +471,30 @@ def _get_schema_using_query(self, query: str) -> sch.Schema: finally: with closing(self.raw_sql(f"DROP VIEW {name}")): pass - return sch.Schema(dict(zip(names, map(ClickhouseType.from_string, types)))) - - @classmethod - def has_operation(cls, operation: type[ops.Value]) -> bool: - from ibis.backends.clickhouse.compiler.values import translate_val - - return translate_val.dispatch(operation) is not translate_val.dispatch(object) + return zip(names, map(ClickhouseType.from_string, types)) def create_database( self, name: str, *, force: bool = False, engine: str = "Atomic" ) -> None: - src = sg.exp.Create( + src = sge.Create( this=sg.to_identifier(name), kind="DATABASE", exists=force, - properties=sg.exp.Properties( - expressions=[sg.exp.EngineProperty(this=sg.to_identifier(engine))] + properties=sge.Properties( + expressions=[sge.EngineProperty(this=sg.to_identifier(engine))] ), ) - with closing(self.raw_sql(src)): + with self._safe_raw_sql(src): pass def drop_database(self, name: str, *, force: bool = False) -> None: - src = sg.exp.Drop(this=sg.to_identifier(name), kind="DATABASE", exists=force) - with closing(self.raw_sql(src)): + src = sge.Drop(this=sg.to_identifier(name), kind="DATABASE", exists=force) + with 
self._safe_raw_sql(src): pass def truncate_table(self, name: str, database: str | None = None) -> None: ident = sg.table(name, db=database).sql(self.name) - with closing(self.raw_sql(f"TRUNCATE TABLE {ident}")): - pass - - def drop_table( - self, name: str, database: str | None = None, force: bool = False - ) -> None: - src = sg.exp.Drop(this=sg.table(name, db=database), kind="TABLE", exists=force) - with closing(self.raw_sql(src)): + with self._safe_raw_sql(f"TRUNCATE TABLE {ident}"): pass def read_parquet( @@ -686,10 +613,10 @@ def create_table( if schema is None: schema = obj.schema() - this = sg.exp.Schema( + this = sge.Schema( this=sg.table(name, db=database), expressions=[ - sg.exp.ColumnDef( + sge.ColumnDef( this=sg.to_identifier(name), kind=ClickhouseType.from_ibis(typ) ) for name, typ in schema.items() @@ -698,20 +625,20 @@ def create_table( properties = [ # the engine cannot be quoted, since clickhouse won't allow e.g., # "File(Native)" - sg.exp.EngineProperty(this=sg.to_identifier(engine, quoted=False)) + sge.EngineProperty(this=sg.to_identifier(engine, quoted=False)) ] if temp: - properties.append(sg.exp.TemporaryProperty()) + properties.append(sge.TemporaryProperty()) if order_by is not None or engine == "MergeTree": # engine == "MergeTree" requires an order by clause, which is the # empty tuple if order_by is False-y properties.append( - sg.exp.Order( + sge.Order( expressions=[ - sg.exp.Ordered( - this=sg.exp.Tuple( + sge.Ordered( + this=sge.Tuple( expressions=list(map(sg.column, order_by or ())) ) ) @@ -721,8 +648,8 @@ def create_table( if partition_by is not None: properties.append( - sg.exp.PartitionedByProperty( - this=sg.exp.Schema( + sge.PartitionedByProperty( + this=sge.Schema( expressions=list(map(sg.to_identifier, partition_by)) ) ) @@ -730,19 +657,19 @@ def create_table( if sample_by is not None: properties.append( - sg.exp.SampleProperty( - this=sg.exp.Tuple(expressions=list(map(sg.column, sample_by))) + sge.SampleProperty( + this=sge.Tuple(expressions=list(map(sg.column, sample_by))) ) ) if settings: properties.append( - sg.exp.SettingsProperty( + sge.SettingsProperty( expressions=[ - sg.exp.SetItem( - this=sg.exp.EQ( + sge.SetItem( + this=sge.EQ( this=sg.to_identifier(name), - expression=sg.exp.convert(value), + expression=sge.convert(value), ) ) for name, value in settings.items() @@ -754,15 +681,15 @@ def create_table( expression = None if obj is not None: - expression = self._to_sqlglot(obj) + (expression,) = self._to_sqlglot(obj) external_tables.update(self._collect_in_memory_tables(obj)) - code = sg.exp.Create( + code = sge.Create( this=this, kind="TABLE", replace=overwrite, expression=expression, - properties=sg.exp.Properties(expressions=properties), + properties=sge.Properties(expressions=properties), ) external_data = self._normalize_external_tables(external_tables) @@ -781,46 +708,30 @@ def create_view( database: str | None = None, overwrite: bool = False, ) -> ir.Table: - src = sg.exp.Create( + (expression,) = self._to_sqlglot(obj) + src = sge.Create( this=sg.table(name, db=database), kind="VIEW", replace=overwrite, - expression=self._to_sqlglot(obj), + expression=expression, ) external_tables = self._collect_in_memory_tables(obj) - with closing(self.raw_sql(src, external_tables=external_tables)): + with self._safe_raw_sql(src, external_tables=external_tables): pass return self.table(name, database=database) - def drop_view( - self, name: str, *, database: str | None = None, force: bool = False - ) -> None: - src = sg.exp.Drop(this=sg.table(name, 
db=database), kind="VIEW", exists=force) - with closing(self.raw_sql(src)): - pass - - def _load_into_cache(self, name, expr): - self.create_table(name, expr, schema=expr.schema(), temp=True) - - def _clean_up_cached_table(self, op): - self.drop_table(op.name) - - def _create_temp_view(self, table_name, source): - if table_name not in self._temp_views and table_name in self.list_tables(): - raise ValueError( - f"{table_name} already exists as a non-temporary table or view" - ) - src = sg.exp.Create( - this=sg.table(table_name), kind="VIEW", replace=True, expression=source + def _get_temp_view_definition(self, name: str, definition: str) -> str: + return sge.Create( + this=sg.to_identifier(name, quoted=self.compiler.quoted), + kind="VIEW", + expression=definition, + replace=True, ) - self.raw_sql(src) - self._temp_views.add(table_name) - self._register_temp_view_cleanup(table_name) def _register_temp_view_cleanup(self, name: str) -> None: def drop(self, name: str, query: str): self.raw_sql(query) self._temp_views.discard(name) - query = sg.exp.Drop(this=sg.table(name), kind="VIEW", exists=True) + query = sge.Drop(this=sg.table(name), kind="VIEW", exists=True) atexit.register(drop, self, name=name, query=query) diff --git a/ibis/backends/clickhouse/compiler.py b/ibis/backends/clickhouse/compiler.py new file mode 100644 index 000000000000..56496271c513 --- /dev/null +++ b/ibis/backends/clickhouse/compiler.py @@ -0,0 +1,686 @@ +from __future__ import annotations + +import calendar +import math +from functools import singledispatchmethod +from typing import Any + +import sqlglot as sg +from sqlglot import exp +from sqlglot.dialects import ClickHouse +from sqlglot.dialects.dialect import rename_func + +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis import util +from ibis.backends.base.sqlglot.compiler import ( + NULL, + STAR, + SQLGlotCompiler, + parenthesize, +) +from ibis.backends.clickhouse.datatypes import ClickhouseType +from ibis.expr.rewrites import rewrite_sample + +ClickHouse.Generator.TRANSFORMS |= { + exp.ArraySize: rename_func("length"), + exp.ArraySort: rename_func("arraySort"), +} + + +class ClickHouseCompiler(SQLGlotCompiler): + __slots__ = () + + dialect = "clickhouse" + type_mapper = ClickhouseType + rewrites = (rewrite_sample, *SQLGlotCompiler.rewrites) + + def _aggregate(self, funcname: str, *args, where): + has_filter = where is not None + func = self.f[funcname + "If" * has_filter] + args += (where,) * has_filter + return func(*args) + + @singledispatchmethod + def visit_node(self, op, **kw): + return super().visit_node(op, **kw) + + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + _interval_cast_suffixes = { + "s": "Second", + "m": "Minute", + "h": "Hour", + "D": "Day", + "W": "Week", + "M": "Month", + "Q": "Quarter", + "Y": "Year", + } + + if to.is_interval(): + suffix = _interval_cast_suffixes[to.unit.short] + return self.f[f"toInterval{suffix}"](arg) + + result = self.cast(arg, to) + if (timezone := getattr(to, "timezone", None)) is not None: + return self.f.toTimeZone(result, timezone) + return result + + @visit_node.register(ops.TryCast) + def visit_TryCast(self, op, *, arg, to): + return self.f.accurateCastOrNull(arg, self.type_mapper.to_string(to)) + + @visit_node.register(ops.ArrayIndex) + def visit_ArrayIndex(self, op, *, arg, index): + return arg[self.if_(index >= 0, index + 1, index)] + + @visit_node.register(ops.ArrayRepeat) + def visit_ArrayRepeat(self, op, *, arg, 
times): + param = sg.to_identifier("_") + func = sg.exp.Lambda(this=arg, expressions=[param]) + return self.f.arrayFlatten(self.f.arrayMap(func, self.f.range(times))) + + @visit_node.register(ops.ArraySlice) + def visit_ArraySlice(self, op, *, arg, start, stop): + start = parenthesize(op.start, start) + start_correct = self.if_(start < 0, start, start + 1) + + if stop is not None: + stop = parenthesize(op.stop, stop) + + length = self.if_( + stop < 0, + stop, + self.if_( + start < 0, + self.f.greatest(0, stop - (self.f.length(arg) + start)), + self.f.greatest(0, stop - start), + ), + ) + return self.f.arraySlice(arg, start_correct, length) + else: + return self.f.arraySlice(arg, start_correct) + + @visit_node.register(ops.CountStar) + def visit_CountStar(self, op, *, where, arg): + if where is not None: + return self.f.countIf(where) + return sg.exp.Count(this=STAR) + + @visit_node.register(ops.Quantile) + @visit_node.register(ops.MultiQuantile) + def visit_QuantileMultiQuantile(self, op, *, arg, quantile, where): + if where is None: + return self.agg.quantile(arg, quantile, where=where) + + func = "quantile" + "s" * isinstance(op, ops.MultiQuantile) + return sg.exp.ParameterizedAgg( + this=f"{func}If", + expressions=util.promote_list(quantile), + params=[arg, where], + ) + + @visit_node.register(ops.Correlation) + def visit_Correlation(self, op, *, left, right, how, where): + if how == "pop": + raise ValueError( + "ClickHouse only implements `sample` correlation coefficient" + ) + return self.agg.corr(left, right, where=where) + + @visit_node.register(ops.Arbitrary) + def visit_Arbitrary(self, op, *, arg, how, where): + if how == "first": + return self.agg.any(arg, where=where) + elif how == "last": + return self.agg.anyLast(arg, where=where) + else: + assert how == "heavy" + return self.agg.anyHeavy(arg, where=where) + + @visit_node.register(ops.Substring) + def visit_Substring(self, op, *, arg, start, length): + # Clickhouse is 1-indexed + suffix = (length,) * (length is not None) + if_pos = self.f.substring(arg, start + 1, *suffix) + if_neg = self.f.substring(arg, self.f.length(arg) + start + 1, *suffix) + return self.if_(start >= 0, if_pos, if_neg) + + @visit_node.register(ops.StringFind) + def visit_StringFind(self, op, *, arg, substr, start, end): + if end is not None: + raise com.UnsupportedOperationError( + "String find doesn't support end argument" + ) + + if start is not None: + return self.f.locate(arg, substr, start) + + return self.f.locate(arg, substr) + + @visit_node.register(ops.RegexSearch) + def visit_RegexSearch(self, op, *, arg, pattern): + return sg.exp.RegexpLike(this=arg, expression=pattern) + + @visit_node.register(ops.RegexExtract) + def visit_RegexExtract(self, op, *, arg, pattern, index): + arg = self.cast(arg, dt.String(nullable=False)) + + pattern = self.f.concat("(", pattern, ")") + + if index is None: + index = 0 + + index += 1 + + then = self.f.extractGroups(arg, pattern)[index] + + return self.if_(self.f.notEmpty(then), then, NULL) + + @visit_node.register(ops.FindInSet) + def visit_FindInSet(self, op, *, needle, values): + return self.f.indexOf(self.f.array(*values), needle) + + @visit_node.register(ops.Sign) + def visit_Sign(self, op, *, arg): + """Workaround for missing sign function in older versions of clickhouse.""" + return self.f.intDivOrZero(arg, self.f.abs(arg)) + + @visit_node.register(ops.Hash) + def visit_Hash(self, op, *, arg): + return self.f.sipHash64(arg) + + @visit_node.register(ops.HashBytes) + def visit_HashBytes(self, op, *, arg, how): 
+ supported_algorithms = frozenset( + ( + "MD5", + "halfMD5", + "SHA1", + "SHA224", + "SHA256", + "intHash32", + "intHash64", + "cityHash64", + "sipHash64", + "sipHash128", + ) + ) + if how not in supported_algorithms: + raise com.UnsupportedOperationError(f"Unsupported hash algorithm {how}") + + return self.f[how](arg) + + @visit_node.register(ops.IntervalFromInteger) + def visit_IntervalFromInteger(self, op, *, arg, unit): + dtype = op.dtype + if dtype.unit.short in ("ms", "us", "ns"): + raise com.UnsupportedOperationError( + "Clickhouse doesn't support subsecond interval resolutions" + ) + return super().visit_node(op, arg=arg, unit=unit) + + @visit_node.register(ops.Literal) + def visit_Literal(self, op, *, value, dtype, **kw): + if value is None: + return super().visit_node(op, value=value, dtype=dtype, **kw) + elif dtype.is_inet(): + v = str(value) + return self.f.toIPv6(v) if ":" in v else self.f.toIPv4(v) + elif dtype.is_string(): + return sg.exp.convert(str(value).replace(r"\0", r"\\0")) + elif dtype.is_decimal(): + precision = dtype.precision + if precision is None or not 1 <= precision <= 76: + raise NotImplementedError( + f"Unsupported precision. Supported values: [1 : 76]. Current value: {precision!r}" + ) + + if 1 <= precision <= 9: + type_name = self.f.toDecimal32 + elif 10 <= precision <= 18: + type_name = self.f.toDecimal64 + elif 19 <= precision <= 38: + type_name = self.f.toDecimal128 + else: + type_name = self.f.toDecimal256 + return type_name(value, dtype.scale) + elif dtype.is_numeric(): + if not math.isfinite(value): + return sg.exp.Literal.number(str(value)) + return sg.exp.convert(value) + elif dtype.is_interval(): + if dtype.unit.short in ("ms", "us", "ns"): + raise com.UnsupportedOperationError( + "Clickhouse doesn't support subsecond interval resolutions" + ) + + return sg.exp.Interval( + this=sg.exp.convert(str(value)), unit=dtype.resolution.upper() + ) + elif dtype.is_timestamp(): + funcname = "toDateTime" + fmt = "%Y-%m-%dT%H:%M:%S" + + if micros := value.microsecond: + funcname += "64" + fmt += ".%f" + + args = [value.strftime(fmt)] + + if micros % 1000: + args.append(6) + elif micros // 1000: + args.append(3) + + if (timezone := dtype.timezone) is not None: + args.append(timezone) + + return self.f[funcname](*args) + elif dtype.is_date(): + return self.f.toDate(value.strftime("%Y-%m-%d")) + elif dtype.is_struct(): + fields = [ + self.visit_Literal( + ops.Literal(v, dtype=field_type), value=v, dtype=field_type, **kw + ) + for field_type, v in zip(dtype.types, value.values()) + ] + return self.f.tuple(*fields) + else: + return super().visit_node(op, value=value, dtype=dtype, **kw) + + @visit_node.register(ops.TimestampFromUNIX) + def visit_TimestampFromUNIX(self, op, *, arg, unit): + if (unit := unit.short) in {"ms", "us", "ns"}: + raise com.UnsupportedOperationError(f"{unit!r} unit is not supported!") + return self.f.toDateTime(arg) + + @visit_node.register(ops.DateTruncate) + @visit_node.register(ops.TimestampTruncate) + @visit_node.register(ops.TimeTruncate) + def visit_TimeTruncate(self, op, *, arg, unit): + converters = { + "Y": "toStartOfYear", + "M": "toStartOfMonth", + "W": "toMonday", + "D": "toDate", + "h": "toStartOfHour", + "m": "toStartOfMinute", + "s": "toDateTime", + } + + unit = unit.short + if (converter := converters.get(unit)) is None: + raise com.UnsupportedOperationError(f"Unsupported truncate unit {unit}") + + return self.f[converter](arg) + + @visit_node.register(ops.TimestampBucket) + def visit_TimestampBucket(self, op, *, arg, 
interval, offset): + if offset is not None: + raise com.UnsupportedOperationError( + "Timestamp bucket with offset is not supported" + ) + + return self.f.toStartOfInterval(arg, interval) + + @visit_node.register(ops.DateFromYMD) + def visit_DateFromYMD(self, op, *, year, month, day): + return self.f.toDate( + self.f.concat( + self.f.toString(year), + "-", + self.f.leftPad(self.f.toString(month), 2, "0"), + "-", + self.f.leftPad(self.f.toString(day), 2, "0"), + ) + ) + + @visit_node.register(ops.TimestampFromYMDHMS) + def visit_TimestampFromYMDHMS( + self, op, *, year, month, day, hours, minutes, seconds, **_ + ): + to_datetime = self.f.toDateTime( + self.f.concat( + self.f.toString(year), + "-", + self.f.leftPad(self.f.toString(month), 2, "0"), + "-", + self.f.leftPad(self.f.toString(day), 2, "0"), + " ", + self.f.leftPad(self.f.toString(hours), 2, "0"), + ":", + self.f.leftPad(self.f.toString(minutes), 2, "0"), + ":", + self.f.leftPad(self.f.toString(seconds), 2, "0"), + ) + ) + if timezone := op.dtype.timezone: + return self.f.toTimeZone(to_datetime, timezone) + return to_datetime + + @visit_node.register(ops.StringSplit) + def visit_StringSplit(self, op, *, arg, delimiter): + return self.f.splitByString( + delimiter, self.cast(arg, dt.String(nullable=False)) + ) + + @visit_node.register(ops.StringJoin) + def visit_StringJoin(self, op, *, sep, arg): + return self.f.arrayStringConcat(self.f.array(*arg), sep) + + @visit_node.register(ops.Capitalize) + def visit_Capitalize(self, op, *, arg): + return self.f.concat( + self.f.upper(self.f.substr(arg, 1, 1)), self.f.lower(self.f.substr(arg, 2)) + ) + + @visit_node.register(ops.GroupConcat) + def visit_GroupConcat(self, op, *, arg, sep, where): + call = self.agg.groupArray(arg, where=where) + return self.if_(self.f.empty(call), NULL, self.f.arrayStringConcat(call, sep)) + + @visit_node.register(ops.Cot) + def visit_Cot(self, op, *, arg): + return 1.0 / self.f.tan(arg) + + @visit_node.register(ops.StructColumn) + def visit_StructColumn(self, op, *, values, names): + # ClickHouse struct types cannot be nullable + # (non-nested fields can be nullable) + return self.cast(self.f.tuple(*values), op.dtype.copy(nullable=False)) + + @visit_node.register(ops.Clip) + def visit_Clip(self, op, *, arg, lower, upper): + if upper is not None: + arg = self.if_(self.f.isNull(arg), NULL, self.f.least(upper, arg)) + + if lower is not None: + arg = self.if_(self.f.isNull(arg), NULL, self.f.greatest(lower, arg)) + + return arg + + @visit_node.register(ops.StructField) + def visit_StructField(self, op, *, arg, field: str): + arg_dtype = op.arg.dtype + idx = arg_dtype.names.index(field) + return self.cast( + sg.exp.Dot(this=arg, expression=sg.exp.convert(idx + 1)), op.dtype + ) + + @visit_node.register(ops.Repeat) + def visit_Repeat(self, op, *, arg, times): + return self.f.repeat(arg, self.f.accurateCast(times, "UInt64")) + + @visit_node.register(ops.StringContains) + def visit_StringContains(self, op, haystack, needle): + return self.f.locate(haystack, needle) > 0 + + @visit_node.register(ops.DayOfWeekIndex) + def visit_DayOfWeekIndex(self, op, *, arg): + weekdays = len(calendar.day_name) + return (((self.f.toDayOfWeek(arg) - 1) % weekdays) + weekdays) % weekdays + + @visit_node.register(ops.DayOfWeekName) + def visit_DayOfWeekName(self, op, *, arg): + # ClickHouse 20 doesn't support dateName + # + # ClickHouse 21 supports dateName is broken for regexen: + # https://github.com/ClickHouse/ClickHouse/issues/32777 + # + # ClickHouses 20 and 21 also have a broken case 
statement hence the ifnull: + # https://github.com/ClickHouse/ClickHouse/issues/32849 + # + # We test against 20 in CI, so we implement day_of_week_name as follows + days = calendar.day_name + num_weekdays = len(days) + base = ( + ((self.f.toDayOfWeek(arg) - 1) % num_weekdays) + num_weekdays + ) % num_weekdays + return sg.exp.Case( + this=base, + ifs=list(map(self.if_, *zip(*enumerate(days)))), + default=sg.exp.convert(""), + ) + + @visit_node.register(ops.Map) + def visit_Map(self, op, *, keys, values): + # cast here to allow lookups of nullable columns + return self.cast(self.f.tuple(keys, values), op.dtype) + + @visit_node.register(ops.MapGet) + def visit_MapGet(self, op, *, arg, key, default): + return self.if_(self.f.mapContains(arg, key), arg[key], default) + + @visit_node.register(ops.ArrayConcat) + def visit_ArrayConcat(self, op, *, arg): + return self.f.arrayConcat(*arg) + + @visit_node.register(ops.BitAnd) + @visit_node.register(ops.BitOr) + @visit_node.register(ops.BitXor) + def visit_BitAndOrXor(self, op, *, arg, where): + if not (dtype := op.arg.dtype).is_unsigned_integer(): + nbits = dtype.nbytes * 8 + arg = self.f[f"reinterpretAsUInt{nbits}"](arg) + return self.agg[f"group{type(op).__name__}"](arg, where=where) + + @visit_node.register(ops.StandardDev) + @visit_node.register(ops.Variance) + @visit_node.register(ops.Covariance) + def visit_StandardDevVariance(self, op, *, how, where, **kw): + funcs = { + ops.StandardDev: "stddev", + ops.Variance: "var", + ops.Covariance: "covar", + } + func = funcs[type(op)] + variants = {"sample": f"{func}Samp", "pop": f"{func}Pop"} + funcname = variants[how] + return self.agg[funcname](*kw.values(), where=where) + + @visit_node.register(ops.ArrayDistinct) + def visit_ArrayDistinct(self, op, *, arg): + null_element = self.if_( + self.f.countEqual(arg, NULL) > 0, self.f.array(NULL), self.f.array() + ) + return self.f.arrayConcat(self.f.arrayDistinct(arg), null_element) + + @visit_node.register(ops.ExtractMicrosecond) + def visit_ExtractMicrosecond(self, op, *, arg): + dtype = op.dtype + return self.cast( + self.f.toUnixTimestamp64Micro(self.cast(arg, op.arg.dtype.copy(scale=6))) + % 1_000_000, + dtype, + ) + + @visit_node.register(ops.ExtractMillisecond) + def visit_ExtractMillisecond(self, op, *, arg): + dtype = op.dtype + return self.cast( + self.f.toUnixTimestamp64Milli(self.cast(arg, op.arg.dtype.copy(scale=3))) + % 1_000, + dtype, + ) + + @visit_node.register(ops.Lag) + @visit_node.register(ops.Lead) + def formatter(self, op, *, arg, offset, default): + args = [arg] + + if default is not None: + if offset is None: + offset = 1 + + args.append(offset) + args.append(default) + elif offset is not None: + args.append(offset) + + func = self.f[f"{type(op).__name__.lower()}InFrame"] + return func(*args) + + @visit_node.register(ops.ExtractFile) + def visit_ExtractFile(self, op, *, arg): + return self.f.cutFragment(self.f.pathFull(arg)) + + @visit_node.register(ops.ExtractQuery) + def visit_ExtractQuery(self, op, *, arg, key): + if key is not None: + return self.f.extractURLParameter(arg, key) + else: + return self.f.queryString(arg) + + @visit_node.register(ops.ArrayStringJoin) + def visit_ArrayStringJoin(self, op, *, arg, sep): + return self.f.arrayStringConcat(arg, sep) + + @visit_node.register(ops.ArrayMap) + def visit_ArrayMap(self, op, *, arg, param, body): + func = sg.exp.Lambda(this=body, expressions=[param]) + return self.f.arrayMap(func, arg) + + @visit_node.register(ops.ArrayFilter) + def visit_ArrayFilter(self, op, *, arg, param, 
body): + func = sg.exp.Lambda(this=body, expressions=[param]) + return self.f.arrayFilter(func, arg) + + @visit_node.register(ops.ArrayRemove) + def visit_ArrayRemove(self, op, *, arg, other): + x = sg.to_identifier("x") + body = x.neq(other) + return self.f.arrayFilter(sg.exp.Lambda(this=body, expressions=[x]), arg) + + @visit_node.register(ops.ArrayUnion) + def visit_ArrayUnion(self, op, *, left, right): + arg = self.f.arrayConcat(left, right) + null_element = self.if_( + self.f.countEqual(arg, NULL) > 0, self.f.array(NULL), self.f.array() + ) + return self.f.arrayConcat(self.f.arrayDistinct(arg), null_element) + + @visit_node.register(ops.ArrayZip) + def visit_ArrayZip(self, op: ops.ArrayZip, *, arg, **_: Any) -> str: + return self.f.arrayZip(*arg) + + @visit_node.register(ops.CountDistinctStar) + def visit_CountDistinctStar( + self, op: ops.CountDistinctStar, *, where, **_: Any + ) -> str: + columns = self.f.tuple(*map(sg.column, op.arg.schema.names)) + + if where is not None: + return self.f.countDistinctIf(columns, where) + else: + return self.f.countDistinct(columns) + + @staticmethod + def _generate_groups(groups): + return groups + + @visit_node.register(ops.RowID) + @visit_node.register(ops.CumeDist) + @visit_node.register(ops.PercentRank) + @visit_node.register(ops.Time) + @visit_node.register(ops.TimeDelta) + @visit_node.register(ops.StringToTimestamp) + def visit_Undefined(self, op, **_): + raise com.OperationNotDefinedError(type(op).__name__) + + +_SIMPLE_OPS = { + ops.All: "min", + ops.Any: "max", + ops.ApproxCountDistinct: "uniqHLL12", + ops.ApproxMedian: "median", + ops.ArgMax: "argMax", + ops.ArgMin: "argMin", + ops.ArrayCollect: "groupArray", + ops.ArrayContains: "has", + ops.ArrayFlatten: "arrayFlatten", + ops.ArrayIntersect: "arrayIntersect", + ops.ArrayPosition: "indexOf", + ops.BitwiseAnd: "bitAnd", + ops.BitwiseLeftShift: "bitShiftLeft", + ops.BitwiseNot: "bitNot", + ops.BitwiseOr: "bitOr", + ops.BitwiseRightShift: "bitShiftRight", + ops.BitwiseXor: "bitXor", + ops.Capitalize: "initcap", + ops.CountDistinct: "uniq", + ops.Date: "toDate", + ops.E: "e", + ops.EndsWith: "endsWith", + ops.ExtractAuthority: "netloc", + ops.ExtractDay: "toDayOfMonth", + ops.ExtractDayOfYear: "toDayOfYear", + ops.ExtractEpochSeconds: "toRelativeSecondNum", + ops.ExtractFragment: "fragment", + ops.ExtractHost: "domain", + ops.ExtractHour: "toHour", + ops.ExtractMinute: "toMinute", + ops.ExtractMonth: "toMonth", + ops.ExtractPath: "path", + ops.ExtractProtocol: "protocol", + ops.ExtractQuarter: "toQuarter", + ops.ExtractSecond: "toSecond", + ops.ExtractWeekOfYear: "toISOWeek", + ops.ExtractYear: "toYear", + ops.First: "any", + ops.IntegerRange: "range", + ops.IsInf: "isInfinite", + ops.IsNan: "isNaN", + ops.IsNull: "isNull", + ops.LPad: "leftPad", + ops.LStrip: "trimLeft", + ops.Last: "anyLast", + ops.Ln: "log", + ops.Log10: "log10", + ops.MapContains: "mapContains", + ops.MapKeys: "mapKeys", + ops.MapLength: "length", + ops.MapMerge: "mapUpdate", + ops.MapValues: "mapValues", + ops.Median: "quantileExactExclusive", + ops.NotNull: "isNotNull", + ops.NullIf: "nullIf", + ops.RPad: "rightPad", + ops.RStrip: "trimRight", + ops.RandomScalar: "randCanonical", + ops.RegexReplace: "replaceRegexpAll", + ops.Repeat: "repeat", + ops.RowNumber: "row_number", + ops.StartsWith: "startsWith", + ops.StrRight: "right", + ops.Strftime: "formatDateTime", + ops.StringAscii: "ascii", + ops.StringLength: "length", + ops.StringReplace: "replaceAll", + ops.Strip: "trimBoth", + ops.TimestampNow: "now", + 
ops.Translate: "translate", + ops.TypeOf: "toTypeName", + ops.Unnest: "arrayJoin", +} + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @ClickHouseCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) + + else: + + @ClickHouseCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) + + setattr(ClickHouseCompiler, f"visit_{_op.__name__}", _fmt) + +del _op, _name, _fmt diff --git a/ibis/backends/clickhouse/compiler/__init__.py b/ibis/backends/clickhouse/compiler/__init__.py deleted file mode 100644 index 5d3b87047e38..000000000000 --- a/ibis/backends/clickhouse/compiler/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from __future__ import annotations - -from public import public - -from ibis.backends.clickhouse.compiler.core import translate -from ibis.backends.clickhouse.compiler.relations import translate_rel -from ibis.backends.clickhouse.compiler.values import translate_val - -public( - translate=translate, - translate_rel=translate_rel, - translate_val=translate_val, -) diff --git a/ibis/backends/clickhouse/compiler/core.py b/ibis/backends/clickhouse/compiler/core.py deleted file mode 100644 index e45c919201e5..000000000000 --- a/ibis/backends/clickhouse/compiler/core.py +++ /dev/null @@ -1,133 +0,0 @@ -"""ClickHouse ibis expression to sqlglot compiler. - -The compiler is built with a few `singledispatch` functions: - - 1. `translate_rel` for compiling `ops.TableNode`s - 1. `translate_val` for compiling `ops.Value`s - -## `translate` - -### Node Implementation - -There's a single `ops.Node` implementation for `ops.TableNode`s instances. - -This function compiles each node in topological order. The topological sorting, -result caching, and iteration are all handled by -`ibis.expr.operations.core.Node.map`. -""" - -from __future__ import annotations - -import itertools -from typing import TYPE_CHECKING, Any - -import sqlglot as sg - -import ibis.expr.operations as ops -import ibis.expr.types as ir -from ibis.backends.clickhouse.compiler.relations import translate_rel -from ibis.backends.clickhouse.compiler.values import translate_val -from ibis.common.deferred import _ -from ibis.common.patterns import replace -from ibis.expr.analysis import c, find_first_base_table, p, x -from ibis.expr.rewrites import rewrite_dropna, rewrite_fillna, rewrite_sample - -if TYPE_CHECKING: - from collections.abc import Mapping - - -def _translate_node(node, **kwargs): - if isinstance(node, ops.Value): - return translate_val(node, **kwargs) - assert isinstance(node, ops.TableNode) - return translate_rel(node, **kwargs) - - -@replace(ops.InColumn) -def replace_in_column_with_table_array_view(_): - # replace the right side of InColumn into a scalar subquery for sql backends - base = find_first_base_table(_.options) - options = ops.TableArrayView(ops.Selection(table=base, selections=(_.options,))) - return _.copy(options=options) - - -def translate(op: ops.TableNode, params: Mapping[ir.Value, Any]) -> sg.exp.Expression: - """Translate an ibis operation to a sqlglot expression. 
- - Parameters - ---------- - op - An ibis `TableNode` - params - A mapping of expressions to concrete values - - Returns - ------- - sqlglot.expressions.Expression - A sqlglot expression - """ - - gen_alias_index = itertools.count() - - def fn(node, _, **kwargs): - result = _translate_node(node, **kwargs) - - # don't alias root nodes or value ops - if node is op or isinstance(node, ops.Value): - return result - - assert isinstance(node, ops.TableNode) - - alias_index = next(gen_alias_index) - alias = f"t{alias_index:d}" - - try: - return result.subquery(alias) - except AttributeError: - return sg.alias(result, alias) - - # substitute parameters immediately to avoid having to define a - # ScalarParameter translation rule - # - # this lets us avoid threading `params` through every `translate_val` call - # only to be used in the one place it would be needed: the ScalarParameter - # `translate_val` rule - params = {param.op(): value for param, value in params.items()} - replace_literals = p.ScalarParameter(dtype=x) >> ( - lambda _, x: ops.Literal(value=params[_], dtype=x) - ) - - # replace any checks against an empty right side of the IN operation with - # `False` - replace_empty_in_values_with_false = p.InValues(options=()) >> c.Literal( - False, dtype="bool" - ) - - # subtract one from one-based functions to convert to zero-based indexing - subtract_one_from_one_indexed_functions = ( - p.WindowFunction(p.RankBase | p.NTile) - | p.StringFind - | p.FindInSet - | p.ArrayPosition - ) >> c.Subtract(_, 1) - - add_one_to_nth_value_input = p.NthValue >> _.copy(nth=c.Add(_.nth, 1)) - - nullify_empty_string_results = (p.ExtractURLField | p.DayOfWeekName) >> c.NullIf( - _, "" - ) - - op = op.replace( - replace_literals - | replace_in_column_with_table_array_view - | replace_empty_in_values_with_false - | subtract_one_from_one_indexed_functions - | add_one_to_nth_value_input - | nullify_empty_string_results - | rewrite_fillna - | rewrite_dropna - | rewrite_sample - ) - # apply translate rules in topological order - node = op.map(fn)[op] - return node.this if isinstance(node, sg.exp.Subquery) else node diff --git a/ibis/backends/clickhouse/compiler/relations.py b/ibis/backends/clickhouse/compiler/relations.py deleted file mode 100644 index ae2be72a80e8..000000000000 --- a/ibis/backends/clickhouse/compiler/relations.py +++ /dev/null @@ -1,215 +0,0 @@ -from __future__ import annotations - -import functools -from typing import Any - -import sqlglot as sg - -import ibis.common.exceptions as com -import ibis.expr.operations as ops -from ibis.backends.base.sqlglot import STAR - - -@functools.singledispatch -def translate_rel(op: ops.TableNode, **_): - """Translate a table node into sqlglot.""" - raise com.OperationNotDefinedError(f"No translation rule for {type(op)}") - - -@translate_rel.register -def _dummy(op: ops.DummyTable, *, values, **_): - return sg.select(*values) - - -@translate_rel.register -def _physical_table(op: ops.PhysicalTable, **_): - return sg.table(op.name) - - -@translate_rel.register -def _database_table(op: ops.DatabaseTable, *, name, namespace, **_): - return sg.table(name, db=namespace.schema, catalog=namespace.database) - - -def replace_tables_with_star_selection(node, alias=None): - if isinstance(node, (sg.exp.Subquery, sg.exp.Table, sg.exp.CTE)): - return sg.exp.Column( - this=STAR, - table=sg.to_identifier(alias if alias is not None else node.alias_or_name), - ) - return node - - -@translate_rel.register -def _selection(op: ops.Selection, *, table, selections, predicates, 
sort_keys, **_): - # needs_alias should never be true here in explicitly, but it may get - # passed via a (recursive) call to translate_val - if isinstance(op.table, ops.Join) and not isinstance( - op.table, (ops.LeftSemiJoin, ops.LeftAntiJoin) - ): - args = table.this.args - from_ = args["from"] - (join,) = args["joins"] - else: - from_ = join = None - - alias = table.alias_or_name - selections = tuple( - replace_tables_with_star_selection( - node, - # replace the table name with the alias if the table is **not** a - # join, because we may be selecting from a subquery or an aliased - # table; otherwise we'll select from the _unaliased_ table or the - # _child_ table, which may have a different alias than the one we - # generated for the input table - alias if from_ is None and join is None else None, - ) - for node in selections - ) or (STAR,) - - sel = sg.select(*selections).from_(from_ if from_ is not None else table) - - if join is not None: - sel = sel.join(join) - - if predicates: - if join is not None: - sel = sg.select(STAR).from_(sel.subquery(alias)) - sel = sel.where(*predicates) - - if sort_keys: - sel = sel.order_by(*sort_keys) - - return sel - - -@translate_rel.register(ops.Aggregation) -def _aggregation( - op: ops.Aggregation, *, table, metrics, by, having, predicates, sort_keys, **_ -): - selections = (by + metrics) or (STAR,) - sel = sg.select(*selections).from_(table) - - if by: - sel = sel.group_by( - *(key.this if isinstance(key, sg.exp.Alias) else key for key in by) - ) - - if predicates: - sel = sel.where(*predicates) - - if having: - sel = sel.having(*having) - - if sort_keys: - sel = sel.order_by(*sort_keys) - - return sel - - -_JOIN_TYPES = { - ops.InnerJoin: "INNER", - ops.AnyInnerJoin: "ANY", - ops.LeftJoin: "LEFT OUTER", - ops.AnyLeftJoin: "LEFT ANY", - ops.RightJoin: "RIGHT OUTER", - ops.OuterJoin: "FULL OUTER", - ops.CrossJoin: "CROSS", - ops.LeftSemiJoin: "LEFT SEMI", - ops.LeftAntiJoin: "LEFT ANTI", - ops.AsOfJoin: "LEFT ASOF", -} - - -@translate_rel.register -def _join(op: ops.Join, *, left, right, predicates, **_): - on = sg.and_(*predicates) if predicates else None - join_type = _JOIN_TYPES[type(op)] - try: - # dialect must be passed to allow clickhouse's ANY/LEFT ANY/ASOF joins - return left.join(right, join_type=join_type, on=on, dialect="clickhouse") - except AttributeError: - select_args = [f"{left.alias_or_name}.*"] - - # select from both the left and right side of the join if the join - # is not a filtering join (semi join or anti join); filtering joins - # only return the left side columns - if not isinstance(op, (ops.LeftSemiJoin, ops.LeftAntiJoin)): - select_args.append(f"{right.alias_or_name}.*") - return ( - sg.select(*select_args) - .from_(left) - .join(right, join_type=join_type, on=on, dialect="clickhouse") - ) - - -@translate_rel.register -def _self_ref(op: ops.SelfReference, *, table, **_): - return sg.alias(table, op.name) - - -@translate_rel.register -def _query(op: ops.SQLQueryResult, *, query, **_): - return sg.parse_one(query, read="clickhouse").subquery() - - -_SET_OP_FUNC = { - ops.Union: sg.union, - ops.Intersection: sg.intersect, - ops.Difference: sg.except_, -} - - -@translate_rel.register -def _set_op(op: ops.SetOp, *, left, right, distinct: bool = False, **_): - if isinstance(left, sg.exp.Table): - left = sg.select(STAR).from_(left) - - if isinstance(right, sg.exp.Table): - right = sg.select(STAR).from_(right) - - func = _SET_OP_FUNC[type(op)] - - left = left.args.get("this", left) - right = right.args.get("this", right) - - 
return func(left, right, distinct=distinct) - - -@translate_rel.register -def _limit(op: ops.Limit, *, table, n, offset, **_): - result = sg.select(STAR).from_(table) - - if n is not None: - if not isinstance(n, int): - limit = sg.select(n).from_(table).subquery() - else: - limit = n - result = result.limit(limit) - - if not isinstance(offset, int): - return result.offset( - sg.select(offset).from_(table).subquery().sql("clickhouse") - ) - - return result.offset(offset) if offset != 0 else result - - -@translate_rel.register -def _distinct(op: ops.Distinct, *, table, **_): - return sg.select(STAR).distinct().from_(table) - - -@translate_rel.register -def _sql_string_view(op: ops.SQLStringView, query: str, **_: Any): - table = sg.table(op.name) - return sg.select(STAR).from_(table).with_(table, as_=query, dialect="clickhouse") - - -@translate_rel.register -def _view(op: ops.View, *, child, name: str, **_): - # TODO: find a way to do this without creating a temporary view - backend = op.child.to_expr()._find_backend() - source = sg.select(STAR).from_(child) - backend._create_temp_view(table_name=name, source=source) - return sg.table(name) diff --git a/ibis/backends/clickhouse/compiler/values.py b/ibis/backends/clickhouse/compiler/values.py deleted file mode 100644 index df169ac36af8..000000000000 --- a/ibis/backends/clickhouse/compiler/values.py +++ /dev/null @@ -1,1059 +0,0 @@ -from __future__ import annotations - -import calendar -import functools -import math -import operator -from functools import partial -from typing import Any - -import sqlglot as sg - -import ibis.common.exceptions as com -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -from ibis import util -from ibis.backends.base.sqlglot import NULL, STAR, AggGen, C, F, interval, make_cast -from ibis.backends.clickhouse.datatypes import ClickhouseType - - -def _aggregate(funcname, *args, where): - has_filter = where is not None - func = F[funcname + "If" * has_filter] - args += (where,) * has_filter - return func(*args) - - -agg = AggGen(aggfunc=_aggregate) -if_ = F["if"] -cast = make_cast(ClickhouseType) - - -@functools.singledispatch -def translate_val(op, **_): - """Translate a value expression into sqlglot.""" - raise com.OperationNotDefinedError(f"No translation rule for {type(op)}") - - -@translate_val.register(ops.TableColumn) -def _column(op, *, table, name, **_): - return sg.column(name, table=table.alias_or_name) - - -@translate_val.register(ops.Alias) -def _alias(op, *, arg, name, **_): - return arg.as_(name) - - -_interval_cast_suffixes = { - "s": "Second", - "m": "Minute", - "h": "Hour", - "D": "Day", - "W": "Week", - "M": "Month", - "Q": "Quarter", - "Y": "Year", -} - - -@translate_val.register(ops.Cast) -def _cast(op, *, arg, to, **_): - if to.is_interval(): - suffix = _interval_cast_suffixes[to.unit.short] - return F[f"toInterval{suffix}"](arg) - - result = cast(arg, to) - if (timezone := getattr(to, "timezone", None)) is not None: - return F.toTimeZone(result, timezone) - return result - - -@translate_val.register(ops.TryCast) -def _try_cast(op, *, arg, to, **_): - return F.accurateCastOrNull(arg, ClickhouseType.to_string(to)) - - -@translate_val.register(ops.Between) -def _between(op, *, arg, lower_bound, upper_bound, **_): - return sg.exp.Between(this=arg, low=lower_bound, high=upper_bound) - - -@translate_val.register(ops.Negate) -def _negate(op, *, arg, **_): - return -sg.exp.Paren(this=arg) - - -@translate_val.register(ops.Not) -def _not(op, *, arg, **_): - return 
sg.not_(sg.exp.Paren(this=arg)) - - -def _parenthesize(op, arg): - if isinstance(op, (ops.Binary, ops.Unary)): - return sg.exp.Paren(this=arg) - else: - # function calls don't need parens - return arg - - -@translate_val.register(ops.ArrayIndex) -def _array_index_op(op, *, arg, index, **_): - return arg[if_(index >= 0, index + 1, index)] - - -@translate_val.register(ops.ArrayRepeat) -def _array_repeat_op(op, *, arg, times, **_): - return ( - sg.select(F.arrayFlatten(F.groupArray(C.arr))) - .from_( - sg.select(arg.as_("arr")) - .from_(sg.table("numbers", db="system")) - .limit(times) - .subquery() - ) - .subquery() - ) - - -@translate_val.register(ops.ArraySlice) -def _array_slice_op(op, *, arg, start, stop, **_): - start = _parenthesize(op.start, start) - start_correct = if_(start < 0, start, start + 1) - - if stop is not None: - stop = _parenthesize(op.stop, stop) - - length = if_( - stop < 0, - stop, - if_( - start < 0, - F.greatest(0, stop - (F.length(arg) + start)), - F.greatest(0, stop - start), - ), - ) - return F.arraySlice(arg, start_correct, length) - else: - return F.arraySlice(arg, start_correct) - - -@translate_val.register(ops.Count) -def _count(op, *, arg, where, **_): - if where is not None: - return sg.exp.Anonymous(this="countIf", expressions=[arg, where]) - return sg.exp.Count(this=arg) - - -@translate_val.register(ops.CountStar) -def _count_star(op, *, where, **_): - if where is not None: - return sg.exp.Anonymous(this="countIf", expressions=[where]) - return sg.exp.Count(this=STAR) - - -def _quantile(func: str): - def _compile(op, *, arg, quantile, where, **_): - if where is None: - return agg.quantile(arg, quantile, where=where) - - return sg.exp.ParameterizedAgg( - this=f"{func}If", - expressions=util.promote_list(quantile), - params=[arg, where], - ) - - return _compile - - -translate_val.register(ops.Quantile)(_quantile("quantile")) -translate_val.register(ops.MultiQuantile)(_quantile("quantiles")) - - -def _agg_variance_like(func): - variants = {"sample": f"{func}Samp", "pop": f"{func}Pop"} - - def formatter(_, *, how, where, **kw): - funcname = variants[how] - return agg[funcname](*kw.values(), where=where) - - return formatter - - -@translate_val.register(ops.Correlation) -def _corr(op, *, left, right, how, where, **_): - if how == "pop": - raise ValueError("ClickHouse only implements `sample` correlation coefficient") - return agg.corr(left, right, where=where) - - -@translate_val.register(ops.Arbitrary) -def _arbitrary(op, *, arg, how, where, **_): - if how == "first": - return agg.any(arg, where=where) - elif how == "last": - return agg.anyLast(arg, where=where) - else: - assert how == "heavy" - return agg.anyHeavy(arg, where=where) - - -@translate_val.register(ops.Substring) -def _substring(op, *, arg, start, length, **_): - # Clickhouse is 1-indexed - suffix = (length,) * (length is not None) - if_pos = F.substring(arg, start + 1, *suffix) - if_neg = F.substring(arg, F.length(arg) + start + 1, *suffix) - return if_(start >= 0, if_pos, if_neg) - - -@translate_val.register(ops.StringFind) -def _string_find(op, *, arg, substr, start, end, **_): - if end is not None: - raise com.UnsupportedOperationError("String find doesn't support end argument") - - if start is not None: - return F.locate(arg, substr, start) - - return F.locate(arg, substr) - - -@translate_val.register(ops.RegexSearch) -def _regex_search(op, *, arg, pattern, **_): - return sg.exp.RegexpLike(this=arg, expression=pattern) - - -@translate_val.register(ops.RegexExtract) -def _regex_extract(op, *, 
arg, pattern, index, **_): - arg = cast(arg, dt.String(nullable=False)) - - pattern = F.concat("(", pattern, ")") - - if index is None: - index = 0 - - index += 1 - - then = F.extractGroups(arg, pattern)[index] - - return if_(F.notEmpty(then), then, NULL) - - -@translate_val.register(ops.FindInSet) -def _index_of(op, *, needle, values, **_): - return F.indexOf(F.array(*values), needle) - - -@translate_val.register(ops.Round) -def _round(op, *, arg, digits, **_): - if digits is not None: - return F.round(arg, digits) - return F.round(arg) - - -@translate_val.register(ops.Sign) -def _sign(op, *, arg, **_): - """Workaround for missing sign function.""" - return F.intDivOrZero(arg, F.abs(arg)) - - -@translate_val.register(ops.Hash) -def _hash(op, *, arg, **_): - return F.sipHash64(arg) - - -_SUPPORTED_ALGORITHMS = frozenset( - ( - "MD5", - "halfMD5", - "SHA1", - "SHA224", - "SHA256", - "intHash32", - "intHash64", - "cityHash64", - "sipHash64", - "sipHash128", - ) -) - - -@translate_val.register(ops.HashBytes) -def _hash_bytes(op, *, arg, how, **_): - if how in ("md5", "sha1", "sha224", "sha256"): - how = how.upper() - if how not in _SUPPORTED_ALGORITHMS: - raise com.UnsupportedOperationError(f"Unsupported hash algorithm {how}") - - return F[how](arg) - - -@translate_val.register(ops.Log) -def _log(op, *, arg, base, **_): - if base is None: - return F.ln(arg) - elif str(base) in ("2", "10"): - return F[f"log{base}"](arg) - else: - return F.ln(arg) / F.ln(base) - - -@translate_val.register(ops.IntervalFromInteger) -def _interval_from_integer(op, *, arg, unit, **_): - dtype = op.dtype - if dtype.unit.short in ("ms", "us", "ns"): - raise com.UnsupportedOperationError( - "Clickhouse doesn't support subsecond interval resolutions" - ) - - return interval(arg, unit=dtype.resolution.upper()) - - -@translate_val.register(ops.Literal) -def _literal(op, *, value, dtype, **kw): - if value is None and dtype.nullable: - if dtype.is_null(): - return NULL - return cast(NULL, dtype) - elif dtype.is_boolean(): - return sg.exp.convert(bool(value)) - elif dtype.is_inet(): - v = str(value) - return F.toIPv6(v) if ":" in v else F.toIPv4(v) - elif dtype.is_string(): - return sg.exp.convert(str(value).replace(r"\0", r"\\0")) - elif dtype.is_macaddr(): - return sg.exp.convert(str(value)) - elif dtype.is_decimal(): - precision = dtype.precision - if precision is None or not 1 <= precision <= 76: - raise NotImplementedError( - f"Unsupported precision. Supported values: [1 : 76]. 
Current value: {precision!r}" - ) - - if 1 <= precision <= 9: - type_name = F.toDecimal32 - elif 10 <= precision <= 18: - type_name = F.toDecimal64 - elif 19 <= precision <= 38: - type_name = F.toDecimal128 - else: - type_name = F.toDecimal256 - return type_name(value, dtype.scale) - elif dtype.is_numeric(): - if math.isnan(value): - return sg.exp.Literal(this="NaN", is_string=False) - elif math.isinf(value): - inf = sg.exp.Literal(this="inf", is_string=False) - return -inf if value < 0 else inf - return sg.exp.convert(value) - elif dtype.is_interval(): - if dtype.unit.short in ("ms", "us", "ns"): - raise com.UnsupportedOperationError( - "Clickhouse doesn't support subsecond interval resolutions" - ) - - return interval(value, unit=dtype.resolution.upper()) - elif dtype.is_timestamp(): - funcname = "parseDateTime" - - if micros := value.microsecond: - funcname += "64" - - funcname += "BestEffort" - - args = [value.isoformat()] - - if micros % 1000: - args.append(6) - elif micros // 1000: - args.append(3) - - if (timezone := dtype.timezone) is not None: - args.append(timezone) - - return F[funcname](*args) - elif dtype.is_date(): - return F.toDate(value.isoformat()) - elif dtype.is_array(): - value_type = dtype.value_type - values = [ - _literal(ops.Literal(v, dtype=value_type), value=v, dtype=value_type, **kw) - for v in value - ] - return F.array(*values) - elif dtype.is_map(): - value_type = dtype.value_type - keys = [] - values = [] - - for k, v in value.items(): - keys.append(sg.exp.convert(k)) - values.append( - _literal( - ops.Literal(v, dtype=value_type), value=v, dtype=value_type, **kw - ) - ) - - return F.map(F.array(*keys), F.array(*values)) - elif dtype.is_struct(): - fields = [ - _literal(ops.Literal(v, dtype=field_type), value=v, dtype=field_type, **kw) - for field_type, v in zip(dtype.types, value.values()) - ] - return F.tuple(*fields) - else: - raise NotImplementedError(f"Unsupported type: {dtype!r}") - - -@translate_val.register(ops.SimpleCase) -@translate_val.register(ops.SearchedCase) -def _case(op, *, base=None, cases, results, default, **_): - return sg.exp.Case(this=base, ifs=list(map(if_, cases, results)), default=default) - - -@translate_val.register(ops.TableArrayView) -def _table_array_view(op, *, table, **_): - return table.args["this"].subquery() - - -@translate_val.register(ops.TimestampFromUNIX) -def _timestamp_from_unix(op, *, arg, unit, **_): - if (unit := unit.short) in {"ms", "us", "ns"}: - raise com.UnsupportedOperationError(f"{unit!r} unit is not supported!") - return F.toDateTime(arg) - - -@translate_val.register(ops.DateTruncate) -@translate_val.register(ops.TimestampTruncate) -@translate_val.register(ops.TimeTruncate) -def _truncate(op, *, arg, unit, **_): - converters = { - "Y": F.toStartOfYear, - "M": F.toStartOfMonth, - "W": F.toMonday, - "D": F.toDate, - "h": F.toStartOfHour, - "m": F.toStartOfMinute, - "s": F.toDateTime, - } - - unit = unit.short - if (converter := converters.get(unit)) is None: - raise com.UnsupportedOperationError(f"Unsupported truncate unit {unit}") - - return converter(arg) - - -@translate_val.register(ops.TimestampBucket) -def _timestamp_bucket(op, *, arg, interval, offset, **_): - if offset is not None: - raise com.UnsupportedOperationError( - "Timestamp bucket with offset is not supported" - ) - - return F.toStartOfInterval(arg, interval) - - -@translate_val.register(ops.DateFromYMD) -def _date_from_ymd(op, *, year, month, day, **_): - return F.toDate( - F.concat( - F.toString(year), - "-", - F.leftPad(F.toString(month), 
2, "0"), - "-", - F.leftPad(F.toString(day), 2, "0"), - ) - ) - - -@translate_val.register(ops.TimestampFromYMDHMS) -def _timestamp_from_ymdhms(op, *, year, month, day, hours, minutes, seconds, **_): - to_datetime = F.toDateTime( - F.concat( - F.toString(year), - "-", - F.leftPad(F.toString(month), 2, "0"), - "-", - F.leftPad(F.toString(day), 2, "0"), - " ", - F.leftPad(F.toString(hours), 2, "0"), - ":", - F.leftPad(F.toString(minutes), 2, "0"), - ":", - F.leftPad(F.toString(seconds), 2, "0"), - ) - ) - if timezone := op.dtype.timezone: - return F.toTimeZone(to_datetime, timezone) - return to_datetime - - -@translate_val.register(ops.ExistsSubquery) -def _exists_subquery(op, *, foreign_table, predicates, **_): - # https://github.com/ClickHouse/ClickHouse/issues/6697 - # - # this would work if clickhouse supported correlated subqueries - subq = sg.select(1).from_(foreign_table).where(sg.condition(predicates)).subquery() - return F.exists(subq) - - -@translate_val.register(ops.StringSplit) -def _string_split(op, *, arg, delimiter, **_): - return F.splitByString(delimiter, cast(arg, dt.String(nullable=False))) - - -@translate_val.register(ops.StringJoin) -def _string_join(op, *, sep, arg, **_): - return F.arrayStringConcat(F.array(*arg), sep) - - -@translate_val.register(ops.StringConcat) -def _string_concat(op, *, arg, **_): - return F.concat(*arg) - - -@translate_val.register(ops.StringSQLLike) -def _string_like(op, *, arg, pattern, **_): - return arg.like(pattern) - - -@translate_val.register(ops.StringSQLILike) -def _string_ilike(op, *, arg, pattern, **_): - return arg.ilike(pattern) - - -@translate_val.register(ops.Capitalize) -def _string_capitalize(op, *, arg, **_): - return F.concat(F.upper(F.substr(arg, 1, 1)), F.lower(F.substr(arg, 2))) - - -@translate_val.register(ops.GroupConcat) -def _group_concat(op, *, arg, sep, where, **_): - call = agg.groupArray(arg, where=where) - return if_(F.empty(call), NULL, F.arrayStringConcat(call, sep)) - - -@translate_val.register(ops.StrRight) -def _string_right(op, *, arg, nchars, **_): - nchars = _parenthesize(op.nchars, nchars) - return F.substring(arg, -nchars) - - -@translate_val.register(ops.Cot) -def _cotangent(op, *, arg, **_): - return 1.0 / F.tan(arg) - - -def _bit_agg(func: str): - def _translate(op, *, arg, where, **_): - if not (dtype := op.arg.dtype).is_unsigned_integer(): - nbits = dtype.nbytes * 8 - arg = F[f"reinterpretAsUInt{nbits}"](arg) - return agg[func](arg, where=where) - - return _translate - - -@translate_val.register(ops.Array) -def _array_column(op, *, exprs, **_): - return F.array(*exprs) - - -@translate_val.register(ops.StructColumn) -def _struct_column(op, *, values, **_): - # ClickHouse struct types cannot be nullable - # (non-nested fields can be nullable) - return cast(F.tuple(*values), op.dtype.copy(nullable=False)) - - -@translate_val.register(ops.Clip) -def _clip(op, *, arg, lower, upper, **_): - if upper is not None: - arg = if_(F.isNull(arg), NULL, F.least(upper, arg)) - - if lower is not None: - arg = if_(F.isNull(arg), NULL, F.greatest(lower, arg)) - - return arg - - -@translate_val.register(ops.StructField) -def _struct_field(op, *, arg, field: str, **_): - arg_dtype = op.arg.dtype - idx = arg_dtype.names.index(field) - return cast(sg.exp.Dot(this=arg, expression=sg.exp.convert(idx + 1)), op.dtype) - - -@translate_val.register(ops.Repeat) -def _repeat(op, *, arg, times, **_): - return F.repeat(arg, F.accurateCast(times, "UInt64")) - - -@translate_val.register(ops.FloorDivide) -def _floor_divide(op, *, left, 
right, **_): - return F.floor(left / right) - - -@translate_val.register(ops.StringContains) -def _string_contains(op, haystack, needle, **_): - return F.locate(haystack, needle) > 0 - - -@translate_val.register(ops.InValues) -def _in_values(op, *, value, options, **_): - return _parenthesize(op.value, value).isin(*options) - - -@translate_val.register(ops.InColumn) -def _in_column(op, *, value, options, **_): - return value.isin(options.this if isinstance(options, sg.exp.Subquery) else options) - - -_DAYS = calendar.day_name -_NUM_WEEKDAYS = len(_DAYS) - - -@translate_val.register(ops.DayOfWeekIndex) -def _day_of_week_index(op, *, arg, **_): - weekdays = _NUM_WEEKDAYS - return (((F.toDayOfWeek(arg) - 1) % weekdays) + weekdays) % weekdays - - -@translate_val.register(ops.DayOfWeekName) -def day_of_week_name(op, *, arg, **_): - # ClickHouse 20 doesn't support dateName - # - # ClickHouse 21 supports dateName is broken for regexen: - # https://github.com/ClickHouse/ClickHouse/issues/32777 - # - # ClickHouses 20 and 21 also have a broken case statement hence the ifnull: - # https://github.com/ClickHouse/ClickHouse/issues/32849 - # - # We test against 20 in CI, so we implement day_of_week_name as follows - num_weekdays = _NUM_WEEKDAYS - base = (((F.toDayOfWeek(arg) - 1) % num_weekdays) + num_weekdays) % num_weekdays - return sg.exp.Case( - this=base, - ifs=[if_(i, day) for i, day in enumerate(_DAYS)], - default=sg.exp.convert(""), - ) - - -@translate_val.register(ops.Greatest) -@translate_val.register(ops.Least) -@translate_val.register(ops.Coalesce) -def _vararg_func(op, *, arg, **_): - return F[op.__class__.__name__.lower()](*arg) - - -@translate_val.register(ops.Map) -def _map(op, *, keys, values, **_): - # cast here to allow lookups of nullable columns - return cast(F.tuple(keys, values), op.dtype) - - -@translate_val.register(ops.MapGet) -def _map_get(op, *, arg, key, default, **_): - return if_(F.mapContains(arg, key), arg[key], default) - - -@translate_val.register(ops.ArrayConcat) -def _array_concat(op, *, arg, **_): - return F.arrayConcat(*arg) - - -def _binary_infix(func): - def formatter(op, *, left, right, **_): - left = _parenthesize(op.left, left) - right = _parenthesize(op.right, right) - return func(left, right) - - return formatter - - -_binary_infix_ops = { - # Binary operations - ops.Add: operator.add, - ops.Subtract: operator.sub, - ops.Multiply: operator.mul, - ops.Divide: operator.truediv, - ops.Modulus: operator.mod, - # Comparisons - ops.Equals: sg.exp.Condition.eq, - ops.NotEquals: sg.exp.Condition.neq, - ops.GreaterEqual: operator.ge, - ops.Greater: operator.gt, - ops.LessEqual: operator.le, - ops.Less: operator.lt, - # Boolean comparisons - ops.And: operator.and_, - ops.Or: operator.or_, - ops.Xor: F.xor, - ops.DateAdd: operator.add, - ops.DateSub: operator.sub, - ops.DateDiff: operator.sub, - ops.TimestampAdd: operator.add, - ops.TimestampSub: operator.sub, - ops.TimestampDiff: operator.sub, -} - - -for _op, _func in _binary_infix_ops.items(): - translate_val.register(_op)(_binary_infix(_func)) - -del _op, _func - -translate_val.register(ops.BitAnd)(_bit_agg("groupBitAnd")) -translate_val.register(ops.BitOr)(_bit_agg("groupBitOr")) -translate_val.register(ops.BitXor)(_bit_agg("groupBitXor")) - -translate_val.register(ops.StandardDev)(_agg_variance_like("stddev")) -translate_val.register(ops.Variance)(_agg_variance_like("var")) -translate_val.register(ops.Covariance)(_agg_variance_like("covar")) - - -_simple_ops = { - ops.Power: "pow", - # Unary operations - 
ops.TypeOf: "toTypeName", - ops.IsNan: "isNaN", - ops.IsInf: "isInfinite", - ops.Abs: "abs", - ops.Ceil: "ceil", - ops.Floor: "floor", - ops.Exp: "exp", - ops.Sqrt: "sqrt", - ops.Ln: "log", - ops.Log2: "log2", - ops.Log10: "log10", - ops.Acos: "acos", - ops.Asin: "asin", - ops.Atan: "atan", - ops.Atan2: "atan2", - ops.Cos: "cos", - ops.Sin: "sin", - ops.Tan: "tan", - ops.Pi: "pi", - ops.E: "e", - ops.RandomScalar: "randCanonical", - # Unary aggregates - ops.ApproxMedian: "median", - ops.Median: "quantileExactExclusive", - # TODO: there is also a `uniq` function which is the - # recommended way to approximate cardinality - ops.ApproxCountDistinct: "uniqHLL12", - ops.Mean: "avg", - ops.Sum: "sum", - ops.Max: "max", - ops.Min: "min", - ops.Any: "max", - ops.All: "min", - ops.ArgMin: "argMin", - ops.ArgMax: "argMax", - ops.ArrayCollect: "groupArray", - ops.CountDistinct: "uniq", - ops.First: "any", - ops.Last: "anyLast", - # string operations - ops.StringLength: "length", - ops.Lowercase: "lower", - ops.Uppercase: "upper", - ops.Reverse: "reverse", - ops.StringReplace: "replaceAll", - ops.StartsWith: "startsWith", - ops.EndsWith: "endsWith", - ops.LPad: "leftPad", - ops.RPad: "rightPad", - ops.LStrip: "trimLeft", - ops.RStrip: "trimRight", - ops.Strip: "trimBoth", - ops.RegexReplace: "replaceRegexpAll", - ops.StringAscii: "ascii", - # Temporal operations - ops.Date: "toDate", - ops.TimestampNow: "now", - ops.ExtractYear: "toYear", - ops.ExtractMonth: "toMonth", - ops.ExtractDay: "toDayOfMonth", - ops.ExtractDayOfYear: "toDayOfYear", - ops.ExtractQuarter: "toQuarter", - ops.ExtractWeekOfYear: "toISOWeek", - ops.ExtractHour: "toHour", - ops.ExtractMinute: "toMinute", - ops.ExtractSecond: "toSecond", - # Other operations - ops.E: "e", - # for more than 2 args this should be arrayGreatest|Least(array([])) - # because clickhouse"s greatest and least doesn"t support varargs - ops.IfElse: "if", - ops.ArrayLength: "length", - ops.Unnest: "arrayJoin", - ops.Degrees: "degrees", - ops.Radians: "radians", - ops.Strftime: "formatDateTime", - ops.IsNull: "isNull", - ops.NotNull: "isNotNull", - ops.NullIf: "nullIf", - ops.MapContains: "mapContains", - ops.MapLength: "length", - ops.MapKeys: "mapKeys", - ops.MapValues: "mapValues", - ops.MapMerge: "mapUpdate", - ops.BitwiseAnd: "bitAnd", - ops.BitwiseOr: "bitOr", - ops.BitwiseXor: "bitXor", - ops.BitwiseLeftShift: "bitShiftLeft", - ops.BitwiseRightShift: "bitShiftRight", - ops.BitwiseNot: "bitNot", - ops.ArraySort: "arraySort", - ops.ArrayContains: "has", - ops.FirstValue: "first_value", - ops.LastValue: "last_value", - ops.NTile: "ntile", - ops.ArrayIntersect: "arrayIntersect", - ops.ExtractEpochSeconds: "toRelativeSecondNum", - ops.NthValue: "nth_value", - ops.MinRank: "rank", - ops.DenseRank: "dense_rank", - ops.RowNumber: "row_number", - ops.ExtractProtocol: "protocol", - ops.ExtractAuthority: "netloc", - ops.ExtractHost: "domain", - ops.ExtractPath: "path", - ops.ExtractFragment: "fragment", - ops.ArrayPosition: "indexOf", - ops.ArrayFlatten: "arrayFlatten", - ops.IntegerRange: "range", -} - - -for _op, _name in _simple_ops.items(): - assert isinstance(type(_op), type), type(_op) - if issubclass(_op, ops.Reduction): - - @translate_val.register(_op) - def _fmt(_, _name: str = _name, *, where, **kw): - return agg[_name](*kw.values(), where=where) - - else: - - @translate_val.register(_op) - def _fmt(_, _name: str = _name, **kw): - return F[_name](*kw.values()) - - -del _fmt, _name, _op - - -@translate_val.register(ops.ArrayDistinct) -def 
_array_distinct(op, *, arg, **_): - null_element = if_(F.countEqual(arg, NULL) > 0, F.array(NULL), F.array()) - return F.arrayConcat(F.arrayDistinct(arg), null_element) - - -@translate_val.register(ops.ExtractMicrosecond) -def _extract_microsecond(op, *, arg, **_): - dtype = op.dtype - return cast( - F.toUnixTimestamp64Micro(cast(arg, op.arg.dtype.copy(scale=6))) % 1_000_000, - dtype, - ) - - -@translate_val.register(ops.ExtractMillisecond) -def _extract_millisecond(op, *, arg, **_): - dtype = op.dtype - return cast( - F.toUnixTimestamp64Milli(cast(arg, op.arg.dtype.copy(scale=3))) % 1_000, dtype - ) - - -@translate_val.register -def _sort_key(op: ops.SortKey, *, expr, ascending: bool, **_): - return sg.exp.Ordered(this=expr, desc=not ascending) - - -@translate_val.register(ops.WindowBoundary) -def _window_boundary(op, *, value, preceding, **_): - # TODO: bit of a hack to return a dict, but there's no sqlglot expression - # that corresponds to _only_ this information - return {"value": value, "side": "preceding" if preceding else "following"} - - -@translate_val.register(ops.RowsWindowFrame) -@translate_val.register(ops.RangeWindowFrame) -def _window_frame(op, *, group_by, order_by, start, end, max_lookback=None, **_): - if max_lookback is not None: - raise NotImplementedError( - "`max_lookback` is not supported in the ClickHouse backend" - ) - - if start is None: - start = {} - - start_value = start.get("value", "UNBOUNDED") - start_side = start.get("side", "PRECEDING") - - if end is None: - end = {} - - end_value = end.get("value", "UNBOUNDED") - end_side = end.get("side", "FOLLOWING") - - spec = sg.exp.WindowSpec( - kind=op.how.upper(), - start=start_value, - start_side=start_side, - end=end_value, - end_side=end_side, - over="OVER", - ) - - order = sg.exp.Order(expressions=order_by) if order_by else None - - # TODO: bit of a hack to return a partial, but similar to `WindowBoundary` - # there's no sqlglot expression that corresponds to _only_ this information - return partial(sg.exp.Window, partition_by=group_by, order=order, spec=spec) - - -@translate_val.register(ops.WindowFunction) -def _window(op: ops.WindowFunction, *, func, frame, **_: Any): - # frame is a partial call to sg.exp.Window - return frame(this=func) - - -def shift_like(op_class, func): - @translate_val.register(op_class) - def formatter(op, *, arg, offset, default, **_): - args = [arg] - - if default is not None: - if offset is None: - offset = 1 - - args.append(offset) - args.append(default) - elif offset is not None: - args.append(offset) - - return func(*args) - - return formatter - - -shift_like(ops.Lag, F.lagInFrame) -shift_like(ops.Lead, F.leadInFrame) - - -@translate_val.register(ops.ExtractFile) -def _extract_file(op, *, arg, **_): - return F.cutFragment(F.pathFull(arg)) - - -@translate_val.register(ops.ExtractQuery) -def _extract_query(op, *, arg, key, **_): - if key is not None: - return F.extractURLParameter(arg, key) - else: - return F.queryString(arg) - - -@translate_val.register(ops.ArrayStringJoin) -def _array_string_join(op, *, arg, sep, **_): - return F.arrayStringConcat(arg, sep) - - -@translate_val.register(ops.Argument) -def _argument(op, **_): - return sg.to_identifier(op.param) - - -@translate_val.register(ops.ArrayMap) -def _array_map(op, *, arg, param, body, **_): - func = sg.exp.Lambda(this=body, expressions=[param]) - return F.arrayMap(func, arg) - - -@translate_val.register(ops.ArrayFilter) -def _array_filter(op, *, arg, param, body, **_): - func = sg.exp.Lambda(this=body, 
expressions=[param]) - return F.arrayFilter(func, arg) - - -@translate_val.register(ops.ArrayRemove) -def _array_remove(op, *, arg, other, **_): - x = sg.to_identifier("x") - body = x.neq(other) - return F.arrayFilter(sg.exp.Lambda(this=body, expressions=[x]), arg) - - -@translate_val.register(ops.ArrayUnion) -def _array_union(op, *, left, right, **_): - arg = F.arrayConcat(left, right) - null_element = if_(F.countEqual(arg, NULL) > 0, F.array(NULL), F.array()) - return F.arrayConcat(F.arrayDistinct(arg), null_element) - - -@translate_val.register(ops.ArrayZip) -def _array_zip(op: ops.ArrayZip, *, arg, **_: Any) -> str: - return F.arrayZip(*arg) - - -@translate_val.register(ops.CountDistinctStar) -def _count_distinct_star(op: ops.CountDistinctStar, *, where, **_: Any) -> str: - columns = F.tuple(*map(sg.column, op.arg.schema.names)) - - if where is not None: - return F.countDistinctIf(columns, where) - else: - return F.countDistinct(columns) - - -@translate_val.register(ops.ScalarUDF) -def _scalar_udf(op, **kw) -> str: - return F[op.__full_name__](*kw.values()) - - -@translate_val.register(ops.AggUDF) -def _agg_udf(op, *, where, **kw) -> str: - return agg[op.__full_name__](*kw.values(), where=where) - - -@translate_val.register(ops.DateDelta) -@translate_val.register(ops.TimestampDelta) -def _delta(op, *, part, left, right, **_): - return sg.exp.DateDiff(this=left, expression=right, unit=part) - - -@translate_val.register(ops.TimestampRange) -def _timestamp_range(op, *, start, stop, step, **_): - unit = op.step.dtype.unit.name.lower() - - if not isinstance(op.step, ops.Literal): - raise com.UnsupportedOperationError( - "ClickHouse doesn't support non-literal step values" - ) - - step_value = op.step.value - - offset = sg.to_identifier("offset") - - # e.g., offset -> dateAdd(DAY, offset, start) - func = sg.exp.Lambda( - this=F.dateAdd(sg.to_identifier(unit), offset, start), expressions=[offset] - ) - - if step_value == 0: - return F.array() - - result = F.arrayMap( - func, F.range(0, F.timestampDiff(unit, start, stop), step_value) - ) - return result - - -@translate_val.register(ops.RegexSplit) -def _regex_split(op, *, arg, pattern, **_): - return F.splitByRegexp(pattern, cast(arg, dt.String(nullable=False))) diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float/out.sql index f97b0b7e0747..e86039758336 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float/out.sql @@ -1,3 +1,3 @@ SELECT - t0.double_col + t0.double_col AS double_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bigint_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bigint_col/out.sql index b2e0d4507c4f..4bec79041636 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bigint_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bigint_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.bigint_col + t0.bigint_col AS bigint_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bool_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bool_col/out.sql 
index d967873daf1f..130cc6720677 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bool_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bool_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.bool_col + t0.bool_col AS bool_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/date_string_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/date_string_col/out.sql index c8ad0f838a31..30f45904aefe 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/date_string_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/date_string_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.date_string_col + t0.date_string_col AS date_string_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/double_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/double_col/out.sql index f97b0b7e0747..e86039758336 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/double_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/double_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.double_col + t0.double_col AS double_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/float_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/float_col/out.sql index 33277148af85..5303bda94aa1 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/float_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/float_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.float_col + t0.float_col AS float_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/id/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/id/out.sql index b4012dbb377d..8a5c9fda9ccc 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/id/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/id/out.sql @@ -1,3 +1,3 @@ SELECT - t0.id + t0.id AS id FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/int_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/int_col/out.sql index 6b3541821ed4..f6d916f49144 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/int_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/int_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col + t0.int_col AS int_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/month/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/month/out.sql index d0eb5143c2b4..c7dc05252aa5 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/month/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/month/out.sql @@ -1,3 +1,3 @@ SELECT - t0.month + t0.month AS month FROM 
functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/smallint_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/smallint_col/out.sql index dfcd8e0c0149..7a69adfa78c9 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/smallint_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/smallint_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.smallint_col + t0.smallint_col AS smallint_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/string_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/string_col/out.sql index cfe88fb96a8c..9be6e5524121 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/string_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/string_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.string_col + t0.string_col AS string_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/timestamp_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/timestamp_col/out.sql index 860302ea8039..f251eb35e692 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/timestamp_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/timestamp_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.timestamp_col + t0.timestamp_col AS timestamp_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/tinyint_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/tinyint_col/out.sql index c9f057e3aa11..4f3dd3c555a9 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/tinyint_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/tinyint_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.tinyint_col + t0.tinyint_col AS tinyint_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/year/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/year/out.sql index 5295b8fc6a8d..7d21b2c53090 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/year/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/year/out.sql @@ -1,3 +1,3 @@ SELECT - t0.year + t0.year AS year FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out2.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out2.sql index 0fc7c1f4cbe2..8ec21cad88cf 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out2.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out2.sql @@ -1,7 +1,3 @@ SELECT - ( - t0.string_col LIKE 'foo%' - ) OR ( - t0.string_col LIKE '%bar' - ) AS "Or(StringSQLLike(string_col, 'foo%'), StringSQLLike(string_col, '%bar'))" + t0.string_col LIKE 'foo%' OR t0.string_col LIKE '%bar' AS "Or(StringSQLLike(string_col, 'foo%'), StringSQLLike(string_col, 
'%bar'))" FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/ceil/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/ceil/out.sql index c2b64e683d01..fafc564ab456 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/ceil/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/ceil/out.sql @@ -1,3 +1,3 @@ SELECT - CEIL(t0.double_col) AS "Ceil(double_col)" + CAST(CEIL(t0.double_col) AS Nullable(Int64)) AS "Ceil(double_col)" FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda1/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda1/out.sql index c2f4023b696c..3c61319c349c 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda1/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda1/out.sql @@ -1,5 +1,3 @@ SELECT - ( - LN(t0.int_col) - ) + t0.double_col AS "Add(Log(int_col), double_col)" + LN(t0.int_col) + t0.double_col AS "Add(Log(int_col), double_col)" FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda2/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda2/out.sql index ab420235e83d..08d6cd257a99 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda2/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda2/out.sql @@ -1,7 +1,5 @@ SELECT - t0.tinyint_col + ( - -( - t0.int_col + t0.double_col - ) + t0.tinyint_col + -( + t0.int_col + t0.double_col ) AS "Add(tinyint_col, Negate(Add(int_col, double_col)))" FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql index c405dd7be487..05665f06de1d 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql @@ -1,5 +1,7 @@ SELECT - t0.id IN (SELECT - arrayJoin(t1.ids) AS ids - FROM way_view AS t1) AS "InColumn(id, ids)" + t0.id IN (( + SELECT + arrayJoin(t1.ids) AS ids + FROM way_view AS t1 + )) AS "InSubquery(id)" FROM node_view AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_array_expr_projection/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_array_expr_projection/out.sql index 034aab28ceef..0cc5ae5e1250 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_array_expr_projection/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_array_expr_projection/out.sql @@ -2,7 +2,7 @@ SELECT CAST(t1.string_col AS Nullable(Float64)) AS "Cast(string_col, float64)" FROM ( SELECT - t0.string_col, + t0.string_col AS string_col, COUNT(*) AS count FROM functional_alltypes AS t0 GROUP BY diff --git 
a/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql new file mode 100644 index 000000000000..5156a61980fa --- /dev/null +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql @@ -0,0 +1,11 @@ +SELECT + t0.a AS a, + t0.b AS b, + t2.c AS c, + t2.d AS d, + t2.c / ( + t0.a - t0.b + ) AS e +FROM s AS t0 +INNER JOIN t AS t2 + ON t0.a = t2.c \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_count_name/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_count_name/out.sql index 6edcaf0c84a9..e6344e2c48f9 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_count_name/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_count_name/out.sql @@ -1,5 +1,5 @@ SELECT - t0.a, + t0.a AS a, COALESCE(countIf(NOT ( t0.b )), 0) AS A, diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql index 6320f23aa1a2..f75d124691e9 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql @@ -1,5 +1,17 @@ SELECT - * + t0.id AS id, + t0.bool_col AS bool_col, + t0.tinyint_col AS tinyint_col, + t0.smallint_col AS smallint_col, + t0.int_col AS int_col, + t0.bigint_col AS bigint_col, + t0.float_col AS float_col, + t0.double_col AS double_col, + t0.date_string_col AS date_string_col, + t0.string_col AS string_col, + t0.timestamp_col AS timestamp_col, + t0.year AS year, + t0.month AS month FROM functional_alltypes AS t0 WHERE t0.string_col IN ('foo', 'bar') \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql index 9eb5f653d7cc..280d4be81f9f 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql @@ -1,5 +1,17 @@ SELECT - * + t0.id AS id, + t0.bool_col AS bool_col, + t0.tinyint_col AS tinyint_col, + t0.smallint_col AS smallint_col, + t0.int_col AS int_col, + t0.bigint_col AS bigint_col, + t0.float_col AS float_col, + t0.double_col AS double_col, + t0.date_string_col AS date_string_col, + t0.string_col AS string_col, + t0.timestamp_col AS timestamp_col, + t0.year AS year, + t0.month AS month FROM functional_alltypes AS t0 WHERE NOT ( diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql index 4a756ab86ec9..6ef5f03a897c 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql @@ -1,5 +1,17 @@ SELECT - t0.* + t0.id AS id, + t0.bool_col AS bool_col, + t0.tinyint_col AS tinyint_col, + t0.smallint_col AS smallint_col, + t0.int_col AS int_col, + t0.bigint_col AS bigint_col, + t0.float_col AS float_col, + t0.double_col AS double_col, + t0.date_string_col AS date_string_col, + t0.string_col AS string_col, + t0.timestamp_col AS timestamp_col, + t0.year AS year, + t0.month 
AS month FROM functional_alltypes AS t0 INNER JOIN functional_alltypes AS t1 ON t0.id = t1.id \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql index 79de229147a0..d9820f3b119b 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql @@ -1,12 +1,17 @@ SELECT - t0.key, + t1.key AS key, SUM(( ( - t0.value + 1 + t1.value + 1 ) + 2 ) + 3) AS abc -FROM t0 AS t0 -WHERE - t0.value = 42 +FROM ( + SELECT + t0.key AS key, + t0.value AS value + FROM t0 AS t0 + WHERE + t0.value = 42 +) AS t1 GROUP BY - t0.key \ No newline at end of file + t1.key \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql index 0596895c1ccf..cf152c137c02 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql @@ -1,12 +1,17 @@ SELECT - t0.key, + t1.key AS key, SUM(( ( - t0.value + 1 + t1.value + 1 ) + 2 ) + 3) AS foo -FROM t0 AS t0 -WHERE - t0.value = 42 +FROM ( + SELECT + t0.key AS key, + t0.value AS value + FROM t0 AS t0 + WHERE + t0.value = 42 +) AS t1 GROUP BY - t0.key \ No newline at end of file + t1.key \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql index f879a888124a..b1a66e364144 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql @@ -1,5 +1,26 @@ SELECT - t0.* + t0.playerID AS playerID, + t0.yearID AS yearID, + t0.stint AS stint, + t0.teamID AS teamID, + t0.lgID AS lgID, + t0.G AS G, + t0.AB AS AB, + t0.R AS R, + t0.H AS H, + t0.X2B AS X2B, + t0.X3B AS X3B, + t0.HR AS HR, + t0.RBI AS RBI, + t0.SB AS SB, + t0.CS AS CS, + t0.BB AS BB, + t0.SO AS SO, + t0.IBB AS IBB, + t0.HBP AS HBP, + t0.SH AS SH, + t0.SF AS SF, + t0.GIDP AS GIDP FROM batting AS t0 -ANY JOIN awards_players AS t1 - ON t0.playerID = t1.awardID \ No newline at end of file +ANY JOIN awards_players AS t2 + ON t0.playerID = t2.awardID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql index 88c96b29443c..9e806e782a58 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql @@ -1,5 +1,26 @@ SELECT - t0.* + t0.playerID AS playerID, + t0.yearID AS yearID, + t0.stint AS stint, + t0.teamID AS teamID, + t0.lgID AS lgID, + t0.G AS G, + t0.AB AS AB, + t0.R AS R, + t0.H AS H, + t0.X2B AS X2B, + t0.X3B AS X3B, + t0.HR AS HR, + t0.RBI AS RBI, + 
t0.SB AS SB, + t0.CS AS CS, + t0.BB AS BB, + t0.SO AS SO, + t0.IBB AS IBB, + t0.HBP AS HBP, + t0.SH AS SH, + t0.SF AS SF, + t0.GIDP AS GIDP FROM batting AS t0 -LEFT ANY JOIN awards_players AS t1 - ON t0.playerID = t1.awardID \ No newline at end of file +LEFT ANY JOIN awards_players AS t2 + ON t0.playerID = t2.awardID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql index 700f214f0382..3e49718a37e7 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql @@ -1,5 +1,26 @@ SELECT - t0.* + t0.playerID AS playerID, + t0.yearID AS yearID, + t0.stint AS stint, + t0.teamID AS teamID, + t0.lgID AS lgID, + t0.G AS G, + t0.AB AS AB, + t0.R AS R, + t0.H AS H, + t0.X2B AS X2B, + t0.X3B AS X3B, + t0.HR AS HR, + t0.RBI AS RBI, + t0.SB AS SB, + t0.CS AS CS, + t0.BB AS BB, + t0.SO AS SO, + t0.IBB AS IBB, + t0.HBP AS HBP, + t0.SH AS SH, + t0.SF AS SF, + t0.GIDP AS GIDP FROM batting AS t0 -INNER JOIN awards_players AS t1 - ON t0.playerID = t1.awardID \ No newline at end of file +INNER JOIN awards_players AS t2 + ON t0.playerID = t2.awardID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql index 9e158d9dd8a1..3d2ffe6a0df8 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql @@ -1,5 +1,26 @@ SELECT - t0.* + t0.playerID AS playerID, + t0.yearID AS yearID, + t0.stint AS stint, + t0.teamID AS teamID, + t0.lgID AS lgID, + t0.G AS G, + t0.AB AS AB, + t0.R AS R, + t0.H AS H, + t0.X2B AS X2B, + t0.X3B AS X3B, + t0.HR AS HR, + t0.RBI AS RBI, + t0.SB AS SB, + t0.CS AS CS, + t0.BB AS BB, + t0.SO AS SO, + t0.IBB AS IBB, + t0.HBP AS HBP, + t0.SH AS SH, + t0.SF AS SF, + t0.GIDP AS GIDP FROM batting AS t0 -LEFT OUTER JOIN awards_players AS t1 - ON t0.playerID = t1.awardID \ No newline at end of file +LEFT OUTER JOIN awards_players AS t2 + ON t0.playerID = t2.awardID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql index 363aaebf890c..84dddb02c539 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql @@ -1,5 +1,26 @@ SELECT - t0.* + t0.playerID AS playerID, + t0.yearID AS yearID, + t0.stint AS stint, + t0.teamID AS teamID, + t0.lgID AS lgID, + t0.G AS G, + t0.AB AS AB, + t0.R AS R, + t0.H AS H, + t0.X2B AS X2B, + t0.X3B AS X3B, + t0.HR AS HR, + t0.RBI AS RBI, + t0.SB AS SB, + t0.CS AS CS, + t0.BB AS BB, + t0.SO AS SO, + t0.IBB AS IBB, + t0.HBP AS HBP, + t0.SH AS SH, + t0.SF AS SF, + t0.GIDP AS GIDP FROM batting AS t0 -ANY JOIN awards_players AS t1 - ON t0.playerID = 
t1.playerID \ No newline at end of file +ANY JOIN awards_players AS t2 + ON t0.playerID = t2.playerID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql index 3ba9f0d4e06f..6826f42c2752 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql @@ -1,5 +1,26 @@ SELECT - t0.* + t0.playerID AS playerID, + t0.yearID AS yearID, + t0.stint AS stint, + t0.teamID AS teamID, + t0.lgID AS lgID, + t0.G AS G, + t0.AB AS AB, + t0.R AS R, + t0.H AS H, + t0.X2B AS X2B, + t0.X3B AS X3B, + t0.HR AS HR, + t0.RBI AS RBI, + t0.SB AS SB, + t0.CS AS CS, + t0.BB AS BB, + t0.SO AS SO, + t0.IBB AS IBB, + t0.HBP AS HBP, + t0.SH AS SH, + t0.SF AS SF, + t0.GIDP AS GIDP FROM batting AS t0 -LEFT ANY JOIN awards_players AS t1 - ON t0.playerID = t1.playerID \ No newline at end of file +LEFT ANY JOIN awards_players AS t2 + ON t0.playerID = t2.playerID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql index 5d0d8dc31e6e..c1d013f9fe49 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql @@ -1,5 +1,26 @@ SELECT - t0.* + t0.playerID AS playerID, + t0.yearID AS yearID, + t0.stint AS stint, + t0.teamID AS teamID, + t0.lgID AS lgID, + t0.G AS G, + t0.AB AS AB, + t0.R AS R, + t0.H AS H, + t0.X2B AS X2B, + t0.X3B AS X3B, + t0.HR AS HR, + t0.RBI AS RBI, + t0.SB AS SB, + t0.CS AS CS, + t0.BB AS BB, + t0.SO AS SO, + t0.IBB AS IBB, + t0.HBP AS HBP, + t0.SH AS SH, + t0.SF AS SF, + t0.GIDP AS GIDP FROM batting AS t0 -INNER JOIN awards_players AS t1 - ON t0.playerID = t1.playerID \ No newline at end of file +INNER JOIN awards_players AS t2 + ON t0.playerID = t2.playerID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql index cc098eca7bfb..cd444e5fa871 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql @@ -1,5 +1,26 @@ SELECT - t0.* + t0.playerID AS playerID, + t0.yearID AS yearID, + t0.stint AS stint, + t0.teamID AS teamID, + t0.lgID AS lgID, + t0.G AS G, + t0.AB AS AB, + t0.R AS R, + t0.H AS H, + t0.X2B AS X2B, + t0.X3B AS X3B, + t0.HR AS HR, + t0.RBI AS RBI, + t0.SB AS SB, + t0.CS AS CS, + t0.BB AS BB, + t0.SO AS SO, + t0.IBB AS IBB, + t0.HBP AS HBP, + t0.SH AS SH, + t0.SF AS SF, + t0.GIDP AS GIDP FROM batting AS t0 -LEFT OUTER JOIN awards_players AS t1 - ON t0.playerID = t1.playerID \ No newline at end of file +LEFT OUTER JOIN awards_players AS t2 + ON t0.playerID = t2.playerID \ No newline at end of file diff --git 
a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql index c960b3b70a1a..57e97d5ec095 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql @@ -1,5 +1,21 @@ SELECT - SUM(t0.float_col) AS "Sum(float_col)" -FROM functional_alltypes AS t0 -WHERE - t0.int_col > 0 \ No newline at end of file + SUM(t1.float_col) AS "Sum(float_col)" +FROM ( + SELECT + t0.id AS id, + t0.bool_col AS bool_col, + t0.tinyint_col AS tinyint_col, + t0.smallint_col AS smallint_col, + t0.int_col AS int_col, + t0.bigint_col AS bigint_col, + t0.float_col AS float_col, + t0.double_col AS double_col, + t0.date_string_col AS date_string_col, + t0.string_col AS string_col, + t0.timestamp_col AS timestamp_col, + t0.year AS year, + t0.month AS month + FROM functional_alltypes AS t0 + WHERE + t0.int_col > 0 +) AS t1 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql index 0bc4ec1e4852..53eef384f148 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql @@ -1,12 +1,28 @@ SELECT - t1.string_col + t2.string_col AS string_col FROM ( SELECT - t0.string_col, - SUM(t0.float_col) AS total - FROM functional_alltypes AS t0 - WHERE - t0.int_col > 0 + t1.string_col AS string_col, + SUM(t1.float_col) AS total + FROM ( + SELECT + t0.id AS id, + t0.bool_col AS bool_col, + t0.tinyint_col AS tinyint_col, + t0.smallint_col AS smallint_col, + t0.int_col AS int_col, + t0.bigint_col AS bigint_col, + t0.float_col AS float_col, + t0.double_col AS double_col, + t0.date_string_col AS date_string_col, + t0.string_col AS string_col, + t0.timestamp_col AS timestamp_col, + t0.year AS year, + t0.month AS month + FROM functional_alltypes AS t0 + WHERE + t0.int_col > 0 + ) AS t1 GROUP BY - t0.string_col -) AS t1 \ No newline at end of file + t1.string_col +) AS t2 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql index 6806def413a1..3fc646a6237e 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql @@ -1,5 +1,17 @@ SELECT - * + t0.id AS id, + t0.bool_col AS bool_col, + t0.tinyint_col AS tinyint_col, + t0.smallint_col AS smallint_col, + t0.int_col AS int_col, + t0.bigint_col AS bigint_col, + t0.float_col AS float_col, + t0.double_col AS double_col, + t0.date_string_col AS date_string_col, + t0.string_col AS string_col, + t0.timestamp_col AS timestamp_col, + t0.year AS year, + t0.month AS month FROM functional_alltypes AS t0 WHERE t0.float_col > 0 AND t0.int_col < ( diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql index 7982cdf7c584..e91cbcdc4595 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql +++ 
b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql @@ -1,5 +1,17 @@ SELECT - * + t0.id AS id, + t0.bool_col AS bool_col, + t0.tinyint_col AS tinyint_col, + t0.smallint_col AS smallint_col, + t0.int_col AS int_col, + t0.bigint_col AS bigint_col, + t0.float_col AS float_col, + t0.double_col AS double_col, + t0.date_string_col AS date_string_col, + t0.string_col AS string_col, + t0.timestamp_col AS timestamp_col, + t0.year AS year, + t0.month AS month FROM functional_alltypes AS t0 WHERE t0.int_col > 0 AND t0.float_col BETWEEN 0 AND 1 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_timestamp/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_timestamp/out.sql index cee980322a7b..2e6c66b7d831 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_timestamp/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_timestamp/out.sql @@ -1,5 +1,5 @@ SELECT - t0.uuid, + t0.uuid AS uuid, minIf(t0.ts, t0.search_level = 1) AS min_date FROM t AS t0 GROUP BY diff --git a/ibis/backends/clickhouse/tests/test_select.py b/ibis/backends/clickhouse/tests/test_select.py index 207fcef8d393..b74ce39ff621 100644 --- a/ibis/backends/clickhouse/tests/test_select.py +++ b/ibis/backends/clickhouse/tests/test_select.py @@ -403,3 +403,12 @@ def test_array_join_in_subquery(snapshot): out = ibis.clickhouse.compile(expr) snapshot.assert_match(out, "out.sql") + + +def test_complex_join(snapshot): + t1 = ibis.table({"a": "int", "b": "int"}, name="s") + t2 = ibis.table({"c": "int", "d": "int"}, name="t") + t3 = t1.join(t2, t1.a == t2.c) + q = t3.mutate(e=t3.c / (t3.a - t3.b)) + out = ibis.clickhouse.compile(q) + snapshot.assert_match(out, "out.sql") diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 75ab280e3a63..5216197b8724 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -6,16 +6,19 @@ import contextlib import os import warnings +from operator import itemgetter from pathlib import Path from typing import TYPE_CHECKING, Any import duckdb +import pandas as pd import pyarrow as pa import pyarrow_hotfix # noqa: F401 -import sqlalchemy as sa import sqlglot as sg +import sqlglot.expressions as sge import toolz +import ibis import ibis.common.exceptions as exc import ibis.expr.datatypes as dt import ibis.expr.operations as ops @@ -23,21 +26,22 @@ import ibis.expr.types as ir from ibis import util from ibis.backends.base import CanCreateSchema -from ibis.backends.base.sql.alchemy import AlchemyCrossSchemaBackend from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported -from ibis.backends.base.sqlglot import C, F -from ibis.backends.duckdb.compiler import DuckDBSQLCompiler -from ibis.backends.duckdb.datatypes import DuckDBType +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.base.sqlglot.compiler import STAR, C, F +from ibis.backends.base.sqlglot.datatypes import DuckDBType +from ibis.backends.duckdb.compiler import DuckDBCompiler +from ibis.backends.duckdb.datatypes import DuckDBPandasData from ibis.expr.operations.udf import InputType -from ibis.formats.pandas import PandasData if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Mapping, MutableMapping, Sequence - import pandas as pd import torch from fsspec import AbstractFileSystem + from ibis.backends.base.sql import BaseBackend + def normalize_filenames(source_list): # 
Promote to list @@ -46,22 +50,6 @@ def normalize_filenames(source_list): return list(map(util.normalize_filename, source_list)) -def _format_kwargs(kwargs: Mapping[str, Any]): - bindparams, pieces = [], [] - for name, value in kwargs.items(): - bindparam = sa.bindparam(name, value) - if isinstance(paramtype := bindparam.type, sa.String): - # special case strings to avoid double escaping backslashes - pieces.append(f"{name} = '{value!s}'") - elif not isinstance(paramtype, sa.types.NullType): - bindparams.append(bindparam) - pieces.append(f"{name} = :{name}") - else: # fallback to string strategy - pieces.append(f"{name} = {value!r}") - - return sa.text(", ".join(pieces)).bindparams(*bindparams) - - _UDF_INPUT_TYPE_MAPPING = { InputType.PYARROW: duckdb.functional.ARROW, InputType.PYTHON: duckdb.functional.NATIVE, @@ -69,75 +57,291 @@ def _format_kwargs(kwargs: Mapping[str, Any]): class _Settings: - def __init__(self, con): + def __init__(self, con: duckdb.DuckDBPyConnection) -> None: self.con = con - def __getitem__(self, key): - try: - with self.con.begin() as con: - return con.exec_driver_sql( - f"select value from duckdb_settings() where name = '{key}'" - ).one() - except sa.exc.NoResultFound: - raise KeyError(key) + def __getitem__(self, key: str) -> Any: + maybe_value = self.con.execute( + f"select value from duckdb_settings() where name = '{key}'" + ).fetchone() + if maybe_value is not None: + return maybe_value[0] + raise KeyError(key) def __setitem__(self, key, value): - with self.con.begin() as con: - con.exec_driver_sql(f"SET {key}='{value}'") + self.con.execute(f"SET {key} = '{value}'") def __repr__(self): - with self.con.begin() as con: - kv = con.exec_driver_sql( - "select map(array_agg(name), array_agg(value)) from duckdb_settings()" - ).scalar() + ((kv,),) = self.con.execute( + "select map(array_agg(name), array_agg(value)) from duckdb_settings()" + ).fetch() return repr(dict(zip(kv["key"], kv["value"]))) -class Backend(AlchemyCrossSchemaBackend, CanCreateSchema): +class Backend(SQLGlotBackend, CanCreateSchema): name = "duckdb" - compiler = DuckDBSQLCompiler - supports_create_or_replace = True + compiler = DuckDBCompiler() + + def _define_udf_translation_rules(self, expr): + """No-op: UDF translation rules are defined in the compiler.""" @property def settings(self) -> _Settings: - return _Settings(self) + return _Settings(self.con) @property def current_database(self) -> str: - return self._scalar_query(sa.select(sa.func.current_database())) + with self._safe_raw_sql(sg.select(self.compiler.f.current_database())) as cur: + [(db,)] = cur.fetchall() + return db - def list_databases(self, like: str | None = None) -> list[str]: - s = sa.table( - "schemata", - sa.column("catalog_name", sa.TEXT()), - schema="information_schema", + @property + def current_schema(self) -> str: + with self._safe_raw_sql(sg.select(self.compiler.f.current_schema())) as cur: + [(schema,)] = cur.fetchall() + return schema + + def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: + with contextlib.suppress(AttributeError): + query = query.sql(dialect=self.name) + return self.con.execute(query, **kwargs) + + def _transform( + self, sql: sge.Expression, table_expr: ir.TableExpr + ) -> sge.Expression: + geocols = frozenset( + name for name, typ in table_expr.schema().items() if typ.is_geospatial() ) - query = sa.select(sa.distinct(s.c.catalog_name)) - with self.begin() as con: - results = list(con.execute(query).scalars()) - return self._filter_with_like(results, like=like) + if not geocols: + 
return sql + + return sg.select( + *( + self.compiler.f.st_aswkb( + sg.column(col, quoted=self.compiler.quoted) + ).as_(col) + if col in geocols + else col + for col in table_expr.columns + ) + ).from_(sql.subquery()) + + def create_table( + self, + name: str, + obj: pd.DataFrame | pa.Table | ir.Table | None = None, + *, + schema: ibis.Schema | None = None, + database: str | None = None, + temp: bool = False, + overwrite: bool = False, + ): + """Create a table in DuckDB. + + Parameters + ---------- + name + Name of the table to create + obj + The data with which to populate the table; optional, but at least + one of `obj` or `schema` must be specified + schema + The schema of the table to create; optional, but at least one of + `obj` or `schema` must be specified + database + The name of the database in which to create the table; if not + passed, the current database is used. + temp + Create a temporary table + overwrite + If `True`, replace the table if it already exists, otherwise fail + if the table exists + """ + if obj is None and schema is None: + raise ValueError("Either `obj` or `schema` must be specified") + + properties = [] + + if temp: + properties.append(sge.TemporaryProperty()) + + if obj is not None: + if not isinstance(obj, ir.Expr): + table = ibis.memtable(obj) + else: + table = obj + + self._run_pre_execute_hooks(table) + + (query,) = self._to_sqlglot(table) + else: + query = None + + column_defs = [ + sge.ColumnDef( + this=sg.to_identifier(colname, quoted=self.compiler.quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [sge.ColumnConstraint(kind=sge.NotNullColumnConstraint())] + ), + ) + for colname, typ in (schema or table.schema()).items() + ] + + if overwrite: + temp_name = util.gen_name("duckdb_table") + else: + temp_name = name + + table = sg.table(temp_name, catalog=database, quoted=self.compiler.quoted) + target = sge.Schema(this=table, expressions=column_defs) + + create_stmt = sge.Create( + kind="TABLE", + this=target, + properties=sge.Properties(expressions=properties), + ) + + this = sg.table(name, catalog=database, quoted=self.compiler.quoted) + with self._safe_raw_sql(create_stmt) as cur: + if query is not None: + insert_stmt = sge.Insert(this=table, expression=query).sql(self.name) + cur.execute(insert_stmt).fetchall() + + if overwrite: + cur.execute( + sge.Drop(kind="TABLE", this=this, exists=True).sql(self.name) + ).fetchall() + cur.execute( + f"ALTER TABLE IF EXISTS {table.sql(self.name)} RENAME TO {this.sql(self.name)}" + ).fetchall() + + return self.table(name, schema=database) + + def _load_into_cache(self, name, expr): + self.create_table(name, expr, schema=expr.schema(), temp=True) + + def _clean_up_cached_table(self, op): + self.drop_table(op.name) + + def table( + self, name: str, schema: str | None = None, database: str | None = None + ) -> ir.Table: + """Construct a table expression. 
+ + Parameters + ---------- + name + Table name + schema + Schema name + database + Database name + + Returns + ------- + Table + Table expression + """ + table_schema = self.get_schema(name, schema=schema, database=database) + # load geospatial only if geo columns + if any(typ.is_geospatial() for typ in table_schema.types): + self.load_extension("spatial") + return ops.DatabaseTable( + name, + schema=table_schema, + source=self, + namespace=ops.Namespace(database=database, schema=schema), + ).to_expr() + + def get_schema( + self, table_name: str, schema: str | None = None, database: str | None = None + ) -> sch.Schema: + """Compute the schema of a `table`. + + Parameters + ---------- + table_name + May **not** be fully qualified. Use `database` if you want to + qualify the identifier. + schema + Schema name + database + Database name + + Returns + ------- + sch.Schema + Ibis schema + """ + conditions = [sg.column("table_name").eq(sge.convert(table_name))] + + if database is not None: + conditions.append(sg.column("table_catalog").eq(sge.convert(database))) + + if schema is not None: + conditions.append(sg.column("table_schema").eq(sge.convert(schema))) + + query = ( + sg.select( + "column_name", + "data_type", + sg.column("is_nullable").eq(sge.convert("YES")).as_("nullable"), + ) + .from_(sg.table("columns", db="information_schema")) + .where(sg.and_(*conditions)) + .order_by("ordinal_position") + ) + + with self._safe_raw_sql(query) as cur: + meta = cur.fetch_arrow_table() + + if not meta: + raise exc.IbisError(f"Table not found: {table_name!r}") + + names = meta["column_name"].to_pylist() + types = meta["data_type"].to_pylist() + nullables = meta["nullable"].to_pylist() + + return sch.Schema( + { + name: DuckDBType.from_string(typ, nullable=nullable) + for name, typ, nullable in zip(names, types, nullables) + } + ) + + @contextlib.contextmanager + def _safe_raw_sql(self, *args, **kwargs): + yield self.raw_sql(*args, **kwargs) + + def list_databases(self, like: str | None = None) -> list[str]: + col = "catalog_name" + query = sg.select(sge.Distinct(expressions=[sg.column(col)])).from_( + sg.table("schemata", db="information_schema") + ) + with self._safe_raw_sql(query) as cur: + result = cur.fetch_arrow_table() + dbs = result[col] + return self._filter_with_like(dbs.to_pylist(), like) def list_schemas( self, like: str | None = None, database: str | None = None ) -> list[str]: - # override duckdb because all databases are always visible - text = """\ -SELECT schema_name -FROM information_schema.schemata -WHERE catalog_name = :database""" - query = sa.text(text).bindparams( - database=database if database is not None else self.current_database + col = "schema_name" + query = sg.select(sge.Distinct(expressions=[sg.column(col)])).from_( + sg.table("schemata", db="information_schema") ) - with self.begin() as con: - schemas = list(con.execute(query).scalars()) - return self._filter_with_like(schemas, like=like) + if database is not None: + query = query.where(sg.column("catalog_name").eq(sge.convert(database))) - @property - def current_schema(self) -> str: - return self._scalar_query(sa.select(sa.func.current_schema())) + with self._safe_raw_sql(query) as cur: + out = cur.fetch_arrow_table() + return self._filter_with_like(out[col].to_pylist(), like=like) @staticmethod def _convert_kwargs(kwargs: MutableMapping) -> None: @@ -156,47 +360,6 @@ def version(self) -> str: return importlib.metadata.version("duckdb") - @staticmethod - def _new_sa_metadata(): - meta = sa.MetaData() - - # 
_new_sa_metadata is invoked whenever `_get_sqla_table` is called, so - # it's safe to store columns as keys, that is, columns from different - # tables with the same name won't collide - complex_type_info_cache = {} - - @sa.event.listens_for(meta, "column_reflect") - def column_reflect(inspector, table, column_info): - import duckdb_engine.datatypes as ddt - - # duckdb_engine as of 0.7.2 doesn't expose the inner types of any - # complex types so we have to extract it from duckdb directly - ddt_struct_type = getattr(ddt, "Struct", sa.types.NullType) - ddt_map_type = getattr(ddt, "Map", sa.types.NullType) - if isinstance( - column_info["type"], (sa.ARRAY, ddt_struct_type, ddt_map_type) - ): - engine = inspector.engine - colname = column_info["name"] - if (coltype := complex_type_info_cache.get(colname)) is None: - quote = engine.dialect.identifier_preparer.quote - quoted_colname = quote(colname) - quoted_tablename = quote(table.name) - with engine.connect() as con: - # The .connection property is used to avoid creating a - # nested transaction - con.connection.execute( - f"DESCRIBE SELECT {quoted_colname} FROM {quoted_tablename}" - ) - _, typ, *_ = con.connection.fetchone() - complex_type_info_cache[colname] = coltype = DuckDBType.from_string( - typ - ) - - column_info["type"] = DuckDBType.from_ibis(coltype) - - return meta - def do_connect( self, database: str | Path = ":memory:", @@ -246,54 +409,67 @@ def do_connect( Path(temp_directory).mkdir(parents=True, exist_ok=True) config["temp_directory"] = str(temp_directory) - engine = sa.create_engine( - f"duckdb:///{database}", - connect_args=dict(read_only=read_only, config=config), - poolclass=sa.pool.StaticPool, - ) - - @sa.event.listens_for(engine, "connect") - def configure_connection(dbapi_connection, connection_record): - if extensions is not None: - self._sa_load_extensions(dbapi_connection, extensions) - dbapi_connection.execute("SET TimeZone = 'UTC'") + self.con = duckdb.connect(str(database), config=config, read_only=read_only) - self._record_batch_readers_consumed = {} + # Load any pre-specified extensions + if extensions is not None: + self._load_extensions(extensions) - # TODO(cpcloud): remove this when duckdb is >0.8.1 - # this is here to workaround https://github.com/duckdb/duckdb/issues/8735 - with contextlib.suppress(duckdb.InvalidInputException): - duckdb.execute("SELECT ?", (1,)) + # Default timezone + with self._safe_raw_sql("SET TimeZone = 'UTC'"): + pass - engine.dialect._backslash_escapes = False - super().do_connect(engine) + self._record_batch_readers_consumed = {} + self._temp_views: set[str] = set() - @staticmethod - def _sa_load_extensions( - dbapi_con, extensions: list[str], force_install: bool = False + def _load_extensions( + self, extensions: list[str], force_install: bool = False ) -> None: - query = """ - WITH exts AS ( - SELECT extension_name AS name, aliases FROM duckdb_extensions() - WHERE installed AND loaded + f = self.compiler.f + query = ( + sg.select(f.unnest(f.list_append(C.aliases, C.extension_name))) + .from_(f.duckdb_extensions()) + .where(sg.and_(C.installed, C.loaded)) ) - SELECT name FROM exts - UNION (SELECT UNNEST(aliases) AS name FROM exts) + with self._safe_raw_sql(query) as cur: + installed = map(itemgetter(0), cur.fetchall()) + # Install and load all other extensions + todo = frozenset(extensions).difference(installed) + for extension in todo: + cur.install_extension(extension, force_install=force_install) + cur.load_extension(extension) + + def _from_url(self, url: str, **kwargs) -> 
BaseBackend: + """Connect to a backend using a URL `url`. + + Parameters + ---------- + url + URL with which to connect to a backend. + kwargs + Additional keyword arguments + + Returns + ------- + BaseBackend + A backend instance """ - installed = (name for (name,) in dbapi_con.sql(query).fetchall()) - # Install and load all other extensions - todo = set(extensions).difference(installed) - for extension in todo: - dbapi_con.install_extension(extension, force_install=force_install) - dbapi_con.load_extension(extension) + import sqlalchemy as sa - def _load_extensions( - self, extensions: list[str], force_install: bool = False - ) -> None: - with self.begin() as con: - self._sa_load_extensions( - con.connection, extensions, force_install=force_install - ) + url = sa.engine.make_url(url) + + kwargs = toolz.merge( + { + name: value + for name in ("database", "read_only", "temp_directory") + if (value := getattr(url, name, None)) + }, + kwargs, + ) + + kwargs.update(url.query) + self._convert_kwargs(kwargs) + return self.connect(**kwargs) def load_extension(self, extension: str, force_install: bool = False) -> None: """Install and load a duckdb extension by name or path. @@ -314,10 +490,9 @@ def create_schema( raise exc.UnsupportedOperationError( "DuckDB cannot create a schema in another database." ) - name = self._quote(name) - if_not_exists = "IF NOT EXISTS " * force - with self.begin() as con: - con.exec_driver_sql(f"CREATE SCHEMA {if_not_exists}{name}") + + name = sg.to_identifier(database, quoted=True) + return sge.Create(this=name, kind="SCHEMA", replace=force) def drop_schema( self, name: str, database: str | None = None, force: bool = False @@ -326,10 +501,9 @@ def drop_schema( raise exc.UnsupportedOperationError( "DuckDB cannot drop a schema in another database." 
) - name = self._quote(name) - if_exists = "IF EXISTS " * force - with self.begin() as con: - con.exec_driver_sql(f"DROP SCHEMA {if_exists}{name}") + + name = sg.to_identifier(database, quoted=True) + return sge.Drop(this=name, kind="SCHEMA", replace=force) def register( self, @@ -366,7 +540,7 @@ def register( else: try: return self.read_in_memory(source, table_name=table_name, **kwargs) - except sa.exc.ProgrammingError: + except (duckdb.InvalidInputException, NameError): self._register_failure() if first.startswith(("parquet://", "parq://")) or first.endswith( @@ -397,12 +571,6 @@ def _register_failure(self): f"please call one of {msg} directly" ) - def _compile_temp_view(self, table_name, source): - raw_source = source.compile( - dialect=self.con.dialect, compile_kwargs=dict(literal_binds=True) - ) - return f'CREATE OR REPLACE TEMPORARY VIEW "{table_name}" AS {raw_source}' - @util.experimental def read_json( self, @@ -433,15 +601,18 @@ def read_json( if not table_name: table_name = util.gen_name("read_json") - source = sa.select(sa.literal_column("*")).select_from( - sa.func.read_json_auto( - sa.func.list_value(*normalize_filenames(source_list)), - _format_kwargs(kwargs), - ) + options = [ + sg.to_identifier(key).eq(sge.convert(val)) for key, val in kwargs.items() + ] + + self._create_temp_view( + table_name, + sg.select(STAR).from_( + self.compiler.f.read_json_auto( + normalize_filenames(source_list), *options + ) + ), ) - view = self._compile_temp_view(table_name, source) - with self.begin() as con: - con.exec_driver_sql(view) return self.table(table_name) @@ -485,13 +656,32 @@ def read_csv( kwargs.setdefault("header", True) kwargs["auto_detect"] = kwargs.pop("auto_detect", "columns" not in kwargs) - source = sa.select(sa.literal_column("*")).select_from( - sa.func.read_csv(sa.func.list_value(*source_list), _format_kwargs(kwargs)) + # TODO: clean this up + # We want to _usually_ quote arguments but if we quote `columns` it messes + # up DuckDB's struct parsing. 
+ options = [ + sg.to_identifier(key).eq(sge.convert(val)) for key, val in kwargs.items() + ] + + if (columns := kwargs.pop("columns", None)) is not None: + options.append( + sg.to_identifier("columns").eq( + sge.Struct( + expressions=[ + sge.Slice( + this=sge.convert(key), expression=sge.convert(value) + ) + for key, value in columns.items() + ] + ) + ) + ) + + self._create_temp_view( + table_name, + sg.select(STAR).from_(self.compiler.f.read_csv(source_list, *options)), ) - view = self._compile_temp_view(table_name, source) - with self.begin() as con: - con.exec_driver_sql(view) return self.table(table_name) def read_geo( @@ -529,17 +719,24 @@ def read_geo( self.load_extension("spatial") source = util.normalize_filename(source) - if source.startswith(("http://", "https://", "s3://")): self._load_extensions(["httpfs"]) - source_expr = sa.select(sa.literal_column("*")).select_from( - sa.func.st_read(source, _format_kwargs(kwargs)) + source_expr = sg.select(STAR).from_( + self.compiler.f.st_read( + source, + *(sg.to_identifier(key).eq(val) for key, val in kwargs.items()), + ) ) - view = self._compile_temp_view(table_name, source_expr) - with self.begin() as con: - con.exec_driver_sql(view) + view = sge.Create( + kind="VIEW", + this=sg.table(table_name, quoted=self.compiler.quoted), + properties=sge.Properties(expressions=[sge.TemporaryProperty()]), + expression=source_expr, + ) + with self._safe_raw_sql(view): + pass return self.table(table_name) def read_parquet( @@ -576,11 +773,8 @@ def read_parquet( # pyarrow dataset try: self._read_parquet_duckdb_native(source_list, table_name, **kwargs) - except sa.exc.OperationalError as e: - if isinstance(e.orig, duckdb.IOException): - self._read_parquet_pyarrow_dataset(source_list, table_name, **kwargs) - else: - raise e + except duckdb.IOException: + self._read_parquet_pyarrow_dataset(source_list, table_name, **kwargs) return self.table(table_name) @@ -593,14 +787,13 @@ def _read_parquet_duckdb_native( ): self._load_extensions(["httpfs"]) - source = sa.select(sa.literal_column("*")).select_from( - sa.func.read_parquet( - sa.func.list_value(*source_list), _format_kwargs(kwargs) - ) + options = [ + sg.to_identifier(key).eq(sge.convert(val)) for key, val in kwargs.items() + ] + self._create_temp_view( + table_name, + sg.select(STAR).from_(self.compiler.f.read_parquet(source_list, *options)), ) - view = self._compile_temp_view(table_name, source) - with self.begin() as con: - con.exec_driver_sql(view) def _read_parquet_pyarrow_dataset( self, source_list: str | Iterable[str], table_name: str, **kwargs: Any @@ -612,12 +805,11 @@ def _read_parquet_pyarrow_dataset( # We don't create a view since DuckDB special cases Arrow Datasets # so if we also create a view we end up with both a "lazy table" # and a view with the same name - with self.begin() as con: - # DuckDB normally auto-detects Arrow Datasets that are defined - # in local variables but the `dataset` variable won't be local - # by the time we execute against this so we register it - # explicitly. - con.connection.register(table_name, dataset) + self.con.register(table_name, dataset) + # DuckDB normally auto-detects Arrow Datasets that are defined + # in local variables but the `dataset` variable won't be local + # by the time we execute against this so we register it + # explicitly. 
def read_in_memory( self, @@ -640,8 +832,7 @@ def read_in_memory( The just-registered table """ table_name = table_name or util.gen_name("read_in_memory") - with self.begin() as con: - con.connection.register(table_name, source) + self.con.register(table_name, source) if isinstance(source, pa.RecordBatchReader): # Ensure the reader isn't marked as started, in case the name is @@ -730,33 +921,34 @@ def list_tables( >>> con.list_tables(schema="my_schema") [] >>> with con.begin() as c: - ... c.exec_driver_sql("CREATE TABLE my_schema.baz (a INTEGER)") # doctest: +ELLIPSIS + ... c.exec_driver_sql( + ... "CREATE TABLE my_schema.baz (a INTEGER)" + ... ) # doctest: +ELLIPSIS + ... <...> >>> con.list_tables(schema="my_schema") ['baz'] """ - database = ( - F.current_database() if database is None else sg.exp.convert(database) - ) - schema = F.current_schema() if schema is None else sg.exp.convert(schema) + database = F.current_database() if database is None else sge.convert(database) + schema = F.current_schema() if schema is None else sge.convert(schema) + col = "table_name" sql = ( - sg.select(C.table_name) + sg.select(col) .from_(sg.table("tables", db="information_schema")) .distinct() .where( C.table_catalog.eq(database).or_( - C.table_catalog.eq(sg.exp.convert("temp")) + C.table_catalog.eq(sge.convert("temp")) ), C.table_schema.eq(schema), ) .sql(self.name, pretty=True) ) - with self.begin() as con: - out = con.exec_driver_sql(sql).cursor.fetch_arrow_table() + out = self.con.execute(sql).fetch_arrow_table() - return self._filter_with_like(out["table_name"].to_pylist(), like) + return self._filter_with_like(out[col].to_pylist(), like) def read_postgres( self, uri: str, table_name: str | None = None, schema: str = "public" @@ -782,12 +974,13 @@ def read_postgres( "`table_name` is required when registering a postgres table" ) self._load_extensions(["postgres_scanner"]) - source = sa.select(sa.literal_column("*")).select_from( - sa.func.postgres_scan_pushdown(uri, schema, table_name) + + self._create_temp_view( + table_name, + sg.select(STAR).from_( + self.compiler.f.postgres_scan_pushdown(uri, schema, table_name) + ), ) - view = self._compile_temp_view(table_name, source) - with self.begin() as con: - con.exec_driver_sql(view) return self.table(table_name) @@ -836,12 +1029,14 @@ def read_sqlite(self, path: str | Path, table_name: str | None = None) -> ir.Tab raise ValueError("`table_name` is required when registering a sqlite table") self._load_extensions(["sqlite"]) - source = sa.select(sa.literal_column("*")).select_from( - sa.func.sqlite_scan(str(path), table_name) + self._create_temp_view( + table_name, + sg.select(STAR).from_( + self.compiler.f.sqlite_scan( + sg.to_identifier(str(path), quoted=True), table_name + ) + ), ) - view = self._compile_temp_view(table_name, source) - with self.begin() as con: - con.exec_driver_sql(view) return self.table(table_name) @@ -868,8 +1063,7 @@ def attach( if read_only: code += " (READ_ONLY)" - with self.begin() as con: - con.exec_driver_sql(code) + self.con.execute(code).fetchall() def detach(self, name: str) -> None: """Detach a database from the current DuckDB session. @@ -880,8 +1074,7 @@ def detach(self, name: str) -> None: The name of the database to detach. 
""" name = sg.to_identifier(name).sql(self.name) - with self.begin() as con: - con.exec_driver_sql(f"DETACH {name}") + self.con.execute(f"DETACH {name}").fetchall() def attach_sqlite( self, path: str | Path, overwrite: bool = False, all_varchar: bool = False @@ -916,10 +1109,11 @@ def attach_sqlite( >>> con.list_tables() ['t'] """ - self._load_extensions(["sqlite"]) - with self.begin() as con: - con.execute(sa.text(f"SET GLOBAL sqlite_all_varchar={all_varchar}")) - con.execute(sa.text(f"CALL sqlite_attach('{path}', overwrite={overwrite})")) + self.load_extension("sqlite") + with self._safe_raw_sql(f"SET GLOBAL sqlite_all_varchar={all_varchar}") as cur: + cur.execute( + f"CALL sqlite_attach('{path}', overwrite={overwrite})" + ).fetchall() def register_filesystem(self, filesystem: AbstractFileSystem): """Register an `fsspec` filesystem object with DuckDB. @@ -956,8 +1150,7 @@ def register_filesystem(self, filesystem: AbstractFileSystem): name string band string """ - with self.begin() as con: - con.connection.register_filesystem(filesystem) + self.con.register_filesystem(filesystem) def _run_pre_execute_hooks(self, expr: ir.Expr) -> None: # Warn for any tables depending on RecordBatchReaders that have already @@ -1007,18 +1200,20 @@ def to_pyarrow_batches( ::: """ self._run_pre_execute_hooks(expr) - query_ast = self.compiler.to_ast_ensure_limit(expr, limit, params=params) - sql = query_ast.compile() + table = expr.as_table() + sql = self.compile(table, limit=limit, params=params) - def batch_producer(con): - with con.begin() as c, contextlib.closing(c.execute(sql)) as cur: - yield from cur.cursor.fetch_record_batch(rows_per_batch=chunk_size) + def batch_producer(cur): + yield from cur.fetch_record_batch(rows_per_batch=chunk_size) + # TODO: check that this is still handled correctly # batch_producer keeps the `self.con` member alive long enough to # exhaust the record batch reader, even if the backend or connection # have gone out of scope in the caller + result = self.raw_sql(sql) + return pa.RecordBatchReader.from_batches( - expr.as_table().schema().to_pyarrow(), batch_producer(self.con) + expr.as_table().schema().to_pyarrow(), batch_producer(result) ) def to_pyarrow( @@ -1030,20 +1225,11 @@ def to_pyarrow( **_: Any, ) -> pa.Table: self._run_pre_execute_hooks(expr) - query_ast = self.compiler.to_ast_ensure_limit(expr, limit, params=params) - - # We use `.sql` instead of `.execute` below for performance - in - # certain cases duckdb query -> arrow table can be significantly faster - # in this configuration. Currently `.sql` doesn't support parametrized - # queries, so we need to compile with literal_binds for now. 
- sql = str( - query_ast.compile().compile( - dialect=self.con.dialect, compile_kwargs={"literal_binds": True} - ) - ) + table = expr.as_table() + sql = self.compile(table, limit=limit, params=params) - with self.begin() as con: - table = con.connection.sql(sql).to_arrow_table() + with self._safe_raw_sql(sql) as cur: + table = cur.fetch_arrow_table() return expr.__pyarrow_result__(table) @@ -1076,7 +1262,7 @@ def to_torch( """ compiled = self.compile(expr, limit=limit, params=params, **kwargs) with self._safe_raw_sql(compiled) as cur: - return cur.connection.connection.torch() + return cur.torch() @util.experimental def to_parquet( @@ -1132,8 +1318,8 @@ def to_parquet( query = self._to_sql(expr, params=params) args = ["FORMAT 'parquet'", *(f"{k.upper()} {v!r}" for k, v in kwargs.items())] copy_cmd = f"COPY ({query}) TO {str(path)!r} ({', '.join(args)})" - with self.begin() as con: - con.exec_driver_sql(copy_cmd) + with self._safe_raw_sql(copy_cmd): + pass @util.experimental def to_csv( @@ -1171,8 +1357,8 @@ def to_csv( *(f"{k.upper()} {v!r}" for k, v in kwargs.items()), ] copy_cmd = f"COPY ({query}) TO {str(path)!r} ({', '.join(args)})" - with self.begin() as con: - con.exec_driver_sql(copy_cmd) + with self._safe_raw_sql(copy_cmd): + pass def fetch_from_cursor( self, cursor: duckdb.DuckDBPyConnection, schema: sch.Schema @@ -1180,7 +1366,7 @@ def fetch_from_cursor( import pandas as pd import pyarrow.types as pat - table = cursor.cursor.fetch_arrow_table() + table = cursor.fetch_arrow_table() df = pd.DataFrame( { @@ -1198,7 +1384,7 @@ def fetch_from_cursor( for name, col in zip(table.column_names, table.columns) } ) - df = PandasData.convert_table(df, schema) + df = DuckDBPandasData.convert_table(df, schema) if not df.empty and geospatial_supported: return self._to_geodataframe(df, schema) return df @@ -1224,15 +1410,19 @@ def _to_geodataframe(df, schema): return df def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: - with self.begin() as con: - rows = con.exec_driver_sql(f"DESCRIBE {query}") + with self._safe_raw_sql(f"DESCRIBE {query}") as cur: + rows = cur.fetch_arrow_table() + + rows = rows.to_pydict() + + for name, typ, null in zip( + rows["column_name"], rows["column_type"], rows["null"] + ): + yield name, DuckDBType.from_string(typ, nullable=null == "YES") - for name, type, null in toolz.pluck( - ["column_name", "column_type", "null"], rows.mappings() - ): - nullable = null.lower() == "yes" - ibis_type = DuckDBType.from_string(type, nullable=nullable) - yield name, ibis_type + def _register_in_memory_tables(self, expr: ir.Expr) -> None: + for memtable in expr.op().find(ops.InMemoryTable): + self._register_in_memory_table(memtable) def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: schema = op.schema @@ -1245,34 +1435,24 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: # only register if we haven't already done so if (name := op.name) not in self.list_tables(): table = op.data.to_pyarrow(schema) - - # register creates a transaction, and we can't nest transactions so - # we create a function to encapsulate the whole shebang - def _register(name, table): - with self.begin() as con: - con.connection.register(name, table) - - _register(name, table) - - def _get_temp_view_definition( - self, name: str, definition: sa.sql.compiler.Compiled - ) -> str: - yield f"CREATE OR REPLACE TEMPORARY VIEW {name} AS {definition}" + table = getattr(table, "obj", table) + self.con.register(name, table) def _register_udfs(self, expr: ir.Expr) -> 
None: import ibis.expr.operations as ops - with self.con.connect() as con: - for udf_node in expr.op().find(ops.ScalarUDF): - compile_func = getattr( - self, f"_compile_{udf_node.__input_type__.name.lower()}_udf" - ) - with contextlib.suppress(duckdb.InvalidInputException): - con.connection.remove_function(udf_node.__class__.__name__) + con = self.con - registration_func = compile_func(udf_node) - if registration_func is not None: - registration_func(con) + for udf_node in expr.op().find(ops.ScalarUDF): + compile_func = getattr( + self, f"_compile_{udf_node.__input_type__.name.lower()}_udf" + ) + with contextlib.suppress(duckdb.InvalidInputException): + con.remove_function(udf_node.__class__.__name__) + + registration_func = compile_func(udf_node) + if registration_func is not None: + registration_func(con) def _compile_udf(self, udf_node: ops.ScalarUDF) -> None: func = udf_node.__func__ @@ -1284,7 +1464,7 @@ def _compile_udf(self, udf_node: ops.ScalarUDF) -> None: output_type = DuckDBType.to_string(udf_node.dtype) def register_udf(con): - return con.connection.create_function( + return con.create_function( name, func, input_types, @@ -1297,42 +1477,59 @@ def register_udf(con): _compile_python_udf = _compile_udf _compile_pyarrow_udf = _compile_udf + def _compile_builtin_udf(self, udf_node: ops.ScalarUDF) -> None: + """No op.""" + def _compile_pandas_udf(self, _: ops.ScalarUDF) -> None: raise NotImplementedError("duckdb doesn't support pandas UDFs") - def _get_compiled_statement(self, view: sa.Table, definition: sa.sql.Selectable): + def _get_compiled_statement(self, view, definition): # TODO: remove this once duckdb supports CTAS prepared statements return super()._get_compiled_statement( view, definition, compile_kwargs={"literal_binds": True} ) - def _insert_dataframe( - self, table_name: str, df: pd.DataFrame, overwrite: bool - ) -> None: - columns = list(df.columns) - t = sa.table(table_name, *map(sa.column, columns)) - - table_name = self._quote(table_name) - - # the table name df here matters, and *must* match the input variable's - # name because duckdb will look up this name in the outer scope of the - # insert call and pull in that variable's data to scan - source = sa.table("df", *map(sa.column, columns)) - - with self.begin() as con: - if overwrite: - con.execute(t.delete()) - con.execute(t.insert().from_select(columns, sa.select(source))) - - def table( + def insert( self, - name: str, + table_name: str, + obj: pd.DataFrame | ir.Table | list | dict, database: str | None = None, - schema: str | None = None, - ) -> ir.Table: - expr = super().table(name=name, database=database, schema=schema) - # load geospatial only if geo columns - if any(typ.is_geospatial() for typ in expr.op().schema.types): - self.load_extension("spatial") + overwrite: bool = False, + ) -> None: + """Insert data into a table. - return expr + Parameters + ---------- + table_name + The name of the table to which data needs will be inserted + obj + The source data or expression to insert + database + Name of the attached database that the table is located in. 
+ overwrite + If `True` then replace existing contents of table + + Raises + ------ + NotImplementedError + If inserting data from a different database + ValueError + If the type of `obj` isn't supported + """ + table = sg.table(table_name, db=database) + if overwrite: + with self._safe_raw_sql(f"TRUNCATE TABLE {table.sql('duckdb')}"): + pass + + if isinstance(obj, ir.Table): + self._run_pre_execute_hooks(obj) + query = sge.insert( + expression=self.compile(obj), into=table, dialect="duckdb" + ) + with self._safe_raw_sql(query): + pass + else: + self.con.append( + table_name, + obj if isinstance(obj, pd.DataFrame) else pd.DataFrame(obj), + ) diff --git a/ibis/backends/duckdb/compiler.py b/ibis/backends/duckdb/compiler.py index eaac09d0b0a7..4207f108b8f5 100644 --- a/ibis/backends/duckdb/compiler.py +++ b/ibis/backends/duckdb/compiler.py @@ -1,79 +1,403 @@ from __future__ import annotations -import sqlalchemy as sa -from sqlalchemy.ext.compiler import compiles +import math +from functools import partial, reduce, singledispatchmethod -import ibis.backends.base.sql.alchemy.datatypes as sat +import sqlglot as sg +import sqlglot.expressions as sge +from public import public + +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt import ibis.expr.operations as ops -from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator -from ibis.backends.base.sql.alchemy.query_builder import _AlchemyTableSetFormatter -from ibis.backends.duckdb.datatypes import DuckDBType -from ibis.backends.duckdb.registry import operation_registry +from ibis.backends.base.sqlglot.compiler import NULL, STAR, SQLGlotCompiler +from ibis.backends.base.sqlglot.datatypes import DuckDBType + +_INTERVAL_SUFFIXES = { + "ms": "milliseconds", + "us": "microseconds", + "s": "seconds", + "m": "minutes", + "h": "hours", + "D": "days", + "M": "months", + "Y": "years", +} -class DuckDBSQLExprTranslator(AlchemyExprTranslator): - _registry = operation_registry - _rewrites = AlchemyExprTranslator._rewrites.copy() - _has_reduction_filter_syntax = True - _supports_tuple_syntax = True - _dialect_name = "duckdb" +@public +class DuckDBCompiler(SQLGlotCompiler): + __slots__ = () + dialect = "duckdb" type_mapper = DuckDBType + def _aggregate(self, funcname: str, *args, where): + expr = self.f[funcname](*args) + if where is not None: + return sge.Filter(this=expr, expression=sge.Where(this=where)) + return expr -@compiles(sat.UInt8, "duckdb") -def compile_uint8(element, compiler, **kw): - return "UTINYINT" + @singledispatchmethod + def visit_node(self, op, **kwargs): + return super().visit_node(op, **kwargs) + @visit_node.register(ops.ArrayDistinct) + def visit_ArrayDistinct(self, op, *, arg): + return self.if_( + arg.is_(NULL), + NULL, + self.f.list_distinct(arg) + + self.if_( + self.f.list_count(arg) < self.f.len(arg), + self.f.array(NULL), + self.f.array(), + ), + ) -@compiles(sat.UInt16, "duckdb") -def compile_uint16(element, compiler, **kw): - return "USMALLINT" + @visit_node.register(ops.ArrayIndex) + def visit_ArrayIndex(self, op, *, arg, index): + return self.f.list_extract(arg, index + self.cast(index >= 0, op.index.dtype)) + @visit_node.register(ops.ArrayRepeat) + def visit_ArrayRepeat(self, op, *, arg, times): + func = sge.Lambda(this=arg, expressions=[sg.to_identifier("_")]) + return self.f.flatten(self.f.list_apply(self.f.range(times), func)) -@compiles(sat.UInt32, "duckdb") -def compile_uint32(element, compiler, **kw): - return "UINTEGER" + @visit_node.register(ops.Sample) + def visit_Sample( + 
self, op, *, parent, fraction: float, method: str, seed: int | None, **_ + ): + sample = sge.TableSample( + this=parent, + method="bernoulli" if method == "row" else "system", + percent=sge.convert(fraction * 100.0), + seed=None if seed is None else sge.convert(seed), + ) + return sg.select(STAR).from_(sample) + @visit_node.register(ops.ArraySlice) + def visit_ArraySlice(self, op, *, arg, start, stop): + arg_length = self.f.len(arg) -@compiles(sat.UInt64, "duckdb") -def compile_uint(element, compiler, **kw): - return "UBIGINT" + if start is None: + start = 0 + else: + start = self.f.least(arg_length, self._neg_idx_to_pos(arg, start)) + if stop is None: + stop = arg_length + else: + stop = self._neg_idx_to_pos(arg, stop) -@compiles(sat.ArrayType, "duckdb") -def compile_array(element, compiler, **kw): - if isinstance(value_type := element.value_type, sa.types.NullType): - # duckdb infers empty arrays with no other context as array - typ = "INTEGER" - else: - typ = compiler.process(value_type, **kw) - return f"{typ}[]" + return self.f.list_slice(arg, start + 1, stop) + + @visit_node.register(ops.ArrayMap) + def visit_ArrayMap(self, op, *, arg, body, param): + lamduh = sge.Lambda(this=body, expressions=[sg.to_identifier(param)]) + return self.f.list_apply(arg, lamduh) + + @visit_node.register(ops.ArrayFilter) + def visit_ArrayFilter(self, op, *, arg, body, param): + lamduh = sge.Lambda(this=body, expressions=[sg.to_identifier(param)]) + return self.f.list_filter(arg, lamduh) + + @visit_node.register(ops.ArrayIntersect) + def visit_ArrayIntersect(self, op, *, left, right): + param = sg.to_identifier("x") + body = self.f.list_contains(right, param) + lamduh = sge.Lambda(this=body, expressions=[param]) + return self.f.list_filter(left, lamduh) + + @visit_node.register(ops.ArrayRemove) + def visit_ArrayRemove(self, op, *, arg, other): + param = sg.to_identifier("x") + body = param.neq(other) + lamduh = sge.Lambda(this=body, expressions=[param]) + return self.f.list_filter(arg, lamduh) + + @visit_node.register(ops.ArrayUnion) + def visit_ArrayUnion(self, op, *, left, right): + arg = self.f.list_concat(left, right) + return self.if_( + arg.is_(NULL), + NULL, + self.f.list_distinct(arg) + + self.if_( + self.f.list_count(arg) < self.f.len(arg), + self.f.array(NULL), + self.f.array(), + ), + ) + @visit_node.register(ops.ArrayZip) + def visit_ArrayZip(self, op, *, arg): + i = sg.to_identifier("i") + body = sge.Struct.from_arg_list( + [ + sge.Slice(this=k, expression=v[i]) + for k, v in zip(map(sge.convert, op.dtype.value_type.names), arg) + ] + ) + func = sge.Lambda(this=body, expressions=[i]) + return self.f.list_apply( + self.f.range( + 1, + # DuckDB Range excludes upper bound + self.f.greatest(*map(self.f.len, arg)) + 1, + ), + func, + ) + + @visit_node.register(ops.MapGet) + def visit_MapGet(self, op, *, arg, key, default): + return self.f.ifnull( + self.f.list_extract(self.f.element_at(arg, key), 1), default + ) -rewrites = DuckDBSQLExprTranslator.rewrites + @visit_node.register(ops.MapContains) + def visit_MapContains(self, op, *, arg, key): + return self.f.len(self.f.element_at(arg, key)).neq(0) + @visit_node.register(ops.ToJSONMap) + @visit_node.register(ops.ToJSONArray) + def visit_ToJSONMap(self, op, *, arg): + return sge.TryCast(this=arg, to=self.type_mapper.from_ibis(op.dtype)) -@rewrites(ops.Any) -@rewrites(ops.All) -@rewrites(ops.StringContains) -def _no_op(expr): - return expr + @visit_node.register(ops.ArrayConcat) + def visit_ArrayConcat(self, op, *, arg): + # TODO(cpcloud): map 
ArrayConcat to this in sqlglot instead of here + return reduce(self.f.list_concat, arg) + @visit_node.register(ops.IntervalFromInteger) + def visit_IntervalFromInteger(self, op, *, arg, unit): + if unit.short == "ns": + raise com.UnsupportedOperationError( + f"{self.dialect} doesn't support nanosecond interval resolutions" + ) -class DuckDBTableSetFormatter(_AlchemyTableSetFormatter): - def _format_sample(self, op, table): - if op.method == "row": - method = sa.func.bernoulli + if unit.singular == "week": + return self.f.to_days(arg * 7) + return self.f[f"to_{unit.plural}"](arg) + + @visit_node.register(ops.FindInSet) + def visit_FindInSet(self, op, *, needle, values): + return self.f.list_indexof(self.f.array(*values), needle) + + @visit_node.register(ops.CountDistinctStar) + def visit_CountDistinctStar(self, op, *, where, arg): + # use a tuple because duckdb doesn't accept COUNT(DISTINCT a, b, c, ...) + # + # this turns the expression into COUNT(DISTINCT (a, b, c, ...)) + row = sge.Tuple( + expressions=list( + map(partial(sg.column, quoted=self.quoted), op.arg.schema.keys()) + ) + ) + return self.agg.count(sge.Distinct(expressions=[row]), where=where) + + @visit_node.register(ops.StringJoin) + def visit_StringJoin(self, op, *, arg, sep): + return self.f.list_aggr(self.f.array(*arg), "string_agg", sep) + + @visit_node.register(ops.ExtractMillisecond) + def visit_ExtractMillisecond(self, op, *, arg): + return self.f.mod(self.f.extract("ms", arg), 1_000) + + # DuckDB extracts subminute microseconds and milliseconds + # so we have to finesse it a little bit + @visit_node.register(ops.ExtractMicrosecond) + def visit_ExtractMicrosecond(self, op, *, arg): + return self.f.mod(self.f.extract("us", arg), 1_000_000) + + @visit_node.register(ops.TimestampFromUNIX) + def visit_TimestampFromUNIX(self, op, *, arg, unit): + unit = unit.short + if unit == "ms": + return self.f.epoch_ms(arg) + elif unit == "s": + return sge.UnixToTime(this=arg) else: - method = sa.func.system - return table.tablesample( - sampling=method(sa.literal_column(f"{op.fraction * 100} PERCENT")), - seed=(None if op.seed is None else sa.literal_column(str(op.seed))), + raise com.UnsupportedOperationError(f"{unit!r} unit is not supported!") + + @visit_node.register(ops.TimestampFromYMDHMS) + def visit_TimestampFromYMDHMS( + self, op, *, year, month, day, hours, minutes, seconds, **_ + ): + args = [year, month, day, hours, minutes, seconds] + + func = "make_timestamp" + if (timezone := op.dtype.timezone) is not None: + func += "tz" + args.append(timezone) + + return self.f[func](*args) + + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + if to.is_interval(): + func = self.f[f"to_{_INTERVAL_SUFFIXES[to.unit.short]}"] + return func(sg.cast(arg, to=self.type_mapper.from_ibis(dt.int32))) + elif to.is_timestamp() and op.arg.dtype.is_integer(): + return self.f.to_timestamp(arg) + + return self.cast(arg, to) + + @visit_node.register(ops.Literal) + def visit_Literal(self, op, *, value, dtype, **kw): + if value is None: + return super().visit_node(op, value=value, dtype=dtype, **kw) + elif dtype.is_interval(): + if dtype.unit.short == "ns": + raise com.UnsupportedOperationError( + f"{self.dialect} doesn't support nanosecond interval resolutions" + ) + + return sge.Interval( + this=sge.convert(str(value)), unit=dtype.resolution.upper() + ) + elif dtype.is_uuid(): + return self.cast(str(value), dtype) + elif dtype.is_binary(): + return self.cast("".join(map("\\x{:02x}".format, value)), dtype) + elif dtype.is_numeric(): 
+ # cast non finite values to float because that's the behavior of + # duckdb when a mixed decimal/float operation is performed + # + # float will be upcast to double if necessary by duckdb + if not math.isfinite(value): + return self.cast( + str(value), to=dt.float32 if dtype.is_decimal() else dtype + ) + return self.cast(value, dtype) + elif dtype.is_time(): + return self.f.make_time( + value.hour, value.minute, value.second + value.microsecond / 1e6 + ) + elif dtype.is_timestamp(): + args = [ + value.year, + value.month, + value.day, + value.hour, + value.minute, + value.second + value.microsecond / 1e6, + ] + + funcname = "make_timestamp" + + if (tz := dtype.timezone) is not None: + funcname += "tz" + args.append(tz) + + return self.f[funcname](*args) + else: + return super().visit_node(op, value=value, dtype=dtype, **kw) + + @visit_node.register(ops.Capitalize) + def visit_Capitalize(self, op, *, arg): + return self.f.concat( + self.f.upper(self.f.substr(arg, 1, 1)), self.f.lower(self.f.substr(arg, 2)) ) + def _neg_idx_to_pos(self, array, idx): + arg_length = self.f.array_size(array) + return self.if_( + idx >= 0, + idx, + # Need to have the greatest here to handle the case where + # abs(neg_index) > arg_length + # e.g. where the magnitude of the negative index is greater than the + # length of the array + # You cannot index a[:-3] if a = [1, 2] + arg_length + self.f.greatest(idx, -arg_length), + ) + + @visit_node.register(ops.Correlation) + def visit_Correlation(self, op, *, left, right, how, where): + if how == "sample": + raise com.UnsupportedOperationError( + f"{self.dialect} only implements `pop` correlation coefficient" + ) + + # TODO: rewrite rule? + if (left_type := op.left.dtype).is_boolean(): + left = self.cast(left, dt.Int32(nullable=left_type.nullable)) + + if (right_type := op.right.dtype).is_boolean(): + right = self.cast(right, dt.Int32(nullable=right_type.nullable)) + + return self.agg.corr(left, right, where=where) + + +_SIMPLE_OPS = { + ops.ArrayPosition: "list_indexof", + ops.BitAnd: "bit_and", + ops.BitOr: "bit_or", + ops.BitXor: "bit_xor", + ops.EndsWith: "suffix", + ops.Hash: "hash", + ops.IntegerRange: "range", + ops.LPad: "lpad", + ops.Levenshtein: "levenshtein", + ops.MapKeys: "map_keys", + ops.MapLength: "cardinality", + ops.MapMerge: "map_concat", + ops.MapValues: "map_values", + ops.Mode: "mode", + ops.RPad: "rpad", + ops.StringAscii: "ascii", + ops.TimeFromHMS: "make_time", + ops.TypeOf: "typeof", + ops.Unnest: "unnest", + ops.GeoPoint: "st_point", + ops.GeoAsText: "st_astext", + ops.GeoArea: "st_area", + ops.GeoBuffer: "st_buffer", + ops.GeoCentroid: "st_centroid", + ops.GeoContains: "st_contains", + ops.GeoCovers: "st_covers", + ops.GeoCoveredBy: "st_coveredby", + ops.GeoCrosses: "st_crosses", + ops.GeoDifference: "st_difference", + ops.GeoDisjoint: "st_disjoint", + ops.GeoDistance: "st_distance", + ops.GeoDWithin: "st_dwithin", + ops.GeoEndPoint: "st_endpoint", + ops.GeoEnvelope: "st_envelope", + ops.GeoEquals: "st_equals", + ops.GeoGeometryType: "st_geometrytype", + ops.GeoIntersection: "st_intersection", + ops.GeoIntersects: "st_intersects", + ops.GeoIsValid: "st_isvalid", + ops.GeoLength: "st_length", + ops.GeoNPoints: "st_npoints", + ops.GeoOverlaps: "st_overlaps", + ops.GeoStartPoint: "st_startpoint", + ops.GeoTouches: "st_touches", + ops.GeoUnion: "st_union", + ops.GeoUnaryUnion: "st_union_agg", + ops.GeoWithin: "st_within", + ops.GeoX: "st_x", + ops.GeoY: "st_y", +} + + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), 
type(_op) + if issubclass(_op, ops.Reduction): + + @DuckDBCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) + + else: + + @DuckDBCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) + + setattr(DuckDBCompiler, f"visit_{_op.__name__}", _fmt) + -class DuckDBSQLCompiler(AlchemyCompiler): - cheap_in_memory_tables = True - translator_class = DuckDBSQLExprTranslator - table_set_formatter_class = DuckDBTableSetFormatter +del _op, _name, _fmt diff --git a/ibis/backends/duckdb/datatypes.py b/ibis/backends/duckdb/datatypes.py index 603051e53daa..a4277ca82760 100644 --- a/ibis/backends/duckdb/datatypes.py +++ b/ibis/backends/duckdb/datatypes.py @@ -1,74 +1,11 @@ from __future__ import annotations -import duckdb_engine.datatypes as ducktypes -import sqlalchemy.dialects.postgresql as psql +import numpy as np -import ibis.expr.datatypes as dt -from ibis.backends.base.sql.alchemy.datatypes import AlchemyType -from ibis.backends.base.sqlglot.datatypes import DuckDBType as SqlglotDuckdbType +from ibis.formats.pandas import PandasData -try: - from geoalchemy2 import Geometry - class Geometry_WKB(Geometry): - as_binary = "ST_AsWKB" - -except ImportError: - - class Geometry_WKB: - ... - - -_from_duckdb_types = { - psql.BYTEA: dt.Binary, - psql.UUID: dt.UUID, - ducktypes.TinyInteger: dt.Int8, - ducktypes.SmallInteger: dt.Int16, - ducktypes.Integer: dt.Int32, - ducktypes.BigInteger: dt.Int64, - ducktypes.HugeInteger: dt.Decimal(38, 0), - ducktypes.UInt8: dt.UInt8, - ducktypes.UTinyInteger: dt.UInt8, - ducktypes.UInt16: dt.UInt16, - ducktypes.USmallInteger: dt.UInt16, - ducktypes.UInt32: dt.UInt32, - ducktypes.UInteger: dt.UInt32, - ducktypes.UInt64: dt.UInt64, - ducktypes.UBigInteger: dt.UInt64, -} - -_to_duckdb_types = { - dt.UUID: psql.UUID, - dt.Int8: ducktypes.TinyInteger, - dt.Int16: ducktypes.SmallInteger, - dt.Int32: ducktypes.Integer, - dt.Int64: ducktypes.BigInteger, - dt.UInt8: ducktypes.UTinyInteger, - dt.UInt16: ducktypes.USmallInteger, - dt.UInt32: ducktypes.UInteger, - dt.UInt64: ducktypes.UBigInteger, - # Handle projections with geometry columns - dt.Geometry: Geometry_WKB, -} - - -class DuckDBType(AlchemyType): - dialect = "duckdb" - - @classmethod - def to_ibis(cls, typ, nullable=True): - if dtype := _from_duckdb_types.get(type(typ)): - return dtype(nullable=nullable) - else: - return super().to_ibis(typ, nullable=nullable) - - @classmethod - def from_ibis(cls, dtype): - if typ := _to_duckdb_types.get(type(dtype)): - return typ - else: - return super().from_ibis(dtype) - - @classmethod - def from_string(cls, type_string, nullable=True): - return SqlglotDuckdbType.from_string(type_string, nullable=nullable) +class DuckDBPandasData(PandasData): + @staticmethod + def convert_Array(s, dtype, pandas_type): + return s.replace(np.nan, None) diff --git a/ibis/backends/duckdb/tests/conftest.py b/ibis/backends/duckdb/tests/conftest.py index a1de96354fb8..7ca578ea24ff 100644 --- a/ibis/backends/duckdb/tests/conftest.py +++ b/ibis/backends/duckdb/tests/conftest.py @@ -11,6 +11,7 @@ if TYPE_CHECKING: from collections.abc import Iterator + from typing import Any from ibis.backends.base import BaseBackend @@ -36,7 +37,7 @@ class TestConf(BackendTest): supports_map = True - deps = "duckdb", "duckdb_engine" + deps = ("duckdb",) stateful = False supports_tpch = True @@ -87,8 +88,12 @@ def connect(*, tmpdir, worker_id, **kw) -> BaseBackend: return 
ibis.duckdb.connect(extension_directory=extension_directory, **kw) def load_tpch(self) -> None: - with self.connection.begin() as con: - con.exec_driver_sql("CALL dbgen(sf=0.1)") + self.connection.raw_sql("CALL dbgen(sf=0.1)") + + def _load_data(self, **_: Any) -> None: + """Load test data into a backend.""" + for stmt in self.ddl_script: + self.connection.raw_sql(stmt) @pytest.fixture(scope="session") diff --git a/ibis/backends/duckdb/tests/snapshots/test_client/test_to_other_sql/out.sql b/ibis/backends/duckdb/tests/snapshots/test_client/test_to_other_sql/out.sql new file mode 100644 index 000000000000..67f2cfc5ea25 --- /dev/null +++ b/ibis/backends/duckdb/tests/snapshots/test_client/test_to_other_sql/out.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM "functional_alltypes" \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql index e68c65813913..b3523fe55df7 100644 --- a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql @@ -1,3 +1,3 @@ SELECT - ST_DWITHIN(t0.geom, t0.geom, CAST(3.0 AS DOUBLE)) AS tmp -FROM t AS t0 \ No newline at end of file + ST_DWITHIN(t0.geom, t0.geom, CAST(3.0 AS DOUBLE)) AS "GeoDWithin(geom, geom, 3.0)" +FROM t AS t0 diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/as_text/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/as_text/out.sql index 7da710b2dceb..498b544a506c 100644 --- a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/as_text/out.sql +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/as_text/out.sql @@ -1,3 +1,3 @@ SELECT - ST_ASTEXT(t0.geom) AS tmp + ST_ASTEXT(t0.geom) AS "GeoAsText(geom)" FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/n_points/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/n_points/out.sql index bf8ba88ffde2..db42c12ad237 100644 --- a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/n_points/out.sql +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/n_points/out.sql @@ -1,3 +1,3 @@ SELECT - ST_NPOINTS(t0.geom) AS tmp + ST_NPOINTS(t0.geom) AS "GeoNPoints(geom)" FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/test_client.py b/ibis/backends/duckdb/tests/test_client.py index 05525ab55c96..e01467aa5f65 100644 --- a/ibis/backends/duckdb/tests/test_client.py +++ b/ibis/backends/duckdb/tests/test_client.py @@ -3,7 +3,6 @@ import duckdb import pyarrow as pa import pytest -import sqlalchemy as sa from pytest import param import ibis @@ -25,7 +24,7 @@ def ext_directory(tmpdir_factory): @pytest.mark.xfail( LINUX and SANDBOXED, reason="nix on linux cannot download duckdb extensions or data due to sandboxing", - raises=sa.exc.OperationalError, + raises=duckdb.IOException, ) @pytest.mark.xdist_group(name="duckdb-extensions") def test_connect_extensions(ext_directory): @@ -76,11 +75,11 @@ def test_cross_db(tmpdir): con2.attach(path1, name="test1", read_only=True) - t1_from_con2 = con2.table("t1", schema="test1.main") + t1_from_con2 = con2.table("t1", schema="main", 
database="test1") assert t1_from_con2.schema() == t2.schema() assert t1_from_con2.execute().equals(t2.execute()) - foo_t1_from_con2 = con2.table("t1", schema="test1.foo") + foo_t1_from_con2 = con2.table("t1", schema="foo", database="test1") assert foo_t1_from_con2.schema() == t2.schema() assert foo_t1_from_con2.execute().equals(t2.execute()) @@ -115,24 +114,26 @@ def test_attach_detach(tmpdir): con2.detach(name) assert name not in con2.list_databases() - with pytest.raises(sa.exc.ProgrammingError): + with pytest.raises(duckdb.BinderException): con2.detach(name) @pytest.mark.parametrize( - "scale", + ("scale", "expected_scale"), [ - None, - param(0, id="seconds"), - param(3, id="millis"), - param(6, id="micros"), - param(9, id="nanos"), + param(None, 6, id="default"), + param(0, 0, id="seconds"), + param(3, 3, id="millis"), + param(6, 6, id="micros"), + param(9, 9, id="nanos"), ], ) -def test_create_table_with_timestamp_scales(con, scale): +def test_create_table_with_timestamp_scales(con, scale, expected_scale): schema = ibis.schema(dict(ts=dt.Timestamp(scale=scale))) - t = con.create_table(gen_name("duckdb_timestamp_scale"), schema=schema, temp=True) - assert t.schema() == schema + expected = ibis.schema(dict(ts=dt.Timestamp(scale=expected_scale))) + name = gen_name("duckdb_timestamp_scale") + t = con.create_table(name, schema=schema, temp=True) + assert t.schema() == expected def test_config_options(con): @@ -153,8 +154,45 @@ def test_config_options(con): def test_config_options_bad_option(con): - with pytest.raises(sa.exc.ProgrammingError): + with pytest.raises(duckdb.CatalogException): con.settings["not_a_valid_option"] = "oopsie" with pytest.raises(KeyError): con.settings["i_didnt_set_this"] + + +def test_insert(con): + import pandas as pd + + name = ibis.util.guid() + + t = con.create_table(name, schema=ibis.schema({"a": "int64"})) + con.insert(name, obj=pd.DataFrame({"a": [1, 2]})) + assert t.count().execute() == 2 + + con.insert(name, obj=pd.DataFrame({"a": [1, 2]})) + assert t.count().execute() == 4 + + con.insert(name, obj=pd.DataFrame({"a": [1, 2]}), overwrite=True) + assert t.count().execute() == 2 + + con.insert(name, t) + assert t.count().execute() == 4 + + con.insert(name, [{"a": 1}, {"a": 2}], overwrite=True) + assert t.count().execute() == 2 + + con.insert(name, [(1,), (2,)]) + assert t.count().execute() == 4 + + con.insert(name, {"a": [1, 2]}, overwrite=True) + assert t.count().execute() == 2 + + +def test_to_other_sql(con, snapshot): + pytest.importorskip("snowflake.connector") + + t = con.table("functional_alltypes") + + sql = ibis.to_sql(t, dialect="snowflake") + snapshot.assert_match(sql, "out.sql") diff --git a/ibis/backends/duckdb/tests/test_datatypes.py b/ibis/backends/duckdb/tests/test_datatypes.py index 7f0212f21374..1e23cde4fdbf 100644 --- a/ibis/backends/duckdb/tests/test_datatypes.py +++ b/ibis/backends/duckdb/tests/test_datatypes.py @@ -3,15 +3,12 @@ import duckdb_engine import numpy as np import pytest -import sqlalchemy as sa -from packaging.version import parse as vparse from pytest import param import ibis -import ibis.backends.base.sql.alchemy.datatypes as sat import ibis.common.exceptions as exc import ibis.expr.datatypes as dt -from ibis.backends.duckdb.datatypes import DuckDBType +from ibis.backends.base.sqlglot.datatypes import DuckDBType @pytest.mark.parametrize( @@ -93,15 +90,6 @@ def test_parse_quoted_struct_field(): ) -def test_generate_quoted_struct(): - typ = sat.StructType( - {"in come": sa.VARCHAR(), "my count": sa.BIGINT(), "thing": 
sa.INTEGER()} - ) - result = typ.compile(dialect=duckdb_engine.Dialect()) - expected = 'STRUCT("in come" VARCHAR, "my count" BIGINT, thing INTEGER)' - assert result == expected - - @pytest.mark.xfail( condition=vparse(duckdb_engine.__version__) < vparse("0.9.2"), raises=AssertionError, diff --git a/ibis/backends/duckdb/tests/test_geospatial.py b/ibis/backends/duckdb/tests/test_geospatial.py index da34dd422006..49d34f52f905 100644 --- a/ibis/backends/duckdb/tests/test_geospatial.py +++ b/ibis/backends/duckdb/tests/test_geospatial.py @@ -17,7 +17,7 @@ def test_geospatial_point(zones, zones_gdf): - coord = zones.x_cent.point(zones.y_cent).name("coord") + coord = zones.x_cent.point(zones.y_cent) # this returns GeometryArray gp_coord = gpd.points_from_xy(zones_gdf.x_cent, zones_gdf.y_cent) @@ -34,13 +34,13 @@ def test_geospatial_point(zones, zones_gdf): ) def test_geospatial_unary_snapshot(operation, keywords, snapshot): t = ibis.table([("geom", "geometry")], name="t") - expr = getattr(t.geom, operation)(**keywords).name("tmp") + expr = getattr(t.geom, operation)(**keywords) snapshot.assert_match(ibis.to_sql(expr), "out.sql") def test_geospatial_dwithin(snapshot): t = ibis.table([("geom", "geometry")], name="t") - expr = t.geom.d_within(t.geom, 3.0).name("tmp") + expr = t.geom.d_within(t.geom, 3.0) snapshot.assert_match(ibis.to_sql(expr), "out.sql") @@ -62,7 +62,7 @@ def test_geospatial_dwithin(snapshot): ], ) def test_geospatial_unary_tm(op, keywords, gp_op, zones, zones_gdf): - expr = getattr(zones.geom, op)(**keywords).name("tmp") + expr = getattr(zones.geom, op)(**keywords) gp_expr = getattr(zones_gdf.geometry, gp_op) tm.assert_series_equal(expr.to_pandas(), gp_expr, check_names=False) @@ -76,10 +76,10 @@ def test_geospatial_unary_tm(op, keywords, gp_op, zones, zones_gdf): ], ) def test_geospatial_xy(op, keywords, gp_op, zones, zones_gdf): - cen = zones.geom.centroid().name("centroid") + cen = zones.geom.centroid() gp_cen = zones_gdf.geometry.centroid - expr = getattr(cen, op)(**keywords).name("tmp") + expr = getattr(cen, op)(**keywords) gp_expr = getattr(gp_cen, gp_op) tm.assert_series_equal(expr.to_pandas(), gp_expr, check_names=False) @@ -88,7 +88,7 @@ def test_geospatial_xy(op, keywords, gp_op, zones, zones_gdf): def test_geospatial_length(lines, lines_gdf): # note: ST_LENGTH returns 0 for the case of polygon # or multi polygon while pandas geopandas returns the perimeter. 
- length = lines.geom.length().name("length") + length = lines.geom.length() gp_length = lines_gdf.geometry.length tm.assert_series_equal(length.to_pandas(), gp_length, check_names=False) @@ -113,7 +113,7 @@ def test_geospatial_length(lines, lines_gdf): ], ) def test_geospatial_binary_tm(op, gp_op, zones, zones_gdf): - expr = getattr(zones.geom, op)(zones.geom).name("tmp") + expr = getattr(zones.geom, op)(zones.geom) gp_func = getattr(zones_gdf.geometry, gp_op)(zones_gdf.geometry) tm.assert_series_equal(expr.to_pandas(), gp_func, check_names=False) @@ -129,7 +129,7 @@ def test_geospatial_binary_tm(op, gp_op, zones, zones_gdf): ], ) def test_geospatial_unary_gtm(op, gp_op, zones, zones_gdf): - expr = getattr(zones.geom, op)().name("tmp") + expr = getattr(zones.geom, op)() gp_expr = getattr(zones_gdf.geometry, gp_op) gtm.assert_geoseries_equal(expr.to_pandas(), gp_expr, check_crs=False) @@ -146,14 +146,14 @@ def test_geospatial_unary_gtm(op, gp_op, zones, zones_gdf): ], ) def test_geospatial_binary_gtm(op, gp_op, zones, zones_gdf): - expr = getattr(zones.geom, op)(zones.geom).name("tmp") + expr = getattr(zones.geom, op)(zones.geom) gp_func = getattr(zones_gdf.geometry, gp_op)(zones_gdf.geometry) gtm.assert_geoseries_equal(expr.to_pandas(), gp_func, check_crs=False) def test_geospatial_end_point(lines, lines_gdf): - epoint = lines.geom.end_point().name("end_point") + epoint = lines.geom.end_point() # geopandas does not have end_point this is a work around to get it gp_epoint = lines_gdf.geometry.boundary.explode(index_parts=True).xs(1, level=1) @@ -161,7 +161,7 @@ def test_geospatial_end_point(lines, lines_gdf): def test_geospatial_start_point(lines, lines_gdf): - spoint = lines.geom.start_point().name("start_point") + spoint = lines.geom.start_point() # geopandas does not have start_point this is a work around to get it gp_spoint = lines_gdf.geometry.boundary.explode(index_parts=True).xs(0, level=1) @@ -170,7 +170,7 @@ def test_geospatial_start_point(lines, lines_gdf): # this one takes a bit longer than the rest. def test_geospatial_unary_union(zones, zones_gdf): - unary_union = zones.geom.unary_union().name("unary_union") + unary_union = zones.geom.unary_union() # this returns a shapely geometry object gp_unary_union = zones_gdf.geometry.unary_union @@ -182,7 +182,7 @@ def test_geospatial_unary_union(zones, zones_gdf): def test_geospatial_buffer_point(zones, zones_gdf): - cen = zones.geom.centroid().name("centroid") + cen = zones.geom.centroid() gp_cen = zones_gdf.geometry.centroid buffer = cen.buffer(100.0) diff --git a/ibis/backends/duckdb/tests/test_register.py b/ibis/backends/duckdb/tests/test_register.py index 368679826c65..54e85e7f86f7 100644 --- a/ibis/backends/duckdb/tests/test_register.py +++ b/ibis/backends/duckdb/tests/test_register.py @@ -138,16 +138,15 @@ def test_temp_directory(tmp_path): # 1. in-memory + no temp_directory specified con = ibis.duckdb.connect() - with con.begin() as c: - value = c.exec_driver_sql(query).scalar() - assert value # we don't care what the specific value is + + value = con.raw_sql(query).fetchone()[0] + assert value # we don't care what the specific value is temp_directory = Path(tempfile.gettempdir()) / "duckdb" # 2. in-memory + temp_directory specified con = ibis.duckdb.connect(temp_directory=temp_directory) - with con.begin() as c: - value = c.exec_driver_sql(query).scalar() + value = con.raw_sql(query).fetchone()[0] assert value == str(temp_directory) # 3. 
on-disk + no temp_directory specified @@ -156,8 +155,7 @@ def test_temp_directory(tmp_path): # 4. on-disk + temp_directory specified con = ibis.duckdb.connect(tmp_path / "test2.ddb", temp_directory=temp_directory) - with con.begin() as c: - value = c.exec_driver_sql(query).scalar() + value = con.raw_sql(query).fetchone()[0] assert value == str(temp_directory) diff --git a/ibis/backends/duckdb/tests/test_udf.py b/ibis/backends/duckdb/tests/test_udf.py index fa0968fc2765..cff40de2ca2f 100644 --- a/ibis/backends/duckdb/tests/test_udf.py +++ b/ibis/backends/duckdb/tests/test_udf.py @@ -51,9 +51,7 @@ def test_builtin_scalar(con, func): a, b = "duck", "luck" expr = func(a, b) - with con.begin() as c: - expected = c.exec_driver_sql(f"SELECT {func.__name__}({a!r}, {b!r})").scalar() - + expected = con.raw_sql(f"SELECT {func.__name__}({a!r}, {b!r})").df().squeeze() assert con.execute(expr) == expected @@ -80,10 +78,11 @@ def test_builtin_agg(con, func): data = ibis.memtable({"a": raw_data}) expr = func(data.a) - with con.begin() as c: - expected = c.exec_driver_sql( - f"SELECT {func.__name__}(a) FROM UNNEST({raw_data!r}) _ (a)" - ).scalar() + expected = ( + con.raw_sql(f"SELECT {func.__name__}(a) FROM UNNEST({raw_data!r}) _ (a)") + .df() + .squeeze() + ) assert con.execute(expr) == expected diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql index c29c79e8682a..e472a5727fab 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql @@ -1,62 +1,55 @@ SELECT - t5.street, - t5.key + t5.street AS street, + t5.key AS key, + t5.key_right AS key_right FROM ( SELECT - t4.street, - ROW_NUMBER() OVER (ORDER BY t4.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key + t1.street AS street, + ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key, + t3.key AS key_right FROM ( SELECT - t1.street, - t1.key + t0.street AS street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key + FROM data AS t0 + ) AS t1 + INNER JOIN ( + SELECT + t1.key AS key FROM ( SELECT - t0.*, + t0.street AS street, ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key FROM data AS t0 ) AS t1 - INNER JOIN ( - SELECT - t1.key - FROM ( - SELECT - t0.*, - ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key - FROM data AS t0 - ) AS t1 - ) AS t2 - ON t1.key = t2.key - ) AS t4 + ) AS t3 + ON t1.key = t3.key ) AS t5 INNER JOIN ( SELECT - t5.key + t5.key AS key FROM ( SELECT - t4.street, - ROW_NUMBER() OVER (ORDER BY t4.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key + t1.street AS street, + ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key, + t3.key AS key_right FROM ( SELECT - t1.street, - t1.key + t0.street AS street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key + FROM data AS t0 + ) AS t1 + INNER JOIN ( + SELECT + t1.key AS key FROM ( SELECT - t0.*, + t0.street AS street, ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key 
FROM data AS t0 ) AS t1 - INNER JOIN ( - SELECT - t1.key - FROM ( - SELECT - t0.*, - ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key - FROM data AS t0 - ) AS t1 - ) AS t2 - ON t1.key = t2.key - ) AS t4 + ) AS t3 + ON t1.key = t3.key ) AS t5 -) AS t6 - ON t5.key = t6.key \ No newline at end of file +) AS t7 + ON t5.key = t7.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/datafusion/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/datafusion/out.sql new file mode 100644 index 000000000000..f7cf54e9de51 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/datafusion/out.sql @@ -0,0 +1,55 @@ +SELECT + "t5"."street" AS "street", + "t5"."key" AS "key", + "t5"."key_right" AS "key_right" +FROM ( + SELECT + "t1"."street" AS "street", + ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key", + "t2"."key" AS "key_right" + FROM ( + SELECT + "t0"."street" AS "street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + INNER JOIN ( + SELECT + "t1"."key" AS "key" + FROM ( + SELECT + "t0"."street" AS "street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t2" + ON "t1"."key" = "t2"."key" +) AS "t5" +INNER JOIN ( + SELECT + "t5"."key" AS "key" + FROM ( + SELECT + "t1"."street" AS "street", + ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key", + "t2"."key" AS "key_right" + FROM ( + SELECT + "t0"."street" AS "street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + INNER JOIN ( + SELECT + "t1"."key" AS "key" + FROM ( + SELECT + "t0"."street" AS "street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t2" + ON "t1"."key" = "t2"."key" + ) AS "t5" +) AS "t6" + ON "t5"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql index fb8c40cd69ba..2a699a186d7d 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql @@ -1,32 +1,55 @@ -WITH t0 AS ( - SELECT - t5.street AS street, - ROW_NUMBER() OVER (ORDER BY t5.street ASC) - 1 AS key - FROM data AS t5 -), t1 AS ( - SELECT - t0.key AS key - FROM t0 -), t2 AS ( - SELECT - t0.street AS street, - t0.key AS key - FROM t0 - JOIN t1 - ON t0.key = t1.key -), t3 AS ( +SELECT + t5.street AS street, + t5.key AS key, + t5.key_right AS key_right +FROM ( SELECT - t2.street AS street, - ROW_NUMBER() OVER (ORDER BY t2.street ASC) - 1 AS key - FROM t2 -), t4 AS ( + t1.street AS street, + ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key, + t3.key AS key_right + FROM ( + SELECT + t0.street AS street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED 
FOLLOWING) - CAST(1 AS TINYINT) AS key + FROM data AS t0 + ) AS t1 + INNER JOIN ( + SELECT + t1.key AS key + FROM ( + SELECT + t0.street AS street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key + FROM data AS t0 + ) AS t1 + ) AS t3 + ON t1.key = t3.key +) AS t5 +INNER JOIN ( SELECT - t3.key AS key - FROM t3 -) -SELECT - t3.street, - t3.key -FROM t3 -JOIN t4 - ON t3.key = t4.key \ No newline at end of file + t5.key AS key + FROM ( + SELECT + t1.street AS street, + ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key, + t3.key AS key_right + FROM ( + SELECT + t0.street AS street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key + FROM data AS t0 + ) AS t1 + INNER JOIN ( + SELECT + t1.key AS key + FROM ( + SELECT + t0.street AS street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key + FROM data AS t0 + ) AS t1 + ) AS t3 + ON t1.key = t3.key + ) AS t5 +) AS t7 + ON t5.key = t7.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql index f3f2e94c391a..d63129cc6985 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql @@ -1,32 +1,55 @@ -WITH t0 AS ( - SELECT - t5."street" AS "street", - ROW_NUMBER() OVER (ORDER BY t5."street" ASC) - 1 AS "key" - FROM "data" AS t5 -), t1 AS ( - SELECT - t0."key" AS "key" - FROM t0 -), t2 AS ( - SELECT - t0."street" AS "street", - t0."key" AS "key" - FROM t0 - JOIN t1 - ON t0."key" = t1."key" -), t3 AS ( +SELECT + "t5"."street" AS "street", + "t5"."key" AS "key", + "t5"."key_right" AS "key_right" +FROM ( SELECT - t2."street" AS "street", - ROW_NUMBER() OVER (ORDER BY t2."street" ASC) - 1 AS "key" - FROM t2 -), t4 AS ( + "t1"."street" AS "street", + ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key", + "t3"."key" AS "key_right" + FROM ( + SELECT + "t0"."street" AS "street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + INNER JOIN ( + SELECT + "t1"."key" AS "key" + FROM ( + SELECT + "t0"."street" AS "street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t3" + ON "t1"."key" = "t3"."key" +) AS "t5" +INNER JOIN ( SELECT - t3."key" AS "key" - FROM t3 -) -SELECT - t3."street", - t3."key" -FROM t3 -JOIN t4 - ON t3."key" = t4."key" \ No newline at end of file + "t5"."key" AS "key" + FROM ( + SELECT + "t1"."street" AS "street", + ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key", + "t3"."key" AS "key_right" + FROM ( + SELECT + "t0"."street" AS "street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + INNER JOIN ( + SELECT + "t1"."key" AS "key" + FROM ( + SELECT + "t0"."street" AS "street", + ROW_NUMBER() OVER (ORDER BY 
"t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t3" + ON "t1"."key" = "t3"."key" + ) AS "t5" +) AS "t7" + ON "t5"."key" = "t7"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_join/test_complex_join_agg/out.sql b/ibis/backends/tests/snapshots/test_join/test_complex_join_agg/out.sql new file mode 100644 index 000000000000..c454d75c63aa --- /dev/null +++ b/ibis/backends/tests/snapshots/test_join/test_complex_join_agg/out.sql @@ -0,0 +1,17 @@ +SELECT + t3.key1 AS key1, + AVG(t3.value1 - t3.value2) AS avg_diff +FROM ( + SELECT + t0.value1 AS value1, + t0.key1 AS key1, + t0.key2 AS key2, + t1.value2 AS value2, + t1.key1 AS key1_right, + t1.key4 AS key4 + FROM table1 AS t0 + LEFT OUTER JOIN table2 AS t1 + ON t0.key1 = t1.key1 +) AS t3 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql index edaf83ff95ff..6dfef25abe9f 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql @@ -1,49 +1,40 @@ SELECT - t4.key + t2.key AS key, + t3.key AS key_right, + t6.key_right AS key_right_right FROM ( SELECT - t1.key + t0.key AS key + FROM leaf AS t0 + WHERE + TRUE +) AS t2 +INNER JOIN ( + SELECT + t0.key AS key + FROM leaf AS t0 + WHERE + TRUE +) AS t3 + ON t2.key = t3.key +INNER JOIN ( + SELECT + t2.key AS key, + t3.key AS key_right FROM ( SELECT - * + t0.key AS key FROM leaf AS t0 WHERE TRUE - ) AS t1 - INNER JOIN ( - SELECT - t1.key - FROM ( - SELECT - * - FROM leaf AS t0 - WHERE - TRUE - ) AS t1 ) AS t2 - ON t1.key = t2.key -) AS t4 -INNER JOIN ( - SELECT - t1.key - FROM ( + INNER JOIN ( SELECT - * + t0.key AS key FROM leaf AS t0 WHERE TRUE - ) AS t1 - INNER JOIN ( - SELECT - t1.key - FROM ( - SELECT - * - FROM leaf AS t0 - WHERE - TRUE - ) AS t1 - ) AS t2 - ON t1.key = t2.key -) AS t5 - ON t4.key = t5.key \ No newline at end of file + ) AS t3 + ON t2.key = t3.key +) AS t6 + ON t6.key = t6.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/datafusion/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/datafusion/out.sql new file mode 100644 index 000000000000..96acd49caaad --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/datafusion/out.sql @@ -0,0 +1,48 @@ +SELECT + "t1"."key" AS "key", + "t2"."key" AS "key_right", + "t4"."key_right" AS "key_right_right" +FROM ( + SELECT + * + FROM "leaf" AS "t0" + WHERE + TRUE +) AS "t1" +INNER JOIN ( + SELECT + "t1"."key" AS "key" + FROM ( + SELECT + * + FROM "leaf" AS "t0" + WHERE + TRUE + ) AS "t1" +) AS "t2" + ON "t1"."key" = "t2"."key" +INNER JOIN ( + SELECT + "t1"."key" AS "key", + "t2"."key" AS "key_right" + FROM ( + SELECT + * + FROM "leaf" AS "t0" + WHERE + TRUE + ) AS "t1" + INNER JOIN ( + SELECT + "t1"."key" AS "key" + FROM ( + SELECT + * + FROM "leaf" AS "t0" + WHERE + TRUE + ) AS "t1" + ) AS "t2" + ON "t1"."key" = "t2"."key" +) AS "t4" + ON "t1"."key" = "t1"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql index 66a947699796..fb2ee62190b5 100644 --- 
a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql @@ -1,22 +1,40 @@ -WITH t0 AS ( +SELECT + t1.key AS key, + t2.key AS key_right, + t5.key_right AS key_right_right +FROM ( SELECT - t4.key AS key - FROM leaf AS t4 + t0.key AS key + FROM leaf AS t0 WHERE - CAST(TRUE AS BOOLEAN) -), t1 AS ( + TRUE +) AS t1 +INNER JOIN ( SELECT t0.key AS key - FROM t0 -), t2 AS ( + FROM leaf AS t0 + WHERE + TRUE +) AS t2 + ON t1.key = t2.key +INNER JOIN ( SELECT - t0.key AS key - FROM t0 - JOIN t1 - ON t0.key = t1.key -) -SELECT - t2.key -FROM t2 -JOIN t2 AS t3 - ON t2.key = t3.key \ No newline at end of file + t1.key AS key, + t2.key AS key_right + FROM ( + SELECT + t0.key AS key + FROM leaf AS t0 + WHERE + TRUE + ) AS t1 + INNER JOIN ( + SELECT + t0.key AS key + FROM leaf AS t0 + WHERE + TRUE + ) AS t2 + ON t1.key = t2.key +) AS t5 + ON t1.key = t5.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql index 60738db25e2d..eb9acf0a45fe 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql @@ -1,22 +1,40 @@ -WITH t0 AS ( +SELECT + "t1"."key" AS "key", + "t2"."key" AS "key_right", + "t5"."key_right" AS "key_right_right" +FROM ( SELECT - t4."key" AS "key" - FROM "leaf" AS t4 + "t0"."key" AS "key" + FROM "leaf" AS "t0" WHERE TRUE -), t1 AS ( +) AS "t1" +INNER JOIN ( SELECT - t0."key" AS "key" - FROM t0 -), t2 AS ( + "t0"."key" AS "key" + FROM "leaf" AS "t0" + WHERE + TRUE +) AS "t2" + ON "t1"."key" = "t2"."key" +INNER JOIN ( SELECT - t0."key" AS "key" - FROM t0 - JOIN t1 - ON t0."key" = t1."key" -) -SELECT - t2."key" -FROM t2 -JOIN t2 AS t3 - ON t2."key" = t3."key" \ No newline at end of file + "t1"."key" AS "key", + "t2"."key" AS "key_right" + FROM ( + SELECT + "t0"."key" AS "key" + FROM "leaf" AS "t0" + WHERE + TRUE + ) AS "t1" + INNER JOIN ( + SELECT + "t0"."key" AS "key" + FROM "leaf" AS "t0" + WHERE + TRUE + ) AS "t2" + ON "t1"."key" = "t2"."key" +) AS "t5" + ON "t1"."key" = "t5"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/datafusion/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/datafusion/out.sql new file mode 100644 index 000000000000..d3969647c9ea --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/datafusion/out.sql @@ -0,0 +1,22 @@ +SELECT + CASE "t0"."continent" + WHEN 'NA' + THEN 'North America' + WHEN 'SA' + THEN 'South America' + WHEN 'EU' + THEN 'Europe' + WHEN 'AF' + THEN 'Africa' + WHEN 'AS' + THEN 'Asia' + WHEN 'OC' + THEN 'Oceania' + WHEN 'AN' + THEN 'Antarctica' + ELSE 'Unknown continent' + END AS "cont", + SUM("t0"."population") AS "total_pop" +FROM "countries" AS "t0" +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/snowflake/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/snowflake/out.sql index 922316952999..d3969647c9ea 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/snowflake/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/snowflake/out.sql @@ -1,5 +1,5 @@ SELECT - CASE t0."continent" + CASE 
"t0"."continent" WHEN 'NA' THEN 'North America' WHEN 'SA' @@ -16,7 +16,7 @@ SELECT THEN 'Antarctica' ELSE 'Unknown continent' END AS "cont", - SUM(t0."population") AS "total_pop" -FROM "countries" AS t0 + SUM("t0"."population") AS "total_pop" +FROM "countries" AS "t0" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql index fa221469c7dd..986701fa646e 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql @@ -1,13 +1,9 @@ SELECT - t0.x IN ( + t0.x IN (( SELECT - t1.x - FROM ( - SELECT - * - FROM t AS t0 - WHERE - t0.x > 2 - ) AS t1 - ) AS "InColumn(x, x)" + t0.x AS x + FROM t AS t0 + WHERE + t0.x > 2 + )) AS "InSubquery(x)" FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/datafusion/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/datafusion/out.sql new file mode 100644 index 000000000000..b5362bf67adc --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/datafusion/out.sql @@ -0,0 +1,15 @@ +SELECT + "t0"."x" IN (( + SELECT + "t1"."x" AS "x" + FROM ( + SELECT + * + FROM "t" AS "t0" + WHERE + ( + "t0"."x" > 2 + ) + ) AS "t1" + )) AS "InSubquery(x)" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql index 218ccb1d5c46..7b8c77fc31e8 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql @@ -1,13 +1,9 @@ SELECT - t0.x IN ( + t0.x IN (( SELECT - t1.x - FROM ( - SELECT - t0.x AS x - FROM t AS t0 - WHERE - t0.x > CAST(2 AS TINYINT) - ) AS t1 - ) AS "InColumn(x, x)" + t0.x AS x + FROM t AS t0 + WHERE + t0.x > CAST(2 AS TINYINT) + )) AS "InSubquery(x)" FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/snowflake/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/snowflake/out.sql index 683a03b084ec..92e386ef62a4 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/snowflake/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/snowflake/out.sql @@ -1,13 +1,9 @@ SELECT - t0."x" IN ( + "t0"."x" IN (( SELECT - t1."x" - FROM ( - SELECT - t0."x" AS "x" - FROM "t" AS t0 - WHERE - t0."x" > 2 - ) AS t1 - ) AS "InColumn(x, x)" -FROM "t" AS t0 \ No newline at end of file + "t0"."x" AS "x" + FROM "t" AS "t0" + WHERE + "t0"."x" > 2 + )) AS "InSubquery(x)" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql index d30fed08ac79..e2cd68f4d9a3 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql @@ -1,94 +1,96 @@ SELECT - t11.field_of_study, - t11.diff + t10.field_of_study AS field_of_study, + t10.diff AS diff FROM ( SELECT - * + t5.field_of_study AS field_of_study, + t5.diff AS diff FROM ( SELECT - * + t4.field_of_study AS field_of_study, + any(t4.diff) AS diff FROM ( SELECT - t4.field_of_study, - any(t4.diff) AS diff + t3.field_of_study AS field_of_study, + 
t3.years AS years, + t3.degrees AS degrees, + t3.earliest_degrees AS earliest_degrees, + t3.latest_degrees AS latest_degrees, + t3.latest_degrees - t3.earliest_degrees AS diff FROM ( SELECT - t3.*, - t3.latest_degrees - t3.earliest_degrees AS diff + t2.field_of_study AS field_of_study, + t2.years AS years, + t2.degrees AS degrees, + any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, + anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees FROM ( SELECT - t2.*, - any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, - anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees + t1.field_of_study AS field_of_study, + CAST(t1.__pivoted__.1 AS Nullable(String)) AS years, + CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees FROM ( SELECT - t1.field_of_study, - CAST(t1.__pivoted__.1 AS Nullable(String)) AS years, - CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees - FROM ( - SELECT - t0.field_of_study, - arrayJoin( - [CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))] - ) AS __pivoted__ - FROM humanities AS t0 - ) AS t1 - ) AS t2 - ) AS t3 - ) AS t4 - GROUP BY - t4.field_of_study - ) AS t5 - ORDER BY - t5.diff DESC - ) AS t6 + t0.field_of_study AS field_of_study, + arrayJoin( + [CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees 
Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))] + ) AS __pivoted__ + FROM humanities AS t0 + ) AS t1 + ) AS t2 + ) AS t3 + ) AS t4 + GROUP BY + t4.field_of_study + ) AS t5 + ORDER BY + t5.diff DESC LIMIT 10 UNION ALL SELECT - * + t5.field_of_study AS field_of_study, + t5.diff AS diff FROM ( SELECT - * + t4.field_of_study AS field_of_study, + any(t4.diff) AS diff FROM ( SELECT - * + t3.field_of_study AS field_of_study, + t3.years AS years, + t3.degrees AS degrees, + t3.earliest_degrees AS earliest_degrees, + t3.latest_degrees AS latest_degrees, + t3.latest_degrees - t3.earliest_degrees AS diff FROM ( SELECT - t4.field_of_study, - any(t4.diff) AS diff + t2.field_of_study AS field_of_study, + t2.years AS years, + t2.degrees AS degrees, + any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, + anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees FROM ( SELECT - t3.*, - t3.latest_degrees - t3.earliest_degrees AS diff + t1.field_of_study AS field_of_study, + CAST(t1.__pivoted__.1 AS Nullable(String)) AS years, + CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees FROM ( SELECT - t2.*, - any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, - anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees - FROM ( - SELECT - t1.field_of_study, - CAST(t1.__pivoted__.1 AS Nullable(String)) AS years, - CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees - FROM ( - SELECT - t0.field_of_study, - arrayJoin( - [CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS 
Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))] - ) AS __pivoted__ - FROM humanities AS t0 - ) AS t1 - ) AS t2 - ) AS t3 - ) AS t4 - GROUP BY - t4.field_of_study - ) AS t5 - WHERE - t5.diff < 0 - ) AS t7 - ORDER BY - t7.diff ASC - ) AS t9 + t0.field_of_study AS field_of_study, + arrayJoin( + [CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))] + ) AS __pivoted__ + FROM humanities AS t0 + ) AS t1 + ) AS t2 + ) AS t3 + ) AS t4 + GROUP BY + t4.field_of_study + ) AS t5 + WHERE + t5.diff < 0 + ORDER BY + t5.diff ASC LIMIT 10 
-) AS t11 \ No newline at end of file +) AS t10 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql index 8335befe6765..7af0e3831b68 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql @@ -1,75 +1,96 @@ -WITH t0 AS ( - SELECT - t7.field_of_study AS field_of_study, - UNNEST( - CAST([{'years': '1970-71', 'degrees': t7."1970-71"}, {'years': '1975-76', 'degrees': t7."1975-76"}, {'years': '1980-81', 'degrees': t7."1980-81"}, {'years': '1985-86', 'degrees': t7."1985-86"}, {'years': '1990-91', 'degrees': t7."1990-91"}, {'years': '1995-96', 'degrees': t7."1995-96"}, {'years': '2000-01', 'degrees': t7."2000-01"}, {'years': '2005-06', 'degrees': t7."2005-06"}, {'years': '2010-11', 'degrees': t7."2010-11"}, {'years': '2011-12', 'degrees': t7."2011-12"}, {'years': '2012-13', 'degrees': t7."2012-13"}, {'years': '2013-14', 'degrees': t7."2013-14"}, {'years': '2014-15', 'degrees': t7."2014-15"}, {'years': '2015-16', 'degrees': t7."2015-16"}, {'years': '2016-17', 'degrees': t7."2016-17"}, {'years': '2017-18', 'degrees': t7."2017-18"}, {'years': '2018-19', 'degrees': t7."2018-19"}, {'years': '2019-20', 'degrees': t7."2019-20"}] AS STRUCT(years TEXT, degrees BIGINT)[]) - ) AS __pivoted__ - FROM humanities AS t7 -), t1 AS ( - SELECT - t0.field_of_study AS field_of_study, - STRUCT_EXTRACT(t0.__pivoted__, 'years') AS years, - STRUCT_EXTRACT(t0.__pivoted__, 'degrees') AS degrees - FROM t0 -), t2 AS ( - SELECT - t1.field_of_study AS field_of_study, - t1.years AS years, - t1.degrees AS degrees, - FIRST_VALUE(t1.degrees) OVER (PARTITION BY t1.field_of_study ORDER BY t1.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, - LAST_VALUE(t1.degrees) OVER (PARTITION BY t1.field_of_study ORDER BY t1.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees - FROM t1 -), t3 AS ( - SELECT - t2.field_of_study AS field_of_study, - t2.years AS years, - t2.degrees AS degrees, - t2.earliest_degrees AS earliest_degrees, - t2.latest_degrees AS latest_degrees, - t2.latest_degrees - t2.earliest_degrees AS diff - FROM t2 -), t4 AS ( - SELECT - t3.field_of_study AS field_of_study, - FIRST(t3.diff) AS diff - FROM t3 - GROUP BY - 1 -), anon_1 AS ( +SELECT + t10.field_of_study AS field_of_study, + t10.diff AS diff +FROM ( SELECT - t4.field_of_study AS field_of_study, - t4.diff AS diff - FROM t4 + t5.field_of_study AS field_of_study, + t5.diff AS diff + FROM ( + SELECT + t4.field_of_study AS field_of_study, + FIRST(t4.diff) AS diff + FROM ( + SELECT + t3.field_of_study AS field_of_study, + t3.years AS years, + t3.degrees AS degrees, + t3.earliest_degrees AS earliest_degrees, + t3.latest_degrees AS latest_degrees, + t3.latest_degrees - t3.earliest_degrees AS diff + FROM ( + SELECT + t2.field_of_study AS field_of_study, + t2.years AS years, + t2.degrees AS degrees, + FIRST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, + LAST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees + FROM ( + SELECT + t1.field_of_study AS field_of_study, + t1.__pivoted__['years'] AS years, + t1.__pivoted__['degrees'] AS degrees + FROM ( + SELECT + 
t0.field_of_study AS field_of_study, + UNNEST( + [{'years': '1970-71', 'degrees': t0."1970-71"}, {'years': '1975-76', 'degrees': t0."1975-76"}, {'years': '1980-81', 'degrees': t0."1980-81"}, {'years': '1985-86', 'degrees': t0."1985-86"}, {'years': '1990-91', 'degrees': t0."1990-91"}, {'years': '1995-96', 'degrees': t0."1995-96"}, {'years': '2000-01', 'degrees': t0."2000-01"}, {'years': '2005-06', 'degrees': t0."2005-06"}, {'years': '2010-11', 'degrees': t0."2010-11"}, {'years': '2011-12', 'degrees': t0."2011-12"}, {'years': '2012-13', 'degrees': t0."2012-13"}, {'years': '2013-14', 'degrees': t0."2013-14"}, {'years': '2014-15', 'degrees': t0."2014-15"}, {'years': '2015-16', 'degrees': t0."2015-16"}, {'years': '2016-17', 'degrees': t0."2016-17"}, {'years': '2017-18', 'degrees': t0."2017-18"}, {'years': '2018-19', 'degrees': t0."2018-19"}, {'years': '2019-20', 'degrees': t0."2019-20"}] + ) AS __pivoted__ + FROM humanities AS t0 + ) AS t1 + ) AS t2 + ) AS t3 + ) AS t4 + GROUP BY + 1 + ) AS t5 ORDER BY - t4.diff DESC + t5.diff DESC LIMIT 10 -), t5 AS ( - SELECT - t4.field_of_study AS field_of_study, - t4.diff AS diff - FROM t4 - WHERE - t4.diff < CAST(0 AS TINYINT) -), anon_2 AS ( + UNION ALL SELECT t5.field_of_study AS field_of_study, t5.diff AS diff - FROM t5 + FROM ( + SELECT + t4.field_of_study AS field_of_study, + FIRST(t4.diff) AS diff + FROM ( + SELECT + t3.field_of_study AS field_of_study, + t3.years AS years, + t3.degrees AS degrees, + t3.earliest_degrees AS earliest_degrees, + t3.latest_degrees AS latest_degrees, + t3.latest_degrees - t3.earliest_degrees AS diff + FROM ( + SELECT + t2.field_of_study AS field_of_study, + t2.years AS years, + t2.degrees AS degrees, + FIRST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, + LAST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees + FROM ( + SELECT + t1.field_of_study AS field_of_study, + t1.__pivoted__['years'] AS years, + t1.__pivoted__['degrees'] AS degrees + FROM ( + SELECT + t0.field_of_study AS field_of_study, + UNNEST( + [{'years': '1970-71', 'degrees': t0."1970-71"}, {'years': '1975-76', 'degrees': t0."1975-76"}, {'years': '1980-81', 'degrees': t0."1980-81"}, {'years': '1985-86', 'degrees': t0."1985-86"}, {'years': '1990-91', 'degrees': t0."1990-91"}, {'years': '1995-96', 'degrees': t0."1995-96"}, {'years': '2000-01', 'degrees': t0."2000-01"}, {'years': '2005-06', 'degrees': t0."2005-06"}, {'years': '2010-11', 'degrees': t0."2010-11"}, {'years': '2011-12', 'degrees': t0."2011-12"}, {'years': '2012-13', 'degrees': t0."2012-13"}, {'years': '2013-14', 'degrees': t0."2013-14"}, {'years': '2014-15', 'degrees': t0."2014-15"}, {'years': '2015-16', 'degrees': t0."2015-16"}, {'years': '2016-17', 'degrees': t0."2016-17"}, {'years': '2017-18', 'degrees': t0."2017-18"}, {'years': '2018-19', 'degrees': t0."2018-19"}, {'years': '2019-20', 'degrees': t0."2019-20"}] + ) AS __pivoted__ + FROM humanities AS t0 + ) AS t1 + ) AS t2 + ) AS t3 + ) AS t4 + GROUP BY + 1 + ) AS t5 + WHERE + t5.diff < CAST(0 AS TINYINT) ORDER BY t5.diff ASC LIMIT 10 -) -SELECT - t6.field_of_study, - t6.diff -FROM ( - SELECT - anon_1.field_of_study AS field_of_study, - anon_1.diff AS diff - FROM anon_1 - UNION ALL - SELECT - anon_2.field_of_study AS field_of_study, - anon_2.diff AS diff - FROM anon_2 -) AS t6 \ No newline at end of file +) AS t10 \ No newline at end of file diff 
--git a/ibis/backends/tests/sql/conftest.py b/ibis/backends/tests/sql/conftest.py index b16b9fa48147..2fd23ed45cb3 100644 --- a/ibis/backends/tests/sql/conftest.py +++ b/ibis/backends/tests/sql/conftest.py @@ -1,12 +1,13 @@ from __future__ import annotations import pytest +import sqlglot as sg import ibis pytest.importorskip("duckdb") - +from ibis.backends.duckdb import Backend as DuckDBBackend # noqa: E402 from ibis.tests.expr.mocks import MockBackend # noqa: E402 @@ -70,13 +71,17 @@ def bar_t(con): return con.table("bar_t") -def get_query(expr): - ast = Compiler.to_ast(expr, QueryContext(compiler=Compiler)) - return ast.queries[0] +def to_sql(expr, *args, **kwargs) -> str: + if args: + raise TypeError("Unexpected positional arguments") + if kwargs: + raise TypeError("Unexpected keyword arguments") + sql = DuckDBBackend.compiler.translate(expr.op(), params={}) + if isinstance(sql, sg.exp.Table): + sql = sg.select("*").from_(sql) -def to_sql(expr, *args, **kwargs) -> str: - return get_query(expr).compile(*args, **kwargs) + return sql.sql(dialect="duckdb", pretty=True) @pytest.fixture(scope="module") diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql index 91e2486414bc..636796e7e04d 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql @@ -1,2 +1,7 @@ -SELECT DISTINCT t0.`string_col` -FROM functional_alltypes t0 \ No newline at end of file +SELECT DISTINCT + * +FROM ( + SELECT + t0.string_col AS string_col + FROM functional_alltypes AS t0 +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql index 28d25fb5beba..2bdab32a6fc2 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql @@ -1,2 +1 @@ -SELECT t0.`int_col` + 4 AS `Add(int_col, 4)` -FROM int_col_table t0 \ No newline at end of file +t0.int_col + CAST(4 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql index 54f6988bbf2a..20cc4cc3cdbb 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql @@ -1,2 +1 @@ -SELECT t0.`int_col` + 4 AS `foo` -FROM int_col_table t0 \ No newline at end of file +t0.int_col + CAST(4 AS TINYINT) AS foo \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/decompiled.py index c42eaa8fc2d6..58b21a2c9c09 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/decompiled.py @@ -19,9 +19,6 @@ "month": "int32", }, ) +f = functional_alltypes.filter(functional_alltypes.bigint_col > 0) -result = ( - functional_alltypes.filter(functional_alltypes.bigint_col > 0) - .group_by(functional_alltypes.string_col) - 
.aggregate(functional_alltypes.int_col.nunique().name("nunique")) -) +result = f.aggregate([f.int_col.nunique().name("nunique")], by=[f.string_col]) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql index 9ed4f1de1890..f6bd37af0d2f 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql @@ -1,4 +1,14 @@ -SELECT t0.`string_col`, count(DISTINCT t0.`int_col`) AS `nunique` -FROM functional_alltypes t0 -WHERE t0.`bigint_col` > 0 -GROUP BY 1 \ No newline at end of file +SELECT + t1.string_col AS string_col, + COUNT(DISTINCT t1.int_col) AS nunique +FROM ( + SELECT + * + FROM functional_alltypes AS t0 + WHERE + ( + t0.bigint_col > CAST(0 AS TINYINT) + ) +) AS t1 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/decompiled.py index db9a8d152cb6..54b395185659 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/decompiled.py @@ -20,19 +20,12 @@ }, ) lit = ibis.literal(0) -alias = functional_alltypes.string_col.name("key") -difference = ( - functional_alltypes.select( - [alias, functional_alltypes.float_col.cast("float64").name("value")] - ) - .filter(functional_alltypes.int_col > lit) - .difference( - functional_alltypes.select( - [alias, functional_alltypes.double_col.name("value")] - ).filter(functional_alltypes.int_col <= lit), - distinct=True, - ) +f = functional_alltypes.filter(functional_alltypes.int_col > lit) +f1 = functional_alltypes.filter(functional_alltypes.int_col <= lit) +difference = f.select( + f.string_col.name("key"), f.float_col.cast("float64").name("value") +).difference( + f1.select(f1.string_col.name("key"), f1.double_col.name("value")), distinct=True ) -proj = difference.select([difference.key, difference.value]) -result = proj.select(proj.key) +result = difference.select(difference.key) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql index df350603146b..f5405266b656 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql @@ -1,21 +1,29 @@ -SELECT t0.`key` +SELECT + t5.key AS key FROM ( - SELECT t1.`key`, t1.`value` + SELECT + t1.string_col AS key, + CAST(t1.float_col AS DOUBLE) AS value FROM ( - WITH t2 AS ( - SELECT t4.`string_col` AS `key`, t4.`double_col` AS `value` - FROM functional_alltypes t4 - WHERE t4.`int_col` <= 0 - ), - t3 AS ( - SELECT t4.`string_col` AS `key`, CAST(t4.`float_col` AS double) AS `value` - FROM functional_alltypes t4 - WHERE t4.`int_col` > 0 - ) - SELECT * - FROM t3 - EXCEPT - SELECT * - FROM t2 - ) t1 -) t0 \ No newline at end of file + SELECT + * + FROM functional_alltypes AS t0 + WHERE + ( + t0.int_col > CAST(0 AS TINYINT) + ) + ) AS t1 + EXCEPT + SELECT + t2.string_col AS key, + t2.double_col AS value + FROM ( + SELECT + * + FROM functional_alltypes AS t0 + WHERE + ( + t0.int_col <= CAST(0 AS TINYINT) + ) + ) AS t2 +) AS t5 \ No 
newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py index 07cf4d8e959d..aef66c38a37c 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py @@ -2,10 +2,8 @@ t = ibis.table(name="t", schema={"a": "int64", "b": "string"}) +f = t.filter(t.b == "m") +agg = f.aggregate([f.a.sum().name("sum"), f.a.max()], by=[f.b]) +f1 = agg.filter(agg.Max(a) == 2) -result = ( - t.filter(t.b == "m") - .group_by(t.b) - .having(t.a.max() == 2) - .aggregate(t.a.sum().name("sum")) -) +result = f1.select(f1.b, f1.sum) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql index 68f73aceff66..0b54445c43ce 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql @@ -1,5 +1,28 @@ -SELECT t0.`b`, sum(t0.`a`) AS `sum` -FROM t t0 -WHERE t0.`b` = 'm' -GROUP BY 1 -HAVING max(t0.`a`) = 2 \ No newline at end of file +SELECT + t3.b AS b, + t3.sum AS sum +FROM ( + SELECT + * + FROM ( + SELECT + t1.b AS b, + SUM(t1.a) AS sum, + MAX(t1.a) AS "Max(a)" + FROM ( + SELECT + * + FROM t AS t0 + WHERE + ( + t0.b = 'm' + ) + ) AS t1 + GROUP BY + 1 + ) AS t2 + WHERE + ( + t2."Max(a)" = CAST(2 AS TINYINT) + ) +) AS t3 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql index ebea652d49ca..b2dc4a533237 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql @@ -1,4 +1,20 @@ -SELECT t0.`string_col`, count(1) AS `CountStar(functional_alltypes)` -FROM functional_alltypes t0 -GROUP BY 1 -HAVING max(t0.`double_col`) = 1 \ No newline at end of file +SELECT + t2.string_col AS string_col, + t2."CountStar()" AS "CountStar()" +FROM ( + SELECT + * + FROM ( + SELECT + t0.string_col AS string_col, + COUNT(*) AS "CountStar()", + MAX(t0.double_col) AS "Max(double_col)" + FROM functional_alltypes AS t0 + GROUP BY + 1 + ) AS t1 + WHERE + ( + t1."Max(double_col)" = CAST(1 AS TINYINT) + ) +) AS t2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/decompiled.py index 5118ece2e5f4..99cadb58de06 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/decompiled.py @@ -20,19 +20,12 @@ }, ) lit = ibis.literal(0) -alias = functional_alltypes.string_col.name("key") -intersection = ( - functional_alltypes.select( - [alias, functional_alltypes.float_col.cast("float64").name("value")] - ) - .filter(functional_alltypes.int_col > lit) - .intersect( - functional_alltypes.select( - [alias, functional_alltypes.double_col.name("value")] - ).filter(functional_alltypes.int_col <= lit), - distinct=True, - ) +f = functional_alltypes.filter(functional_alltypes.int_col > lit) +f1 = 
functional_alltypes.filter(functional_alltypes.int_col <= lit) +intersection = f.select( + f.string_col.name("key"), f.float_col.cast("float64").name("value") +).intersect( + f1.select(f1.string_col.name("key"), f1.double_col.name("value")), distinct=True ) -proj = intersection.select([intersection.key, intersection.value]) -result = proj.select(proj.key) +result = intersection.select(intersection.key) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql index e849cd866523..382428d10e10 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql @@ -1,21 +1,29 @@ -SELECT t0.`key` +SELECT + t5.key AS key FROM ( - SELECT t1.`key`, t1.`value` + SELECT + t1.string_col AS key, + CAST(t1.float_col AS DOUBLE) AS value FROM ( - WITH t2 AS ( - SELECT t4.`string_col` AS `key`, t4.`double_col` AS `value` - FROM functional_alltypes t4 - WHERE t4.`int_col` <= 0 - ), - t3 AS ( - SELECT t4.`string_col` AS `key`, CAST(t4.`float_col` AS double) AS `value` - FROM functional_alltypes t4 - WHERE t4.`int_col` > 0 - ) - SELECT * - FROM t3 - INTERSECT - SELECT * - FROM t2 - ) t1 -) t0 \ No newline at end of file + SELECT + * + FROM functional_alltypes AS t0 + WHERE + ( + t0.int_col > CAST(0 AS TINYINT) + ) + ) AS t1 + INTERSECT + SELECT + t2.string_col AS key, + t2.double_col AS value + FROM ( + SELECT + * + FROM functional_alltypes AS t0 + WHERE + ( + t0.int_col <= CAST(0 AS TINYINT) + ) + ) AS t2 +) AS t5 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/decompiled.py index 38bfb40e35aa..223cfa46792a 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/decompiled.py @@ -20,9 +20,10 @@ }, ) -result = functional_alltypes.group_by(functional_alltypes.string_col).aggregate( +result = functional_alltypes.aggregate( [ functional_alltypes.int_col.nunique().name("int_card"), functional_alltypes.smallint_col.nunique().name("smallint_card"), - ] + ], + by=[functional_alltypes.string_col], ) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql index 97baaa4d2137..bcb0a9b1869f 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql @@ -1,4 +1,7 @@ -SELECT t0.`string_col`, count(DISTINCT t0.`int_col`) AS `int_card`, - count(DISTINCT t0.`smallint_col`) AS `smallint_card` -FROM functional_alltypes t0 -GROUP BY 1 \ No newline at end of file +SELECT + t0.string_col AS string_col, + COUNT(DISTINCT t0.int_col) AS int_card, + COUNT(DISTINCT t0.smallint_col) AS smallint_card +FROM functional_alltypes AS t0 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql index c295debd740a..1fe424dcfbe9 100644 --- 
a/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql @@ -1,5 +1,18 @@ -SELECT t0.* -FROM functional_alltypes t0 -WHERE (t0.`double_col` > 3.14) AND - (locate('foo', t0.`string_col`) - 1 >= 0) AND - (((t0.`int_col` - 1) = 0) OR (t0.`float_col` <= 1.34)) \ No newline at end of file +SELECT + * +FROM functional_alltypes AS t0 +WHERE + ( + t0.double_col > CAST(3.14 AS DOUBLE) + ) + AND CONTAINS(t0.string_col, 'foo') + AND ( + ( + ( + t0.int_col - CAST(1 AS TINYINT) + ) = CAST(0 AS TINYINT) + ) + OR ( + t0.float_col <= CAST(1.34 AS DOUBLE) + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/decompiled.py index 4c39ab24b66c..d5fec08f28bd 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/decompiled.py @@ -20,18 +20,11 @@ }, ) lit = ibis.literal(0) -alias = functional_alltypes.string_col.name("key") -difference = ( - functional_alltypes.select( - [alias, functional_alltypes.float_col.cast("float64").name("value")] - ) - .filter(functional_alltypes.int_col > lit) - .difference( - functional_alltypes.select( - [alias, functional_alltypes.double_col.name("value")] - ).filter(functional_alltypes.int_col <= lit), - distinct=True, - ) -) +f = functional_alltypes.filter(functional_alltypes.int_col > lit) +f1 = functional_alltypes.filter(functional_alltypes.int_col <= lit) -result = difference.select([difference.key, difference.value]) +result = f.select( + f.string_col.name("key"), f.float_col.cast("float64").name("value") +).difference( + f1.select(f1.string_col.name("key"), f1.double_col.name("value")), distinct=True +) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql index dc7592273ce4..d0a0f1a458c6 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql @@ -1,18 +1,25 @@ -SELECT t0.`key`, t0.`value` +SELECT + t1.string_col AS key, + CAST(t1.float_col AS DOUBLE) AS value FROM ( - WITH t1 AS ( - SELECT t3.`string_col` AS `key`, t3.`double_col` AS `value` - FROM functional_alltypes t3 - WHERE t3.`int_col` <= 0 - ), - t2 AS ( - SELECT t3.`string_col` AS `key`, CAST(t3.`float_col` AS double) AS `value` - FROM functional_alltypes t3 - WHERE t3.`int_col` > 0 - ) - SELECT * - FROM t2 - EXCEPT - SELECT * - FROM t1 -) t0 \ No newline at end of file + SELECT + * + FROM functional_alltypes AS t0 + WHERE + ( + t0.int_col > CAST(0 AS TINYINT) + ) +) AS t1 +EXCEPT +SELECT + t2.string_col AS key, + t2.double_col AS value +FROM ( + SELECT + * + FROM functional_alltypes AS t0 + WHERE + ( + t0.int_col <= CAST(0 AS TINYINT) + ) +) AS t2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql index b0aa492cb968..f98b3697b64f 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql @@ -1,2 +1,8 @@ -SELECT DISTINCT t0.`string_col`, t0.`int_col` -FROM 
functional_alltypes t0 \ No newline at end of file +SELECT DISTINCT + * +FROM ( + SELECT + t0.string_col AS string_col, + t0.int_col AS int_col + FROM functional_alltypes AS t0 +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py index bcacc9049a93..51188cc25f4e 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py @@ -1,12 +1,11 @@ import ibis -s = ibis.table(name="s", schema={"b": "string"}) lit = ibis.timestamp("2018-01-01 00:00:00") +s = ibis.table(name="s", schema={"b": "string"}) t = ibis.table(name="t", schema={"a": "int64", "b": "string", "c": "timestamp"}) -proj = t.select([t.a, t.b, t.c.name("C")]) -proj1 = proj.filter(proj.C == lit) -proj2 = proj1.select([proj1.a, proj1.b, lit.name("the_date")]) -proj3 = proj2.inner_join(s, proj2.b == s.b).select(proj2.a) +f = t.filter(t.c == lit) +p = f.select(f.a, f.b, lit.name("the_date")) +joinchain = p.inner_join(s, p.b == s.b) -result = proj3.filter(proj3.a < 1.0) +result = joinchain.filter(joinchain.a < 1.0) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql index fc58d3195c63..f1828ac4c2f5 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql @@ -1,21 +1,27 @@ -WITH t0 AS ( - SELECT t4.`a`, t4.`b`, t4.`c` AS `C` - FROM t t4 -), -t1 AS ( - SELECT t0.* - FROM t0 - WHERE t0.`C` = '2018-01-01T00:00:00' -), -t2 AS ( - SELECT t1.`a`, t1.`b`, '2018-01-01T00:00:00' AS `the_date` - FROM t1 -) -SELECT t3.* +SELECT + * FROM ( - SELECT t2.`a` - FROM t2 - INNER JOIN s t4 - ON t2.`b` = t4.`b` -) t3 -WHERE t3.`a` < 1.0 \ No newline at end of file + SELECT + t3.a AS a + FROM ( + SELECT + t2.a AS a, + t2.b AS b, + MAKE_TIMESTAMP(2018, 1, 1, 0, 0, 0.0) AS the_date + FROM ( + SELECT + * + FROM t AS t1 + WHERE + ( + t1.c = MAKE_TIMESTAMP(2018, 1, 1, 0, 0, 0.0) + ) + ) AS t2 + ) AS t3 + INNER JOIN s AS t0 + ON t3.b = t0.b +) AS t5 +WHERE + ( + t5.a < CAST(1.0 AS DOUBLE) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/decompiled.py index 1f9d05ae454a..17014ebe6802 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/decompiled.py @@ -20,18 +20,11 @@ }, ) lit = ibis.literal(0) -alias = functional_alltypes.string_col.name("key") -intersection = ( - functional_alltypes.select( - [alias, functional_alltypes.float_col.cast("float64").name("value")] - ) - .filter(functional_alltypes.int_col > lit) - .intersect( - functional_alltypes.select( - [alias, functional_alltypes.double_col.name("value")] - ).filter(functional_alltypes.int_col <= lit), - distinct=True, - ) -) +f = functional_alltypes.filter(functional_alltypes.int_col > lit) +f1 = functional_alltypes.filter(functional_alltypes.int_col <= lit) -result = intersection.select([intersection.key, intersection.value]) +result = f.select( + f.string_col.name("key"), 
f.float_col.cast("float64").name("value") +).intersect( + f1.select(f1.string_col.name("key"), f1.double_col.name("value")), distinct=True +) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql index dd58f56759e2..a1a843860bae 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql @@ -1,18 +1,25 @@ -SELECT t0.`key`, t0.`value` +SELECT + t1.string_col AS key, + CAST(t1.float_col AS DOUBLE) AS value FROM ( - WITH t1 AS ( - SELECT t3.`string_col` AS `key`, t3.`double_col` AS `value` - FROM functional_alltypes t3 - WHERE t3.`int_col` <= 0 - ), - t2 AS ( - SELECT t3.`string_col` AS `key`, CAST(t3.`float_col` AS double) AS `value` - FROM functional_alltypes t3 - WHERE t3.`int_col` > 0 - ) - SELECT * - FROM t2 - INTERSECT - SELECT * - FROM t1 -) t0 \ No newline at end of file + SELECT + * + FROM functional_alltypes AS t0 + WHERE + ( + t0.int_col > CAST(0 AS TINYINT) + ) +) AS t1 +INTERSECT +SELECT + t2.string_col AS key, + t2.double_col AS value +FROM ( + SELECT + * + FROM functional_alltypes AS t0 + WHERE + ( + t0.int_col <= CAST(0 AS TINYINT) + ) +) AS t2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_union/decompiled.py index d0e56bad6b4f..ea48ae1fd416 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union/decompiled.py @@ -20,18 +20,11 @@ }, ) lit = ibis.literal(0) -alias = functional_alltypes.string_col.name("key") -union = ( - functional_alltypes.select( - [alias, functional_alltypes.float_col.cast("float64").name("value")] - ) - .filter(functional_alltypes.int_col > lit) - .union( - functional_alltypes.select( - [alias, functional_alltypes.double_col.name("value")] - ).filter(functional_alltypes.int_col <= lit), - distinct=True, - ) -) +f = functional_alltypes.filter(functional_alltypes.int_col > lit) +f1 = functional_alltypes.filter(functional_alltypes.int_col <= lit) -result = union.select([union.key, union.value]) +result = f.select( + f.string_col.name("key"), f.float_col.cast("float64").name("value") +).union( + f1.select(f1.string_col.name("key"), f1.double_col.name("value")), distinct=True +) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql index d585d437cbe9..fe2052bf4317 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql @@ -1,18 +1,25 @@ -SELECT t0.`key`, t0.`value` +SELECT + t1.string_col AS key, + CAST(t1.float_col AS DOUBLE) AS value FROM ( - WITH t1 AS ( - SELECT t3.`string_col` AS `key`, t3.`double_col` AS `value` - FROM functional_alltypes t3 - WHERE t3.`int_col` <= 0 - ), - t2 AS ( - SELECT t3.`string_col` AS `key`, CAST(t3.`float_col` AS double) AS `value` - FROM functional_alltypes t3 - WHERE t3.`int_col` > 0 - ) - SELECT * - FROM t2 - UNION - SELECT * - FROM t1 -) t0 \ No newline at end of file + SELECT + * + FROM functional_alltypes AS t0 + WHERE + ( + t0.int_col > CAST(0 AS TINYINT) + ) +) AS t1 +UNION +SELECT + t2.string_col AS key, + t2.double_col AS value +FROM ( + SELECT + * + FROM functional_alltypes AS t0 + WHERE + ( + 
t0.int_col <= CAST(0 AS TINYINT) + ) +) AS t2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/decompiled.py index 3731f3449a83..8ad01cf538e5 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/decompiled.py @@ -2,7 +2,6 @@ t = ibis.table(name="t", schema={"a": "int64", "b": "string"}) -proj = t.order_by(t.b.asc()) -union = proj.union(proj) +s = t.order_by(t.b.asc()) -result = union.select([union.a, union.b]) +result = s.union(s) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql index 3951fb9c74f2..9380eee82c2a 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql @@ -1,13 +1,11 @@ -SELECT t0.`a`, t0.`b` -FROM ( - WITH t1 AS ( - SELECT t2.* - FROM t t2 - ORDER BY t2.`b` ASC - ) - SELECT * - FROM t1 - UNION ALL - SELECT * - FROM t1 -) t0 \ No newline at end of file +SELECT + * +FROM t AS t0 +ORDER BY + t0.b ASC +UNION ALL +SELECT + * +FROM t AS t0 +ORDER BY + t0.b ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/decompiled.py index 66cfa539b334..7c33ae58b4db 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/decompiled.py @@ -20,18 +20,10 @@ }, ) lit = ibis.literal(0) -alias = functional_alltypes.string_col.name("key") -union = ( - functional_alltypes.select( - [alias, functional_alltypes.float_col.cast("float64").name("value")] - ) - .filter(functional_alltypes.int_col > lit) - .union( - functional_alltypes.select( - [alias, functional_alltypes.double_col.name("value")] - ).filter(functional_alltypes.int_col <= lit) - ) -) -proj = union.select([union.key, union.value]) +f = functional_alltypes.filter(functional_alltypes.int_col > lit) +f1 = functional_alltypes.filter(functional_alltypes.int_col <= lit) +union = f.select( + f.string_col.name("key"), f.float_col.cast("float64").name("value") +).union(f1.select(f1.string_col.name("key"), f1.double_col.name("value"))) -result = proj.select(proj.key) +result = union.select(union.key) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql index d8d9874335b7..15ac865a88b0 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql @@ -1,21 +1,29 @@ -SELECT t0.`key` +SELECT + t5.key AS key FROM ( - SELECT t1.`key`, t1.`value` + SELECT + t1.string_col AS key, + CAST(t1.float_col AS DOUBLE) AS value FROM ( - WITH t2 AS ( - SELECT t4.`string_col` AS `key`, t4.`double_col` AS `value` - FROM functional_alltypes t4 - WHERE t4.`int_col` <= 0 - ), - t3 AS ( - SELECT t4.`string_col` AS `key`, CAST(t4.`float_col` AS double) AS `value` - FROM functional_alltypes t4 - WHERE t4.`int_col` > 0 - ) - SELECT * - FROM t3 - UNION ALL - SELECT * - FROM t2 
- ) t1 -) t0 \ No newline at end of file + SELECT + * + FROM functional_alltypes AS t0 + WHERE + ( + t0.int_col > CAST(0 AS TINYINT) + ) + ) AS t1 + UNION ALL + SELECT + t2.string_col AS key, + t2.double_col AS value + FROM ( + SELECT + * + FROM functional_alltypes AS t0 + WHERE + ( + t0.int_col <= CAST(0 AS TINYINT) + ) + ) AS t2 +) AS t5 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py index c964df0eda21..c807087cf122 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py @@ -1,6 +1,10 @@ import ibis +tpch_region = ibis.table( + name="tpch_region", + schema={"r_regionkey": "int16", "r_name": "string", "r_comment": "string"}, +) tpch_nation = ibis.table( name="tpch_nation", schema={ @@ -10,15 +14,7 @@ "n_comment": "string", }, ) -tpch_region = ibis.table( - name="tpch_region", - schema={"r_regionkey": "int16", "r_name": "string", "r_comment": "string"}, -) -result = ( - tpch_region.inner_join( - tpch_nation, tpch_region.r_regionkey == tpch_nation.n_regionkey - ) - .select([tpch_nation, tpch_region.r_name.name("region")]) - .count() -) +result = tpch_region.inner_join( + tpch_nation, tpch_region.r_regionkey == tpch_nation.n_regionkey +).count() diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql index d96890d7589e..6407e4987c29 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql @@ -1,7 +1 @@ -SELECT count(1) AS `CountStar()` -FROM ( - SELECT t2.*, t1.`r_name` AS `region` - FROM tpch_region t1 - INNER JOIN tpch_nation t2 - ON t1.`r_regionkey` = t2.`n_regionkey` -) t0 \ No newline at end of file +COUNT(*) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql index aae031ab5be6..6da69b8c5673 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql @@ -1,4 +1,14 @@ -SELECT t0.`foo_id`, sum(t0.`f`) AS `total` -FROM star1 t0 -GROUP BY 1 -HAVING sum(t0.`f`) > 10 \ No newline at end of file +SELECT + * +FROM ( + SELECT + t0.foo_id AS foo_id, + SUM(t0.f) AS total + FROM star1 AS t0 + GROUP BY + 1 +) AS t1 +WHERE + ( + t1.total AS total > CAST(10 AS TINYINT) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql index 72cf2f0a1f54..214ec1ede144 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql @@ -1,4 +1,20 @@ -SELECT t0.`foo_id`, sum(t0.`f`) AS `total` -FROM star1 t0 -GROUP BY 1 -HAVING count(1) > 100 \ No newline at end of file +SELECT + t2.foo_id AS foo_id, + t2.total AS total +FROM ( + SELECT + * + FROM ( + SELECT + t0.foo_id AS 
foo_id, + SUM(t0.f) AS total, + COUNT(*) AS "CountStar()" + FROM star1 AS t0 + GROUP BY + 1 + ) AS t1 + WHERE + ( + t1."CountStar()" > CAST(100 AS TINYINT) + ) +) AS t2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql index 3d6905b14e28..72adf2407ec1 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql @@ -1,8 +1,16 @@ -SELECT t0.`foo_id`, sum(t0.`value1`) AS `total` +SELECT + t3.foo_id AS foo_id, + SUM(t3.value1) AS total FROM ( - SELECT t1.*, t2.`value1` - FROM star1 t1 - INNER JOIN star2 t2 - ON t1.`foo_id` = t2.`foo_id` -) t0 -GROUP BY 1 \ No newline at end of file + SELECT + t0.c AS c, + t0.f AS f, + t0.foo_id AS foo_id, + t0.bar_id AS bar_id, + t1.value1 AS value1 + FROM star1 AS t0 + INNER JOIN star2 AS t1 + ON t0.foo_id = t1.foo_id +) AS t3 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql index 4f6df3806a2d..3304bb7d330b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql @@ -1,8 +1,31 @@ -SELECT t0.`g`, sum(t0.`foo`) AS `foo total` +SELECT + t2.g AS g, + SUM(t2.foo) AS "foo total" FROM ( - SELECT t1.*, t1.`a` + t1.`b` AS `foo` - FROM alltypes t1 - WHERE (t1.`f` > 0) AND - (t1.`g` = 'bar') -) t0 -GROUP BY 1 \ No newline at end of file + SELECT + t1.a AS a, + t1.b AS b, + t1.c AS c, + t1.d AS d, + t1.e AS e, + t1.f AS f, + t1.g AS g, + t1.h AS h, + t1.i AS i, + t1.j AS j, + t1.k AS k, + t1.a + t1.b AS foo + FROM ( + SELECT + * + FROM alltypes AS t0 + WHERE + ( + t0.f > CAST(0 AS TINYINT) + ) AND ( + t0.g = 'bar' + ) + ) AS t1 +) AS t2 +GROUP BY + 1 diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql index b86f82d6b137..a2bbff51e021 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql @@ -1,8 +1,33 @@ -SELECT t0.`g`, sum(t0.`foo`) AS `foo total` +SELECT + t2.g AS g, + SUM(t2.foo) AS "foo total" FROM ( - SELECT t1.*, t1.`a` + t1.`b` AS `foo` - FROM alltypes t1 - WHERE t1.`f` > 0 -) t0 -WHERE t0.`foo` < 10 -GROUP BY 1 \ No newline at end of file + SELECT + t1.a AS a, + t1.b AS b, + t1.c AS c, + t1.d AS d, + t1.e AS e, + t1.f AS f, + t1.g AS g, + t1.h AS h, + t1.i AS i, + t1.j AS j, + t1.k AS k, + t1.a + t1.b AS foo + FROM ( + SELECT + * + FROM alltypes AS t0 + WHERE + ( + t0.f > CAST(0 AS TINYINT) + ) AND ( + ( + t0.a + t0.b + ) < CAST(10 AS TINYINT) + ) + ) AS t1 +) AS t2 +GROUP BY + 1 diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql index c5d9cb1217ce..4809093f21a0 100644 --- 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql @@ -1,4 +1,24 @@ -SELECT t0.*, t0.`a` + t0.`b` AS `foo` -FROM alltypes t0 -WHERE (t0.`f` > 0) AND - (t0.`g` = 'bar') \ No newline at end of file +SELECT + t1.a AS a, + t1.b AS b, + t1.c AS c, + t1.d AS d, + t1.e AS e, + t1.f AS f, + t1.g AS g, + t1.h AS h, + t1.i AS i, + t1.j AS j, + t1.k AS k, + t1.a + t1.b AS foo +FROM ( + SELECT + * + FROM alltypes AS t0 + WHERE + ( + t0.f > CAST(0 AS TINYINT) + ) AND ( + t0.g = 'bar' + ) +) AS t1 diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql index dfb892809a35..0208ce9dfe03 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql @@ -1,3 +1,22 @@ -SELECT t0.*, t0.`a` + t0.`b` AS `foo` -FROM alltypes t0 -WHERE t0.`f` > 0 \ No newline at end of file +SELECT + t1.a AS a, + t1.b AS b, + t1.c AS c, + t1.d AS d, + t1.e AS e, + t1.f AS f, + t1.g AS g, + t1.h AS h, + t1.i AS i, + t1.j AS j, + t1.k AS k, + t1.a + t1.b AS foo +FROM ( + SELECT + * + FROM alltypes AS t0 + WHERE + ( + t0.f > CAST(0 AS TINYINT) + ) +) AS t1 diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py index fde5a5fce9b8..1f1a500887e9 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py @@ -9,4 +9,4 @@ name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} ) -result = star1.anti_join(star2, star1.foo_id == star2.foo_id).select(star1) +result = star1.anti_join(star2, star1.foo_id == star2.foo_id) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql index 971fd1985c21..021e6eec101e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql @@ -1,4 +1,8 @@ -SELECT t0.* -FROM star1 t0 - LEFT ANTI JOIN star2 t1 - ON t0.`foo_id` = t1.`foo_id` \ No newline at end of file +SELECT + t0.c AS c, + t0.f AS f, + t0.foo_id AS foo_id, + t0.bar_id AS bar_id +FROM star1 AS t0 +ANTI JOIN star2 AS t1 + ON t0.foo_id = t1.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql index 35e0559cb01e..fd6b1490e250 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql @@ -1,3 +1,9 @@ -SELECT t0.* -FROM airlines t0 -WHERE (CAST(t0.`dest` AS bigint) = 0) = TRUE \ No newline at end of file +SELECT + * +FROM airlines AS t0 +WHERE + ( + ( + CAST(t0.dest AS BIGINT) = CAST(0 AS TINYINT) + ) = TRUE + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql index 7d1f43968acb..fb3f43560215 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql @@ -1,24 +1,55 @@ -WITH t0 AS ( - SELECT t3.*, t4.`n_name`, t5.`r_name` - FROM tpch_customer t3 - INNER JOIN tpch_nation t4 - ON t3.`c_nationkey` = t4.`n_nationkey` - INNER JOIN tpch_region t5 - ON t4.`n_regionkey` = t5.`r_regionkey` -), -t1 AS ( - SELECT t0.`n_name`, - sum(CAST(t0.`c_acctbal` AS double)) AS `Sum(Cast(c_acctbal, float64))` - FROM t0 - GROUP BY 1 -), -t2 AS ( - SELECT t1.* - FROM t1 - ORDER BY t1.`Sum(Cast(c_acctbal, float64))` DESC +SELECT + t5.c_name AS c_name, + t5.r_name AS r_name, + t5.n_name AS n_name +FROM ( + SELECT + t0.c_custkey AS c_custkey, + t0.c_name AS c_name, + t0.c_address AS c_address, + t0.c_nationkey AS c_nationkey, + t0.c_phone AS c_phone, + t0.c_acctbal AS c_acctbal, + t0.c_mktsegment AS c_mktsegment, + t0.c_comment AS c_comment, + t1.n_name AS n_name, + t2.r_name AS r_name + FROM tpch_customer AS t0 + INNER JOIN tpch_nation AS t1 + ON t0.c_nationkey = t1.n_nationkey + INNER JOIN tpch_region AS t2 + ON t1.n_regionkey = t2.r_regionkey +) AS t5 +SEMI JOIN ( + SELECT + * + FROM ( + SELECT + t5.n_name AS n_name, + SUM(CAST(t5.c_acctbal AS DOUBLE)) AS "Sum(Cast(c_acctbal, float64))" + FROM ( + SELECT + t0.c_custkey AS c_custkey, + t0.c_name AS c_name, + t0.c_address AS c_address, + t0.c_nationkey AS c_nationkey, + t0.c_phone AS c_phone, + t0.c_acctbal AS c_acctbal, + t0.c_mktsegment AS c_mktsegment, + t0.c_comment AS c_comment, + t1.n_name AS n_name, + t2.r_name AS r_name + FROM tpch_customer AS t0 + INNER JOIN tpch_nation AS t1 + ON t0.c_nationkey = t1.n_nationkey + INNER JOIN tpch_region AS t2 + ON t1.n_regionkey = t2.r_regionkey + ) AS t5 + GROUP BY + 1 + ) AS t6 + ORDER BY + t6."Sum(Cast(c_acctbal, float64))" DESC LIMIT 10 -) -SELECT t0.`c_name`, t0.`r_name`, t0.`n_name` -FROM t0 - LEFT SEMI JOIN t2 - ON t0.`n_name` = t2.`n_name` \ No newline at end of file +) AS t8 + ON t5.n_name = t8.n_name \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/decompiled.py index b438d3207191..6058efaa962e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/decompiled.py @@ -22,19 +22,27 @@ lit2 = ibis.literal("bar") result = alltypes.select( - [ - alltypes.g.case() - .when(lit, lit2) - .when(lit1, ibis.literal("qux")) - .else_(ibis.literal("default")) - .end() - .name("col1"), - ibis.case() - .when(alltypes.g == lit, lit2) - .when(alltypes.g == lit1, alltypes.g) - .else_(ibis.literal(None).cast("string")) - .end() - .name("col2"), - alltypes, - ] + alltypes.g.case() + .when(lit, lit2) + .when(lit1, ibis.literal("qux")) + .else_(ibis.literal("default")) + .end() + .name("col1"), + ibis.case() + .when(alltypes.g == lit, lit2) + .when(alltypes.g == lit1, alltypes.g) + .else_(ibis.literal(None).cast("string")) + .end() + .name("col2"), + alltypes.a, + alltypes.b, + alltypes.c, + alltypes.d, + alltypes.e, + alltypes.f, + alltypes.g, + alltypes.h, + alltypes.i, + alltypes.j, + alltypes.k, ) diff --git 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql index da032855d0de..03a2bfc76996 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql @@ -1,12 +1,21 @@ SELECT - CASE t0.`g` - WHEN 'foo' THEN 'bar' - WHEN 'baz' THEN 'qux' - ELSE 'default' - END AS `col1`, + CASE t0.g WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS col1, CASE - WHEN t0.`g` = 'foo' THEN 'bar' - WHEN t0.`g` = 'baz' THEN t0.`g` - ELSE CAST(NULL AS string) - END AS `col2`, t0.* -FROM alltypes t0 \ No newline at end of file + WHEN t0.g = 'foo' + THEN 'bar' + WHEN t0.g = 'baz' + THEN t0.g + ELSE CAST(NULL AS TEXT) + END AS col2, + t0.a AS a, + t0.b AS b, + t0.c AS c, + t0.d AS d, + t0.e AS e, + t0.f AS f, + t0.g AS g, + t0.h AS h, + t0.i AS i, + t0.j AS j, + t0.k AS k +FROM alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql index e25947808580..48625bcc8a00 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql @@ -1,18 +1,37 @@ -WITH t0 AS ( - SELECT t2.`city`, count(t2.`city`) AS `Count(city)` - FROM tbl t2 - GROUP BY 1 -) -SELECT t1.* +SELECT + * FROM ( - SELECT t0.* - FROM t0 - ORDER BY t0.`Count(city)` DESC + SELECT + * + FROM ( + SELECT + t0.city AS city, + COUNT(t0.city) AS "Count(city)" + FROM tbl AS t0 + GROUP BY + 1 + ) AS t1 + ORDER BY + t1."Count(city)" DESC LIMIT 10 -) t1 -LIMIT 5 OFFSET (SELECT count(1) + -5 FROM ( - SELECT t0.* - FROM t0 - ORDER BY t0.`Count(city)` DESC - LIMIT 10 -) t1) \ No newline at end of file +) AS t3 +LIMIT 5 +OFFSET ( + SELECT + COUNT(*) + CAST(-5 AS TINYINT) + FROM ( + SELECT + * + FROM ( + SELECT + t0.city AS city, + COUNT(t0.city) AS "Count(city)" + FROM tbl AS t0 + GROUP BY + 1 + ) AS t1 + ORDER BY + t1."Count(city)" DESC + LIMIT 10 + ) AS t3 +) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql index 09660f6902cc..fe43159d7cac 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql @@ -1,48 +1,9 @@ -WITH t0 AS ( - SELECT t5.`diag`, t5.`status` - FROM aids2_two t5 -), -t1 AS ( - SELECT t5.`diag`, t5.`status` - FROM aids2_one t5 -), -t2 AS ( - SELECT t0.`diag` + 1 AS `diag`, t0.`status` - FROM t0 -), -t3 AS ( - SELECT t1.`diag` + 1 AS `diag`, t1.`status` - FROM t1 -) -SELECT t4.`diag`, t4.`status` -FROM ( - WITH t0 AS ( - SELECT t5.`diag`, t5.`status` - FROM aids2_two t5 - ), - t1 AS ( - SELECT t5.`diag`, t5.`status` - FROM aids2_one t5 - ), - t2 AS ( - SELECT t0.`diag` + 1 AS `diag`, t0.`status` - FROM t0 - ), - t3 AS ( - SELECT t1.`diag` + 1 AS `diag`, t1.`status` - FROM t1 - ), - t5 AS ( - SELECT CAST(t2.`diag` AS int) AS `diag`, t2.`status` - FROM t2 - ), - t6 AS ( - SELECT CAST(t3.`diag` AS int) AS `diag`, t3.`status` - FROM t3 - ) - SELECT * - FROM t6 - UNION ALL - SELECT * - FROM t5 -) t4 \ No newline at end 
of file +SELECT + CAST(t0.diag + CAST(1 AS TINYINT) AS INT) AS diag, + t0.status AS status +FROM aids2_one AS t0 +UNION ALL +SELECT + CAST(t1.diag + CAST(1 AS TINYINT) AS INT) AS diag, + t1.status AS status +FROM aids2_two AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql index cdbb5aa93918..720c3146efdf 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql @@ -1,12 +1,26 @@ -WITH t0 AS ( - SELECT t2.`key1`, t2.`key2`, t2.`key3`, sum(t2.`value`) AS `total` - FROM foo_table t2 - GROUP BY 1, 2, 3 -) -SELECT t1.`key1`, sum(t1.`total`) AS `total` +SELECT + t2.key1 AS key1, + SUM(t2.total) AS total FROM ( - SELECT t0.`key1`, t0.`key2`, sum(t0.`total`) AS `total` - FROM t0 - GROUP BY 1, 2 -) t1 -GROUP BY 1 \ No newline at end of file + SELECT + t1.key1 AS key1, + t1.key2 AS key2, + SUM(t1.total) AS total + FROM ( + SELECT + t0.key1 AS key1, + t0.key2 AS key2, + t0.key3 AS key3, + SUM(t0.value) AS total + FROM foo_table AS t0 + GROUP BY + 1, + 2, + 3 + ) AS t1 + GROUP BY + 1, + 2 +) AS t2 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql index 72a7fe461d8c..a7b8c0e5c185 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql @@ -1,2 +1 @@ -SELECT t0.`foo_id` like concat('%', 'foo') AS `tmp` -FROM star1 t0 \ No newline at end of file +SUFFIX(t0.foo_id, 'foo') AS tmp \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql index 601dc361ff08..1ca28225ea6a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql @@ -1,12 +1,21 @@ -WITH t0 AS ( - SELECT t2.* - FROM purchases t2 - WHERE t2.`ts` > '2015-08-15' -) -SELECT t1.* -FROM events t1 -WHERE EXISTS ( - SELECT 1 - FROM t0 - WHERE t1.`user_id` = t0.`user_id` -) \ No newline at end of file +SELECT + * +FROM events AS t0 +WHERE + EXISTS( + ( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM ( + SELECT + * + FROM purchases AS t1 + WHERE + ( + t1.ts > '2015-08-15' + ) AND ( + t0.user_id = t1.user_id + ) + ) AS t2 + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql index 3ca2292d594e..fbd2b7d3c76d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql @@ -1,5 +1,7 @@ -SELECT t0.* -FROM t t0 -WHERE (lower(t0.`color`) LIKE '%de%') AND - (locate('de', lower(t0.`color`)) - 1 >= 0) AND - (regexp_like(lower(t0.`color`), '.*ge.*')) \ No newline at end of file +SELECT + * +FROM t AS t0 +WHERE + LOWER(t0.color) LIKE '%de%' + AND CONTAINS(LOWER(t0.color), 'de') + AND REGEXP_MATCHES(LOWER(t0.color), '.*ge.*', 
's') diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql index 84266c91887a..1ad66bd2d42e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql @@ -1,16 +1,40 @@ -WITH t0 AS ( - SELECT t2.`region`, t2.`kind`, sum(t2.`amount`) AS `total` - FROM purchases t2 - WHERE t2.`kind` = 'bar' - GROUP BY 1, 2 -), -t1 AS ( - SELECT t2.`region`, t2.`kind`, sum(t2.`amount`) AS `total` - FROM purchases t2 - WHERE t2.`kind` = 'foo' - GROUP BY 1, 2 -) -SELECT t1.`region`, t1.`total` - t0.`total` AS `diff` -FROM t1 - INNER JOIN t0 - ON t1.`region` = t0.`region` \ No newline at end of file +SELECT + t2.region AS region, + t2.total - t3.total AS diff +FROM ( + SELECT + * + FROM ( + SELECT + t0.region AS region, + t0.kind AS kind, + SUM(t0.amount) AS total + FROM purchases AS t0 + GROUP BY + 1, + 2 + ) AS t1 + WHERE + ( + t1.kind = 'foo' + ) +) AS t2 +INNER JOIN ( + SELECT + * + FROM ( + SELECT + t0.region AS region, + t0.kind AS kind, + SUM(t0.amount) AS total + FROM purchases AS t0 + GROUP BY + 1, + 2 + ) AS t1 + WHERE + ( + t1.kind = 'bar' + ) +) AS t3 + ON t2.region = t3.region \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql index 5b8dd20d8a72..5cb4f21a3d2c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql @@ -1,7 +1,21 @@ -SELECT t0.* -FROM star1 t0 -WHERE t0.`f` > ln(( - SELECT avg(t0.`f`) AS `Mean(f)` - FROM star1 t0 - WHERE t0.`foo_id` = 'foo' -)) \ No newline at end of file +SELECT + * +FROM star1 AS t0 +WHERE + ( + t0.f > LN( + ( + SELECT + AVG(t1.f) AS "Mean(f)" + FROM ( + SELECT + * + FROM star1 AS t0 + WHERE + ( + t0.foo_id = 'foo' + ) + ) AS t1 + ) + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql index 74192a7c1507..4c5fa610555f 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql @@ -1,7 +1,23 @@ -SELECT t0.* -FROM star1 t0 -WHERE t0.`f` > (ln(( - SELECT avg(t0.`f`) AS `Mean(f)` - FROM star1 t0 - WHERE t0.`foo_id` = 'foo' -)) + 1) \ No newline at end of file +SELECT + * +FROM star1 AS t0 +WHERE + ( + t0.f > ( + LN( + ( + SELECT + AVG(t1.f) AS "Mean(f)" + FROM ( + SELECT + * + FROM star1 AS t0 + WHERE + ( + t0.foo_id = 'foo' + ) + ) AS t1 + ) + ) + CAST(1 AS TINYINT) + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql index 74da42fa06bd..6ddb56dc6476 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql +++ 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql @@ -1,5 +1,7 @@ -SELECT t0.*, t0.`foo` * 2 AS `qux` -FROM ( - SELECT t1.*, t1.`foo` + t1.`bar` AS `baz` - FROM tbl t1 -) t0 \ No newline at end of file +SELECT + t0.foo AS foo, + t0.bar AS bar, + t0.value AS value, + t0.foo + t0.bar AS baz, + t0.foo * CAST(2 AS TINYINT) AS qux +FROM tbl AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql index 7324cd727f5f..8383b5c79c8a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql @@ -1,6 +1,15 @@ -SELECT *, `foo` * 2 AS `qux` +SELECT + t1.foo AS foo, + t1.bar AS bar, + t1.value AS value, + t1.foo + t1.bar AS baz, + t1.foo * CAST(2 AS TINYINT) AS qux FROM ( - SELECT t1.*, t1.`foo` + t1.`bar` AS `baz` - FROM tbl t1 - WHERE t1.`value` > 0 -) t0 \ No newline at end of file + SELECT + * + FROM tbl AS t0 + WHERE + ( + t0.value > CAST(0 AS TINYINT) + ) +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql index 31d90d8c221e..f768122da94c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql @@ -1,2 +1,4 @@ -SELECT t0.`date` AS `else`, t0.`explain` AS `join` -FROM table t0 \ No newline at end of file +SELECT + t0.date AS else, + t0.explain AS join +FROM table AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql index 674aa2375012..73d0a7bfd20c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql @@ -1,6 +1,13 @@ -SELECT t0.* +SELECT + t1.x + CAST(1 AS TINYINT) AS x FROM ( - SELECT t1.`x` + 1 AS `x` - FROM t t1 -) t0 -WHERE t0.`x` > 1 \ No newline at end of file + SELECT + * + FROM t AS t0 + WHERE + ( + ( + t0.x + CAST(1 AS TINYINT) + ) > CAST(1 AS TINYINT) + ) +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql index daf0620a4166..b8ceefcb67c9 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql @@ -1,6 +1,11 @@ -SELECT t0.* +SELECT + CAST(1 AS TINYINT) AS a FROM ( - SELECT 1 AS `a` - FROM t t1 -) t0 -WHERE t0.`a` > 1 \ No newline at end of file + SELECT + * + FROM t AS t0 + WHERE + ( + CAST(1 AS TINYINT) > CAST(1 AS TINYINT) + ) +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py index d1d51461058d..029cc24d6008 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py @@ -4,18 +4,12 @@ first = ibis.table( name="first", schema={"key1": "string", "key2": "string", "value1": "float64"} ) +second = ibis.table(name="second", schema={"key1": "string", "value2": "float64"}) third = ibis.table( name="third", schema={"key2": "string", "key3": "string", "value3": "float64"} ) -second = ibis.table(name="second", schema={"key1": "string", "value2": "float64"}) fourth = ibis.table(name="fourth", schema={"key3": "string", "value4": "float64"}) -proj = first.inner_join(second, first.key1 == second.key1).select( - [first, second.value2] -) -proj1 = third.inner_join(fourth, third.key3 == fourth.key3).select( - [third, fourth.value4] -) +joinchain = first.inner_join(second, first.key1 == second.key1) +joinchain1 = third.inner_join(fourth, third.key3 == fourth.key3) -result = proj.inner_join(proj1, proj.key2 == proj1.key2).select( - [proj, proj1.value3, proj1.value4] -) +result = joinchain.inner_join(joinchain1, joinchain.key2 == joinchain1.key2) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql index d7b970431a49..86958930e55e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql @@ -1,16 +1,28 @@ -WITH t0 AS ( - SELECT t2.*, t3.`value4` - FROM third t2 - INNER JOIN fourth t3 - ON t2.`key3` = t3.`key3` -), -t1 AS ( - SELECT t2.*, t3.`value2` - FROM first t2 - INNER JOIN second t3 - ON t2.`key1` = t3.`key1` -) -SELECT t1.*, t0.`value3`, t0.`value4` -FROM t1 - INNER JOIN t0 - ON t1.`key2` = t0.`key2` \ No newline at end of file +SELECT + t6.key1 AS key1, + t6.key2 AS key2, + t6.value1 AS value1, + t6.value2 AS value2, + t7.value3 AS value3, + t7.value4 AS value4 +FROM ( + SELECT + t0.key1 AS key1, + t0.key2 AS key2, + t0.value1 AS value1, + t1.value2 AS value2 + FROM first AS t0 + INNER JOIN second AS t1 + ON t0.key1 = t1.key1 +) AS t6 +INNER JOIN ( + SELECT + t2.key2 AS key2, + t2.key3 AS key3, + t2.value3 AS value3, + t3.value4 AS value4 + FROM third AS t2 + INNER JOIN fourth AS t3 + ON t2.key3 = t3.key3 +) AS t7 + ON t6.key2 = t7.key2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py index 071de55b252a..44a6ed339c71 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py @@ -1,19 +1,6 @@ import ibis -tpch_customer = ibis.table( - name="tpch_customer", - schema={ - "c_custkey": "int64", - "c_name": "string", - "c_address": "string", - "c_nationkey": "int16", - "c_phone": "string", - "c_acctbal": "decimal", - "c_mktsegment": "string", - "c_comment": "string", - }, -) tpch_nation = ibis.table( name="tpch_nation", schema={ @@ -27,6 +14,19 @@ name="tpch_region", schema={"r_regionkey": "int16", "r_name": "string", "r_comment": "string"}, ) +tpch_customer = ibis.table( + name="tpch_customer", + 
schema={ + "c_custkey": "int64", + "c_name": "string", + "c_address": "string", + "c_nationkey": "int16", + "c_phone": "string", + "c_acctbal": "decimal", + "c_mktsegment": "string", + "c_comment": "string", + }, +) result = tpch_nation.inner_join( tpch_region, tpch_nation.n_regionkey == tpch_region.r_regionkey diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql index f21373ec71c4..7d32758ef61c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql @@ -1,6 +1,21 @@ -SELECT * -FROM tpch_nation t0 - INNER JOIN tpch_region t1 - ON t0.`n_regionkey` = t1.`r_regionkey` - INNER JOIN tpch_customer t2 - ON t0.`n_nationkey` = t2.`c_nationkey` \ No newline at end of file +SELECT + t0.n_nationkey AS n_nationkey, + t0.n_name AS n_name, + t0.n_regionkey AS n_regionkey, + t0.n_comment AS n_comment, + t1.r_regionkey AS r_regionkey, + t1.r_name AS r_name, + t1.r_comment AS r_comment, + t2.c_custkey AS c_custkey, + t2.c_name AS c_name, + t2.c_address AS c_address, + t2.c_nationkey AS c_nationkey, + t2.c_phone AS c_phone, + t2.c_acctbal AS c_acctbal, + t2.c_mktsegment AS c_mktsegment, + t2.c_comment AS c_comment +FROM tpch_nation AS t0 +INNER JOIN tpch_region AS t1 + ON t0.n_regionkey = t1.r_regionkey +INNER JOIN tpch_customer AS t2 + ON t0.n_nationkey = t2.c_nationkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql index b6eaf7d52b2b..3d73c83b16db 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql @@ -1,10 +1,23 @@ -WITH t0 AS ( - SELECT t2.`n_nationkey`, t2.`n_name` AS `nation`, t3.`r_name` AS `region` - FROM tpch_nation t2 - INNER JOIN tpch_region t3 - ON t2.`n_regionkey` = t3.`r_regionkey` -) -SELECT t1.*, t0.* -FROM t0 - INNER JOIN tpch_customer t1 - ON t0.`n_nationkey` = t1.`c_nationkey` \ No newline at end of file +SELECT + t1.c_custkey AS c_custkey, + t1.c_name AS c_name, + t1.c_address AS c_address, + t1.c_nationkey AS c_nationkey, + t1.c_phone AS c_phone, + t1.c_acctbal AS c_acctbal, + t1.c_mktsegment AS c_mktsegment, + t1.c_comment AS c_comment, + t4.n_nationkey AS n_nationkey, + t4.nation AS nation, + t4.region AS region +FROM ( + SELECT + t0.n_nationkey AS n_nationkey, + t0.n_name AS nation, + t2.r_name AS region + FROM tpch_nation AS t0 + INNER JOIN tpch_region AS t2 + ON t0.n_regionkey = t2.r_regionkey +) AS t4 +INNER JOIN tpch_customer AS t1 + ON t4.n_nationkey = t1.c_nationkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql new file mode 100644 index 000000000000..fe9f2b5b67f3 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql @@ -0,0 +1,23 @@ +SELECT + t0.on AS on, + t0.by AS by, + t1.on AS on_right, + t1.by AS by_right, + t1.val AS val +FROM left AS t0 +LEFT OUTER JOIN right AS t1 + ON t0.by = t1.by +WHERE + t1.on = ( + SELECT + MAX(t3.on) AS 
"Max(on)" + FROM ( + SELECT + t1.on AS on, + t1.by AS by, + t1.val AS val + FROM right AS t1 + WHERE + t1.by = t0.by AND t1.on <= t0.on + ) AS t3 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql index b7b9d6c68d14..136cb7d2c69d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql @@ -1,9 +1,13 @@ -WITH t0 AS ( - SELECT t2.* - FROM star1 t2 +SELECT + t2.c AS c, + t2.f AS f, + t2.foo_id AS foo_id, + t2.bar_id AS bar_id +FROM ( + SELECT + * + FROM star1 AS t0 LIMIT 100 -) -SELECT t0.* -FROM t0 - INNER JOIN star2 t1 - ON t0.`foo_id` = t1.`foo_id` \ No newline at end of file +) AS t2 +INNER JOIN star2 AS t1 + ON t2.foo_id = t1.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql index ed179f1e792d..6407e4987c29 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql @@ -1,21 +1 @@ -SELECT count(1) AS `CountStar()` -FROM ( - SELECT t1.`id`, t1.`bool_col`, t1.`tinyint_col`, t1.`smallint_col`, - t1.`int_col`, t1.`bigint_col`, t1.`float_col`, t1.`double_col`, - t1.`date_string_col`, t1.`string_col`, t1.`timestamp_col`, - t1.`year`, t1.`month`, t2.`id` AS `id_right`, - t2.`bool_col` AS `bool_col_right`, - t2.`tinyint_col` AS `tinyint_col_right`, - t2.`smallint_col` AS `smallint_col_right`, - t2.`int_col` AS `int_col_right`, - t2.`bigint_col` AS `bigint_col_right`, - t2.`float_col` AS `float_col_right`, - t2.`double_col` AS `double_col_right`, - t2.`date_string_col` AS `date_string_col_right`, - t2.`string_col` AS `string_col_right`, - t2.`timestamp_col` AS `timestamp_col_right`, - t2.`year` AS `year_right`, t2.`month` AS `month_right` - FROM functional_alltypes t1 - INNER JOIN functional_alltypes t2 - ON t1.`tinyint_col` < extract(t2.`timestamp_col`, 'minute') -) t0 \ No newline at end of file +COUNT(*) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql index 66a29f8afa3f..bf1ecb1da578 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql @@ -1,16 +1,24 @@ -WITH t0 AS ( - SELECT t2.* - FROM bar t2 - WHERE t2.`id` < 3 -), -t1 AS ( - SELECT t2.* - FROM foo t2 - WHERE t2.`id` < 2 -) -SELECT t1.`id` AS `left_id`, t1.`desc` AS `left_desc`, t0.`id` AS `right_id`, - t0.`desc` AS `right_desc` -FROM t1 - LEFT OUTER JOIN t0 - ON (t1.`id` = t0.`id`) AND - (t1.`desc` = t0.`desc`) \ No newline at end of file +SELECT + t2.id AS left_id, + t2.desc AS left_desc, + t3.id AS right_id, + t3.desc AS right_desc +FROM ( + SELECT + * + FROM foo AS t0 + WHERE + ( + t0.id < CAST(2 AS TINYINT) + ) +) AS t2 +LEFT OUTER JOIN ( + SELECT + * + FROM bar AS t1 + WHERE + ( + t1.id < CAST(3 AS TINYINT) + ) +) AS t3 + ON t2.id = t3.id AND t2.desc = t3.desc \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py index da8275915ad4..894f52809414 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py @@ -5,24 +5,11 @@ name="star1", schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, ) -star3 = ibis.table(name="star3", schema={"bar_id": "string", "value2": "float64"}) star2 = ibis.table( name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} ) +star3 = ibis.table(name="star3", schema={"bar_id": "string", "value2": "float64"}) -result = ( - star1.left_join(star2, star1.foo_id == star2.foo_id) - .select( - [ - star1.c, - star1.f, - star1.foo_id, - star1.bar_id, - star2.foo_id.name("foo_id_right"), - star2.value1, - star2.value3, - ] - ) - .inner_join(star3, star1.bar_id == star3.bar_id) - .select([star1, star2.value1, star3.value2]) +result = star1.left_join(star2, star1.foo_id == star2.foo_id).inner_join( + star3, star1.bar_id == star3.bar_id ) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql index 2e4276f4463e..79ad2e20002d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql @@ -1,10 +1,12 @@ -SELECT *, `value1`, t1.`value2` -FROM ( - SELECT t2.`c`, t2.`f`, t2.`foo_id`, t2.`bar_id`, - t3.`foo_id` AS `foo_id_right`, t3.`value1`, t3.`value3` - FROM star1 t2 - LEFT OUTER JOIN star2 t3 - ON t2.`foo_id` = t3.`foo_id` -) t0 - INNER JOIN star3 t1 - ON `bar_id` = t1.`bar_id` \ No newline at end of file +SELECT + t0.c AS c, + t0.f AS f, + t0.foo_id AS foo_id, + t0.bar_id AS bar_id, + t1.value1 AS value1, + t2.value2 AS value2 +FROM star1 AS t0 +LEFT OUTER JOIN star2 AS t1 + ON t0.foo_id = t1.foo_id +INNER JOIN star3 AS t2 + ON t0.bar_id = t2.bar_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql index b4f67ae8d56d..b88ec7dcdd9d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql @@ -1,7 +1,9 @@ -SELECT t0.* +SELECT + * FROM ( - SELECT t1.* - FROM functional_alltypes t1 + SELECT + * + FROM functional_alltypes AS t0 LIMIT 20 -) t0 -LIMIT 10 \ No newline at end of file +) AS t1 +LIMIT 10 diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/decompiled.py index 1a8369bd146b..1675b491d5db 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/decompiled.py @@ -19,8 +19,8 @@ "month": "int32", }, ) -agg = functional_alltypes.group_by(functional_alltypes.string_col).aggregate( - functional_alltypes.count().name("nrows") +agg = functional_alltypes.aggregate( + [functional_alltypes.count().name("nrows")], by=[functional_alltypes.string_col] ) limit = 
agg.limit(5) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql index 687196f1f9b2..bbc32482815d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql @@ -1,8 +1,13 @@ -SELECT t0.* +SELECT + * FROM ( - SELECT t1.`string_col`, count(1) AS `nrows` - FROM functional_alltypes t1 - GROUP BY 1 + SELECT + t0.string_col AS string_col, + COUNT(*) AS nrows + FROM functional_alltypes AS t0 + GROUP BY + 1 LIMIT 5 -) t0 -ORDER BY t0.`string_col` ASC \ No newline at end of file +) AS t2 +ORDER BY + t2.string_col ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql index c689bbdae2d2..a37945ee2f13 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql @@ -1,6 +1,11 @@ -SELECT t0.* -FROM foo t0 -WHERE t0.`y` > ( - SELECT max(t1.`x`) AS `Max(x)` - FROM bar t1 -) \ No newline at end of file +SELECT + * +FROM foo AS t0 +WHERE + ( + t0.y > ( + SELECT + MAX(t1.x) AS "Max(x)" + FROM bar AS t1 + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/decompiled.py index 8993f672fc63..824342590c0c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/decompiled.py @@ -6,4 +6,4 @@ schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, ) -result = star1.group_by(star1.foo_id).aggregate(star1.f.sum().name("total")) +result = star1.aggregate([star1.f.sum().name("total")], by=[star1.foo_id]) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql index 5b190f3b2157..29e72c8336aa 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql @@ -1,3 +1,6 @@ -SELECT t0.`foo_id`, sum(t0.`f`) AS `total` -FROM star1 t0 -GROUP BY 1 \ No newline at end of file +SELECT + t0.foo_id AS foo_id, + SUM(t0.f) AS total +FROM star1 AS t0 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/decompiled.py index 96eb30f61071..da7840af5f74 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/decompiled.py @@ -6,6 +6,4 @@ schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, ) -result = 
star1.group_by([star1.foo_id, star1.bar_id]).aggregate( - star1.f.sum().name("total") -) +result = star1.aggregate([star1.f.sum().name("total")], by=[star1.foo_id, star1.bar_id]) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql index eea2be13c7a2..116832bf3c3a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql @@ -1,3 +1,8 @@ -SELECT t0.`foo_id`, t0.`bar_id`, sum(t0.`f`) AS `total` -FROM star1 t0 -GROUP BY 1, 2 \ No newline at end of file +SELECT + t0.foo_id AS foo_id, + t0.bar_id AS bar_id, + SUM(t0.f) AS total +FROM star1 AS t0 +GROUP BY + 1, + 2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql index 978aa3734aed..6407e4987c29 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql @@ -1,2 +1 @@ -SELECT count(1) AS `CountStar(star1)` -FROM star1 t0 \ No newline at end of file +COUNT(*) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql index c281273cc2b1..ec694656a01b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql @@ -1,4 +1,8 @@ -SELECT t0.* -FROM star1 t0 -WHERE t0.`f` > 0 +SELECT + * +FROM star1 AS t0 +WHERE + ( + t0.f > CAST(0 AS TINYINT) + ) LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql index 9423c5fcba1c..2b6d0fe52716 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql @@ -1,3 +1,4 @@ -SELECT t0.* -FROM star1 t0 +SELECT + * +FROM star1 AS t0 LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql index 371e8e9ed0be..431ba054f9f9 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql @@ -1,7 +1,12 @@ -SELECT t0.* +SELECT + * FROM ( - SELECT t1.* - FROM star1 t1 + SELECT + * + FROM star1 AS t0 LIMIT 10 -) t0 -WHERE t0.`f` > 0 \ No newline at end of file +) AS t1 +WHERE + ( + t1.f > CAST(0 AS TINYINT) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql index 42a7a0310f16..3c71bda9b962 
100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql @@ -1,3 +1,5 @@ -SELECT t0.* -FROM star1 t0 -LIMIT 10 OFFSET 5 \ No newline at end of file +SELECT + * +FROM star1 AS t0 +LIMIT 10 +OFFSET 5 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/out.sql index c3a9a54ab86a..2e1820e62e9f 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/out.sql @@ -1,2 +1,3 @@ -SELECT t0.* -FROM star1 t0 \ No newline at end of file +SELECT + * +FROM star1 AS star1_ref \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql index 69691039e13d..a401619ae2d8 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql @@ -1,3 +1,5 @@ -SELECT t0.* -FROM star1 t0 -ORDER BY t0.`f` ASC \ No newline at end of file +SELECT + * +FROM star1 AS t0 +ORDER BY + t0.f ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql index 69ab711dca56..f223e15ca36b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql @@ -1,2 +1,3 @@ -SELECT t0.* -FROM alltypes t0 \ No newline at end of file +SELECT + * +FROM alltypes \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql index a42ae0a70e7f..374a6dbdd0e7 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql @@ -1,10 +1,30 @@ -WITH t0 AS ( - SELECT t2.*, t3.* - FROM tpch_region t2 - INNER JOIN tpch_nation t3 - ON t2.`r_regionkey` = t3.`n_regionkey` -) -SELECT t0.`r_name`, t1.`n_name` -FROM t0 - INNER JOIN t0 t1 - ON t0.`r_regionkey` = t1.`r_regionkey` \ No newline at end of file +SELECT + t3.r_name AS r_name, + t4.n_name AS n_name +FROM ( + SELECT + t0.r_regionkey AS r_regionkey, + t0.r_name AS r_name, + t0.r_comment AS r_comment, + t1.n_nationkey AS n_nationkey, + t1.n_name AS n_name, + t1.n_regionkey AS n_regionkey, + t1.n_comment AS n_comment + FROM tpch_region AS t0 + INNER JOIN tpch_nation AS t1 + ON t0.r_regionkey = t1.n_regionkey +) AS t3 +INNER JOIN ( + SELECT + t0.r_regionkey AS r_regionkey, + t0.r_name AS r_name, + t0.r_comment AS r_comment, + t1.n_nationkey AS n_nationkey, + t1.n_name AS n_name, + t1.n_regionkey AS n_regionkey, + t1.n_comment AS n_comment + FROM tpch_region 
AS t0 + INNER JOIN tpch_nation AS t1 + ON t0.r_regionkey = t1.n_regionkey +) AS t4 + ON t3.r_regionkey = t4.r_regionkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py index bd4953665f16..02140f8bb8a7 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py @@ -9,4 +9,4 @@ name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} ) -result = star1.semi_join(star2, star1.foo_id == star2.foo_id).select(star1) +result = star1.semi_join(star2, star1.foo_id == star2.foo_id) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql index dc3915e054a0..0cfc52ab309b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql @@ -1,4 +1,8 @@ -SELECT t0.* -FROM star1 t0 - LEFT SEMI JOIN star2 t1 - ON t0.`foo_id` = t1.`foo_id` \ No newline at end of file +SELECT + t0.c AS c, + t0.f AS f, + t0.foo_id AS foo_id, + t0.bar_id AS bar_id +FROM star1 AS t0 +SEMI JOIN star2 AS t1 + ON t0.foo_id = t1.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py index 08cedadf59d9..09d698686546 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py @@ -11,4 +11,4 @@ result = star1.inner_join( star2, [star1.foo_id == star2.foo_id, star1.bar_id == star2.foo_id] -).select(star1) +) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql index a4895f7b057b..e1aed9698bc1 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql @@ -1,4 +1,8 @@ -SELECT t0.* -FROM star1 t0 - INNER JOIN star2 t1 - ON t0.`foo_id` = t1.`foo_id` \ No newline at end of file +SELECT + t0.c AS c, + t0.f AS f, + t0.foo_id AS foo_id, + t0.bar_id AS bar_id +FROM star1 AS t0 +INNER JOIN star2 AS t1 + ON t0.foo_id = t1.foo_id diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql index d1fa6db8b809..63cc978163fc 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql @@ -1,5 +1,8 @@ -SELECT t0.* -FROM star1 t0 - INNER JOIN star2 t1 - ON (t0.`foo_id` = t1.`foo_id`) AND - (t0.`bar_id` = t1.`foo_id`) \ No newline at end of file +SELECT + t0.c AS c, + t0.f AS f, + t0.foo_id AS foo_id, + t0.bar_id AS bar_id +FROM star1 AS t0 +INNER JOIN star2 AS t1 + ON t0.foo_id = t1.foo_id AND t0.bar_id = t1.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql index 390d8f0faf19..3cc6681236ae 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql @@ -1,4 +1,8 @@ -SELECT t0.* -FROM star1 t0 - LEFT OUTER JOIN star2 t1 - ON t0.`foo_id` = t1.`foo_id` \ No newline at end of file +SELECT + t0.c AS c, + t0.f AS f, + t0.foo_id AS foo_id, + t0.bar_id AS bar_id +FROM star1 AS t0 +LEFT OUTER JOIN star2 AS t1 + ON t0.foo_id = t1.foo_id diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql index 8ca7b3cb80d0..09d791f98b1e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql @@ -1,4 +1,8 @@ -SELECT t0.* -FROM star1 t0 - FULL OUTER JOIN star2 t1 - ON t0.`foo_id` = t1.`foo_id` \ No newline at end of file +SELECT + t0.c AS c, + t0.f AS f, + t0.foo_id AS foo_id, + t0.bar_id AS bar_id +FROM star1 AS t0 +FULL OUTER JOIN star2 AS t1 + ON t0.foo_id = t1.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql index 35e0d7a3c289..8e97174b77d9 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql @@ -1,11 +1,16 @@ -WITH t0 AS ( - SELECT t2.* - FROM t t2 - ORDER BY t2.`a` ASC -) -SELECT t1.`b`, count(1) AS `b_count` +SELECT + t2.b AS b, + COUNT(*) AS b_count FROM ( - SELECT t0.`b` - FROM t0 -) t1 -GROUP BY 1 \ No newline at end of file + SELECT + t1.b AS b + FROM ( + SELECT + * + FROM t AS t0 + ORDER BY + t0.a ASC + ) AS t1 +) AS t2 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql index 0fa7d5376fd9..b6331cb95031 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql @@ -1,11 +1,16 @@ -WITH t0 AS ( - SELECT t2.* - FROM t t2 - ORDER BY t2.`b` ASC -) -SELECT t1.`b`, count(1) AS `b_count` +SELECT + t2.b AS b, + COUNT(*) AS b_count FROM ( - SELECT t0.`b` - FROM t0 -) t1 -GROUP BY 1 \ No newline at end of file + SELECT + t1.b AS b + FROM ( + SELECT + * + FROM t AS t0 + ORDER BY + t0.b ASC + ) AS t1 +) AS t2 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql index 697e3cda4882..bf7995d43b4e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql @@ -1,2 +1 @@ -SELECT t0.`foo_id` like concat('foo', '%') AS `tmp` -FROM star1 t0 \ No newline at end of file +STARTS_WITH(t0.foo_id, 'foo') AS tmp \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql index 3000f853cfbc..67f775236f0f 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql @@ -1,19 +1,60 @@ -WITH t0 AS ( - SELECT t3.*, t1.`r_name` AS `region`, t4.`o_totalprice` AS `amount`, - CAST(t4.`o_orderdate` AS timestamp) AS `odate` - FROM tpch_region t1 - INNER JOIN tpch_nation t2 - ON t1.`r_regionkey` = t2.`n_regionkey` - INNER JOIN tpch_customer t3 - ON t3.`c_nationkey` = t2.`n_nationkey` - INNER JOIN tpch_orders t4 - ON t4.`o_custkey` = t3.`c_custkey` -) -SELECT t0.* -FROM t0 -WHERE t0.`amount` > ( - SELECT avg(t1.`amount`) AS `Mean(amount)` - FROM t0 t1 - WHERE t1.`region` = t0.`region` -) +SELECT + * +FROM ( + SELECT + t2.c_custkey AS c_custkey, + t2.c_name AS c_name, + t2.c_address AS c_address, + t2.c_nationkey AS c_nationkey, + t2.c_phone AS c_phone, + t2.c_acctbal AS c_acctbal, + t2.c_mktsegment AS c_mktsegment, + t2.c_comment AS c_comment, + t0.r_name AS region, + t3.o_totalprice AS amount, + CAST(t3.o_orderdate AS TIMESTAMP) AS odate + FROM tpch_region AS t0 + INNER JOIN tpch_nation AS t1 + ON t0.r_regionkey = t1.n_regionkey + INNER JOIN tpch_customer AS t2 + ON t2.c_nationkey = t1.n_nationkey + INNER JOIN tpch_orders AS t3 + ON t3.o_custkey = t2.c_custkey +) AS t7 +WHERE + ( + t7.amount > ( + SELECT + AVG(t9.amount) AS "Mean(amount)" + FROM ( + SELECT + * + FROM ( + SELECT + t2.c_custkey AS c_custkey, + t2.c_name AS c_name, + t2.c_address AS c_address, + t2.c_nationkey AS c_nationkey, + t2.c_phone AS c_phone, + t2.c_acctbal AS c_acctbal, + t2.c_mktsegment AS c_mktsegment, + t2.c_comment AS c_comment, + t0.r_name AS region, + t3.o_totalprice AS amount, + CAST(t3.o_orderdate AS TIMESTAMP) AS odate + FROM tpch_region AS t0 + INNER JOIN tpch_nation AS t1 + ON t0.r_regionkey = t1.n_regionkey + INNER JOIN tpch_customer AS t2 + ON t2.c_nationkey = t1.n_nationkey + INNER JOIN tpch_orders AS t3 + ON t3.o_custkey = t2.c_custkey + ) AS t8 + WHERE + ( + t8.region = t7.region + ) + ) AS t9 + ) + ) LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql index cac23b33d30f..6829667d5210 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql @@ -1,6 +1,11 @@ -SELECT t0.* -FROM star1 t0 -WHERE t0.`f` > ( - SELECT avg(t0.`f`) AS `Mean(f)` - FROM star1 t0 -) \ No newline at end of file +SELECT + * +FROM star1 AS t0 +WHERE + ( + t0.f > ( + SELECT + AVG(t0.f) AS "Mean(f)" + FROM star1 AS t0 + ) + ) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql index 4a2a9856ac67..d196e74c2bf7 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql @@ -1,7 +1,19 @@ -SELECT t0.* -FROM star1 t0 -WHERE t0.`f` > ( - SELECT 
avg(t0.`f`) AS `Mean(f)` - FROM star1 t0 - WHERE t0.`foo_id` = 'foo' -) \ No newline at end of file +SELECT + * +FROM star1 AS t0 +WHERE + ( + t0.f > ( + SELECT + AVG(t1.f) AS "Mean(f)" + FROM ( + SELECT + * + FROM star1 AS t0 + WHERE + ( + t0.foo_id = 'foo' + ) + ) AS t1 + ) + ) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql index fba1d583e681..e7785e9117d3 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql @@ -1,32 +1,51 @@ -WITH t0 AS ( - SELECT t3.`a`, t3.`g`, sum(t3.`f`) AS `metric` - FROM alltypes t3 - GROUP BY 1, 2 -), -t1 AS ( - SELECT t0.* - FROM t0 - INNER JOIN t0 t3 - ON t0.`g` = t3.`g` -) -SELECT t2.`a`, t2.`g`, t2.`metric` +SELECT + t1.a AS a, + t1.g AS g, + t1.metric AS metric FROM ( - WITH t0 AS ( - SELECT t3.`a`, t3.`g`, sum(t3.`f`) AS `metric` - FROM alltypes t3 - GROUP BY 1, 2 - ), - t1 AS ( - SELECT t0.* - FROM t0 - INNER JOIN t0 t3 - ON t0.`g` = t3.`g` - ) - SELECT * - FROM t1 - UNION ALL - SELECT t0.* - FROM t0 - INNER JOIN t0 t3 - ON t0.`g` = t3.`g` -) t2 \ No newline at end of file + SELECT + t0.a AS a, + t0.g AS g, + SUM(t0.f) AS metric + FROM alltypes AS t0 + GROUP BY + 1, + 2 +) AS t1 +INNER JOIN ( + SELECT + t0.a AS a, + t0.g AS g, + SUM(t0.f) AS metric + FROM alltypes AS t0 + GROUP BY + 1, + 2 +) AS t2 + ON t1.g = t2.g +UNION ALL +SELECT + t1.a AS a, + t1.g AS g, + t1.metric AS metric +FROM ( + SELECT + t0.a AS a, + t0.g AS g, + SUM(t0.f) AS metric + FROM alltypes AS t0 + GROUP BY + 1, + 2 +) AS t1 +INNER JOIN ( + SELECT + t0.a AS a, + t0.g AS g, + SUM(t0.f) AS metric + FROM alltypes AS t0 + GROUP BY + 1, + 2 +) AS t2 + ON t1.g = t2.g \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql index a59687794a8a..496b5b45619e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql @@ -1,15 +1,41 @@ -WITH t0 AS ( - SELECT t2.`g`, t2.`a`, t2.`b`, sum(t2.`f`) AS `total` - FROM alltypes t2 - GROUP BY 1, 2, 3 -) -SELECT t0.`g`, max(t0.`total` - `total`) AS `metric` +SELECT + t4.g AS g, + MAX(t4.total - t4.total_right) AS metric FROM ( - SELECT t0.`g`, t0.`a`, t0.`b`, t0.`total`, t2.`g` AS `g_right`, - t2.`a` AS `a_right`, t2.`b` AS `b_right`, - t2.`total` AS `total_right` - FROM t0 - INNER JOIN t0 t2 - ON t0.`a` = t2.`b` -) t1 -GROUP BY 1 \ No newline at end of file + SELECT + t1.g AS g, + t1.a AS a, + t1.b AS b, + t1.total AS total, + t2.g AS g_right, + t2.a AS a_right, + t2.b AS b_right, + t2.total AS total_right + FROM ( + SELECT + t0.g AS g, + t0.a AS a, + t0.b AS b, + SUM(t0.f) AS total + FROM alltypes AS t0 + GROUP BY + 1, + 2, + 3 + ) AS t1 + INNER JOIN ( + SELECT + t0.g AS g, + t0.a AS a, + t0.b AS b, + SUM(t0.f) AS total + FROM alltypes AS t0 + GROUP BY + 1, + 2, + 3 + ) AS t2 + ON t1.a = t2.b +) AS t4 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql index c3d8fc4e9d9f..551821a247c4 100644 --- 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql @@ -1,22 +1,40 @@ -WITH t0 AS ( - SELECT t3.`dest`, avg(t3.`arrdelay`) AS `Mean(arrdelay)` - FROM airlines t3 - WHERE t3.`dest` IN ('ORD', 'JFK', 'SFO') - GROUP BY 1 -), -t1 AS ( - SELECT t0.* - FROM t0 - ORDER BY t0.`Mean(arrdelay)` DESC - LIMIT 10 -), -t2 AS ( - SELECT t3.* - FROM airlines t3 - WHERE t3.`dest` IN ('ORD', 'JFK', 'SFO') -) -SELECT `origin`, count(1) AS `CountStar()` -FROM t2 - LEFT SEMI JOIN t1 - ON t2.`dest` = t1.`dest` -GROUP BY 1 \ No newline at end of file +SELECT + t6.origin AS origin, + COUNT(*) AS "CountStar()" +FROM ( + SELECT + t1.dest AS dest, + t1.origin AS origin, + t1.arrdelay AS arrdelay + FROM ( + SELECT + * + FROM airlines AS t0 + WHERE + t0.dest IN ('ORD', 'JFK', 'SFO') + ) AS t1 + SEMI JOIN ( + SELECT + * + FROM ( + SELECT + t1.dest AS dest, + AVG(t1.arrdelay) AS "Mean(arrdelay)" + FROM ( + SELECT + * + FROM airlines AS t0 + WHERE + t0.dest IN ('ORD', 'JFK', 'SFO') + ) AS t1 + GROUP BY + 1 + ) AS t2 + ORDER BY + t2."Mean(arrdelay)" DESC + LIMIT 10 + ) AS t4 + ON t1.dest = t4.dest +) AS t6 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql index 391b17edcdf4..077bcd8cb40b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql @@ -1,15 +1,23 @@ -WITH t0 AS ( - SELECT t2.`city`, avg(t2.`v2`) AS `Mean(v2)` - FROM tbl t2 - GROUP BY 1 -), -t1 AS ( - SELECT t0.* - FROM t0 - ORDER BY t0.`Mean(v2)` DESC +SELECT + t0.foo AS foo, + t0.bar AS bar, + t0.city AS city, + t0.v1 AS v1, + t0.v2 AS v2 +FROM tbl AS t0 +SEMI JOIN ( + SELECT + * + FROM ( + SELECT + t0.city AS city, + AVG(t0.v2) AS "Mean(v2)" + FROM tbl AS t0 + GROUP BY + 1 + ) AS t1 + ORDER BY + t1."Mean(v2)" DESC LIMIT 10 -) -SELECT * -FROM tbl t2 - LEFT SEMI JOIN t1 - ON t2.`city` = t1.`city` \ No newline at end of file +) AS t3 + ON t0.city = t3.city \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql index e5e3d95f33bb..6f15e53a0d5b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql @@ -1,15 +1,23 @@ -WITH t0 AS ( - SELECT t2.`city`, count(t2.`city`) AS `Count(city)` - FROM tbl t2 - GROUP BY 1 -), -t1 AS ( - SELECT t0.* - FROM t0 - ORDER BY t0.`Count(city)` DESC +SELECT + t0.foo AS foo, + t0.bar AS bar, + t0.city AS city, + t0.v1 AS v1, + t0.v2 AS v2 +FROM tbl AS t0 +SEMI JOIN ( + SELECT + * + FROM ( + SELECT + t0.city AS city, + COUNT(t0.city) AS "Count(city)" + FROM tbl AS t0 + GROUP BY + 1 + ) AS t1 + ORDER BY + t1."Count(city)" DESC LIMIT 10 -) -SELECT * -FROM tbl t2 - LEFT SEMI JOIN t1 - ON t2.`city` = t1.`city` \ No newline at end of file +) AS t3 + ON t0.city = t3.city \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql index 02391a855345..237a4f77eb32 100644 --- 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql @@ -1,23 +1,62 @@ -WITH t0 AS ( - SELECT t3.*, t4.`n_name`, t5.`r_name` - FROM tpch_customer t3 - INNER JOIN tpch_nation t4 - ON t3.`c_nationkey` = t4.`n_nationkey` - INNER JOIN tpch_region t5 - ON t4.`n_regionkey` = t5.`r_regionkey` -), -t1 AS ( - SELECT t0.`n_name`, sum(t0.`c_acctbal`) AS `Sum(c_acctbal)` - FROM t0 - GROUP BY 1 -), -t2 AS ( - SELECT t1.* - FROM t1 - ORDER BY t1.`Sum(c_acctbal)` DESC +SELECT + t5.c_custkey AS c_custkey, + t5.c_name AS c_name, + t5.c_address AS c_address, + t5.c_nationkey AS c_nationkey, + t5.c_phone AS c_phone, + t5.c_acctbal AS c_acctbal, + t5.c_mktsegment AS c_mktsegment, + t5.c_comment AS c_comment, + t5.n_name AS n_name, + t5.r_name AS r_name +FROM ( + SELECT + t0.c_custkey AS c_custkey, + t0.c_name AS c_name, + t0.c_address AS c_address, + t0.c_nationkey AS c_nationkey, + t0.c_phone AS c_phone, + t0.c_acctbal AS c_acctbal, + t0.c_mktsegment AS c_mktsegment, + t0.c_comment AS c_comment, + t1.n_name AS n_name, + t2.r_name AS r_name + FROM tpch_customer AS t0 + INNER JOIN tpch_nation AS t1 + ON t0.c_nationkey = t1.n_nationkey + INNER JOIN tpch_region AS t2 + ON t1.n_regionkey = t2.r_regionkey +) AS t5 +SEMI JOIN ( + SELECT + * + FROM ( + SELECT + t5.n_name AS n_name, + SUM(t5.c_acctbal) AS "Sum(c_acctbal)" + FROM ( + SELECT + t0.c_custkey AS c_custkey, + t0.c_name AS c_name, + t0.c_address AS c_address, + t0.c_nationkey AS c_nationkey, + t0.c_phone AS c_phone, + t0.c_acctbal AS c_acctbal, + t0.c_mktsegment AS c_mktsegment, + t0.c_comment AS c_comment, + t1.n_name AS n_name, + t2.r_name AS r_name + FROM tpch_customer AS t0 + INNER JOIN tpch_nation AS t1 + ON t0.c_nationkey = t1.n_nationkey + INNER JOIN tpch_region AS t2 + ON t1.n_regionkey = t2.r_regionkey + ) AS t5 + GROUP BY + 1 + ) AS t6 + ORDER BY + t6."Sum(c_acctbal)" DESC LIMIT 10 -) -SELECT * -FROM t0 - LEFT SEMI JOIN t2 - ON t0.`n_name` = t2.`n_name` \ No newline at end of file +) AS t8 + ON t5.n_name = t8.n_name \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql index 23a14c5a7697..55fd82ce48b9 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql @@ -1,8 +1,13 @@ -SELECT t0.* +SELECT + * FROM ( - SELECT t1.`dest`, avg(t1.`arrdelay`) AS `Mean(arrdelay)` - FROM airlines t1 - GROUP BY 1 -) t0 -ORDER BY t0.`Mean(arrdelay)` DESC + SELECT + t0.dest AS dest, + AVG(t0.arrdelay) AS "Mean(arrdelay)" + FROM airlines AS t0 + GROUP BY + 1 +) AS t1 +ORDER BY + t1."Mean(arrdelay)" DESC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql index ad04485d51f8..cf9c5a649d4b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql @@ -1,22 +1,53 @@ -WITH t0 AS ( - SELECT t3.`r_name` AS `region`, t4.`n_name` AS `nation`, - t6.`o_totalprice` AS `amount`, - CAST(t6.`o_orderdate` AS timestamp) AS `odate` - FROM tpch_region t3 - INNER JOIN tpch_nation t4 - 
ON t3.`r_regionkey` = t4.`n_regionkey` - INNER JOIN tpch_customer t5 - ON t5.`c_nationkey` = t4.`n_nationkey` - INNER JOIN tpch_orders t6 - ON t6.`o_custkey` = t5.`c_custkey` -), -t1 AS ( - SELECT t0.`region`, extract(t0.`odate`, 'year') AS `year`, - CAST(sum(t0.`amount`) AS double) AS `total` - FROM t0 - GROUP BY 1, 2 -) -SELECT t1.`region`, t1.`year`, t1.`total` - t2.`total` AS `yoy_change` -FROM t1 - INNER JOIN t1 t2 - ON t1.`year` = (t2.`year` - 1) \ No newline at end of file +SELECT + t8.region AS region, + t8.year AS year, + t8.total - t9.total AS yoy_change +FROM ( + SELECT + t7.region AS region, + EXTRACT('year' FROM t7.odate) AS year, + CAST(SUM(t7.amount) AS DOUBLE) AS total + FROM ( + SELECT + t0.r_name AS region, + t1.n_name AS nation, + t3.o_totalprice AS amount, + CAST(t3.o_orderdate AS TIMESTAMP) AS odate + FROM tpch_region AS t0 + INNER JOIN tpch_nation AS t1 + ON t0.r_regionkey = t1.n_regionkey + INNER JOIN tpch_customer AS t2 + ON t2.c_nationkey = t1.n_nationkey + INNER JOIN tpch_orders AS t3 + ON t3.o_custkey = t2.c_custkey + ) AS t7 + GROUP BY + 1, + 2 +) AS t8 +INNER JOIN ( + SELECT + t7.region AS region, + EXTRACT('year' FROM t7.odate) AS year, + CAST(SUM(t7.amount) AS DOUBLE) AS total + FROM ( + SELECT + t0.r_name AS region, + t1.n_name AS nation, + t3.o_totalprice AS amount, + CAST(t3.o_orderdate AS TIMESTAMP) AS odate + FROM tpch_region AS t0 + INNER JOIN tpch_nation AS t1 + ON t0.r_regionkey = t1.n_regionkey + INNER JOIN tpch_customer AS t2 + ON t2.c_nationkey = t1.n_nationkey + INNER JOIN tpch_orders AS t3 + ON t3.o_custkey = t2.c_custkey + ) AS t7 + GROUP BY + 1, + 2 +) AS t9 + ON t8.year = ( + t9.year - CAST(1 AS TINYINT) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/decompiled.py index d151e7855fb8..13beec098051 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/decompiled.py @@ -21,9 +21,7 @@ ) result = functional_alltypes.filter( - [ - functional_alltypes.timestamp_col - < (ibis.timestamp("2010-01-01 00:00:00") + ibis.interval(3)), - functional_alltypes.timestamp_col < (ibis.now() + ibis.interval(10)), - ] + functional_alltypes.timestamp_col + < (ibis.timestamp("2010-01-01 00:00:00") + ibis.interval(3)), + functional_alltypes.timestamp_col < (ibis.now() + ibis.interval(10)), ).count() diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql index e950adc79c04..6407e4987c29 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql @@ -1,4 +1 @@ -SELECT count(1) AS `CountStar()` -FROM functional_alltypes t0 -WHERE (t0.`timestamp_col` < date_add(cast('2010-01-01T00:00:00' as timestamp), INTERVAL 3 MONTH)) AND - (t0.`timestamp_col` < date_add(cast(now() as timestamp), INTERVAL 10 DAY)) \ No newline at end of file +COUNT(*) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py index c4b5fefc74bd..e44d7f2cff3a 100644 
--- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py @@ -8,8 +8,6 @@ star2 = ibis.table( name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} ) -proj = star1.inner_join(star2, star1.foo_id == star2.foo_id).select( - [star1, (star1.f - star2.value1).name("diff")] -) +joinchain = star1.inner_join(star2, star1.foo_id == star2.foo_id) -result = proj.filter(proj.diff > 1) +result = joinchain.filter(joinchain.diff > 1) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql index 8c9f86b9b8fe..7168e81df0e5 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql @@ -1,8 +1,17 @@ -SELECT t0.* +SELECT + * FROM ( - SELECT t1.*, t1.`f` - t2.`value1` AS `diff` - FROM star1 t1 - INNER JOIN star2 t2 - ON t1.`foo_id` = t2.`foo_id` -) t0 -WHERE t0.`diff` > 1 \ No newline at end of file + SELECT + t0.c AS c, + t0.f AS f, + t0.foo_id AS foo_id, + t0.bar_id AS bar_id, + t0.f - t1.value1 AS diff + FROM star1 AS t0 + INNER JOIN star2 AS t1 + ON t0.foo_id = t1.foo_id +) AS t3 +WHERE + ( + t3.diff > CAST(1 AS TINYINT) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/decompiled.py index 8da00788bce6..adb225e89119 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/decompiled.py @@ -19,4 +19,4 @@ ) lit = ibis.literal(0) -result = alltypes.filter([alltypes.a > lit, alltypes.f.between(lit, ibis.literal(1))]) +result = alltypes.filter(alltypes.a > lit, alltypes.f.between(lit, ibis.literal(1))) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql index db518b690e66..136c64d28e4e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql @@ -1,4 +1,8 @@ -SELECT t0.* -FROM alltypes t0 -WHERE (t0.`a` > 0) AND - (t0.`f` BETWEEN 0 AND 1) \ No newline at end of file +SELECT + * +FROM alltypes AS t0 +WHERE + ( + t0.a > CAST(0 AS TINYINT) + ) + AND t0.f BETWEEN CAST(0 AS TINYINT) AND CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py index 53f9b4068aad..b89e0510e17a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py @@ -8,9 +8,6 @@ star2 = ibis.table( name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} ) +joinchain = star1.inner_join(star2, star1.foo_id == star2.foo_id) -result = ( - star1.inner_join(star2, star1.foo_id == star2.foo_id) - .select([star1, star2.value1, star2.value3]) - 
.filter([star1.f > 0, star2.value3 < 1000]) -) +result = joinchain.filter(joinchain.f > 0, joinchain.value3 < 1000) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql index 8ecd49adabc9..27f0563003e4 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql @@ -1,6 +1,20 @@ -SELECT t0.*, t1.`value1`, t1.`value3` -FROM star1 t0 - INNER JOIN star2 t1 - ON t0.`foo_id` = t1.`foo_id` -WHERE (t0.`f` > 0) AND - (t1.`value3` < 1000) \ No newline at end of file +SELECT + * +FROM ( + SELECT + t0.c AS c, + t0.f AS f, + t0.foo_id AS foo_id, + t0.bar_id AS bar_id, + t1.value1 AS value1, + t1.value3 AS value3 + FROM star1 AS t0 + INNER JOIN star2 AS t1 + ON t0.foo_id = t1.foo_id +) AS t3 +WHERE + ( + t3.f > CAST(0 AS TINYINT) + ) AND ( + t3.value3 < CAST(1000 AS SMALLINT) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_count/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_count/out.sql new file mode 100644 index 000000000000..214ec1ede144 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_count/out.sql @@ -0,0 +1,20 @@ +SELECT + t2.foo_id AS foo_id, + t2.total AS total +FROM ( + SELECT + * + FROM ( + SELECT + t0.foo_id AS foo_id, + SUM(t0.f) AS total, + COUNT(*) AS "CountStar()" + FROM star1 AS t0 + GROUP BY + 1 + ) AS t1 + WHERE + ( + t1."CountStar()" > CAST(100 AS TINYINT) + ) +) AS t2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_sum/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_sum/out.sql new file mode 100644 index 000000000000..307170b0f208 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_sum/out.sql @@ -0,0 +1,14 @@ +SELECT + * +FROM ( + SELECT + t0.foo_id AS foo_id, + SUM(t0.f) AS total + FROM star1 AS t0 + GROUP BY + 1 +) AS t1 +WHERE + ( + t1.total > CAST(10 AS TINYINT) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/single/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/single/out.sql new file mode 100644 index 000000000000..29e72c8336aa --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/single/out.sql @@ -0,0 +1,6 @@ +SELECT + t0.foo_id AS foo_id, + SUM(t0.f) AS total +FROM star1 AS t0 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/two/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/two/out.sql new file mode 100644 index 000000000000..116832bf3c3a --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/two/out.sql @@ -0,0 +1,8 @@ +SELECT + t0.foo_id AS foo_id, + t0.bar_id AS bar_id, + SUM(t0.f) AS total +FROM star1 AS t0 +GROUP BY + 1, + 2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_between/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_between/out.sql new file mode 100644 index 000000000000..a944dbd5c958 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_between/out.sql @@ -0,0 +1 @@ +t0.double_col BETWEEN CAST(5 AS TINYINT) AND CAST(10 AS TINYINT) AS tmp \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/and/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/and/out.sql new file mode 100644 index 000000000000..6e3e0443a532 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/and/out.sql @@ -0,0 +1,5 @@ +( + t0.double_col > CAST(0 AS TINYINT) +) AND ( + t0.double_col < CAST(5 AS TINYINT) +) AS tmp \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/or/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/or/out.sql new file mode 100644 index 000000000000..b0f919bdac1a --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/or/out.sql @@ -0,0 +1,5 @@ +( + t0.double_col < CAST(0 AS TINYINT) +) OR ( + t0.double_col > CAST(5 AS TINYINT) +) AS tmp \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_coalesce/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_coalesce/out.sql new file mode 100644 index 000000000000..233633ba658a --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_coalesce/out.sql @@ -0,0 +1,5 @@ +COALESCE( + CASE WHEN t0.double_col > CAST(30 AS TINYINT) THEN t0.double_col ELSE NULL END, + NULL, + t0.float_col +) AS tmp \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/eq/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/eq/out.sql new file mode 100644 index 000000000000..19a939098145 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/eq/out.sql @@ -0,0 +1 @@ +t0.double_col = CAST(5 AS TINYINT) AS tmp \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ge/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ge/out.sql new file mode 100644 index 000000000000..1c278dc0cb73 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ge/out.sql @@ -0,0 +1 @@ +t0.double_col >= CAST(5 AS TINYINT) AS tmp \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/gt/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/gt/out.sql new file mode 100644 index 000000000000..4fbdd9ab3b1c --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/gt/out.sql @@ -0,0 +1 @@ +t0.double_col > CAST(5 AS TINYINT) AS tmp \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/le/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/le/out.sql new file mode 100644 index 000000000000..41acc5d90cad --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/le/out.sql @@ -0,0 +1 @@ +t0.double_col <= CAST(5 AS TINYINT) AS tmp \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/lt/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/lt/out.sql new file mode 100644 index 000000000000..a6fb94a63f56 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/lt/out.sql @@ -0,0 +1 @@ +t0.double_col < CAST(5 AS TINYINT) AS tmp \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ne/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ne/out.sql new file mode 100644 index 000000000000..98a382b28167 --- /dev/null +++ 
b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ne/out.sql @@ -0,0 +1 @@ +t0.double_col <> CAST(5 AS TINYINT) AS tmp \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_cte_factor_distinct_but_equal/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_cte_factor_distinct_but_equal/out.sql new file mode 100644 index 000000000000..4e808e0ff710 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_cte_factor_distinct_but_equal/out.sql @@ -0,0 +1,20 @@ +SELECT + t2.g AS g, + t2.metric AS metric +FROM ( + SELECT + t0.g AS g, + SUM(t0.f) AS metric + FROM alltypes AS t0 + GROUP BY + 1 +) AS t2 +INNER JOIN ( + SELECT + t1.g AS g, + SUM(t1.f) AS metric + FROM alltypes AS t1 + GROUP BY + 1 +) AS t4 + ON t2.g = t4.g \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/count_distinct/out.sql new file mode 100644 index 000000000000..e2c1fc57b53f --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/count_distinct/out.sql @@ -0,0 +1 @@ +COUNT(DISTINCT t0.int_col) AS nunique \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/group_by_count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/group_by_count_distinct/out.sql new file mode 100644 index 000000000000..c4d51f354005 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/group_by_count_distinct/out.sql @@ -0,0 +1,6 @@ +SELECT + t0.string_col AS string_col, + COUNT(DISTINCT t0.int_col) AS nunique +FROM functional_alltypes AS t0 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/projection_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/projection_distinct/out.sql new file mode 100644 index 000000000000..f98b3697b64f --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/projection_distinct/out.sql @@ -0,0 +1,8 @@ +SELECT DISTINCT + * +FROM ( + SELECT + t0.string_col AS string_col, + t0.int_col AS int_col + FROM functional_alltypes AS t0 +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/single_column_projection_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/single_column_projection_distinct/out.sql new file mode 100644 index 000000000000..636796e7e04d --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/single_column_projection_distinct/out.sql @@ -0,0 +1,7 @@ +SELECT DISTINCT + * +FROM ( + SELECT + t0.string_col AS string_col + FROM functional_alltypes AS t0 +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/table_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/table_distinct/out.sql new file mode 100644 index 000000000000..dd4c570ec517 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/table_distinct/out.sql @@ -0,0 +1,3 @@ +SELECT DISTINCT + * +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e1.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e1.sql new file mode 100644 index 000000000000..c84bf63f1858 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e1.sql @@ -0,0 +1,19 @@ +SELECT + * +FROM foo_t 
AS t0 +WHERE + EXISTS( + ( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM ( + SELECT + * + FROM bar_t AS t1 + WHERE + ( + t0.key1 = t1.key1 + ) + ) AS t2 + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e2.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e2.sql new file mode 100644 index 000000000000..83416d431936 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e2.sql @@ -0,0 +1,23 @@ +SELECT + * +FROM foo_t AS t0 +WHERE + EXISTS( + ( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM ( + SELECT + * + FROM bar_t AS t1 + WHERE + ( + ( + t0.key1 = t1.key1 + ) AND ( + t1.key2 = 'foo' + ) + ) + ) AS t2 + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_filter_group_by_agg_with_same_name/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_filter_group_by_agg_with_same_name/out.sql new file mode 100644 index 000000000000..13e53cf4bbd7 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_filter_group_by_agg_with_same_name/out.sql @@ -0,0 +1,14 @@ +SELECT + * +FROM ( + SELECT + t0.int_col AS int_col, + SUM(t0.bigint_col) AS bigint_col + FROM t AS t0 + GROUP BY + 1 +) AS t1 +WHERE + ( + t1.bigint_col = CAST(60 AS TINYINT) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/isnull/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/isnull/out.sql new file mode 100644 index 000000000000..96dd80efc156 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/isnull/out.sql @@ -0,0 +1 @@ +t0.double_col IS NULL AS tmp \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql new file mode 100644 index 000000000000..b25cb9418af6 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql @@ -0,0 +1 @@ +t0.double_col IS NOT NULL AS tmp \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_join_just_materialized/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_join_just_materialized/out.sql new file mode 100644 index 000000000000..7d32758ef61c --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_join_just_materialized/out.sql @@ -0,0 +1,21 @@ +SELECT + t0.n_nationkey AS n_nationkey, + t0.n_name AS n_name, + t0.n_regionkey AS n_regionkey, + t0.n_comment AS n_comment, + t1.r_regionkey AS r_regionkey, + t1.r_name AS r_name, + t1.r_comment AS r_comment, + t2.c_custkey AS c_custkey, + t2.c_name AS c_name, + t2.c_address AS c_address, + t2.c_nationkey AS c_nationkey, + t2.c_phone AS c_phone, + t2.c_acctbal AS c_acctbal, + t2.c_mktsegment AS c_mktsegment, + t2.c_comment AS c_comment +FROM tpch_nation AS t0 +INNER JOIN tpch_region AS t1 + ON t0.n_regionkey = t1.r_regionkey +INNER JOIN tpch_customer AS t2 + ON t0.n_nationkey = t2.c_nationkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn0/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn0/out.sql new file mode 100644 index 000000000000..2b6d0fe52716 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn0/out.sql @@ -0,0 +1,4 @@ +SELECT + * +FROM star1 AS t0 +LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn1/out.sql 
b/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn1/out.sql new file mode 100644 index 000000000000..3c71bda9b962 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn1/out.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM star1 AS t0 +LIMIT 10 +OFFSET 5 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_limit_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_filter/out.sql new file mode 100644 index 000000000000..ec694656a01b --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_filter/out.sql @@ -0,0 +1,8 @@ +SELECT + * +FROM star1 AS t0 +WHERE + ( + t0.f > CAST(0 AS TINYINT) + ) +LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_limit_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_subquery/out.sql new file mode 100644 index 000000000000..431ba054f9f9 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_subquery/out.sql @@ -0,0 +1,12 @@ +SELECT + * +FROM ( + SELECT + * + FROM star1 AS t0 + LIMIT 10 +) AS t1 +WHERE + ( + t1.f > CAST(0 AS TINYINT) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/decompiled.py b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/decompiled.py new file mode 100644 index 000000000000..27d91a4d745a --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/decompiled.py @@ -0,0 +1,15 @@ +import ibis + + +star1 = ibis.table( + name="star1", + schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, +) +star2 = ibis.table( + name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} +) +agg = star1.aggregate([star1.f.sum().name("total")], by=[star1.foo_id]) +joinchain = agg.inner_join(star2, agg.foo_id == star2.foo_id) +f = joinchain.filter(joinchain.total > 100) + +result = f.order_by(f.total.desc()) diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql new file mode 100644 index 000000000000..3b947354fb10 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql @@ -0,0 +1,28 @@ +SELECT + * +FROM ( + SELECT + * + FROM ( + SELECT + t2.foo_id AS foo_id, + t2.total AS total, + t1.value1 AS value1 + FROM ( + SELECT + t0.foo_id AS foo_id, + SUM(t0.f) AS total + FROM star1 AS t0 + GROUP BY + 1 + ) AS t2 + INNER JOIN star2 AS t1 + ON t2.foo_id = t1.foo_id + ) AS t4 + WHERE + ( + t4.total > CAST(100 AS TINYINT) + ) +) AS t5 +ORDER BY + t5.total DESC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql new file mode 100644 index 000000000000..0a2d70812c52 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql @@ -0,0 +1,20 @@ +SELECT + t0.x1 AS x1, + t0.y1 AS y1, + t1.x2 AS x2, + t6.x3 AS x3, + t6.y2 AS y2, + t6.x4 AS x4 +FROM t1 AS t0 +INNER JOIN t2 AS t1 + ON t0.x1 = t1.x2 +INNER JOIN ( + SELECT + t2.x3 AS x3, + t2.y2 AS y2, + t3.x4 AS x4 + FROM t3 AS t2 + INNER JOIN t4 AS t3 + ON t2.x3 = t3.x4 +) AS t6 + ON t0.y1 = t6.y2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_mutate_filter_join_no_cross_join/out.sql 
b/ibis/backends/tests/sql/snapshots/test_sql/test_mutate_filter_join_no_cross_join/out.sql new file mode 100644 index 000000000000..28adfeef1952 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_mutate_filter_join_no_cross_join/out.sql @@ -0,0 +1,11 @@ +SELECT + t1.person_id AS person_id +FROM ( + SELECT + * + FROM person AS t0 + WHERE + ( + CAST(400 AS SMALLINT) <= CAST(40 AS TINYINT) + ) +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_named_expr/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_named_expr/out.sql new file mode 100644 index 000000000000..66e751eda132 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_named_expr/out.sql @@ -0,0 +1,3 @@ +SELECT + t0.double_col * CAST(2 AS TINYINT) AS foo +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_negate/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_negate/out.sql new file mode 100644 index 000000000000..812bef8825b8 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_negate/out.sql @@ -0,0 +1,3 @@ +NOT ( + t0.double_col > CAST(0 AS TINYINT) +) AS tmp \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql new file mode 100644 index 000000000000..d27c92e5a7a9 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql @@ -0,0 +1,36 @@ +SELECT + * +FROM ( + SELECT + t4.ancestor_node_sort_order AS ancestor_node_sort_order, + CAST(1 AS TINYINT) AS n + FROM ( + SELECT + t0.product_id AS product_id, + t2.ancestor_level_name AS ancestor_level_name, + t2.ancestor_level_number AS ancestor_level_number, + t2.ancestor_node_sort_order AS ancestor_node_sort_order, + t2.descendant_node_natural_key AS descendant_node_natural_key, + t2.product_level_name AS product_level_name + FROM facts AS t0 + INNER JOIN ( + SELECT + t1.ancestor_level_name AS ancestor_level_name, + t1.ancestor_level_number AS ancestor_level_number, + t1.ancestor_node_sort_order AS ancestor_node_sort_order, + t1.descendant_node_natural_key AS descendant_node_natural_key, + CONCAT( + LPAD('-', ( + t1.ancestor_level_number - CAST(1 AS TINYINT) + ) * CAST(7 AS TINYINT), '-'), + t1.ancestor_level_name + ) AS product_level_name + FROM products AS t1 + ) AS t2 + ON t0.product_id = t2.descendant_node_natural_key + ) AS t4 + GROUP BY + 1 +) AS t5 +ORDER BY + t5.ancestor_node_sort_order ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql new file mode 100644 index 000000000000..60077ed03a09 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql @@ -0,0 +1,51 @@ +SELECT + t9.customer_id AS customer_id, + t9.first_name AS first_name, + t9.last_name AS last_name, + t9.first_order AS first_order, + t9.most_recent_order AS most_recent_order, + t9.number_of_orders AS number_of_orders, + t11.total_amount AS customer_lifetime_value +FROM ( + SELECT + t0.customer_id AS customer_id, + t0.first_name AS first_name, + t0.last_name AS last_name, + t5.first_order AS first_order, + t5.most_recent_order AS most_recent_order, + t5.number_of_orders AS number_of_orders + FROM customers AS t0 + LEFT OUTER JOIN ( + SELECT + t1.customer_id AS customer_id, + MIN(t1.order_date) AS first_order, + 
MAX(t1.order_date) AS most_recent_order, + COUNT(t1.order_id) AS number_of_orders + FROM orders AS t1 + GROUP BY + 1 + ) AS t5 + ON t0.customer_id = t5.customer_id +) AS t9 +LEFT OUTER JOIN ( + SELECT + t7.customer_id AS customer_id, + SUM(t7.amount) AS total_amount + FROM ( + SELECT + t2.payment_id AS payment_id, + t2.order_id AS order_id, + t2.payment_method AS payment_method, + t2.amount AS amount, + t3.order_id AS order_id_right, + t3.customer_id AS customer_id, + t3.order_date AS order_date, + t3.status AS status + FROM payments AS t2 + LEFT OUTER JOIN orders AS t3 + ON t2.order_id = t3.order_id + ) AS t7 + GROUP BY + 1 +) AS t11 + ON t9.customer_id = t11.customer_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cross_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cross_join/out.sql new file mode 100644 index 000000000000..0fe7408ed8c4 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cross_join/out.sql @@ -0,0 +1,16 @@ +SELECT + t0.id AS id, + t0.personal AS personal, + t0.family AS family, + t1.taken AS taken, + t1.person AS person, + t1.quant AS quant, + t1.reading AS reading, + t2.id AS id_right, + t2.site AS site, + t2.dated AS dated +FROM person AS t0 +INNER JOIN survey AS t1 + ON t0.id = t1.person +INNER JOIN visited AS t2 + ON t2.id = t1.taken \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_not_exists/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_not_exists/out.sql new file mode 100644 index 000000000000..7c62bce20d7f --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_not_exists/out.sql @@ -0,0 +1,21 @@ +SELECT + * +FROM foo_t AS t0 +WHERE + NOT ( + EXISTS( + ( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM ( + SELECT + * + FROM bar_t AS t1 + WHERE + ( + t0.key1 = t1.key1 + ) + ) AS t2 + ) + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/column/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/column/out.sql new file mode 100644 index 000000000000..a401619ae2d8 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/column/out.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM star1 AS t0 +ORDER BY + t0.f ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/random/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/random/out.sql new file mode 100644 index 000000000000..bd2fd25215ee --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/random/out.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM star1 AS t0 +ORDER BY + RANDOM() ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by_expr/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by_expr/out.sql new file mode 100644 index 000000000000..51532051bd3f --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by_expr/out.sql @@ -0,0 +1,13 @@ +SELECT + * +FROM ( + SELECT + * + FROM t AS t0 + WHERE + ( + t0.a = CAST(1 AS TINYINT) + ) +) AS t1 +ORDER BY + CONCAT(t1.b, 'a') ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_searched_case/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_searched_case/out.sql new file mode 100644 index 000000000000..ae63fb4838b3 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_searched_case/out.sql @@ -0,0 +1,7 @@ +CASE + WHEN t0.f > CAST(0 AS 
TINYINT) + THEN t0.d * CAST(2 AS TINYINT) + WHEN t0.c < CAST(0 AS TINYINT) + THEN t0.a * CAST(2 AS TINYINT) + ELSE CAST(NULL AS BIGINT) +END AS tmp \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/anti.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/anti.sql new file mode 100644 index 000000000000..3e0c659089b2 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/anti.sql @@ -0,0 +1,21 @@ +SELECT + * +FROM functional_alltypes AS t0 +WHERE + NOT ( + EXISTS( + ( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM ( + SELECT + * + FROM functional_alltypes AS t1 + WHERE + ( + t0.string_col = t1.string_col + ) + ) AS t2 + ) + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/semi.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/semi.sql new file mode 100644 index 000000000000..0f4cb117f736 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/semi.sql @@ -0,0 +1,19 @@ +SELECT + * +FROM functional_alltypes AS t0 +WHERE + EXISTS( + ( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM ( + SELECT + * + FROM functional_alltypes AS t1 + WHERE + ( + t0.string_col = t1.string_col + ) + ) AS t2 + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_join/out.sql new file mode 100644 index 000000000000..de06d3805e28 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_join/out.sql @@ -0,0 +1,8 @@ +SELECT + t0.c AS c, + t0.f AS f, + t0.foo_id AS foo_id, + t0.bar_id AS bar_id +FROM star1 AS t0 +INNER JOIN star1 AS t1 + ON t0.foo_id = t1.bar_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_simple_case/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_simple_case/out.sql new file mode 100644 index 000000000000..7dfc00dcf063 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_simple_case/out.sql @@ -0,0 +1 @@ +CASE t0.g WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS tmp \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_sort_aggregation_translation_failure/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_sort_aggregation_translation_failure/out.sql new file mode 100644 index 000000000000..0f520487cbb8 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_sort_aggregation_translation_failure/out.sql @@ -0,0 +1,12 @@ +SELECT + * +FROM ( + SELECT + t0.string_col AS string_col, + MAX(t0.double_col) AS foo + FROM functional_alltypes AS t0 + GROUP BY + 1 +) AS t1 +ORDER BY + t1.foo DESC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_subquery_aliased/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_subquery_aliased/out.sql new file mode 100644 index 000000000000..ab9ad3623170 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_subquery_aliased/out.sql @@ -0,0 +1,14 @@ +SELECT + t2.foo_id AS foo_id, + t2.total AS total, + t1.value1 AS value1 +FROM ( + SELECT + t0.foo_id AS foo_id, + SUM(t0.f) AS total + FROM star1 AS t0 + GROUP BY + 1 +) AS t2 +INNER JOIN star2 AS t1 + ON t2.foo_id = t1.foo_id \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql new file mode 100644 index 000000000000..3ded1e24f732 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql @@ -0,0 +1,19 @@ +SELECT + * +FROM foo AS t0 +WHERE + ( + t0.y > ( + SELECT + AVG(t2.y) AS "Mean(y)" + FROM ( + SELECT + * + FROM foo AS t1 + WHERE + ( + t0.dept_id = t1.dept_id + ) + ) AS t2 + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql new file mode 100644 index 000000000000..631fc089852e --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql @@ -0,0 +1,33 @@ +SELECT + * +FROM ( + SELECT + t0.p_partkey AS p_partkey, + t1.ps_supplycost AS ps_supplycost + FROM part AS t0 + INNER JOIN partsupp AS t1 + ON t0.p_partkey = t1.ps_partkey +) AS t5 +WHERE + ( + t5.ps_supplycost = ( + SELECT + MIN(t7.ps_supplycost) AS "Min(ps_supplycost)" + FROM ( + SELECT + * + FROM ( + SELECT + t1.ps_partkey AS ps_partkey, + t1.ps_supplycost AS ps_supplycost + FROM partsupp AS t1 + INNER JOIN supplier AS t2 + ON t2.s_suppkey = t1.ps_suppkey + ) AS t6 + WHERE + ( + t6.ps_partkey = t5.p_partkey + ) + ) AS t7 + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/decompiled.py b/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/decompiled.py new file mode 100644 index 000000000000..14517d8e9493 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/decompiled.py @@ -0,0 +1,9 @@ +import ibis + + +star1 = ibis.table( + name="star1", + schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, +) + +result = star1.filter(star1.f > 0, star1.c < (star1.f * 2)) diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/out.sql new file mode 100644 index 000000000000..2c8cebaa0a08 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/out.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM star1 AS t0 +WHERE + ( + t0.f > CAST(0 AS TINYINT) + ) AND ( + t0.c < ( + t0.f * CAST(2 AS TINYINT) + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_uncorrelated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_uncorrelated_subquery/out.sql new file mode 100644 index 000000000000..5240818910ca --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_uncorrelated_subquery/out.sql @@ -0,0 +1,9 @@ +SELECT + * +FROM foo AS t0 +WHERE + t0.job IN (( + SELECT + t1.job AS job + FROM bar AS t1 + )) \ No newline at end of file diff --git a/ibis/backends/tests/sql/test_compiler.py b/ibis/backends/tests/sql/test_compiler.py index 646b9ad9b652..ec7b264ffbf3 100644 --- a/ibis/backends/tests/sql/test_compiler.py +++ b/ibis/backends/tests/sql/test_compiler.py @@ -6,7 +6,6 @@ import ibis -# from ibis.backends.base.sql.compiler import Compiler from ibis.backends.tests.sql.conftest import to_sql from ibis.tests.util import assert_decompile_roundtrip, schemas_eq diff --git a/ibis/backends/tests/sql/test_select_sql.py 
b/ibis/backends/tests/sql/test_select_sql.py index 78eee22af88c..bca97afebb72 100644 --- a/ibis/backends/tests/sql/test_select_sql.py +++ b/ibis/backends/tests/sql/test_select_sql.py @@ -5,10 +5,8 @@ import ibis from ibis import _ - -# from ibis.backends.base.sql.compiler import Compiler -from ibis.backends.tests.sql.conftest import get_query, to_sql -from ibis.tests.util import assert_decompile_roundtrip, schemas_eq +from ibis.backends.tests.sql.conftest import to_sql +from ibis.tests.util import assert_decompile_roundtrip pytestmark = pytest.mark.duckdb @@ -429,14 +427,15 @@ def test_scalar_subquery_different_table(foo, bar, snapshot): snapshot.assert_match(to_sql(expr), "out.sql") -def test_exists_subquery_repr(t1, t2): - # GH #660 +# TODO(kszucs): should do snapshot testing instead +# def test_exists_subquery_repr(t1, t2): +# # GH #660 - cond = t1.key1 == t2.key1 - expr = t1[cond.any()] - stmt = get_query(expr) +# cond = t1.key1 == t2.key1 +# expr = t1[cond.any()] +# stmt = get_query(expr) - repr(stmt.where[0]) +# repr(stmt.where[0]) def test_filter_inside_exists(snapshot): @@ -491,9 +490,6 @@ def test_multiple_limits(functional_alltypes, snapshot): t = functional_alltypes expr = t.limit(20).limit(10) - stmt = get_query(expr) - - assert stmt.limit.n == 10 snapshot.assert_match(to_sql(expr), "out.sql") assert_decompile_roundtrip(expr, snapshot) @@ -860,3 +856,13 @@ def test_chain_limit_doesnt_collapse(snapshot): ) expr = t.city.topk(10)[-5:] snapshot.assert_match(to_sql(expr), "result.sql") + + +def test_join_with_conditional_aggregate(snapshot): + left = ibis.table({"on": "int", "by": "string"}, name="left") + right = ibis.table({"on": "int", "by": "string", "val": "float"}, name="right") + stat = right[(right.by == left.by) & (right.on <= left.on)]["on"].max() + merged = left.join(right, how="left", predicates=left.by == right.by)[ + right.on == stat + ] + snapshot.assert_match(to_sql(merged), "result.sql") diff --git a/ibis/backends/tests/sql/test_sql.py b/ibis/backends/tests/sql/test_sql.py index d91d1aff6683..ac4d07876d7e 100644 --- a/ibis/backends/tests/sql/test_sql.py +++ b/ibis/backends/tests/sql/test_sql.py @@ -522,3 +522,50 @@ def test_order_by_expr(snapshot): t = ibis.table(dict(a="int", b="string"), name="t") expr = t[lambda t: t.a == 1].order_by(lambda t: t.b + "a") snapshot.assert_match(to_sql(expr), "out.sql") + + +def test_no_cartesian_join(snapshot): + customers = ibis.table( + dict(customer_id="int64", first_name="string", last_name="string"), + name="customers", + ) + orders = ibis.table( + dict(order_id="int64", customer_id="int64", order_date="date", status="string"), + name="orders", + ) + payments = ibis.table( + dict( + payment_id="int64", + order_id="int64", + payment_method="string", + amount="float64", + ), + name="payments", + ) + + customer_orders = orders.group_by("customer_id").aggregate( + first_order=orders.order_date.min(), + most_recent_order=orders.order_date.max(), + number_of_orders=orders.order_id.count(), + ) + + customer_payments = ( + payments.left_join(orders, "order_id") + .group_by(orders.customer_id) + .aggregate(total_amount=payments.amount.sum()) + ) + + final = ( + customers.left_join(customer_orders, "customer_id") + .drop("customer_id_right") + .left_join(customer_payments, "customer_id")[ + customers.customer_id, + customers.first_name, + customers.last_name, + customer_orders.first_order, + customer_orders.most_recent_order, + customer_orders.number_of_orders, + customer_payments.total_amount.name("customer_lifetime_value"), + ] + ) + 
snapshot.assert_match(ibis.to_sql(final, dialect="duckdb"), "out.sql") diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index b3a6b197b988..61633cafbc02 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -772,16 +772,7 @@ def mean_and_std(v): id="collect", marks=[ pytest.mark.notimpl( - [ - "impala", - "mysql", - "sqlite", - "datafusion", - "mssql", - "druid", - "oracle", - "exasol", - ], + ["impala", "mysql", "sqlite", "mssql", "druid", "oracle", "exasol"], raises=com.OperationNotDefinedError, ), pytest.mark.broken( @@ -864,7 +855,7 @@ def test_reduction_ops( id="cond", marks=[ pytest.mark.notyet( - ["snowflake", "mysql"], + ["mysql"], raises=com.UnsupportedOperationError, reason="backend does not support filtered count distinct with more than one column", ), @@ -1030,7 +1021,7 @@ def test_quantile( id="covar_pop", marks=[ pytest.mark.notimpl( - ["dask", "pandas", "polars", "druid"], + ["dask", "polars", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1050,7 +1041,7 @@ def test_quantile( id="covar_samp", marks=[ pytest.mark.notimpl( - ["dask", "pandas", "polars", "druid"], + ["dask", "polars", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1070,7 +1061,7 @@ def test_quantile( id="corr_pop", marks=[ pytest.mark.notimpl( - ["dask", "pandas", "druid"], + ["dask", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1100,11 +1091,16 @@ def test_quantile( id="corr_samp", marks=[ pytest.mark.notimpl( - ["dask", "pandas", "druid"], + ["dask", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( - ["impala", "mysql", "sqlite", "flink"], + ["duckdb", "snowflake"], + raises=com.UnsupportedOperationError, + reason="backend only implements population correlation coefficient", + ), + pytest.mark.notyet( + ["impala", "mysql", "sqlite"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1117,7 +1113,6 @@ def test_quantile( "trino", "postgres", "risingwave", - "duckdb", "snowflake", "oracle", ], @@ -1136,7 +1131,7 @@ def test_quantile( id="covar_pop_bool", marks=[ pytest.mark.notimpl( - ["dask", "pandas", "polars", "druid"], + ["dask", "polars", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1160,7 +1155,7 @@ def test_quantile( id="corr_pop_bool", marks=[ pytest.mark.notimpl( - ["dask", "pandas", "druid"], + ["dask", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1393,10 +1388,7 @@ def test_date_quantile(alltypes, func): "::", id="expr", marks=[ - pytest.mark.notyet( - ["duckdb", "trino"], - raises=com.UnsupportedOperationError, - ), + pytest.mark.notyet(["trino"], raises=com.UnsupportedOperationError), pytest.mark.notyet( ["bigquery"], raises=GoogleBadRequest, @@ -1641,8 +1633,8 @@ def test_binds_are_cast(alltypes): def test_agg_sort(alltypes): query = alltypes.aggregate(count=alltypes.count()) - query = query.order_by(alltypes.year) - query.execute() + with pytest.raises(com.IntegrityError): + query.order_by(alltypes.year) @pytest.mark.xfail_version( @@ -1693,16 +1685,17 @@ def test_grouped_case(backend, con): ["datafusion", "mssql", "polars", "exasol"], raises=com.OperationNotDefinedError ) @pytest.mark.broken( - ["dask", "pandas"], + ["dask"], reason="Dask and Pandas do not windowize this operation correctly", raises=AssertionError, ) @pytest.mark.notyet(["impala", "flink"], raises=com.UnsupportedOperationError) -@pytest.mark.notyet(["clickhouse"], 
raises=ClickHouseDatabaseError) -@pytest.mark.notyet(["druid", "trino", "snowflake"], raises=sa.exc.ProgrammingError) -@pytest.mark.notyet(["mysql"], raises=sa.exc.NotSupportedError) -@pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError) -@pytest.mark.notyet(["pyspark"], raises=PySparkAnalysisException) +@pytest.mark.notyet(["clickhouse"], raises=ClickhouseDatabaseError) +@pytest.mark.notyet(["druid", "trino"], raises=sa.exc.ProgrammingError) +@pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) +@pytest.mark.notyet("mysql", raises=sa.exc.NotSupportedError) +@pytest.mark.notyet("oracle", raises=sa.exc.DatabaseError) +@pytest.mark.notyet("pyspark", raises=PysparkAnalysisException) def test_group_concat_over_window(backend, con): input_df = pd.DataFrame( { diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index b3e5e99cbaaf..00a7b670ce80 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -150,7 +150,7 @@ def test_np_array_literal(con): @pytest.mark.parametrize("idx", range(3)) -@pytest.mark.notimpl(["polars", "datafusion"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) def test_array_index(con, idx): arr = [1, 2, 3] expr = ibis.literal(arr) @@ -531,7 +531,7 @@ def test_array_filter(con, input, output): @builtin_array @pytest.mark.notimpl( - ["mssql", "pandas", "polars", "postgres"], + ["mssql", "polars", "postgres"], raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index e4402e94b2ad..4b8b2dfce135 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -313,7 +313,7 @@ def test_rename_table(con, temp_table, temp_table_orig): assert temp_table_orig not in con.list_tables() -@mark.notimpl(["datafusion", "polars", "druid"]) +@mark.notimpl(["polars", "druid"]) @mark.never(["impala", "pyspark"], reason="No non-nullable datatypes") @mark.notyet( ["trino"], reason="trino doesn't support NOT NULL in its in-memory catalog" @@ -917,10 +917,12 @@ def test_self_join_memory_table(backend, con, monkeypatch): t = ibis.memtable({"x": [1, 2], "y": [2, 1], "z": ["a", "b"]}) t_view = t.view() expr = t.join(t_view, t.x == t_view.y).select("x", "y", "z", "z_right") + result = con.execute(expr).sort_values("x").reset_index(drop=True) expected = pd.DataFrame( {"x": [1, 2], "y": [2, 1], "z": ["a", "b"], "z_right": ["b", "a"]} ) + backend.assert_frame_equal(result, expected) @@ -1014,10 +1016,10 @@ def test_default_backend(): for _ in range(2): assert expr.execute() == df.a.sum() - sql = str(ibis.to_sql(expr)) + sql = ibis.to_sql(expr) rx = """\ SELECT - SUM\\((\\w+)\\.a\\) AS ".+" + SUM\\((t\\d+)\\.a\\) AS ".+" FROM \\w+ AS \\1""" assert re.match(rx, sql) is not None @@ -1147,9 +1149,9 @@ def test_has_operation_no_geo(con, op): for name, obj in sorted(inspect.getmembers(builtins), key=itemgetter(0)) for backend in sorted(ALL_BACKENDS) # filter out builtins that are types, except for tuples on ClickHouse - # because tuples are used to represent lists of expressions + # and duckdb because tuples are used to represent lists of expressions if isinstance(obj, type) - if (obj != tuple or backend != "clickhouse") + if (obj != tuple or backend not in ("clickhouse", "duckdb")) if (backend != "pyspark" or vparse(pd.__version__) < vparse("2")) ], ) @@ -1459,7 +1461,7 @@ def gen_test_name(con: 
BaseBackend) -> str: @mark.notimpl( - ["datafusion", "polars"], + ["polars"], raises=NotImplementedError, reason="overwriting not implemented in ibis for this backend", ) diff --git a/ibis/backends/tests/test_dot_sql.py b/ibis/backends/tests/test_dot_sql.py index 263759691aa5..1700403104e3 100644 --- a/ibis/backends/tests/test_dot_sql.py +++ b/ibis/backends/tests/test_dot_sql.py @@ -11,7 +11,7 @@ from ibis.backends.tests.errors import PolarsComputeError table_dot_sql_notimpl = pytest.mark.notimpl(["bigquery", "impala", "druid"]) -dot_sql_notimpl = pytest.mark.notimpl(["datafusion", "exasol", "flink"]) +dot_sql_notimpl = pytest.mark.notimpl(["exasol", "flink"]) dot_sql_notyet = pytest.mark.notyet( ["snowflake", "oracle"], reason="snowflake and oracle column names are case insensitive", @@ -27,7 +27,7 @@ } -@dot_sql_notimpl +@pytest.mark.notimpl(["flink"]) @dot_sql_notyet @dot_sql_never @pytest.mark.parametrize( @@ -238,13 +238,13 @@ def test_dot_sql_reuse_alias_with_different_types(backend, alltypes, df): @dot_sql_never def test_table_dot_sql_transpile(backend, alltypes, dialect, df): name = "foo2" - foo = alltypes.select(x=_.int_col + 1).alias(name) + foo = alltypes.select(x=_.bigint_col + 1).alias(name) expr = sg.select("x").from_(sg.table(name, quoted=True)) dialect = _IBIS_TO_SQLGLOT_DIALECT.get(dialect, dialect) sqlstr = expr.sql(dialect=dialect, pretty=True) dot_sql_expr = foo.sql(sqlstr, dialect=dialect) result = dot_sql_expr.execute() - expected = df.int_col.add(1).rename("x") + expected = df.bigint_col.add(1).rename("x") backend.assert_series_equal(result.x, expected) @@ -269,12 +269,12 @@ def test_table_dot_sql_transpile(backend, alltypes, dialect, df): @dot_sql_never def test_con_dot_sql_transpile(backend, con, dialect, df): t = sg.table("functional_alltypes") - foo = sg.select(sg.alias(sg.column("int_col") + 1, "x")).from_(t) + foo = sg.select(sg.alias(sg.column("bigint_col") + 1, "x")).from_(t) dialect = _IBIS_TO_SQLGLOT_DIALECT.get(dialect, dialect) sqlstr = foo.sql(dialect=dialect, pretty=True) expr = con.sql(sqlstr, dialect=dialect) result = expr.execute() - expected = df.int_col.add(1).rename("x") + expected = df.bigint_col.add(1).rename("x") backend.assert_series_equal(result.x, expected) diff --git a/ibis/backends/tests/test_examples.py b/ibis/backends/tests/test_examples.py index d4f6505e36d9..5a9cab87f2e7 100644 --- a/ibis/backends/tests/test_examples.py +++ b/ibis/backends/tests/test_examples.py @@ -15,7 +15,7 @@ (LINUX or MACOS) and SANDBOXED, reason="nix on linux cannot download duckdb extensions or data due to sandboxing", ) -@pytest.mark.notimpl(["dask", "datafusion", "exasol", "pyspark"]) +@pytest.mark.notimpl(["dask", "pyspark", "flink", "exasol"]) @pytest.mark.notyet(["clickhouse", "druid", "impala", "mssql", "trino", "risingwave"]) @pytest.mark.parametrize( ("example", "columns"), diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index 71a3f97afc69..e6392d99632c 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -127,7 +127,7 @@ def test_column_to_pyarrow_table_schema(awards_players): assert array.type == pa.string() or array.type == pa.large_string() -@pytest.mark.notimpl(["pandas", "dask", "datafusion", "flink"]) +@pytest.mark.notimpl(["dask", "datafusion", "flink"]) @pytest.mark.notyet( ["clickhouse"], raises=AssertionError, @@ -142,7 +142,7 @@ def test_table_pyarrow_batch_chunk_size(awards_players): util.consume(batch_reader) -@pytest.mark.notimpl(["pandas", "dask", "datafusion", 
"flink"]) +@pytest.mark.notimpl(["dask", "datafusion", "flink"]) @pytest.mark.notyet( ["clickhouse"], raises=AssertionError, @@ -208,7 +208,7 @@ def test_table_to_parquet(tmp_path, backend, awards_players): @pytest.mark.notimpl( ["duckdb"], reason="cannot inline WriteOptions objects", - raises=sa.exc.NotSupportedError, + raises=DuckDBNotImplementedException, ) @pytest.mark.parametrize("version", ["1.0", "2.6"]) def test_table_to_parquet_writer_kwargs(version, tmp_path, backend, awards_players): @@ -308,7 +308,7 @@ def test_table_to_csv(tmp_path, backend, awards_players): @pytest.mark.notimpl( ["duckdb"], reason="cannot inline WriteOptions objects", - raises=sa.exc.ProgrammingError, + raises=DuckDBParserException, ) @pytest.mark.parametrize("delimiter", [";", "\t"], ids=["semicolon", "tab"]) def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): @@ -345,10 +345,8 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): marks=[ pytest.mark.notyet(["impala"], reason="precision not supported"), pytest.mark.notyet(["duckdb"], reason="precision is out of range"), - pytest.mark.notyet( - ["druid", "mssql", "snowflake", "trino"], - raises=sa.exc.ProgrammingError, - ), + pytest.mark.notyet(["druid", "trino"], raises=sa.exc.ProgrammingError), + pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError), pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError), pytest.mark.notyet(["mysql"], raises=sa.exc.OperationalError), pytest.mark.notyet( @@ -396,6 +394,11 @@ def test_to_pyarrow_decimal(backend, dtype, pyarrow_dtype): reason="read_delta not yet implemented", ) @pytest.mark.notyet(["clickhouse"], raises=Exception) +@pytest.mark.notyet( + ["snowflake"], + raises=Exception, + reason="deltalake doesn't support nanosecond timestamps", +) @pytest.mark.notyet(["mssql", "pandas"], raises=PyDeltaTableError) @pytest.mark.notyet( ["druid"], diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 307685f113c0..308dd19b1a09 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -142,6 +142,7 @@ def test_isna(backend, alltypes, col, filt): [ "bigquery", "clickhouse", + "datafusion", "duckdb", "impala", "postgres", @@ -195,9 +196,7 @@ def test_coalesce(con, expr, expected): # TODO(dask) - identicalTo - #2553 -@pytest.mark.notimpl( - ["clickhouse", "datafusion", "dask", "pyspark", "mssql", "druid", "exasol"] -) +@pytest.mark.notimpl(["clickhouse", "dask", "pyspark", "mssql", "druid", "exasol"]) def test_identical_to(backend, alltypes, sorted_df): sorted_alltypes = alltypes.order_by("id") df = sorted_df @@ -623,7 +622,7 @@ def test_isin_notin(backend, alltypes, df, ibis_op, pandas_op): reason="dask doesn't support Series as isin/notin argument", raises=NotImplementedError, ) -@pytest.mark.notimpl(["datafusion", "druid"]) +@pytest.mark.notimpl(["druid"]) @pytest.mark.parametrize( ("ibis_op", "pandas_op"), [ @@ -641,11 +640,13 @@ def test_isin_notin(backend, alltypes, df, ibis_op, pandas_op): _.string_col.notin(_.string_col), lambda df: ~df.string_col.isin(df.string_col), id="notin_col", + marks=[pytest.mark.notimpl(["datafusion"])], ), param( (_.bigint_col + 1).notin(_.string_col.length() + 1), lambda df: ~(df.bigint_col.add(1)).isin(df.string_col.str.len().add(1)), id="notin_expr", + marks=[pytest.mark.notimpl(["datafusion"])], ), ], ) @@ -741,24 +742,28 @@ def test_ifelse_column(backend, alltypes, df): def test_select_filter(backend, alltypes, df): t = alltypes - expr = 
t.select("int_col").filter(t.string_col == "4") + # XXX: should we consider a builder pattern for select and filter too? + # this would allow us to capture the context + # TODO(cpcloud): this now requires the additional string_col projection + expr = t.select("int_col", "string_col").filter(t.string_col == "4") result = expr.execute() - expected = df.loc[df.string_col == "4", ["int_col"]].reset_index(drop=True) + expected = df.loc[df.string_col == "4", ["int_col", "string_col"]].reset_index( + drop=True + ) backend.assert_frame_equal(result, expected) def test_select_filter_select(backend, alltypes, df): t = alltypes - expr = t.select("int_col").filter(t.string_col == "4").int_col + expr = t.select("int_col", "string_col").filter(t.string_col == "4").int_col result = expr.execute().rename("int_col") expected = df.loc[df.string_col == "4", "int_col"].reset_index(drop=True) backend.assert_series_equal(result, expected) -@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) -@pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError) +@pytest.mark.broken(["mssql"], raises=sa.exc.OperationalError) def test_between(backend, alltypes, df): expr = alltypes.double_col.between(5, 10) result = expr.execute().rename("double_col") @@ -893,7 +898,7 @@ def test_isin_uncorrelated( @pytest.mark.broken(["polars"], reason="incorrect answer") -@pytest.mark.notimpl(["datafusion", "pyspark", "druid", "exasol"]) +@pytest.mark.notimpl(["pyspark", "druid", "exasol"]) @pytest.mark.notyet(["dask"], reason="not supported by the backend") def test_isin_uncorrelated_filter( backend, batting, awards_players, batting_df, awards_players_df @@ -1007,9 +1012,7 @@ def test_memtable_column_naming_mismatch(backend, con, monkeypatch, df, columns) @pytest.mark.notimpl( - ["dask", "datafusion", "pandas", "polars"], - raises=NotImplementedError, - reason="not a SQL backend", + ["dask", "pandas", "polars"], raises=NotImplementedError, reason="not a SQL backend" ) @pytest.mark.notimpl( ["pyspark"], reason="pyspark doesn't generate SQL", raises=NotImplementedError @@ -1358,7 +1361,6 @@ def hash_256(col): "pandas", "dask", "bigquery", - "datafusion", "druid", "impala", "mssql", @@ -1367,9 +1369,9 @@ def hash_256(col): "postgres", "risingwave", "pyspark", - "snowflake", "sqlite", "exasol", + "snowflake", ] ) @pytest.mark.parametrize( @@ -1391,6 +1393,7 @@ def hash_256(col): reason="raises TrinoUserError", ), pytest.mark.broken(["polars"], reason="casts to 1672531200000000000"), + pytest.mark.broken(["datafusion"], reason="casts to 1672531200000000"), ], ), ], @@ -1414,9 +1417,9 @@ def test_try_cast_expected(con, from_val, to_type, expected): "postgres", "risingwave", "pyspark", - "snowflake", "sqlite", "exasol", + "snowflake", ] ) @pytest.mark.parametrize( @@ -1486,9 +1489,9 @@ def test_try_cast_table(backend, con): "postgres", "risingwave", "pyspark", - "snowflake", "sqlite", "exasol", + "snowflake", ] ) @pytest.mark.parametrize( @@ -1673,10 +1676,15 @@ def test_static_table_slice(backend, slc, expected_count_fn): ids=str, ) @pytest.mark.notyet( - ["mysql", "snowflake", "trino"], + ["mysql", "trino"], raises=sa.exc.ProgrammingError, reason="backend doesn't support dynamic limit/offset", ) +@pytest.mark.notyet( + ["snowflake"], + raises=SnowflakeProgrammingError, + reason="backend doesn't support dynamic limit/offset", +) @pytest.mark.notimpl( ["mssql"], raises=sa.exc.CompileError, @@ -1726,7 +1734,7 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): @pytest.mark.notyet( - ["mysql", "snowflake", 
"trino"], + ["mysql", "trino"], raises=sa.exc.ProgrammingError, reason="backend doesn't support dynamic limit/offset", ) @@ -1734,6 +1742,11 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): ["exasol"], raises=sa.exc.CompileError, ) +@pytest.mark.notyet( + ["snowflake"], + raises=SnowflakeProgrammingError, + reason="backend doesn't support dynamic limit/offset", +) @pytest.mark.notyet( ["clickhouse"], raises=ClickHouseDatabaseError, @@ -1757,11 +1770,6 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): raises=ImpalaHiveServer2Error, ) @pytest.mark.notyet(["pyspark"], reason="pyspark doesn't support dynamic limit/offset") -@pytest.mark.xfail_version( - duckdb=["duckdb<=0.8.1"], - raises=AssertionError, - reason="https://github.com/duckdb/duckdb/issues/8412", -) @pytest.mark.notyet(["flink"], reason="flink doesn't support dynamic limit/offset") @pytest.mark.notimpl( ["risingwave"], diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index c80d51ae58d4..e46196aa2378 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -289,7 +289,7 @@ def test_join_with_pandas_non_null_typed_columns(batting, awards_players): reason="polars doesn't support join predicates", ) @pytest.mark.notimpl( - ["dask", "pandas"], + ["dask"], raises=TypeError, reason="dask and pandas don't support join predicates", ) @@ -397,3 +397,13 @@ def test_outer_join_nullability(backend, how, nrows, gen_right, keys): result = expr.to_pyarrow() assert len(result) == nrows + + +def test_complex_join_agg(snapshot): + t1 = ibis.table(dict(value1="float", key1="string", key2="string"), name="table1") + t2 = ibis.table(dict(value2="float", key1="string", key4="string"), name="table2") + + avg_diff = (t1.value1 - t2.value2).mean() + expr = t1.left_join(t2, "key1").group_by(t1.key1).aggregate(avg_diff=avg_diff) + + snapshot.assert_match(str(ibis.to_sql(expr, dialect="duckdb")), "out.sql") diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 202d68e73764..047c9322ca1f 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -27,6 +27,11 @@ from ibis.expr import datatypes as dt from ibis.tests.util import assert_equal +try: + from snowflake.connector.errors import ProgrammingError as SnowflakeProgrammingError +except ImportError: + SnowflakeProgrammingError = None + @pytest.mark.parametrize( ("expr", "expected_types"), @@ -254,7 +259,7 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? { "bigquery": decimal.Decimal("1.1"), - "snowflake": "1.1", + "snowflake": decimal.Decimal("1.1"), "sqlite": 1.1, "trino": 1.1, "dask": decimal.Decimal("1.1"), @@ -272,7 +277,7 @@ def test_numeric_literal(con, backend, expr, expected_types): }, { "bigquery": "NUMERIC", - "snowflake": "VARCHAR", + "snowflake": "DECIMAL", "sqlite": "real", "trino": "decimal(2,1)", "duckdb": "DECIMAL(18,3)", @@ -308,7 +313,7 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? 
{ "bigquery": decimal.Decimal("1.1"), - "snowflake": "1.100000000", + "snowflake": decimal.Decimal("1.1"), "sqlite": 1.1, "trino": 1.1, "duckdb": decimal.Decimal("1.100000000"), @@ -328,7 +333,7 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": "NUMERIC", "clickhouse": "Decimal(38, 9)", - "snowflake": "VARCHAR", + "snowflake": "DECIMAL", "sqlite": "real", "trino": "decimal(2,1)", "duckdb": "DECIMAL(38,9)", @@ -359,7 +364,6 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? { "bigquery": decimal.Decimal("1.1"), - "snowflake": "1.10000000000000000000000000000000000000", "sqlite": 1.1, "trino": 1.1, "dask": decimal.Decimal("1.1"), @@ -378,7 +382,6 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": "BIGNUMERIC", "clickhouse": "Decimal(76, 38)", - "snowflake": "VARCHAR", "sqlite": "real", "trino": "decimal(2,1)", "duckdb": "DECIMAL(18,3)", @@ -401,9 +404,7 @@ def test_numeric_literal(con, backend, expr, expected_types): raises=ImpalaHiveServer2Error, ), pytest.mark.broken( - ["duckdb"], - "(duckdb.ParserException) Parser Error: Width must be between 1 and 38!", - raises=sa.exc.ProgrammingError, + ["duckdb"], "Unsupported precision.", raises=DuckDBParserException ), pytest.mark.notyet(["datafusion"], raises=Exception), pytest.mark.notyet( @@ -411,6 +412,11 @@ def test_numeric_literal(con, backend, expr, expected_types): "The precision can be up to 38 in Flink", raises=ValueError, ), + pytest.mark.broken( + ["snowflake"], + "Invalid number precision: 76. Must be between 0 and 38.", + raises=SnowflakeProgrammingError, + ), ], id="decimal-big", ), @@ -419,7 +425,6 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? { "bigquery": float("inf"), - "snowflake": "Infinity", "sqlite": float("inf"), "postgres": float("nan"), "risingwave": float("nan"), @@ -427,16 +432,16 @@ def test_numeric_literal(con, backend, expr, expected_types): "dask": decimal.Decimal("Infinity"), "impala": float("inf"), "exasol": float("inf"), + "duckdb": float("inf"), }, { "bigquery": "FLOAT64", - "snowflake": "VARCHAR", "sqlite": "real", "trino": "decimal(2,1)", - "duckdb": "DECIMAL(18,3)", "postgres": "numeric", "risingwave": "numeric", "impala": "DOUBLE", + "duckdb": "FLOAT", }, marks=[ pytest.mark.broken( @@ -444,11 +449,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "Unsupported precision. Supported values: [1 : 76]. Current value: None", raises=NotImplementedError, ), - pytest.mark.broken( - ["duckdb"], - "duckdb.ConversionException: Conversion Error: Could not cast value inf to DECIMAL(18,3)", - raises=DuckDBConversionException, - ), pytest.mark.broken( ["trino"], "(trino.exceptions.TrinoUserError) TrinoUserError(type=USER_ERROR, name=INVALID_LITERAL, " @@ -456,6 +456,11 @@ def test_numeric_literal(con, backend, expr, expected_types): "query_id=20230128_024107_01084_y8zm3)", raises=sa.exc.ProgrammingError, ), + pytest.mark.broken( + ["snowflake"], + "snowflake.connector.errors.ProgrammingError: 100038 (22018): Numeric value 'Infinity' is not recognized", + raises=SnowflakeProgrammingError, + ), pytest.mark.broken( ["pyspark"], "An error occurred while calling z:org.apache.spark.sql.functions.lit.", @@ -502,7 +507,6 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? 
{ "bigquery": float("-inf"), - "snowflake": "-Infinity", "sqlite": float("-inf"), "postgres": float("nan"), "risingwave": float("nan"), @@ -510,16 +514,16 @@ def test_numeric_literal(con, backend, expr, expected_types): "dask": decimal.Decimal("-Infinity"), "impala": float("-inf"), "exasol": float("-inf"), + "duckdb": float("-inf"), }, { "bigquery": "FLOAT64", - "snowflake": "VARCHAR", "sqlite": "real", "trino": "decimal(2,1)", - "duckdb": "DECIMAL(18,3)", "postgres": "numeric", "risingwave": "numeric", "impala": "DOUBLE", + "duckdb": "FLOAT", }, marks=[ pytest.mark.broken( @@ -527,11 +531,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "Unsupported precision. Supported values: [1 : 76]. Current value: None", raises=NotImplementedError, ), - pytest.mark.broken( - ["duckdb"], - "duckdb.ConversionException: Conversion Error: Could not cast value -inf to DECIMAL(18,3)", - raises=DuckDBConversionException, - ), pytest.mark.broken( ["trino"], "(trino.exceptions.TrinoUserError) TrinoUserError(type=USER_ERROR, name=INVALID_LITERAL, " @@ -539,6 +538,11 @@ def test_numeric_literal(con, backend, expr, expected_types): "query_id=20230128_024107_01084_y8zm3)", raises=sa.exc.ProgrammingError, ), + pytest.mark.broken( + ["snowflake"], + "snowflake.connector.errors.ProgrammingError: 100038 (22018): Numeric value '-Infinity' is not recognized", + raises=SnowflakeProgrammingError, + ), pytest.mark.broken( ["pyspark"], "An error occurred while calling z:org.apache.spark.sql.functions.lit.", @@ -585,7 +589,6 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? { "bigquery": float("nan"), - "snowflake": "NaN", "sqlite": None, "postgres": float("nan"), "risingwave": float("nan"), @@ -593,16 +596,16 @@ def test_numeric_literal(con, backend, expr, expected_types): "dask": decimal.Decimal("NaN"), "impala": float("nan"), "exasol": float("nan"), + "duckdb": float("nan"), }, { "bigquery": "FLOAT64", - "snowflake": "VARCHAR", "sqlite": "null", "trino": "decimal(2,1)", - "duckdb": "DECIMAL(18,3)", "postgres": "numeric", "risingwave": "numeric", "impala": "DOUBLE", + "duckdb": "FLOAT", }, marks=[ pytest.mark.broken( @@ -610,14 +613,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "Unsupported precision. Supported values: [1 : 76]. 
Current value: None", raises=NotImplementedError, ), - pytest.mark.broken( - ["duckdb"], - "(duckdb.InvalidInputException) Invalid Input Error: Attempting " - "to execute an unsuccessful or closed pending query result" - "Error: Invalid Input Error: Type DOUBLE with value nan can't be " - "cast because the value is out of range for the destination type INT64", - raises=sa.exc.ProgrammingError, - ), pytest.mark.broken( ["trino"], "(trino.exceptions.TrinoUserError) TrinoUserError(type=USER_ERROR, name=INVALID_LITERAL, " @@ -625,6 +620,11 @@ def test_numeric_literal(con, backend, expr, expected_types): "query_id=20230128_024107_01084_y8zm3)", raises=sa.exc.ProgrammingError, ), + pytest.mark.broken( + ["snowflake"], + "snowflake.connector.errors.ProgrammingError: 100038 (22018): Numeric value 'NaN' is not recognized", + raises=SnowflakeProgrammingError, + ), pytest.mark.broken( ["pyspark"], "An error occurred while calling z:org.apache.spark.sql.functions.lit.", @@ -644,14 +644,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "[SQL: SELECT %(param_1)s AS [Decimal('NaN')]]", raises=(sa.exc.ProgrammingError, KeyError), ), - pytest.mark.broken( - ["mssql"], - "(pydruid.db.exceptions.ProgrammingError) Plan validation failed " - "(org.apache.calcite.tools.ValidationException): " - "org.apache.calcite.runtime.CalciteContextException: From line 1, column 8 to line 1, column 10: Column 'NaN' not found in any table" - "[SQL: SELECT NaN AS \"Decimal('NaN')\"]", - raises=sa.exc.ProgrammingError, - ), pytest.mark.broken( ["druid"], "(pydruid.db.exceptions.ProgrammingError) Plan validation failed " @@ -792,28 +784,14 @@ def test_decimal_literal(con, backend, expr, expected_types, expected_result): param( operator.methodcaller("isnan"), np.isnan, - marks=[ - pytest.mark.notimpl( - ["exasol"], - raises=com.OperationNotDefinedError, - ), - ], + marks=pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), id="isnan", ), param( operator.methodcaller("isinf"), np.isinf, id="isinf", - marks=[ - pytest.mark.notimpl( - ["exasol"], - raises=com.OperationNotDefinedError, - ), - pytest.mark.notimpl( - ["datafusion"], - raises=com.OperationNotDefinedError, - ), - ], + marks=pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ), ], ) @@ -1459,7 +1437,7 @@ def test_floating_mod(backend, alltypes, df): ) @pytest.mark.notyet(["mssql"], raises=(sa.exc.OperationalError, sa.exc.DataError)) @pytest.mark.notyet(["postgres"], raises=sa.exc.DataError) -@pytest.mark.notyet(["snowflake"], raises=sa.exc.ProgrammingError) +@pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) @pytest.mark.notimpl(["exasol"], raises=(sa.exc.DBAPIError, com.IbisTypeError)) def test_divide_by_zero(backend, alltypes, df, column, denominator): expr = alltypes[column] / denominator @@ -1481,7 +1459,6 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "mysql": 10, "snowflake": 38, "trino": 18, - "duckdb": None, "sqlite": None, "mssql": None, "oracle": 38, @@ -1492,7 +1469,6 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "mysql": 0, "snowflake": 0, "trino": 3, - "duckdb": None, "sqlite": None, "mssql": None, "oracle": 0, @@ -1506,11 +1482,13 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "clickhouse", "dask", "datafusion", + "duckdb", "impala", "pandas", "pyspark", "polars", "flink", + "snowflake", ], reason="Not SQLAlchemy backends", ) @@ -1621,7 +1599,8 @@ def test_random(con): @pytest.mark.notimpl(["datafusion"], 
raises=com.OperationNotDefinedError) def test_clip(backend, alltypes, df, ibis_func, pandas_func): result = ibis_func(alltypes.int_col).execute() - expected = pandas_func(df.int_col).astype(result.dtype) + raw_expected = pandas_func(df.int_col) + expected = raw_expected.astype(result.dtype) # Names won't match in the PySpark backend since PySpark # gives 'tmp' name when executing a Column backend.assert_series_equal(result, expected, check_names=False) diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index b04d0f762c7c..b7aa81c43dd1 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -37,7 +37,7 @@ def test_floating_scalar_parameter(backend, alltypes, df, column, raw_value): ("start_string", "end_string"), [("2009-03-01", "2010-07-03"), ("2014-12-01", "2017-01-05")], ) -@pytest.mark.notimpl(["datafusion", "mssql", "trino", "druid"]) +@pytest.mark.notimpl(["mssql", "trino", "druid"]) @pytest.mark.broken(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notimpl( ["risingwave"], @@ -65,7 +65,7 @@ def test_timestamp_accepts_date_literals(alltypes): assert expr.compile(params=params) is not None -@pytest.mark.notimpl(["impala", "pyspark", "druid", "oracle", "exasol"]) +@pytest.mark.notimpl(["dask", "impala", "pyspark", "druid", "oracle", "exasol"]) @pytest.mark.never( ["mysql", "sqlite", "mssql"], reason="backend will never implement array types" ) diff --git a/ibis/backends/tests/test_register.py b/ibis/backends/tests/test_register.py index a612eed9cf45..3aee23a6e6f8 100644 --- a/ibis/backends/tests/test_register.py +++ b/ibis/backends/tests/test_register.py @@ -388,12 +388,14 @@ def test_register_garbage(con, monkeypatch): monkeypatch.setattr(con, "_load_extensions", lambda x: True) sa = pytest.importorskip("sqlalchemy") + duckdb = pytest.importorskip("duckdb") with pytest.raises( - sa.exc.OperationalError, match="No files found that match the pattern" + (sa.exc.OperationalError, duckdb.IOException), + match="No files found that match the pattern", ): con.read_csv("garbage_notafile") - with pytest.raises(FileNotFoundError): + with pytest.raises((FileNotFoundError, duckdb.IOException)): con.read_parquet("garbage_notafile") diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 9db76cfa9c27..0654ea12cb77 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -39,8 +39,7 @@ reason="Not a SQL backend", ) no_sql_extraction = pytest.mark.notimpl( - ["datafusion", "pyspark", "polars"], - reason="Not clear how to extract SQL from the backend", + ["pyspark", "polars"], reason="Not clear how to extract SQL from the backend" ) @@ -62,9 +61,7 @@ def test_literal(backend, expr): assert ibis.to_sql(expr, dialect=backend.name()) -@pytest.mark.never( - ["pandas", "dask", "datafusion", "polars", "pyspark"], reason="not SQL" -) +@pytest.mark.never(["pandas", "dask", "polars", "pyspark"], reason="not SQL") @pytest.mark.xfail_version( mssql=["sqlalchemy>=2"], reason="sqlalchemy 2 prefixes literals with `N`" ) @@ -90,9 +87,7 @@ def test_group_by_has_index(backend, snapshot): snapshot.assert_match(sql, "out.sql") -@pytest.mark.never( - ["pandas", "dask", "datafusion", "polars", "pyspark"], reason="not SQL" -) +@pytest.mark.never(["pandas", "dask", "polars", "pyspark"], reason="not SQL") def test_cte_refs_in_topo_order(backend, snapshot): mr0 = ibis.table(schema=ibis.schema(dict(key="int")), name="leaf") @@ -105,9 +100,7 @@ def test_cte_refs_in_topo_order(backend, snapshot): 
snapshot.assert_match(sql, "out.sql") -@pytest.mark.never( - ["pandas", "dask", "datafusion", "polars", "pyspark"], reason="not SQL" -) +@pytest.mark.never(["pandas", "dask", "polars", "pyspark"], reason="not SQL") def test_isin_bug(con, snapshot): t = ibis.table(dict(x="int"), name="t") good = t[t.x > 2].x @@ -116,7 +109,7 @@ def test_isin_bug(con, snapshot): @pytest.mark.never( - ["pandas", "dask", "datafusion", "polars", "pyspark"], + ["pandas", "dask", "polars", "pyspark"], reason="not SQL", raises=NotImplementedError, ) @@ -124,7 +117,7 @@ def test_isin_bug(con, snapshot): ["sqlite", "mysql", "druid", "impala", "mssql"], reason="no unnest support upstream" ) @pytest.mark.notimpl( - ["oracle", "flink"], + ["oracle", "flink", "datafusion"], reason="unnest not yet implemented", raises=exc.OperationNotDefinedError, ) diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index 7c36da0d7b1f..13a4ab0b3268 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -147,9 +147,7 @@ def uses_java_re(t): lambda t: t.string_col.str.contains("6.*"), id="like", marks=[ - pytest.mark.notimpl( - ["datafusion", "polars"], raises=com.OperationNotDefinedError - ), + pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError), pytest.mark.broken( ["mssql"], reason="mssql doesn't allow like outside of filters", @@ -162,9 +160,7 @@ def uses_java_re(t): lambda t: t.string_col.str.contains("6%"), id="complex_like_escape", marks=[ - pytest.mark.notimpl( - ["datafusion", "polars"], raises=com.OperationNotDefinedError - ), + pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError), pytest.mark.broken( ["mssql"], reason="mssql doesn't allow like outside of filters", @@ -177,9 +173,7 @@ def uses_java_re(t): lambda t: t.string_col.str.contains("6%.*"), id="complex_like_escape_match", marks=[ - pytest.mark.notimpl( - ["datafusion", "polars"], raises=com.OperationNotDefinedError - ), + pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError), pytest.mark.broken( ["mssql"], reason="mssql doesn't allow like outside of filters", @@ -193,8 +187,7 @@ def uses_java_re(t): id="ilike", marks=[ pytest.mark.notimpl( - ["datafusion", "pyspark", "polars"], - raises=com.OperationNotDefinedError, + ["pyspark", "polars"], raises=com.OperationNotDefinedError ), pytest.mark.broken( ["mssql"], @@ -467,15 +460,7 @@ def uses_java_re(t): id="translate", marks=[ pytest.mark.notimpl( - [ - "clickhouse", - "duckdb", - "mssql", - "mysql", - "polars", - "druid", - "oracle", - ], + ["mssql", "mysql", "polars", "druid", "oracle"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -838,7 +823,6 @@ def uses_java_re(t): marks=pytest.mark.notimpl( [ "dask", - "datafusion", "impala", "mysql", "sqlite", @@ -894,6 +878,11 @@ def test_string(backend, alltypes, df, result_func, expected_func): ["mysql", "mssql", "druid", "oracle", "exasol"], raises=com.OperationNotDefinedError, ) +@pytest.mark.broken( + ["duckdb"], + reason="no idea, generated SQL looks very correct but this fails", + raises=AssertionError, +) def test_re_replace_global(con): expr = ibis.literal("aba").re_replace("a", "c") result = con.execute(expr) diff --git a/ibis/backends/tests/test_struct.py b/ibis/backends/tests/test_struct.py index aa37880f72e2..4c4897ab6cce 100644 --- a/ibis/backends/tests/test_struct.py +++ b/ibis/backends/tests/test_struct.py @@ -6,6 +6,7 @@ import pandas as pd import pandas.testing as tm import pytest +from pytest import param import ibis import 
ibis.expr.datatypes as dt @@ -17,20 +18,32 @@ ] -@pytest.mark.notimpl(["dask", "snowflake"]) -@pytest.mark.parametrize("field", ["a", "b", "c"]) -def test_single_field(backend, struct, struct_df, field): +@pytest.mark.notimpl(["dask"]) +@pytest.mark.parametrize( + ("field", "expected"), + [ + param( + "a", + [1.0, 2.0, 3.0, np.nan, 2.0, np.nan, 3.0], + id="a", + marks=pytest.mark.notimpl(["snowflake"]), + ), + param( + "b", ["banana", "apple", "orange", "banana", None, None, "orange"], id="b" + ), + param( + "c", + [2, 3, 4, 2, 3, np.nan, np.nan], + id="c", + marks=pytest.mark.notimpl(["snowflake"]), + ), + ], +) +def test_single_field(struct, field, expected): expr = struct.abc[field] - result = expr.execute().sort_values().reset_index(drop=True) - expected = ( - struct_df.abc.map( - lambda value: value[field] if isinstance(value, dict) else value - ) - .rename(field) - .sort_values() - .reset_index(drop=True) - ) - backend.assert_series_equal(result, expected) + result = expr.execute() + equal_nan = expr.type().is_numeric() + assert np.array_equal(result, expected, equal_nan=equal_nan) @pytest.mark.notimpl(["dask"]) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 9d1ec03b6fe9..019207c6f24b 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -16,7 +16,6 @@ import ibis.common.exceptions as com import ibis.expr.datatypes as dt from ibis.backends.base import _get_backend_names -from ibis.backends.pandas.execution.temporal import day_name from ibis.backends.tests.errors import ( ArrowInvalid, ClickHouseDatabaseError, @@ -33,34 +32,6 @@ from ibis.common.annotations import ValidationError -def day_name(obj: pd.core.indexes.accessors.DatetimeProperties | pd.Timestamp) -> str: - """Backwards compatible name-of-day getting function. - - Returns - ------- - str - The name of the day corresponding to `obj` - """ - try: - return obj.day_name() - except AttributeError: - return obj.weekday_name - - -def day_name(obj: pd.core.indexes.accessors.DatetimeProperties | pd.Timestamp) -> str: - """Backwards compatible name-of-day getting function. 
- - Returns - ------- - str - The name of the day corresponding to `obj` - """ - try: - return obj.day_name() - except AttributeError: - return obj.weekday_name - - @pytest.mark.parametrize("attr", ["year", "month", "day"]) @pytest.mark.parametrize( "expr_fn", @@ -671,10 +642,7 @@ def test_timestamp_truncate(backend, alltypes, df, unit): @pytest.mark.broken( ["polars", "druid"], reason="snaps to the UNIX epoch", raises=AssertionError ) -@pytest.mark.notimpl( - ["datafusion", "oracle"], - raises=com.OperationNotDefinedError, -) +@pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["druid"], raises=AttributeError, @@ -1053,11 +1021,9 @@ def convert_to_offset(x): marks=[ pytest.mark.notimpl( [ - "clickhouse", "dask", "impala", "mysql", - "pandas", "postgres", "risingwave", "snowflake", @@ -1082,7 +1048,6 @@ def convert_to_offset(x): marks=[ pytest.mark.notimpl( [ - "clickhouse", "sqlite", "postgres", "risingwave", @@ -1172,10 +1137,10 @@ def convert_to_offset(x): raises=ValidationError, reason="unsupported operand type(s) for -: 'StringColumn' and 'TimestampScalar'", ), - pytest.mark.xfail_version( - duckdb=["duckdb>=0.8.0"], + pytest.mark.broken( + ["duckdb"], raises=AssertionError, - reason="duckdb 0.8.0 returns DateOffset columns", + reason="duckdb returns dateoffsets", ), pytest.mark.broken( ["trino"], @@ -1680,12 +1645,6 @@ def test_interval_add_cast_column(backend, alltypes, df): ), "%Y%m%d", marks=[ - pytest.mark.notimpl( - [ - "pandas", - ], - raises=com.OperationNotDefinedError, - ), pytest.mark.notimpl( [ "pyspark", @@ -1790,7 +1749,7 @@ def test_strftime(backend, alltypes, df, expr_fn, pandas_pattern): reason="PySpark backend does not support timestamp from unix time with unit us. Supported unit is s.", ), pytest.mark.notimpl( - ["duckdb", "mssql", "clickhouse"], + ["mssql", "clickhouse", "duckdb"], raises=com.UnsupportedOperationError, reason="`us` unit is not supported!", ), @@ -1807,12 +1766,12 @@ def test_strftime(backend, alltypes, df, expr_fn, pandas_pattern): pytest.mark.notimpl( ["pyspark"], raises=com.UnsupportedArgumentError, - reason="PySpark backend does not support timestamp from unix time with unit ms. Supported unit is s.", + reason="PySpark backend does not support timestamp from unix time with unit ns. 
Supported unit is s.", ), pytest.mark.notimpl( ["duckdb", "mssql", "clickhouse"], raises=com.UnsupportedOperationError, - reason="`ms` unit is not supported!", + reason="`ns` unit is not supported!", ), pytest.mark.notimpl( ["flink"], @@ -1862,7 +1821,7 @@ def test_integer_to_timestamp(backend, con, unit): "(snowflake.connector.errors.ProgrammingError) 100096 (22007): " "Can't parse '11/01/10' as timestamp with format '%m/%d/%y'" ), - raises=sa.exc.ProgrammingError, + raises=SnowflakeProgrammingError, ), pytest.mark.never( ["flink"], @@ -1980,7 +1939,7 @@ def test_day_of_week_column(backend, alltypes, df): backend.assert_series_equal(result_index, expected_index, check_names=False) result_day = expr.full_name().name("tmp").execute() - expected_day = day_name(df.timestamp_col.dt) + expected_day = df.timestamp_col.dt.day_name() backend.assert_series_equal(result_day, expected_day, check_names=False) @@ -1995,7 +1954,7 @@ def test_day_of_week_column(backend, alltypes, df): ), param( lambda t: t.timestamp_col.day_of_week.full_name().length().sum(), - lambda s: day_name(s.dt).str.len().sum(), + lambda s: s.dt.day_name().str.len().sum(), id="day_of_week_full_name", marks=[ pytest.mark.notimpl( @@ -2077,10 +2036,7 @@ def test_now_from_projection(alltypes): } -@pytest.mark.notimpl( - ["pandas", "datafusion", "dask", "pyspark"], - raises=com.OperationNotDefinedError, -) +@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["druid"], raises=sa.exc.ProgrammingError, reason="SQL parse failed" ) @@ -2128,10 +2084,7 @@ def test_date_literal(con, backend): } -@pytest.mark.notimpl( - ["pandas", "datafusion", "dask", "pyspark"], - raises=com.OperationNotDefinedError, -) +@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["druid"], raises=sa.exc.ProgrammingError, @@ -2170,8 +2123,7 @@ def test_timestamp_literal(con, backend): @pytest.mark.notimpl( - ["pandas", "datafusion", "mysql", "dask", "pyspark"], - raises=com.OperationNotDefinedError, + ["pandas", "mysql", "dask", "pyspark"], raises=com.OperationNotDefinedError ) @pytest.mark.notimpl( ["mysql"], @@ -2298,14 +2250,12 @@ def test_time_literal(con, backend): @pytest.mark.broken( ["sqlite"], raises=AssertionError, reason="SQLite returns Timedelta from execution" ) -@pytest.mark.notimpl( - ["dask", "datafusion", "pandas"], raises=com.OperationNotDefinedError -) +@pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) @pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.parametrize( "microsecond", [ - 0, + param(0, id="second"), param( 561021, marks=[ @@ -2328,9 +2278,9 @@ def test_time_literal(con, backend): ), ), ], + id="subsecond", ), ], - ids=["second", "subsecond"], ) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) def test_extract_time_from_timestamp(con, microsecond): @@ -2357,9 +2307,8 @@ def test_extract_time_from_timestamp(con, microsecond): @pytest.mark.broken( ["snowflake"], - "(snowflake.connector.errors.ProgrammingError) 001007 (22023): SQL compilation error:" - "invalid type [CAST(INTERVAL_LITERAL('second', '1') AS VARIANT)] for parameter 'TO_VARIANT'", - raises=sa.exc.ProgrammingError, + "interval literal is not supported in this form.", + raises=SnowflakeProgrammingError, ) @pytest.mark.broken( ["druid"], @@ -2388,7 +2337,9 @@ def test_extract_time_from_timestamp(con, microsecond): raises=(NotImplementedError, AttributeError), ) @pytest.mark.broken( - ["bigquery"], reason="BigQuery 
returns DateOffset arrays", raises=AssertionError + ["bigquery", "duckdb"], + reason="BigQuery returns DateOffset arrays", + raises=AssertionError, ) @pytest.mark.xfail_version( datafusion=["datafusion"], @@ -2400,11 +2351,6 @@ def test_extract_time_from_timestamp(con, microsecond): reason="Driver doesn't know how to handle intervals", raises=ClickHouseDatabaseError, ) -@pytest.mark.xfail_version( - duckdb=["duckdb>=0.8.0"], - raises=AssertionError, - reason="duckdb 0.8.0 returns DateOffset columns", -) @pytest.mark.notimpl( ["flink"], raises=Py4JJavaError, @@ -2423,10 +2369,7 @@ def test_interval_literal(con, backend): assert con.execute(expr.typeof()) == INTERVAL_BACKEND_TYPES[backend_name] -@pytest.mark.notimpl( - ["pandas", "datafusion", "dask", "pyspark"], - raises=com.OperationNotDefinedError, -) +@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["mysql"], raises=sa.exc.ProgrammingError, @@ -2461,10 +2404,7 @@ def test_date_column_from_ymd(backend, con, alltypes, df): backend.assert_series_equal(golden, result.timestamp_col) -@pytest.mark.notimpl( - ["pandas", "datafusion", "dask", "pyspark"], - raises=com.OperationNotDefinedError, -) +@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["druid"], raises=AttributeError, @@ -2836,7 +2776,7 @@ def test_timestamp_precision_output(con, ts, scale, unit): marks=[ pytest.mark.notimpl( ["clickhouse"], - raises=NotImplementedError, + raises=com.OperationNotDefinedError, reason="time types not yet implemented in ibis for the clickhouse backend", ) ], @@ -2906,7 +2846,7 @@ def test_delta(con, start, end, unit, expected): ), pytest.mark.notimpl( ["snowflake"], - raises=sa.exc.ProgrammingError, + raises=SnowflakeProgrammingError, reason="snowflake doesn't support sub-second interval precision", ), pytest.mark.notimpl( @@ -2980,8 +2920,8 @@ def test_delta(con, start, end, unit, expected): reason="function date_bin(interval, timestamp without time zone, timestamp without time zone) does not exist", ) def test_timestamp_bucket(backend, kws, pd_freq): - ts = backend.functional_alltypes.timestamp_col.name("ts").execute() - res = backend.functional_alltypes.timestamp_col.bucket(**kws).name("ts").execute() + ts = backend.functional_alltypes.timestamp_col.execute().rename("ts") + res = backend.functional_alltypes.timestamp_col.bucket(**kws).execute().rename("ts") sol = ts.dt.floor(pd_freq) backend.assert_series_equal(res, sol) @@ -3019,11 +2959,13 @@ def test_timestamp_bucket(backend, kws, pd_freq): reason="function date_bin(interval, timestamp without time zone, timestamp without time zone) does not exist", ) def test_timestamp_bucket_offset(backend, offset_mins): - ts = backend.functional_alltypes.timestamp_col.name("ts") - expr = ts.bucket(minutes=5, offset=ibis.interval(minutes=offset_mins)).name("ts") - res = expr.execute().astype("datetime64[ns]") + ts = backend.functional_alltypes.timestamp_col + expr = ts.bucket(minutes=5, offset=ibis.interval(minutes=offset_mins)) + res = expr.execute().astype("datetime64[ns]").rename("ts") td = pd.Timedelta(minutes=offset_mins) - sol = ((ts.execute() - td).dt.floor("300s") + td).astype("datetime64[ns]") + sol = ((ts.execute().rename("ts") - td).dt.floor("300s") + td).astype( + "datetime64[ns]" + ) backend.assert_series_equal(res, sol) diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 1b43317a52c9..036b424dad0e 100644 --- 
a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -31,6 +31,12 @@ ) +try: + from snowflake.connector.errors import ProgrammingError as SnowflakeProgrammingError +except ImportError: + SnowflakeProgrammingError = None + + # adapted from https://gist.github.com/xmnlab/2c1f93df1a6c6bde4e32c8579117e9cc def pandas_ntile(x, bucket: int): """Divide values into a number of buckets. @@ -110,11 +116,6 @@ def calc_zscore(s): reason="upstream is broken; returns all nulls", raises=AssertionError, ), - pytest.mark.broken( - ["datafusion"], - reason="Exception: Internal error: Expects default value to have Int64 type.", - raises=BaseException, - ), pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl( ["flink"], @@ -645,6 +646,7 @@ def test_grouped_unbounded_window( # 1) Grouped # 2) Ordered if `ordered` is True df = df.sort_values("id") if ordered else df + expected = df.assign(val=expected_fn(df.groupby("string_col"))) expected = expected.set_index("id").sort_index() @@ -661,7 +663,7 @@ def test_grouped_unbounded_window( ], ) @pytest.mark.broken(["snowflake"], raises=AssertionError) -@pytest.mark.broken(["dask", "pandas", "mssql"], raises=AssertionError) +@pytest.mark.broken(["dask", "mssql"], raises=AssertionError) @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["flink"], @@ -728,7 +730,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): True, id="ordered-mean", marks=[ - pytest.mark.broken(["pandas"], raises=AssertionError), pytest.mark.notimpl( ["dask"], raises=NotImplementedError, @@ -805,7 +806,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): ], raises=com.OperationNotDefinedError, ), - pytest.mark.broken(["pandas"], raises=AssertionError), pytest.mark.broken( ["dask"], raises=ValueError, @@ -875,11 +875,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): raises=AssertionError, ), pytest.mark.broken(["oracle"], raises=AssertionError), - pytest.mark.broken( - ["datafusion"], - raises=Exception, - reason="Exception: Internal error: Expects default value to have Int64 type.", - ), pytest.mark.notimpl( ["pyspark"], raises=PySparkAnalysisException, @@ -895,7 +890,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): pytest.mark.notyet( ["snowflake"], reason="backend requires ordering", - raises=sa.exc.ProgrammingError, + raises=SnowflakeProgrammingError, ), pytest.mark.notimpl( ["risingwave"], @@ -936,11 +931,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): ), raises=AssertionError, ), - pytest.mark.broken( - ["datafusion"], - raises=Exception, - reason="Exception: Internal error: Expects default value to have Int64 type.", - ), pytest.mark.broken(["oracle"], raises=AssertionError), pytest.mark.notimpl( ["pyspark"], @@ -957,7 +947,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): pytest.mark.notyet( ["snowflake"], reason="backend requires ordering", - raises=sa.exc.ProgrammingError, + raises=SnowflakeProgrammingError, ), pytest.mark.notimpl( ["risingwave"], @@ -1066,7 +1056,11 @@ def test_ungrouped_unbounded_window( @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["snowflake"], raises=sa.exc.ProgrammingError) +@pytest.mark.notimpl( + ["snowflake"], + raises=SnowflakeProgrammingError, + reason="snowflake doesn't support sliding range windows", +) @pytest.mark.notimpl( ["impala"], raises=ImpalaHiveServer2Error, reason="limited RANGE support" ) @@ 
-1168,11 +1162,6 @@ def test_percent_rank_whole_table_no_order_by(backend, alltypes, df): @pytest.mark.broken( ["pandas"], reason="pandas returns incorrect results", raises=AssertionError ) -@pytest.mark.broken( - ["datafusion"], - reason="Exception: External error: Internal error: Expects default value to have Int64 type", - raises=Exception, -) def test_grouped_ordered_window_coalesce(backend, alltypes, df): t = alltypes expr = ( diff --git a/ibis/backends/tests/tpch/conftest.py b/ibis/backends/tests/tpch/conftest.py index 7339d8e47fb7..cfb85452841b 100644 --- a/ibis/backends/tests/tpch/conftest.py +++ b/ibis/backends/tests/tpch/conftest.py @@ -11,6 +11,7 @@ from dateutil.relativedelta import relativedelta import ibis +from ibis.formats.pandas import PandasData if TYPE_CHECKING: import ibis.expr.types as ir @@ -66,29 +67,28 @@ def wrapper(*args, backend, snapshot, **kwargs): raw_sql = sql.sql(dialect="duckdb", pretty=True) - expected_expr = backend.connection.sql( - # in theory this should allow us to use one dialect for every backend - raw_sql, - dialect="duckdb", - ) + expected_expr = backend.connection.sql(raw_sql, dialect="duckdb") result_expr = test(*args, **kwargs) - result = result_expr.execute() + ibis_sql = ibis.to_sql(result_expr, dialect=backend_name) + + assert result_expr._find_backend(use_default=False) is backend.connection + result = backend.connection.execute(result_expr) assert not result.empty expected = expected_expr.execute() - assert not expected.empty - assert list(map(str.lower, expected.columns)) == result.columns.tolist() expected.columns = result.columns + expected = PandasData.convert_table(expected, result_expr.schema()) + assert not expected.empty + assert len(expected) == len(result) backend.assert_frame_equal(result, expected, check_dtype=False) - # only produce sql if the execution passes - result_expr_sql = ibis.to_sql(result_expr, dialect=backend_name) - snapshot.assert_match(result_expr_sql, sql_path_name) + # only write sql if the execution passes + snapshot.assert_match(ibis_sql, sql_path_name) return wrapper diff --git a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql index b44c9d654e33..31b9d111cde6 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql @@ -1,14 +1,14 @@ SELECT - t0.l_returnflag, - t0.l_linestatus, - t0.sum_qty, - t0.sum_base_price, - t0.sum_disc_price, - t0.sum_charge, - t0.avg_qty, - t0.avg_price, - t0.avg_disc, - t0.count_order + t2.l_returnflag AS l_returnflag, + t2.l_linestatus AS l_linestatus, + t2.sum_qty AS sum_qty, + t2.sum_base_price AS sum_base_price, + t2.sum_disc_price AS sum_disc_price, + t2.sum_charge AS sum_charge, + t2.avg_qty AS avg_qty, + t2.avg_price AS avg_price, + t2.avg_disc AS avg_disc, + t2.count_order AS count_order FROM ( SELECT t1.l_returnflag AS l_returnflag, @@ -19,8 +19,10 @@ FROM ( CAST(1 AS TINYINT) - t1.l_discount )) AS sum_disc_price, SUM( - t1.l_extendedprice * ( - CAST(1 AS TINYINT) - t1.l_discount + ( + t1.l_extendedprice * ( + CAST(1 AS TINYINT) - t1.l_discount + ) ) * ( t1.l_tax + CAST(1 AS TINYINT) ) @@ -29,13 +31,32 @@ FROM ( AVG(t1.l_extendedprice) AS avg_price, AVG(t1.l_discount) AS avg_disc, COUNT(*) AS count_order - FROM main.lineitem AS t1 - WHERE - t1.l_shipdate <= MAKE_DATE(1998, 9, 2) + FROM ( + SELECT + t0.l_orderkey AS l_orderkey, + t0.l_partkey AS l_partkey, + t0.l_suppkey AS 
l_suppkey, + t0.l_linenumber AS l_linenumber, + t0.l_quantity AS l_quantity, + t0.l_extendedprice AS l_extendedprice, + t0.l_discount AS l_discount, + t0.l_tax AS l_tax, + t0.l_returnflag AS l_returnflag, + t0.l_linestatus AS l_linestatus, + t0.l_shipdate AS l_shipdate, + t0.l_commitdate AS l_commitdate, + t0.l_receiptdate AS l_receiptdate, + t0.l_shipinstruct AS l_shipinstruct, + t0.l_shipmode AS l_shipmode, + t0.l_comment AS l_comment + FROM lineitem AS t0 + WHERE + t0.l_shipdate <= MAKE_DATE(1998, 9, 2) + ) AS t1 GROUP BY 1, 2 -) AS t0 +) AS t2 ORDER BY - t0.l_returnflag ASC, - t0.l_linestatus ASC \ No newline at end of file + t2.l_returnflag ASC, + t2.l_linestatus ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql index 0bea3f3b2cdf..9f1cf92c38e2 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql @@ -1,59 +1,62 @@ -WITH t0 AS ( - SELECT - t2."L_ORDERKEY" AS "l_orderkey", - t2."L_PARTKEY" AS "l_partkey", - t2."L_SUPPKEY" AS "l_suppkey", - t2."L_LINENUMBER" AS "l_linenumber", - t2."L_QUANTITY" AS "l_quantity", - t2."L_EXTENDEDPRICE" AS "l_extendedprice", - t2."L_DISCOUNT" AS "l_discount", - t2."L_TAX" AS "l_tax", - t2."L_RETURNFLAG" AS "l_returnflag", - t2."L_LINESTATUS" AS "l_linestatus", - t2."L_SHIPDATE" AS "l_shipdate", - t2."L_COMMITDATE" AS "l_commitdate", - t2."L_RECEIPTDATE" AS "l_receiptdate", - t2."L_SHIPINSTRUCT" AS "l_shipinstruct", - t2."L_SHIPMODE" AS "l_shipmode", - t2."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t2 -) SELECT - t1."l_returnflag", - t1."l_linestatus", - t1."sum_qty", - t1."sum_base_price", - t1."sum_disc_price", - t1."sum_charge", - t1."avg_qty", - t1."avg_price", - t1."avg_disc", - t1."count_order" + "t2"."l_returnflag" AS "l_returnflag", + "t2"."l_linestatus" AS "l_linestatus", + "t2"."sum_qty" AS "sum_qty", + "t2"."sum_base_price" AS "sum_base_price", + "t2"."sum_disc_price" AS "sum_disc_price", + "t2"."sum_charge" AS "sum_charge", + "t2"."avg_qty" AS "avg_qty", + "t2"."avg_price" AS "avg_price", + "t2"."avg_disc" AS "avg_disc", + "t2"."count_order" AS "count_order" FROM ( SELECT - t0."l_returnflag" AS "l_returnflag", - t0."l_linestatus" AS "l_linestatus", - SUM(t0."l_quantity") AS "sum_qty", - SUM(t0."l_extendedprice") AS "sum_base_price", - SUM(t0."l_extendedprice" * ( - 1 - t0."l_discount" + "t1"."l_returnflag" AS "l_returnflag", + "t1"."l_linestatus" AS "l_linestatus", + SUM("t1"."l_quantity") AS "sum_qty", + SUM("t1"."l_extendedprice") AS "sum_base_price", + SUM("t1"."l_extendedprice" * ( + 1 - "t1"."l_discount" )) AS "sum_disc_price", - SUM(t0."l_extendedprice" * ( - 1 - t0."l_discount" - ) * ( - t0."l_tax" + 1 - )) AS "sum_charge", - AVG(t0."l_quantity") AS "avg_qty", - AVG(t0."l_extendedprice") AS "avg_price", - AVG(t0."l_discount") AS "avg_disc", + SUM( + ( + "t1"."l_extendedprice" * ( + 1 - "t1"."l_discount" + ) + ) * ( + "t1"."l_tax" + 1 + ) + ) AS "sum_charge", + AVG("t1"."l_quantity") AS "avg_qty", + AVG("t1"."l_extendedprice") AS "avg_price", + AVG("t1"."l_discount") AS "avg_disc", COUNT(*) AS "count_order" - FROM t0 - WHERE - t0."l_shipdate" <= DATE_FROM_PARTS(1998, 9, 2) + FROM ( + SELECT + "t0"."L_ORDERKEY" AS "l_orderkey", + "t0"."L_PARTKEY" AS "l_partkey", + "t0"."L_SUPPKEY" AS "l_suppkey", + "t0"."L_LINENUMBER" AS "l_linenumber", + 
"t0"."L_QUANTITY" AS "l_quantity", + "t0"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t0"."L_DISCOUNT" AS "l_discount", + "t0"."L_TAX" AS "l_tax", + "t0"."L_RETURNFLAG" AS "l_returnflag", + "t0"."L_LINESTATUS" AS "l_linestatus", + "t0"."L_SHIPDATE" AS "l_shipdate", + "t0"."L_COMMITDATE" AS "l_commitdate", + "t0"."L_RECEIPTDATE" AS "l_receiptdate", + "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t0"."L_SHIPMODE" AS "l_shipmode", + "t0"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t0" + WHERE + "t0"."L_SHIPDATE" <= DATEFROMPARTS(1998, 9, 2) + ) AS "t1" GROUP BY 1, 2 -) AS t1 +) AS "t2" ORDER BY - t1."l_returnflag" ASC, - t1."l_linestatus" ASC \ No newline at end of file + "t2"."l_returnflag" ASC, + "t2"."l_linestatus" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql index 0742d4a2c8d3..b8ea068fcd7b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql @@ -1,84 +1,116 @@ -WITH t0 AS ( - SELECT - t2.p_partkey AS p_partkey, - t2.p_name AS p_name, - t2.p_mfgr AS p_mfgr, - t2.p_brand AS p_brand, - t2.p_type AS p_type, - t2.p_size AS p_size, - t2.p_container AS p_container, - t2.p_retailprice AS p_retailprice, - t2.p_comment AS p_comment, - t3.ps_partkey AS ps_partkey, - t3.ps_suppkey AS ps_suppkey, - t3.ps_availqty AS ps_availqty, - t3.ps_supplycost AS ps_supplycost, - t3.ps_comment AS ps_comment, - t4.s_suppkey AS s_suppkey, - t4.s_name AS s_name, - t4.s_address AS s_address, - t4.s_nationkey AS s_nationkey, - t4.s_phone AS s_phone, - t4.s_acctbal AS s_acctbal, - t4.s_comment AS s_comment, - t5.n_nationkey AS n_nationkey, - t5.n_name AS n_name, - t5.n_regionkey AS n_regionkey, - t5.n_comment AS n_comment, - t6.r_regionkey AS r_regionkey, - t6.r_name AS r_name, - t6.r_comment AS r_comment - FROM main.part AS t2 - JOIN main.partsupp AS t3 - ON t2.p_partkey = t3.ps_partkey - JOIN main.supplier AS t4 - ON t4.s_suppkey = t3.ps_suppkey - JOIN main.nation AS t5 - ON t4.s_nationkey = t5.n_nationkey - JOIN main.region AS t6 - ON t5.n_regionkey = t6.r_regionkey - WHERE - t2.p_size = CAST(15 AS TINYINT) - AND t2.p_type LIKE '%BRASS' - AND t6.r_name = 'EUROPE' - AND t3.ps_supplycost = ( - SELECT - MIN(t3.ps_supplycost) AS "Min(ps_supplycost)" - FROM main.partsupp AS t3 - JOIN main.supplier AS t4 - ON t4.s_suppkey = t3.ps_suppkey - JOIN main.nation AS t5 - ON t4.s_nationkey = t5.n_nationkey - JOIN main.region AS t6 - ON t5.n_regionkey = t6.r_regionkey - WHERE - t6.r_name = 'EUROPE' AND t2.p_partkey = t3.ps_partkey - ) -) SELECT - t1.s_acctbal, - t1.s_name, - t1.n_name, - t1.p_partkey, - t1.p_mfgr, - t1.s_address, - t1.s_phone, - t1.s_comment + t19.s_acctbal AS s_acctbal, + t19.s_name AS s_name, + t19.n_name AS n_name, + t19.p_partkey AS p_partkey, + t19.p_mfgr AS p_mfgr, + t19.s_address AS s_address, + t19.s_phone AS s_phone, + t19.s_comment AS s_comment FROM ( SELECT - t0.s_acctbal AS s_acctbal, - t0.s_name AS s_name, - t0.n_name AS n_name, t0.p_partkey AS p_partkey, + t0.p_name AS p_name, t0.p_mfgr AS p_mfgr, - t0.s_address AS s_address, - t0.s_phone AS s_phone, - t0.s_comment AS s_comment - FROM t0 -) AS t1 + t0.p_brand AS p_brand, + t0.p_type AS p_type, + t0.p_size AS p_size, + t0.p_container AS p_container, + t0.p_retailprice AS p_retailprice, + t0.p_comment AS p_comment, + t5.ps_partkey AS ps_partkey, + t5.ps_suppkey AS ps_suppkey, + t5.ps_availqty AS 
ps_availqty, + t5.ps_supplycost AS ps_supplycost, + t5.ps_comment AS ps_comment, + t6.s_suppkey AS s_suppkey, + t6.s_name AS s_name, + t6.s_address AS s_address, + t6.s_nationkey AS s_nationkey, + t6.s_phone AS s_phone, + t6.s_acctbal AS s_acctbal, + t6.s_comment AS s_comment, + t8.n_nationkey AS n_nationkey, + t8.n_name AS n_name, + t8.n_regionkey AS n_regionkey, + t8.n_comment AS n_comment, + t10.r_regionkey AS r_regionkey, + t10.r_name AS r_name, + t10.r_comment AS r_comment + FROM part AS t0 + INNER JOIN partsupp AS t5 + ON t0.p_partkey = t5.ps_partkey + INNER JOIN supplier AS t6 + ON t6.s_suppkey = t5.ps_suppkey + INNER JOIN nation AS t8 + ON t6.s_nationkey = t8.n_nationkey + INNER JOIN region AS t10 + ON t8.n_regionkey = t10.r_regionkey +) AS t19 +WHERE + t19.p_size = CAST(15 AS TINYINT) + AND t19.p_type LIKE '%BRASS' + AND t19.r_name = 'EUROPE' + AND t19.ps_supplycost = ( + SELECT + MIN(t21.ps_supplycost) AS "Min(ps_supplycost)" + FROM ( + SELECT + t20.ps_partkey AS ps_partkey, + t20.ps_suppkey AS ps_suppkey, + t20.ps_availqty AS ps_availqty, + t20.ps_supplycost AS ps_supplycost, + t20.ps_comment AS ps_comment, + t20.s_suppkey AS s_suppkey, + t20.s_name AS s_name, + t20.s_address AS s_address, + t20.s_nationkey AS s_nationkey, + t20.s_phone AS s_phone, + t20.s_acctbal AS s_acctbal, + t20.s_comment AS s_comment, + t20.n_nationkey AS n_nationkey, + t20.n_name AS n_name, + t20.n_regionkey AS n_regionkey, + t20.n_comment AS n_comment, + t20.r_regionkey AS r_regionkey, + t20.r_name AS r_name, + t20.r_comment AS r_comment + FROM ( + SELECT + t1.ps_partkey AS ps_partkey, + t1.ps_suppkey AS ps_suppkey, + t1.ps_availqty AS ps_availqty, + t1.ps_supplycost AS ps_supplycost, + t1.ps_comment AS ps_comment, + t7.s_suppkey AS s_suppkey, + t7.s_name AS s_name, + t7.s_address AS s_address, + t7.s_nationkey AS s_nationkey, + t7.s_phone AS s_phone, + t7.s_acctbal AS s_acctbal, + t7.s_comment AS s_comment, + t9.n_nationkey AS n_nationkey, + t9.n_name AS n_name, + t9.n_regionkey AS n_regionkey, + t9.n_comment AS n_comment, + t11.r_regionkey AS r_regionkey, + t11.r_name AS r_name, + t11.r_comment AS r_comment + FROM partsupp AS t1 + INNER JOIN supplier AS t7 + ON t7.s_suppkey = t1.ps_suppkey + INNER JOIN nation AS t9 + ON t7.s_nationkey = t9.n_nationkey + INNER JOIN region AS t11 + ON t9.n_regionkey = t11.r_regionkey + ) AS t20 + WHERE + t20.r_name = 'EUROPE' AND t19.p_partkey = t20.ps_partkey + ) AS t21 + ) ORDER BY - t1.s_acctbal DESC, - t1.n_name ASC, - t1.s_name ASC, - t1.p_partkey ASC + t19.s_acctbal DESC, + t19.n_name ASC, + t19.s_name ASC, + t19.p_partkey ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql index 1fe27156cd12..01cfa3a33d2e 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql @@ -1,127 +1,190 @@ -WITH t1 AS ( - SELECT - t7."P_PARTKEY" AS "p_partkey", - t7."P_NAME" AS "p_name", - t7."P_MFGR" AS "p_mfgr", - t7."P_BRAND" AS "p_brand", - t7."P_TYPE" AS "p_type", - t7."P_SIZE" AS "p_size", - t7."P_CONTAINER" AS "p_container", - t7."P_RETAILPRICE" AS "p_retailprice", - t7."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t7 -), t0 AS ( - SELECT - t7."PS_PARTKEY" AS "ps_partkey", - t7."PS_SUPPKEY" AS "ps_suppkey", - t7."PS_AVAILQTY" AS "ps_availqty", - t7."PS_SUPPLYCOST" AS "ps_supplycost", - 
t7."PS_COMMENT" AS "ps_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PARTSUPP" AS t7 -), t2 AS ( - SELECT - t7."S_SUPPKEY" AS "s_suppkey", - t7."S_NAME" AS "s_name", - t7."S_ADDRESS" AS "s_address", - t7."S_NATIONKEY" AS "s_nationkey", - t7."S_PHONE" AS "s_phone", - t7."S_ACCTBAL" AS "s_acctbal", - t7."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t7 -), t3 AS ( - SELECT - t7."N_NATIONKEY" AS "n_nationkey", - t7."N_NAME" AS "n_name", - t7."N_REGIONKEY" AS "n_regionkey", - t7."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS t7 -), t4 AS ( - SELECT - t7."R_REGIONKEY" AS "r_regionkey", - t7."R_NAME" AS "r_name", - t7."R_COMMENT" AS "r_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."REGION" AS t7 -), t5 AS ( - SELECT - t1."p_partkey" AS "p_partkey", - t1."p_name" AS "p_name", - t1."p_mfgr" AS "p_mfgr", - t1."p_brand" AS "p_brand", - t1."p_type" AS "p_type", - t1."p_size" AS "p_size", - t1."p_container" AS "p_container", - t1."p_retailprice" AS "p_retailprice", - t1."p_comment" AS "p_comment", - t0."ps_partkey" AS "ps_partkey", - t0."ps_suppkey" AS "ps_suppkey", - t0."ps_availqty" AS "ps_availqty", - t0."ps_supplycost" AS "ps_supplycost", - t0."ps_comment" AS "ps_comment", - t2."s_suppkey" AS "s_suppkey", - t2."s_name" AS "s_name", - t2."s_address" AS "s_address", - t2."s_nationkey" AS "s_nationkey", - t2."s_phone" AS "s_phone", - t2."s_acctbal" AS "s_acctbal", - t2."s_comment" AS "s_comment", - t3."n_nationkey" AS "n_nationkey", - t3."n_name" AS "n_name", - t3."n_regionkey" AS "n_regionkey", - t3."n_comment" AS "n_comment", - t4."r_regionkey" AS "r_regionkey", - t4."r_name" AS "r_name", - t4."r_comment" AS "r_comment" - FROM t1 - JOIN t0 - ON t1."p_partkey" = t0."ps_partkey" - JOIN t2 - ON t2."s_suppkey" = t0."ps_suppkey" - JOIN t3 - ON t2."s_nationkey" = t3."n_nationkey" - JOIN t4 - ON t3."n_regionkey" = t4."r_regionkey" - WHERE - t1."p_size" = 15 - AND t1."p_type" LIKE '%BRASS' - AND t4."r_name" = 'EUROPE' - AND t0."ps_supplycost" = ( - SELECT - MIN(t0."ps_supplycost") AS "Min(ps_supplycost)" - FROM t0 - JOIN t2 - ON t2."s_suppkey" = t0."ps_suppkey" - JOIN t3 - ON t2."s_nationkey" = t3."n_nationkey" - JOIN t4 - ON t3."n_regionkey" = t4."r_regionkey" - WHERE - t4."r_name" = 'EUROPE' AND t1."p_partkey" = t0."ps_partkey" - ) -) SELECT - t6."s_acctbal", - t6."s_name", - t6."n_name", - t6."p_partkey", - t6."p_mfgr", - t6."s_address", - t6."s_phone", - t6."s_comment" + "t24"."s_acctbal" AS "s_acctbal", + "t24"."s_name" AS "s_name", + "t24"."n_name" AS "n_name", + "t24"."p_partkey" AS "p_partkey", + "t24"."p_mfgr" AS "p_mfgr", + "t24"."s_address" AS "s_address", + "t24"."s_phone" AS "s_phone", + "t24"."s_comment" AS "s_comment" FROM ( SELECT - t5."s_acctbal" AS "s_acctbal", - t5."s_name" AS "s_name", - t5."n_name" AS "n_name", - t5."p_partkey" AS "p_partkey", - t5."p_mfgr" AS "p_mfgr", - t5."s_address" AS "s_address", - t5."s_phone" AS "s_phone", - t5."s_comment" AS "s_comment" - FROM t5 -) AS t6 + "t5"."p_partkey" AS "p_partkey", + "t5"."p_name" AS "p_name", + "t5"."p_mfgr" AS "p_mfgr", + "t5"."p_brand" AS "p_brand", + "t5"."p_type" AS "p_type", + "t5"."p_size" AS "p_size", + "t5"."p_container" AS "p_container", + "t5"."p_retailprice" AS "p_retailprice", + "t5"."p_comment" AS "p_comment", + "t10"."ps_partkey" AS "ps_partkey", + "t10"."ps_suppkey" AS "ps_suppkey", + "t10"."ps_availqty" AS "ps_availqty", + "t10"."ps_supplycost" AS "ps_supplycost", + "t10"."ps_comment" AS "ps_comment", + "t11"."s_suppkey" AS "s_suppkey", + 
"t11"."s_name" AS "s_name", + "t11"."s_address" AS "s_address", + "t11"."s_nationkey" AS "s_nationkey", + "t11"."s_phone" AS "s_phone", + "t11"."s_acctbal" AS "s_acctbal", + "t11"."s_comment" AS "s_comment", + "t13"."n_nationkey" AS "n_nationkey", + "t13"."n_name" AS "n_name", + "t13"."n_regionkey" AS "n_regionkey", + "t13"."n_comment" AS "n_comment", + "t15"."r_regionkey" AS "r_regionkey", + "t15"."r_name" AS "r_name", + "t15"."r_comment" AS "r_comment" + FROM ( + SELECT + "t0"."P_PARTKEY" AS "p_partkey", + "t0"."P_NAME" AS "p_name", + "t0"."P_MFGR" AS "p_mfgr", + "t0"."P_BRAND" AS "p_brand", + "t0"."P_TYPE" AS "p_type", + "t0"."P_SIZE" AS "p_size", + "t0"."P_CONTAINER" AS "p_container", + "t0"."P_RETAILPRICE" AS "p_retailprice", + "t0"."P_COMMENT" AS "p_comment" + FROM "PART" AS "t0" + ) AS "t5" + INNER JOIN ( + SELECT + "t1"."PS_PARTKEY" AS "ps_partkey", + "t1"."PS_SUPPKEY" AS "ps_suppkey", + "t1"."PS_AVAILQTY" AS "ps_availqty", + "t1"."PS_SUPPLYCOST" AS "ps_supplycost", + "t1"."PS_COMMENT" AS "ps_comment" + FROM "PARTSUPP" AS "t1" + ) AS "t10" + ON "t5"."p_partkey" = "t10"."ps_partkey" + INNER JOIN ( + SELECT + "t2"."S_SUPPKEY" AS "s_suppkey", + "t2"."S_NAME" AS "s_name", + "t2"."S_ADDRESS" AS "s_address", + "t2"."S_NATIONKEY" AS "s_nationkey", + "t2"."S_PHONE" AS "s_phone", + "t2"."S_ACCTBAL" AS "s_acctbal", + "t2"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t2" + ) AS "t11" + ON "t11"."s_suppkey" = "t10"."ps_suppkey" + INNER JOIN ( + SELECT + "t3"."N_NATIONKEY" AS "n_nationkey", + "t3"."N_NAME" AS "n_name", + "t3"."N_REGIONKEY" AS "n_regionkey", + "t3"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t3" + ) AS "t13" + ON "t11"."s_nationkey" = "t13"."n_nationkey" + INNER JOIN ( + SELECT + "t4"."R_REGIONKEY" AS "r_regionkey", + "t4"."R_NAME" AS "r_name", + "t4"."R_COMMENT" AS "r_comment" + FROM "REGION" AS "t4" + ) AS "t15" + ON "t13"."n_regionkey" = "t15"."r_regionkey" +) AS "t24" +WHERE + "t24"."p_size" = 15 + AND "t24"."p_type" LIKE '%BRASS' + AND "t24"."r_name" = 'EUROPE' + AND "t24"."ps_supplycost" = ( + SELECT + MIN("t26"."ps_supplycost") AS "Min(ps_supplycost)" + FROM ( + SELECT + "t25"."ps_partkey" AS "ps_partkey", + "t25"."ps_suppkey" AS "ps_suppkey", + "t25"."ps_availqty" AS "ps_availqty", + "t25"."ps_supplycost" AS "ps_supplycost", + "t25"."ps_comment" AS "ps_comment", + "t25"."s_suppkey" AS "s_suppkey", + "t25"."s_name" AS "s_name", + "t25"."s_address" AS "s_address", + "t25"."s_nationkey" AS "s_nationkey", + "t25"."s_phone" AS "s_phone", + "t25"."s_acctbal" AS "s_acctbal", + "t25"."s_comment" AS "s_comment", + "t25"."n_nationkey" AS "n_nationkey", + "t25"."n_name" AS "n_name", + "t25"."n_regionkey" AS "n_regionkey", + "t25"."n_comment" AS "n_comment", + "t25"."r_regionkey" AS "r_regionkey", + "t25"."r_name" AS "r_name", + "t25"."r_comment" AS "r_comment" + FROM ( + SELECT + "t6"."ps_partkey" AS "ps_partkey", + "t6"."ps_suppkey" AS "ps_suppkey", + "t6"."ps_availqty" AS "ps_availqty", + "t6"."ps_supplycost" AS "ps_supplycost", + "t6"."ps_comment" AS "ps_comment", + "t12"."s_suppkey" AS "s_suppkey", + "t12"."s_name" AS "s_name", + "t12"."s_address" AS "s_address", + "t12"."s_nationkey" AS "s_nationkey", + "t12"."s_phone" AS "s_phone", + "t12"."s_acctbal" AS "s_acctbal", + "t12"."s_comment" AS "s_comment", + "t14"."n_nationkey" AS "n_nationkey", + "t14"."n_name" AS "n_name", + "t14"."n_regionkey" AS "n_regionkey", + "t14"."n_comment" AS "n_comment", + "t16"."r_regionkey" AS "r_regionkey", + "t16"."r_name" AS "r_name", + "t16"."r_comment" AS "r_comment" + FROM ( + SELECT + 
"t1"."PS_PARTKEY" AS "ps_partkey", + "t1"."PS_SUPPKEY" AS "ps_suppkey", + "t1"."PS_AVAILQTY" AS "ps_availqty", + "t1"."PS_SUPPLYCOST" AS "ps_supplycost", + "t1"."PS_COMMENT" AS "ps_comment" + FROM "PARTSUPP" AS "t1" + ) AS "t6" + INNER JOIN ( + SELECT + "t2"."S_SUPPKEY" AS "s_suppkey", + "t2"."S_NAME" AS "s_name", + "t2"."S_ADDRESS" AS "s_address", + "t2"."S_NATIONKEY" AS "s_nationkey", + "t2"."S_PHONE" AS "s_phone", + "t2"."S_ACCTBAL" AS "s_acctbal", + "t2"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t2" + ) AS "t12" + ON "t12"."s_suppkey" = "t6"."ps_suppkey" + INNER JOIN ( + SELECT + "t3"."N_NATIONKEY" AS "n_nationkey", + "t3"."N_NAME" AS "n_name", + "t3"."N_REGIONKEY" AS "n_regionkey", + "t3"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t3" + ) AS "t14" + ON "t12"."s_nationkey" = "t14"."n_nationkey" + INNER JOIN ( + SELECT + "t4"."R_REGIONKEY" AS "r_regionkey", + "t4"."R_NAME" AS "r_name", + "t4"."R_COMMENT" AS "r_comment" + FROM "REGION" AS "t4" + ) AS "t16" + ON "t14"."n_regionkey" = "t16"."r_regionkey" + ) AS "t25" + WHERE + "t25"."r_name" = 'EUROPE' AND "t24"."p_partkey" = "t25"."ps_partkey" + ) AS "t26" + ) ORDER BY - t6."s_acctbal" DESC, - t6."n_name" ASC, - t6."s_name" ASC, - t6."p_partkey" ASC + "t24"."s_acctbal" DESC NULLS LAST, + "t24"."n_name" ASC, + "t24"."s_name" ASC, + "t24"."p_partkey" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql index 105609c556ca..90c48b774ef5 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql @@ -1,39 +1,103 @@ -WITH t0 AS ( +SELECT + t9.l_orderkey AS l_orderkey, + t9.revenue AS revenue, + t9.o_orderdate AS o_orderdate, + t9.o_shippriority AS o_shippriority +FROM ( SELECT - t4.l_orderkey AS l_orderkey, - t3.o_orderdate AS o_orderdate, - t3.o_shippriority AS o_shippriority, - SUM(t4.l_extendedprice * ( - CAST(1 AS TINYINT) - t4.l_discount + t8.l_orderkey AS l_orderkey, + t8.o_orderdate AS o_orderdate, + t8.o_shippriority AS o_shippriority, + SUM(t8.l_extendedprice * ( + CAST(1 AS TINYINT) - t8.l_discount )) AS revenue - FROM main.customer AS t2 - JOIN main.orders AS t3 - ON t2.c_custkey = t3.o_custkey - JOIN main.lineitem AS t4 - ON t4.l_orderkey = t3.o_orderkey - WHERE - t2.c_mktsegment = 'BUILDING' - AND t3.o_orderdate < MAKE_DATE(1995, 3, 15) - AND t4.l_shipdate > MAKE_DATE(1995, 3, 15) + FROM ( + SELECT + t7.c_custkey AS c_custkey, + t7.c_name AS c_name, + t7.c_address AS c_address, + t7.c_nationkey AS c_nationkey, + t7.c_phone AS c_phone, + t7.c_acctbal AS c_acctbal, + t7.c_mktsegment AS c_mktsegment, + t7.c_comment AS c_comment, + t7.o_orderkey AS o_orderkey, + t7.o_custkey AS o_custkey, + t7.o_orderstatus AS o_orderstatus, + t7.o_totalprice AS o_totalprice, + t7.o_orderdate AS o_orderdate, + t7.o_orderpriority AS o_orderpriority, + t7.o_clerk AS o_clerk, + t7.o_shippriority AS o_shippriority, + t7.o_comment AS o_comment, + t7.l_orderkey AS l_orderkey, + t7.l_partkey AS l_partkey, + t7.l_suppkey AS l_suppkey, + t7.l_linenumber AS l_linenumber, + t7.l_quantity AS l_quantity, + t7.l_extendedprice AS l_extendedprice, + t7.l_discount AS l_discount, + t7.l_tax AS l_tax, + t7.l_returnflag AS l_returnflag, + t7.l_linestatus AS l_linestatus, + t7.l_shipdate AS l_shipdate, + t7.l_commitdate AS l_commitdate, + t7.l_receiptdate AS l_receiptdate, + t7.l_shipinstruct AS l_shipinstruct, + 
t7.l_shipmode AS l_shipmode, + t7.l_comment AS l_comment + FROM ( + SELECT + t0.c_custkey AS c_custkey, + t0.c_name AS c_name, + t0.c_address AS c_address, + t0.c_nationkey AS c_nationkey, + t0.c_phone AS c_phone, + t0.c_acctbal AS c_acctbal, + t0.c_mktsegment AS c_mktsegment, + t0.c_comment AS c_comment, + t3.o_orderkey AS o_orderkey, + t3.o_custkey AS o_custkey, + t3.o_orderstatus AS o_orderstatus, + t3.o_totalprice AS o_totalprice, + t3.o_orderdate AS o_orderdate, + t3.o_orderpriority AS o_orderpriority, + t3.o_clerk AS o_clerk, + t3.o_shippriority AS o_shippriority, + t3.o_comment AS o_comment, + t4.l_orderkey AS l_orderkey, + t4.l_partkey AS l_partkey, + t4.l_suppkey AS l_suppkey, + t4.l_linenumber AS l_linenumber, + t4.l_quantity AS l_quantity, + t4.l_extendedprice AS l_extendedprice, + t4.l_discount AS l_discount, + t4.l_tax AS l_tax, + t4.l_returnflag AS l_returnflag, + t4.l_linestatus AS l_linestatus, + t4.l_shipdate AS l_shipdate, + t4.l_commitdate AS l_commitdate, + t4.l_receiptdate AS l_receiptdate, + t4.l_shipinstruct AS l_shipinstruct, + t4.l_shipmode AS l_shipmode, + t4.l_comment AS l_comment + FROM customer AS t0 + INNER JOIN orders AS t3 + ON t0.c_custkey = t3.o_custkey + INNER JOIN lineitem AS t4 + ON t4.l_orderkey = t3.o_orderkey + ) AS t7 + WHERE + t7.c_mktsegment = 'BUILDING' + AND t7.o_orderdate < MAKE_DATE(1995, 3, 15) + AND t7.l_shipdate > MAKE_DATE(1995, 3, 15) + ) AS t8 GROUP BY 1, 2, 3 -) -SELECT - t1.l_orderkey, - t1.revenue, - t1.o_orderdate, - t1.o_shippriority -FROM ( - SELECT - t0.l_orderkey AS l_orderkey, - t0.revenue AS revenue, - t0.o_orderdate AS o_orderdate, - t0.o_shippriority AS o_shippriority - FROM t0 -) AS t1 +) AS t9 ORDER BY - t1.revenue DESC, - t1.o_orderdate ASC + t9.revenue DESC, + t9.o_orderdate ASC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql index 3c5c4819ed7e..13a8f7da2bd2 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql @@ -1,81 +1,145 @@ -WITH t1 AS ( - SELECT - t5."C_CUSTKEY" AS "c_custkey", - t5."C_NAME" AS "c_name", - t5."C_ADDRESS" AS "c_address", - t5."C_NATIONKEY" AS "c_nationkey", - t5."C_PHONE" AS "c_phone", - t5."C_ACCTBAL" AS "c_acctbal", - t5."C_MKTSEGMENT" AS "c_mktsegment", - t5."C_COMMENT" AS "c_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS t5 -), t0 AS ( - SELECT - t5."O_ORDERKEY" AS "o_orderkey", - t5."O_CUSTKEY" AS "o_custkey", - t5."O_ORDERSTATUS" AS "o_orderstatus", - t5."O_TOTALPRICE" AS "o_totalprice", - t5."O_ORDERDATE" AS "o_orderdate", - t5."O_ORDERPRIORITY" AS "o_orderpriority", - t5."O_CLERK" AS "o_clerk", - t5."O_SHIPPRIORITY" AS "o_shippriority", - t5."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t5 -), t2 AS ( - SELECT - t5."L_ORDERKEY" AS "l_orderkey", - t5."L_PARTKEY" AS "l_partkey", - t5."L_SUPPKEY" AS "l_suppkey", - t5."L_LINENUMBER" AS "l_linenumber", - t5."L_QUANTITY" AS "l_quantity", - t5."L_EXTENDEDPRICE" AS "l_extendedprice", - t5."L_DISCOUNT" AS "l_discount", - t5."L_TAX" AS "l_tax", - t5."L_RETURNFLAG" AS "l_returnflag", - t5."L_LINESTATUS" AS "l_linestatus", - t5."L_SHIPDATE" AS "l_shipdate", - t5."L_COMMITDATE" AS "l_commitdate", - t5."L_RECEIPTDATE" AS "l_receiptdate", - t5."L_SHIPINSTRUCT" AS "l_shipinstruct", - t5."L_SHIPMODE" AS "l_shipmode", - t5."L_COMMENT" AS 
"l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t5 -), t3 AS ( +SELECT + "t12"."l_orderkey" AS "l_orderkey", + "t12"."revenue" AS "revenue", + "t12"."o_orderdate" AS "o_orderdate", + "t12"."o_shippriority" AS "o_shippriority" +FROM ( SELECT - t2."l_orderkey" AS "l_orderkey", - t0."o_orderdate" AS "o_orderdate", - t0."o_shippriority" AS "o_shippriority", - SUM(t2."l_extendedprice" * ( - 1 - t2."l_discount" + "t11"."l_orderkey" AS "l_orderkey", + "t11"."o_orderdate" AS "o_orderdate", + "t11"."o_shippriority" AS "o_shippriority", + SUM("t11"."l_extendedprice" * ( + 1 - "t11"."l_discount" )) AS "revenue" - FROM t1 - JOIN t0 - ON t1."c_custkey" = t0."o_custkey" - JOIN t2 - ON t2."l_orderkey" = t0."o_orderkey" - WHERE - t1."c_mktsegment" = 'BUILDING' - AND t0."o_orderdate" < DATE_FROM_PARTS(1995, 3, 15) - AND t2."l_shipdate" > DATE_FROM_PARTS(1995, 3, 15) + FROM ( + SELECT + "t10"."c_custkey" AS "c_custkey", + "t10"."c_name" AS "c_name", + "t10"."c_address" AS "c_address", + "t10"."c_nationkey" AS "c_nationkey", + "t10"."c_phone" AS "c_phone", + "t10"."c_acctbal" AS "c_acctbal", + "t10"."c_mktsegment" AS "c_mktsegment", + "t10"."c_comment" AS "c_comment", + "t10"."o_orderkey" AS "o_orderkey", + "t10"."o_custkey" AS "o_custkey", + "t10"."o_orderstatus" AS "o_orderstatus", + "t10"."o_totalprice" AS "o_totalprice", + "t10"."o_orderdate" AS "o_orderdate", + "t10"."o_orderpriority" AS "o_orderpriority", + "t10"."o_clerk" AS "o_clerk", + "t10"."o_shippriority" AS "o_shippriority", + "t10"."o_comment" AS "o_comment", + "t10"."l_orderkey" AS "l_orderkey", + "t10"."l_partkey" AS "l_partkey", + "t10"."l_suppkey" AS "l_suppkey", + "t10"."l_linenumber" AS "l_linenumber", + "t10"."l_quantity" AS "l_quantity", + "t10"."l_extendedprice" AS "l_extendedprice", + "t10"."l_discount" AS "l_discount", + "t10"."l_tax" AS "l_tax", + "t10"."l_returnflag" AS "l_returnflag", + "t10"."l_linestatus" AS "l_linestatus", + "t10"."l_shipdate" AS "l_shipdate", + "t10"."l_commitdate" AS "l_commitdate", + "t10"."l_receiptdate" AS "l_receiptdate", + "t10"."l_shipinstruct" AS "l_shipinstruct", + "t10"."l_shipmode" AS "l_shipmode", + "t10"."l_comment" AS "l_comment" + FROM ( + SELECT + "t3"."c_custkey" AS "c_custkey", + "t3"."c_name" AS "c_name", + "t3"."c_address" AS "c_address", + "t3"."c_nationkey" AS "c_nationkey", + "t3"."c_phone" AS "c_phone", + "t3"."c_acctbal" AS "c_acctbal", + "t3"."c_mktsegment" AS "c_mktsegment", + "t3"."c_comment" AS "c_comment", + "t6"."o_orderkey" AS "o_orderkey", + "t6"."o_custkey" AS "o_custkey", + "t6"."o_orderstatus" AS "o_orderstatus", + "t6"."o_totalprice" AS "o_totalprice", + "t6"."o_orderdate" AS "o_orderdate", + "t6"."o_orderpriority" AS "o_orderpriority", + "t6"."o_clerk" AS "o_clerk", + "t6"."o_shippriority" AS "o_shippriority", + "t6"."o_comment" AS "o_comment", + "t7"."l_orderkey" AS "l_orderkey", + "t7"."l_partkey" AS "l_partkey", + "t7"."l_suppkey" AS "l_suppkey", + "t7"."l_linenumber" AS "l_linenumber", + "t7"."l_quantity" AS "l_quantity", + "t7"."l_extendedprice" AS "l_extendedprice", + "t7"."l_discount" AS "l_discount", + "t7"."l_tax" AS "l_tax", + "t7"."l_returnflag" AS "l_returnflag", + "t7"."l_linestatus" AS "l_linestatus", + "t7"."l_shipdate" AS "l_shipdate", + "t7"."l_commitdate" AS "l_commitdate", + "t7"."l_receiptdate" AS "l_receiptdate", + "t7"."l_shipinstruct" AS "l_shipinstruct", + "t7"."l_shipmode" AS "l_shipmode", + "t7"."l_comment" AS "l_comment" + FROM ( + SELECT + "t0"."C_CUSTKEY" AS "c_custkey", + "t0"."C_NAME" AS "c_name", + "t0"."C_ADDRESS" AS 
"c_address", + "t0"."C_NATIONKEY" AS "c_nationkey", + "t0"."C_PHONE" AS "c_phone", + "t0"."C_ACCTBAL" AS "c_acctbal", + "t0"."C_MKTSEGMENT" AS "c_mktsegment", + "t0"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t0" + ) AS "t3" + INNER JOIN ( + SELECT + "t1"."O_ORDERKEY" AS "o_orderkey", + "t1"."O_CUSTKEY" AS "o_custkey", + "t1"."O_ORDERSTATUS" AS "o_orderstatus", + "t1"."O_TOTALPRICE" AS "o_totalprice", + "t1"."O_ORDERDATE" AS "o_orderdate", + "t1"."O_ORDERPRIORITY" AS "o_orderpriority", + "t1"."O_CLERK" AS "o_clerk", + "t1"."O_SHIPPRIORITY" AS "o_shippriority", + "t1"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t1" + ) AS "t6" + ON "t3"."c_custkey" = "t6"."o_custkey" + INNER JOIN ( + SELECT + "t2"."L_ORDERKEY" AS "l_orderkey", + "t2"."L_PARTKEY" AS "l_partkey", + "t2"."L_SUPPKEY" AS "l_suppkey", + "t2"."L_LINENUMBER" AS "l_linenumber", + "t2"."L_QUANTITY" AS "l_quantity", + "t2"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t2"."L_DISCOUNT" AS "l_discount", + "t2"."L_TAX" AS "l_tax", + "t2"."L_RETURNFLAG" AS "l_returnflag", + "t2"."L_LINESTATUS" AS "l_linestatus", + "t2"."L_SHIPDATE" AS "l_shipdate", + "t2"."L_COMMITDATE" AS "l_commitdate", + "t2"."L_RECEIPTDATE" AS "l_receiptdate", + "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t2"."L_SHIPMODE" AS "l_shipmode", + "t2"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t2" + ) AS "t7" + ON "t7"."l_orderkey" = "t6"."o_orderkey" + ) AS "t10" + WHERE + "t10"."c_mktsegment" = 'BUILDING' + AND "t10"."o_orderdate" < DATEFROMPARTS(1995, 3, 15) + AND "t10"."l_shipdate" > DATEFROMPARTS(1995, 3, 15) + ) AS "t11" GROUP BY 1, 2, 3 -) -SELECT - t4."l_orderkey", - t4."revenue", - t4."o_orderdate", - t4."o_shippriority" -FROM ( - SELECT - t3."l_orderkey" AS "l_orderkey", - t3."revenue" AS "revenue", - t3."o_orderdate" AS "o_orderdate", - t3."o_shippriority" AS "o_shippriority" - FROM t3 -) AS t4 +) AS "t12" ORDER BY - t4."revenue" DESC, - t4."o_orderdate" ASC + "t12"."revenue" DESC NULLS LAST, + "t12"."o_orderdate" ASC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql index b4ef1e6dabfc..f56cd81b6401 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql @@ -1,20 +1,41 @@ SELECT - t0.o_orderpriority, - COUNT(*) AS order_count -FROM main.orders AS t0 -WHERE - ( - EXISTS( - SELECT - CAST(1 AS TINYINT) AS anon_1 - FROM main.lineitem AS t1 - WHERE - t1.l_orderkey = t0.o_orderkey AND t1.l_commitdate < t1.l_receiptdate - ) - ) - AND t0.o_orderdate >= MAKE_DATE(1993, 7, 1) - AND t0.o_orderdate < MAKE_DATE(1993, 10, 1) -GROUP BY - 1 + t4.o_orderpriority AS o_orderpriority, + t4.order_count AS order_count +FROM ( + SELECT + t3.o_orderpriority AS o_orderpriority, + COUNT(*) AS order_count + FROM ( + SELECT + t0.o_orderkey AS o_orderkey, + t0.o_custkey AS o_custkey, + t0.o_orderstatus AS o_orderstatus, + t0.o_totalprice AS o_totalprice, + t0.o_orderdate AS o_orderdate, + t0.o_orderpriority AS o_orderpriority, + t0.o_clerk AS o_clerk, + t0.o_shippriority AS o_shippriority, + t0.o_comment AS o_comment + FROM orders AS t0 + WHERE + EXISTS( + ( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM lineitem AS t1 + WHERE + ( + t1.l_orderkey = t0.o_orderkey + ) AND ( + t1.l_commitdate < t1.l_receiptdate + ) + ) + ) + AND t0.o_orderdate >= MAKE_DATE(1993, 7, 1) + AND t0.o_orderdate < MAKE_DATE(1993, 10, 1) + ) AS t3 + GROUP BY + 
1 +) AS t4 ORDER BY - t0.o_orderpriority ASC \ No newline at end of file + t4.o_orderpriority ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql index 756fa1049150..67291d6a3632 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql @@ -1,52 +1,42 @@ -WITH t1 AS ( - SELECT - t2."O_ORDERKEY" AS "o_orderkey", - t2."O_CUSTKEY" AS "o_custkey", - t2."O_ORDERSTATUS" AS "o_orderstatus", - t2."O_TOTALPRICE" AS "o_totalprice", - t2."O_ORDERDATE" AS "o_orderdate", - t2."O_ORDERPRIORITY" AS "o_orderpriority", - t2."O_CLERK" AS "o_clerk", - t2."O_SHIPPRIORITY" AS "o_shippriority", - t2."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t2 -), t0 AS ( - SELECT - t2."L_ORDERKEY" AS "l_orderkey", - t2."L_PARTKEY" AS "l_partkey", - t2."L_SUPPKEY" AS "l_suppkey", - t2."L_LINENUMBER" AS "l_linenumber", - t2."L_QUANTITY" AS "l_quantity", - t2."L_EXTENDEDPRICE" AS "l_extendedprice", - t2."L_DISCOUNT" AS "l_discount", - t2."L_TAX" AS "l_tax", - t2."L_RETURNFLAG" AS "l_returnflag", - t2."L_LINESTATUS" AS "l_linestatus", - t2."L_SHIPDATE" AS "l_shipdate", - t2."L_COMMITDATE" AS "l_commitdate", - t2."L_RECEIPTDATE" AS "l_receiptdate", - t2."L_SHIPINSTRUCT" AS "l_shipinstruct", - t2."L_SHIPMODE" AS "l_shipmode", - t2."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t2 -) SELECT - t1."o_orderpriority", - COUNT(*) AS "order_count" -FROM t1 -WHERE - ( - EXISTS( - SELECT - 1 AS anon_1 - FROM t0 - WHERE - t0."l_orderkey" = t1."o_orderkey" AND t0."l_commitdate" < t0."l_receiptdate" - ) - ) - AND t1."o_orderdate" >= DATE_FROM_PARTS(1993, 7, 1) - AND t1."o_orderdate" < DATE_FROM_PARTS(1993, 10, 1) -GROUP BY - 1 + "t4"."o_orderpriority" AS "o_orderpriority", + "t4"."order_count" AS "order_count" +FROM ( + SELECT + "t3"."o_orderpriority" AS "o_orderpriority", + COUNT(*) AS "order_count" + FROM ( + SELECT + "t0"."O_ORDERKEY" AS "o_orderkey", + "t0"."O_CUSTKEY" AS "o_custkey", + "t0"."O_ORDERSTATUS" AS "o_orderstatus", + "t0"."O_TOTALPRICE" AS "o_totalprice", + "t0"."O_ORDERDATE" AS "o_orderdate", + "t0"."O_ORDERPRIORITY" AS "o_orderpriority", + "t0"."O_CLERK" AS "o_clerk", + "t0"."O_SHIPPRIORITY" AS "o_shippriority", + "t0"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t0" + WHERE + EXISTS( + ( + SELECT + 1 AS "1" + FROM "LINEITEM" AS "t1" + WHERE + ( + "t1"."L_ORDERKEY" = "t0"."O_ORDERKEY" + ) + AND ( + "t1"."L_COMMITDATE" < "t1"."L_RECEIPTDATE" + ) + ) + ) + AND "t0"."O_ORDERDATE" >= DATEFROMPARTS(1993, 7, 1) + AND "t0"."O_ORDERDATE" < DATEFROMPARTS(1993, 10, 1) + ) AS "t3" + GROUP BY + 1 +) AS "t4" ORDER BY - t1."o_orderpriority" ASC \ No newline at end of file + "t4"."o_orderpriority" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql index 90574ad58db1..6fed94b3b38c 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql @@ -1,29 +1,129 @@ SELECT - t0.n_name, - t0.revenue + t18.n_name AS n_name, + t18.revenue AS revenue FROM ( SELECT - t5.n_name AS n_name, - SUM(t3.l_extendedprice * ( - CAST(1 AS TINYINT) - t3.l_discount + t17.n_name AS n_name, + 
SUM(t17.l_extendedprice * ( + CAST(1 AS TINYINT) - t17.l_discount )) AS revenue - FROM main.customer AS t1 - JOIN main.orders AS t2 - ON t1.c_custkey = t2.o_custkey - JOIN main.lineitem AS t3 - ON t3.l_orderkey = t2.o_orderkey - JOIN main.supplier AS t4 - ON t3.l_suppkey = t4.s_suppkey - JOIN main.nation AS t5 - ON t1.c_nationkey = t4.s_nationkey AND t4.s_nationkey = t5.n_nationkey - JOIN main.region AS t6 - ON t5.n_regionkey = t6.r_regionkey - WHERE - t6.r_name = 'ASIA' - AND t2.o_orderdate >= MAKE_DATE(1994, 1, 1) - AND t2.o_orderdate < MAKE_DATE(1995, 1, 1) + FROM ( + SELECT + t16.c_custkey AS c_custkey, + t16.c_name AS c_name, + t16.c_address AS c_address, + t16.c_nationkey AS c_nationkey, + t16.c_phone AS c_phone, + t16.c_acctbal AS c_acctbal, + t16.c_mktsegment AS c_mktsegment, + t16.c_comment AS c_comment, + t16.o_orderkey AS o_orderkey, + t16.o_custkey AS o_custkey, + t16.o_orderstatus AS o_orderstatus, + t16.o_totalprice AS o_totalprice, + t16.o_orderdate AS o_orderdate, + t16.o_orderpriority AS o_orderpriority, + t16.o_clerk AS o_clerk, + t16.o_shippriority AS o_shippriority, + t16.o_comment AS o_comment, + t16.l_orderkey AS l_orderkey, + t16.l_partkey AS l_partkey, + t16.l_suppkey AS l_suppkey, + t16.l_linenumber AS l_linenumber, + t16.l_quantity AS l_quantity, + t16.l_extendedprice AS l_extendedprice, + t16.l_discount AS l_discount, + t16.l_tax AS l_tax, + t16.l_returnflag AS l_returnflag, + t16.l_linestatus AS l_linestatus, + t16.l_shipdate AS l_shipdate, + t16.l_commitdate AS l_commitdate, + t16.l_receiptdate AS l_receiptdate, + t16.l_shipinstruct AS l_shipinstruct, + t16.l_shipmode AS l_shipmode, + t16.l_comment AS l_comment, + t16.s_suppkey AS s_suppkey, + t16.s_name AS s_name, + t16.s_address AS s_address, + t16.s_nationkey AS s_nationkey, + t16.s_phone AS s_phone, + t16.s_acctbal AS s_acctbal, + t16.s_comment AS s_comment, + t16.n_nationkey AS n_nationkey, + t16.n_name AS n_name, + t16.n_regionkey AS n_regionkey, + t16.n_comment AS n_comment, + t16.r_regionkey AS r_regionkey, + t16.r_name AS r_name, + t16.r_comment AS r_comment + FROM ( + SELECT + t0.c_custkey AS c_custkey, + t0.c_name AS c_name, + t0.c_address AS c_address, + t0.c_nationkey AS c_nationkey, + t0.c_phone AS c_phone, + t0.c_acctbal AS c_acctbal, + t0.c_mktsegment AS c_mktsegment, + t0.c_comment AS c_comment, + t6.o_orderkey AS o_orderkey, + t6.o_custkey AS o_custkey, + t6.o_orderstatus AS o_orderstatus, + t6.o_totalprice AS o_totalprice, + t6.o_orderdate AS o_orderdate, + t6.o_orderpriority AS o_orderpriority, + t6.o_clerk AS o_clerk, + t6.o_shippriority AS o_shippriority, + t6.o_comment AS o_comment, + t7.l_orderkey AS l_orderkey, + t7.l_partkey AS l_partkey, + t7.l_suppkey AS l_suppkey, + t7.l_linenumber AS l_linenumber, + t7.l_quantity AS l_quantity, + t7.l_extendedprice AS l_extendedprice, + t7.l_discount AS l_discount, + t7.l_tax AS l_tax, + t7.l_returnflag AS l_returnflag, + t7.l_linestatus AS l_linestatus, + t7.l_shipdate AS l_shipdate, + t7.l_commitdate AS l_commitdate, + t7.l_receiptdate AS l_receiptdate, + t7.l_shipinstruct AS l_shipinstruct, + t7.l_shipmode AS l_shipmode, + t7.l_comment AS l_comment, + t8.s_suppkey AS s_suppkey, + t8.s_name AS s_name, + t8.s_address AS s_address, + t8.s_nationkey AS s_nationkey, + t8.s_phone AS s_phone, + t8.s_acctbal AS s_acctbal, + t8.s_comment AS s_comment, + t9.n_nationkey AS n_nationkey, + t9.n_name AS n_name, + t9.n_regionkey AS n_regionkey, + t9.n_comment AS n_comment, + t10.r_regionkey AS r_regionkey, + t10.r_name AS r_name, + t10.r_comment AS r_comment + 
FROM customer AS t0 + INNER JOIN orders AS t6 + ON t0.c_custkey = t6.o_custkey + INNER JOIN lineitem AS t7 + ON t7.l_orderkey = t6.o_orderkey + INNER JOIN supplier AS t8 + ON t7.l_suppkey = t8.s_suppkey + INNER JOIN nation AS t9 + ON t0.c_nationkey = t8.s_nationkey AND t8.s_nationkey = t9.n_nationkey + INNER JOIN region AS t10 + ON t9.n_regionkey = t10.r_regionkey + ) AS t16 + WHERE + t16.r_name = 'ASIA' + AND t16.o_orderdate >= MAKE_DATE(1994, 1, 1) + AND t16.o_orderdate < MAKE_DATE(1995, 1, 1) + ) AS t17 GROUP BY 1 -) AS t0 +) AS t18 ORDER BY - t0.revenue DESC \ No newline at end of file + t18.revenue DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql index 0b1c85164dfa..4ec8d7241f0d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql @@ -1,95 +1,195 @@ -WITH t1 AS ( - SELECT - t7."C_CUSTKEY" AS "c_custkey", - t7."C_NAME" AS "c_name", - t7."C_ADDRESS" AS "c_address", - t7."C_NATIONKEY" AS "c_nationkey", - t7."C_PHONE" AS "c_phone", - t7."C_ACCTBAL" AS "c_acctbal", - t7."C_MKTSEGMENT" AS "c_mktsegment", - t7."C_COMMENT" AS "c_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS t7 -), t0 AS ( - SELECT - t7."O_ORDERKEY" AS "o_orderkey", - t7."O_CUSTKEY" AS "o_custkey", - t7."O_ORDERSTATUS" AS "o_orderstatus", - t7."O_TOTALPRICE" AS "o_totalprice", - t7."O_ORDERDATE" AS "o_orderdate", - t7."O_ORDERPRIORITY" AS "o_orderpriority", - t7."O_CLERK" AS "o_clerk", - t7."O_SHIPPRIORITY" AS "o_shippriority", - t7."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t7 -), t2 AS ( - SELECT - t7."L_ORDERKEY" AS "l_orderkey", - t7."L_PARTKEY" AS "l_partkey", - t7."L_SUPPKEY" AS "l_suppkey", - t7."L_LINENUMBER" AS "l_linenumber", - t7."L_QUANTITY" AS "l_quantity", - t7."L_EXTENDEDPRICE" AS "l_extendedprice", - t7."L_DISCOUNT" AS "l_discount", - t7."L_TAX" AS "l_tax", - t7."L_RETURNFLAG" AS "l_returnflag", - t7."L_LINESTATUS" AS "l_linestatus", - t7."L_SHIPDATE" AS "l_shipdate", - t7."L_COMMITDATE" AS "l_commitdate", - t7."L_RECEIPTDATE" AS "l_receiptdate", - t7."L_SHIPINSTRUCT" AS "l_shipinstruct", - t7."L_SHIPMODE" AS "l_shipmode", - t7."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t7 -), t3 AS ( - SELECT - t7."S_SUPPKEY" AS "s_suppkey", - t7."S_NAME" AS "s_name", - t7."S_ADDRESS" AS "s_address", - t7."S_NATIONKEY" AS "s_nationkey", - t7."S_PHONE" AS "s_phone", - t7."S_ACCTBAL" AS "s_acctbal", - t7."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t7 -), t4 AS ( - SELECT - t7."N_NATIONKEY" AS "n_nationkey", - t7."N_NAME" AS "n_name", - t7."N_REGIONKEY" AS "n_regionkey", - t7."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS t7 -), t5 AS ( - SELECT - t7."R_REGIONKEY" AS "r_regionkey", - t7."R_NAME" AS "r_name", - t7."R_COMMENT" AS "r_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."REGION" AS t7 -) SELECT - t6."n_name", - t6."revenue" + "t24"."n_name" AS "n_name", + "t24"."revenue" AS "revenue" FROM ( SELECT - t4."n_name" AS "n_name", - SUM(t2."l_extendedprice" * ( - 1 - t2."l_discount" + "t23"."n_name" AS "n_name", + SUM("t23"."l_extendedprice" * ( + 1 - "t23"."l_discount" )) AS "revenue" - FROM t1 - JOIN t0 - ON t1."c_custkey" = t0."o_custkey" - JOIN t2 - ON t2."l_orderkey" = t0."o_orderkey" - JOIN t3 
- ON t2."l_suppkey" = t3."s_suppkey" - JOIN t4 - ON t1."c_nationkey" = t3."s_nationkey" AND t3."s_nationkey" = t4."n_nationkey" - JOIN t5 - ON t4."n_regionkey" = t5."r_regionkey" - WHERE - t5."r_name" = 'ASIA' - AND t0."o_orderdate" >= DATE_FROM_PARTS(1994, 1, 1) - AND t0."o_orderdate" < DATE_FROM_PARTS(1995, 1, 1) + FROM ( + SELECT + "t22"."c_custkey" AS "c_custkey", + "t22"."c_name" AS "c_name", + "t22"."c_address" AS "c_address", + "t22"."c_nationkey" AS "c_nationkey", + "t22"."c_phone" AS "c_phone", + "t22"."c_acctbal" AS "c_acctbal", + "t22"."c_mktsegment" AS "c_mktsegment", + "t22"."c_comment" AS "c_comment", + "t22"."o_orderkey" AS "o_orderkey", + "t22"."o_custkey" AS "o_custkey", + "t22"."o_orderstatus" AS "o_orderstatus", + "t22"."o_totalprice" AS "o_totalprice", + "t22"."o_orderdate" AS "o_orderdate", + "t22"."o_orderpriority" AS "o_orderpriority", + "t22"."o_clerk" AS "o_clerk", + "t22"."o_shippriority" AS "o_shippriority", + "t22"."o_comment" AS "o_comment", + "t22"."l_orderkey" AS "l_orderkey", + "t22"."l_partkey" AS "l_partkey", + "t22"."l_suppkey" AS "l_suppkey", + "t22"."l_linenumber" AS "l_linenumber", + "t22"."l_quantity" AS "l_quantity", + "t22"."l_extendedprice" AS "l_extendedprice", + "t22"."l_discount" AS "l_discount", + "t22"."l_tax" AS "l_tax", + "t22"."l_returnflag" AS "l_returnflag", + "t22"."l_linestatus" AS "l_linestatus", + "t22"."l_shipdate" AS "l_shipdate", + "t22"."l_commitdate" AS "l_commitdate", + "t22"."l_receiptdate" AS "l_receiptdate", + "t22"."l_shipinstruct" AS "l_shipinstruct", + "t22"."l_shipmode" AS "l_shipmode", + "t22"."l_comment" AS "l_comment", + "t22"."s_suppkey" AS "s_suppkey", + "t22"."s_name" AS "s_name", + "t22"."s_address" AS "s_address", + "t22"."s_nationkey" AS "s_nationkey", + "t22"."s_phone" AS "s_phone", + "t22"."s_acctbal" AS "s_acctbal", + "t22"."s_comment" AS "s_comment", + "t22"."n_nationkey" AS "n_nationkey", + "t22"."n_name" AS "n_name", + "t22"."n_regionkey" AS "n_regionkey", + "t22"."n_comment" AS "n_comment", + "t22"."r_regionkey" AS "r_regionkey", + "t22"."r_name" AS "r_name", + "t22"."r_comment" AS "r_comment" + FROM ( + SELECT + "t6"."c_custkey" AS "c_custkey", + "t6"."c_name" AS "c_name", + "t6"."c_address" AS "c_address", + "t6"."c_nationkey" AS "c_nationkey", + "t6"."c_phone" AS "c_phone", + "t6"."c_acctbal" AS "c_acctbal", + "t6"."c_mktsegment" AS "c_mktsegment", + "t6"."c_comment" AS "c_comment", + "t12"."o_orderkey" AS "o_orderkey", + "t12"."o_custkey" AS "o_custkey", + "t12"."o_orderstatus" AS "o_orderstatus", + "t12"."o_totalprice" AS "o_totalprice", + "t12"."o_orderdate" AS "o_orderdate", + "t12"."o_orderpriority" AS "o_orderpriority", + "t12"."o_clerk" AS "o_clerk", + "t12"."o_shippriority" AS "o_shippriority", + "t12"."o_comment" AS "o_comment", + "t13"."l_orderkey" AS "l_orderkey", + "t13"."l_partkey" AS "l_partkey", + "t13"."l_suppkey" AS "l_suppkey", + "t13"."l_linenumber" AS "l_linenumber", + "t13"."l_quantity" AS "l_quantity", + "t13"."l_extendedprice" AS "l_extendedprice", + "t13"."l_discount" AS "l_discount", + "t13"."l_tax" AS "l_tax", + "t13"."l_returnflag" AS "l_returnflag", + "t13"."l_linestatus" AS "l_linestatus", + "t13"."l_shipdate" AS "l_shipdate", + "t13"."l_commitdate" AS "l_commitdate", + "t13"."l_receiptdate" AS "l_receiptdate", + "t13"."l_shipinstruct" AS "l_shipinstruct", + "t13"."l_shipmode" AS "l_shipmode", + "t13"."l_comment" AS "l_comment", + "t14"."s_suppkey" AS "s_suppkey", + "t14"."s_name" AS "s_name", + "t14"."s_address" AS "s_address", + "t14"."s_nationkey" AS "s_nationkey", + 
"t14"."s_phone" AS "s_phone", + "t14"."s_acctbal" AS "s_acctbal", + "t14"."s_comment" AS "s_comment", + "t15"."n_nationkey" AS "n_nationkey", + "t15"."n_name" AS "n_name", + "t15"."n_regionkey" AS "n_regionkey", + "t15"."n_comment" AS "n_comment", + "t16"."r_regionkey" AS "r_regionkey", + "t16"."r_name" AS "r_name", + "t16"."r_comment" AS "r_comment" + FROM ( + SELECT + "t0"."C_CUSTKEY" AS "c_custkey", + "t0"."C_NAME" AS "c_name", + "t0"."C_ADDRESS" AS "c_address", + "t0"."C_NATIONKEY" AS "c_nationkey", + "t0"."C_PHONE" AS "c_phone", + "t0"."C_ACCTBAL" AS "c_acctbal", + "t0"."C_MKTSEGMENT" AS "c_mktsegment", + "t0"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t0" + ) AS "t6" + INNER JOIN ( + SELECT + "t1"."O_ORDERKEY" AS "o_orderkey", + "t1"."O_CUSTKEY" AS "o_custkey", + "t1"."O_ORDERSTATUS" AS "o_orderstatus", + "t1"."O_TOTALPRICE" AS "o_totalprice", + "t1"."O_ORDERDATE" AS "o_orderdate", + "t1"."O_ORDERPRIORITY" AS "o_orderpriority", + "t1"."O_CLERK" AS "o_clerk", + "t1"."O_SHIPPRIORITY" AS "o_shippriority", + "t1"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t1" + ) AS "t12" + ON "t6"."c_custkey" = "t12"."o_custkey" + INNER JOIN ( + SELECT + "t2"."L_ORDERKEY" AS "l_orderkey", + "t2"."L_PARTKEY" AS "l_partkey", + "t2"."L_SUPPKEY" AS "l_suppkey", + "t2"."L_LINENUMBER" AS "l_linenumber", + "t2"."L_QUANTITY" AS "l_quantity", + "t2"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t2"."L_DISCOUNT" AS "l_discount", + "t2"."L_TAX" AS "l_tax", + "t2"."L_RETURNFLAG" AS "l_returnflag", + "t2"."L_LINESTATUS" AS "l_linestatus", + "t2"."L_SHIPDATE" AS "l_shipdate", + "t2"."L_COMMITDATE" AS "l_commitdate", + "t2"."L_RECEIPTDATE" AS "l_receiptdate", + "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t2"."L_SHIPMODE" AS "l_shipmode", + "t2"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t2" + ) AS "t13" + ON "t13"."l_orderkey" = "t12"."o_orderkey" + INNER JOIN ( + SELECT + "t3"."S_SUPPKEY" AS "s_suppkey", + "t3"."S_NAME" AS "s_name", + "t3"."S_ADDRESS" AS "s_address", + "t3"."S_NATIONKEY" AS "s_nationkey", + "t3"."S_PHONE" AS "s_phone", + "t3"."S_ACCTBAL" AS "s_acctbal", + "t3"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t3" + ) AS "t14" + ON "t13"."l_suppkey" = "t14"."s_suppkey" + INNER JOIN ( + SELECT + "t4"."N_NATIONKEY" AS "n_nationkey", + "t4"."N_NAME" AS "n_name", + "t4"."N_REGIONKEY" AS "n_regionkey", + "t4"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t4" + ) AS "t15" + ON "t6"."c_nationkey" = "t14"."s_nationkey" + AND "t14"."s_nationkey" = "t15"."n_nationkey" + INNER JOIN ( + SELECT + "t5"."R_REGIONKEY" AS "r_regionkey", + "t5"."R_NAME" AS "r_name", + "t5"."R_COMMENT" AS "r_comment" + FROM "REGION" AS "t5" + ) AS "t16" + ON "t15"."n_regionkey" = "t16"."r_regionkey" + ) AS "t22" + WHERE + "t22"."r_name" = 'ASIA' + AND "t22"."o_orderdate" >= DATEFROMPARTS(1994, 1, 1) + AND "t22"."o_orderdate" < DATEFROMPARTS(1995, 1, 1) + ) AS "t23" GROUP BY 1 -) AS t6 +) AS "t24" ORDER BY - t6."revenue" DESC \ No newline at end of file + "t24"."revenue" DESC NULLS LAST \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql index eae15c8677d5..d42e3466036d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql @@ -1,8 +1,27 @@ SELECT - SUM(t0.l_extendedprice * t0.l_discount) AS revenue -FROM main.lineitem AS t0 -WHERE - t0.l_shipdate >= MAKE_DATE(1994, 1, 1) - AND 
t0.l_shipdate < MAKE_DATE(1995, 1, 1) - AND t0.l_discount BETWEEN CAST(0.05 AS DOUBLE) AND CAST(0.07 AS DOUBLE) - AND t0.l_quantity < CAST(24 AS TINYINT) \ No newline at end of file + SUM(t1.l_extendedprice * t1.l_discount) AS revenue +FROM ( + SELECT + t0.l_orderkey AS l_orderkey, + t0.l_partkey AS l_partkey, + t0.l_suppkey AS l_suppkey, + t0.l_linenumber AS l_linenumber, + t0.l_quantity AS l_quantity, + t0.l_extendedprice AS l_extendedprice, + t0.l_discount AS l_discount, + t0.l_tax AS l_tax, + t0.l_returnflag AS l_returnflag, + t0.l_linestatus AS l_linestatus, + t0.l_shipdate AS l_shipdate, + t0.l_commitdate AS l_commitdate, + t0.l_receiptdate AS l_receiptdate, + t0.l_shipinstruct AS l_shipinstruct, + t0.l_shipmode AS l_shipmode, + t0.l_comment AS l_comment + FROM lineitem AS t0 + WHERE + t0.l_shipdate >= MAKE_DATE(1994, 1, 1) + AND t0.l_shipdate < MAKE_DATE(1995, 1, 1) + AND t0.l_discount BETWEEN CAST(0.05 AS DOUBLE) AND CAST(0.07 AS DOUBLE) + AND t0.l_quantity < CAST(24 AS TINYINT) +) AS t1 diff --git a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql index 3ac88adee307..5d0be126fb13 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql @@ -1,28 +1,27 @@ -WITH t0 AS ( - SELECT - t1."L_ORDERKEY" AS "l_orderkey", - t1."L_PARTKEY" AS "l_partkey", - t1."L_SUPPKEY" AS "l_suppkey", - t1."L_LINENUMBER" AS "l_linenumber", - t1."L_QUANTITY" AS "l_quantity", - t1."L_EXTENDEDPRICE" AS "l_extendedprice", - t1."L_DISCOUNT" AS "l_discount", - t1."L_TAX" AS "l_tax", - t1."L_RETURNFLAG" AS "l_returnflag", - t1."L_LINESTATUS" AS "l_linestatus", - t1."L_SHIPDATE" AS "l_shipdate", - t1."L_COMMITDATE" AS "l_commitdate", - t1."L_RECEIPTDATE" AS "l_receiptdate", - t1."L_SHIPINSTRUCT" AS "l_shipinstruct", - t1."L_SHIPMODE" AS "l_shipmode", - t1."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t1 -) SELECT - SUM(t0."l_extendedprice" * t0."l_discount") AS "revenue" -FROM t0 -WHERE - t0."l_shipdate" >= DATE_FROM_PARTS(1994, 1, 1) - AND t0."l_shipdate" < DATE_FROM_PARTS(1995, 1, 1) - AND t0."l_discount" BETWEEN 0.05 AND 0.07 - AND t0."l_quantity" < 24 \ No newline at end of file + SUM("t1"."l_extendedprice" * "t1"."l_discount") AS "revenue" +FROM ( + SELECT + "t0"."L_ORDERKEY" AS "l_orderkey", + "t0"."L_PARTKEY" AS "l_partkey", + "t0"."L_SUPPKEY" AS "l_suppkey", + "t0"."L_LINENUMBER" AS "l_linenumber", + "t0"."L_QUANTITY" AS "l_quantity", + "t0"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t0"."L_DISCOUNT" AS "l_discount", + "t0"."L_TAX" AS "l_tax", + "t0"."L_RETURNFLAG" AS "l_returnflag", + "t0"."L_LINESTATUS" AS "l_linestatus", + "t0"."L_SHIPDATE" AS "l_shipdate", + "t0"."L_COMMITDATE" AS "l_commitdate", + "t0"."L_RECEIPTDATE" AS "l_receiptdate", + "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t0"."L_SHIPMODE" AS "l_shipmode", + "t0"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t0" + WHERE + "t0"."L_SHIPDATE" >= DATEFROMPARTS(1994, 1, 1) + AND "t0"."L_SHIPDATE" < DATEFROMPARTS(1995, 1, 1) + AND "t0"."L_DISCOUNT" BETWEEN 0.05 AND 0.07 + AND "t0"."L_QUANTITY" < 24 +) AS "t1" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql index f7cdf6bd08e1..69d9b1af31d1 100644 --- 
a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql @@ -1,110 +1,128 @@ -WITH t1 AS ( - SELECT - t7."S_SUPPKEY" AS "s_suppkey", - t7."S_NAME" AS "s_name", - t7."S_ADDRESS" AS "s_address", - t7."S_NATIONKEY" AS "s_nationkey", - t7."S_PHONE" AS "s_phone", - t7."S_ACCTBAL" AS "s_acctbal", - t7."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t7 -), t0 AS ( - SELECT - t7."L_ORDERKEY" AS "l_orderkey", - t7."L_PARTKEY" AS "l_partkey", - t7."L_SUPPKEY" AS "l_suppkey", - t7."L_LINENUMBER" AS "l_linenumber", - t7."L_QUANTITY" AS "l_quantity", - t7."L_EXTENDEDPRICE" AS "l_extendedprice", - t7."L_DISCOUNT" AS "l_discount", - t7."L_TAX" AS "l_tax", - t7."L_RETURNFLAG" AS "l_returnflag", - t7."L_LINESTATUS" AS "l_linestatus", - t7."L_SHIPDATE" AS "l_shipdate", - t7."L_COMMITDATE" AS "l_commitdate", - t7."L_RECEIPTDATE" AS "l_receiptdate", - t7."L_SHIPINSTRUCT" AS "l_shipinstruct", - t7."L_SHIPMODE" AS "l_shipmode", - t7."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t7 -), t2 AS ( - SELECT - t7."O_ORDERKEY" AS "o_orderkey", - t7."O_CUSTKEY" AS "o_custkey", - t7."O_ORDERSTATUS" AS "o_orderstatus", - t7."O_TOTALPRICE" AS "o_totalprice", - t7."O_ORDERDATE" AS "o_orderdate", - t7."O_ORDERPRIORITY" AS "o_orderpriority", - t7."O_CLERK" AS "o_clerk", - t7."O_SHIPPRIORITY" AS "o_shippriority", - t7."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t7 -), t3 AS ( - SELECT - t7."C_CUSTKEY" AS "c_custkey", - t7."C_NAME" AS "c_name", - t7."C_ADDRESS" AS "c_address", - t7."C_NATIONKEY" AS "c_nationkey", - t7."C_PHONE" AS "c_phone", - t7."C_ACCTBAL" AS "c_acctbal", - t7."C_MKTSEGMENT" AS "c_mktsegment", - t7."C_COMMENT" AS "c_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS t7 -), t4 AS ( - SELECT - t7."N_NATIONKEY" AS "n_nationkey", - t7."N_NAME" AS "n_name", - t7."N_REGIONKEY" AS "n_regionkey", - t7."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS t7 -), t5 AS ( - SELECT - t4."n_name" AS "supp_nation", - t7."n_name" AS "cust_nation", - t0."l_shipdate" AS "l_shipdate", - t0."l_extendedprice" AS "l_extendedprice", - t0."l_discount" AS "l_discount", - CAST(DATE_PART(year, t0."l_shipdate") AS SMALLINT) AS "l_year", - t0."l_extendedprice" * ( - 1 - t0."l_discount" - ) AS "volume" - FROM t1 - JOIN t0 - ON t1."s_suppkey" = t0."l_suppkey" - JOIN t2 - ON t2."o_orderkey" = t0."l_orderkey" - JOIN t3 - ON t3."c_custkey" = t2."o_custkey" - JOIN t4 - ON t1."s_nationkey" = t4."n_nationkey" - JOIN t4 AS t7 - ON t3."c_nationkey" = t7."n_nationkey" -) SELECT - t6."supp_nation", - t6."cust_nation", - t6."l_year", - t6."revenue" + * FROM ( SELECT - t5."supp_nation" AS "supp_nation", - t5."cust_nation" AS "cust_nation", - t5."l_year" AS "l_year", - SUM(t5."volume") AS "revenue" - FROM t5 - WHERE - ( - t5."cust_nation" = 'FRANCE' AND t5."supp_nation" = 'GERMANY' - OR t5."cust_nation" = 'GERMANY' - AND t5."supp_nation" = 'FRANCE' - ) - AND t5."l_shipdate" BETWEEN DATE_FROM_PARTS(1995, 1, 1) AND DATE_FROM_PARTS(1996, 12, 31) + "t17"."supp_nation" AS "supp_nation", + "t17"."cust_nation" AS "cust_nation", + "t17"."l_year" AS "l_year", + SUM("t17"."volume") AS "revenue" + FROM ( + SELECT + * + FROM ( + SELECT + "t9"."n_name" AS "supp_nation", + "t10"."n_name" AS "cust_nation", + "t6"."l_shipdate" AS "l_shipdate", + "t6"."l_extendedprice" AS "l_extendedprice", + "t6"."l_discount" AS 
"l_discount", + DATE_PART('year', "t6"."l_shipdate") AS "l_year", + "t6"."l_extendedprice" * ( + 1 - "t6"."l_discount" + ) AS "volume" + FROM ( + SELECT + "t0"."S_SUPPKEY" AS "s_suppkey", + "t0"."S_NAME" AS "s_name", + "t0"."S_ADDRESS" AS "s_address", + "t0"."S_NATIONKEY" AS "s_nationkey", + "t0"."S_PHONE" AS "s_phone", + "t0"."S_ACCTBAL" AS "s_acctbal", + "t0"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t0" + ) AS "t5" + INNER JOIN ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + ) AS "t6" + ON "t5"."s_suppkey" = "t6"."l_suppkey" + INNER JOIN ( + SELECT + "t2"."O_ORDERKEY" AS "o_orderkey", + "t2"."O_CUSTKEY" AS "o_custkey", + "t2"."O_ORDERSTATUS" AS "o_orderstatus", + "t2"."O_TOTALPRICE" AS "o_totalprice", + "t2"."O_ORDERDATE" AS "o_orderdate", + "t2"."O_ORDERPRIORITY" AS "o_orderpriority", + "t2"."O_CLERK" AS "o_clerk", + "t2"."O_SHIPPRIORITY" AS "o_shippriority", + "t2"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t2" + ) AS "t7" + ON "t7"."o_orderkey" = "t6"."l_orderkey" + INNER JOIN ( + SELECT + "t3"."C_CUSTKEY" AS "c_custkey", + "t3"."C_NAME" AS "c_name", + "t3"."C_ADDRESS" AS "c_address", + "t3"."C_NATIONKEY" AS "c_nationkey", + "t3"."C_PHONE" AS "c_phone", + "t3"."C_ACCTBAL" AS "c_acctbal", + "t3"."C_MKTSEGMENT" AS "c_mktsegment", + "t3"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t3" + ) AS "t8" + ON "t8"."c_custkey" = "t7"."o_custkey" + INNER JOIN ( + SELECT + "t4"."N_NATIONKEY" AS "n_nationkey", + "t4"."N_NAME" AS "n_name", + "t4"."N_REGIONKEY" AS "n_regionkey", + "t4"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t4" + ) AS "t9" + ON "t5"."s_nationkey" = "t9"."n_nationkey" + INNER JOIN ( + SELECT + "t4"."N_NATIONKEY" AS "n_nationkey", + "t4"."N_NAME" AS "n_name", + "t4"."N_REGIONKEY" AS "n_regionkey", + "t4"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t4" + ) AS "t10" + ON "t8"."c_nationkey" = "t10"."n_nationkey" + ) AS "t16" + WHERE + ( + ( + ( + "t16"."cust_nation" = 'FRANCE' + ) AND ( + "t16"."supp_nation" = 'GERMANY' + ) + ) + OR ( + ( + "t16"."cust_nation" = 'GERMANY' + ) AND ( + "t16"."supp_nation" = 'FRANCE' + ) + ) + ) + AND "t16"."l_shipdate" BETWEEN DATEFROMPARTS(1995, 1, 1) AND DATEFROMPARTS(1996, 12, 31) + ) AS "t17" GROUP BY 1, 2, 3 -) AS t6 +) AS "t18" ORDER BY - t6."supp_nation" ASC, - t6."cust_nation" ASC, - t6."l_year" ASC \ No newline at end of file + "t18"."supp_nation" ASC, + "t18"."cust_nation" ASC, + "t18"."l_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql index 26823ce9ad6b..e588d3e1466f 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql @@ -1,64 +1,52 @@ -WITH t0 AS ( - SELECT - CAST(EXTRACT(year FROM t7.o_orderdate) AS SMALLINT) AS o_year, - t5.l_extendedprice * ( - CAST(1 AS TINYINT) - 
t5.l_discount - ) AS volume, - t11.n_name AS nation, - t10.r_name AS r_name, - t7.o_orderdate AS o_orderdate, - t4.p_type AS p_type - FROM main.part AS t4 - JOIN main.lineitem AS t5 - ON t4.p_partkey = t5.l_partkey - JOIN main.supplier AS t6 - ON t6.s_suppkey = t5.l_suppkey - JOIN main.orders AS t7 - ON t5.l_orderkey = t7.o_orderkey - JOIN main.customer AS t8 - ON t7.o_custkey = t8.c_custkey - JOIN main.nation AS t9 - ON t8.c_nationkey = t9.n_nationkey - JOIN main.region AS t10 - ON t9.n_regionkey = t10.r_regionkey - JOIN main.nation AS t11 - ON t6.s_nationkey = t11.n_nationkey -), t1 AS ( - SELECT - t0.o_year AS o_year, - t0.volume AS volume, - t0.nation AS nation, - t0.r_name AS r_name, - t0.o_orderdate AS o_orderdate, - t0.p_type AS p_type - FROM t0 - WHERE - t0.r_name = 'AMERICA' - AND t0.o_orderdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) - AND t0.p_type = 'ECONOMY ANODIZED STEEL' -), t2 AS ( - SELECT - t1.o_year AS o_year, - t1.volume AS volume, - t1.nation AS nation, - t1.r_name AS r_name, - t1.o_orderdate AS o_orderdate, - t1.p_type AS p_type, - CASE WHEN ( - t1.nation = 'BRAZIL' - ) THEN t1.volume ELSE CAST(0 AS TINYINT) END AS nation_volume - FROM t1 -) SELECT - t3.o_year, - t3.mkt_share + t23.o_year AS o_year, + t23.mkt_share AS mkt_share FROM ( SELECT - t2.o_year AS o_year, - SUM(t2.nation_volume) / SUM(t2.volume) AS mkt_share - FROM t2 + t22.o_year AS o_year, + SUM(t22.nation_volume) / SUM(t22.volume) AS mkt_share + FROM ( + SELECT + t21.o_year AS o_year, + t21.volume AS volume, + t21.nation AS nation, + t21.r_name AS r_name, + t21.o_orderdate AS o_orderdate, + t21.p_type AS p_type, + CASE WHEN t21.nation = 'BRAZIL' THEN t21.volume ELSE CAST(0 AS TINYINT) END AS nation_volume + FROM ( + SELECT + EXTRACT('year' FROM t9.o_orderdate) AS o_year, + t7.l_extendedprice * ( + CAST(1 AS TINYINT) - t7.l_discount + ) AS volume, + t12.n_name AS nation, + t13.r_name AS r_name, + t9.o_orderdate AS o_orderdate, + t0.p_type AS p_type + FROM part AS t0 + INNER JOIN lineitem AS t7 + ON t0.p_partkey = t7.l_partkey + INNER JOIN supplier AS t8 + ON t8.s_suppkey = t7.l_suppkey + INNER JOIN orders AS t9 + ON t7.l_orderkey = t9.o_orderkey + INNER JOIN customer AS t10 + ON t9.o_custkey = t10.c_custkey + INNER JOIN nation AS t11 + ON t10.c_nationkey = t11.n_nationkey + INNER JOIN region AS t13 + ON t11.n_regionkey = t13.r_regionkey + INNER JOIN nation AS t12 + ON t8.s_nationkey = t12.n_nationkey + ) AS t21 + WHERE + t21.r_name = 'AMERICA' + AND t21.o_orderdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) + AND t21.p_type = 'ECONOMY ANODIZED STEEL' + ) AS t22 GROUP BY 1 -) AS t3 +) AS t23 ORDER BY - t3.o_year ASC \ No newline at end of file + t23.o_year ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql index 84b17e92e572..800d7a74c645 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql @@ -1,146 +1,136 @@ -WITH t1 AS ( - SELECT - t12."P_PARTKEY" AS "p_partkey", - t12."P_NAME" AS "p_name", - t12."P_MFGR" AS "p_mfgr", - t12."P_BRAND" AS "p_brand", - t12."P_TYPE" AS "p_type", - t12."P_SIZE" AS "p_size", - t12."P_CONTAINER" AS "p_container", - t12."P_RETAILPRICE" AS "p_retailprice", - t12."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t12 -), t0 AS ( - SELECT - t12."L_ORDERKEY" AS 
"l_orderkey", - t12."L_PARTKEY" AS "l_partkey", - t12."L_SUPPKEY" AS "l_suppkey", - t12."L_LINENUMBER" AS "l_linenumber", - t12."L_QUANTITY" AS "l_quantity", - t12."L_EXTENDEDPRICE" AS "l_extendedprice", - t12."L_DISCOUNT" AS "l_discount", - t12."L_TAX" AS "l_tax", - t12."L_RETURNFLAG" AS "l_returnflag", - t12."L_LINESTATUS" AS "l_linestatus", - t12."L_SHIPDATE" AS "l_shipdate", - t12."L_COMMITDATE" AS "l_commitdate", - t12."L_RECEIPTDATE" AS "l_receiptdate", - t12."L_SHIPINSTRUCT" AS "l_shipinstruct", - t12."L_SHIPMODE" AS "l_shipmode", - t12."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t12 -), t2 AS ( - SELECT - t12."S_SUPPKEY" AS "s_suppkey", - t12."S_NAME" AS "s_name", - t12."S_ADDRESS" AS "s_address", - t12."S_NATIONKEY" AS "s_nationkey", - t12."S_PHONE" AS "s_phone", - t12."S_ACCTBAL" AS "s_acctbal", - t12."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t12 -), t3 AS ( - SELECT - t12."O_ORDERKEY" AS "o_orderkey", - t12."O_CUSTKEY" AS "o_custkey", - t12."O_ORDERSTATUS" AS "o_orderstatus", - t12."O_TOTALPRICE" AS "o_totalprice", - t12."O_ORDERDATE" AS "o_orderdate", - t12."O_ORDERPRIORITY" AS "o_orderpriority", - t12."O_CLERK" AS "o_clerk", - t12."O_SHIPPRIORITY" AS "o_shippriority", - t12."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t12 -), t4 AS ( - SELECT - t12."C_CUSTKEY" AS "c_custkey", - t12."C_NAME" AS "c_name", - t12."C_ADDRESS" AS "c_address", - t12."C_NATIONKEY" AS "c_nationkey", - t12."C_PHONE" AS "c_phone", - t12."C_ACCTBAL" AS "c_acctbal", - t12."C_MKTSEGMENT" AS "c_mktsegment", - t12."C_COMMENT" AS "c_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS t12 -), t5 AS ( - SELECT - t12."N_NATIONKEY" AS "n_nationkey", - t12."N_NAME" AS "n_name", - t12."N_REGIONKEY" AS "n_regionkey", - t12."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS t12 -), t6 AS ( - SELECT - t12."R_REGIONKEY" AS "r_regionkey", - t12."R_NAME" AS "r_name", - t12."R_COMMENT" AS "r_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."REGION" AS t12 -), t7 AS ( - SELECT - CAST(DATE_PART(year, t3."o_orderdate") AS SMALLINT) AS "o_year", - t0."l_extendedprice" * ( - 1 - t0."l_discount" - ) AS "volume", - t12."n_name" AS "nation", - t6."r_name" AS "r_name", - t3."o_orderdate" AS "o_orderdate", - t1."p_type" AS "p_type" - FROM t1 - JOIN t0 - ON t1."p_partkey" = t0."l_partkey" - JOIN t2 - ON t2."s_suppkey" = t0."l_suppkey" - JOIN t3 - ON t0."l_orderkey" = t3."o_orderkey" - JOIN t4 - ON t3."o_custkey" = t4."c_custkey" - JOIN t5 - ON t4."c_nationkey" = t5."n_nationkey" - JOIN t6 - ON t5."n_regionkey" = t6."r_regionkey" - JOIN t5 AS t12 - ON t2."s_nationkey" = t12."n_nationkey" -), t8 AS ( - SELECT - t7."o_year" AS "o_year", - t7."volume" AS "volume", - t7."nation" AS "nation", - t7."r_name" AS "r_name", - t7."o_orderdate" AS "o_orderdate", - t7."p_type" AS "p_type" - FROM t7 - WHERE - t7."r_name" = 'AMERICA' - AND t7."o_orderdate" BETWEEN '1995-01-01' AND '1996-12-31' - AND t7."p_type" = 'ECONOMY ANODIZED STEEL' -), t9 AS ( - SELECT - t8."o_year" AS "o_year", - t8."volume" AS "volume", - t8."nation" AS "nation", - t8."r_name" AS "r_name", - t8."o_orderdate" AS "o_orderdate", - t8."p_type" AS "p_type", - CASE WHEN ( - t8."nation" = 'BRAZIL' - ) THEN t8."volume" ELSE 0 END AS "nation_volume" - FROM t8 -), t10 AS ( - SELECT - t9."o_year" AS "o_year", - SUM(t9."nation_volume") / SUM(t9."volume") AS "mkt_share" - FROM t9 - GROUP BY - 1 -) SELECT - CAST(t11."o_year" AS 
BIGINT) AS "o_year", - CAST(t11."mkt_share" AS DECIMAL(38, 10)) AS "mkt_share" + "t30"."o_year" AS "o_year", + "t30"."mkt_share" AS "mkt_share" FROM ( SELECT - t10."o_year" AS "o_year", - t10."mkt_share" AS "mkt_share" - FROM t10 - ORDER BY - t10."o_year" -) AS t11 \ No newline at end of file + "t29"."o_year" AS "o_year", + SUM("t29"."nation_volume") / SUM("t29"."volume") AS "mkt_share" + FROM ( + SELECT + "t28"."o_year" AS "o_year", + "t28"."volume" AS "volume", + "t28"."nation" AS "nation", + "t28"."r_name" AS "r_name", + "t28"."o_orderdate" AS "o_orderdate", + "t28"."p_type" AS "p_type", + CASE WHEN "t28"."nation" = 'BRAZIL' THEN "t28"."volume" ELSE 0 END AS "nation_volume" + FROM ( + SELECT + DATE_PART('year', "t16"."o_orderdate") AS "o_year", + "t14"."l_extendedprice" * ( + 1 - "t14"."l_discount" + ) AS "volume", + "t19"."n_name" AS "nation", + "t20"."r_name" AS "r_name", + "t16"."o_orderdate" AS "o_orderdate", + "t7"."p_type" AS "p_type" + FROM ( + SELECT + "t0"."P_PARTKEY" AS "p_partkey", + "t0"."P_NAME" AS "p_name", + "t0"."P_MFGR" AS "p_mfgr", + "t0"."P_BRAND" AS "p_brand", + "t0"."P_TYPE" AS "p_type", + "t0"."P_SIZE" AS "p_size", + "t0"."P_CONTAINER" AS "p_container", + "t0"."P_RETAILPRICE" AS "p_retailprice", + "t0"."P_COMMENT" AS "p_comment" + FROM "PART" AS "t0" + ) AS "t7" + INNER JOIN ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + ) AS "t14" + ON "t7"."p_partkey" = "t14"."l_partkey" + INNER JOIN ( + SELECT + "t2"."S_SUPPKEY" AS "s_suppkey", + "t2"."S_NAME" AS "s_name", + "t2"."S_ADDRESS" AS "s_address", + "t2"."S_NATIONKEY" AS "s_nationkey", + "t2"."S_PHONE" AS "s_phone", + "t2"."S_ACCTBAL" AS "s_acctbal", + "t2"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t2" + ) AS "t15" + ON "t15"."s_suppkey" = "t14"."l_suppkey" + INNER JOIN ( + SELECT + "t3"."O_ORDERKEY" AS "o_orderkey", + "t3"."O_CUSTKEY" AS "o_custkey", + "t3"."O_ORDERSTATUS" AS "o_orderstatus", + "t3"."O_TOTALPRICE" AS "o_totalprice", + "t3"."O_ORDERDATE" AS "o_orderdate", + "t3"."O_ORDERPRIORITY" AS "o_orderpriority", + "t3"."O_CLERK" AS "o_clerk", + "t3"."O_SHIPPRIORITY" AS "o_shippriority", + "t3"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t3" + ) AS "t16" + ON "t14"."l_orderkey" = "t16"."o_orderkey" + INNER JOIN ( + SELECT + "t4"."C_CUSTKEY" AS "c_custkey", + "t4"."C_NAME" AS "c_name", + "t4"."C_ADDRESS" AS "c_address", + "t4"."C_NATIONKEY" AS "c_nationkey", + "t4"."C_PHONE" AS "c_phone", + "t4"."C_ACCTBAL" AS "c_acctbal", + "t4"."C_MKTSEGMENT" AS "c_mktsegment", + "t4"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t4" + ) AS "t17" + ON "t16"."o_custkey" = "t17"."c_custkey" + INNER JOIN ( + SELECT + "t5"."N_NATIONKEY" AS "n_nationkey", + "t5"."N_NAME" AS "n_name", + "t5"."N_REGIONKEY" AS "n_regionkey", + "t5"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t5" + ) AS "t18" + ON "t17"."c_nationkey" = "t18"."n_nationkey" + INNER JOIN ( + SELECT + "t6"."R_REGIONKEY" AS "r_regionkey", + "t6"."R_NAME" AS 
"r_name", + "t6"."R_COMMENT" AS "r_comment" + FROM "REGION" AS "t6" + ) AS "t20" + ON "t18"."n_regionkey" = "t20"."r_regionkey" + INNER JOIN ( + SELECT + "t5"."N_NATIONKEY" AS "n_nationkey", + "t5"."N_NAME" AS "n_name", + "t5"."N_REGIONKEY" AS "n_regionkey", + "t5"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t5" + ) AS "t19" + ON "t15"."s_nationkey" = "t19"."n_nationkey" + ) AS "t28" + WHERE + "t28"."r_name" = 'AMERICA' + AND "t28"."o_orderdate" BETWEEN DATEFROMPARTS(1995, 1, 1) AND DATEFROMPARTS(1996, 12, 31) + AND "t28"."p_type" = 'ECONOMY ANODIZED STEEL' + ) AS "t29" + GROUP BY + 1 +) AS "t30" +ORDER BY + "t30"."o_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql index b524abecbc3d..1b3de4b3fb5b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql @@ -1,39 +1,49 @@ -WITH t0 AS ( - SELECT - t2.l_extendedprice * ( - CAST(1 AS TINYINT) - t2.l_discount - ) - t4.ps_supplycost * t2.l_quantity AS amount, - CAST(EXTRACT(year FROM t6.o_orderdate) AS SMALLINT) AS o_year, - t7.n_name AS nation, - t5.p_name AS p_name - FROM main.lineitem AS t2 - JOIN main.supplier AS t3 - ON t3.s_suppkey = t2.l_suppkey - JOIN main.partsupp AS t4 - ON t4.ps_suppkey = t2.l_suppkey AND t4.ps_partkey = t2.l_partkey - JOIN main.part AS t5 - ON t5.p_partkey = t2.l_partkey - JOIN main.orders AS t6 - ON t6.o_orderkey = t2.l_orderkey - JOIN main.nation AS t7 - ON t3.s_nationkey = t7.n_nationkey - WHERE - t5.p_name LIKE '%green%' -) SELECT - t1.nation, - t1.o_year, - t1.sum_profit + t18.nation AS nation, + t18.o_year AS o_year, + t18.sum_profit AS sum_profit FROM ( SELECT - t0.nation AS nation, - t0.o_year AS o_year, - SUM(t0.amount) AS sum_profit - FROM t0 + t17.nation AS nation, + t17.o_year AS o_year, + SUM(t17.amount) AS sum_profit + FROM ( + SELECT + t16.amount AS amount, + t16.o_year AS o_year, + t16.nation AS nation, + t16.p_name AS p_name + FROM ( + SELECT + ( + t0.l_extendedprice * ( + CAST(1 AS TINYINT) - t0.l_discount + ) + ) - ( + t7.ps_supplycost * t0.l_quantity + ) AS amount, + EXTRACT('year' FROM t9.o_orderdate) AS o_year, + t10.n_name AS nation, + t8.p_name AS p_name + FROM lineitem AS t0 + INNER JOIN supplier AS t6 + ON t6.s_suppkey = t0.l_suppkey + INNER JOIN partsupp AS t7 + ON t7.ps_suppkey = t0.l_suppkey AND t7.ps_partkey = t0.l_partkey + INNER JOIN part AS t8 + ON t8.p_partkey = t0.l_partkey + INNER JOIN orders AS t9 + ON t9.o_orderkey = t0.l_orderkey + INNER JOIN nation AS t10 + ON t6.s_nationkey = t10.n_nationkey + ) AS t16 + WHERE + t16.p_name LIKE '%green%' + ) AS t17 GROUP BY 1, 2 -) AS t1 +) AS t18 ORDER BY - t1.nation ASC, - t1.o_year DESC \ No newline at end of file + t18.nation ASC, + t18.o_year DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql index 0982de40732d..2ae1e1172403 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql @@ -1,107 +1,117 @@ -WITH t0 AS ( - SELECT - t8."L_ORDERKEY" AS "l_orderkey", - t8."L_PARTKEY" AS "l_partkey", - t8."L_SUPPKEY" AS "l_suppkey", - t8."L_LINENUMBER" AS "l_linenumber", - t8."L_QUANTITY" AS "l_quantity", - t8."L_EXTENDEDPRICE" AS 
"l_extendedprice", - t8."L_DISCOUNT" AS "l_discount", - t8."L_TAX" AS "l_tax", - t8."L_RETURNFLAG" AS "l_returnflag", - t8."L_LINESTATUS" AS "l_linestatus", - t8."L_SHIPDATE" AS "l_shipdate", - t8."L_COMMITDATE" AS "l_commitdate", - t8."L_RECEIPTDATE" AS "l_receiptdate", - t8."L_SHIPINSTRUCT" AS "l_shipinstruct", - t8."L_SHIPMODE" AS "l_shipmode", - t8."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t8 -), t1 AS ( - SELECT - t8."S_SUPPKEY" AS "s_suppkey", - t8."S_NAME" AS "s_name", - t8."S_ADDRESS" AS "s_address", - t8."S_NATIONKEY" AS "s_nationkey", - t8."S_PHONE" AS "s_phone", - t8."S_ACCTBAL" AS "s_acctbal", - t8."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t8 -), t2 AS ( - SELECT - t8."PS_PARTKEY" AS "ps_partkey", - t8."PS_SUPPKEY" AS "ps_suppkey", - t8."PS_AVAILQTY" AS "ps_availqty", - t8."PS_SUPPLYCOST" AS "ps_supplycost", - t8."PS_COMMENT" AS "ps_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PARTSUPP" AS t8 -), t3 AS ( - SELECT - t8."P_PARTKEY" AS "p_partkey", - t8."P_NAME" AS "p_name", - t8."P_MFGR" AS "p_mfgr", - t8."P_BRAND" AS "p_brand", - t8."P_TYPE" AS "p_type", - t8."P_SIZE" AS "p_size", - t8."P_CONTAINER" AS "p_container", - t8."P_RETAILPRICE" AS "p_retailprice", - t8."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t8 -), t4 AS ( - SELECT - t8."O_ORDERKEY" AS "o_orderkey", - t8."O_CUSTKEY" AS "o_custkey", - t8."O_ORDERSTATUS" AS "o_orderstatus", - t8."O_TOTALPRICE" AS "o_totalprice", - t8."O_ORDERDATE" AS "o_orderdate", - t8."O_ORDERPRIORITY" AS "o_orderpriority", - t8."O_CLERK" AS "o_clerk", - t8."O_SHIPPRIORITY" AS "o_shippriority", - t8."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t8 -), t5 AS ( - SELECT - t8."N_NATIONKEY" AS "n_nationkey", - t8."N_NAME" AS "n_name", - t8."N_REGIONKEY" AS "n_regionkey", - t8."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS t8 -), t6 AS ( - SELECT - t0."l_extendedprice" * ( - 1 - t0."l_discount" - ) - t2."ps_supplycost" * t0."l_quantity" AS "amount", - CAST(DATE_PART(year, t4."o_orderdate") AS SMALLINT) AS "o_year", - t5."n_name" AS "nation", - t3."p_name" AS "p_name" - FROM t0 - JOIN t1 - ON t1."s_suppkey" = t0."l_suppkey" - JOIN t2 - ON t2."ps_suppkey" = t0."l_suppkey" AND t2."ps_partkey" = t0."l_partkey" - JOIN t3 - ON t3."p_partkey" = t0."l_partkey" - JOIN t4 - ON t4."o_orderkey" = t0."l_orderkey" - JOIN t5 - ON t1."s_nationkey" = t5."n_nationkey" - WHERE - t3."p_name" LIKE '%green%' -) SELECT - t7."nation", - t7."o_year", - t7."sum_profit" + "t24"."nation" AS "nation", + "t24"."o_year" AS "o_year", + "t24"."sum_profit" AS "sum_profit" FROM ( SELECT - t6."nation" AS "nation", - t6."o_year" AS "o_year", - SUM(t6."amount") AS "sum_profit" - FROM t6 + "t23"."nation" AS "nation", + "t23"."o_year" AS "o_year", + SUM("t23"."amount") AS "sum_profit" + FROM ( + SELECT + "t22"."amount" AS "amount", + "t22"."o_year" AS "o_year", + "t22"."nation" AS "nation", + "t22"."p_name" AS "p_name" + FROM ( + SELECT + ( + "t6"."l_extendedprice" * ( + 1 - "t6"."l_discount" + ) + ) - ( + "t13"."ps_supplycost" * "t6"."l_quantity" + ) AS "amount", + DATE_PART('year', "t15"."o_orderdate") AS "o_year", + "t16"."n_name" AS "nation", + "t14"."p_name" AS "p_name" + FROM ( + SELECT + "t0"."L_ORDERKEY" AS "l_orderkey", + "t0"."L_PARTKEY" AS "l_partkey", + "t0"."L_SUPPKEY" AS "l_suppkey", + "t0"."L_LINENUMBER" AS "l_linenumber", + "t0"."L_QUANTITY" AS "l_quantity", + "t0"."L_EXTENDEDPRICE" AS 
"l_extendedprice", + "t0"."L_DISCOUNT" AS "l_discount", + "t0"."L_TAX" AS "l_tax", + "t0"."L_RETURNFLAG" AS "l_returnflag", + "t0"."L_LINESTATUS" AS "l_linestatus", + "t0"."L_SHIPDATE" AS "l_shipdate", + "t0"."L_COMMITDATE" AS "l_commitdate", + "t0"."L_RECEIPTDATE" AS "l_receiptdate", + "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t0"."L_SHIPMODE" AS "l_shipmode", + "t0"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t0" + ) AS "t6" + INNER JOIN ( + SELECT + "t1"."S_SUPPKEY" AS "s_suppkey", + "t1"."S_NAME" AS "s_name", + "t1"."S_ADDRESS" AS "s_address", + "t1"."S_NATIONKEY" AS "s_nationkey", + "t1"."S_PHONE" AS "s_phone", + "t1"."S_ACCTBAL" AS "s_acctbal", + "t1"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t1" + ) AS "t12" + ON "t12"."s_suppkey" = "t6"."l_suppkey" + INNER JOIN ( + SELECT + "t2"."PS_PARTKEY" AS "ps_partkey", + "t2"."PS_SUPPKEY" AS "ps_suppkey", + "t2"."PS_AVAILQTY" AS "ps_availqty", + "t2"."PS_SUPPLYCOST" AS "ps_supplycost", + "t2"."PS_COMMENT" AS "ps_comment" + FROM "PARTSUPP" AS "t2" + ) AS "t13" + ON "t13"."ps_suppkey" = "t6"."l_suppkey" AND "t13"."ps_partkey" = "t6"."l_partkey" + INNER JOIN ( + SELECT + "t3"."P_PARTKEY" AS "p_partkey", + "t3"."P_NAME" AS "p_name", + "t3"."P_MFGR" AS "p_mfgr", + "t3"."P_BRAND" AS "p_brand", + "t3"."P_TYPE" AS "p_type", + "t3"."P_SIZE" AS "p_size", + "t3"."P_CONTAINER" AS "p_container", + "t3"."P_RETAILPRICE" AS "p_retailprice", + "t3"."P_COMMENT" AS "p_comment" + FROM "PART" AS "t3" + ) AS "t14" + ON "t14"."p_partkey" = "t6"."l_partkey" + INNER JOIN ( + SELECT + "t4"."O_ORDERKEY" AS "o_orderkey", + "t4"."O_CUSTKEY" AS "o_custkey", + "t4"."O_ORDERSTATUS" AS "o_orderstatus", + "t4"."O_TOTALPRICE" AS "o_totalprice", + "t4"."O_ORDERDATE" AS "o_orderdate", + "t4"."O_ORDERPRIORITY" AS "o_orderpriority", + "t4"."O_CLERK" AS "o_clerk", + "t4"."O_SHIPPRIORITY" AS "o_shippriority", + "t4"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t4" + ) AS "t15" + ON "t15"."o_orderkey" = "t6"."l_orderkey" + INNER JOIN ( + SELECT + "t5"."N_NATIONKEY" AS "n_nationkey", + "t5"."N_NAME" AS "n_name", + "t5"."N_REGIONKEY" AS "n_regionkey", + "t5"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t5" + ) AS "t16" + ON "t12"."s_nationkey" = "t16"."n_nationkey" + ) AS "t22" + WHERE + "t22"."p_name" LIKE '%green%' + ) AS "t23" GROUP BY 1, 2 -) AS t7 +) AS "t24" ORDER BY - t7."nation" ASC, - t7."o_year" DESC \ No newline at end of file + "t24"."nation" ASC, + "t24"."o_year" DESC NULLS LAST \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql index c1c8835fd3e8..9fd9b9eec366 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql @@ -1,26 +1,115 @@ -WITH t0 AS ( +SELECT + t12.c_custkey AS c_custkey, + t12.c_name AS c_name, + t12.revenue AS revenue, + t12.c_acctbal AS c_acctbal, + t12.n_name AS n_name, + t12.c_address AS c_address, + t12.c_phone AS c_phone, + t12.c_comment AS c_comment +FROM ( SELECT - t2.c_custkey AS c_custkey, - t2.c_name AS c_name, - t2.c_acctbal AS c_acctbal, - t5.n_name AS n_name, - t2.c_address AS c_address, - t2.c_phone AS c_phone, - t2.c_comment AS c_comment, - SUM(t4.l_extendedprice * ( - CAST(1 AS TINYINT) - t4.l_discount + t11.c_custkey AS c_custkey, + t11.c_name AS c_name, + t11.c_acctbal AS c_acctbal, + t11.n_name AS n_name, + t11.c_address AS c_address, + t11.c_phone AS c_phone, + t11.c_comment AS 
c_comment, + SUM(t11.l_extendedprice * ( + CAST(1 AS TINYINT) - t11.l_discount )) AS revenue - FROM main.customer AS t2 - JOIN main.orders AS t3 - ON t2.c_custkey = t3.o_custkey - JOIN main.lineitem AS t4 - ON t4.l_orderkey = t3.o_orderkey - JOIN main.nation AS t5 - ON t2.c_nationkey = t5.n_nationkey - WHERE - t3.o_orderdate >= MAKE_DATE(1993, 10, 1) - AND t3.o_orderdate < MAKE_DATE(1994, 1, 1) - AND t4.l_returnflag = 'R' + FROM ( + SELECT + t10.c_custkey AS c_custkey, + t10.c_name AS c_name, + t10.c_address AS c_address, + t10.c_nationkey AS c_nationkey, + t10.c_phone AS c_phone, + t10.c_acctbal AS c_acctbal, + t10.c_mktsegment AS c_mktsegment, + t10.c_comment AS c_comment, + t10.o_orderkey AS o_orderkey, + t10.o_custkey AS o_custkey, + t10.o_orderstatus AS o_orderstatus, + t10.o_totalprice AS o_totalprice, + t10.o_orderdate AS o_orderdate, + t10.o_orderpriority AS o_orderpriority, + t10.o_clerk AS o_clerk, + t10.o_shippriority AS o_shippriority, + t10.o_comment AS o_comment, + t10.l_orderkey AS l_orderkey, + t10.l_partkey AS l_partkey, + t10.l_suppkey AS l_suppkey, + t10.l_linenumber AS l_linenumber, + t10.l_quantity AS l_quantity, + t10.l_extendedprice AS l_extendedprice, + t10.l_discount AS l_discount, + t10.l_tax AS l_tax, + t10.l_returnflag AS l_returnflag, + t10.l_linestatus AS l_linestatus, + t10.l_shipdate AS l_shipdate, + t10.l_commitdate AS l_commitdate, + t10.l_receiptdate AS l_receiptdate, + t10.l_shipinstruct AS l_shipinstruct, + t10.l_shipmode AS l_shipmode, + t10.l_comment AS l_comment, + t10.n_nationkey AS n_nationkey, + t10.n_name AS n_name, + t10.n_regionkey AS n_regionkey, + t10.n_comment AS n_comment + FROM ( + SELECT + t0.c_custkey AS c_custkey, + t0.c_name AS c_name, + t0.c_address AS c_address, + t0.c_nationkey AS c_nationkey, + t0.c_phone AS c_phone, + t0.c_acctbal AS c_acctbal, + t0.c_mktsegment AS c_mktsegment, + t0.c_comment AS c_comment, + t4.o_orderkey AS o_orderkey, + t4.o_custkey AS o_custkey, + t4.o_orderstatus AS o_orderstatus, + t4.o_totalprice AS o_totalprice, + t4.o_orderdate AS o_orderdate, + t4.o_orderpriority AS o_orderpriority, + t4.o_clerk AS o_clerk, + t4.o_shippriority AS o_shippriority, + t4.o_comment AS o_comment, + t5.l_orderkey AS l_orderkey, + t5.l_partkey AS l_partkey, + t5.l_suppkey AS l_suppkey, + t5.l_linenumber AS l_linenumber, + t5.l_quantity AS l_quantity, + t5.l_extendedprice AS l_extendedprice, + t5.l_discount AS l_discount, + t5.l_tax AS l_tax, + t5.l_returnflag AS l_returnflag, + t5.l_linestatus AS l_linestatus, + t5.l_shipdate AS l_shipdate, + t5.l_commitdate AS l_commitdate, + t5.l_receiptdate AS l_receiptdate, + t5.l_shipinstruct AS l_shipinstruct, + t5.l_shipmode AS l_shipmode, + t5.l_comment AS l_comment, + t6.n_nationkey AS n_nationkey, + t6.n_name AS n_name, + t6.n_regionkey AS n_regionkey, + t6.n_comment AS n_comment + FROM customer AS t0 + INNER JOIN orders AS t4 + ON t0.c_custkey = t4.o_custkey + INNER JOIN lineitem AS t5 + ON t5.l_orderkey = t4.o_orderkey + INNER JOIN nation AS t6 + ON t0.c_nationkey = t6.n_nationkey + ) AS t10 + WHERE + t10.o_orderdate >= MAKE_DATE(1993, 10, 1) + AND t10.o_orderdate < MAKE_DATE(1994, 1, 1) + AND t10.l_returnflag = 'R' + ) AS t11 GROUP BY 1, 2, @@ -29,28 +118,7 @@ WITH t0 AS ( 5, 6, 7 -) -SELECT - t1.c_custkey, - t1.c_name, - t1.revenue, - t1.c_acctbal, - t1.n_name, - t1.c_address, - t1.c_phone, - t1.c_comment -FROM ( - SELECT - t0.c_custkey AS c_custkey, - t0.c_name AS c_name, - t0.revenue AS revenue, - t0.c_acctbal AS c_acctbal, - t0.n_name AS n_name, - t0.c_address AS c_address, - 
t0.c_phone AS c_phone, - t0.c_comment AS c_comment - FROM t0 -) AS t1 +) AS t12 ORDER BY - t1.revenue DESC + t12.revenue DESC LIMIT 20 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql index deb4545ccffb..a6f5f97cb8c8 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql @@ -1,75 +1,164 @@ -WITH t1 AS ( - SELECT - t6."C_CUSTKEY" AS "c_custkey", - t6."C_NAME" AS "c_name", - t6."C_ADDRESS" AS "c_address", - t6."C_NATIONKEY" AS "c_nationkey", - t6."C_PHONE" AS "c_phone", - t6."C_ACCTBAL" AS "c_acctbal", - t6."C_MKTSEGMENT" AS "c_mktsegment", - t6."C_COMMENT" AS "c_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS t6 -), t0 AS ( - SELECT - t6."O_ORDERKEY" AS "o_orderkey", - t6."O_CUSTKEY" AS "o_custkey", - t6."O_ORDERSTATUS" AS "o_orderstatus", - t6."O_TOTALPRICE" AS "o_totalprice", - t6."O_ORDERDATE" AS "o_orderdate", - t6."O_ORDERPRIORITY" AS "o_orderpriority", - t6."O_CLERK" AS "o_clerk", - t6."O_SHIPPRIORITY" AS "o_shippriority", - t6."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t6 -), t2 AS ( - SELECT - t6."L_ORDERKEY" AS "l_orderkey", - t6."L_PARTKEY" AS "l_partkey", - t6."L_SUPPKEY" AS "l_suppkey", - t6."L_LINENUMBER" AS "l_linenumber", - t6."L_QUANTITY" AS "l_quantity", - t6."L_EXTENDEDPRICE" AS "l_extendedprice", - t6."L_DISCOUNT" AS "l_discount", - t6."L_TAX" AS "l_tax", - t6."L_RETURNFLAG" AS "l_returnflag", - t6."L_LINESTATUS" AS "l_linestatus", - t6."L_SHIPDATE" AS "l_shipdate", - t6."L_COMMITDATE" AS "l_commitdate", - t6."L_RECEIPTDATE" AS "l_receiptdate", - t6."L_SHIPINSTRUCT" AS "l_shipinstruct", - t6."L_SHIPMODE" AS "l_shipmode", - t6."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t6 -), t3 AS ( - SELECT - t6."N_NATIONKEY" AS "n_nationkey", - t6."N_NAME" AS "n_name", - t6."N_REGIONKEY" AS "n_regionkey", - t6."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS t6 -), t4 AS ( +SELECT + "t16"."c_custkey" AS "c_custkey", + "t16"."c_name" AS "c_name", + "t16"."revenue" AS "revenue", + "t16"."c_acctbal" AS "c_acctbal", + "t16"."n_name" AS "n_name", + "t16"."c_address" AS "c_address", + "t16"."c_phone" AS "c_phone", + "t16"."c_comment" AS "c_comment" +FROM ( SELECT - t1."c_custkey" AS "c_custkey", - t1."c_name" AS "c_name", - t1."c_acctbal" AS "c_acctbal", - t3."n_name" AS "n_name", - t1."c_address" AS "c_address", - t1."c_phone" AS "c_phone", - t1."c_comment" AS "c_comment", - SUM(t2."l_extendedprice" * ( - 1 - t2."l_discount" + "t15"."c_custkey" AS "c_custkey", + "t15"."c_name" AS "c_name", + "t15"."c_acctbal" AS "c_acctbal", + "t15"."n_name" AS "n_name", + "t15"."c_address" AS "c_address", + "t15"."c_phone" AS "c_phone", + "t15"."c_comment" AS "c_comment", + SUM("t15"."l_extendedprice" * ( + 1 - "t15"."l_discount" )) AS "revenue" - FROM t1 - JOIN t0 - ON t1."c_custkey" = t0."o_custkey" - JOIN t2 - ON t2."l_orderkey" = t0."o_orderkey" - JOIN t3 - ON t1."c_nationkey" = t3."n_nationkey" - WHERE - t0."o_orderdate" >= DATE_FROM_PARTS(1993, 10, 1) - AND t0."o_orderdate" < DATE_FROM_PARTS(1994, 1, 1) - AND t2."l_returnflag" = 'R' + FROM ( + SELECT + "t14"."c_custkey" AS "c_custkey", + "t14"."c_name" AS "c_name", + "t14"."c_address" AS "c_address", + "t14"."c_nationkey" AS "c_nationkey", + "t14"."c_phone" AS "c_phone", + 
"t14"."c_acctbal" AS "c_acctbal", + "t14"."c_mktsegment" AS "c_mktsegment", + "t14"."c_comment" AS "c_comment", + "t14"."o_orderkey" AS "o_orderkey", + "t14"."o_custkey" AS "o_custkey", + "t14"."o_orderstatus" AS "o_orderstatus", + "t14"."o_totalprice" AS "o_totalprice", + "t14"."o_orderdate" AS "o_orderdate", + "t14"."o_orderpriority" AS "o_orderpriority", + "t14"."o_clerk" AS "o_clerk", + "t14"."o_shippriority" AS "o_shippriority", + "t14"."o_comment" AS "o_comment", + "t14"."l_orderkey" AS "l_orderkey", + "t14"."l_partkey" AS "l_partkey", + "t14"."l_suppkey" AS "l_suppkey", + "t14"."l_linenumber" AS "l_linenumber", + "t14"."l_quantity" AS "l_quantity", + "t14"."l_extendedprice" AS "l_extendedprice", + "t14"."l_discount" AS "l_discount", + "t14"."l_tax" AS "l_tax", + "t14"."l_returnflag" AS "l_returnflag", + "t14"."l_linestatus" AS "l_linestatus", + "t14"."l_shipdate" AS "l_shipdate", + "t14"."l_commitdate" AS "l_commitdate", + "t14"."l_receiptdate" AS "l_receiptdate", + "t14"."l_shipinstruct" AS "l_shipinstruct", + "t14"."l_shipmode" AS "l_shipmode", + "t14"."l_comment" AS "l_comment", + "t14"."n_nationkey" AS "n_nationkey", + "t14"."n_name" AS "n_name", + "t14"."n_regionkey" AS "n_regionkey", + "t14"."n_comment" AS "n_comment" + FROM ( + SELECT + "t4"."c_custkey" AS "c_custkey", + "t4"."c_name" AS "c_name", + "t4"."c_address" AS "c_address", + "t4"."c_nationkey" AS "c_nationkey", + "t4"."c_phone" AS "c_phone", + "t4"."c_acctbal" AS "c_acctbal", + "t4"."c_mktsegment" AS "c_mktsegment", + "t4"."c_comment" AS "c_comment", + "t8"."o_orderkey" AS "o_orderkey", + "t8"."o_custkey" AS "o_custkey", + "t8"."o_orderstatus" AS "o_orderstatus", + "t8"."o_totalprice" AS "o_totalprice", + "t8"."o_orderdate" AS "o_orderdate", + "t8"."o_orderpriority" AS "o_orderpriority", + "t8"."o_clerk" AS "o_clerk", + "t8"."o_shippriority" AS "o_shippriority", + "t8"."o_comment" AS "o_comment", + "t9"."l_orderkey" AS "l_orderkey", + "t9"."l_partkey" AS "l_partkey", + "t9"."l_suppkey" AS "l_suppkey", + "t9"."l_linenumber" AS "l_linenumber", + "t9"."l_quantity" AS "l_quantity", + "t9"."l_extendedprice" AS "l_extendedprice", + "t9"."l_discount" AS "l_discount", + "t9"."l_tax" AS "l_tax", + "t9"."l_returnflag" AS "l_returnflag", + "t9"."l_linestatus" AS "l_linestatus", + "t9"."l_shipdate" AS "l_shipdate", + "t9"."l_commitdate" AS "l_commitdate", + "t9"."l_receiptdate" AS "l_receiptdate", + "t9"."l_shipinstruct" AS "l_shipinstruct", + "t9"."l_shipmode" AS "l_shipmode", + "t9"."l_comment" AS "l_comment", + "t10"."n_nationkey" AS "n_nationkey", + "t10"."n_name" AS "n_name", + "t10"."n_regionkey" AS "n_regionkey", + "t10"."n_comment" AS "n_comment" + FROM ( + SELECT + "t0"."C_CUSTKEY" AS "c_custkey", + "t0"."C_NAME" AS "c_name", + "t0"."C_ADDRESS" AS "c_address", + "t0"."C_NATIONKEY" AS "c_nationkey", + "t0"."C_PHONE" AS "c_phone", + "t0"."C_ACCTBAL" AS "c_acctbal", + "t0"."C_MKTSEGMENT" AS "c_mktsegment", + "t0"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t1"."O_ORDERKEY" AS "o_orderkey", + "t1"."O_CUSTKEY" AS "o_custkey", + "t1"."O_ORDERSTATUS" AS "o_orderstatus", + "t1"."O_TOTALPRICE" AS "o_totalprice", + "t1"."O_ORDERDATE" AS "o_orderdate", + "t1"."O_ORDERPRIORITY" AS "o_orderpriority", + "t1"."O_CLERK" AS "o_clerk", + "t1"."O_SHIPPRIORITY" AS "o_shippriority", + "t1"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t1" + ) AS "t8" + ON "t4"."c_custkey" = "t8"."o_custkey" + INNER JOIN ( + SELECT + "t2"."L_ORDERKEY" AS "l_orderkey", + "t2"."L_PARTKEY" AS "l_partkey", + 
"t2"."L_SUPPKEY" AS "l_suppkey", + "t2"."L_LINENUMBER" AS "l_linenumber", + "t2"."L_QUANTITY" AS "l_quantity", + "t2"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t2"."L_DISCOUNT" AS "l_discount", + "t2"."L_TAX" AS "l_tax", + "t2"."L_RETURNFLAG" AS "l_returnflag", + "t2"."L_LINESTATUS" AS "l_linestatus", + "t2"."L_SHIPDATE" AS "l_shipdate", + "t2"."L_COMMITDATE" AS "l_commitdate", + "t2"."L_RECEIPTDATE" AS "l_receiptdate", + "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t2"."L_SHIPMODE" AS "l_shipmode", + "t2"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t2" + ) AS "t9" + ON "t9"."l_orderkey" = "t8"."o_orderkey" + INNER JOIN ( + SELECT + "t3"."N_NATIONKEY" AS "n_nationkey", + "t3"."N_NAME" AS "n_name", + "t3"."N_REGIONKEY" AS "n_regionkey", + "t3"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t3" + ) AS "t10" + ON "t4"."c_nationkey" = "t10"."n_nationkey" + ) AS "t14" + WHERE + "t14"."o_orderdate" >= DATEFROMPARTS(1993, 10, 1) + AND "t14"."o_orderdate" < DATEFROMPARTS(1994, 1, 1) + AND "t14"."l_returnflag" = 'R' + ) AS "t15" GROUP BY 1, 2, @@ -78,28 +167,7 @@ WITH t1 AS ( 5, 6, 7 -) -SELECT - t5."c_custkey", - t5."c_name", - t5."revenue", - t5."c_acctbal", - t5."n_name", - t5."c_address", - t5."c_phone", - t5."c_comment" -FROM ( - SELECT - t4."c_custkey" AS "c_custkey", - t4."c_name" AS "c_name", - t4."revenue" AS "revenue", - t4."c_acctbal" AS "c_acctbal", - t4."n_name" AS "n_name", - t4."c_address" AS "c_address", - t4."c_phone" AS "c_phone", - t4."c_comment" AS "c_comment" - FROM t4 -) AS t5 +) AS "t16" ORDER BY - t5."revenue" DESC + "t16"."revenue" DESC NULLS LAST LIMIT 20 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql index edbba7a0223d..79d4720321ab 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql @@ -1,41 +1,109 @@ -WITH t0 AS ( - SELECT - t2.ps_partkey AS ps_partkey, - SUM(t2.ps_supplycost * t2.ps_availqty) AS value - FROM main.partsupp AS t2 - JOIN main.supplier AS t3 - ON t2.ps_suppkey = t3.s_suppkey - JOIN main.nation AS t4 - ON t4.n_nationkey = t3.s_nationkey - WHERE - t4.n_name = 'GERMANY' - GROUP BY - 1 -) SELECT - t1.ps_partkey, - t1.value + t15.ps_partkey AS ps_partkey, + t15.value AS value FROM ( SELECT - t0.ps_partkey AS ps_partkey, - t0.value AS value - FROM t0 - WHERE - t0.value > ( + t13.ps_partkey AS ps_partkey, + SUM(t13.ps_supplycost * t13.ps_availqty) AS value + FROM ( + SELECT + t11.ps_partkey AS ps_partkey, + t11.ps_suppkey AS ps_suppkey, + t11.ps_availqty AS ps_availqty, + t11.ps_supplycost AS ps_supplycost, + t11.ps_comment AS ps_comment, + t11.s_suppkey AS s_suppkey, + t11.s_name AS s_name, + t11.s_address AS s_address, + t11.s_nationkey AS s_nationkey, + t11.s_phone AS s_phone, + t11.s_acctbal AS s_acctbal, + t11.s_comment AS s_comment, + t11.n_nationkey AS n_nationkey, + t11.n_name AS n_name, + t11.n_regionkey AS n_regionkey, + t11.n_comment AS n_comment + FROM ( + SELECT + t0.ps_partkey AS ps_partkey, + t0.ps_suppkey AS ps_suppkey, + t0.ps_availqty AS ps_availqty, + t0.ps_supplycost AS ps_supplycost, + t0.ps_comment AS ps_comment, + t3.s_suppkey AS s_suppkey, + t3.s_name AS s_name, + t3.s_address AS s_address, + t3.s_nationkey AS s_nationkey, + t3.s_phone AS s_phone, + t3.s_acctbal AS s_acctbal, + t3.s_comment AS s_comment, + t5.n_nationkey AS n_nationkey, + t5.n_name AS n_name, + t5.n_regionkey AS 
n_regionkey, + t5.n_comment AS n_comment + FROM partsupp AS t0 + INNER JOIN supplier AS t3 + ON t0.ps_suppkey = t3.s_suppkey + INNER JOIN nation AS t5 + ON t5.n_nationkey = t3.s_nationkey + ) AS t11 + WHERE + t11.n_name = 'GERMANY' + ) AS t13 + GROUP BY + 1 +) AS t15 +WHERE + t15.value > ( + ( SELECT - anon_1.total + SUM(t14.ps_supplycost * t14.ps_availqty) AS "Sum(Multiply(ps_supplycost, ps_availqty))" FROM ( SELECT - SUM(t2.ps_supplycost * t2.ps_availqty) AS total - FROM main.partsupp AS t2 - JOIN main.supplier AS t3 - ON t2.ps_suppkey = t3.s_suppkey - JOIN main.nation AS t4 - ON t4.n_nationkey = t3.s_nationkey + t12.ps_partkey AS ps_partkey, + t12.ps_suppkey AS ps_suppkey, + t12.ps_availqty AS ps_availqty, + t12.ps_supplycost AS ps_supplycost, + t12.ps_comment AS ps_comment, + t12.s_suppkey AS s_suppkey, + t12.s_name AS s_name, + t12.s_address AS s_address, + t12.s_nationkey AS s_nationkey, + t12.s_phone AS s_phone, + t12.s_acctbal AS s_acctbal, + t12.s_comment AS s_comment, + t12.n_nationkey AS n_nationkey, + t12.n_name AS n_name, + t12.n_regionkey AS n_regionkey, + t12.n_comment AS n_comment + FROM ( + SELECT + t0.ps_partkey AS ps_partkey, + t0.ps_suppkey AS ps_suppkey, + t0.ps_availqty AS ps_availqty, + t0.ps_supplycost AS ps_supplycost, + t0.ps_comment AS ps_comment, + t4.s_suppkey AS s_suppkey, + t4.s_name AS s_name, + t4.s_address AS s_address, + t4.s_nationkey AS s_nationkey, + t4.s_phone AS s_phone, + t4.s_acctbal AS s_acctbal, + t4.s_comment AS s_comment, + t6.n_nationkey AS n_nationkey, + t6.n_name AS n_name, + t6.n_regionkey AS n_regionkey, + t6.n_comment AS n_comment + FROM partsupp AS t0 + INNER JOIN supplier AS t4 + ON t0.ps_suppkey = t4.s_suppkey + INNER JOIN nation AS t6 + ON t6.n_nationkey = t4.s_nationkey + ) AS t12 WHERE - t4.n_name = 'GERMANY' - ) AS anon_1 + t12.n_name = 'GERMANY' + ) AS t14 ) * CAST(0.0001 AS DOUBLE) -) AS t1 + ) ORDER BY - t1.value DESC \ No newline at end of file + t15.value DESC diff --git a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql new file mode 100644 index 000000000000..5d311642834a --- /dev/null +++ b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql @@ -0,0 +1,159 @@ +SELECT + "t18"."ps_partkey" AS "ps_partkey", + "t18"."value" AS "value" +FROM ( + SELECT + "t16"."ps_partkey" AS "ps_partkey", + SUM("t16"."ps_supplycost" * "t16"."ps_availqty") AS "value" + FROM ( + SELECT + "t14"."ps_partkey" AS "ps_partkey", + "t14"."ps_suppkey" AS "ps_suppkey", + "t14"."ps_availqty" AS "ps_availqty", + "t14"."ps_supplycost" AS "ps_supplycost", + "t14"."ps_comment" AS "ps_comment", + "t14"."s_suppkey" AS "s_suppkey", + "t14"."s_name" AS "s_name", + "t14"."s_address" AS "s_address", + "t14"."s_nationkey" AS "s_nationkey", + "t14"."s_phone" AS "s_phone", + "t14"."s_acctbal" AS "s_acctbal", + "t14"."s_comment" AS "s_comment", + "t14"."n_nationkey" AS "n_nationkey", + "t14"."n_name" AS "n_name", + "t14"."n_regionkey" AS "n_regionkey", + "t14"."n_comment" AS "n_comment" + FROM ( + SELECT + "t3"."ps_partkey" AS "ps_partkey", + "t3"."ps_suppkey" AS "ps_suppkey", + "t3"."ps_availqty" AS "ps_availqty", + "t3"."ps_supplycost" AS "ps_supplycost", + "t3"."ps_comment" AS "ps_comment", + "t6"."s_suppkey" AS "s_suppkey", + "t6"."s_name" AS "s_name", + "t6"."s_address" AS "s_address", + "t6"."s_nationkey" AS "s_nationkey", + "t6"."s_phone" AS "s_phone", + "t6"."s_acctbal" AS "s_acctbal", + "t6"."s_comment" AS "s_comment", + 
"t8"."n_nationkey" AS "n_nationkey", + "t8"."n_name" AS "n_name", + "t8"."n_regionkey" AS "n_regionkey", + "t8"."n_comment" AS "n_comment" + FROM ( + SELECT + "t0"."PS_PARTKEY" AS "ps_partkey", + "t0"."PS_SUPPKEY" AS "ps_suppkey", + "t0"."PS_AVAILQTY" AS "ps_availqty", + "t0"."PS_SUPPLYCOST" AS "ps_supplycost", + "t0"."PS_COMMENT" AS "ps_comment" + FROM "PARTSUPP" AS "t0" + ) AS "t3" + INNER JOIN ( + SELECT + "t1"."S_SUPPKEY" AS "s_suppkey", + "t1"."S_NAME" AS "s_name", + "t1"."S_ADDRESS" AS "s_address", + "t1"."S_NATIONKEY" AS "s_nationkey", + "t1"."S_PHONE" AS "s_phone", + "t1"."S_ACCTBAL" AS "s_acctbal", + "t1"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t1" + ) AS "t6" + ON "t3"."ps_suppkey" = "t6"."s_suppkey" + INNER JOIN ( + SELECT + "t2"."N_NATIONKEY" AS "n_nationkey", + "t2"."N_NAME" AS "n_name", + "t2"."N_REGIONKEY" AS "n_regionkey", + "t2"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t2" + ) AS "t8" + ON "t8"."n_nationkey" = "t6"."s_nationkey" + ) AS "t14" + WHERE + "t14"."n_name" = 'GERMANY' + ) AS "t16" + GROUP BY + 1 +) AS "t18" +WHERE + "t18"."value" > ( + ( + SELECT + SUM("t17"."ps_supplycost" * "t17"."ps_availqty") AS "Sum(Multiply(ps_supplycost, ps_availqty))" + FROM ( + SELECT + "t15"."ps_partkey" AS "ps_partkey", + "t15"."ps_suppkey" AS "ps_suppkey", + "t15"."ps_availqty" AS "ps_availqty", + "t15"."ps_supplycost" AS "ps_supplycost", + "t15"."ps_comment" AS "ps_comment", + "t15"."s_suppkey" AS "s_suppkey", + "t15"."s_name" AS "s_name", + "t15"."s_address" AS "s_address", + "t15"."s_nationkey" AS "s_nationkey", + "t15"."s_phone" AS "s_phone", + "t15"."s_acctbal" AS "s_acctbal", + "t15"."s_comment" AS "s_comment", + "t15"."n_nationkey" AS "n_nationkey", + "t15"."n_name" AS "n_name", + "t15"."n_regionkey" AS "n_regionkey", + "t15"."n_comment" AS "n_comment" + FROM ( + SELECT + "t3"."ps_partkey" AS "ps_partkey", + "t3"."ps_suppkey" AS "ps_suppkey", + "t3"."ps_availqty" AS "ps_availqty", + "t3"."ps_supplycost" AS "ps_supplycost", + "t3"."ps_comment" AS "ps_comment", + "t7"."s_suppkey" AS "s_suppkey", + "t7"."s_name" AS "s_name", + "t7"."s_address" AS "s_address", + "t7"."s_nationkey" AS "s_nationkey", + "t7"."s_phone" AS "s_phone", + "t7"."s_acctbal" AS "s_acctbal", + "t7"."s_comment" AS "s_comment", + "t9"."n_nationkey" AS "n_nationkey", + "t9"."n_name" AS "n_name", + "t9"."n_regionkey" AS "n_regionkey", + "t9"."n_comment" AS "n_comment" + FROM ( + SELECT + "t0"."PS_PARTKEY" AS "ps_partkey", + "t0"."PS_SUPPKEY" AS "ps_suppkey", + "t0"."PS_AVAILQTY" AS "ps_availqty", + "t0"."PS_SUPPLYCOST" AS "ps_supplycost", + "t0"."PS_COMMENT" AS "ps_comment" + FROM "PARTSUPP" AS "t0" + ) AS "t3" + INNER JOIN ( + SELECT + "t1"."S_SUPPKEY" AS "s_suppkey", + "t1"."S_NAME" AS "s_name", + "t1"."S_ADDRESS" AS "s_address", + "t1"."S_NATIONKEY" AS "s_nationkey", + "t1"."S_PHONE" AS "s_phone", + "t1"."S_ACCTBAL" AS "s_acctbal", + "t1"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t1" + ) AS "t7" + ON "t3"."ps_suppkey" = "t7"."s_suppkey" + INNER JOIN ( + SELECT + "t2"."N_NATIONKEY" AS "n_nationkey", + "t2"."N_NAME" AS "n_name", + "t2"."N_REGIONKEY" AS "n_regionkey", + "t2"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t2" + ) AS "t9" + ON "t9"."n_nationkey" = "t7"."s_nationkey" + ) AS "t15" + WHERE + "t15"."n_name" = 'GERMANY' + ) AS "t17" + ) * 0.0001 + ) +ORDER BY + "t18"."value" DESC NULLS LAST \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql index 
0542a214407d..5dd65a2837f0 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql @@ -1,12 +1,12 @@ SELECT - t0.l_shipmode, - t0.high_line_count, - t0.low_line_count + t6.l_shipmode AS l_shipmode, + t6.high_line_count AS high_line_count, + t6.low_line_count AS low_line_count FROM ( SELECT - t2.l_shipmode AS l_shipmode, + t5.l_shipmode AS l_shipmode, SUM( - CASE t1.o_orderpriority + CASE t5.o_orderpriority WHEN '1-URGENT' THEN CAST(1 AS TINYINT) WHEN '2-HIGH' @@ -15,7 +15,7 @@ FROM ( END ) AS high_line_count, SUM( - CASE t1.o_orderpriority + CASE t5.o_orderpriority WHEN '1-URGENT' THEN CAST(0 AS TINYINT) WHEN '2-HIGH' @@ -23,17 +23,73 @@ FROM ( ELSE CAST(1 AS TINYINT) END ) AS low_line_count - FROM main.orders AS t1 - JOIN main.lineitem AS t2 - ON t1.o_orderkey = t2.l_orderkey - WHERE - t2.l_shipmode IN ('MAIL', 'SHIP') - AND t2.l_commitdate < t2.l_receiptdate - AND t2.l_shipdate < t2.l_commitdate - AND t2.l_receiptdate >= MAKE_DATE(1994, 1, 1) - AND t2.l_receiptdate < MAKE_DATE(1995, 1, 1) + FROM ( + SELECT + t4.o_orderkey AS o_orderkey, + t4.o_custkey AS o_custkey, + t4.o_orderstatus AS o_orderstatus, + t4.o_totalprice AS o_totalprice, + t4.o_orderdate AS o_orderdate, + t4.o_orderpriority AS o_orderpriority, + t4.o_clerk AS o_clerk, + t4.o_shippriority AS o_shippriority, + t4.o_comment AS o_comment, + t4.l_orderkey AS l_orderkey, + t4.l_partkey AS l_partkey, + t4.l_suppkey AS l_suppkey, + t4.l_linenumber AS l_linenumber, + t4.l_quantity AS l_quantity, + t4.l_extendedprice AS l_extendedprice, + t4.l_discount AS l_discount, + t4.l_tax AS l_tax, + t4.l_returnflag AS l_returnflag, + t4.l_linestatus AS l_linestatus, + t4.l_shipdate AS l_shipdate, + t4.l_commitdate AS l_commitdate, + t4.l_receiptdate AS l_receiptdate, + t4.l_shipinstruct AS l_shipinstruct, + t4.l_shipmode AS l_shipmode, + t4.l_comment AS l_comment + FROM ( + SELECT + t0.o_orderkey AS o_orderkey, + t0.o_custkey AS o_custkey, + t0.o_orderstatus AS o_orderstatus, + t0.o_totalprice AS o_totalprice, + t0.o_orderdate AS o_orderdate, + t0.o_orderpriority AS o_orderpriority, + t0.o_clerk AS o_clerk, + t0.o_shippriority AS o_shippriority, + t0.o_comment AS o_comment, + t2.l_orderkey AS l_orderkey, + t2.l_partkey AS l_partkey, + t2.l_suppkey AS l_suppkey, + t2.l_linenumber AS l_linenumber, + t2.l_quantity AS l_quantity, + t2.l_extendedprice AS l_extendedprice, + t2.l_discount AS l_discount, + t2.l_tax AS l_tax, + t2.l_returnflag AS l_returnflag, + t2.l_linestatus AS l_linestatus, + t2.l_shipdate AS l_shipdate, + t2.l_commitdate AS l_commitdate, + t2.l_receiptdate AS l_receiptdate, + t2.l_shipinstruct AS l_shipinstruct, + t2.l_shipmode AS l_shipmode, + t2.l_comment AS l_comment + FROM orders AS t0 + INNER JOIN lineitem AS t2 + ON t0.o_orderkey = t2.l_orderkey + ) AS t4 + WHERE + t4.l_shipmode IN ('MAIL', 'SHIP') + AND t4.l_commitdate < t4.l_receiptdate + AND t4.l_shipdate < t4.l_commitdate + AND t4.l_receiptdate >= MAKE_DATE(1994, 1, 1) + AND t4.l_receiptdate < MAKE_DATE(1995, 1, 1) + ) AS t5 GROUP BY 1 -) AS t0 +) AS t6 ORDER BY - t0.l_shipmode ASC \ No newline at end of file + t6.l_shipmode ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql index 9bff7b2fc81c..799ef5b12cd0 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql +++ 
b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql @@ -1,55 +1,114 @@ -WITH t1 AS ( - SELECT - t3."O_ORDERKEY" AS "o_orderkey", - t3."O_CUSTKEY" AS "o_custkey", - t3."O_ORDERSTATUS" AS "o_orderstatus", - t3."O_TOTALPRICE" AS "o_totalprice", - t3."O_ORDERDATE" AS "o_orderdate", - t3."O_ORDERPRIORITY" AS "o_orderpriority", - t3."O_CLERK" AS "o_clerk", - t3."O_SHIPPRIORITY" AS "o_shippriority", - t3."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t3 -), t0 AS ( - SELECT - t3."L_ORDERKEY" AS "l_orderkey", - t3."L_PARTKEY" AS "l_partkey", - t3."L_SUPPKEY" AS "l_suppkey", - t3."L_LINENUMBER" AS "l_linenumber", - t3."L_QUANTITY" AS "l_quantity", - t3."L_EXTENDEDPRICE" AS "l_extendedprice", - t3."L_DISCOUNT" AS "l_discount", - t3."L_TAX" AS "l_tax", - t3."L_RETURNFLAG" AS "l_returnflag", - t3."L_LINESTATUS" AS "l_linestatus", - t3."L_SHIPDATE" AS "l_shipdate", - t3."L_COMMITDATE" AS "l_commitdate", - t3."L_RECEIPTDATE" AS "l_receiptdate", - t3."L_SHIPINSTRUCT" AS "l_shipinstruct", - t3."L_SHIPMODE" AS "l_shipmode", - t3."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t3 -) SELECT - t2."l_shipmode", - t2."high_line_count", - t2."low_line_count" + "t8"."l_shipmode" AS "l_shipmode", + "t8"."high_line_count" AS "high_line_count", + "t8"."low_line_count" AS "low_line_count" FROM ( SELECT - t0."l_shipmode" AS "l_shipmode", - SUM(CASE t1."o_orderpriority" WHEN '1-URGENT' THEN 1 WHEN '2-HIGH' THEN 1 ELSE 0 END) AS "high_line_count", - SUM(CASE t1."o_orderpriority" WHEN '1-URGENT' THEN 0 WHEN '2-HIGH' THEN 0 ELSE 1 END) AS "low_line_count" - FROM t1 - JOIN t0 - ON t1."o_orderkey" = t0."l_orderkey" - WHERE - t0."l_shipmode" IN ('MAIL', 'SHIP') - AND t0."l_commitdate" < t0."l_receiptdate" - AND t0."l_shipdate" < t0."l_commitdate" - AND t0."l_receiptdate" >= DATE_FROM_PARTS(1994, 1, 1) - AND t0."l_receiptdate" < DATE_FROM_PARTS(1995, 1, 1) + "t7"."l_shipmode" AS "l_shipmode", + SUM( + CASE "t7"."o_orderpriority" WHEN '1-URGENT' THEN 1 WHEN '2-HIGH' THEN 1 ELSE 0 END + ) AS "high_line_count", + SUM( + CASE "t7"."o_orderpriority" WHEN '1-URGENT' THEN 0 WHEN '2-HIGH' THEN 0 ELSE 1 END + ) AS "low_line_count" + FROM ( + SELECT + "t6"."o_orderkey" AS "o_orderkey", + "t6"."o_custkey" AS "o_custkey", + "t6"."o_orderstatus" AS "o_orderstatus", + "t6"."o_totalprice" AS "o_totalprice", + "t6"."o_orderdate" AS "o_orderdate", + "t6"."o_orderpriority" AS "o_orderpriority", + "t6"."o_clerk" AS "o_clerk", + "t6"."o_shippriority" AS "o_shippriority", + "t6"."o_comment" AS "o_comment", + "t6"."l_orderkey" AS "l_orderkey", + "t6"."l_partkey" AS "l_partkey", + "t6"."l_suppkey" AS "l_suppkey", + "t6"."l_linenumber" AS "l_linenumber", + "t6"."l_quantity" AS "l_quantity", + "t6"."l_extendedprice" AS "l_extendedprice", + "t6"."l_discount" AS "l_discount", + "t6"."l_tax" AS "l_tax", + "t6"."l_returnflag" AS "l_returnflag", + "t6"."l_linestatus" AS "l_linestatus", + "t6"."l_shipdate" AS "l_shipdate", + "t6"."l_commitdate" AS "l_commitdate", + "t6"."l_receiptdate" AS "l_receiptdate", + "t6"."l_shipinstruct" AS "l_shipinstruct", + "t6"."l_shipmode" AS "l_shipmode", + "t6"."l_comment" AS "l_comment" + FROM ( + SELECT + "t2"."o_orderkey" AS "o_orderkey", + "t2"."o_custkey" AS "o_custkey", + "t2"."o_orderstatus" AS "o_orderstatus", + "t2"."o_totalprice" AS "o_totalprice", + "t2"."o_orderdate" AS "o_orderdate", + "t2"."o_orderpriority" AS "o_orderpriority", + "t2"."o_clerk" AS "o_clerk", + "t2"."o_shippriority" AS "o_shippriority", + 
"t2"."o_comment" AS "o_comment", + "t4"."l_orderkey" AS "l_orderkey", + "t4"."l_partkey" AS "l_partkey", + "t4"."l_suppkey" AS "l_suppkey", + "t4"."l_linenumber" AS "l_linenumber", + "t4"."l_quantity" AS "l_quantity", + "t4"."l_extendedprice" AS "l_extendedprice", + "t4"."l_discount" AS "l_discount", + "t4"."l_tax" AS "l_tax", + "t4"."l_returnflag" AS "l_returnflag", + "t4"."l_linestatus" AS "l_linestatus", + "t4"."l_shipdate" AS "l_shipdate", + "t4"."l_commitdate" AS "l_commitdate", + "t4"."l_receiptdate" AS "l_receiptdate", + "t4"."l_shipinstruct" AS "l_shipinstruct", + "t4"."l_shipmode" AS "l_shipmode", + "t4"."l_comment" AS "l_comment" + FROM ( + SELECT + "t0"."O_ORDERKEY" AS "o_orderkey", + "t0"."O_CUSTKEY" AS "o_custkey", + "t0"."O_ORDERSTATUS" AS "o_orderstatus", + "t0"."O_TOTALPRICE" AS "o_totalprice", + "t0"."O_ORDERDATE" AS "o_orderdate", + "t0"."O_ORDERPRIORITY" AS "o_orderpriority", + "t0"."O_CLERK" AS "o_clerk", + "t0"."O_SHIPPRIORITY" AS "o_shippriority", + "t0"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + ) AS "t4" + ON "t2"."o_orderkey" = "t4"."l_orderkey" + ) AS "t6" + WHERE + "t6"."l_shipmode" IN ('MAIL', 'SHIP') + AND "t6"."l_commitdate" < "t6"."l_receiptdate" + AND "t6"."l_shipdate" < "t6"."l_commitdate" + AND "t6"."l_receiptdate" >= DATEFROMPARTS(1994, 1, 1) + AND "t6"."l_receiptdate" < DATEFROMPARTS(1995, 1, 1) + ) AS "t7" GROUP BY 1 -) AS t2 +) AS "t8" ORDER BY - t2."l_shipmode" ASC \ No newline at end of file + "t8"."l_shipmode" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql index ddc206f3e537..72657a284609 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql @@ -1,24 +1,45 @@ -WITH t0 AS ( - SELECT - t2.c_custkey AS c_custkey, - COUNT(t3.o_orderkey) AS c_count - FROM main.customer AS t2 - LEFT OUTER JOIN main.orders AS t3 - ON t2.c_custkey = t3.o_custkey AND NOT t3.o_comment LIKE '%special%requests%' - GROUP BY - 1 -) SELECT - t1.c_count, - t1.custdist + t6.c_count AS c_count, + t6.custdist AS custdist FROM ( SELECT - t0.c_count AS c_count, + t5.c_count AS c_count, COUNT(*) AS custdist - FROM t0 + FROM ( + SELECT + t4.c_custkey AS c_custkey, + COUNT(t4.o_orderkey) AS c_count + FROM ( + SELECT + t0.c_custkey AS c_custkey, + t0.c_name AS c_name, + t0.c_address AS c_address, + t0.c_nationkey AS c_nationkey, + t0.c_phone AS c_phone, + t0.c_acctbal AS c_acctbal, + t0.c_mktsegment AS c_mktsegment, + t0.c_comment AS c_comment, + t2.o_orderkey AS o_orderkey, + t2.o_custkey AS o_custkey, + t2.o_orderstatus AS o_orderstatus, + t2.o_totalprice AS o_totalprice, + t2.o_orderdate AS o_orderdate, + t2.o_orderpriority AS o_orderpriority, + t2.o_clerk AS 
o_clerk, + t2.o_shippriority AS o_shippriority, + t2.o_comment AS o_comment + FROM customer AS t0 + LEFT OUTER JOIN orders AS t2 + ON t0.c_custkey = t2.o_custkey AND NOT ( + t2.o_comment LIKE '%special%requests%' + ) + ) AS t4 + GROUP BY + 1 + ) AS t5 GROUP BY 1 -) AS t1 +) AS t6 ORDER BY - t1.custdist DESC, - t1.c_count DESC \ No newline at end of file + t6.custdist DESC, + t6.c_count DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql index 30129ee623a1..fd4edd9dfb77 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql @@ -1,47 +1,69 @@ -WITH t1 AS ( - SELECT - t4."C_CUSTKEY" AS "c_custkey", - t4."C_NAME" AS "c_name", - t4."C_ADDRESS" AS "c_address", - t4."C_NATIONKEY" AS "c_nationkey", - t4."C_PHONE" AS "c_phone", - t4."C_ACCTBAL" AS "c_acctbal", - t4."C_MKTSEGMENT" AS "c_mktsegment", - t4."C_COMMENT" AS "c_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS t4 -), t0 AS ( - SELECT - t4."O_ORDERKEY" AS "o_orderkey", - t4."O_CUSTKEY" AS "o_custkey", - t4."O_ORDERSTATUS" AS "o_orderstatus", - t4."O_TOTALPRICE" AS "o_totalprice", - t4."O_ORDERDATE" AS "o_orderdate", - t4."O_ORDERPRIORITY" AS "o_orderpriority", - t4."O_CLERK" AS "o_clerk", - t4."O_SHIPPRIORITY" AS "o_shippriority", - t4."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t4 -), t2 AS ( - SELECT - t1."c_custkey" AS "c_custkey", - COUNT(t0."o_orderkey") AS "c_count" - FROM t1 - LEFT OUTER JOIN t0 - ON t1."c_custkey" = t0."o_custkey" AND NOT t0."o_comment" LIKE '%special%requests%' - GROUP BY - 1 -) SELECT - t3."c_count", - t3."custdist" + "t8"."c_count" AS "c_count", + "t8"."custdist" AS "custdist" FROM ( SELECT - t2."c_count" AS "c_count", + "t7"."c_count" AS "c_count", COUNT(*) AS "custdist" - FROM t2 + FROM ( + SELECT + "t6"."c_custkey" AS "c_custkey", + COUNT("t6"."o_orderkey") AS "c_count" + FROM ( + SELECT + "t2"."c_custkey" AS "c_custkey", + "t2"."c_name" AS "c_name", + "t2"."c_address" AS "c_address", + "t2"."c_nationkey" AS "c_nationkey", + "t2"."c_phone" AS "c_phone", + "t2"."c_acctbal" AS "c_acctbal", + "t2"."c_mktsegment" AS "c_mktsegment", + "t2"."c_comment" AS "c_comment", + "t4"."o_orderkey" AS "o_orderkey", + "t4"."o_custkey" AS "o_custkey", + "t4"."o_orderstatus" AS "o_orderstatus", + "t4"."o_totalprice" AS "o_totalprice", + "t4"."o_orderdate" AS "o_orderdate", + "t4"."o_orderpriority" AS "o_orderpriority", + "t4"."o_clerk" AS "o_clerk", + "t4"."o_shippriority" AS "o_shippriority", + "t4"."o_comment" AS "o_comment" + FROM ( + SELECT + "t0"."C_CUSTKEY" AS "c_custkey", + "t0"."C_NAME" AS "c_name", + "t0"."C_ADDRESS" AS "c_address", + "t0"."C_NATIONKEY" AS "c_nationkey", + "t0"."C_PHONE" AS "c_phone", + "t0"."C_ACCTBAL" AS "c_acctbal", + "t0"."C_MKTSEGMENT" AS "c_mktsegment", + "t0"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t0" + ) AS "t2" + LEFT OUTER JOIN ( + SELECT + "t1"."O_ORDERKEY" AS "o_orderkey", + "t1"."O_CUSTKEY" AS "o_custkey", + "t1"."O_ORDERSTATUS" AS "o_orderstatus", + "t1"."O_TOTALPRICE" AS "o_totalprice", + "t1"."O_ORDERDATE" AS "o_orderdate", + "t1"."O_ORDERPRIORITY" AS "o_orderpriority", + "t1"."O_CLERK" AS "o_clerk", + "t1"."O_SHIPPRIORITY" AS "o_shippriority", + "t1"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t1" + ) AS "t4" + ON "t2"."c_custkey" = "t4"."o_custkey" + AND NOT ( + 
"t4"."o_comment" LIKE '%special%requests%' + ) + ) AS "t6" + GROUP BY + 1 + ) AS "t7" GROUP BY 1 -) AS t3 +) AS "t8" ORDER BY - t3."custdist" DESC, - t3."c_count" DESC \ No newline at end of file + "t8"."custdist" DESC NULLS LAST, + "t8"."c_count" DESC NULLS LAST \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql index 8bfbb135fce7..dc27f6c65550 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql @@ -2,20 +2,74 @@ SELECT ( SUM( CASE - WHEN ( - t1.p_type LIKE 'PROMO%' - ) - THEN t0.l_extendedprice * ( - CAST(1 AS TINYINT) - t0.l_discount + WHEN t5.p_type LIKE 'PROMO%' + THEN t5.l_extendedprice * ( + CAST(1 AS TINYINT) - t5.l_discount ) ELSE CAST(0 AS TINYINT) END ) * CAST(100 AS TINYINT) - ) / SUM(t0.l_extendedprice * ( - CAST(1 AS TINYINT) - t0.l_discount + ) / SUM(t5.l_extendedprice * ( + CAST(1 AS TINYINT) - t5.l_discount )) AS promo_revenue -FROM main.lineitem AS t0 -JOIN main.part AS t1 - ON t0.l_partkey = t1.p_partkey -WHERE - t0.l_shipdate >= MAKE_DATE(1995, 9, 1) AND t0.l_shipdate < MAKE_DATE(1995, 10, 1) \ No newline at end of file +FROM ( + SELECT + t4.l_orderkey AS l_orderkey, + t4.l_partkey AS l_partkey, + t4.l_suppkey AS l_suppkey, + t4.l_linenumber AS l_linenumber, + t4.l_quantity AS l_quantity, + t4.l_extendedprice AS l_extendedprice, + t4.l_discount AS l_discount, + t4.l_tax AS l_tax, + t4.l_returnflag AS l_returnflag, + t4.l_linestatus AS l_linestatus, + t4.l_shipdate AS l_shipdate, + t4.l_commitdate AS l_commitdate, + t4.l_receiptdate AS l_receiptdate, + t4.l_shipinstruct AS l_shipinstruct, + t4.l_shipmode AS l_shipmode, + t4.l_comment AS l_comment, + t4.p_partkey AS p_partkey, + t4.p_name AS p_name, + t4.p_mfgr AS p_mfgr, + t4.p_brand AS p_brand, + t4.p_type AS p_type, + t4.p_size AS p_size, + t4.p_container AS p_container, + t4.p_retailprice AS p_retailprice, + t4.p_comment AS p_comment + FROM ( + SELECT + t0.l_orderkey AS l_orderkey, + t0.l_partkey AS l_partkey, + t0.l_suppkey AS l_suppkey, + t0.l_linenumber AS l_linenumber, + t0.l_quantity AS l_quantity, + t0.l_extendedprice AS l_extendedprice, + t0.l_discount AS l_discount, + t0.l_tax AS l_tax, + t0.l_returnflag AS l_returnflag, + t0.l_linestatus AS l_linestatus, + t0.l_shipdate AS l_shipdate, + t0.l_commitdate AS l_commitdate, + t0.l_receiptdate AS l_receiptdate, + t0.l_shipinstruct AS l_shipinstruct, + t0.l_shipmode AS l_shipmode, + t0.l_comment AS l_comment, + t2.p_partkey AS p_partkey, + t2.p_name AS p_name, + t2.p_mfgr AS p_mfgr, + t2.p_brand AS p_brand, + t2.p_type AS p_type, + t2.p_size AS p_size, + t2.p_container AS p_container, + t2.p_retailprice AS p_retailprice, + t2.p_comment AS p_comment + FROM lineitem AS t0 + INNER JOIN part AS t2 + ON t0.l_partkey = t2.p_partkey + ) AS t4 + WHERE + t4.l_shipdate >= MAKE_DATE(1995, 9, 1) AND t4.l_shipdate < MAKE_DATE(1995, 10, 1) +) AS t5 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql index 76a5bce247d0..a417e7d42d34 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql @@ -1,51 +1,103 @@ -WITH t1 AS ( - SELECT - t3."L_ORDERKEY" AS "l_orderkey", - t3."L_PARTKEY" AS 
"l_partkey", - t3."L_SUPPKEY" AS "l_suppkey", - t3."L_LINENUMBER" AS "l_linenumber", - t3."L_QUANTITY" AS "l_quantity", - t3."L_EXTENDEDPRICE" AS "l_extendedprice", - t3."L_DISCOUNT" AS "l_discount", - t3."L_TAX" AS "l_tax", - t3."L_RETURNFLAG" AS "l_returnflag", - t3."L_LINESTATUS" AS "l_linestatus", - t3."L_SHIPDATE" AS "l_shipdate", - t3."L_COMMITDATE" AS "l_commitdate", - t3."L_RECEIPTDATE" AS "l_receiptdate", - t3."L_SHIPINSTRUCT" AS "l_shipinstruct", - t3."L_SHIPMODE" AS "l_shipmode", - t3."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t3 -), t0 AS ( - SELECT - t3."P_PARTKEY" AS "p_partkey", - t3."P_NAME" AS "p_name", - t3."P_MFGR" AS "p_mfgr", - t3."P_BRAND" AS "p_brand", - t3."P_TYPE" AS "p_type", - t3."P_SIZE" AS "p_size", - t3."P_CONTAINER" AS "p_container", - t3."P_RETAILPRICE" AS "p_retailprice", - t3."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t3 -) SELECT - CAST(t2."promo_revenue" AS DECIMAL(38, 10)) AS "promo_revenue" + ( + SUM( + IFF("t7"."p_type" LIKE 'PROMO%', "t7"."l_extendedprice" * ( + 1 - "t7"."l_discount" + ), 0) + ) * 100 + ) / SUM("t7"."l_extendedprice" * ( + 1 - "t7"."l_discount" + )) AS "promo_revenue" FROM ( SELECT - ( - SUM( - IFF(t0."p_type" LIKE 'PROMO%', t1."l_extendedprice" * ( - 1 - t1."l_discount" - ), 0) - ) * 100 - ) / SUM(t1."l_extendedprice" * ( - 1 - t1."l_discount" - )) AS "promo_revenue" - FROM t1 - JOIN t0 - ON t1."l_partkey" = t0."p_partkey" + "t6"."l_orderkey" AS "l_orderkey", + "t6"."l_partkey" AS "l_partkey", + "t6"."l_suppkey" AS "l_suppkey", + "t6"."l_linenumber" AS "l_linenumber", + "t6"."l_quantity" AS "l_quantity", + "t6"."l_extendedprice" AS "l_extendedprice", + "t6"."l_discount" AS "l_discount", + "t6"."l_tax" AS "l_tax", + "t6"."l_returnflag" AS "l_returnflag", + "t6"."l_linestatus" AS "l_linestatus", + "t6"."l_shipdate" AS "l_shipdate", + "t6"."l_commitdate" AS "l_commitdate", + "t6"."l_receiptdate" AS "l_receiptdate", + "t6"."l_shipinstruct" AS "l_shipinstruct", + "t6"."l_shipmode" AS "l_shipmode", + "t6"."l_comment" AS "l_comment", + "t6"."p_partkey" AS "p_partkey", + "t6"."p_name" AS "p_name", + "t6"."p_mfgr" AS "p_mfgr", + "t6"."p_brand" AS "p_brand", + "t6"."p_type" AS "p_type", + "t6"."p_size" AS "p_size", + "t6"."p_container" AS "p_container", + "t6"."p_retailprice" AS "p_retailprice", + "t6"."p_comment" AS "p_comment" + FROM ( + SELECT + "t2"."l_orderkey" AS "l_orderkey", + "t2"."l_partkey" AS "l_partkey", + "t2"."l_suppkey" AS "l_suppkey", + "t2"."l_linenumber" AS "l_linenumber", + "t2"."l_quantity" AS "l_quantity", + "t2"."l_extendedprice" AS "l_extendedprice", + "t2"."l_discount" AS "l_discount", + "t2"."l_tax" AS "l_tax", + "t2"."l_returnflag" AS "l_returnflag", + "t2"."l_linestatus" AS "l_linestatus", + "t2"."l_shipdate" AS "l_shipdate", + "t2"."l_commitdate" AS "l_commitdate", + "t2"."l_receiptdate" AS "l_receiptdate", + "t2"."l_shipinstruct" AS "l_shipinstruct", + "t2"."l_shipmode" AS "l_shipmode", + "t2"."l_comment" AS "l_comment", + "t4"."p_partkey" AS "p_partkey", + "t4"."p_name" AS "p_name", + "t4"."p_mfgr" AS "p_mfgr", + "t4"."p_brand" AS "p_brand", + "t4"."p_type" AS "p_type", + "t4"."p_size" AS "p_size", + "t4"."p_container" AS "p_container", + "t4"."p_retailprice" AS "p_retailprice", + "t4"."p_comment" AS "p_comment" + FROM ( + SELECT + "t0"."L_ORDERKEY" AS "l_orderkey", + "t0"."L_PARTKEY" AS "l_partkey", + "t0"."L_SUPPKEY" AS "l_suppkey", + "t0"."L_LINENUMBER" AS "l_linenumber", + "t0"."L_QUANTITY" AS "l_quantity", + "t0"."L_EXTENDEDPRICE" 
AS "l_extendedprice", + "t0"."L_DISCOUNT" AS "l_discount", + "t0"."L_TAX" AS "l_tax", + "t0"."L_RETURNFLAG" AS "l_returnflag", + "t0"."L_LINESTATUS" AS "l_linestatus", + "t0"."L_SHIPDATE" AS "l_shipdate", + "t0"."L_COMMITDATE" AS "l_commitdate", + "t0"."L_RECEIPTDATE" AS "l_receiptdate", + "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t0"."L_SHIPMODE" AS "l_shipmode", + "t0"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."P_PARTKEY" AS "p_partkey", + "t1"."P_NAME" AS "p_name", + "t1"."P_MFGR" AS "p_mfgr", + "t1"."P_BRAND" AS "p_brand", + "t1"."P_TYPE" AS "p_type", + "t1"."P_SIZE" AS "p_size", + "t1"."P_CONTAINER" AS "p_container", + "t1"."P_RETAILPRICE" AS "p_retailprice", + "t1"."P_COMMENT" AS "p_comment" + FROM "PART" AS "t1" + ) AS "t4" + ON "t2"."l_partkey" = "t4"."p_partkey" + ) AS "t6" WHERE - t1."l_shipdate" >= '1995-09-01' AND t1."l_shipdate" < '1995-10-01' -) AS t2 \ No newline at end of file + "t6"."l_shipdate" >= DATEFROMPARTS(1995, 9, 1) + AND "t6"."l_shipdate" < DATEFROMPARTS(1995, 10, 1) +) AS "t7" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql index 34a990f1423d..fc6c924aca22 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql @@ -1,53 +1,103 @@ -WITH t0 AS ( - SELECT - t3.l_suppkey AS l_suppkey, - SUM(t3.l_extendedprice * ( - CAST(1 AS TINYINT) - t3.l_discount - )) AS total_revenue - FROM main.lineitem AS t3 - WHERE - t3.l_shipdate >= MAKE_DATE(1996, 1, 1) AND t3.l_shipdate < MAKE_DATE(1996, 4, 1) - GROUP BY - 1 -), t1 AS ( - SELECT - t3.s_suppkey AS s_suppkey, - t3.s_name AS s_name, - t3.s_address AS s_address, - t3.s_nationkey AS s_nationkey, - t3.s_phone AS s_phone, - t3.s_acctbal AS s_acctbal, - t3.s_comment AS s_comment, - t0.l_suppkey AS l_suppkey, - t0.total_revenue AS total_revenue - FROM main.supplier AS t3 - JOIN t0 - ON t3.s_suppkey = t0.l_suppkey - WHERE - t0.total_revenue = ( - SELECT - MAX(t0.total_revenue) AS "Max(total_revenue)" - FROM t0 - ) -) SELECT - t2.s_suppkey, - t2.s_name, - t2.s_address, - t2.s_phone, - t2.total_revenue + t6.s_suppkey AS s_suppkey, + t6.s_name AS s_name, + t6.s_address AS s_address, + t6.s_phone AS s_phone, + t6.total_revenue AS total_revenue FROM ( SELECT - t1.s_suppkey AS s_suppkey, - t1.s_name AS s_name, - t1.s_address AS s_address, - t1.s_nationkey AS s_nationkey, - t1.s_phone AS s_phone, - t1.s_acctbal AS s_acctbal, - t1.s_comment AS s_comment, - t1.l_suppkey AS l_suppkey, - t1.total_revenue AS total_revenue - FROM t1 - ORDER BY - t1.s_suppkey ASC -) AS t2 \ No newline at end of file + t0.s_suppkey AS s_suppkey, + t0.s_name AS s_name, + t0.s_address AS s_address, + t0.s_nationkey AS s_nationkey, + t0.s_phone AS s_phone, + t0.s_acctbal AS s_acctbal, + t0.s_comment AS s_comment, + t4.l_suppkey AS l_suppkey, + t4.total_revenue AS total_revenue + FROM supplier AS t0 + INNER JOIN ( + SELECT + t2.l_suppkey AS l_suppkey, + SUM(t2.l_extendedprice * ( + CAST(1 AS TINYINT) - t2.l_discount + )) AS total_revenue + FROM ( + SELECT + t1.l_orderkey AS l_orderkey, + t1.l_partkey AS l_partkey, + t1.l_suppkey AS l_suppkey, + t1.l_linenumber AS l_linenumber, + t1.l_quantity AS l_quantity, + t1.l_extendedprice AS l_extendedprice, + t1.l_discount AS l_discount, + t1.l_tax AS l_tax, + t1.l_returnflag AS l_returnflag, + t1.l_linestatus AS l_linestatus, + 
t1.l_shipdate AS l_shipdate, + t1.l_commitdate AS l_commitdate, + t1.l_receiptdate AS l_receiptdate, + t1.l_shipinstruct AS l_shipinstruct, + t1.l_shipmode AS l_shipmode, + t1.l_comment AS l_comment + FROM lineitem AS t1 + WHERE + t1.l_shipdate >= MAKE_DATE(1996, 1, 1) AND t1.l_shipdate < MAKE_DATE(1996, 4, 1) + ) AS t2 + GROUP BY + 1 + ) AS t4 + ON t0.s_suppkey = t4.l_suppkey +) AS t6 +WHERE + t6.total_revenue = ( + SELECT + MAX(t6.total_revenue) AS "Max(total_revenue)" + FROM ( + SELECT + t0.s_suppkey AS s_suppkey, + t0.s_name AS s_name, + t0.s_address AS s_address, + t0.s_nationkey AS s_nationkey, + t0.s_phone AS s_phone, + t0.s_acctbal AS s_acctbal, + t0.s_comment AS s_comment, + t4.l_suppkey AS l_suppkey, + t4.total_revenue AS total_revenue + FROM supplier AS t0 + INNER JOIN ( + SELECT + t2.l_suppkey AS l_suppkey, + SUM(t2.l_extendedprice * ( + CAST(1 AS TINYINT) - t2.l_discount + )) AS total_revenue + FROM ( + SELECT + t1.l_orderkey AS l_orderkey, + t1.l_partkey AS l_partkey, + t1.l_suppkey AS l_suppkey, + t1.l_linenumber AS l_linenumber, + t1.l_quantity AS l_quantity, + t1.l_extendedprice AS l_extendedprice, + t1.l_discount AS l_discount, + t1.l_tax AS l_tax, + t1.l_returnflag AS l_returnflag, + t1.l_linestatus AS l_linestatus, + t1.l_shipdate AS l_shipdate, + t1.l_commitdate AS l_commitdate, + t1.l_receiptdate AS l_receiptdate, + t1.l_shipinstruct AS l_shipinstruct, + t1.l_shipmode AS l_shipmode, + t1.l_comment AS l_comment + FROM lineitem AS t1 + WHERE + t1.l_shipdate >= MAKE_DATE(1996, 1, 1) AND t1.l_shipdate < MAKE_DATE(1996, 4, 1) + ) AS t2 + GROUP BY + 1 + ) AS t4 + ON t0.s_suppkey = t4.l_suppkey + ) AS t6 + ) +ORDER BY + t6.s_suppkey ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql index 1ce09bf25f75..c919360a42d2 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql @@ -1,83 +1,125 @@ -WITH t2 AS ( - SELECT - t5."S_SUPPKEY" AS "s_suppkey", - t5."S_NAME" AS "s_name", - t5."S_ADDRESS" AS "s_address", - t5."S_NATIONKEY" AS "s_nationkey", - t5."S_PHONE" AS "s_phone", - t5."S_ACCTBAL" AS "s_acctbal", - t5."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t5 -), t0 AS ( - SELECT - t5."L_ORDERKEY" AS "l_orderkey", - t5."L_PARTKEY" AS "l_partkey", - t5."L_SUPPKEY" AS "l_suppkey", - t5."L_LINENUMBER" AS "l_linenumber", - t5."L_QUANTITY" AS "l_quantity", - t5."L_EXTENDEDPRICE" AS "l_extendedprice", - t5."L_DISCOUNT" AS "l_discount", - t5."L_TAX" AS "l_tax", - t5."L_RETURNFLAG" AS "l_returnflag", - t5."L_LINESTATUS" AS "l_linestatus", - t5."L_SHIPDATE" AS "l_shipdate", - t5."L_COMMITDATE" AS "l_commitdate", - t5."L_RECEIPTDATE" AS "l_receiptdate", - t5."L_SHIPINSTRUCT" AS "l_shipinstruct", - t5."L_SHIPMODE" AS "l_shipmode", - t5."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t5 -), t1 AS ( - SELECT - t0."l_suppkey" AS "l_suppkey", - SUM(t0."l_extendedprice" * ( - 1 - t0."l_discount" - )) AS "total_revenue" - FROM t0 - WHERE - t0."l_shipdate" >= DATE_FROM_PARTS(1996, 1, 1) - AND t0."l_shipdate" < DATE_FROM_PARTS(1996, 4, 1) - GROUP BY - 1 -), t3 AS ( - SELECT - t2."s_suppkey" AS "s_suppkey", - t2."s_name" AS "s_name", - t2."s_address" AS "s_address", - t2."s_nationkey" AS "s_nationkey", - t2."s_phone" AS "s_phone", - t2."s_acctbal" AS "s_acctbal", - 
t2."s_comment" AS "s_comment", - t1."l_suppkey" AS "l_suppkey", - t1."total_revenue" AS "total_revenue" - FROM t2 - JOIN t1 - ON t2."s_suppkey" = t1."l_suppkey" - WHERE - t1."total_revenue" = ( - SELECT - MAX(t1."total_revenue") AS "Max(total_revenue)" - FROM t1 - ) -) SELECT - t4."s_suppkey", - t4."s_name", - t4."s_address", - t4."s_phone", - t4."total_revenue" + "t7"."s_suppkey" AS "s_suppkey", + "t7"."s_name" AS "s_name", + "t7"."s_address" AS "s_address", + "t7"."s_phone" AS "s_phone", + "t7"."total_revenue" AS "total_revenue" FROM ( SELECT - t3."s_suppkey" AS "s_suppkey", - t3."s_name" AS "s_name", - t3."s_address" AS "s_address", - t3."s_nationkey" AS "s_nationkey", - t3."s_phone" AS "s_phone", - t3."s_acctbal" AS "s_acctbal", - t3."s_comment" AS "s_comment", - t3."l_suppkey" AS "l_suppkey", - t3."total_revenue" AS "total_revenue" - FROM t3 - ORDER BY - t3."s_suppkey" ASC -) AS t4 \ No newline at end of file + "t2"."s_suppkey" AS "s_suppkey", + "t2"."s_name" AS "s_name", + "t2"."s_address" AS "s_address", + "t2"."s_nationkey" AS "s_nationkey", + "t2"."s_phone" AS "s_phone", + "t2"."s_acctbal" AS "s_acctbal", + "t2"."s_comment" AS "s_comment", + "t5"."l_suppkey" AS "l_suppkey", + "t5"."total_revenue" AS "total_revenue" + FROM ( + SELECT + "t0"."S_SUPPKEY" AS "s_suppkey", + "t0"."S_NAME" AS "s_name", + "t0"."S_ADDRESS" AS "s_address", + "t0"."S_NATIONKEY" AS "s_nationkey", + "t0"."S_PHONE" AS "s_phone", + "t0"."S_ACCTBAL" AS "s_acctbal", + "t0"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t3"."l_suppkey" AS "l_suppkey", + SUM("t3"."l_extendedprice" * ( + 1 - "t3"."l_discount" + )) AS "total_revenue" + FROM ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + WHERE + "t1"."L_SHIPDATE" >= DATEFROMPARTS(1996, 1, 1) + AND "t1"."L_SHIPDATE" < DATEFROMPARTS(1996, 4, 1) + ) AS "t3" + GROUP BY + 1 + ) AS "t5" + ON "t2"."s_suppkey" = "t5"."l_suppkey" +) AS "t7" +WHERE + "t7"."total_revenue" = ( + SELECT + MAX("t7"."total_revenue") AS "Max(total_revenue)" + FROM ( + SELECT + "t2"."s_suppkey" AS "s_suppkey", + "t2"."s_name" AS "s_name", + "t2"."s_address" AS "s_address", + "t2"."s_nationkey" AS "s_nationkey", + "t2"."s_phone" AS "s_phone", + "t2"."s_acctbal" AS "s_acctbal", + "t2"."s_comment" AS "s_comment", + "t5"."l_suppkey" AS "l_suppkey", + "t5"."total_revenue" AS "total_revenue" + FROM ( + SELECT + "t0"."S_SUPPKEY" AS "s_suppkey", + "t0"."S_NAME" AS "s_name", + "t0"."S_ADDRESS" AS "s_address", + "t0"."S_NATIONKEY" AS "s_nationkey", + "t0"."S_PHONE" AS "s_phone", + "t0"."S_ACCTBAL" AS "s_acctbal", + "t0"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t3"."l_suppkey" AS "l_suppkey", + SUM("t3"."l_extendedprice" * ( + 1 - "t3"."l_discount" + )) AS "total_revenue" + FROM ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS 
"l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + WHERE + "t1"."L_SHIPDATE" >= DATEFROMPARTS(1996, 1, 1) + AND "t1"."L_SHIPDATE" < DATEFROMPARTS(1996, 4, 1) + ) AS "t3" + GROUP BY + 1 + ) AS "t5" + ON "t2"."s_suppkey" = "t5"."l_suppkey" + ) AS "t7" + ) +ORDER BY + "t7"."s_suppkey" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql index 5f9ebafc8322..b0634e8a2e27 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql @@ -1,47 +1,73 @@ SELECT - t0.p_brand, - t0.p_type, - t0.p_size, - t0.supplier_cnt + t8.p_brand AS p_brand, + t8.p_type AS p_type, + t8.p_size AS p_size, + t8.supplier_cnt AS supplier_cnt FROM ( SELECT - t2.p_brand AS p_brand, - t2.p_type AS p_type, - t2.p_size AS p_size, - COUNT(DISTINCT t1.ps_suppkey) AS supplier_cnt - FROM main.partsupp AS t1 - JOIN main.part AS t2 - ON t2.p_partkey = t1.ps_partkey - WHERE - t2.p_brand <> 'Brand#45' - AND NOT t2.p_type LIKE 'MEDIUM POLISHED%' - AND t2.p_size IN (CAST(49 AS TINYINT), CAST(14 AS TINYINT), CAST(23 AS TINYINT), CAST(45 AS TINYINT), CAST(19 AS TINYINT), CAST(3 AS TINYINT), CAST(36 AS TINYINT), CAST(9 AS TINYINT)) - AND ( - NOT t1.ps_suppkey IN ( - SELECT - t3.s_suppkey - FROM ( + t7.p_brand AS p_brand, + t7.p_type AS p_type, + t7.p_size AS p_size, + COUNT(DISTINCT t7.ps_suppkey) AS supplier_cnt + FROM ( + SELECT + t6.ps_partkey AS ps_partkey, + t6.ps_suppkey AS ps_suppkey, + t6.ps_availqty AS ps_availqty, + t6.ps_supplycost AS ps_supplycost, + t6.ps_comment AS ps_comment, + t6.p_partkey AS p_partkey, + t6.p_name AS p_name, + t6.p_mfgr AS p_mfgr, + t6.p_brand AS p_brand, + t6.p_type AS p_type, + t6.p_size AS p_size, + t6.p_container AS p_container, + t6.p_retailprice AS p_retailprice, + t6.p_comment AS p_comment + FROM ( + SELECT + t0.ps_partkey AS ps_partkey, + t0.ps_suppkey AS ps_suppkey, + t0.ps_availqty AS ps_availqty, + t0.ps_supplycost AS ps_supplycost, + t0.ps_comment AS ps_comment, + t3.p_partkey AS p_partkey, + t3.p_name AS p_name, + t3.p_mfgr AS p_mfgr, + t3.p_brand AS p_brand, + t3.p_type AS p_type, + t3.p_size AS p_size, + t3.p_container AS p_container, + t3.p_retailprice AS p_retailprice, + t3.p_comment AS p_comment + FROM partsupp AS t0 + INNER JOIN part AS t3 + ON t3.p_partkey = t0.ps_partkey + ) AS t6 + WHERE + t6.p_brand <> 'Brand#45' + AND NOT ( + t6.p_type LIKE 'MEDIUM POLISHED%' + ) + AND t6.p_size IN (CAST(49 AS TINYINT), CAST(14 AS TINYINT), CAST(23 AS TINYINT), CAST(45 AS TINYINT), CAST(19 AS TINYINT), CAST(3 AS TINYINT), CAST(36 AS TINYINT), CAST(9 AS TINYINT)) + AND NOT ( + t6.ps_suppkey IN (( SELECT - t4.s_suppkey AS s_suppkey, - t4.s_name AS s_name, - t4.s_address AS s_address, - t4.s_nationkey AS s_nationkey, - t4.s_phone AS s_phone, - t4.s_acctbal AS s_acctbal, - t4.s_comment AS s_comment - FROM main.supplier AS t4 + t2.s_suppkey AS s_suppkey + FROM supplier AS t2 WHERE - t4.s_comment LIKE 
'%Customer%Complaints%' - ) AS t3 + t2.s_comment LIKE '%Customer%Complaints%' + )) ) - ) + ) AS t7 GROUP BY 1, 2, 3 -) AS t0 +) AS t8 ORDER BY - t0.supplier_cnt DESC, - t0.p_brand ASC, - t0.p_type ASC, - t0.p_size ASC \ No newline at end of file + t8.supplier_cnt DESC, + t8.p_brand ASC, + t8.p_type ASC, + t8.p_size ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql index da9166dcf9f0..6ebdf27930bf 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql @@ -1,78 +1,93 @@ -WITH t1 AS ( - SELECT - t4."PS_PARTKEY" AS "ps_partkey", - t4."PS_SUPPKEY" AS "ps_suppkey", - t4."PS_AVAILQTY" AS "ps_availqty", - t4."PS_SUPPLYCOST" AS "ps_supplycost", - t4."PS_COMMENT" AS "ps_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PARTSUPP" AS t4 -), t2 AS ( - SELECT - t4."P_PARTKEY" AS "p_partkey", - t4."P_NAME" AS "p_name", - t4."P_MFGR" AS "p_mfgr", - t4."P_BRAND" AS "p_brand", - t4."P_TYPE" AS "p_type", - t4."P_SIZE" AS "p_size", - t4."P_CONTAINER" AS "p_container", - t4."P_RETAILPRICE" AS "p_retailprice", - t4."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t4 -), t0 AS ( - SELECT - t4."S_SUPPKEY" AS "s_suppkey", - t4."S_NAME" AS "s_name", - t4."S_ADDRESS" AS "s_address", - t4."S_NATIONKEY" AS "s_nationkey", - t4."S_PHONE" AS "s_phone", - t4."S_ACCTBAL" AS "s_acctbal", - t4."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t4 -) SELECT - t3."p_brand", - t3."p_type", - t3."p_size", - t3."supplier_cnt" + "t10"."p_brand" AS "p_brand", + "t10"."p_type" AS "p_type", + "t10"."p_size" AS "p_size", + "t10"."supplier_cnt" AS "supplier_cnt" FROM ( SELECT - t2."p_brand" AS "p_brand", - t2."p_type" AS "p_type", - t2."p_size" AS "p_size", - COUNT(DISTINCT t1."ps_suppkey") AS "supplier_cnt" - FROM t1 - JOIN t2 - ON t2."p_partkey" = t1."ps_partkey" - WHERE - t2."p_brand" <> 'Brand#45' - AND NOT t2."p_type" LIKE 'MEDIUM POLISHED%' - AND t2."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) - AND ( - NOT t1."ps_suppkey" IN ( + "t9"."p_brand" AS "p_brand", + "t9"."p_type" AS "p_type", + "t9"."p_size" AS "p_size", + COUNT(DISTINCT "t9"."ps_suppkey") AS "supplier_cnt" + FROM ( + SELECT + "t8"."ps_partkey" AS "ps_partkey", + "t8"."ps_suppkey" AS "ps_suppkey", + "t8"."ps_availqty" AS "ps_availqty", + "t8"."ps_supplycost" AS "ps_supplycost", + "t8"."ps_comment" AS "ps_comment", + "t8"."p_partkey" AS "p_partkey", + "t8"."p_name" AS "p_name", + "t8"."p_mfgr" AS "p_mfgr", + "t8"."p_brand" AS "p_brand", + "t8"."p_type" AS "p_type", + "t8"."p_size" AS "p_size", + "t8"."p_container" AS "p_container", + "t8"."p_retailprice" AS "p_retailprice", + "t8"."p_comment" AS "p_comment" + FROM ( + SELECT + "t3"."ps_partkey" AS "ps_partkey", + "t3"."ps_suppkey" AS "ps_suppkey", + "t3"."ps_availqty" AS "ps_availqty", + "t3"."ps_supplycost" AS "ps_supplycost", + "t3"."ps_comment" AS "ps_comment", + "t6"."p_partkey" AS "p_partkey", + "t6"."p_name" AS "p_name", + "t6"."p_mfgr" AS "p_mfgr", + "t6"."p_brand" AS "p_brand", + "t6"."p_type" AS "p_type", + "t6"."p_size" AS "p_size", + "t6"."p_container" AS "p_container", + "t6"."p_retailprice" AS "p_retailprice", + "t6"."p_comment" AS "p_comment" + FROM ( SELECT - t4."s_suppkey" - FROM ( + "t0"."PS_PARTKEY" AS "ps_partkey", + "t0"."PS_SUPPKEY" AS "ps_suppkey", + "t0"."PS_AVAILQTY" AS 
"ps_availqty", + "t0"."PS_SUPPLYCOST" AS "ps_supplycost", + "t0"."PS_COMMENT" AS "ps_comment" + FROM "PARTSUPP" AS "t0" + ) AS "t3" + INNER JOIN ( + SELECT + "t2"."P_PARTKEY" AS "p_partkey", + "t2"."P_NAME" AS "p_name", + "t2"."P_MFGR" AS "p_mfgr", + "t2"."P_BRAND" AS "p_brand", + "t2"."P_TYPE" AS "p_type", + "t2"."P_SIZE" AS "p_size", + "t2"."P_CONTAINER" AS "p_container", + "t2"."P_RETAILPRICE" AS "p_retailprice", + "t2"."P_COMMENT" AS "p_comment" + FROM "PART" AS "t2" + ) AS "t6" + ON "t6"."p_partkey" = "t3"."ps_partkey" + ) AS "t8" + WHERE + "t8"."p_brand" <> 'Brand#45' + AND NOT ( + "t8"."p_type" LIKE 'MEDIUM POLISHED%' + ) + AND "t8"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) + AND NOT ( + "t8"."ps_suppkey" IN (( SELECT - t0."s_suppkey" AS "s_suppkey", - t0."s_name" AS "s_name", - t0."s_address" AS "s_address", - t0."s_nationkey" AS "s_nationkey", - t0."s_phone" AS "s_phone", - t0."s_acctbal" AS "s_acctbal", - t0."s_comment" AS "s_comment" - FROM t0 + "t1"."S_SUPPKEY" AS "s_suppkey" + FROM "SUPPLIER" AS "t1" WHERE - t0."s_comment" LIKE '%Customer%Complaints%' - ) AS t4 + "t1"."S_COMMENT" LIKE '%Customer%Complaints%' + )) ) - ) + ) AS "t9" GROUP BY 1, 2, 3 -) AS t3 +) AS "t10" ORDER BY - t3."supplier_cnt" DESC, - t3."p_brand" ASC, - t3."p_type" ASC, - t3."p_size" ASC \ No newline at end of file + "t10"."supplier_cnt" DESC NULLS LAST, + "t10"."p_brand" ASC, + "t10"."p_type" ASC, + "t10"."p_size" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql index e0adc83afc3b..601ac35cd886 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql @@ -1,15 +1,92 @@ SELECT - SUM(t0.l_extendedprice) / CAST(7.0 AS DOUBLE) AS avg_yearly -FROM main.lineitem AS t0 -JOIN main.part AS t1 - ON t1.p_partkey = t0.l_partkey -WHERE - t1.p_brand = 'Brand#23' - AND t1.p_container = 'MED BOX' - AND t0.l_quantity < ( + SUM(t7.l_extendedprice) / CAST(7.0 AS DOUBLE) AS avg_yearly +FROM ( + SELECT + t4.l_orderkey AS l_orderkey, + t4.l_partkey AS l_partkey, + t4.l_suppkey AS l_suppkey, + t4.l_linenumber AS l_linenumber, + t4.l_quantity AS l_quantity, + t4.l_extendedprice AS l_extendedprice, + t4.l_discount AS l_discount, + t4.l_tax AS l_tax, + t4.l_returnflag AS l_returnflag, + t4.l_linestatus AS l_linestatus, + t4.l_shipdate AS l_shipdate, + t4.l_commitdate AS l_commitdate, + t4.l_receiptdate AS l_receiptdate, + t4.l_shipinstruct AS l_shipinstruct, + t4.l_shipmode AS l_shipmode, + t4.l_comment AS l_comment, + t4.p_partkey AS p_partkey, + t4.p_name AS p_name, + t4.p_mfgr AS p_mfgr, + t4.p_brand AS p_brand, + t4.p_type AS p_type, + t4.p_size AS p_size, + t4.p_container AS p_container, + t4.p_retailprice AS p_retailprice, + t4.p_comment AS p_comment + FROM ( SELECT - AVG(t0.l_quantity) AS "Mean(l_quantity)" - FROM main.lineitem AS t0 - WHERE - t0.l_partkey = t1.p_partkey - ) * CAST(0.2 AS DOUBLE) \ No newline at end of file + t0.l_orderkey AS l_orderkey, + t0.l_partkey AS l_partkey, + t0.l_suppkey AS l_suppkey, + t0.l_linenumber AS l_linenumber, + t0.l_quantity AS l_quantity, + t0.l_extendedprice AS l_extendedprice, + t0.l_discount AS l_discount, + t0.l_tax AS l_tax, + t0.l_returnflag AS l_returnflag, + t0.l_linestatus AS l_linestatus, + t0.l_shipdate AS l_shipdate, + t0.l_commitdate AS l_commitdate, + t0.l_receiptdate AS l_receiptdate, + t0.l_shipinstruct AS l_shipinstruct, + 
t0.l_shipmode AS l_shipmode, + t0.l_comment AS l_comment, + t2.p_partkey AS p_partkey, + t2.p_name AS p_name, + t2.p_mfgr AS p_mfgr, + t2.p_brand AS p_brand, + t2.p_type AS p_type, + t2.p_size AS p_size, + t2.p_container AS p_container, + t2.p_retailprice AS p_retailprice, + t2.p_comment AS p_comment + FROM lineitem AS t0 + INNER JOIN part AS t2 + ON t2.p_partkey = t0.l_partkey + ) AS t4 + WHERE + t4.p_brand = 'Brand#23' + AND t4.p_container = 'MED BOX' + AND t4.l_quantity < ( + ( + SELECT + AVG(t5.l_quantity) AS "Mean(l_quantity)" + FROM ( + SELECT + t0.l_orderkey AS l_orderkey, + t0.l_partkey AS l_partkey, + t0.l_suppkey AS l_suppkey, + t0.l_linenumber AS l_linenumber, + t0.l_quantity AS l_quantity, + t0.l_extendedprice AS l_extendedprice, + t0.l_discount AS l_discount, + t0.l_tax AS l_tax, + t0.l_returnflag AS l_returnflag, + t0.l_linestatus AS l_linestatus, + t0.l_shipdate AS l_shipdate, + t0.l_commitdate AS l_commitdate, + t0.l_receiptdate AS l_receiptdate, + t0.l_shipinstruct AS l_shipinstruct, + t0.l_shipmode AS l_shipmode, + t0.l_comment AS l_comment + FROM lineitem AS t0 + WHERE + t0.l_partkey = t4.p_partkey + ) AS t5 + ) * CAST(0.2 AS DOUBLE) + ) +) AS t7 diff --git a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql index 890b8e22fc2e..6bd68abfdee8 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql @@ -1,51 +1,111 @@ -WITH t0 AS ( - SELECT - t3."L_ORDERKEY" AS "l_orderkey", - t3."L_PARTKEY" AS "l_partkey", - t3."L_SUPPKEY" AS "l_suppkey", - t3."L_LINENUMBER" AS "l_linenumber", - t3."L_QUANTITY" AS "l_quantity", - t3."L_EXTENDEDPRICE" AS "l_extendedprice", - t3."L_DISCOUNT" AS "l_discount", - t3."L_TAX" AS "l_tax", - t3."L_RETURNFLAG" AS "l_returnflag", - t3."L_LINESTATUS" AS "l_linestatus", - t3."L_SHIPDATE" AS "l_shipdate", - t3."L_COMMITDATE" AS "l_commitdate", - t3."L_RECEIPTDATE" AS "l_receiptdate", - t3."L_SHIPINSTRUCT" AS "l_shipinstruct", - t3."L_SHIPMODE" AS "l_shipmode", - t3."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t3 -), t1 AS ( - SELECT - t3."P_PARTKEY" AS "p_partkey", - t3."P_NAME" AS "p_name", - t3."P_MFGR" AS "p_mfgr", - t3."P_BRAND" AS "p_brand", - t3."P_TYPE" AS "p_type", - t3."P_SIZE" AS "p_size", - t3."P_CONTAINER" AS "p_container", - t3."P_RETAILPRICE" AS "p_retailprice", - t3."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t3 -) SELECT - CAST(t2."avg_yearly" AS DECIMAL(30, 8)) AS "avg_yearly" + SUM("t8"."l_extendedprice") / 7.0 AS "avg_yearly" FROM ( SELECT - SUM(t0."l_extendedprice") / 7.0 AS "avg_yearly" - FROM t0 - JOIN t1 - ON t1."p_partkey" = t0."l_partkey" - WHERE - t1."p_brand" = 'Brand#23' - AND t1."p_container" = 'MED BOX' - AND t0."l_quantity" < ( + * + FROM ( + SELECT + "t2"."l_orderkey" AS "l_orderkey", + "t2"."l_partkey" AS "l_partkey", + "t2"."l_suppkey" AS "l_suppkey", + "t2"."l_linenumber" AS "l_linenumber", + "t2"."l_quantity" AS "l_quantity", + "t2"."l_extendedprice" AS "l_extendedprice", + "t2"."l_discount" AS "l_discount", + "t2"."l_tax" AS "l_tax", + "t2"."l_returnflag" AS "l_returnflag", + "t2"."l_linestatus" AS "l_linestatus", + "t2"."l_shipdate" AS "l_shipdate", + "t2"."l_commitdate" AS "l_commitdate", + "t2"."l_receiptdate" AS "l_receiptdate", + "t2"."l_shipinstruct" AS "l_shipinstruct", + "t2"."l_shipmode" AS "l_shipmode", + "t2"."l_comment" AS 
"l_comment", + "t3"."p_partkey" AS "p_partkey", + "t3"."p_name" AS "p_name", + "t3"."p_mfgr" AS "p_mfgr", + "t3"."p_brand" AS "p_brand", + "t3"."p_type" AS "p_type", + "t3"."p_size" AS "p_size", + "t3"."p_container" AS "p_container", + "t3"."p_retailprice" AS "p_retailprice", + "t3"."p_comment" AS "p_comment" + FROM ( SELECT - AVG(t0."l_quantity") AS "Mean(l_quantity)" - FROM t0 - WHERE - t0."l_partkey" = t1."p_partkey" - ) * 0.2 -) AS t2 \ No newline at end of file + "t0"."L_ORDERKEY" AS "l_orderkey", + "t0"."L_PARTKEY" AS "l_partkey", + "t0"."L_SUPPKEY" AS "l_suppkey", + "t0"."L_LINENUMBER" AS "l_linenumber", + "t0"."L_QUANTITY" AS "l_quantity", + "t0"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t0"."L_DISCOUNT" AS "l_discount", + "t0"."L_TAX" AS "l_tax", + "t0"."L_RETURNFLAG" AS "l_returnflag", + "t0"."L_LINESTATUS" AS "l_linestatus", + "t0"."L_SHIPDATE" AS "l_shipdate", + "t0"."L_COMMITDATE" AS "l_commitdate", + "t0"."L_RECEIPTDATE" AS "l_receiptdate", + "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t0"."L_SHIPMODE" AS "l_shipmode", + "t0"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."P_PARTKEY" AS "p_partkey", + "t1"."P_NAME" AS "p_name", + "t1"."P_MFGR" AS "p_mfgr", + "t1"."P_BRAND" AS "p_brand", + "t1"."P_TYPE" AS "p_type", + "t1"."P_SIZE" AS "p_size", + "t1"."P_CONTAINER" AS "p_container", + "t1"."P_RETAILPRICE" AS "p_retailprice", + "t1"."P_COMMENT" AS "p_comment" + FROM "PART" AS "t1" + ) AS "t3" + ON "t3"."p_partkey" = "t2"."l_partkey" + ) AS "t5" + WHERE + ( + "t5"."p_brand" = 'Brand#23' + ) + AND ( + "t5"."p_container" = 'MED BOX' + ) + AND ( + "t5"."l_quantity" < ( + ( + SELECT + AVG("t6"."l_quantity") AS "Mean(l_quantity)" + FROM ( + SELECT + * + FROM ( + SELECT + "t0"."L_ORDERKEY" AS "l_orderkey", + "t0"."L_PARTKEY" AS "l_partkey", + "t0"."L_SUPPKEY" AS "l_suppkey", + "t0"."L_LINENUMBER" AS "l_linenumber", + "t0"."L_QUANTITY" AS "l_quantity", + "t0"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t0"."L_DISCOUNT" AS "l_discount", + "t0"."L_TAX" AS "l_tax", + "t0"."L_RETURNFLAG" AS "l_returnflag", + "t0"."L_LINESTATUS" AS "l_linestatus", + "t0"."L_SHIPDATE" AS "l_shipdate", + "t0"."L_COMMITDATE" AS "l_commitdate", + "t0"."L_RECEIPTDATE" AS "l_receiptdate", + "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t0"."L_SHIPMODE" AS "l_shipmode", + "t0"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t0" + ) AS "t2" + WHERE + ( + "t2"."l_partkey" = "t5"."p_partkey" + ) + ) AS "t6" + ) * 0.2 + ) + ) +) AS "t8" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql index 1f50d0e70368..621c6423e037 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql @@ -1,52 +1,118 @@ -WITH t0 AS ( - SELECT - t2.l_orderkey AS l_orderkey, - SUM(t2.l_quantity) AS qty_sum - FROM main.lineitem AS t2 - GROUP BY - 1 -) SELECT - t1.c_name, - t1.c_custkey, - t1.o_orderkey, - t1.o_orderdate, - t1.o_totalprice, - t1.sum_qty + t11.c_name AS c_name, + t11.c_custkey AS c_custkey, + t11.o_orderkey AS o_orderkey, + t11.o_orderdate AS o_orderdate, + t11.o_totalprice AS o_totalprice, + t11.sum_qty AS sum_qty FROM ( SELECT - t2.c_name AS c_name, - t2.c_custkey AS c_custkey, - t3.o_orderkey AS o_orderkey, - t3.o_orderdate AS o_orderdate, - t3.o_totalprice AS o_totalprice, - SUM(t4.l_quantity) AS sum_qty - FROM main.customer AS t2 - JOIN 
main.orders AS t3 - ON t2.c_custkey = t3.o_custkey - JOIN main.lineitem AS t4 - ON t3.o_orderkey = t4.l_orderkey - WHERE - t3.o_orderkey IN ( + t10.c_name AS c_name, + t10.c_custkey AS c_custkey, + t10.o_orderkey AS o_orderkey, + t10.o_orderdate AS o_orderdate, + t10.o_totalprice AS o_totalprice, + SUM(t10.l_quantity) AS sum_qty + FROM ( + SELECT + t8.c_custkey AS c_custkey, + t8.c_name AS c_name, + t8.c_address AS c_address, + t8.c_nationkey AS c_nationkey, + t8.c_phone AS c_phone, + t8.c_acctbal AS c_acctbal, + t8.c_mktsegment AS c_mktsegment, + t8.c_comment AS c_comment, + t8.o_orderkey AS o_orderkey, + t8.o_custkey AS o_custkey, + t8.o_orderstatus AS o_orderstatus, + t8.o_totalprice AS o_totalprice, + t8.o_orderdate AS o_orderdate, + t8.o_orderpriority AS o_orderpriority, + t8.o_clerk AS o_clerk, + t8.o_shippriority AS o_shippriority, + t8.o_comment AS o_comment, + t8.l_orderkey AS l_orderkey, + t8.l_partkey AS l_partkey, + t8.l_suppkey AS l_suppkey, + t8.l_linenumber AS l_linenumber, + t8.l_quantity AS l_quantity, + t8.l_extendedprice AS l_extendedprice, + t8.l_discount AS l_discount, + t8.l_tax AS l_tax, + t8.l_returnflag AS l_returnflag, + t8.l_linestatus AS l_linestatus, + t8.l_shipdate AS l_shipdate, + t8.l_commitdate AS l_commitdate, + t8.l_receiptdate AS l_receiptdate, + t8.l_shipinstruct AS l_shipinstruct, + t8.l_shipmode AS l_shipmode, + t8.l_comment AS l_comment + FROM ( SELECT - t5.l_orderkey - FROM ( + t0.c_custkey AS c_custkey, + t0.c_name AS c_name, + t0.c_address AS c_address, + t0.c_nationkey AS c_nationkey, + t0.c_phone AS c_phone, + t0.c_acctbal AS c_acctbal, + t0.c_mktsegment AS c_mktsegment, + t0.c_comment AS c_comment, + t3.o_orderkey AS o_orderkey, + t3.o_custkey AS o_custkey, + t3.o_orderstatus AS o_orderstatus, + t3.o_totalprice AS o_totalprice, + t3.o_orderdate AS o_orderdate, + t3.o_orderpriority AS o_orderpriority, + t3.o_clerk AS o_clerk, + t3.o_shippriority AS o_shippriority, + t3.o_comment AS o_comment, + t4.l_orderkey AS l_orderkey, + t4.l_partkey AS l_partkey, + t4.l_suppkey AS l_suppkey, + t4.l_linenumber AS l_linenumber, + t4.l_quantity AS l_quantity, + t4.l_extendedprice AS l_extendedprice, + t4.l_discount AS l_discount, + t4.l_tax AS l_tax, + t4.l_returnflag AS l_returnflag, + t4.l_linestatus AS l_linestatus, + t4.l_shipdate AS l_shipdate, + t4.l_commitdate AS l_commitdate, + t4.l_receiptdate AS l_receiptdate, + t4.l_shipinstruct AS l_shipinstruct, + t4.l_shipmode AS l_shipmode, + t4.l_comment AS l_comment + FROM customer AS t0 + INNER JOIN orders AS t3 + ON t0.c_custkey = t3.o_custkey + INNER JOIN lineitem AS t4 + ON t3.o_orderkey = t4.l_orderkey + ) AS t8 + WHERE + t8.o_orderkey IN (( SELECT - t0.l_orderkey AS l_orderkey, - t0.qty_sum AS qty_sum - FROM t0 + t5.l_orderkey AS l_orderkey + FROM ( + SELECT + t2.l_orderkey AS l_orderkey, + SUM(t2.l_quantity) AS qty_sum + FROM lineitem AS t2 + GROUP BY + 1 + ) AS t5 WHERE - t0.qty_sum > CAST(300 AS SMALLINT) - ) AS t5 - ) + t5.qty_sum > CAST(300 AS SMALLINT) + )) + ) AS t10 GROUP BY 1, 2, 3, 4, 5 -) AS t1 +) AS t11 ORDER BY - t1.o_totalprice DESC, - t1.o_orderdate ASC + t11.o_totalprice DESC, + t11.o_orderdate ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql index 61aae5b5f00d..e2a86eb98ccf 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql +++ 
b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql @@ -1,94 +1,179 @@ -WITH t2 AS ( - SELECT - t5."C_CUSTKEY" AS "c_custkey", - t5."C_NAME" AS "c_name", - t5."C_ADDRESS" AS "c_address", - t5."C_NATIONKEY" AS "c_nationkey", - t5."C_PHONE" AS "c_phone", - t5."C_ACCTBAL" AS "c_acctbal", - t5."C_MKTSEGMENT" AS "c_mktsegment", - t5."C_COMMENT" AS "c_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS t5 -), t1 AS ( - SELECT - t5."O_ORDERKEY" AS "o_orderkey", - t5."O_CUSTKEY" AS "o_custkey", - t5."O_ORDERSTATUS" AS "o_orderstatus", - t5."O_TOTALPRICE" AS "o_totalprice", - t5."O_ORDERDATE" AS "o_orderdate", - t5."O_ORDERPRIORITY" AS "o_orderpriority", - t5."O_CLERK" AS "o_clerk", - t5."O_SHIPPRIORITY" AS "o_shippriority", - t5."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t5 -), t0 AS ( - SELECT - t5."L_ORDERKEY" AS "l_orderkey", - t5."L_PARTKEY" AS "l_partkey", - t5."L_SUPPKEY" AS "l_suppkey", - t5."L_LINENUMBER" AS "l_linenumber", - t5."L_QUANTITY" AS "l_quantity", - t5."L_EXTENDEDPRICE" AS "l_extendedprice", - t5."L_DISCOUNT" AS "l_discount", - t5."L_TAX" AS "l_tax", - t5."L_RETURNFLAG" AS "l_returnflag", - t5."L_LINESTATUS" AS "l_linestatus", - t5."L_SHIPDATE" AS "l_shipdate", - t5."L_COMMITDATE" AS "l_commitdate", - t5."L_RECEIPTDATE" AS "l_receiptdate", - t5."L_SHIPINSTRUCT" AS "l_shipinstruct", - t5."L_SHIPMODE" AS "l_shipmode", - t5."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t5 -), t3 AS ( - SELECT - t0."l_orderkey" AS "l_orderkey", - SUM(t0."l_quantity") AS "qty_sum" - FROM t0 - GROUP BY - 1 -) SELECT - t4."c_name", - t4."c_custkey", - t4."o_orderkey", - t4."o_orderdate", - t4."o_totalprice", - t4."sum_qty" + "t14"."c_name" AS "c_name", + "t14"."c_custkey" AS "c_custkey", + "t14"."o_orderkey" AS "o_orderkey", + "t14"."o_orderdate" AS "o_orderdate", + "t14"."o_totalprice" AS "o_totalprice", + "t14"."sum_qty" AS "sum_qty" FROM ( SELECT - t2."c_name" AS "c_name", - t2."c_custkey" AS "c_custkey", - t1."o_orderkey" AS "o_orderkey", - t1."o_orderdate" AS "o_orderdate", - t1."o_totalprice" AS "o_totalprice", - SUM(t0."l_quantity") AS "sum_qty" - FROM t2 - JOIN t1 - ON t2."c_custkey" = t1."o_custkey" - JOIN t0 - ON t1."o_orderkey" = t0."l_orderkey" - WHERE - t1."o_orderkey" IN ( + "t13"."c_name" AS "c_name", + "t13"."c_custkey" AS "c_custkey", + "t13"."o_orderkey" AS "o_orderkey", + "t13"."o_orderdate" AS "o_orderdate", + "t13"."o_totalprice" AS "o_totalprice", + SUM("t13"."l_quantity") AS "sum_qty" + FROM ( + SELECT + "t11"."c_custkey" AS "c_custkey", + "t11"."c_name" AS "c_name", + "t11"."c_address" AS "c_address", + "t11"."c_nationkey" AS "c_nationkey", + "t11"."c_phone" AS "c_phone", + "t11"."c_acctbal" AS "c_acctbal", + "t11"."c_mktsegment" AS "c_mktsegment", + "t11"."c_comment" AS "c_comment", + "t11"."o_orderkey" AS "o_orderkey", + "t11"."o_custkey" AS "o_custkey", + "t11"."o_orderstatus" AS "o_orderstatus", + "t11"."o_totalprice" AS "o_totalprice", + "t11"."o_orderdate" AS "o_orderdate", + "t11"."o_orderpriority" AS "o_orderpriority", + "t11"."o_clerk" AS "o_clerk", + "t11"."o_shippriority" AS "o_shippriority", + "t11"."o_comment" AS "o_comment", + "t11"."l_orderkey" AS "l_orderkey", + "t11"."l_partkey" AS "l_partkey", + "t11"."l_suppkey" AS "l_suppkey", + "t11"."l_linenumber" AS "l_linenumber", + "t11"."l_quantity" AS "l_quantity", + "t11"."l_extendedprice" AS "l_extendedprice", + "t11"."l_discount" AS "l_discount", + "t11"."l_tax" AS "l_tax", + "t11"."l_returnflag" AS 
"l_returnflag", + "t11"."l_linestatus" AS "l_linestatus", + "t11"."l_shipdate" AS "l_shipdate", + "t11"."l_commitdate" AS "l_commitdate", + "t11"."l_receiptdate" AS "l_receiptdate", + "t11"."l_shipinstruct" AS "l_shipinstruct", + "t11"."l_shipmode" AS "l_shipmode", + "t11"."l_comment" AS "l_comment" + FROM ( SELECT - t5."l_orderkey" + "t3"."c_custkey" AS "c_custkey", + "t3"."c_name" AS "c_name", + "t3"."c_address" AS "c_address", + "t3"."c_nationkey" AS "c_nationkey", + "t3"."c_phone" AS "c_phone", + "t3"."c_acctbal" AS "c_acctbal", + "t3"."c_mktsegment" AS "c_mktsegment", + "t3"."c_comment" AS "c_comment", + "t6"."o_orderkey" AS "o_orderkey", + "t6"."o_custkey" AS "o_custkey", + "t6"."o_orderstatus" AS "o_orderstatus", + "t6"."o_totalprice" AS "o_totalprice", + "t6"."o_orderdate" AS "o_orderdate", + "t6"."o_orderpriority" AS "o_orderpriority", + "t6"."o_clerk" AS "o_clerk", + "t6"."o_shippriority" AS "o_shippriority", + "t6"."o_comment" AS "o_comment", + "t7"."l_orderkey" AS "l_orderkey", + "t7"."l_partkey" AS "l_partkey", + "t7"."l_suppkey" AS "l_suppkey", + "t7"."l_linenumber" AS "l_linenumber", + "t7"."l_quantity" AS "l_quantity", + "t7"."l_extendedprice" AS "l_extendedprice", + "t7"."l_discount" AS "l_discount", + "t7"."l_tax" AS "l_tax", + "t7"."l_returnflag" AS "l_returnflag", + "t7"."l_linestatus" AS "l_linestatus", + "t7"."l_shipdate" AS "l_shipdate", + "t7"."l_commitdate" AS "l_commitdate", + "t7"."l_receiptdate" AS "l_receiptdate", + "t7"."l_shipinstruct" AS "l_shipinstruct", + "t7"."l_shipmode" AS "l_shipmode", + "t7"."l_comment" AS "l_comment" FROM ( SELECT - t3."l_orderkey" AS "l_orderkey", - t3."qty_sum" AS "qty_sum" - FROM t3 + "t0"."C_CUSTKEY" AS "c_custkey", + "t0"."C_NAME" AS "c_name", + "t0"."C_ADDRESS" AS "c_address", + "t0"."C_NATIONKEY" AS "c_nationkey", + "t0"."C_PHONE" AS "c_phone", + "t0"."C_ACCTBAL" AS "c_acctbal", + "t0"."C_MKTSEGMENT" AS "c_mktsegment", + "t0"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t0" + ) AS "t3" + INNER JOIN ( + SELECT + "t1"."O_ORDERKEY" AS "o_orderkey", + "t1"."O_CUSTKEY" AS "o_custkey", + "t1"."O_ORDERSTATUS" AS "o_orderstatus", + "t1"."O_TOTALPRICE" AS "o_totalprice", + "t1"."O_ORDERDATE" AS "o_orderdate", + "t1"."O_ORDERPRIORITY" AS "o_orderpriority", + "t1"."O_CLERK" AS "o_clerk", + "t1"."O_SHIPPRIORITY" AS "o_shippriority", + "t1"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t1" + ) AS "t6" + ON "t3"."c_custkey" = "t6"."o_custkey" + INNER JOIN ( + SELECT + "t2"."L_ORDERKEY" AS "l_orderkey", + "t2"."L_PARTKEY" AS "l_partkey", + "t2"."L_SUPPKEY" AS "l_suppkey", + "t2"."L_LINENUMBER" AS "l_linenumber", + "t2"."L_QUANTITY" AS "l_quantity", + "t2"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t2"."L_DISCOUNT" AS "l_discount", + "t2"."L_TAX" AS "l_tax", + "t2"."L_RETURNFLAG" AS "l_returnflag", + "t2"."L_LINESTATUS" AS "l_linestatus", + "t2"."L_SHIPDATE" AS "l_shipdate", + "t2"."L_COMMITDATE" AS "l_commitdate", + "t2"."L_RECEIPTDATE" AS "l_receiptdate", + "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t2"."L_SHIPMODE" AS "l_shipmode", + "t2"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t2" + ) AS "t7" + ON "t6"."o_orderkey" = "t7"."l_orderkey" + ) AS "t11" + WHERE + "t11"."o_orderkey" IN (( + SELECT + "t8"."l_orderkey" AS "l_orderkey" + FROM ( + SELECT + "t5"."l_orderkey" AS "l_orderkey", + SUM("t5"."l_quantity") AS "qty_sum" + FROM ( + SELECT + "t2"."L_ORDERKEY" AS "l_orderkey", + "t2"."L_PARTKEY" AS "l_partkey", + "t2"."L_SUPPKEY" AS "l_suppkey", + "t2"."L_LINENUMBER" AS "l_linenumber", + "t2"."L_QUANTITY" AS "l_quantity", + 
"t2"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t2"."L_DISCOUNT" AS "l_discount", + "t2"."L_TAX" AS "l_tax", + "t2"."L_RETURNFLAG" AS "l_returnflag", + "t2"."L_LINESTATUS" AS "l_linestatus", + "t2"."L_SHIPDATE" AS "l_shipdate", + "t2"."L_COMMITDATE" AS "l_commitdate", + "t2"."L_RECEIPTDATE" AS "l_receiptdate", + "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t2"."L_SHIPMODE" AS "l_shipmode", + "t2"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t2" + ) AS "t5" + GROUP BY + 1 + ) AS "t8" WHERE - t3."qty_sum" > 300 - ) AS t5 - ) + "t8"."qty_sum" > 300 + )) + ) AS "t13" GROUP BY 1, 2, 3, 4, 5 -) AS t4 +) AS "t14" ORDER BY - t4."o_totalprice" DESC, - t4."o_orderdate" ASC + "t14"."o_totalprice" DESC NULLS LAST, + "t14"."o_orderdate" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql index e5d84f17ac70..288021f12b67 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql @@ -1,29 +1,147 @@ SELECT - SUM(t0.l_extendedprice * ( - CAST(1 AS TINYINT) - t0.l_discount + SUM(t5.l_extendedprice * ( + CAST(1 AS TINYINT) - t5.l_discount )) AS revenue -FROM main.lineitem AS t0 -JOIN main.part AS t1 - ON t1.p_partkey = t0.l_partkey -WHERE - t1.p_brand = 'Brand#12' - AND t1.p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') - AND t0.l_quantity >= CAST(1 AS TINYINT) - AND t0.l_quantity <= CAST(11 AS TINYINT) - AND t1.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(5 AS TINYINT) - AND t0.l_shipmode IN ('AIR', 'AIR REG') - AND t0.l_shipinstruct = 'DELIVER IN PERSON' - OR t1.p_brand = 'Brand#23' - AND t1.p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') - AND t0.l_quantity >= CAST(10 AS TINYINT) - AND t0.l_quantity <= CAST(20 AS TINYINT) - AND t1.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(10 AS TINYINT) - AND t0.l_shipmode IN ('AIR', 'AIR REG') - AND t0.l_shipinstruct = 'DELIVER IN PERSON' - OR t1.p_brand = 'Brand#34' - AND t1.p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') - AND t0.l_quantity >= CAST(20 AS TINYINT) - AND t0.l_quantity <= CAST(30 AS TINYINT) - AND t1.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(15 AS TINYINT) - AND t0.l_shipmode IN ('AIR', 'AIR REG') - AND t0.l_shipinstruct = 'DELIVER IN PERSON' \ No newline at end of file +FROM ( + SELECT + t4.l_orderkey AS l_orderkey, + t4.l_partkey AS l_partkey, + t4.l_suppkey AS l_suppkey, + t4.l_linenumber AS l_linenumber, + t4.l_quantity AS l_quantity, + t4.l_extendedprice AS l_extendedprice, + t4.l_discount AS l_discount, + t4.l_tax AS l_tax, + t4.l_returnflag AS l_returnflag, + t4.l_linestatus AS l_linestatus, + t4.l_shipdate AS l_shipdate, + t4.l_commitdate AS l_commitdate, + t4.l_receiptdate AS l_receiptdate, + t4.l_shipinstruct AS l_shipinstruct, + t4.l_shipmode AS l_shipmode, + t4.l_comment AS l_comment, + t4.p_partkey AS p_partkey, + t4.p_name AS p_name, + t4.p_mfgr AS p_mfgr, + t4.p_brand AS p_brand, + t4.p_type AS p_type, + t4.p_size AS p_size, + t4.p_container AS p_container, + t4.p_retailprice AS p_retailprice, + t4.p_comment AS p_comment + FROM ( + SELECT + t0.l_orderkey AS l_orderkey, + t0.l_partkey AS l_partkey, + t0.l_suppkey AS l_suppkey, + t0.l_linenumber AS l_linenumber, + t0.l_quantity AS l_quantity, + t0.l_extendedprice AS l_extendedprice, + t0.l_discount AS l_discount, + t0.l_tax AS l_tax, + t0.l_returnflag AS l_returnflag, + t0.l_linestatus AS l_linestatus, + 
t0.l_shipdate AS l_shipdate, + t0.l_commitdate AS l_commitdate, + t0.l_receiptdate AS l_receiptdate, + t0.l_shipinstruct AS l_shipinstruct, + t0.l_shipmode AS l_shipmode, + t0.l_comment AS l_comment, + t2.p_partkey AS p_partkey, + t2.p_name AS p_name, + t2.p_mfgr AS p_mfgr, + t2.p_brand AS p_brand, + t2.p_type AS p_type, + t2.p_size AS p_size, + t2.p_container AS p_container, + t2.p_retailprice AS p_retailprice, + t2.p_comment AS p_comment + FROM lineitem AS t0 + INNER JOIN part AS t2 + ON t2.p_partkey = t0.l_partkey + ) AS t4 + WHERE + ( + ( + ( + ( + ( + ( + ( + ( + t4.p_brand = 'Brand#12' + ) + AND t4.p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + ) + AND ( + t4.l_quantity >= CAST(1 AS TINYINT) + ) + ) + AND ( + t4.l_quantity <= CAST(11 AS TINYINT) + ) + ) + AND t4.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(5 AS TINYINT) + ) + AND t4.l_shipmode IN ('AIR', 'AIR REG') + ) + AND ( + t4.l_shipinstruct = 'DELIVER IN PERSON' + ) + ) + OR ( + ( + ( + ( + ( + ( + ( + t4.p_brand = 'Brand#23' + ) + AND t4.p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + ) + AND ( + t4.l_quantity >= CAST(10 AS TINYINT) + ) + ) + AND ( + t4.l_quantity <= CAST(20 AS TINYINT) + ) + ) + AND t4.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(10 AS TINYINT) + ) + AND t4.l_shipmode IN ('AIR', 'AIR REG') + ) + AND ( + t4.l_shipinstruct = 'DELIVER IN PERSON' + ) + ) + ) + OR ( + ( + ( + ( + ( + ( + ( + t4.p_brand = 'Brand#34' + ) + AND t4.p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + ) + AND ( + t4.l_quantity >= CAST(20 AS TINYINT) + ) + ) + AND ( + t4.l_quantity <= CAST(30 AS TINYINT) + ) + ) + AND t4.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(15 AS TINYINT) + ) + AND t4.l_shipmode IN ('AIR', 'AIR REG') + ) + AND ( + t4.l_shipinstruct = 'DELIVER IN PERSON' + ) + ) +) AS t5 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql index a4c94116b64e..b6db87f2435c 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql @@ -1,61 +1,178 @@ -WITH t0 AS ( - SELECT - t2."L_ORDERKEY" AS "l_orderkey", - t2."L_PARTKEY" AS "l_partkey", - t2."L_SUPPKEY" AS "l_suppkey", - t2."L_LINENUMBER" AS "l_linenumber", - t2."L_QUANTITY" AS "l_quantity", - t2."L_EXTENDEDPRICE" AS "l_extendedprice", - t2."L_DISCOUNT" AS "l_discount", - t2."L_TAX" AS "l_tax", - t2."L_RETURNFLAG" AS "l_returnflag", - t2."L_LINESTATUS" AS "l_linestatus", - t2."L_SHIPDATE" AS "l_shipdate", - t2."L_COMMITDATE" AS "l_commitdate", - t2."L_RECEIPTDATE" AS "l_receiptdate", - t2."L_SHIPINSTRUCT" AS "l_shipinstruct", - t2."L_SHIPMODE" AS "l_shipmode", - t2."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t2 -), t1 AS ( - SELECT - t2."P_PARTKEY" AS "p_partkey", - t2."P_NAME" AS "p_name", - t2."P_MFGR" AS "p_mfgr", - t2."P_BRAND" AS "p_brand", - t2."P_TYPE" AS "p_type", - t2."P_SIZE" AS "p_size", - t2."P_CONTAINER" AS "p_container", - t2."P_RETAILPRICE" AS "p_retailprice", - t2."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t2 -) SELECT - SUM(t0."l_extendedprice" * ( - 1 - t0."l_discount" + SUM("t7"."l_extendedprice" * ( + 1 - "t7"."l_discount" )) AS "revenue" -FROM t0 -JOIN t1 - ON t1."p_partkey" = t0."l_partkey" -WHERE - t1."p_brand" = 'Brand#12' - AND t1."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') - AND 
t0."l_quantity" >= 1 - AND t0."l_quantity" <= 11 - AND t1."p_size" BETWEEN 1 AND 5 - AND t0."l_shipmode" IN ('AIR', 'AIR REG') - AND t0."l_shipinstruct" = 'DELIVER IN PERSON' - OR t1."p_brand" = 'Brand#23' - AND t1."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') - AND t0."l_quantity" >= 10 - AND t0."l_quantity" <= 20 - AND t1."p_size" BETWEEN 1 AND 10 - AND t0."l_shipmode" IN ('AIR', 'AIR REG') - AND t0."l_shipinstruct" = 'DELIVER IN PERSON' - OR t1."p_brand" = 'Brand#34' - AND t1."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') - AND t0."l_quantity" >= 20 - AND t0."l_quantity" <= 30 - AND t1."p_size" BETWEEN 1 AND 15 - AND t0."l_shipmode" IN ('AIR', 'AIR REG') - AND t0."l_shipinstruct" = 'DELIVER IN PERSON' \ No newline at end of file +FROM ( + SELECT + "t6"."l_orderkey" AS "l_orderkey", + "t6"."l_partkey" AS "l_partkey", + "t6"."l_suppkey" AS "l_suppkey", + "t6"."l_linenumber" AS "l_linenumber", + "t6"."l_quantity" AS "l_quantity", + "t6"."l_extendedprice" AS "l_extendedprice", + "t6"."l_discount" AS "l_discount", + "t6"."l_tax" AS "l_tax", + "t6"."l_returnflag" AS "l_returnflag", + "t6"."l_linestatus" AS "l_linestatus", + "t6"."l_shipdate" AS "l_shipdate", + "t6"."l_commitdate" AS "l_commitdate", + "t6"."l_receiptdate" AS "l_receiptdate", + "t6"."l_shipinstruct" AS "l_shipinstruct", + "t6"."l_shipmode" AS "l_shipmode", + "t6"."l_comment" AS "l_comment", + "t6"."p_partkey" AS "p_partkey", + "t6"."p_name" AS "p_name", + "t6"."p_mfgr" AS "p_mfgr", + "t6"."p_brand" AS "p_brand", + "t6"."p_type" AS "p_type", + "t6"."p_size" AS "p_size", + "t6"."p_container" AS "p_container", + "t6"."p_retailprice" AS "p_retailprice", + "t6"."p_comment" AS "p_comment" + FROM ( + SELECT + "t2"."l_orderkey" AS "l_orderkey", + "t2"."l_partkey" AS "l_partkey", + "t2"."l_suppkey" AS "l_suppkey", + "t2"."l_linenumber" AS "l_linenumber", + "t2"."l_quantity" AS "l_quantity", + "t2"."l_extendedprice" AS "l_extendedprice", + "t2"."l_discount" AS "l_discount", + "t2"."l_tax" AS "l_tax", + "t2"."l_returnflag" AS "l_returnflag", + "t2"."l_linestatus" AS "l_linestatus", + "t2"."l_shipdate" AS "l_shipdate", + "t2"."l_commitdate" AS "l_commitdate", + "t2"."l_receiptdate" AS "l_receiptdate", + "t2"."l_shipinstruct" AS "l_shipinstruct", + "t2"."l_shipmode" AS "l_shipmode", + "t2"."l_comment" AS "l_comment", + "t4"."p_partkey" AS "p_partkey", + "t4"."p_name" AS "p_name", + "t4"."p_mfgr" AS "p_mfgr", + "t4"."p_brand" AS "p_brand", + "t4"."p_type" AS "p_type", + "t4"."p_size" AS "p_size", + "t4"."p_container" AS "p_container", + "t4"."p_retailprice" AS "p_retailprice", + "t4"."p_comment" AS "p_comment" + FROM ( + SELECT + "t0"."L_ORDERKEY" AS "l_orderkey", + "t0"."L_PARTKEY" AS "l_partkey", + "t0"."L_SUPPKEY" AS "l_suppkey", + "t0"."L_LINENUMBER" AS "l_linenumber", + "t0"."L_QUANTITY" AS "l_quantity", + "t0"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t0"."L_DISCOUNT" AS "l_discount", + "t0"."L_TAX" AS "l_tax", + "t0"."L_RETURNFLAG" AS "l_returnflag", + "t0"."L_LINESTATUS" AS "l_linestatus", + "t0"."L_SHIPDATE" AS "l_shipdate", + "t0"."L_COMMITDATE" AS "l_commitdate", + "t0"."L_RECEIPTDATE" AS "l_receiptdate", + "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t0"."L_SHIPMODE" AS "l_shipmode", + "t0"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."P_PARTKEY" AS "p_partkey", + "t1"."P_NAME" AS "p_name", + "t1"."P_MFGR" AS "p_mfgr", + "t1"."P_BRAND" AS "p_brand", + "t1"."P_TYPE" AS "p_type", + "t1"."P_SIZE" AS "p_size", + "t1"."P_CONTAINER" AS "p_container", + 
"t1"."P_RETAILPRICE" AS "p_retailprice", + "t1"."P_COMMENT" AS "p_comment" + FROM "PART" AS "t1" + ) AS "t4" + ON "t4"."p_partkey" = "t2"."l_partkey" + ) AS "t6" + WHERE + ( + ( + ( + ( + ( + ( + ( + ( + "t6"."p_brand" = 'Brand#12' + ) + AND "t6"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + ) + AND ( + "t6"."l_quantity" >= 1 + ) + ) + AND ( + "t6"."l_quantity" <= 11 + ) + ) + AND "t6"."p_size" BETWEEN 1 AND 5 + ) + AND "t6"."l_shipmode" IN ('AIR', 'AIR REG') + ) + AND ( + "t6"."l_shipinstruct" = 'DELIVER IN PERSON' + ) + ) + OR ( + ( + ( + ( + ( + ( + ( + "t6"."p_brand" = 'Brand#23' + ) + AND "t6"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + ) + AND ( + "t6"."l_quantity" >= 10 + ) + ) + AND ( + "t6"."l_quantity" <= 20 + ) + ) + AND "t6"."p_size" BETWEEN 1 AND 10 + ) + AND "t6"."l_shipmode" IN ('AIR', 'AIR REG') + ) + AND ( + "t6"."l_shipinstruct" = 'DELIVER IN PERSON' + ) + ) + ) + OR ( + ( + ( + ( + ( + ( + ( + "t6"."p_brand" = 'Brand#34' + ) + AND "t6"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + ) + AND ( + "t6"."l_quantity" >= 20 + ) + ) + AND ( + "t6"."l_quantity" <= 30 + ) + ) + AND "t6"."p_size" BETWEEN 1 AND 15 + ) + AND "t6"."l_shipmode" IN ('AIR', 'AIR REG') + ) + AND ( + "t6"."l_shipinstruct" = 'DELIVER IN PERSON' + ) + ) +) AS "t7" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql index ec72d90e4bac..2cc90c2a16fa 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql @@ -1,73 +1,68 @@ -WITH t0 AS ( +SELECT + t9.s_name AS s_name, + t9.s_address AS s_address +FROM ( SELECT - t2.s_suppkey AS s_suppkey, - t2.s_name AS s_name, - t2.s_address AS s_address, - t2.s_nationkey AS s_nationkey, - t2.s_phone AS s_phone, - t2.s_acctbal AS s_acctbal, - t2.s_comment AS s_comment, - t3.n_nationkey AS n_nationkey, - t3.n_name AS n_name, - t3.n_regionkey AS n_regionkey, - t3.n_comment AS n_comment - FROM main.supplier AS t2 - JOIN main.nation AS t3 - ON t2.s_nationkey = t3.n_nationkey - WHERE - t3.n_name = 'CANADA' - AND t2.s_suppkey IN ( - SELECT - t4.ps_suppkey - FROM ( + t0.s_suppkey AS s_suppkey, + t0.s_name AS s_name, + t0.s_address AS s_address, + t0.s_nationkey AS s_nationkey, + t0.s_phone AS s_phone, + t0.s_acctbal AS s_acctbal, + t0.s_comment AS s_comment, + t5.n_nationkey AS n_nationkey, + t5.n_name AS n_name, + t5.n_regionkey AS n_regionkey, + t5.n_comment AS n_comment + FROM supplier AS t0 + INNER JOIN nation AS t5 + ON t0.s_nationkey = t5.n_nationkey +) AS t9 +WHERE + t9.n_name = 'CANADA' + AND t9.s_suppkey IN (( + SELECT + t1.ps_suppkey AS ps_suppkey + FROM partsupp AS t1 + WHERE + t1.ps_partkey IN (( SELECT - t5.ps_partkey AS ps_partkey, - t5.ps_suppkey AS ps_suppkey, - t5.ps_availqty AS ps_availqty, - t5.ps_supplycost AS ps_supplycost, - t5.ps_comment AS ps_comment - FROM main.partsupp AS t5 + t3.p_partkey AS p_partkey + FROM part AS t3 WHERE - t5.ps_partkey IN ( - SELECT - t6.p_partkey - FROM ( - SELECT - t7.p_partkey AS p_partkey, - t7.p_name AS p_name, - t7.p_mfgr AS p_mfgr, - t7.p_brand AS p_brand, - t7.p_type AS p_type, - t7.p_size AS p_size, - t7.p_container AS p_container, - t7.p_retailprice AS p_retailprice, - t7.p_comment AS p_comment - FROM main.part AS t7 - WHERE - t7.p_name LIKE 'forest%' - ) AS t6 - ) - AND t5.ps_availqty > ( + t3.p_name LIKE 'forest%' + )) + AND 
t1.ps_availqty > ( + ( + SELECT + SUM(t7.l_quantity) AS "Sum(l_quantity)" + FROM ( SELECT - SUM(t6.l_quantity) AS "Sum(l_quantity)" - FROM main.lineitem AS t6 + t4.l_orderkey AS l_orderkey, + t4.l_partkey AS l_partkey, + t4.l_suppkey AS l_suppkey, + t4.l_linenumber AS l_linenumber, + t4.l_quantity AS l_quantity, + t4.l_extendedprice AS l_extendedprice, + t4.l_discount AS l_discount, + t4.l_tax AS l_tax, + t4.l_returnflag AS l_returnflag, + t4.l_linestatus AS l_linestatus, + t4.l_shipdate AS l_shipdate, + t4.l_commitdate AS l_commitdate, + t4.l_receiptdate AS l_receiptdate, + t4.l_shipinstruct AS l_shipinstruct, + t4.l_shipmode AS l_shipmode, + t4.l_comment AS l_comment + FROM lineitem AS t4 WHERE - t6.l_partkey = t5.ps_partkey - AND t6.l_suppkey = t5.ps_suppkey - AND t6.l_shipdate >= MAKE_DATE(1994, 1, 1) - AND t6.l_shipdate < MAKE_DATE(1995, 1, 1) - ) * CAST(0.5 AS DOUBLE) - ) AS t4 - ) -) -SELECT - t1.s_name, - t1.s_address -FROM ( - SELECT - t0.s_name AS s_name, - t0.s_address AS s_address - FROM t0 -) AS t1 + t4.l_partkey = t1.ps_partkey + AND t4.l_suppkey = t1.ps_suppkey + AND t4.l_shipdate >= MAKE_DATE(1994, 1, 1) + AND t4.l_shipdate < MAKE_DATE(1995, 1, 1) + ) AS t7 + ) * CAST(0.5 AS DOUBLE) + ) + )) ORDER BY - t1.s_name ASC \ No newline at end of file + t9.s_name ASC diff --git a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql index 181c42bf2d75..3b49410ea996 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql @@ -1,129 +1,85 @@ -WITH t4 AS ( - SELECT - t7."S_SUPPKEY" AS "s_suppkey", - t7."S_NAME" AS "s_name", - t7."S_ADDRESS" AS "s_address", - t7."S_NATIONKEY" AS "s_nationkey", - t7."S_PHONE" AS "s_phone", - t7."S_ACCTBAL" AS "s_acctbal", - t7."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t7 -), t3 AS ( - SELECT - t7."N_NATIONKEY" AS "n_nationkey", - t7."N_NAME" AS "n_name", - t7."N_REGIONKEY" AS "n_regionkey", - t7."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS t7 -), t1 AS ( - SELECT - t7."PS_PARTKEY" AS "ps_partkey", - t7."PS_SUPPKEY" AS "ps_suppkey", - t7."PS_AVAILQTY" AS "ps_availqty", - t7."PS_SUPPLYCOST" AS "ps_supplycost", - t7."PS_COMMENT" AS "ps_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PARTSUPP" AS t7 -), t2 AS ( - SELECT - t7."P_PARTKEY" AS "p_partkey", - t7."P_NAME" AS "p_name", - t7."P_MFGR" AS "p_mfgr", - t7."P_BRAND" AS "p_brand", - t7."P_TYPE" AS "p_type", - t7."P_SIZE" AS "p_size", - t7."P_CONTAINER" AS "p_container", - t7."P_RETAILPRICE" AS "p_retailprice", - t7."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t7 -), t0 AS ( - SELECT - t7."L_ORDERKEY" AS "l_orderkey", - t7."L_PARTKEY" AS "l_partkey", - t7."L_SUPPKEY" AS "l_suppkey", - t7."L_LINENUMBER" AS "l_linenumber", - t7."L_QUANTITY" AS "l_quantity", - t7."L_EXTENDEDPRICE" AS "l_extendedprice", - t7."L_DISCOUNT" AS "l_discount", - t7."L_TAX" AS "l_tax", - t7."L_RETURNFLAG" AS "l_returnflag", - t7."L_LINESTATUS" AS "l_linestatus", - t7."L_SHIPDATE" AS "l_shipdate", - t7."L_COMMITDATE" AS "l_commitdate", - t7."L_RECEIPTDATE" AS "l_receiptdate", - t7."L_SHIPINSTRUCT" AS "l_shipinstruct", - t7."L_SHIPMODE" AS "l_shipmode", - t7."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t7 -), t5 AS ( +SELECT + "t12"."s_name" AS "s_name", + "t12"."s_address" AS 
"s_address" +FROM ( SELECT - t4."s_suppkey" AS "s_suppkey", - t4."s_name" AS "s_name", - t4."s_address" AS "s_address", - t4."s_nationkey" AS "s_nationkey", - t4."s_phone" AS "s_phone", - t4."s_acctbal" AS "s_acctbal", - t4."s_comment" AS "s_comment", - t3."n_nationkey" AS "n_nationkey", - t3."n_name" AS "n_name", - t3."n_regionkey" AS "n_regionkey", - t3."n_comment" AS "n_comment" - FROM t4 - JOIN t3 - ON t4."s_nationkey" = t3."n_nationkey" - WHERE - t3."n_name" = 'CANADA' - AND t4."s_suppkey" IN ( - SELECT - t7."ps_suppkey" - FROM ( + "t5"."s_suppkey" AS "s_suppkey", + "t5"."s_name" AS "s_name", + "t5"."s_address" AS "s_address", + "t5"."s_nationkey" AS "s_nationkey", + "t5"."s_phone" AS "s_phone", + "t5"."s_acctbal" AS "s_acctbal", + "t5"."s_comment" AS "s_comment", + "t7"."n_nationkey" AS "n_nationkey", + "t7"."n_name" AS "n_name", + "t7"."n_regionkey" AS "n_regionkey", + "t7"."n_comment" AS "n_comment" + FROM ( + SELECT + "t0"."S_SUPPKEY" AS "s_suppkey", + "t0"."S_NAME" AS "s_name", + "t0"."S_ADDRESS" AS "s_address", + "t0"."S_NATIONKEY" AS "s_nationkey", + "t0"."S_PHONE" AS "s_phone", + "t0"."S_ACCTBAL" AS "s_acctbal", + "t0"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t0" + ) AS "t5" + INNER JOIN ( + SELECT + "t2"."N_NATIONKEY" AS "n_nationkey", + "t2"."N_NAME" AS "n_name", + "t2"."N_REGIONKEY" AS "n_regionkey", + "t2"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t2" + ) AS "t7" + ON "t5"."s_nationkey" = "t7"."n_nationkey" +) AS "t12" +WHERE + "t12"."n_name" = 'CANADA' + AND "t12"."s_suppkey" IN (( + SELECT + "t1"."PS_SUPPKEY" AS "ps_suppkey" + FROM "PARTSUPP" AS "t1" + WHERE + "t1"."PS_PARTKEY" IN (( SELECT - t1."ps_partkey" AS "ps_partkey", - t1."ps_suppkey" AS "ps_suppkey", - t1."ps_availqty" AS "ps_availqty", - t1."ps_supplycost" AS "ps_supplycost", - t1."ps_comment" AS "ps_comment" - FROM t1 + "t3"."P_PARTKEY" AS "p_partkey" + FROM "PART" AS "t3" WHERE - t1."ps_partkey" IN ( - SELECT - t8."p_partkey" - FROM ( - SELECT - t2."p_partkey" AS "p_partkey", - t2."p_name" AS "p_name", - t2."p_mfgr" AS "p_mfgr", - t2."p_brand" AS "p_brand", - t2."p_type" AS "p_type", - t2."p_size" AS "p_size", - t2."p_container" AS "p_container", - t2."p_retailprice" AS "p_retailprice", - t2."p_comment" AS "p_comment" - FROM t2 - WHERE - t2."p_name" LIKE 'forest%' - ) AS t8 - ) - AND t1."ps_availqty" > ( + "t3"."P_NAME" LIKE 'forest%' + )) + AND "t1"."PS_AVAILQTY" > ( + ( + SELECT + SUM("t9"."l_quantity") AS "Sum(l_quantity)" + FROM ( SELECT - SUM(t0."l_quantity") AS "Sum(l_quantity)" - FROM t0 + "t4"."L_ORDERKEY" AS "l_orderkey", + "t4"."L_PARTKEY" AS "l_partkey", + "t4"."L_SUPPKEY" AS "l_suppkey", + "t4"."L_LINENUMBER" AS "l_linenumber", + "t4"."L_QUANTITY" AS "l_quantity", + "t4"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t4"."L_DISCOUNT" AS "l_discount", + "t4"."L_TAX" AS "l_tax", + "t4"."L_RETURNFLAG" AS "l_returnflag", + "t4"."L_LINESTATUS" AS "l_linestatus", + "t4"."L_SHIPDATE" AS "l_shipdate", + "t4"."L_COMMITDATE" AS "l_commitdate", + "t4"."L_RECEIPTDATE" AS "l_receiptdate", + "t4"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t4"."L_SHIPMODE" AS "l_shipmode", + "t4"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t4" WHERE - t0."l_partkey" = t1."ps_partkey" - AND t0."l_suppkey" = t1."ps_suppkey" - AND t0."l_shipdate" >= DATE_FROM_PARTS(1994, 1, 1) - AND t0."l_shipdate" < DATE_FROM_PARTS(1995, 1, 1) - ) * 0.5 - ) AS t7 - ) -) -SELECT - t6."s_name", - t6."s_address" -FROM ( - SELECT - t5."s_name" AS "s_name", - t5."s_address" AS "s_address" - FROM t5 -) AS t6 + "t4"."L_PARTKEY" = 
"t1"."PS_PARTKEY" + AND "t4"."L_SUPPKEY" = "t1"."PS_SUPPKEY" + AND "t4"."L_SHIPDATE" >= DATEFROMPARTS(1994, 1, 1) + AND "t4"."L_SHIPDATE" < DATEFROMPARTS(1995, 1, 1) + ) AS "t9" + ) * 0.5 + ) + )) ORDER BY - t6."s_name" ASC \ No newline at end of file + "t12"."s_name" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql index 3963cacc039e..282d2c3c05e2 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql @@ -1,56 +1,78 @@ -WITH t0 AS ( - SELECT - t3.l_orderkey AS l1_orderkey, - t4.o_orderstatus AS o_orderstatus, - t3.l_receiptdate AS l_receiptdate, - t3.l_commitdate AS l_commitdate, - t3.l_suppkey AS l1_suppkey, - t2.s_name AS s_name, - t5.n_name AS n_name - FROM main.supplier AS t2 - JOIN main.lineitem AS t3 - ON t2.s_suppkey = t3.l_suppkey - JOIN main.orders AS t4 - ON t4.o_orderkey = t3.l_orderkey - JOIN main.nation AS t5 - ON t2.s_nationkey = t5.n_nationkey -) SELECT - t1.s_name, - t1.numwait + t16.s_name AS s_name, + t16.numwait AS numwait FROM ( SELECT - t0.s_name AS s_name, + t15.s_name AS s_name, COUNT(*) AS numwait - FROM t0 - WHERE - t0.o_orderstatus = 'F' - AND t0.l_receiptdate > t0.l_commitdate - AND t0.n_name = 'SAUDI ARABIA' - AND ( - EXISTS( - SELECT - CAST(1 AS TINYINT) AS anon_1 - FROM main.lineitem AS t2 - WHERE - t2.l_orderkey = t0.l1_orderkey AND t2.l_suppkey <> t0.l1_suppkey + FROM ( + SELECT + t12.l1_orderkey AS l1_orderkey, + t12.o_orderstatus AS o_orderstatus, + t12.l_receiptdate AS l_receiptdate, + t12.l_commitdate AS l_commitdate, + t12.l1_suppkey AS l1_suppkey, + t12.s_name AS s_name, + t12.n_name AS n_name + FROM ( + SELECT + t4.l_orderkey AS l1_orderkey, + t7.o_orderstatus AS o_orderstatus, + t4.l_receiptdate AS l_receiptdate, + t4.l_commitdate AS l_commitdate, + t4.l_suppkey AS l1_suppkey, + t0.s_name AS s_name, + t8.n_name AS n_name + FROM supplier AS t0 + INNER JOIN lineitem AS t4 + ON t0.s_suppkey = t4.l_suppkey + INNER JOIN orders AS t7 + ON t7.o_orderkey = t4.l_orderkey + INNER JOIN nation AS t8 + ON t0.s_nationkey = t8.n_nationkey + ) AS t12 + WHERE + t12.o_orderstatus = 'F' + AND t12.l_receiptdate > t12.l_commitdate + AND t12.n_name = 'SAUDI ARABIA' + AND EXISTS( + ( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM lineitem AS t5 + WHERE + ( + t5.l_orderkey = t12.l1_orderkey + ) AND ( + t5.l_suppkey <> t12.l1_suppkey + ) + ) ) - ) - AND NOT ( - EXISTS( - SELECT - CAST(1 AS TINYINT) AS anon_2 - FROM main.lineitem AS t2 - WHERE - t2.l_orderkey = t0.l1_orderkey - AND t2.l_suppkey <> t0.l1_suppkey - AND t2.l_receiptdate > t2.l_commitdate + AND NOT ( + EXISTS( + ( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM lineitem AS t6 + WHERE + ( + ( + t6.l_orderkey = t12.l1_orderkey + ) AND ( + t6.l_suppkey <> t12.l1_suppkey + ) + ) + AND ( + t6.l_receiptdate > t6.l_commitdate + ) + ) + ) ) - ) + ) AS t15 GROUP BY 1 -) AS t1 +) AS t16 ORDER BY - t1.numwait DESC, - t1.s_name ASC + t16.numwait DESC, + t16.s_name ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql new file mode 100644 index 000000000000..89f5d7d5071c --- /dev/null +++ b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql @@ -0,0 +1,166 @@ +SELECT + "t20"."s_name" AS "s_name", + "t20"."numwait" 
AS "numwait" +FROM ( + SELECT + "t19"."s_name" AS "s_name", + COUNT(*) AS "numwait" + FROM ( + SELECT + "t16"."l1_orderkey" AS "l1_orderkey", + "t16"."o_orderstatus" AS "o_orderstatus", + "t16"."l_receiptdate" AS "l_receiptdate", + "t16"."l_commitdate" AS "l_commitdate", + "t16"."l1_suppkey" AS "l1_suppkey", + "t16"."s_name" AS "s_name", + "t16"."n_name" AS "n_name" + FROM ( + SELECT + "t8"."l_orderkey" AS "l1_orderkey", + "t11"."o_orderstatus" AS "o_orderstatus", + "t8"."l_receiptdate" AS "l_receiptdate", + "t8"."l_commitdate" AS "l_commitdate", + "t8"."l_suppkey" AS "l1_suppkey", + "t4"."s_name" AS "s_name", + "t12"."n_name" AS "n_name" + FROM ( + SELECT + "t0"."S_SUPPKEY" AS "s_suppkey", + "t0"."S_NAME" AS "s_name", + "t0"."S_ADDRESS" AS "s_address", + "t0"."S_NATIONKEY" AS "s_nationkey", + "t0"."S_PHONE" AS "s_phone", + "t0"."S_ACCTBAL" AS "s_acctbal", + "t0"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + ) AS "t8" + ON "t4"."s_suppkey" = "t8"."l_suppkey" + INNER JOIN ( + SELECT + "t2"."O_ORDERKEY" AS "o_orderkey", + "t2"."O_CUSTKEY" AS "o_custkey", + "t2"."O_ORDERSTATUS" AS "o_orderstatus", + "t2"."O_TOTALPRICE" AS "o_totalprice", + "t2"."O_ORDERDATE" AS "o_orderdate", + "t2"."O_ORDERPRIORITY" AS "o_orderpriority", + "t2"."O_CLERK" AS "o_clerk", + "t2"."O_SHIPPRIORITY" AS "o_shippriority", + "t2"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t2" + ) AS "t11" + ON "t11"."o_orderkey" = "t8"."l_orderkey" + INNER JOIN ( + SELECT + "t3"."N_NATIONKEY" AS "n_nationkey", + "t3"."N_NAME" AS "n_name", + "t3"."N_REGIONKEY" AS "n_regionkey", + "t3"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t3" + ) AS "t12" + ON "t4"."s_nationkey" = "t12"."n_nationkey" + ) AS "t16" + WHERE + "t16"."o_orderstatus" = 'F' + AND "t16"."l_receiptdate" > "t16"."l_commitdate" + AND "t16"."n_name" = 'SAUDI ARABIA' + AND EXISTS( + ( + SELECT + 1 AS "1" + FROM ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + ) AS "t9" + WHERE + ( + "t9"."l_orderkey" = "t16"."l1_orderkey" + ) + AND ( + "t9"."l_suppkey" <> "t16"."l1_suppkey" + ) + ) + ) + AND NOT ( + EXISTS( + ( + SELECT + 1 AS "1" + FROM ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + 
"t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + ) AS "t10" + WHERE + ( + ( + "t10"."l_orderkey" = "t16"."l1_orderkey" + ) + AND ( + "t10"."l_suppkey" <> "t16"."l1_suppkey" + ) + ) + AND ( + "t10"."l_receiptdate" > "t10"."l_commitdate" + ) + ) + ) + ) + ) AS "t19" + GROUP BY + 1 +) AS "t20" +ORDER BY + "t20"."numwait" DESC NULLS LAST, + "t20"."s_name" ASC +LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql index 76fa737c1056..e8d7d1723ac7 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql @@ -1,62 +1,64 @@ -WITH t0 AS ( - SELECT - CASE - WHEN ( - CAST(0 AS TINYINT) + 1 >= 1 - ) - THEN SUBSTR(t2.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) - ELSE SUBSTR(t2.c_phone, CAST(0 AS TINYINT) + 1 + LENGTH(t2.c_phone), CAST(2 AS TINYINT)) - END AS cntrycode, - t2.c_acctbal AS c_acctbal - FROM main.customer AS t2 - WHERE - CASE - WHEN ( - CAST(0 AS TINYINT) + 1 >= 1 - ) - THEN SUBSTR(t2.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) - ELSE SUBSTR(t2.c_phone, CAST(0 AS TINYINT) + 1 + LENGTH(t2.c_phone), CAST(2 AS TINYINT)) - END IN ('13', '31', '23', '29', '30', '18', '17') - AND t2.c_acctbal > ( - SELECT - anon_1.avg_bal - FROM ( - SELECT - AVG(t2.c_acctbal) AS avg_bal - FROM main.customer AS t2 - WHERE - t2.c_acctbal > CAST(0.0 AS DOUBLE) - AND CASE - WHEN ( - CAST(0 AS TINYINT) + 1 >= 1 - ) - THEN SUBSTR(t2.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) - ELSE SUBSTR(t2.c_phone, CAST(0 AS TINYINT) + 1 + LENGTH(t2.c_phone), CAST(2 AS TINYINT)) - END IN ('13', '31', '23', '29', '30', '18', '17') - ) AS anon_1 - ) - AND NOT ( - EXISTS( - SELECT - CAST(1 AS TINYINT) AS anon_2 - FROM main.orders AS t3 - WHERE - t3.o_custkey = t2.c_custkey - ) - ) -) SELECT - t1.cntrycode, - t1.numcust, - t1.totacctbal + t6.cntrycode AS cntrycode, + t6.numcust AS numcust, + t6.totacctbal AS totacctbal FROM ( SELECT - t0.cntrycode AS cntrycode, + t5.cntrycode AS cntrycode, COUNT(*) AS numcust, - SUM(t0.c_acctbal) AS totacctbal - FROM t0 + SUM(t5.c_acctbal) AS totacctbal + FROM ( + SELECT + CASE + WHEN CAST(0 AS TINYINT) >= 0 + THEN SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) + ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT), CAST(2 AS TINYINT)) + END AS cntrycode, + t0.c_acctbal AS c_acctbal + FROM customer AS t0 + WHERE + CASE + WHEN CAST(0 AS TINYINT) >= 0 + THEN SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) + ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT), CAST(2 AS TINYINT)) + END IN ('13', '31', '23', '29', '30', '18', '17') + AND t0.c_acctbal > ( + SELECT + AVG(t3.c_acctbal) AS "Mean(c_acctbal)" + FROM ( + SELECT + t0.c_custkey AS c_custkey, + t0.c_name AS c_name, + t0.c_address AS c_address, + t0.c_nationkey AS c_nationkey, + t0.c_phone AS c_phone, + t0.c_acctbal AS c_acctbal, + t0.c_mktsegment AS c_mktsegment, + t0.c_comment AS c_comment + FROM customer AS t0 + WHERE + 
t0.c_acctbal > CAST(0.0 AS DOUBLE) + AND CASE + WHEN CAST(0 AS TINYINT) >= 0 + THEN SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) + ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT), CAST(2 AS TINYINT)) + END IN ('13', '31', '23', '29', '30', '18', '17') + ) AS t3 + ) + AND NOT ( + EXISTS( + ( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM orders AS t1 + WHERE + t1.o_custkey = t0.c_custkey + ) + ) + ) + ) AS t5 GROUP BY 1 -) AS t1 +) AS t6 ORDER BY - t1.cntrycode ASC \ No newline at end of file + t6.cntrycode ASC diff --git a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql new file mode 100644 index 000000000000..a8e109a09208 --- /dev/null +++ b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql @@ -0,0 +1,52 @@ +SELECT + "t6"."cntrycode" AS "cntrycode", + "t6"."numcust" AS "numcust", + "t6"."totacctbal" AS "totacctbal" +FROM ( + SELECT + "t5"."cntrycode" AS "cntrycode", + COUNT(*) AS "numcust", + SUM("t5"."c_acctbal") AS "totacctbal" + FROM ( + SELECT + IFF(0 >= 0, SUBSTRING("t0"."C_PHONE", 0 + 1, 2), SUBSTRING("t0"."C_PHONE", 0, 2)) AS "cntrycode", + "t0"."C_ACCTBAL" AS "c_acctbal" + FROM "CUSTOMER" AS "t0" + WHERE + IFF(0 >= 0, SUBSTRING("t0"."C_PHONE", 0 + 1, 2), SUBSTRING("t0"."C_PHONE", 0, 2)) IN ('13', '31', '23', '29', '30', '18', '17') + AND "t0"."C_ACCTBAL" > ( + SELECT + AVG("t3"."c_acctbal") AS "Mean(c_acctbal)" + FROM ( + SELECT + "t0"."C_CUSTKEY" AS "c_custkey", + "t0"."C_NAME" AS "c_name", + "t0"."C_ADDRESS" AS "c_address", + "t0"."C_NATIONKEY" AS "c_nationkey", + "t0"."C_PHONE" AS "c_phone", + "t0"."C_ACCTBAL" AS "c_acctbal", + "t0"."C_MKTSEGMENT" AS "c_mktsegment", + "t0"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t0" + WHERE + "t0"."C_ACCTBAL" > 0.0 + AND IFF(0 >= 0, SUBSTRING("t0"."C_PHONE", 0 + 1, 2), SUBSTRING("t0"."C_PHONE", 0, 2)) IN ('13', '31', '23', '29', '30', '18', '17') + ) AS "t3" + ) + AND NOT ( + EXISTS( + ( + SELECT + 1 AS "1" + FROM "ORDERS" AS "t1" + WHERE + "t1"."O_CUSTKEY" = "t0"."C_CUSTKEY" + ) + ) + ) + ) AS "t5" + GROUP BY + 1 +) AS "t6" +ORDER BY + "t6"."cntrycode" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/test_h01.py b/ibis/backends/tests/tpch/test_h01.py index 4f136266b00e..2fd02b86eb54 100644 --- a/ibis/backends/tests/tpch/test_h01.py +++ b/ibis/backends/tests/tpch/test_h01.py @@ -30,7 +30,7 @@ def test_tpc_h01(lineitem): avg_qty=t.l_quantity.mean(), avg_price=t.l_extendedprice.mean(), avg_disc=t.l_discount.mean(), - count_order=t.count(), + count_order=lambda t: t.count(), ) q = q.order_by(["l_returnflag", "l_linestatus"]) return q diff --git a/ibis/backends/tests/tpch/test_h04.py b/ibis/backends/tests/tpch/test_h04.py index bf7e40adec6d..536eaa2a1913 100644 --- a/ibis/backends/tests/tpch/test_h04.py +++ b/ibis/backends/tests/tpch/test_h04.py @@ -20,6 +20,6 @@ def test_tpc_h04(orders, lineitem): ] ) q = q.group_by([orders.o_orderpriority]) - q = q.aggregate(order_count=orders.count()) + q = q.aggregate(order_count=lambda t: t.count()) q = q.order_by([orders.o_orderpriority]) return q diff --git a/ibis/backends/tests/tpch/test_h08.py b/ibis/backends/tests/tpch/test_h08.py index 18bcf10168d2..971a83c4c352 100644 --- a/ibis/backends/tests/tpch/test_h08.py +++ b/ibis/backends/tests/tpch/test_h08.py @@ -8,13 +8,7 @@ @tpch_test -@pytest.mark.notimpl( - ["snowflake"], - raises=AssertionError, - reason="ibis doesn't preserve decimal types in aggregations", -) 
@pytest.mark.xfail_version( - duckdb=["sqlalchemy>=2"], trino=["sqlalchemy>=2"], reason="slightly different code is generated for sqlalchemy 2 for aggregations", ) diff --git a/ibis/backends/tests/tpch/test_h11.py b/ibis/backends/tests/tpch/test_h11.py index 75439d06c8ce..e13ba99179f9 100644 --- a/ibis/backends/tests/tpch/test_h11.py +++ b/ibis/backends/tests/tpch/test_h11.py @@ -1,18 +1,11 @@ from __future__ import annotations -import pytest - import ibis from .conftest import tpch_test @tpch_test -@pytest.mark.broken( - ["snowflake"], - reason="ibis generates incorrect code for the right-hand-side of the exists statement", - raises=AssertionError, -) def test_tpc_h11(partsupp, supplier, nation): NATION = "GERMANY" FRACTION = 0.0001 diff --git a/ibis/backends/tests/tpch/test_h14.py b/ibis/backends/tests/tpch/test_h14.py index bf78c57481f6..f72bbcaf6c2b 100644 --- a/ibis/backends/tests/tpch/test_h14.py +++ b/ibis/backends/tests/tpch/test_h14.py @@ -8,13 +8,7 @@ @tpch_test -@pytest.mark.notimpl( - ["snowflake"], - raises=AssertionError, - reason="ibis doesn't preserve decimal types in aggregations", -) @pytest.mark.xfail_version( - duckdb=["sqlalchemy>=2"], trino=["sqlalchemy>=2"], reason="slightly different code is generated for sqlalchemy 2 for aggregations", ) diff --git a/ibis/backends/tests/tpch/test_h17.py b/ibis/backends/tests/tpch/test_h17.py index 1eed92064476..0d112d048c91 100644 --- a/ibis/backends/tests/tpch/test_h17.py +++ b/ibis/backends/tests/tpch/test_h17.py @@ -6,13 +6,7 @@ @tpch_test -@pytest.mark.notimpl( - ["snowflake"], - raises=AssertionError, - reason="ibis doesn't preserve decimal types in aggregations", -) @pytest.mark.xfail_version( - duckdb=["sqlalchemy>=2"], trino=["sqlalchemy>=2"], reason="slightly different code is generated for sqlalchemy 2 for aggregations", ) diff --git a/ibis/backends/tests/tpch/test_h21.py b/ibis/backends/tests/tpch/test_h21.py index f8aea4314c81..487b574bb615 100644 --- a/ibis/backends/tests/tpch/test_h21.py +++ b/ibis/backends/tests/tpch/test_h21.py @@ -1,19 +1,11 @@ from __future__ import annotations -import pytest -import sqlalchemy as sa - import ibis from .conftest import tpch_test @tpch_test -@pytest.mark.broken( - ["snowflake"], - reason="ibis generates overlapping aliases", - raises=sa.exc.CompileError, -) def test_tpc_h21(supplier, lineitem, orders, nation): """Suppliers Who Kept Orders Waiting Query (Q21) diff --git a/ibis/backends/tests/tpch/test_h22.py b/ibis/backends/tests/tpch/test_h22.py index f18da1d2930b..d505436c4927 100644 --- a/ibis/backends/tests/tpch/test_h22.py +++ b/ibis/backends/tests/tpch/test_h22.py @@ -1,16 +1,9 @@ from __future__ import annotations -import pytest - from .conftest import tpch_test @tpch_test -@pytest.mark.broken( - ["snowflake"], - reason="ibis generates incorrect code for the right-hand-side of the exists statement", - raises=AssertionError, -) def test_tpc_h22(customer, orders): """Global Sales Opportunity Query (Q22) From da7745e4789154c9c80d541eb4cf6774f3de0906 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 22 Dec 2023 12:38:39 -0500 Subject: [PATCH 018/161] refactor(duckdb/clickhouse): implement sqlglot backends and re-enable ci --- .github/renovate.json | 2 +- .../workflows/ibis-backends-skip-helper.yml | 4 +- .github/workflows/ibis-backends.yml | 1193 +++++++++-------- .github/workflows/ibis-main.yml | 7 +- ibis/backends/base/__init__.py | 5 +- ibis/backends/base/df/timecontext.py | 20 +- ibis/backends/base/sql/alchemy/registry.py | 2 +- 
.../base/sql/compiler/query_builder.py | 10 - .../base/sql/compiler/select_builder.py | 29 +- ibis/backends/base/sql/registry/main.py | 2 +- ibis/backends/base/sqlglot/__init__.py | 21 +- ibis/backends/base/sqlglot/compiler.py | 165 +-- ibis/backends/base/sqlglot/datatypes.py | 28 + ibis/backends/base/sqlglot/rewrites.py | 15 +- ibis/backends/clickhouse/__init__.py | 47 +- ibis/backends/clickhouse/compiler.py | 99 +- .../test_cast_double_col/float/out.sql | 2 +- .../test_noop_cast/bigint_col/out.sql | 2 +- .../test_noop_cast/bool_col/out.sql | 2 +- .../test_noop_cast/date_string_col/out.sql | 2 +- .../test_noop_cast/double_col/out.sql | 2 +- .../test_noop_cast/float_col/out.sql | 2 +- .../test_functions/test_noop_cast/id/out.sql | 2 +- .../test_noop_cast/int_col/out.sql | 2 +- .../test_noop_cast/month/out.sql | 2 +- .../test_noop_cast/smallint_col/out.sql | 2 +- .../test_noop_cast/string_col/out.sql | 2 +- .../test_noop_cast/timestamp_col/out.sql | 2 +- .../test_noop_cast/tinyint_col/out.sql | 2 +- .../test_noop_cast/year/out.sql | 2 +- .../test_array_join_in_subquery/out.sql | 8 +- .../out.sql | 2 +- .../test_select/test_complex_join/out.sql | 25 +- .../test_select/test_count_name/out.sql | 2 +- .../test_isin_notin_in_select/out1.sql | 26 +- .../test_isin_notin_in_select/out2.sql | 26 +- .../test_join_self_reference/out.sql | 32 +- .../test_named_from_filter_groupby/out1.sql | 6 +- .../test_named_from_filter_groupby/out2.sql | 6 +- .../test_self_reference_simple/out.sql | 2 +- .../playerID-awardID-any_inner_join/out.sql | 50 +- .../playerID-awardID-any_left_join/out.sql | 50 +- .../playerID-awardID-inner_join/out.sql | 50 +- .../playerID-awardID-left_join/out.sql | 50 +- .../playerID-playerID-any_inner_join/out.sql | 50 +- .../playerID-playerID-any_left_join/out.sql | 50 +- .../playerID-playerID-inner_join/out.sql | 50 +- .../playerID-playerID-left_join/out.sql | 50 +- .../test_simple_scalar_aggregates/out.sql | 26 +- .../test_table_column_unbox/out.sql | 30 +- .../test_where_simple_comparisons/out.sql | 26 +- .../test_where_with_between/out.sql | 26 +- .../test_where_with_timestamp/out.sql | 2 +- ibis/backends/conftest.py | 1 - ibis/backends/duckdb/__init__.py | 52 +- ibis/backends/duckdb/compiler.py | 13 +- ibis/backends/duckdb/registry.py | 605 --------- ibis/backends/duckdb/tests/conftest.py | 27 +- .../test_geospatial_dwithin/out.sql | 4 +- .../as_text/out.sql | 2 +- .../n_points/out.sql | 2 +- ibis/backends/duckdb/tests/test_client.py | 1 + ibis/backends/duckdb/tests/test_datatypes.py | 1 + ibis/backends/duckdb/tests/test_geospatial.py | 33 +- ibis/backends/duckdb/tests/test_register.py | 5 +- ibis/backends/tests/errors.py | 7 +- .../test_default_limit/clickhouse/out.sql | 5 + .../test_default_limit/duckdb/out.sql | 5 + .../clickhouse/out.sql | 5 + .../test_disable_query_limit/duckdb/out.sql | 5 + .../clickhouse/out.sql | 3 + .../duckdb/out.sql | 3 + .../test_respect_set_limit/clickhouse/out.sql | 10 + .../test_respect_set_limit/duckdb/out.sql | 10 + .../test_sql/test_isin_bug/clickhouse/out.sql | 6 +- .../test_sql/test_isin_bug/duckdb/out.sql | 6 +- .../test_union_aliasing/clickhouse/out.sql | 56 +- .../test_union_aliasing/duckdb/out.sql | 56 +- ibis/backends/tests/sql/conftest.py | 19 +- .../test_agg_and_non_agg_filter/out.sql | 28 +- .../test_compiler/test_agg_filter/out.sql | 29 +- .../test_agg_filter_with_alias/out.sql | 29 +- .../test_column_distinct/out.sql | 2 +- .../test_column_expr_default_name/out.sql | 4 +- .../test_column_expr_retains_name/out.sql | 4 +- 
.../test_compiler/test_count_distinct/out.sql | 20 +- .../test_difference_project_column/out.sql | 36 +- .../test_having_from_filter/decompiled.py | 2 +- .../test_having_from_filter/out.sql | 39 +- .../test_compiler/test_having_size/out.sql | 28 +- .../test_intersect_project_column/out.sql | 36 +- .../test_multiple_count_distinct/out.sql | 2 +- .../test_pushdown_with_or/out.sql | 9 +- .../test_simple_agg_filter/out.sql | 26 +- .../decompiled.py | 6 +- .../test_subquery_where_location/out.sql | 28 +- .../test_table_difference/out.sql | 27 +- .../test_table_distinct/decompiled.py | 2 +- .../test_compiler/test_table_distinct/out.sql | 4 +- .../test_table_drop_with_filter/decompiled.py | 7 +- .../test_table_drop_with_filter/out.sql | 32 +- .../test_table_intersect/out.sql | 27 +- .../test_compiler/test_union/out.sql | 27 +- .../test_compiler/test_union_order_by/out.sql | 27 +- .../test_union_project_column/out.sql | 36 +- .../test_aggregate_count_joined/decompiled.py | 16 +- .../test_aggregate_count_joined/out.sql | 14 +- .../test_aggregate_having/explicit.sql | 9 +- .../test_aggregate_having/inline.sql | 28 +- .../out.sql | 22 +- .../agg_filtered.sql | 46 +- .../agg_filtered2.sql | 50 +- .../filtered.sql | 38 +- .../proj.sql | 36 +- .../test_anti_join/decompiled.py | 4 +- .../test_select_sql/test_anti_join/out.sql | 14 +- .../test_select_sql/test_bool_bool/out.sql | 10 +- .../test_bug_duplicated_where/out.sql | 36 +- .../test_bug_project_multiple_times/out.sql | 76 +- .../test_case_in_projection/out.sql | 22 +- .../result.sql | 10 +- .../test_complex_union/result.sql | 21 +- .../out.sql | 12 +- .../test_endswith/decompiled.py | 12 +- .../test_select_sql/test_endswith/out.sql | 4 +- .../test_exists_subquery/out.sql | 13 + .../test_filter_inside_exists/out.sql | 25 +- .../test_filter_predicates/out.sql | 4 +- .../result.sql | 34 +- .../expr3.sql | 32 +- .../expr4.sql | 38 +- .../test_fuse_projections/decompiled.py | 9 +- .../test_fuse_projections/project.sql | 6 +- .../test_fuse_projections/project_filter.sql | 22 +- .../result.sql | 18 +- .../result.sql | 12 +- .../test_join_between_joins/decompiled.py | 27 +- .../test_join_between_joins/out.sql | 43 +- .../out.sql | 48 +- .../test_join_just_materialized/decompiled.py | 26 +- .../test_join_just_materialized/out.sql | 40 +- .../test_join_projection_subquery_bug/out.sql | 38 +- .../result.sql | 36 +- .../test_join_with_limited_table/out.sql | 14 +- .../test_limit_cte_extract/out.sql | 33 +- .../test_limit_with_self_join/decompiled.py | 54 +- .../test_limit_with_self_join/out.sql | 35 +- .../test_loj_subquery_filter_handling/out.sql | 28 +- .../test_multiple_joins/decompiled.py | 6 +- .../test_multiple_joins/out.sql | 22 +- .../test_multiple_limits/decompiled.py | 40 +- .../test_multiple_limits/out.sql | 2 +- .../out.sql | 5 +- .../test_projection_filter_fuse/out.sql | 13 +- .../out.sql | 15 +- .../agg_explicit_column/out.sql | 2 +- .../agg_string_columns/out.sql | 4 +- .../decompiled.py | 6 +- .../aggregate_table_count_metric/out.sql | 4 +- .../test_select_sql/filter_then_limit/out.sql | 9 +- .../limit_simple/decompiled.py | 6 +- .../limit_then_filter/decompiled.py | 5 +- .../test_select_sql/limit_then_filter/out.sql | 9 +- .../limit_with_offset/decompiled.py | 6 +- .../self_reference_simple/decompiled.py | 2 +- .../test_select_sql/single_column/out.sql | 5 +- .../out.sql | 44 +- .../test_semi_join/decompiled.py | 4 +- .../test_select_sql/test_semi_join/out.sql | 14 +- .../test_simple_joins/decompiled.py | 2 +- .../test_simple_joins/inner.sql | 
14 +- .../test_simple_joins/inner_two_preds.sql | 14 +- .../test_simple_joins/left.sql | 14 +- .../test_simple_joins/outer.sql | 14 +- .../result1.sql | 16 +- .../result2.sql | 16 +- .../test_startswith/decompiled.py | 12 +- .../test_select_sql/test_startswith/out.sql | 4 +- .../out.sql | 118 +- .../expr.sql | 17 +- .../expr2.sql | 32 +- .../test_subquery_in_union/decompiled.py | 12 +- .../test_subquery_in_union/out.sql | 100 +- .../test_subquery_used_for_self_join/out.sql | 40 +- .../test_topk_analysis_bug/out.sql | 33 +- .../test_topk_operation/e1.sql | 25 +- .../test_topk_operation/e2.sql | 25 +- .../test_topk_predicate_pushdown_bug/out.sql | 90 +- .../test_topk_to_aggregate/out.sql | 5 +- .../test_tpch_self_join_failure/out.sql | 74 +- .../test_where_analyze_scalar_op/out.sql | 27 +- .../decompiled.py | 4 +- .../test_where_no_pushdown_possible/out.sql | 28 +- .../test_where_with_between/out.sql | 17 +- .../test_where_with_join/decompiled.py | 4 +- .../test_where_with_join/out.sql | 33 +- .../test_aggregate/having_count/out.sql | 28 +- .../test_aggregate/having_sum/out.sql | 9 +- .../test_sql/test_aggregate/single/out.sql | 2 +- .../test_sql/test_aggregate/two/out.sql | 4 +- .../snapshots/test_sql/test_between/out.sql | 4 +- .../test_boolean_conjunction/and/out.sql | 12 +- .../test_boolean_conjunction/or/out.sql | 12 +- .../snapshots/test_sql/test_coalesce/out.sql | 12 +- .../test_sql/test_comparisons/eq/out.sql | 4 +- .../test_sql/test_comparisons/ge/out.sql | 4 +- .../test_sql/test_comparisons/gt/out.sql | 4 +- .../test_sql/test_comparisons/le/out.sql | 4 +- .../test_sql/test_comparisons/lt/out.sql | 4 +- .../test_sql/test_comparisons/ne/out.sql | 4 +- .../out.sql | 14 +- .../test_distinct/count_distinct/out.sql | 4 +- .../group_by_count_distinct/out.sql | 2 +- .../test_distinct/projection_distinct/out.sql | 4 +- .../single_column_projection_distinct/out.sql | 2 +- .../sql/snapshots/test_sql/test_exists/e1.sql | 22 +- .../sql/snapshots/test_sql/test_exists/e2.sql | 30 +- .../out.sql | 9 +- .../snapshots/test_sql/test_gh_1045/out.sql | 37 + .../test_isnull_notnull/isnull/out.sql | 4 +- .../test_isnull_notnull/notnull/out.sql | 4 +- .../test_join_just_materialized/out.sql | 40 +- .../test_sql/test_joins/inner/out.sql | 11 + .../test_sql/test_joins/inner_select/out.sql | 8 + .../test_sql/test_joins/left/out.sql | 11 + .../test_sql/test_joins/left_select/out.sql | 8 + .../test_sql/test_joins/outer/out.sql | 11 + .../test_sql/test_joins/outer_select/out.sql | 8 + .../test_sql/test_limit_filter/out.sql | 9 +- .../test_sql/test_limit_subquery/out.sql | 9 +- .../decompiled.py | 4 +- .../test_lower_projection_sort_key/out.sql | 38 +- .../test_sql/test_multi_join/out.sql | 34 +- .../out.sql | 14 +- .../snapshots/test_sql/test_negate/out.sql | 8 +- .../test_sql/test_no_cart_join/out.sql | 37 +- .../test_sql/test_no_cartesian_join/out.sql | 92 +- .../test_sql/test_no_cross_join/out.sql | 30 +- .../test_sql/test_not_exists/out.sql | 22 +- .../test_sql/test_order_by/column/out.sql | 5 +- .../test_sql/test_order_by/random/out.sql | 5 +- .../test_sql/test_order_by_expr/out.sql | 17 +- .../test_sql/test_searched_case/out.sql | 16 +- .../anti.sql | 32 +- .../semi.sql | 32 +- .../test_sql/test_self_reference_join/out.sql | 14 +- .../test_sql/test_simple_case/out.sql | 4 +- .../out.sql | 5 +- .../test_sql/test_subquery_aliased/out.sql | 14 +- .../test_where_correlated_subquery/out.sql | 30 +- .../out.sql | 48 +- .../test_where_simple_comparisons/out.sql | 13 +- 
.../test_where_uncorrelated_subquery/out.sql | 11 +- ibis/backends/tests/sql/test_compiler.py | 3 +- ibis/backends/tests/sql/test_select_sql.py | 41 +- ibis/backends/tests/sql/test_sql.py | 12 +- ibis/backends/tests/test_aggregation.py | 38 +- ibis/backends/tests/test_api.py | 2 +- ibis/backends/tests/test_array.py | 5 +- ibis/backends/tests/test_benchmarks.py | 3 - ibis/backends/tests/test_client.py | 6 +- ibis/backends/tests/test_export.py | 22 +- ibis/backends/tests/test_generic.py | 44 +- ibis/backends/tests/test_interactive.py | 97 +- ibis/backends/tests/test_join.py | 2 +- ibis/backends/tests/test_numeric.py | 75 +- ibis/backends/tests/test_param.py | 6 +- ibis/backends/tests/test_sql.py | 18 +- ibis/backends/tests/test_string.py | 16 +- ibis/backends/tests/test_temporal.py | 58 +- ibis/backends/tests/test_window.py | 41 +- .../test_h01/test_tpc_h01/duckdb/h01.sql | 56 +- .../test_h02/test_tpc_h02/duckdb/h02.sql | 206 +-- .../test_h03/test_tpc_h03/duckdb/h03.sql | 176 +-- .../test_h04/test_tpc_h04/duckdb/h04.sql | 44 +- .../test_h05/test_tpc_h05/duckdb/h05.sql | 234 ++-- .../test_h06/test_tpc_h06/duckdb/h06.sql | 34 +- .../test_h07/test_tpc_h07/duckdb/h07.sql | 106 +- .../test_h08/test_tpc_h08/duckdb/h08.sql | 78 +- .../test_h09/test_tpc_h09/duckdb/h09.sql | 66 +- .../test_h10/test_tpc_h10/duckdb/h10.sql | 210 +-- .../test_h11/test_tpc_h11/duckdb/h11.sql | 172 +-- .../test_h12/test_tpc_h12/duckdb/h12.sql | 136 +- .../test_h13/test_tpc_h13/duckdb/h13.sql | 62 +- .../test_h14/test_tpc_h14/duckdb/h14.sql | 122 +- .../test_h15/test_tpc_h15/duckdb/h15.sql | 148 +- .../test_h16/test_tpc_h16/duckdb/h16.sql | 104 +- .../test_h17/test_tpc_h17/duckdb/h17.sql | 156 +-- .../test_h18/test_tpc_h18/duckdb/h18.sql | 188 +-- .../test_h19/test_tpc_h19/duckdb/h19.sql | 156 +-- .../test_h20/test_tpc_h20/duckdb/h20.sql | 86 +- .../test_h21/test_tpc_h21/duckdb/h21.sql | 108 +- .../test_h22/test_tpc_h22/duckdb/h22.sql | 40 +- ibis/backends/tests/tpch/test_h08.py | 5 + ibis/backends/tests/tpch/test_h11.py | 7 + ibis/backends/tests/tpch/test_h14.py | 5 + ibis/backends/tests/tpch/test_h17.py | 5 + ibis/backends/tests/tpch/test_h21.py | 8 + ibis/backends/tests/tpch/test_h22.py | 7 + ibis/examples/tests/test_examples.py | 25 - ibis/expr/decompile.py | 24 +- ibis/expr/operations/relations.py | 26 +- ibis/expr/rewrites.py | 121 ++ .../test_format_dummy_table/repr.txt | 2 +- .../decompiled.py | 17 +- .../decompiled.py | 11 +- .../inner/decompiled.py | 11 +- .../left/decompiled.py | 11 +- .../right/decompiled.py | 11 +- .../decompiled.py | 11 +- .../decompiled.py | 17 +- .../decompiled.py | 11 +- ibis/expr/tests/test_format.py | 6 +- ibis/expr/types/joins.py | 5 + ibis/formats/pandas.py | 26 +- ibis/tests/expr/mocks.py | 5 + ibis/tests/util.py | 4 +- pyproject.toml | 9 +- 318 files changed, 5214 insertions(+), 5141 deletions(-) delete mode 100644 ibis/backends/duckdb/registry.py create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/clickhouse/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/duckdb/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/clickhouse/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/duckdb/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/clickhouse/out.sql create mode 100644 
ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/duckdb/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/clickhouse/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/duckdb/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_select_sql/test_exists_subquery/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_gh_1045/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner_select/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_joins/left/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_joins/left_select/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer/out.sql create mode 100644 ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer_select/out.sql diff --git a/.github/renovate.json b/.github/renovate.json index 392a0a4558ae..8f6329da3b2a 100644 --- a/.github/renovate.json +++ b/.github/renovate.json @@ -46,7 +46,7 @@ "addLabels": ["bigquery"] }, { - "matchPackagePatterns": ["duckdb", "duckdb-engine"], + "matchPackagePatterns": ["duckdb"], "addLabels": ["duckdb"] }, { diff --git a/.github/workflows/ibis-backends-skip-helper.yml b/.github/workflows/ibis-backends-skip-helper.yml index d889b16d8d45..009070c65c5a 100644 --- a/.github/workflows/ibis-backends-skip-helper.yml +++ b/.github/workflows/ibis-backends-skip-helper.yml @@ -13,7 +13,7 @@ on: branches: - main - "*.x.x" - - "!the-epic-split" + - the-epic-split pull_request: paths: - "docs/**" @@ -24,7 +24,7 @@ on: branches: - main - "*.x.x" - - "!the-epic-split" + - the-epic-split merge_group: jobs: test_backends: diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index e42589ab004c..e8cd07907ee8 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -12,7 +12,7 @@ on: branches: - main - "*.x.x" - - "!the-epic-split" + - the-epic-split pull_request: # Skip the backend suite if all changes are docs paths-ignore: @@ -24,7 +24,7 @@ on: branches: - main - "*.x.x" - - "!the-epic-split" + - the-epic-split merge_group: permissions: @@ -56,10 +56,6 @@ jobs: - "3.9" - "3.11" backend: - - name: dask - title: Dask - extras: - - dask - name: duckdb title: DuckDB extras: @@ -69,33 +65,7 @@ jobs: - examples additional_deps: - torch - - name: pandas - title: Pandas - extras: - - pandas - - name: sqlite - title: SQLite - extras: - - sqlite - - name: datafusion - title: Datafusion - extras: - - datafusion - - name: polars - title: Polars - extras: - - polars - - deltalake - - examples - - name: mysql - title: MySQL - services: - - mysql - extras: - - mysql - - geospatial - sys-deps: - - libgeos-dev + - decompiler - name: clickhouse title: ClickHouse services: @@ -103,100 +73,131 @@ jobs: extras: - clickhouse - examples - - name: postgres - title: PostgreSQL - extras: - - postgres - - geospatial - services: - - postgres - sys-deps: - - libgeos-dev - - name: postgres - title: PostgreSQL + Torch - extras: - - postgres - - geospatial - additional_deps: - - torch - services: - - postgres - sys-deps: - - libgeos-dev - - name: risingwave - title: Risingwave - services: - - risingwave - extras: - - risingwave - - name: impala - title: Impala - extras: - - impala - services: - - impala - - kudu - sys-deps: - - cmake - - 
ninja-build - - name: mssql - title: MS SQL Server - extras: - - mssql - services: - - mssql - sys-deps: - - freetds-dev - - unixodbc-dev - - tdsodbc - - name: trino - title: Trino - extras: - - trino - services: - - trino - - name: druid - title: Druid - extras: - - druid - services: - - druid - - name: oracle - title: Oracle - serial: true - extras: - - oracle - services: - - oracle - - name: exasol - title: Exasol - serial: true - extras: - - exasol - services: - - exasol - - name: flink - title: Flink - serial: true - extras: - - flink - additional_deps: - - apache-flink - - pytest-split - services: - - flink + # - name: dask + # title: Dask + # extras: + # - dask + # - name: pandas + # title: Pandas + # extras: + # - pandas + # - name: sqlite + # title: SQLite + # extras: + # - sqlite + # - name: datafusion + # title: Datafusion + # extras: + # - datafusion + # - name: polars + # title: Polars + # extras: + # - polars + # - deltalake + # - name: mysql + # title: MySQL + # services: + # - mysql + # extras: + # - mysql + # - geospatial + # sys-deps: + # - libgeos-dev + # - name: postgres + # title: PostgreSQL + # extras: + # - postgres + # - geospatial + # services: + # - postgres + # sys-deps: + # - libgeos-dev + # - name: postgres + # title: PostgreSQL + Torch + # extras: + # - postgres + # - geospatial + # additional_deps: + # - torch + # services: + # - postgres + # sys-deps: + # - libgeos-dev + # - name: impala + # title: Impala + # extras: + # - impala + # services: + # - impala + # - kudu + # sys-deps: + # - cmake + # - ninja-build + # - name: mssql + # title: MS SQL Server + # extras: + # - mssql + # services: + # - mssql + # sys-deps: + # - freetds-dev + # - unixodbc-dev + # - tdsodbc + # - name: trino + # title: Trino + # extras: + # - trino + # - postgres + # services: + # - trino + # - name: druid + # title: Druid + # extras: + # - druid + # services: + # - druid + # - name: oracle + # title: Oracle + # serial: true + # extras: + # - oracle + # services: + # - oracle + # - name: exasol + # title: Exasol + # serial: true + # extras: + # - exasol + # services: + # - exasol + # - name: flink + # title: Flink + # serial: true + # extras: + # - flink + # additional_deps: + # - apache-flink + # - pytest-split + # services: + # - flink + # - name: risingwave + # title: Risingwave + # services: + # - risingwave + # extras: + # - risingwave exclude: - - os: windows-latest - backend: - name: mysql - title: MySQL - extras: - - mysql - - geospatial - services: - - mysql - sys-deps: - - libgeos-dev + # - os: windows-latest + # backend: + # name: mysql + # title: MySQL + # extras: + # - mysql + # - geospatial + # services: + # - mysql + # sys-deps: + # - libgeos-dev - os: windows-latest backend: name: clickhouse @@ -206,114 +207,115 @@ jobs: - examples services: - clickhouse - - os: windows-latest - backend: - name: postgres - title: PostgreSQL - extras: - - postgres - - geospatial - services: - - postgres - sys-deps: - - libgeos-dev - - os: windows-latest - backend: - name: risingwave - title: Risingwave - services: - - risingwave - extras: - - risingwave - - os: windows-latest - backend: - name: postgres - title: PostgreSQL + Torch - extras: - - postgres - - geospatial - additional_deps: - - torch - services: - - postgres - sys-deps: - - libgeos-dev - - os: windows-latest - backend: - name: impala - title: Impala - extras: - - impala - services: - - impala - - kudu - sys-deps: - - cmake - - ninja-build - - os: windows-latest - backend: - name: mssql - title: MS SQL Server - extras: - - 
mssql - services: - - mssql - sys-deps: - - freetds-dev - - unixodbc-dev - - tdsodbc - - os: windows-latest - backend: - name: trino - title: Trino - services: - - trino - extras: - - trino - - os: windows-latest - backend: - name: druid - title: Druid - extras: - - druid - services: - - druid - - os: windows-latest - backend: - name: oracle - title: Oracle - serial: true - extras: - - oracle - services: - - oracle - - os: windows-latest - backend: - name: flink - title: Flink - serial: true - extras: - - flink - services: - - flink - - python-version: "3.11" - backend: - name: flink - title: Flink - serial: true - extras: - - flink - services: - - flink - - os: windows-latest - backend: - name: exasol - title: Exasol - serial: true - extras: - - exasol - services: - - exasol + # - os: windows-latest + # backend: + # name: postgres + # title: PostgreSQL + # extras: + # - postgres + # - geospatial + # services: + # - postgres + # sys-deps: + # - libgeos-dev + # - os: windows-latest + # backend: + # name: postgres + # title: PostgreSQL + Torch + # extras: + # - postgres + # - geospatial + # additional_deps: + # - torch + # services: + # - postgres + # sys-deps: + # - libgeos-dev + # - os: windows-latest + # backend: + # name: impala + # title: Impala + # extras: + # - impala + # services: + # - impala + # - kudu + # sys-deps: + # - cmake + # - ninja-build + # - os: windows-latest + # backend: + # name: mssql + # title: MS SQL Server + # extras: + # - mssql + # services: + # - mssql + # sys-deps: + # - freetds-dev + # - unixodbc-dev + # - tdsodbc + # - os: windows-latest + # backend: + # name: trino + # title: Trino + # services: + # - trino + # extras: + # - trino + # - postgres + # - os: windows-latest + # backend: + # name: druid + # title: Druid + # extras: + # - druid + # services: + # - druid + # - os: windows-latest + # backend: + # name: oracle + # title: Oracle + # serial: true + # extras: + # - oracle + # services: + # - oracle + # - os: windows-latest + # backend: + # name: flink + # title: Flink + # serial: true + # extras: + # - flink + # services: + # - flink + # - python-version: "3.11" + # backend: + # name: flink + # title: Flink + # serial: true + # extras: + # - flink + # services: + # - flink + # - os: windows-latest + # backend: + # name: exasol + # title: Exasol + # serial: true + # extras: + # - exasol + # services: + # - exasol + # - os: windows-latest + # backend: + # name: risingwave + # title: Risingwave + # services: + # - risingwave + # extras: + # - risingwave steps: - name: update and install system dependencies if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null @@ -366,7 +368,7 @@ jobs: run: poetry run pip list - name: "run parallel tests: ${{ matrix.backend.name }}" - if: ${{ !matrix.backend.serial }} + if: true # ${{ !matrix.backend.serial }} run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup env: IBIS_TEST_IMPALA_HOST: localhost @@ -377,148 +379,21 @@ jobs: # executes before common tests, they will fail with: # org.apache.flink.table.api.ValidationException: Table `default_catalog`.`default_database`.`functional_alltypes` was not found. # Therefore, we run backend-specific tests second to avoid this. 
- - name: "run serial tests: ${{ matrix.backend.name }}" - if: matrix.backend.serial && matrix.backend.name == 'flink' - run: | - just ci-check -m ${{ matrix.backend.name }} ibis/backends/tests - just ci-check -m ${{ matrix.backend.name }} ibis/backends/flink/tests - env: - IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} - FLINK_REMOTE_CLUSTER_ADDR: localhost - FLINK_REMOTE_CLUSTER_PORT: "8081" - - - name: "run serial tests: ${{ matrix.backend.name }}" - if: matrix.backend.serial && matrix.backend.name != 'flink' - run: just ci-check -m ${{ matrix.backend.name }} - env: - IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} - - - name: check that no untracked files were produced - shell: bash - run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . - - - name: upload code coverage - if: success() - uses: codecov/codecov-action@v4 - with: - flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} - - - name: Show docker compose logs on fail - if: matrix.backend.services != null && failure() - run: docker compose logs - - test_backends_min_version: - name: ${{ matrix.backend.title }} Min Version ${{ matrix.os }} python-${{ matrix.python-version }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: - - ubuntu-latest - - windows-latest - python-version: - - "3.9" - - "3.11" - backend: - - name: dask - title: Dask - deps: - - "dask[array,dataframe]@2022.9.1" - - "pandas@1.5.3" - extras: - - dask - - name: postgres - title: PostgreSQL - deps: - - "psycopg2@2.8.4" - - "GeoAlchemy2@0.6.3" - - "geopandas@0.6" - - "Shapely@2" - services: - - postgres - extras: - - postgres - - geospatial - exclude: - - os: windows-latest - backend: - name: postgres - title: PostgreSQL - deps: - - "psycopg2@2.8.4" - - "GeoAlchemy2@0.6.3" - - "geopandas@0.6" - - "Shapely@2" - services: - - postgres - extras: - - postgres - - geospatial - - python-version: "3.11" - backend: - name: postgres - title: PostgreSQL - deps: - - "psycopg2@2.8.4" - - "GeoAlchemy2@0.6.3" - - "geopandas@0.6" - - "Shapely@2" - services: - - postgres - extras: - - postgres - - geospatial - steps: - - name: checkout - uses: actions/checkout@v4 - - - name: install libgeos for shapely - if: matrix.backend.name == 'postgres' - run: | - sudo apt-get update -y -qq - sudo apt-get install -qq -y build-essential libgeos-dev - - - uses: extractions/setup-just@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: download backend data - run: just download-data - - - name: start services - if: matrix.backend.services != null - run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} - - - name: install python - uses: actions/setup-python@v5 - id: install_python - with: - python-version: ${{ matrix.python-version }} - - - name: install poetry - run: python -m pip install --upgrade pip 'poetry==1.7.1' - - - name: remove lonboard - # it requires a version of pandas that min versions are not compatible with - run: poetry remove lonboard - - - name: install minimum versions - run: poetry add --lock --optional ${{ join(matrix.backend.deps, ' ') }} - - - name: checkout the lock file - run: git checkout poetry.lock - - - name: lock with no updates - # poetry add is aggressive and will update other dependencies like - # numpy and 
pandas so we keep the pyproject.toml edits and then relock - # without updating anything except the requested versions - run: poetry lock --no-update - - - name: install ibis - run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" - - - name: run tests - run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup + # - name: "run serial tests: ${{ matrix.backend.name }}" + # if: matrix.backend.serial && matrix.backend.name == 'flink' + # run: | + # just ci-check -m ${{ matrix.backend.name }} ibis/backends/tests + # just ci-check -m ${{ matrix.backend.name }} ibis/backends/flink/tests + # env: + # IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} + # FLINK_REMOTE_CLUSTER_ADDR: localhost + # FLINK_REMOTE_CLUSTER_PORT: "8081" + # + # - name: "run serial tests: ${{ matrix.backend.name }}" + # if: matrix.backend.serial && matrix.backend.name != 'flink' + # run: just ci-check -m ${{ matrix.backend.name }} + # env: + # IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} - name: check that no untracked files were produced shell: bash @@ -534,196 +409,390 @@ jobs: if: matrix.backend.services != null && failure() run: docker compose logs - test_pyspark: - name: PySpark ${{ matrix.os }} python-${{ matrix.python-version }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: - - ubuntu-latest - python-version: - - "3.10" - steps: - - name: checkout - uses: actions/checkout@v4 - - - uses: actions/setup-java@v4 - with: - distribution: microsoft - java-version: 17 - - - uses: extractions/setup-just@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: download backend data - run: just download-data - - - name: install python - uses: actions/setup-python@v5 - id: install_python - with: - python-version: ${{ matrix.python-version }} - - - name: install poetry - run: python -m pip install --upgrade pip 'poetry==1.7.1' - - - name: remove lonboard - # it requires a version of pandas that pyspark is not compatible with - run: poetry remove lonboard - - - name: install maximum versions of pandas and numpy - run: poetry add --lock 'pandas@<2' 'numpy<1.24' - - - name: checkout the lock file - run: git checkout poetry.lock - - - name: lock with no updates - # poetry add is aggressive and will update other dependencies like - # numpy and pandas so we keep the pyproject.toml edits and then relock - # without updating anything except the requested versions - run: poetry lock --no-update - - - name: install ibis - run: poetry install --without dev --without docs --extras pyspark - - - name: run tests - run: just ci-check -m pyspark - - - name: check that no untracked files were produced - shell: bash - run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . 
- - - name: upload code coverage - # only upload coverage for jobs that aren't mostly xfails - if: success() && matrix.python-version != '3.11' - uses: codecov/codecov-action@v4 - with: - flags: backend,pyspark,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} - - test_backends_sqlalchemy2: - name: SQLAlchemy 2 ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: - - ubuntu-latest - python-version: - - "3.11" - backend: - - name: mssql - title: MS SQL Server - services: - - mssql - extras: - - mssql - sys-deps: - - freetds-dev - - unixodbc-dev - - tdsodbc - - name: mysql - title: MySQL - services: - - mysql - extras: - - geospatial - - mysql - - name: postgres - title: PostgreSQL - services: - - postgres - extras: - - geospatial - - postgres - sys-deps: - - libgeos-dev - - name: sqlite - title: SQLite - extras: - - sqlite - - name: trino - title: Trino - services: - - trino - extras: - - trino - - name: duckdb - title: DuckDB - extras: - - duckdb - - name: oracle - title: Oracle - serial: true - extras: - - oracle - services: - - oracle - steps: - - name: checkout - uses: actions/checkout@v4 - - - name: update and install system dependencies - if: matrix.backend.sys-deps != null - run: | - set -euo pipefail - - sudo apt-get update -qq -y - sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }} - - - uses: extractions/setup-just@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: download backend data - run: just download-data - - - name: start services - if: matrix.backend.services != null - run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} - - - name: install poetry - run: pipx install 'poetry==1.7.1' - - - name: install python - uses: actions/setup-python@v5 - id: install_python - with: - python-version: ${{ matrix.python-version }} - - - name: remove deps that are not compatible with sqlalchemy 2 - run: poetry remove snowflake-sqlalchemy sqlalchemy-exasol sqlalchemy-risingwave - - - name: add sqlalchemy 2 - run: poetry update sqlalchemy - - - name: check sqlalchemy is version 2 - run: poetry run python -c 'import sqlalchemy as sa; assert sa.__version__[0] == "2", sa.__version__' - - - name: install ibis - run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" - - - name: run tests - run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup - - - name: check that no untracked files were produced - shell: bash - run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . 
- - - name: upload code coverage - if: success() - uses: codecov/codecov-action@v4 - with: - flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} + # test_backends_min_version: + # name: ${{ matrix.backend.title }} Min Version ${{ matrix.os }} python-${{ matrix.python-version }} + # runs-on: ${{ matrix.os }} + # env: + # SQLALCHEMY_WARN_20: "1" + # strategy: + # fail-fast: false + # matrix: + # os: + # - ubuntu-latest + # - windows-latest + # python-version: + # - "3.9" + # - "3.11" + # backend: + # - name: dask + # title: Dask + # deps: + # - "dask[array,dataframe]@2022.9.1" + # - "pandas@1.5.3" + # extras: + # - dask + # - name: postgres + # title: PostgreSQL + # deps: + # - "psycopg2@2.8.4" + # - "GeoAlchemy2@0.6.3" + # - "geopandas@0.6" + # - "Shapely@2" + # services: + # - postgres + # extras: + # - postgres + # - geospatial + # exclude: + # - os: windows-latest + # backend: + # name: postgres + # title: PostgreSQL + # deps: + # - "psycopg2@2.8.4" + # - "GeoAlchemy2@0.6.3" + # - "geopandas@0.6" + # - "Shapely@2" + # services: + # - postgres + # extras: + # - postgres + # - geospatial + # - python-version: "3.11" + # backend: + # name: postgres + # title: PostgreSQL + # deps: + # - "psycopg2@2.8.4" + # - "GeoAlchemy2@0.6.3" + # - "geopandas@0.6" + # - "Shapely@2" + # services: + # - postgres + # extras: + # - postgres + # - geospatial + # steps: + # - name: checkout + # uses: actions/checkout@v4 + # + # - name: install libgeos for shapely + # if: matrix.backend.name == 'postgres' + # run: | + # sudo apt-get update -y -qq + # sudo apt-get install -qq -y build-essential libgeos-dev + # + # - uses: extractions/setup-just@v1 + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # + # - name: download backend data + # run: just download-data + # + # - name: start services + # if: matrix.backend.services != null + # run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} + # + # - name: install python + # uses: actions/setup-python@v5 + # id: install_python + # with: + # python-version: ${{ matrix.python-version }} + # + # - name: install poetry + # run: python -m pip install --upgrade pip 'poetry==1.7.1' + # + # - name: remove lonboard + # # it requires a version of pandas that min versions are not compatible with + # run: poetry remove lonboard + # + # - name: install minimum versions + # run: poetry add --lock --optional ${{ join(matrix.backend.deps, ' ') }} + # + # - name: checkout the lock file + # run: git checkout poetry.lock + # + # - name: lock with no updates + # # poetry add is aggressive and will update other dependencies like + # # numpy and pandas so we keep the pyproject.toml edits and then relock + # # without updating anything except the requested versions + # run: poetry lock --no-update + # + # - name: install ibis + # run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" + # + # - name: run tests + # run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup + # + # - name: check that no untracked files were produced + # shell: bash + # run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . 
+ # + # - name: upload code coverage + # if: success() + # uses: codecov/codecov-action@v4 + # with: + # flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} + # + # - name: Show docker compose logs on fail + # if: matrix.backend.services != null && failure() + # run: docker compose logs + + # test_pyspark: + # name: PySpark ${{ matrix.os }} python-${{ matrix.python-version }} + # runs-on: ${{ matrix.os }} + # strategy: + # fail-fast: false + # matrix: + # os: + # - ubuntu-latest + # python-version: + # - "3.10" + # steps: + # - name: checkout + # uses: actions/checkout@v4 + # + # - uses: actions/setup-java@v4 + # with: + # distribution: microsoft + # java-version: 17 + # + # - uses: extractions/setup-just@v1 + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # + # - name: download backend data + # run: just download-data + # + # - name: install python + # uses: actions/setup-python@v5 + # id: install_python + # with: + # python-version: ${{ matrix.python-version }} + # + # - name: install poetry + # run: python -m pip install --upgrade pip 'poetry==1.7.1' + # + # - name: remove lonboard + # # it requires a version of pandas that pyspark is not compatible with + # run: poetry remove lonboard + # + # - name: install maximum versions of pandas and numpy + # run: poetry add --lock 'pandas@<2' 'numpy<1.24' + # + # - name: checkout the lock file + # run: git checkout poetry.lock + # + # - name: lock with no updates + # # poetry add is aggressive and will update other dependencies like + # # numpy and pandas so we keep the pyproject.toml edits and then relock + # # without updating anything except the requested versions + # run: poetry lock --no-update + # + # - name: install ibis + # run: poetry install --without dev --without docs --extras pyspark + # + # - name: run tests + # run: just ci-check -m pyspark + # + # - name: check that no untracked files were produced + # shell: bash + # run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . + # + # - name: upload code coverage + # # only upload coverage for jobs that aren't mostly xfails + # if: success() && matrix.python-version != '3.11' + # uses: codecov/codecov-action@v4 + # with: + # flags: backend,pyspark,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} + + # gen_lockfile_sqlalchemy2: + # name: Generate Poetry Lockfile for SQLAlchemy 2 + # runs-on: ubuntu-latest + # steps: + # - name: checkout + # uses: actions/checkout@v4 + # + # - name: install python + # uses: actions/setup-python@v5 + # with: + # python-version: "3.11" + # + # - run: python -m pip install --upgrade pip 'poetry==1.7.1' + # + # - name: remove deps that are not compatible with sqlalchemy 2 + # run: poetry remove snowflake-sqlalchemy sqlalchemy-exasol + # + # - name: add sqlalchemy 2 + # run: poetry add --lock --optional 'sqlalchemy>=2,<3' + # + # - name: checkout the lock file + # run: git checkout poetry.lock + # + # - name: lock with no updates + # # poetry add is aggressive and will update other dependencies like + # # numpy and pandas so we keep the pyproject.toml edits and then relock + # # without updating anything except the requested versions + # run: poetry lock --no-update + # + # - name: check the sqlalchemy version + # run: poetry show sqlalchemy --no-ansi | grep version | cut -d ':' -f2- | sed 's/ //g' | grep -P '^2\.' 
+ # + # - name: upload deps file + # uses: actions/upload-artifact@v3 + # with: + # name: deps + # path: | + # pyproject.toml + # poetry.lock + + # test_backends_sqlalchemy2: + # name: SQLAlchemy 2 ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }} + # runs-on: ${{ matrix.os }} + # needs: + # - gen_lockfile_sqlalchemy2 + # env: + # ODBCSYSINI: "${{ github.workspace }}/.odbc" + # strategy: + # fail-fast: false + # matrix: + # os: + # - ubuntu-latest + # python-version: + # - "3.11" + # backend: + # - name: mssql + # title: MS SQL Server + # services: + # - mssql + # extras: + # - mssql + # sys-deps: + # - freetds-dev + # - unixodbc-dev + # - tdsodbc + # - name: mysql + # title: MySQL + # services: + # - mysql + # extras: + # - geospatial + # - mysql + # - name: postgres + # title: PostgreSQL + # services: + # - postgres + # extras: + # - geospatial + # - postgres + # sys-deps: + # - libgeos-dev + # - name: sqlite + # title: SQLite + # extras: + # - sqlite + # - name: trino + # title: Trino + # services: + # - trino + # extras: + # - trino + # - postgres + # - name: oracle + # title: Oracle + # serial: true + # extras: + # - oracle + # services: + # - oracle + # steps: + # - name: checkout + # uses: actions/checkout@v4 + # + # - name: update and install system dependencies + # if: matrix.backend.sys-deps != null + # run: | + # set -euo pipefail + # + # sudo apt-get update -qq -y + # sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }} + # + # - name: setup odbc for mssql + # if: ${{ matrix.backend.name == 'mssql' }} + # run: | + # mkdir -p "$ODBCSYSINI" + # + # { + # echo '[FreeTDS]' + # echo "Driver = libtdsodbc.so" + # } > "$ODBCSYSINI/odbcinst.ini" + # + # - uses: extractions/setup-just@v1 + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # + # - name: download backend data + # run: just download-data + # + # - name: start services + # if: matrix.backend.services != null + # run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} + # + # - name: install python + # uses: actions/setup-python@v5 + # id: install_python + # with: + # python-version: ${{ matrix.python-version }} + # + # - name: download poetry lockfile + # uses: actions/download-artifact@v3 + # with: + # name: deps + # path: deps + # + # - name: pull out lockfile + # run: | + # set -euo pipefail + # + # mv -f deps/* . + # rm -r deps + # + # - uses: syphar/restore-virtualenv@v1 + # with: + # requirement_files: poetry.lock + # custom_cache_key_element: ${{ matrix.backend.name }}-${{ steps.install_python.outputs.python-version }} + # + # - uses: syphar/restore-pip-download-cache@v1 + # with: + # requirement_files: poetry.lock + # custom_cache_key_element: ${{ steps.install_python.outputs.python-version }} + # + # - name: install poetry + # run: python -m pip install --upgrade pip 'poetry==1.7.1' + # + # - name: install ibis + # run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" + # + # - name: run tests + # run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup + # + # - name: check that no untracked files were produced + # shell: bash + # run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . 
+ # + # - name: upload code coverage + # if: success() + # uses: codecov/codecov-action@v4 + # with: + # flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} backends: # this job exists so that we can use a single job from this workflow to gate merging runs-on: ubuntu-latest needs: - - test_backends_min_version + # - test_backends_min_version - test_backends - - test_backends_sqlalchemy2 - - test_pyspark + # - test_backends_sqlalchemy2 + # - test_pyspark steps: - run: exit 0 diff --git a/.github/workflows/ibis-main.yml b/.github/workflows/ibis-main.yml index 9b69c1132ee4..0412f5cbc54b 100644 --- a/.github/workflows/ibis-main.yml +++ b/.github/workflows/ibis-main.yml @@ -86,12 +86,13 @@ jobs: - name: run all core tests and run benchmarks once parallel if: matrix.os != 'windows-latest' - # TODO(kszucs): restore "just ci-check -m "'core or benchmark'" -n auto" - run: poetry run pytest -v -n auto ibis/common ibis/expr ibis/tests ibis/formats + # TODO(cpcloud): bring back benchmarks smoke tests -m 'core or benchmarks' + run: just ci-check -m core --numprocesses auto - name: run all core tests and run benchmarks once serial if: matrix.os == 'windows-latest' - run: poetry run pytest -v ibis/common ibis/expr ibis/tests ibis/formats + # TODO(cpcloud): bring back benchmarks smoke tests -m 'core or benchmarks' + run: just ci-check -m core - name: upload code coverage if: success() diff --git a/ibis/backends/base/__init__.py b/ibis/backends/base/__init__.py index a5a0cd0d2106..7190d831ba68 100644 --- a/ibis/backends/base/__init__.py +++ b/ibis/backends/base/__init__.py @@ -31,7 +31,6 @@ import pyarrow as pa import torch - __all__ = ("BaseBackend", "Database", "connect") _IBIS_TO_SQLGLOT_DIALECT = { @@ -1217,9 +1216,9 @@ def _cached(self, expr: ir.Table): if (result := self._query_cache.get(op)) is None: self._query_cache.store(expr) result = self._query_cache[op] - return ir.CachedTableExpr(result) + return ir.CachedTable(result) - def _release_cached(self, expr: ir.CachedTableExpr) -> None: + def _release_cached(self, expr: ir.CachedTable) -> None: """Releases the provided cached expression. 
Parameters diff --git a/ibis/backends/base/df/timecontext.py b/ibis/backends/base/df/timecontext.py index a04f905ce0c5..f84dd473bc4c 100644 --- a/ibis/backends/base/df/timecontext.py +++ b/ibis/backends/base/df/timecontext.py @@ -265,19 +265,19 @@ def adjust_context_alias( return adjust_context(op.arg, scope, timecontext) -# @adjust_context.register(ops.AsOfJoin) -# def adjust_context_asof_join( -# op: ops.AsOfJoin, scope: Scope, timecontext: TimeContext -# ) -> TimeContext: -# begin, end = timecontext +@adjust_context.register(ops.AsOfJoin) +def adjust_context_asof_join( + op: ops.AsOfJoin, scope: Scope, timecontext: TimeContext +) -> TimeContext: + begin, end = timecontext -# if op.tolerance is not None: -# from ibis.backends.pandas.execution import execute + if op.tolerance is not None: + from ibis.backends.pandas.execution import execute -# timedelta = execute(op.tolerance) -# return (begin - timedelta, end) + timedelta = execute(op.tolerance) + return (begin - timedelta, end) -# return timecontext + return timecontext @adjust_context.register(ops.WindowFunction) diff --git a/ibis/backends/base/sql/alchemy/registry.py b/ibis/backends/base/sql/alchemy/registry.py index 5ff058f72c8c..baa5d5fe287e 100644 --- a/ibis/backends/base/sql/alchemy/registry.py +++ b/ibis/backends/base/sql/alchemy/registry.py @@ -605,7 +605,7 @@ class array_filter(FunctionElement): ops.Coalesce: varargs(sa.func.coalesce), ops.NullIf: fixed_arity(sa.func.nullif, 2), ops.InValues: _in_values, - ops.InColumn: _in_column, + ops.InSubquery: _in_column, ops.Count: reduction(sa.func.count), ops.CountStar: _count_star, ops.CountDistinctStar: _count_distinct_star, diff --git a/ibis/backends/base/sql/compiler/query_builder.py b/ibis/backends/base/sql/compiler/query_builder.py index 50bd21eb62ae..4376e03b4a55 100644 --- a/ibis/backends/base/sql/compiler/query_builder.py +++ b/ibis/backends/base/sql/compiler/query_builder.py @@ -23,16 +23,6 @@ class TableSetFormatter: - _join_names = { - ops.InnerJoin: "INNER JOIN", - ops.LeftJoin: "LEFT OUTER JOIN", - ops.RightJoin: "RIGHT OUTER JOIN", - ops.OuterJoin: "FULL OUTER JOIN", - ops.LeftAntiJoin: "LEFT ANTI JOIN", - ops.LeftSemiJoin: "LEFT SEMI JOIN", - ops.CrossJoin: "CROSS JOIN", - } - def __init__(self, parent, node, indent=2): # `parent` is a `Select` instance, not a `TableSetFormatter` self.parent = parent diff --git a/ibis/backends/base/sql/compiler/select_builder.py b/ibis/backends/base/sql/compiler/select_builder.py index 41abc75b4a33..f1f9b73cc984 100644 --- a/ibis/backends/base/sql/compiler/select_builder.py +++ b/ibis/backends/base/sql/compiler/select_builder.py @@ -86,9 +86,9 @@ def _populate_context(self): def _make_table_aliases(self, node): ctx = self.context - if isinstance(node, ops.Join): + if isinstance(node, ops.JoinChain): for arg in node.args: - if isinstance(arg, ops.TableNode): + if isinstance(arg, ops.Relation): self._make_table_aliases(arg) elif not ctx.is_extracted(node): ctx.make_alias(node) @@ -112,7 +112,7 @@ def _collect_elements(self): if isinstance(self.op, ops.DummyTable): self.select_set = list(self.op.values) - elif isinstance(self.op, ops.TableNode): + elif isinstance(self.op, ops.Relation): self._collect(self.op, toplevel=True) else: self.select_set = [self.op] @@ -125,7 +125,7 @@ def _collect(self, op, toplevel=False): f(op, toplevel=toplevel) elif isinstance(op, (ops.PhysicalTable, ops.SQLQueryResult)): self._collect_PhysicalTable(op, toplevel=toplevel) - elif isinstance(op, ops.Join): + elif isinstance(op, ops.JoinChain): 
self._collect_Join(op, toplevel=toplevel) elif isinstance(op, ops.WindowingTVF): self._collect_WindowingTVF(op, toplevel=toplevel) @@ -140,7 +140,7 @@ def _collect_Distinct(self, op, toplevel=False): def _collect_Limit(self, op, toplevel=False): if toplevel: - if isinstance(table := op.table, ops.Limit): + if isinstance(table := op.parent, ops.Limit): self.table_set = table self.select_set = [table] else: @@ -184,27 +184,18 @@ def _collect_Aggregation(self, op, toplevel=False): self._collect(op.table) - def _collect_Selection(self, op, toplevel=False): - table = op.table + def _collect_Project(self, op, toplevel=False): + table = op.parent if toplevel: - if isinstance(table, ops.Join): + if isinstance(table, ops.JoinChain): self._collect_Join(table) else: self._collect(table) - selections = op.selections - sort_keys = op.sort_keys - filters = op.predicates - - if not selections: - # select * - selections = [table] - - self.order_by = sort_keys - self.select_set = selections + selections = op.values + self.select_set = list(selections.values()) self.table_set = table - self.filters = filters def _collect_InMemoryTable(self, node, toplevel=False): if toplevel: diff --git a/ibis/backends/base/sql/registry/main.py b/ibis/backends/base/sql/registry/main.py index 8a2d8bcbb5b9..91bf90f29707 100644 --- a/ibis/backends/base/sql/registry/main.py +++ b/ibis/backends/base/sql/registry/main.py @@ -355,7 +355,7 @@ def _floor(t, op): ops.IfElse: fixed_arity("if", 3), ops.Between: between, ops.InValues: binary_infix.in_values, - ops.InColumn: binary_infix.in_column, + ops.InSubquery: binary_infix.in_column, ops.SimpleCase: case.simple_case, ops.SearchedCase: case.searched_case, ops.Field: table_column, diff --git a/ibis/backends/base/sqlglot/__init__.py b/ibis/backends/base/sqlglot/__init__.py index 50cb23ec9a71..f8cc80e5c76f 100644 --- a/ibis/backends/base/sqlglot/__init__.py +++ b/ibis/backends/base/sqlglot/__init__.py @@ -32,11 +32,6 @@ def has_operation(cls, operation: type[ops.Value]) -> bool: dispatcher = cls.compiler.visit_node.register.__self__.dispatcher return dispatcher.dispatch(operation) is not dispatcher.dispatch(object) - def _transform( - self, sql: sge.Expression, table_expr: ir.TableExpr - ) -> sge.Expression: - return sql - def table( self, name: str, schema: str | None = None, database: str | None = None ) -> ir.Table: @@ -85,17 +80,16 @@ def _to_sqlglot( sql = sg.select(STAR).from_(sql) assert not isinstance(sql, sge.Subquery) - return [self._transform(sql, table_expr)] + return sql def compile( self, expr: ir.Expr, limit: str | None = None, params=None, **kwargs: Any ): - """Compile an Ibis expression to a ClickHouse SQL string.""" - queries = self._to_sqlglot(expr, limit=limit, params=params, **kwargs) - - return ";\n\n".join( - query.sql(dialect=self.name, pretty=True) for query in queries - ) + """Compile an Ibis expression to a SQL string.""" + query = self._to_sqlglot(expr, limit=limit, params=params, **kwargs) + sql = query.sql(dialect=self.name, pretty=True) + self._log(sql) + return sql def _to_sql(self, expr: ir.Expr, **kwargs) -> str: return self.compile(expr, **kwargs) @@ -221,10 +215,9 @@ def execute( sql = self.compile(table, limit=limit, **kwargs) schema = table.schema() - self._log(sql) with self._safe_raw_sql(sql) as cur: - result = self.fetch_from_cursor(cur, schema) + result = self._fetch_from_cursor(cur, schema) return expr.__pandas_result__(result) def drop_table( diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index 
c7e5e092778a..8c9879501726 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -7,21 +7,28 @@ import math import operator import string -from collections.abc import Mapping +from collections.abc import Iterator, Mapping from functools import partial, singledispatchmethod +from itertools import starmap from typing import TYPE_CHECKING, Any, Callable import sqlglot as sg import sqlglot.expressions as sge +import toolz from public import public import ibis.common.exceptions as com import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis.backends.base.sqlglot.rewrites import Select, Window, sqlize -from ibis.common.deferred import _ -from ibis.common.patterns import replace -from ibis.expr.analysis import p, x +from ibis.expr.rewrites import ( + add_one_to_nth_value_input, + add_order_by_to_empty_ranking_window_functions, + empty_in_values_right_side, + one_to_zero_index, + replace_scalar_parameter, + unwrap_scalar_parameter, +) if TYPE_CHECKING: import ibis.expr.schema as sch @@ -97,43 +104,6 @@ def parenthesize(op, arg): STAR = sge.Star() -@replace(p.InValues(..., ())) -def empty_in_values_right_side(_): - """Replace checks against an empty right side with `False`.""" - return ops.Literal(False, dtype=dt.bool) - - -@replace( - p.WindowFunction( - p.PercentRank(x) | p.RankBase(x) | p.CumeDist(x) | p.NTile(x), - p.WindowFrame(..., order_by=()) >> _.copy(order_by=(x,)), - ) -) -def add_order_by_to_empty_ranking_window_functions(_): - """Add an ORDER BY clause to rank window functions that don't have one.""" - return _ - - -@replace( - p.WindowFunction(p.RankBase | p.NTile) - | p.StringFind - | p.FindInSet - | p.ArrayPosition -) -def one_to_zero_index(_, **__): - """Subtract one from one-index functions.""" - return ops.Subtract(_, 1) - - -@replace(ops.NthValue) -def add_one_to_nth_value_input(_, **__): - if isinstance(_.nth, ops.Literal): - nth = ops.Literal(_.nth.value + 1, dtype=_.nth.dtype) - else: - nth = ops.Add(_.nth, 1) - return _.copy(nth=nth) - - @public class SQLGlotCompiler(abc.ABC): __slots__ = "agg", "f" @@ -244,16 +214,18 @@ def fn(node, _, **kwargs): # substitute parameters immediately to avoid having to define a # ScalarParameter translation rule # - # this lets us avoid threading `params` through every `translate_val` call - # only to be used in the one place it would be needed: the ScalarParameter - # `translate_val` rule - params = {param.op(): value for param, value in (params or {}).items()} - replace_literals = p.ScalarParameter >> ( - lambda _: ops.Literal(value=params[_], dtype=_.dtype) - ) + # this lets us avoid threading `params` through every `translate_val` + # call only to be used in the one place it would be needed: the + # ScalarParameter `translate_val` rule + params = { + # remove aliases from scalar parameters + param.op().replace(unwrap_scalar_parameter): value + for param, value in (params or {}).items() + } op = op.replace( - replace_literals | functools.reduce(operator.or_, self.rewrites) + replace_scalar_parameter(params) + | functools.reduce(operator.or_, self.rewrites) ) op = sqlize(op) # apply translate rules in topological order @@ -279,7 +251,7 @@ def visit_ScalarSubquery(self, op, *, rel): @visit_node.register(ops.Alias) def visit_Alias(self, op, *, arg, name): - return arg.as_(self._gen_valid_name(name), quoted=self.quoted) + return arg @visit_node.register(ops.Literal) def visit_Literal(self, op, *, value, dtype, **kw): @@ -422,19 +394,9 @@ def visit_Negate(self, op, *, arg): 
@visit_node.register(ops.Not) def visit_Not(self, op, *, arg): if isinstance(arg, sge.Filter): - return sge.Filter( - this=self._de_morgan_law(arg.this), expression=arg.expression - ) # transform the not expression using _de_morgan_law + return sge.Filter(this=sg.not_(arg.this), expression=arg.expression) return sg.not_(paren(arg)) - @staticmethod - def _de_morgan_law(logical_op: sge.Expression): - if isinstance(logical_op, sge.LogicalAnd): - return sge.LogicalOr(this=sg.not_(paren(logical_op.this))) - if isinstance(logical_op, sge.LogicalOr): - return sge.LogicalAnd(this=sg.not_(paren(logical_op.this))) - return None - ### Timey McTimeFace @visit_node.register(ops.Date) @@ -535,7 +497,7 @@ def visit_DayOfWeekName(self, op, *, arg): # Saturday == 6 return sge.Case( this=(self.f.dayofweek(arg) + 6) % 7, - ifs=list(itertools.starmap(self.if_, enumerate(calendar.day_name))), + ifs=list(starmap(self.if_, enumerate(calendar.day_name))), ) @visit_node.register(ops.IntervalFromInteger) @@ -648,7 +610,8 @@ def visit_Sum(self, op, *, arg, where): @visit_node.register(ops.Quantile) @visit_node.register(ops.MultiQuantile) def visit_Quantile(self, op, *, arg, quantile, where): - return self.agg.quantile_cont(arg, quantile, where=where) + suffix = "cont" if op.arg.dtype.is_numeric() else "disc" + return self.agg[f"quantile_{suffix}"](arg, quantile, where=where) @visit_node.register(ops.Variance) @visit_node.register(ops.StandardDev) @@ -688,11 +651,11 @@ def visit_SimpleCase(self, op, *, base=None, cases, results, default): @visit_node.register(ops.ExistsSubquery) def visit_ExistsSubquery(self, op, *, rel): - return self.f.exists(rel.this.subquery()) + return self.f.exists(rel.this) @visit_node.register(ops.InSubquery) def visit_InSubquery(self, op, *, rel, needle): - return needle.isin(rel.this.subquery()) + return needle.isin(rel.this) @visit_node.register(ops.Array) def visit_Array(self, op, *, exprs): @@ -709,8 +672,7 @@ def visit_StructColumn(self, op, *, names, values): @visit_node.register(ops.StructField) def visit_StructField(self, op, *, arg, field): - val = arg.this if isinstance(op.arg, ops.Alias) else arg - return val[sge.convert(field)] + return arg[sge.convert(field)] @visit_node.register(ops.IdenticalTo) def visit_IdenticalTo(self, op, *, left, right): @@ -790,7 +752,7 @@ def visit_LagLead(self, op, *, arg, offset, default): @visit_node.register(ops.Argument) def visit_Argument(self, op, *, name: str, shape, dtype): - return sg.to_identifier(name) + return sg.to_identifier(op.param) @visit_node.register(ops.RowID) def visit_RowID(self, op, *, table): @@ -831,6 +793,15 @@ def visit_ArrayContains(self, op, *, arg, other): ## relations + def _dedup_name( + self, key: str, value: sge.Expression + ) -> Iterator[sge.Alias | sge.Column]: + return ( + value.as_(key, quoted=self.quoted) + if not isinstance(value, sge.Column) or key != value.name + else value + ) + @visit_node.register(Select) def visit_Select(self, op, *, parent, selections, predicates, sort_keys): # if we've constructed a useless projection return the parent relation @@ -840,9 +811,9 @@ def visit_Select(self, op, *, parent, selections, predicates, sort_keys): result = parent if selections: - result = sg.select( - *(sel.as_(name, quoted=self.quoted) for name, sel in selections.items()) - ).from_(result) + result = sg.select(*starmap(self._dedup_name, selections.items())).from_( + result + ) if predicates: result = result.where(*predicates) @@ -854,9 +825,7 @@ def visit_Select(self, op, *, parent, selections, predicates, 
sort_keys): @visit_node.register(ops.DummyTable) def visit_DummyTable(self, op, *, values): - return sg.select( - *(value.as_(key, quoted=self.quoted) for key, value in values.items()) - ) + return sg.select(*starmap(self._dedup_name, values.items())) @visit_node.register(ops.UnboundTable) def visit_UnboundTable(self, op, *, name: str, schema: sch.Schema): @@ -874,13 +843,11 @@ def visit_DatabaseTable(self, op, *, name, namespace, schema, source): @visit_node.register(ops.SelfReference) def visit_SelfReference(self, op, *, parent, identifier): - return parent.as_(op.name, quoted=self.quoted) + return parent @visit_node.register(ops.JoinChain) def visit_JoinChain(self, op, *, first, rest, values): - result = sg.select( - *(value.as_(key, quoted=self.quoted) for key, value in values.items()) - ).from_(first) + result = sg.select(*starmap(self._dedup_name, values.items())).from_(first) for link in rest: if isinstance(link, sge.Alias): @@ -914,6 +881,7 @@ def visit_JoinLink(self, op, *, how, table, predicates): "cross": "cross", "outer": "outer", } + assert predicates return sge.Join( this=table, side=sides[how], kind=kinds[how], on=sg.and_(*predicates) ) @@ -926,12 +894,7 @@ def _gen_valid_name(name: str) -> str: def visit_Project(self, op, *, parent, values): # needs_alias should never be true here in explicitly, but it may get # passed via a (recursive) call to translate_val - return sg.select( - *( - value.as_(self._gen_valid_name(key), quoted=self.quoted) - for key, value in values.items() - ) - ).from_(parent) + return sg.select(*starmap(self._dedup_name, values.items())).from_(parent) @staticmethod def _generate_groups(groups): @@ -940,13 +903,11 @@ def _generate_groups(groups): @visit_node.register(ops.Aggregate) def visit_Aggregate(self, op, *, parent, groups, metrics): sel = sg.select( - *( - value.as_(self._gen_valid_name(key), quoted=self.quoted) - for key, value in groups.items() + *starmap( + self._dedup_name, toolz.keymap(self._gen_valid_name, groups).items() ), - *( - value.as_(self._gen_valid_name(key), quoted=self.quoted) - for key, value in metrics.items() + *starmap( + self._dedup_name, toolz.keymap(self._gen_valid_name, metrics).items() ), ).from_(parent) @@ -1112,12 +1073,12 @@ def visit_FillNa(self, op, *, parent, replacements): ] return sg.select(*exprs).from_(parent) - # @visit_node.register(ops.View) - # def visit_View(self, op, *, child, name: str): - # # TODO: find a way to do this without creating a temporary view - # backend = op.child.to_expr()._find_backend() - # backend._create_temp_view(table_name=name, source=sg.select(STAR).from_(child)) - # return sg.table(name, quoted=self.quoted) + @visit_node.register(ops.View) + def visit_View(self, op, *, child, name: str): + # TODO: find a way to do this without creating a temporary view + backend = op.child.to_expr()._find_backend() + backend._create_temp_view(table_name=name, source=sg.select(STAR).from_(child)) + return sg.table(name, quoted=self.quoted) @visit_node.register(ops.SQLStringView) def visit_SQLStringView(self, op, *, query: str, name: str, child): @@ -1134,6 +1095,18 @@ def visit_SQLQueryResult(self, op, *, query, schema, source): def visit_Unnest(self, op, *, arg): return sge.Explode(this=arg) + @visit_node.register(ops.RegexSplit) + def visit_RegexSplit(self, op, *, arg, pattern): + return sge.RegexpSplit(this=arg, expression=pattern) + + @visit_node.register(ops.Levenshtein) + def visit_Levenshtein(self, op, *, left, right): + return sge.Levenshtein(this=left, expression=right) + + 
@visit_node.register(ops.JoinTable) + def visit_JoinTable(self, op, *, parent, index): + return parent + _SIMPLE_OPS = { ops.All: "bool_and", diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index eeff6692258a..00ef53c7ba20 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -419,6 +419,13 @@ def _from_sqlglot_TIMESTAMP_MS(cls) -> dt.Timestamp: def _from_sqlglot_TIMESTAMP_NS(cls) -> dt.Timestamp: return dt.Timestamp(scale=9, nullable=cls.default_nullable) + @classmethod + def _from_ibis_GeoSpatial(cls, dtype: dt.GeoSpatial): + assert ( + dtype.geotype == "geometry" + ), "DuckDB only supports geometry types; geography types are not supported" + return sge.DataType(this=typecode.GEOMETRY) + class TrinoType(SqlglotType): dialect = "trino" @@ -444,5 +451,26 @@ class OracleType(SqlglotType): dialect = "oracle" +class SnowflakeType(SqlglotType): + dialect = "snowflake" + default_temporal_scale = 9 + + @classmethod + def _from_sqlglot_FLOAT(cls) -> dt.Float64: + return dt.Float64(nullable=cls.default_nullable) + + @classmethod + def _from_sqlglot_DECIMAL(cls, precision=None, scale=None) -> dt.Decimal: + if scale is None or int(scale.this.this) == 0: + return dt.Int64(nullable=cls.default_nullable) + else: + return super()._from_sqlglot_DECIMAL(precision, scale) + + @classmethod + def _from_sqlglot_ARRAY(cls, value_type=None) -> dt.Array: + assert value_type is None + return dt.Array(dt.json, nullable=cls.default_nullable) + + class SQLiteType(SqlglotType): dialect = "sqlite" diff --git a/ibis/backends/base/sqlglot/rewrites.py b/ibis/backends/base/sqlglot/rewrites.py index 37c32e038530..ca999208aa39 100644 --- a/ibis/backends/base/sqlglot/rewrites.py +++ b/ibis/backends/base/sqlglot/rewrites.py @@ -3,9 +3,9 @@ from __future__ import annotations -import os from typing import Literal, Optional +import toolz from public import public import ibis.expr.datashape as ds @@ -115,14 +115,11 @@ def merge_select_select(_): return Select( _.parent.parent, selections=selections, - predicates=_.parent.predicates + predicates, - sort_keys=_.parent.sort_keys + sort_keys, + predicates=tuple(toolz.unique(_.parent.predicates + predicates)), + sort_keys=tuple(toolz.unique(_.parent.sort_keys + sort_keys)), ) -DEBUG = os.environ.get("IBIS_SQL_DEBUG", False) - - def sqlize(node): """Lower the ibis expression graph to a SQL-like relational algebra.""" step1 = node.replace( @@ -131,11 +128,5 @@ def sqlize(node): | filter_to_select | sort_to_select ) - if DEBUG: - print("--------- STEP 1 ---------") - print(step1.to_expr()) step2 = step1.replace(merge_select_select) - if DEBUG: - print("--------- STEP 2 ---------") - print(step2.to_expr()) return step2 diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index 9668f47ca49f..746d35f41cc9 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -7,11 +7,11 @@ from contextlib import closing from functools import partial from typing import TYPE_CHECKING, Any, Literal +from urllib.parse import parse_qs, urlparse import clickhouse_connect as cc import pyarrow as pa import pyarrow_hotfix # noqa: F401 -import sqlalchemy as sa import sqlglot as sg import sqlglot.expressions as sge import toolz @@ -26,7 +26,7 @@ from ibis import util from ibis.backends.base import BaseBackend, CanCreateDatabase from ibis.backends.base.sqlglot import SQLGlotBackend -from ibis.backends.base.sqlglot.compiler import C, F +from 
ibis.backends.base.sqlglot.compiler import C from ibis.backends.clickhouse.compiler import ClickHouseCompiler from ibis.backends.clickhouse.datatypes import ClickhouseType @@ -74,21 +74,28 @@ def _from_url(self, url: str, **kwargs) -> BaseBackend: BaseBackend A backend instance """ - url = sa.engine.make_url(url) - - kwargs = toolz.merge( - { - name: value - for name in ("host", "port", "database", "password") - if (value := getattr(url, name, None)) - }, - kwargs, - ) - if username := url.username: - kwargs["user"] = username - - kwargs.update(url.query) + url = urlparse(url) + database = url.path[1:] + query_params = parse_qs(url.query) + + connect_args = { + "user": url.username, + "password": url.password or "", + "host": url.hostname, + "database": database or "", + } + + for name, value in query_params.items(): + if len(value) > 1: + connect_args[name] = value + elif len(value) == 1: + connect_args[name] = value[0] + else: + raise com.IbisError(f"Invalid URL parameter: {name}") + + kwargs.update(connect_args) self._convert_kwargs(kwargs) + return self.connect(**kwargs) def _convert_kwargs(self, kwargs): @@ -172,7 +179,7 @@ def _safe_raw_sql(self, *args, **kwargs): @property def current_database(self) -> str: - with self._safe_raw_sql(sg.select(F.currentDatabase())) as result: + with self._safe_raw_sql(sg.select(self.compiler.f.currentDatabase())) as result: [(db,)] = result.result_rows return db @@ -194,7 +201,7 @@ def list_tables( query = sg.select(C.name).from_(sg.table("tables", db="system")) if database is None: - database = F.currentDatabase() + database = self.compiler.f.currentDatabase() else: database = sge.convert(database) @@ -681,7 +688,7 @@ def create_table( expression = None if obj is not None: - (expression,) = self._to_sqlglot(obj) + expression = self._to_sqlglot(obj) external_tables.update(self._collect_in_memory_tables(obj)) code = sge.Create( @@ -708,7 +715,7 @@ def create_view( database: str | None = None, overwrite: bool = False, ) -> ir.Table: - (expression,) = self._to_sqlglot(obj) + expression = self._to_sqlglot(obj) src = sge.Create( this=sg.table(name, db=database), kind="VIEW", diff --git a/ibis/backends/clickhouse/compiler.py b/ibis/backends/clickhouse/compiler.py index 56496271c513..7d04c531d77f 100644 --- a/ibis/backends/clickhouse/compiler.py +++ b/ibis/backends/clickhouse/compiler.py @@ -6,6 +6,7 @@ from typing import Any import sqlglot as sg +import sqlglot.expressions as sge from sqlglot import exp from sqlglot.dialects import ClickHouse from sqlglot.dialects.dialect import rename_func @@ -79,7 +80,7 @@ def visit_ArrayIndex(self, op, *, arg, index): @visit_node.register(ops.ArrayRepeat) def visit_ArrayRepeat(self, op, *, arg, times): param = sg.to_identifier("_") - func = sg.exp.Lambda(this=arg, expressions=[param]) + func = sge.Lambda(this=arg, expressions=[param]) return self.f.arrayFlatten(self.f.arrayMap(func, self.f.range(times))) @visit_node.register(ops.ArraySlice) @@ -107,7 +108,7 @@ def visit_ArraySlice(self, op, *, arg, start, stop): def visit_CountStar(self, op, *, where, arg): if where is not None: return self.f.countIf(where) - return sg.exp.Count(this=STAR) + return sge.Count(this=STAR) @visit_node.register(ops.Quantile) @visit_node.register(ops.MultiQuantile) @@ -116,7 +117,7 @@ def visit_QuantileMultiQuantile(self, op, *, arg, quantile, where): return self.agg.quantile(arg, quantile, where=where) func = "quantile" + "s" * isinstance(op, ops.MultiQuantile) - return sg.exp.ParameterizedAgg( + return sge.ParameterizedAgg( 
this=f"{func}If", expressions=util.promote_list(quantile), params=[arg, where], @@ -162,7 +163,7 @@ def visit_StringFind(self, op, *, arg, substr, start, end): @visit_node.register(ops.RegexSearch) def visit_RegexSearch(self, op, *, arg, pattern): - return sg.exp.RegexpLike(this=arg, expression=pattern) + return sge.RegexpLike(this=arg, expression=pattern) @visit_node.register(ops.RegexExtract) def visit_RegexExtract(self, op, *, arg, pattern, index): @@ -230,7 +231,7 @@ def visit_Literal(self, op, *, value, dtype, **kw): v = str(value) return self.f.toIPv6(v) if ":" in v else self.f.toIPv4(v) elif dtype.is_string(): - return sg.exp.convert(str(value).replace(r"\0", r"\\0")) + return sge.convert(str(value).replace(r"\0", r"\\0")) elif dtype.is_decimal(): precision = dtype.precision if precision is None or not 1 <= precision <= 76: @@ -249,26 +250,26 @@ def visit_Literal(self, op, *, value, dtype, **kw): return type_name(value, dtype.scale) elif dtype.is_numeric(): if not math.isfinite(value): - return sg.exp.Literal.number(str(value)) - return sg.exp.convert(value) + return sge.Literal.number(str(value)) + return sge.convert(value) elif dtype.is_interval(): if dtype.unit.short in ("ms", "us", "ns"): raise com.UnsupportedOperationError( "Clickhouse doesn't support subsecond interval resolutions" ) - return sg.exp.Interval( - this=sg.exp.convert(str(value)), unit=dtype.resolution.upper() + return sge.Interval( + this=sge.convert(str(value)), unit=dtype.resolution.upper() ) elif dtype.is_timestamp(): - funcname = "toDateTime" - fmt = "%Y-%m-%dT%H:%M:%S" + funcname = "parseDateTime" if micros := value.microsecond: funcname += "64" - fmt += ".%f" - args = [value.strftime(fmt)] + funcname += "BestEffort" + + args = [value.isoformat()] if micros % 1000: args.append(6) @@ -280,7 +281,33 @@ def visit_Literal(self, op, *, value, dtype, **kw): return self.f[funcname](*args) elif dtype.is_date(): - return self.f.toDate(value.strftime("%Y-%m-%d")) + return self.f.toDate(value.isoformat()) + elif dtype.is_array(): + value_type = dtype.value_type + values = [ + self.visit_Literal( + ops.Literal(v, dtype=value_type), value=v, dtype=value_type, **kw + ) + for v in value + ] + return self.f.array(*values) + elif dtype.is_map(): + value_type = dtype.value_type + keys = [] + values = [] + + for k, v in value.items(): + keys.append(sge.convert(k)) + values.append( + self.visit_Literal( + ops.Literal(v, dtype=value_type), + value=v, + dtype=value_type, + **kw, + ) + ) + + return self.f.map(self.f.array(*keys), self.f.array(*values)) elif dtype.is_struct(): fields = [ self.visit_Literal( @@ -407,9 +434,7 @@ def visit_Clip(self, op, *, arg, lower, upper): def visit_StructField(self, op, *, arg, field: str): arg_dtype = op.arg.dtype idx = arg_dtype.names.index(field) - return self.cast( - sg.exp.Dot(this=arg, expression=sg.exp.convert(idx + 1)), op.dtype - ) + return self.cast(sge.Dot(this=arg, expression=sge.convert(idx + 1)), op.dtype) @visit_node.register(ops.Repeat) def visit_Repeat(self, op, *, arg, times): @@ -440,10 +465,10 @@ def visit_DayOfWeekName(self, op, *, arg): base = ( ((self.f.toDayOfWeek(arg) - 1) % num_weekdays) + num_weekdays ) % num_weekdays - return sg.exp.Case( + return sge.Case( this=base, ifs=list(map(self.if_, *zip(*enumerate(days)))), - default=sg.exp.convert(""), + default=sge.convert(""), ) @visit_node.register(ops.Map) @@ -541,19 +566,19 @@ def visit_ArrayStringJoin(self, op, *, arg, sep): @visit_node.register(ops.ArrayMap) def visit_ArrayMap(self, op, *, arg, param, body): - func = 
sg.exp.Lambda(this=body, expressions=[param]) + func = sge.Lambda(this=body, expressions=[param]) return self.f.arrayMap(func, arg) @visit_node.register(ops.ArrayFilter) def visit_ArrayFilter(self, op, *, arg, param, body): - func = sg.exp.Lambda(this=body, expressions=[param]) + func = sge.Lambda(this=body, expressions=[param]) return self.f.arrayFilter(func, arg) @visit_node.register(ops.ArrayRemove) def visit_ArrayRemove(self, op, *, arg, other): x = sg.to_identifier("x") body = x.neq(other) - return self.f.arrayFilter(sg.exp.Lambda(this=body, expressions=[x]), arg) + return self.f.arrayFilter(sge.Lambda(this=body, expressions=[x]), arg) @visit_node.register(ops.ArrayUnion) def visit_ArrayUnion(self, op, *, left, right): @@ -578,6 +603,35 @@ def visit_CountDistinctStar( else: return self.f.countDistinct(columns) + @visit_node.register(ops.TimestampRange) + def visit_TimestampRange(self, op, *, start, stop, step): + unit = op.step.dtype.unit.name.lower() + + if not isinstance(op.step, ops.Literal): + raise com.UnsupportedOperationError( + "ClickHouse doesn't support non-literal step values" + ) + + step_value = op.step.value + + offset = sg.to_identifier("offset") + + func = sge.Lambda( + this=self.f.dateAdd(sg.to_identifier(unit), offset, start), + expressions=[offset], + ) + + if step_value == 0: + return self.f.array() + + return self.f.arrayMap( + func, self.f.range(0, self.f.timestampDiff(unit, start, stop), step_value) + ) + + @visit_node.register(ops.RegexSplit) + def visit_RegexSplit(self, op, *, arg, pattern): + return self.f.splitByRegexp(pattern, self.cast(arg, dt.String(nullable=False))) + @staticmethod def _generate_groups(groups): return groups @@ -588,6 +642,7 @@ def _generate_groups(groups): @visit_node.register(ops.Time) @visit_node.register(ops.TimeDelta) @visit_node.register(ops.StringToTimestamp) + @visit_node.register(ops.Levenshtein) def visit_Undefined(self, op, **_): raise com.OperationNotDefinedError(type(op).__name__) diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float/out.sql index e86039758336..f97b0b7e0747 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float/out.sql @@ -1,3 +1,3 @@ SELECT - t0.double_col AS double_col + t0.double_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bigint_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bigint_col/out.sql index 4bec79041636..b2e0d4507c4f 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bigint_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bigint_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.bigint_col AS bigint_col + t0.bigint_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bool_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bool_col/out.sql index 130cc6720677..d967873daf1f 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bool_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bool_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.bool_col AS 
bool_col + t0.bool_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/date_string_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/date_string_col/out.sql index 30f45904aefe..c8ad0f838a31 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/date_string_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/date_string_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.date_string_col AS date_string_col + t0.date_string_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/double_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/double_col/out.sql index e86039758336..f97b0b7e0747 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/double_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/double_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.double_col AS double_col + t0.double_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/float_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/float_col/out.sql index 5303bda94aa1..33277148af85 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/float_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/float_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.float_col AS float_col + t0.float_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/id/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/id/out.sql index 8a5c9fda9ccc..b4012dbb377d 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/id/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/id/out.sql @@ -1,3 +1,3 @@ SELECT - t0.id AS id + t0.id FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/int_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/int_col/out.sql index f6d916f49144..6b3541821ed4 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/int_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/int_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col AS int_col + t0.int_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/month/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/month/out.sql index c7dc05252aa5..d0eb5143c2b4 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/month/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/month/out.sql @@ -1,3 +1,3 @@ SELECT - t0.month AS month + t0.month FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/smallint_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/smallint_col/out.sql index 
7a69adfa78c9..dfcd8e0c0149 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/smallint_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/smallint_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.smallint_col AS smallint_col + t0.smallint_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/string_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/string_col/out.sql index 9be6e5524121..cfe88fb96a8c 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/string_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/string_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.string_col AS string_col + t0.string_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/timestamp_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/timestamp_col/out.sql index f251eb35e692..860302ea8039 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/timestamp_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/timestamp_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.timestamp_col AS timestamp_col + t0.timestamp_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/tinyint_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/tinyint_col/out.sql index 4f3dd3c555a9..c9f057e3aa11 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/tinyint_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/tinyint_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.tinyint_col AS tinyint_col + t0.tinyint_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/year/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/year/out.sql index 7d21b2c53090..5295b8fc6a8d 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/year/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/year/out.sql @@ -1,3 +1,3 @@ SELECT - t0.year AS year + t0.year FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql index 05665f06de1d..a62f4b1f78bc 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql @@ -1,7 +1,5 @@ SELECT - t0.id IN (( - SELECT - arrayJoin(t1.ids) AS ids - FROM way_view AS t1 - )) AS "InSubquery(id)" + t0.id IN (SELECT + arrayJoin(t1.ids) AS ids + FROM way_view AS t1) AS "InSubquery(id)" FROM node_view AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_array_expr_projection/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_array_expr_projection/out.sql index 0cc5ae5e1250..034aab28ceef 100644 --- 
a/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_array_expr_projection/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_array_expr_projection/out.sql @@ -2,7 +2,7 @@ SELECT CAST(t1.string_col AS Nullable(Float64)) AS "Cast(string_col, float64)" FROM ( SELECT - t0.string_col AS string_col, + t0.string_col, COUNT(*) AS count FROM functional_alltypes AS t0 GROUP BY diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql index 5156a61980fa..a13c5f564c3e 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql @@ -1,11 +1,18 @@ SELECT - t0.a AS a, - t0.b AS b, - t2.c AS c, - t2.d AS d, - t2.c / ( - t0.a - t0.b + t5.a, + t5.b, + t5.c, + t5.d, + t5.c / ( + t5.a - t5.b ) AS e -FROM s AS t0 -INNER JOIN t AS t2 - ON t0.a = t2.c \ No newline at end of file +FROM ( + SELECT + t2.a, + t2.b, + t3.c, + t3.d + FROM s AS t2 + INNER JOIN t AS t3 + ON t2.a = t3.c +) AS t5 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_count_name/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_count_name/out.sql index e6344e2c48f9..6edcaf0c84a9 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_count_name/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_count_name/out.sql @@ -1,5 +1,5 @@ SELECT - t0.a AS a, + t0.a, COALESCE(countIf(NOT ( t0.b )), 0) AS A, diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql index f75d124691e9..1bd6720ed390 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql @@ -1,17 +1,17 @@ SELECT - t0.id AS id, - t0.bool_col AS bool_col, - t0.tinyint_col AS tinyint_col, - t0.smallint_col AS smallint_col, - t0.int_col AS int_col, - t0.bigint_col AS bigint_col, - t0.float_col AS float_col, - t0.double_col AS double_col, - t0.date_string_col AS date_string_col, - t0.string_col AS string_col, - t0.timestamp_col AS timestamp_col, - t0.year AS year, - t0.month AS month + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month FROM functional_alltypes AS t0 WHERE t0.string_col IN ('foo', 'bar') \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql index 280d4be81f9f..85fd1cae375d 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql @@ -1,17 +1,17 @@ SELECT - t0.id AS id, - t0.bool_col AS bool_col, - t0.tinyint_col AS tinyint_col, - t0.smallint_col AS smallint_col, - t0.int_col AS int_col, - t0.bigint_col AS bigint_col, - t0.float_col AS float_col, - t0.double_col AS double_col, - t0.date_string_col AS date_string_col, - t0.string_col AS string_col, - t0.timestamp_col AS timestamp_col, - t0.year AS 
year, - t0.month AS month + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month FROM functional_alltypes AS t0 WHERE NOT ( diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql index 6ef5f03a897c..728987548b00 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql @@ -1,17 +1,17 @@ SELECT - t0.id AS id, - t0.bool_col AS bool_col, - t0.tinyint_col AS tinyint_col, - t0.smallint_col AS smallint_col, - t0.int_col AS int_col, - t0.bigint_col AS bigint_col, - t0.float_col AS float_col, - t0.double_col AS double_col, - t0.date_string_col AS date_string_col, - t0.string_col AS string_col, - t0.timestamp_col AS timestamp_col, - t0.year AS year, - t0.month AS month -FROM functional_alltypes AS t0 -INNER JOIN functional_alltypes AS t1 - ON t0.id = t1.id \ No newline at end of file + t1.id, + t1.bool_col, + t1.tinyint_col, + t1.smallint_col, + t1.int_col, + t1.bigint_col, + t1.float_col, + t1.double_col, + t1.date_string_col, + t1.string_col, + t1.timestamp_col, + t1.year, + t1.month +FROM functional_alltypes AS t1 +INNER JOIN functional_alltypes AS t3 + ON t1.id = t3.id \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql index d9820f3b119b..2ae649a0ea76 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql @@ -1,5 +1,5 @@ SELECT - t1.key AS key, + t1.key, SUM(( ( t1.value + 1 @@ -7,8 +7,8 @@ SELECT ) + 3) AS abc FROM ( SELECT - t0.key AS key, - t0.value AS value + t0.key, + t0.value FROM t0 AS t0 WHERE t0.value = 42 diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql index cf152c137c02..d22a599a88a7 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql @@ -1,5 +1,5 @@ SELECT - t1.key AS key, + t1.key, SUM(( ( t1.value + 1 @@ -7,8 +7,8 @@ SELECT ) + 3) AS foo FROM ( SELECT - t0.key AS key, - t0.value AS value + t0.key, + t0.value FROM t0 AS t0 WHERE t0.value = 42 diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_self_reference_simple/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_self_reference_simple/out.sql index b1f45cbca8ef..99d5c76e03f3 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_self_reference_simple/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_self_reference_simple/out.sql @@ -1,3 +1,3 @@ SELECT * -FROM functional_alltypes AS functional_alltypes_ref \ No newline at end of file +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql 
b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql index b1a66e364144..a1bdbc4f3fc3 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql @@ -1,26 +1,26 @@ SELECT - t0.playerID AS playerID, - t0.yearID AS yearID, - t0.stint AS stint, - t0.teamID AS teamID, - t0.lgID AS lgID, - t0.G AS G, - t0.AB AS AB, - t0.R AS R, - t0.H AS H, - t0.X2B AS X2B, - t0.X3B AS X3B, - t0.HR AS HR, - t0.RBI AS RBI, - t0.SB AS SB, - t0.CS AS CS, - t0.BB AS BB, - t0.SO AS SO, - t0.IBB AS IBB, - t0.HBP AS HBP, - t0.SH AS SH, - t0.SF AS SF, - t0.GIDP AS GIDP -FROM batting AS t0 -ANY JOIN awards_players AS t2 - ON t0.playerID = t2.awardID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS t2 +ANY JOIN awards_players AS t3 + ON t2.playerID = t3.awardID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql index 9e806e782a58..651c9ca46694 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql @@ -1,26 +1,26 @@ SELECT - t0.playerID AS playerID, - t0.yearID AS yearID, - t0.stint AS stint, - t0.teamID AS teamID, - t0.lgID AS lgID, - t0.G AS G, - t0.AB AS AB, - t0.R AS R, - t0.H AS H, - t0.X2B AS X2B, - t0.X3B AS X3B, - t0.HR AS HR, - t0.RBI AS RBI, - t0.SB AS SB, - t0.CS AS CS, - t0.BB AS BB, - t0.SO AS SO, - t0.IBB AS IBB, - t0.HBP AS HBP, - t0.SH AS SH, - t0.SF AS SF, - t0.GIDP AS GIDP -FROM batting AS t0 -LEFT ANY JOIN awards_players AS t2 - ON t0.playerID = t2.awardID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS t2 +LEFT ANY JOIN awards_players AS t3 + ON t2.playerID = t3.awardID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql index 3e49718a37e7..ebcca144d254 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql @@ -1,26 +1,26 @@ SELECT - t0.playerID AS playerID, - t0.yearID AS yearID, - t0.stint AS stint, - t0.teamID AS teamID, - t0.lgID AS lgID, - t0.G AS G, - t0.AB AS AB, - t0.R AS R, - t0.H AS H, - t0.X2B AS X2B, - t0.X3B AS X3B, - t0.HR AS HR, - t0.RBI AS RBI, - t0.SB AS SB, - t0.CS AS CS, - t0.BB AS BB, - t0.SO AS SO, - t0.IBB AS IBB, - t0.HBP AS HBP, - t0.SH AS SH, - t0.SF AS SF, - t0.GIDP AS GIDP -FROM batting AS t0 -INNER JOIN awards_players AS t2 - ON t0.playerID = 
t2.awardID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS t2 +INNER JOIN awards_players AS t3 + ON t2.playerID = t3.awardID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql index 3d2ffe6a0df8..5ae2ee1998b1 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql @@ -1,26 +1,26 @@ SELECT - t0.playerID AS playerID, - t0.yearID AS yearID, - t0.stint AS stint, - t0.teamID AS teamID, - t0.lgID AS lgID, - t0.G AS G, - t0.AB AS AB, - t0.R AS R, - t0.H AS H, - t0.X2B AS X2B, - t0.X3B AS X3B, - t0.HR AS HR, - t0.RBI AS RBI, - t0.SB AS SB, - t0.CS AS CS, - t0.BB AS BB, - t0.SO AS SO, - t0.IBB AS IBB, - t0.HBP AS HBP, - t0.SH AS SH, - t0.SF AS SF, - t0.GIDP AS GIDP -FROM batting AS t0 -LEFT OUTER JOIN awards_players AS t2 - ON t0.playerID = t2.awardID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS t2 +LEFT OUTER JOIN awards_players AS t3 + ON t2.playerID = t3.awardID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql index 84dddb02c539..43bddb923f0a 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql @@ -1,26 +1,26 @@ SELECT - t0.playerID AS playerID, - t0.yearID AS yearID, - t0.stint AS stint, - t0.teamID AS teamID, - t0.lgID AS lgID, - t0.G AS G, - t0.AB AS AB, - t0.R AS R, - t0.H AS H, - t0.X2B AS X2B, - t0.X3B AS X3B, - t0.HR AS HR, - t0.RBI AS RBI, - t0.SB AS SB, - t0.CS AS CS, - t0.BB AS BB, - t0.SO AS SO, - t0.IBB AS IBB, - t0.HBP AS HBP, - t0.SH AS SH, - t0.SF AS SF, - t0.GIDP AS GIDP -FROM batting AS t0 -ANY JOIN awards_players AS t2 - ON t0.playerID = t2.playerID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS t2 +ANY JOIN awards_players AS t3 + ON t2.playerID = t3.playerID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql index 6826f42c2752..5586b8b01ee0 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql +++ 
b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql @@ -1,26 +1,26 @@ SELECT - t0.playerID AS playerID, - t0.yearID AS yearID, - t0.stint AS stint, - t0.teamID AS teamID, - t0.lgID AS lgID, - t0.G AS G, - t0.AB AS AB, - t0.R AS R, - t0.H AS H, - t0.X2B AS X2B, - t0.X3B AS X3B, - t0.HR AS HR, - t0.RBI AS RBI, - t0.SB AS SB, - t0.CS AS CS, - t0.BB AS BB, - t0.SO AS SO, - t0.IBB AS IBB, - t0.HBP AS HBP, - t0.SH AS SH, - t0.SF AS SF, - t0.GIDP AS GIDP -FROM batting AS t0 -LEFT ANY JOIN awards_players AS t2 - ON t0.playerID = t2.playerID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS t2 +LEFT ANY JOIN awards_players AS t3 + ON t2.playerID = t3.playerID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql index c1d013f9fe49..f611516b394e 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql @@ -1,26 +1,26 @@ SELECT - t0.playerID AS playerID, - t0.yearID AS yearID, - t0.stint AS stint, - t0.teamID AS teamID, - t0.lgID AS lgID, - t0.G AS G, - t0.AB AS AB, - t0.R AS R, - t0.H AS H, - t0.X2B AS X2B, - t0.X3B AS X3B, - t0.HR AS HR, - t0.RBI AS RBI, - t0.SB AS SB, - t0.CS AS CS, - t0.BB AS BB, - t0.SO AS SO, - t0.IBB AS IBB, - t0.HBP AS HBP, - t0.SH AS SH, - t0.SF AS SF, - t0.GIDP AS GIDP -FROM batting AS t0 -INNER JOIN awards_players AS t2 - ON t0.playerID = t2.playerID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS t2 +INNER JOIN awards_players AS t3 + ON t2.playerID = t3.playerID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql index cd444e5fa871..c820c7e05b88 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql @@ -1,26 +1,26 @@ SELECT - t0.playerID AS playerID, - t0.yearID AS yearID, - t0.stint AS stint, - t0.teamID AS teamID, - t0.lgID AS lgID, - t0.G AS G, - t0.AB AS AB, - t0.R AS R, - t0.H AS H, - t0.X2B AS X2B, - t0.X3B AS X3B, - t0.HR AS HR, - t0.RBI AS RBI, - t0.SB AS SB, - t0.CS AS CS, - t0.BB AS BB, - t0.SO AS SO, - t0.IBB AS IBB, - t0.HBP AS HBP, - t0.SH AS SH, - t0.SF AS SF, - t0.GIDP AS GIDP -FROM batting AS t0 -LEFT OUTER JOIN awards_players AS t2 - ON t0.playerID = t2.playerID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS 
t2 +LEFT OUTER JOIN awards_players AS t3 + ON t2.playerID = t3.playerID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql index 57e97d5ec095..86d975c44589 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql @@ -2,19 +2,19 @@ SELECT SUM(t1.float_col) AS "Sum(float_col)" FROM ( SELECT - t0.id AS id, - t0.bool_col AS bool_col, - t0.tinyint_col AS tinyint_col, - t0.smallint_col AS smallint_col, - t0.int_col AS int_col, - t0.bigint_col AS bigint_col, - t0.float_col AS float_col, - t0.double_col AS double_col, - t0.date_string_col AS date_string_col, - t0.string_col AS string_col, - t0.timestamp_col AS timestamp_col, - t0.year AS year, - t0.month AS month + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month FROM functional_alltypes AS t0 WHERE t0.int_col > 0 diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql index 53eef384f148..d2bf6243fdea 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql @@ -1,24 +1,24 @@ SELECT - t2.string_col AS string_col + t2.string_col FROM ( SELECT - t1.string_col AS string_col, + t1.string_col, SUM(t1.float_col) AS total FROM ( SELECT - t0.id AS id, - t0.bool_col AS bool_col, - t0.tinyint_col AS tinyint_col, - t0.smallint_col AS smallint_col, - t0.int_col AS int_col, - t0.bigint_col AS bigint_col, - t0.float_col AS float_col, - t0.double_col AS double_col, - t0.date_string_col AS date_string_col, - t0.string_col AS string_col, - t0.timestamp_col AS timestamp_col, - t0.year AS year, - t0.month AS month + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month FROM functional_alltypes AS t0 WHERE t0.int_col > 0 diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql index 3fc646a6237e..cb651e1f7369 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql @@ -1,17 +1,17 @@ SELECT - t0.id AS id, - t0.bool_col AS bool_col, - t0.tinyint_col AS tinyint_col, - t0.smallint_col AS smallint_col, - t0.int_col AS int_col, - t0.bigint_col AS bigint_col, - t0.float_col AS float_col, - t0.double_col AS double_col, - t0.date_string_col AS date_string_col, - t0.string_col AS string_col, - t0.timestamp_col AS timestamp_col, - t0.year AS year, - t0.month AS month + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month FROM functional_alltypes AS t0 WHERE t0.float_col > 0 AND 
t0.int_col < ( diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql index e91cbcdc4595..a27f8a736dc5 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql @@ -1,17 +1,17 @@ SELECT - t0.id AS id, - t0.bool_col AS bool_col, - t0.tinyint_col AS tinyint_col, - t0.smallint_col AS smallint_col, - t0.int_col AS int_col, - t0.bigint_col AS bigint_col, - t0.float_col AS float_col, - t0.double_col AS double_col, - t0.date_string_col AS date_string_col, - t0.string_col AS string_col, - t0.timestamp_col AS timestamp_col, - t0.year AS year, - t0.month AS month + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month FROM functional_alltypes AS t0 WHERE t0.int_col > 0 AND t0.float_col BETWEEN 0 AND 1 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_timestamp/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_timestamp/out.sql index 2e6c66b7d831..cee980322a7b 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_timestamp/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_timestamp/out.sql @@ -1,5 +1,5 @@ SELECT - t0.uuid AS uuid, + t0.uuid, minIf(t0.ts, t0.search_level = 1) AS min_date FROM t AS t0 GROUP BY diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index cd754eb53ce4..e177b32324dd 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -535,7 +535,6 @@ def ddl_con(ddl_backend): @pytest.fixture( params=_get_backends_to_test( keep=( - "duckdb", "exasol", "mssql", "mysql", diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 5216197b8724..b1df75561970 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -9,6 +9,7 @@ from operator import itemgetter from pathlib import Path from typing import TYPE_CHECKING, Any +from urllib.parse import parse_qs, urlparse import duckdb import pandas as pd @@ -16,7 +17,6 @@ import pyarrow_hotfix # noqa: F401 import sqlglot as sg import sqlglot.expressions as sge -import toolz import ibis import ibis.common.exceptions as exc @@ -26,7 +26,6 @@ import ibis.expr.types as ir from ibis import util from ibis.backends.base import CanCreateSchema -from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported from ibis.backends.base.sqlglot import SQLGlotBackend from ibis.backends.base.sqlglot.compiler import STAR, C, F from ibis.backends.base.sqlglot.datatypes import DuckDBType @@ -107,9 +106,12 @@ def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: query = query.sql(dialect=self.name) return self.con.execute(query, **kwargs) - def _transform( - self, sql: sge.Expression, table_expr: ir.TableExpr - ) -> sge.Expression: + def _to_sqlglot( + self, expr: ir.Expr, limit: str | None = None, params=None, **_: Any + ): + sql = super()._to_sqlglot(expr, limit=limit, params=params) + + table_expr = expr.as_table() geocols = frozenset( name for name, typ in table_expr.schema().items() if typ.is_geospatial() ) @@ -175,7 +177,7 @@ def create_table( self._run_pre_execute_hooks(table) - (query,) = 
self._to_sqlglot(table) + query = self._to_sqlglot(table) else: query = None @@ -454,22 +456,20 @@ def _from_url(self, url: str, **kwargs) -> BaseBackend: BaseBackend A backend instance """ - import sqlalchemy as sa - - url = sa.engine.make_url(url) - - kwargs = toolz.merge( - { - name: value - for name in ("database", "read_only", "temp_directory") - if (value := getattr(url, name, None)) - }, - kwargs, - ) + url = urlparse(url) + database = url.path[1:] or ":memory:" + query_params = parse_qs(url.query) + + for name, value in query_params.items(): + if len(value) > 1: + kwargs[name] = value + elif len(value) == 1: + kwargs[name] = value[0] + else: + raise exc.IbisError(f"Invalid URL parameter: {name}") - kwargs.update(url.query) self._convert_kwargs(kwargs) - return self.connect(**kwargs) + return self.connect(database=database, **kwargs) def load_extension(self, extension: str, force_install: bool = False) -> None: """Install and load a duckdb extension by name or path. @@ -921,10 +921,7 @@ def list_tables( >>> con.list_tables(schema="my_schema") [] >>> with con.begin() as c: - ... c.exec_driver_sql( - ... "CREATE TABLE my_schema.baz (a INTEGER)" - ... ) # doctest: +ELLIPSIS - ... + ... c.exec_driver_sql("CREATE TABLE my_schema.baz (a INTEGER)") # doctest: +ELLIPSIS <...> >>> con.list_tables(schema="my_schema") ['baz'] @@ -1360,7 +1357,7 @@ def to_csv( with self._safe_raw_sql(copy_cmd): pass - def fetch_from_cursor( + def _fetch_from_cursor( self, cursor: duckdb.DuckDBPyConnection, schema: sch.Schema ) -> pd.DataFrame: import pandas as pd @@ -1384,10 +1381,7 @@ def fetch_from_cursor( for name, col in zip(table.column_names, table.columns) } ) - df = DuckDBPandasData.convert_table(df, schema) - if not df.empty and geospatial_supported: - return self._to_geodataframe(df, schema) - return df + return DuckDBPandasData.convert_table(df, schema) # TODO(gforsyth): this may not need to be specialized in the future @staticmethod diff --git a/ibis/backends/duckdb/compiler.py b/ibis/backends/duckdb/compiler.py index 4207f108b8f5..35fa4729f07b 100644 --- a/ibis/backends/duckdb/compiler.py +++ b/ibis/backends/duckdb/compiler.py @@ -329,6 +329,16 @@ def visit_Correlation(self, op, *, left, right, how, where): return self.agg.corr(left, right, where=where) + @visit_node.register(ops.GeoConvert) + def visit_GeoConvert(self, op, *, arg, source, target): + # 4th argument is to specify that the result is always_xy so that it + # matches the behavior of the equivalent geopandas functionality + return self.f.st_transform(arg, source, target, True) + + @visit_node.register(ops.HexDigest) + def visit_HexDigest(self, op, *, arg, how): + return self.f[how](arg) + _SIMPLE_OPS = { ops.ArrayPosition: "list_indexof", @@ -338,8 +348,8 @@ def visit_Correlation(self, op, *, left, right, how, where): ops.EndsWith: "suffix", ops.Hash: "hash", ops.IntegerRange: "range", + ops.TimestampRange: "range", ops.LPad: "lpad", - ops.Levenshtein: "levenshtein", ops.MapKeys: "map_keys", ops.MapLength: "cardinality", ops.MapMerge: "map_concat", @@ -349,7 +359,6 @@ def visit_Correlation(self, op, *, left, right, how, where): ops.StringAscii: "ascii", ops.TimeFromHMS: "make_time", ops.TypeOf: "typeof", - ops.Unnest: "unnest", ops.GeoPoint: "st_point", ops.GeoAsText: "st_astext", ops.GeoArea: "st_area", diff --git a/ibis/backends/duckdb/registry.py b/ibis/backends/duckdb/registry.py deleted file mode 100644 index a0733196c42a..000000000000 --- a/ibis/backends/duckdb/registry.py +++ /dev/null @@ -1,605 +0,0 @@ -from __future__ import 
annotations - -import operator -from functools import partial -from typing import TYPE_CHECKING, Any - -import numpy as np -import sqlalchemy as sa -from sqlalchemy.ext.compiler import compiles -from sqlalchemy.sql.functions import GenericFunction - -import ibis.backends.base.sql.registry.geospatial as geo -import ibis.expr.operations as ops -from ibis.backends.base.sql import alchemy -from ibis.backends.base.sql.alchemy import unary -from ibis.backends.base.sql.alchemy.registry import ( - _table_column, - array_filter, - array_map, - geospatial_functions, - reduction, - try_cast, -) -from ibis.backends.duckdb.datatypes import Geometry_WKB -from ibis.backends.postgres.registry import ( - _array_index, - _array_slice, - fixed_arity, - operation_registry, -) -from ibis.common.exceptions import UnsupportedOperationError - -if TYPE_CHECKING: - from collections.abc import Mapping - - from ibis.backends.base.sql.alchemy.datatypes import StructType - -operation_registry = { - op: operation_registry[op] - for op in operation_registry.keys() - geospatial_functions.keys() -} - - -def _round(t, op): - arg, digits = op.args - sa_arg = t.translate(arg) - - if digits is None: - return sa.func.round(sa_arg) - - return sa.func.round(sa_arg, t.translate(digits)) - - -_LOG_BASE_FUNCS = { - 2: sa.func.log2, - 10: sa.func.log, -} - - -def _centroid(t, op): - arg = t.translate(op.arg) - return sa.func.st_centroid(arg, type_=Geometry_WKB) - - -def _geo_flip_coordinates(t, op): - arg = t.translate(op.arg) - return sa.func.st_flipcoordinates(arg, type_=Geometry_WKB) - - -def _geo_end_point(t, op): - arg = t.translate(op.arg) - return sa.func.st_endpoint(arg, type_=Geometry_WKB) - - -def _geo_start_point(t, op): - arg = t.translate(op.arg) - return sa.func.st_startpoint(arg, type_=Geometry_WKB) - - -def _envelope(t, op): - arg = t.translate(op.arg) - return sa.func.st_envelope(arg, type_=Geometry_WKB) - - -def _geo_buffer(t, op): - arg = t.translate(op.arg) - radius = t.translate(op.radius) - return sa.func.st_buffer(arg, radius, type_=Geometry_WKB) - - -def _geo_unary_union(t, op): - arg = t.translate(op.arg) - return sa.func.st_union_agg(arg, type_=Geometry_WKB) - - -def _geo_point(t, op): - left = t.translate(op.left) - right = t.translate(op.right) - return sa.func.st_point(left, right, type_=Geometry_WKB) - - -def _geo_difference(t, op): - left = t.translate(op.left) - right = t.translate(op.right) - return sa.func.st_difference(left, right, type_=Geometry_WKB) - - -def _geo_intersection(t, op): - left = t.translate(op.left) - right = t.translate(op.right) - return sa.func.st_intersection(left, right, type_=Geometry_WKB) - - -def _geo_union(t, op): - left = t.translate(op.left) - right = t.translate(op.right) - return sa.func.st_union(left, right, type_=Geometry_WKB) - - -def _geo_convert(t, op): - arg = t.translate(op.arg) - source = op.source - target = op.target - - # sa.true() setting always_xy=True - return sa.func.st_transform(arg, source, target, sa.true(), type_=Geometry_WKB) - - -def _generic_log(arg, base, *, type_): - return sa.func.ln(arg, type_=type_) / sa.func.ln(base, type_=type_) - - -def _log(t, op): - arg, base = op.args - sqla_type = t.get_sqla_type(op.dtype) - sa_arg = t.translate(arg) - if base is not None: - sa_base = t.translate(base) - try: - base_value = sa_base.value - except AttributeError: - return _generic_log(sa_arg, sa_base, type_=sqla_type) - else: - func = _LOG_BASE_FUNCS.get(base_value, _generic_log) - return func(sa_arg, type_=sqla_type) - return sa.func.ln(sa_arg, 
type_=sqla_type) - - -def _timestamp_from_unix(t, op): - arg, unit = op.args - arg = t.translate(arg) - - if unit.short == "ms": - return sa.func.epoch_ms(arg) - elif unit.short == "s": - return sa.func.to_timestamp(arg) - else: - raise UnsupportedOperationError(f"{unit!r} unit is not supported!") - - -def _timestamp_bucket(t, op): - arg = t.translate(op.arg) - interval = t.translate(op.interval) - - origin = sa.literal_column("'epoch'::TIMESTAMP") - - if op.offset is not None: - origin += t.translate(op.offset) - return sa.func.time_bucket(interval, arg, origin) - - -class struct_pack(GenericFunction): - def __init__(self, values: Mapping[str, Any], *, type: StructType) -> None: - super().__init__() - self.values = values - self.type = type - - -@compiles(struct_pack, "duckdb") -def compiles_struct_pack(element, compiler, **kw): - quote = compiler.preparer.quote - args = ", ".join( - f"{quote(key)} := {compiler.process(value, **kw)}" - for key, value in element.values.items() - ) - return f"struct_pack({args})" - - -def _literal(t, op): - dtype = op.dtype - value = op.value - - if value is None: - return ( - sa.null() if dtype.is_null() else sa.cast(sa.null(), t.get_sqla_type(dtype)) - ) - - sqla_type = t.get_sqla_type(dtype) - - if dtype.is_interval(): - return getattr(sa.func, f"to_{dtype.unit.plural}")(value) - elif dtype.is_geospatial(): - return sa.literal_column(geo.translate_literal(op, inline_metadata=True)) - elif dtype.is_array(): - values = value.tolist() if isinstance(value, np.ndarray) else value - return sa.cast(sa.func.list_value(*values), sqla_type) - elif dtype.is_floating(): - if not np.isfinite(value): - if np.isnan(value): - value = "NaN" - else: - assert np.isinf(value), "value is neither finite, nan nor infinite" - prefix = "-" * (value < 0) - value = f"{prefix}Inf" - return sa.cast(sa.literal(value), sqla_type) - elif dtype.is_struct(): - return struct_pack( - { - key: t.translate(ops.Literal(val, dtype=dtype[key])) - for key, val in value.items() - }, - type=sqla_type, - ) - elif dtype.is_string(): - return sa.literal(value) - elif dtype.is_map(): - return sa.func.map( - sa.func.list_value(*value.keys()), sa.func.list_value(*value.values()) - ) - elif dtype.is_timestamp(): - return sa.cast(sa.literal(value.isoformat()), t.get_sqla_type(dtype)) - elif dtype.is_date(): - return sa.func.make_date(value.year, value.month, value.day) - elif dtype.is_time(): - return sa.func.make_time( - value.hour, value.minute, value.second + value.microsecond / 1e6 - ) - else: - return sa.cast(sa.literal(value), sqla_type) - - -if_ = getattr(sa.func, "if") - - -def _neg_idx_to_pos(array, idx): - arg_length = sa.func.array_length(array) - return if_(idx < 0, arg_length + sa.func.greatest(idx, -arg_length), idx) - - -def _regex_extract(string, pattern, index): - return sa.func.regexp_extract( - string, - pattern, - # DuckDB requires the index to be a constant, so we compile - # the value and inline it by using sa.text - sa.text(str(index.compile(compile_kwargs=dict(literal_binds=True)))), - ) - - -def _json_get_item(left, path): - # Workaround for https://github.com/duckdb/duckdb/issues/5063 - # In some situations duckdb silently does the wrong thing if - # the path is parametrized. 
- sa_path = sa.text(str(path.compile(compile_kwargs=dict(literal_binds=True)))) - return left.op("->")(sa_path) - - -def _strftime(t, op): - if not isinstance(op.format_str, ops.Literal): - raise UnsupportedOperationError( - f"DuckDB format_str must be a literal `str`; got {type(op.format_str)}" - ) - return sa.func.strftime(t.translate(op.arg), t.translate(op.format_str)) - - -def _strptime(t, op): - if not isinstance(op.format_str, ops.Literal): - raise UnsupportedOperationError( - f"DuckDB format_str must be a literal `str`; got {type(op.format_str)}" - ) - return sa.cast( - sa.func.strptime(t.translate(op.arg), t.translate(op.format_str)), - t.get_sqla_type(op.dtype), - ) - - -def _arbitrary(t, op): - if (how := op.how) == "heavy": - raise UnsupportedOperationError( - f"how={how!r} not supported in the DuckDB backend" - ) - return t._reduction(getattr(sa.func, how), op) - - -def _string_agg(t, op): - if not isinstance(op.sep, ops.Literal): - raise UnsupportedOperationError( - "Separator argument to group_concat operation must be a constant" - ) - agg = sa.func.string_agg(t.translate(op.arg), sa.text(repr(op.sep.value))) - if (where := op.where) is not None: - return agg.filter(t.translate(where)) - return agg - - -def _struct_column(t, op): - return struct_pack( - dict(zip(op.names, map(t.translate, op.values))), - type=t.get_sqla_type(op.dtype), - ) - - -@compiles(array_map, "duckdb") -def compiles_list_apply(element, compiler, **kw): - *args, signature, result = map(partial(compiler.process, **kw), element.clauses) - return f"list_apply({', '.join(args)}, {signature} -> {result})" - - -def _array_map(t, op): - return array_map( - t.translate(op.arg), sa.literal_column(f"({op.param})"), t.translate(op.body) - ) - - -@compiles(array_filter, "duckdb") -def compiles_list_filter(element, compiler, **kw): - *args, signature, result = map(partial(compiler.process, **kw), element.clauses) - return f"list_filter({', '.join(args)}, {signature} -> {result})" - - -def _array_filter(t, op): - return array_filter( - t.translate(op.arg), sa.literal_column(f"({op.param})"), t.translate(op.body) - ) - - -def _array_intersect(t, op): - name = "x" - parameter = ops.Argument( - name=name, shape=op.left.shape, dtype=op.left.dtype.value_type - ) - return t.translate( - ops.ArrayFilter( - op.left, param=parameter.param, body=ops.ArrayContains(op.right, parameter) - ) - ) - - -def _array_zip(t, op): - args = tuple(map(t.translate, op.arg)) - - i = sa.literal_column("i", type_=sa.INTEGER) - dtype = op.dtype - return array_map( - sa.func.range(1, sa.func.greatest(*map(sa.func.array_length, args)) + 1), - i, - struct_pack( - { - name: sa.func.list_extract(arg, i) - for name, arg in zip(dtype.value_type.names, args) - }, - type=t.get_sqla_type(dtype), - ), - ) - - -@compiles(try_cast, "duckdb") -def compiles_try_cast(element, compiler, **kw): - return "TRY_CAST({} AS {})".format( - compiler.process(element.clauses.clauses[0], **kw), - compiler.visit_typeclause(element), - ) - - -def _try_cast(t, op): - arg = t.translate(op.arg) - to = t.get_sqla_type(op.to) - return try_cast(arg, type_=to) - - -_temporal_delta = fixed_arity( - lambda part, start, end: sa.func.date_diff(part, end, start), 3 -) - - -def _to_json_collection(t, op): - typ = t.get_sqla_type(op.dtype) - return try_cast(t.translate(op.arg), typ, type_=typ) - - -def _array_remove(t, op): - arg = op.arg - param = ops.Argument(name="x", shape=arg.shape, dtype=arg.dtype.value_type) - return _array_filter( - t, - ops.ArrayFilter(arg, param=param.param, 
body=ops.NotEquals(param, op.other)), - ) - - -def _hexdigest(translator, op): - how = op.how - - arg_formatted = translator.translate(op.arg) - if how in ("md5", "sha256"): - return getattr(sa.func, how)(arg_formatted) - else: - raise NotImplementedError(how) - - -operation_registry.update( - { - ops.Array: ( - lambda t, op: sa.cast( - sa.func.list_value(*map(t.translate, op.exprs)), - t.get_sqla_type(op.dtype), - ) - ), - ops.TryCast: _try_cast, - ops.ArrayRepeat: fixed_arity( - lambda arg, times: sa.func.flatten( - sa.func.array( - sa.select(arg).select_from(sa.func.range(times)).scalar_subquery() - ) - ), - 2, - ), - ops.ArrayLength: unary(sa.func.array_length), - ops.ArraySlice: _array_slice( - index_converter=_neg_idx_to_pos, - array_length=sa.func.array_length, - func=sa.func.list_slice, - ), - ops.ArrayIndex: _array_index( - index_converter=_neg_idx_to_pos, func=sa.func.list_extract - ), - ops.ArrayMap: _array_map, - ops.ArrayFilter: _array_filter, - ops.ArrayContains: fixed_arity(sa.func.list_has, 2), - ops.ArrayPosition: fixed_arity( - lambda lst, el: sa.func.list_indexof(lst, el) - 1, 2 - ), - ops.ArrayDistinct: fixed_arity( - lambda arg: if_( - arg.is_(sa.null()), - sa.null(), - # append a null if the input array has a null - sa.func.list_distinct(arg) - + if_( - # list_count doesn't count nulls - sa.func.list_count(arg) < sa.func.array_length(arg), - sa.func.list_value(sa.null()), - sa.func.list_value(), - ), - ), - 1, - ), - ops.ArraySort: fixed_arity(sa.func.list_sort, 1), - ops.ArrayRemove: _array_remove, - ops.ArrayUnion: lambda t, op: t.translate( - ops.ArrayDistinct(ops.ArrayConcat((op.left, op.right))) - ), - ops.ArrayZip: _array_zip, - ops.DayOfWeekName: unary(sa.func.dayname), - ops.Literal: _literal, - ops.Log2: unary(sa.func.log2), - ops.Ln: unary(sa.func.ln), - ops.Log: _log, - ops.IsNan: unary(sa.func.isnan), - ops.Modulus: fixed_arity(operator.mod, 2), - ops.Round: _round, - ops.StructField: ( - lambda t, op: sa.func.struct_extract( - t.translate(op.arg), - sa.text(repr(op.field)), - type_=t.get_sqla_type(op.dtype), - ) - ), - ops.TableColumn: _table_column, - ops.TimestampFromUNIX: _timestamp_from_unix, - ops.TimestampBucket: _timestamp_bucket, - ops.TimestampNow: fixed_arity( - # duckdb 0.6.0 changes now to be a timestamp with time zone force - # it back to the original for backwards compatibility - lambda *_: sa.cast(sa.func.now(), sa.TIMESTAMP), - 0, - ), - ops.RegexExtract: fixed_arity(_regex_extract, 3), - ops.RegexReplace: fixed_arity( - lambda *args: sa.func.regexp_replace(*args, sa.text("'g'")), 3 - ), - ops.RegexSearch: fixed_arity(sa.func.regexp_matches, 2), - ops.StringContains: fixed_arity(sa.func.contains, 2), - ops.ApproxMedian: reduction( - # without inline text, duckdb fails with - # RuntimeError: INTERNAL Error: Invalid PhysicalType for GetTypeIdSize - lambda arg: sa.func.approx_quantile(arg, sa.text(str(0.5))) - ), - ops.ApproxCountDistinct: reduction(sa.func.approx_count_distinct), - ops.Mode: reduction(sa.func.mode), - ops.Strftime: _strftime, - ops.Arbitrary: _arbitrary, - ops.GroupConcat: _string_agg, - ops.StructColumn: _struct_column, - ops.ArgMin: reduction(sa.func.min_by), - ops.ArgMax: reduction(sa.func.max_by), - ops.BitwiseXor: fixed_arity(sa.func.xor, 2), - ops.JSONGetItem: fixed_arity(_json_get_item, 2), - ops.RowID: lambda *_: sa.literal_column("rowid"), - ops.StringToTimestamp: _strptime, - ops.Quantile: lambda t, op: ( - reduction(sa.func.quantile_cont)(t, op) - if op.arg.dtype.is_numeric() - else 
reduction(sa.func.quantile_disc)(t, op) - ), - ops.MultiQuantile: lambda t, op: ( - reduction(sa.func.quantile_cont)(t, op) - if op.arg.dtype.is_numeric() - else reduction(sa.func.quantile_disc)(t, op) - ), - ops.TypeOf: unary(sa.func.typeof), - ops.IntervalAdd: fixed_arity(operator.add, 2), - ops.IntervalSubtract: fixed_arity(operator.sub, 2), - ops.Capitalize: alchemy.sqlalchemy_operation_registry[ops.Capitalize], - ops.ArrayStringJoin: fixed_arity( - lambda sep, arr: sa.func.array_aggr(arr, sa.text("'string_agg'"), sep), 2 - ), - ops.StartsWith: fixed_arity(sa.func.prefix, 2), - ops.EndsWith: fixed_arity(sa.func.suffix, 2), - ops.Argument: lambda _, op: sa.literal_column(op.param), - ops.Unnest: unary(sa.func.unnest), - ops.MapGet: fixed_arity( - lambda arg, key, default: sa.func.coalesce( - sa.func.list_extract(sa.func.element_at(arg, key), 1), default - ), - 3, - ), - ops.Map: fixed_arity(sa.func.map, 2), - ops.MapContains: fixed_arity( - lambda arg, key: sa.func.array_length(sa.func.element_at(arg, key)) != 0, 2 - ), - ops.MapLength: unary(sa.func.cardinality), - ops.MapKeys: unary(sa.func.map_keys), - ops.MapValues: unary(sa.func.map_values), - ops.MapMerge: fixed_arity(sa.func.map_concat, 2), - ops.Hash: unary(sa.func.hash), - ops.HexDigest: _hexdigest, - ops.Median: reduction(sa.func.median), - ops.First: reduction(sa.func.first), - ops.Last: reduction(sa.func.last), - ops.ArrayIntersect: _array_intersect, - ops.TimeDelta: _temporal_delta, - ops.DateDelta: _temporal_delta, - ops.TimestampDelta: _temporal_delta, - ops.ToJSONMap: _to_json_collection, - ops.ToJSONArray: _to_json_collection, - ops.ArrayFlatten: unary(sa.func.flatten), - ops.IntegerRange: fixed_arity(sa.func.range, 3), - # geospatial - ops.GeoPoint: _geo_point, - ops.GeoAsText: unary(sa.func.ST_AsText), - ops.GeoArea: unary(sa.func.ST_Area), - ops.GeoBuffer: _geo_buffer, - ops.GeoCentroid: _centroid, - ops.GeoContains: fixed_arity(sa.func.ST_Contains, 2), - ops.GeoCovers: fixed_arity(sa.func.ST_Covers, 2), - ops.GeoCoveredBy: fixed_arity(sa.func.ST_CoveredBy, 2), - ops.GeoCrosses: fixed_arity(sa.func.ST_Crosses, 2), - ops.GeoDifference: _geo_difference, - ops.GeoDisjoint: fixed_arity(sa.func.ST_Disjoint, 2), - ops.GeoDistance: fixed_arity(sa.func.ST_Distance, 2), - ops.GeoDWithin: fixed_arity(sa.func.ST_DWithin, 3), - ops.GeoEndPoint: _geo_end_point, - ops.GeoEnvelope: _envelope, - ops.GeoEquals: fixed_arity(sa.func.ST_Equals, 2), - ops.GeoGeometryType: unary(sa.func.ST_GeometryType), - ops.GeoIntersection: _geo_intersection, - ops.GeoIntersects: fixed_arity(sa.func.ST_Intersects, 2), - ops.GeoIsValid: unary(sa.func.ST_IsValid), - ops.GeoLength: unary(sa.func.ST_Length), - ops.GeoNPoints: unary(sa.func.ST_NPoints), - ops.GeoOverlaps: fixed_arity(sa.func.ST_Overlaps, 2), - ops.GeoStartPoint: _geo_start_point, - ops.GeoTouches: fixed_arity(sa.func.ST_Touches, 2), - ops.GeoUnion: _geo_union, - ops.GeoUnaryUnion: _geo_unary_union, - ops.GeoWithin: fixed_arity(sa.func.ST_Within, 2), - ops.GeoX: unary(sa.func.ST_X), - ops.GeoY: unary(sa.func.ST_Y), - ops.GeoConvert: _geo_convert, - ops.GeoFlipCoordinates: _geo_flip_coordinates, - # other ops - ops.TimestampRange: fixed_arity(sa.func.range, 3), - ops.RegexSplit: fixed_arity(sa.func.str_split_regex, 2), - } -) - - -_invalid_operations = { - # ibis.expr.operations.strings - ops.Translate, -} - -operation_registry = { - k: v for k, v in operation_registry.items() if k not in _invalid_operations -} diff --git a/ibis/backends/duckdb/tests/conftest.py 
b/ibis/backends/duckdb/tests/conftest.py index 7ca578ea24ff..cc949ab9f9b5 100644 --- a/ibis/backends/duckdb/tests/conftest.py +++ b/ibis/backends/duckdb/tests/conftest.py @@ -7,7 +7,7 @@ import ibis from ibis.backends.conftest import TEST_TABLES from ibis.backends.tests.base import BackendTest -from ibis.conftest import SANDBOXED +from ibis.conftest import SANDBOXED, WINDOWS if TYPE_CHECKING: from collections.abc import Iterator @@ -49,8 +49,6 @@ def preload(self): @property def ddl_script(self) -> Iterator[str]: - from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported - parquet_dir = self.data_dir / "parquet" geojson_dir = self.data_dir / "geojson" for table in TEST_TABLES: @@ -60,7 +58,7 @@ def ddl_script(self) -> Iterator[str]: SELECT * FROM read_parquet('{parquet_dir / f'{table}.parquet'}') """ ) - if geospatial_supported and not SANDBOXED: + if not SANDBOXED: for table in TEST_TABLES_GEO: yield ( f""" @@ -82,18 +80,25 @@ def ddl_script(self) -> Iterator[str]: @staticmethod def connect(*, tmpdir, worker_id, **kw) -> BaseBackend: # use an extension directory per test worker to prevent simultaneous - # downloads - extension_directory = tmpdir.getbasetemp().joinpath("duckdb_extensions") - extension_directory.mkdir(exist_ok=True) - return ibis.duckdb.connect(extension_directory=extension_directory, **kw) + # downloads on windows + # + # avoid enabling on linux because this adds a lot of time to parallel + # test runs due to each worker getting its own extensions directory + if WINDOWS: + extension_directory = tmpdir.getbasetemp().joinpath("duckdb_extensions") + extension_directory.mkdir(exist_ok=True) + kw["extension_directory"] = extension_directory + return ibis.duckdb.connect(**kw) def load_tpch(self) -> None: - self.connection.raw_sql("CALL dbgen(sf=0.1)") + """Load the TPC-H dataset.""" + with self.connection._safe_raw_sql("CALL dbgen(sf=0.17)"): + pass def _load_data(self, **_: Any) -> None: """Load test data into a backend.""" - for stmt in self.ddl_script: - self.connection.raw_sql(stmt) + with self.connection._safe_raw_sql(";\n".join(self.ddl_script)): + pass @pytest.fixture(scope="session") diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql index b3523fe55df7..e68c65813913 100644 --- a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql @@ -1,3 +1,3 @@ SELECT - ST_DWITHIN(t0.geom, t0.geom, CAST(3.0 AS DOUBLE)) AS "GeoDWithin(geom, geom, 3.0)" -FROM t AS t0 + ST_DWITHIN(t0.geom, t0.geom, CAST(3.0 AS DOUBLE)) AS tmp +FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/as_text/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/as_text/out.sql index 498b544a506c..7da710b2dceb 100644 --- a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/as_text/out.sql +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/as_text/out.sql @@ -1,3 +1,3 @@ SELECT - ST_ASTEXT(t0.geom) AS "GeoAsText(geom)" + ST_ASTEXT(t0.geom) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/n_points/out.sql 
b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/n_points/out.sql index db42c12ad237..bf8ba88ffde2 100644 --- a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/n_points/out.sql +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/n_points/out.sql @@ -1,3 +1,3 @@ SELECT - ST_NPOINTS(t0.geom) AS "GeoNPoints(geom)" + ST_NPOINTS(t0.geom) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/test_client.py b/ibis/backends/duckdb/tests/test_client.py index e01467aa5f65..08cee6fb0954 100644 --- a/ibis/backends/duckdb/tests/test_client.py +++ b/ibis/backends/duckdb/tests/test_client.py @@ -189,6 +189,7 @@ def test_insert(con): assert t.count().execute() == 2 +@pytest.mark.xfail(reason="snowflake backend not yet rewritten") def test_to_other_sql(con, snapshot): pytest.importorskip("snowflake.connector") diff --git a/ibis/backends/duckdb/tests/test_datatypes.py b/ibis/backends/duckdb/tests/test_datatypes.py index 1e23cde4fdbf..7bd989209895 100644 --- a/ibis/backends/duckdb/tests/test_datatypes.py +++ b/ibis/backends/duckdb/tests/test_datatypes.py @@ -3,6 +3,7 @@ import duckdb_engine import numpy as np import pytest +from packaging.version import parse as vparse from pytest import param import ibis diff --git a/ibis/backends/duckdb/tests/test_geospatial.py b/ibis/backends/duckdb/tests/test_geospatial.py index 49d34f52f905..bbe01d51954a 100644 --- a/ibis/backends/duckdb/tests/test_geospatial.py +++ b/ibis/backends/duckdb/tests/test_geospatial.py @@ -17,7 +17,7 @@ def test_geospatial_point(zones, zones_gdf): - coord = zones.x_cent.point(zones.y_cent) + coord = zones.x_cent.point(zones.y_cent).name("coord") # this returns GeometryArray gp_coord = gpd.points_from_xy(zones_gdf.x_cent, zones_gdf.y_cent) @@ -34,13 +34,13 @@ def test_geospatial_point(zones, zones_gdf): ) def test_geospatial_unary_snapshot(operation, keywords, snapshot): t = ibis.table([("geom", "geometry")], name="t") - expr = getattr(t.geom, operation)(**keywords) + expr = getattr(t.geom, operation)(**keywords).name("tmp") snapshot.assert_match(ibis.to_sql(expr), "out.sql") def test_geospatial_dwithin(snapshot): t = ibis.table([("geom", "geometry")], name="t") - expr = t.geom.d_within(t.geom, 3.0) + expr = t.geom.d_within(t.geom, 3.0).name("tmp") snapshot.assert_match(ibis.to_sql(expr), "out.sql") @@ -62,7 +62,7 @@ def test_geospatial_dwithin(snapshot): ], ) def test_geospatial_unary_tm(op, keywords, gp_op, zones, zones_gdf): - expr = getattr(zones.geom, op)(**keywords) + expr = getattr(zones.geom, op)(**keywords).name("tmp") gp_expr = getattr(zones_gdf.geometry, gp_op) tm.assert_series_equal(expr.to_pandas(), gp_expr, check_names=False) @@ -76,10 +76,10 @@ def test_geospatial_unary_tm(op, keywords, gp_op, zones, zones_gdf): ], ) def test_geospatial_xy(op, keywords, gp_op, zones, zones_gdf): - cen = zones.geom.centroid() + cen = zones.geom.centroid().name("centroid") gp_cen = zones_gdf.geometry.centroid - expr = getattr(cen, op)(**keywords) + expr = getattr(cen, op)(**keywords).name("tmp") gp_expr = getattr(gp_cen, gp_op) tm.assert_series_equal(expr.to_pandas(), gp_expr, check_names=False) @@ -88,7 +88,7 @@ def test_geospatial_xy(op, keywords, gp_op, zones, zones_gdf): def test_geospatial_length(lines, lines_gdf): # note: ST_LENGTH returns 0 for the case of polygon # or multi polygon while pandas geopandas returns the perimeter. 
- length = lines.geom.length() + length = lines.geom.length().name("length") gp_length = lines_gdf.geometry.length tm.assert_series_equal(length.to_pandas(), gp_length, check_names=False) @@ -113,7 +113,7 @@ def test_geospatial_length(lines, lines_gdf): ], ) def test_geospatial_binary_tm(op, gp_op, zones, zones_gdf): - expr = getattr(zones.geom, op)(zones.geom) + expr = getattr(zones.geom, op)(zones.geom).name("tmp") gp_func = getattr(zones_gdf.geometry, gp_op)(zones_gdf.geometry) tm.assert_series_equal(expr.to_pandas(), gp_func, check_names=False) @@ -129,7 +129,7 @@ def test_geospatial_binary_tm(op, gp_op, zones, zones_gdf): ], ) def test_geospatial_unary_gtm(op, gp_op, zones, zones_gdf): - expr = getattr(zones.geom, op)() + expr = getattr(zones.geom, op)().name("tmp") gp_expr = getattr(zones_gdf.geometry, gp_op) gtm.assert_geoseries_equal(expr.to_pandas(), gp_expr, check_crs=False) @@ -146,14 +146,14 @@ def test_geospatial_unary_gtm(op, gp_op, zones, zones_gdf): ], ) def test_geospatial_binary_gtm(op, gp_op, zones, zones_gdf): - expr = getattr(zones.geom, op)(zones.geom) + expr = getattr(zones.geom, op)(zones.geom).name("tmp") gp_func = getattr(zones_gdf.geometry, gp_op)(zones_gdf.geometry) gtm.assert_geoseries_equal(expr.to_pandas(), gp_func, check_crs=False) def test_geospatial_end_point(lines, lines_gdf): - epoint = lines.geom.end_point() + epoint = lines.geom.end_point().name("end_point") # geopandas does not have end_point this is a work around to get it gp_epoint = lines_gdf.geometry.boundary.explode(index_parts=True).xs(1, level=1) @@ -161,7 +161,7 @@ def test_geospatial_end_point(lines, lines_gdf): def test_geospatial_start_point(lines, lines_gdf): - spoint = lines.geom.start_point() + spoint = lines.geom.start_point().name("start_point") # geopandas does not have start_point this is a work around to get it gp_spoint = lines_gdf.geometry.boundary.explode(index_parts=True).xs(0, level=1) @@ -170,7 +170,7 @@ def test_geospatial_start_point(lines, lines_gdf): # this one takes a bit longer than the rest. 
def test_geospatial_unary_union(zones, zones_gdf): - unary_union = zones.geom.unary_union() + unary_union = zones.geom.unary_union().name("unary_union") # this returns a shapely geometry object gp_unary_union = zones_gdf.geometry.unary_union @@ -182,7 +182,7 @@ def test_geospatial_unary_union(zones, zones_gdf): def test_geospatial_buffer_point(zones, zones_gdf): - cen = zones.geom.centroid() + cen = zones.geom.centroid().name("centroid") gp_cen = zones_gdf.geometry.centroid buffer = cen.buffer(100.0) @@ -290,8 +290,9 @@ def test_literal_geospatial_explicit(con, expr, expected): (shp_multipoint_0, "(0 0, 1 1, 2 2)"), ], ) -def test_literal_geospatial_inferred(con, shp, expected): +def test_literal_geospatial_inferred(con, shp, expected, snapshot): result = str(con.compile(ibis.literal(shp).name("result"))) name = type(shp).__name__.upper() pair = f"{name} {expected}" - assert result == f"SELECT {pair!r} AS result" + assert pair in result + snapshot.assert_match(result, "out.sql") diff --git a/ibis/backends/duckdb/tests/test_register.py b/ibis/backends/duckdb/tests/test_register.py index 54e85e7f86f7..16b8f2344d79 100644 --- a/ibis/backends/duckdb/tests/test_register.py +++ b/ibis/backends/duckdb/tests/test_register.py @@ -11,7 +11,6 @@ import pandas.testing as tm import pyarrow as pa import pytest -import sqlalchemy as sa import ibis import ibis.expr.datatypes as dt @@ -110,7 +109,7 @@ def test_read_geo_from_url(con, monkeypatch): loaded_exts = [] monkeypatch.setattr(con, "_load_extensions", lambda x, **_: loaded_exts.extend(x)) - with pytest.raises((sa.exc.OperationalError, sa.exc.ProgrammingError)): + with pytest.raises(duckdb.IOException): # The read will fail, either because the URL is bogus (which it is) or # because the current connection doesn't have the spatial extension # installed and so the call to `st_read` will raise a catalog error. 
@@ -355,7 +354,7 @@ def test_set_temp_dir(tmp_path): "nix on linux cannot download duckdb extensions or data due to sandboxing; " "duckdb will try to automatically install and load read_parquet" ), - raises=(duckdb.IOException, sa.exc.DBAPIError), + raises=duckdb.IOException, ) def test_s3_403_fallback(con, httpserver, monkeypatch): # monkeypatch to avoid downloading extensions in tests diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py index 34c7cb3e062c..230ccfe1d37a 100644 --- a/ibis/backends/tests/errors.py +++ b/ibis/backends/tests/errors.py @@ -3,8 +3,12 @@ try: from duckdb import ConversionException as DuckDBConversionException from duckdb import InvalidInputException as DuckDBInvalidInputException + from duckdb import NotImplementedException as DuckDBNotImplementedException + from duckdb import ParserException as DuckDBParserException except ImportError: - DuckDBConversionException = DuckDBInvalidInputException = None + DuckDBConversionException = ( + DuckDBInvalidInputException + ) = DuckDBParserException = DuckDBNotImplementedException = None try: from clickhouse_connect.driver.exceptions import ( @@ -16,6 +20,7 @@ except ImportError: ClickHouseDatabaseError = ClickHouseInternalError = None + try: from pyexasol.exceptions import ExaQueryError except ImportError: diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/clickhouse/out.sql new file mode 100644 index 000000000000..f26c12ac78c8 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/clickhouse/out.sql @@ -0,0 +1,5 @@ +SELECT + t0.id, + t0.bool_col +FROM functional_alltypes AS t0 +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/duckdb/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/duckdb/out.sql new file mode 100644 index 000000000000..f26c12ac78c8 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/duckdb/out.sql @@ -0,0 +1,5 @@ +SELECT + t0.id, + t0.bool_col +FROM functional_alltypes AS t0 +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/clickhouse/out.sql new file mode 100644 index 000000000000..f26c12ac78c8 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/clickhouse/out.sql @@ -0,0 +1,5 @@ +SELECT + t0.id, + t0.bool_col +FROM functional_alltypes AS t0 +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/duckdb/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/duckdb/out.sql new file mode 100644 index 000000000000..f26c12ac78c8 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/duckdb/out.sql @@ -0,0 +1,5 @@ +SELECT + t0.id, + t0.bool_col +FROM functional_alltypes AS t0 +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/clickhouse/out.sql new file mode 100644 index 000000000000..eb7db2731364 --- /dev/null +++ 
b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/clickhouse/out.sql @@ -0,0 +1,3 @@ +SELECT + SUM(t0.bigint_col) AS "Sum(bigint_col)" +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/duckdb/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/duckdb/out.sql new file mode 100644 index 000000000000..eb7db2731364 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/duckdb/out.sql @@ -0,0 +1,3 @@ +SELECT + SUM(t0.bigint_col) AS "Sum(bigint_col)" +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/clickhouse/out.sql new file mode 100644 index 000000000000..88b2af3a2cc3 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/clickhouse/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + t0.id, + t0.bool_col + FROM functional_alltypes AS t0 + LIMIT 10 +) AS t2 +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/duckdb/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/duckdb/out.sql new file mode 100644 index 000000000000..88b2af3a2cc3 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/duckdb/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + t0.id, + t0.bool_col + FROM functional_alltypes AS t0 + LIMIT 10 +) AS t2 +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql index 986701fa646e..cd122964c87e 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql @@ -1,9 +1,9 @@ SELECT - t0.x IN (( + t0.x IN ( SELECT - t0.x AS x + t0.x FROM t AS t0 WHERE t0.x > 2 - )) AS "InSubquery(x)" + ) AS "InSubquery(x)" FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql index 7b8c77fc31e8..dd1d25118977 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql @@ -1,9 +1,9 @@ SELECT - t0.x IN (( + t0.x IN ( SELECT - t0.x AS x + t0.x FROM t AS t0 WHERE t0.x > CAST(2 AS TINYINT) - )) AS "InSubquery(x)" + ) AS "InSubquery(x)" FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql index e2cd68f4d9a3..8962d00fdabe 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql @@ -1,37 +1,37 @@ SELECT - t10.field_of_study AS field_of_study, - t10.diff AS diff + t10.field_of_study, + t10.diff FROM ( SELECT - t5.field_of_study AS field_of_study, - t5.diff AS diff + t5.field_of_study, + t5.diff FROM ( SELECT - t4.field_of_study AS field_of_study, + t4.field_of_study, any(t4.diff) AS diff FROM ( SELECT - 
t3.field_of_study AS field_of_study, - t3.years AS years, - t3.degrees AS degrees, - t3.earliest_degrees AS earliest_degrees, - t3.latest_degrees AS latest_degrees, + t3.field_of_study, + t3.years, + t3.degrees, + t3.earliest_degrees, + t3.latest_degrees, t3.latest_degrees - t3.earliest_degrees AS diff FROM ( SELECT - t2.field_of_study AS field_of_study, - t2.years AS years, - t2.degrees AS degrees, + t2.field_of_study, + t2.years, + t2.degrees, any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees FROM ( SELECT - t1.field_of_study AS field_of_study, + t1.field_of_study, CAST(t1.__pivoted__.1 AS Nullable(String)) AS years, CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees FROM ( SELECT - t0.field_of_study AS field_of_study, + t0.field_of_study, arrayJoin( [CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))] ) AS __pivoted__ @@ -48,35 +48,35 @@ FROM ( LIMIT 10 UNION ALL SELECT - t5.field_of_study AS field_of_study, - t5.diff AS diff + t5.field_of_study, + t5.diff FROM ( SELECT - t4.field_of_study AS field_of_study, + t4.field_of_study, any(t4.diff) AS diff FROM ( SELECT - t3.field_of_study AS field_of_study, - t3.years AS years, - t3.degrees AS degrees, - t3.earliest_degrees AS earliest_degrees, - t3.latest_degrees AS latest_degrees, + t3.field_of_study, + t3.years, + t3.degrees, + t3.earliest_degrees, + t3.latest_degrees, t3.latest_degrees - t3.earliest_degrees AS diff FROM ( SELECT - t2.field_of_study AS field_of_study, - t2.years AS years, - t2.degrees AS degrees, + t2.field_of_study, + t2.years, + t2.degrees, any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS 
BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees FROM ( SELECT - t1.field_of_study AS field_of_study, + t1.field_of_study, CAST(t1.__pivoted__.1 AS Nullable(String)) AS years, CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees FROM ( SELECT - t0.field_of_study AS field_of_study, + t0.field_of_study, arrayJoin( [CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))] ) AS __pivoted__ diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql index 7af0e3831b68..6eaa105c4a49 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql @@ -1,37 +1,37 @@ SELECT - t10.field_of_study AS field_of_study, - t10.diff AS diff + t10.field_of_study, + t10.diff FROM ( SELECT - t5.field_of_study AS field_of_study, - t5.diff AS diff + t5.field_of_study, + t5.diff FROM ( SELECT - t4.field_of_study AS field_of_study, + t4.field_of_study, FIRST(t4.diff) AS diff FROM ( SELECT - t3.field_of_study AS field_of_study, - t3.years AS years, - t3.degrees AS degrees, - t3.earliest_degrees AS earliest_degrees, - t3.latest_degrees AS latest_degrees, + t3.field_of_study, + t3.years, + t3.degrees, + t3.earliest_degrees, + t3.latest_degrees, t3.latest_degrees - t3.earliest_degrees AS diff FROM ( SELECT - t2.field_of_study AS field_of_study, - t2.years AS years, - t2.degrees AS degrees, + t2.field_of_study, + t2.years, + t2.degrees, FIRST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, 
LAST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees FROM ( SELECT - t1.field_of_study AS field_of_study, + t1.field_of_study, t1.__pivoted__['years'] AS years, t1.__pivoted__['degrees'] AS degrees FROM ( SELECT - t0.field_of_study AS field_of_study, + t0.field_of_study, UNNEST( [{'years': '1970-71', 'degrees': t0."1970-71"}, {'years': '1975-76', 'degrees': t0."1975-76"}, {'years': '1980-81', 'degrees': t0."1980-81"}, {'years': '1985-86', 'degrees': t0."1985-86"}, {'years': '1990-91', 'degrees': t0."1990-91"}, {'years': '1995-96', 'degrees': t0."1995-96"}, {'years': '2000-01', 'degrees': t0."2000-01"}, {'years': '2005-06', 'degrees': t0."2005-06"}, {'years': '2010-11', 'degrees': t0."2010-11"}, {'years': '2011-12', 'degrees': t0."2011-12"}, {'years': '2012-13', 'degrees': t0."2012-13"}, {'years': '2013-14', 'degrees': t0."2013-14"}, {'years': '2014-15', 'degrees': t0."2014-15"}, {'years': '2015-16', 'degrees': t0."2015-16"}, {'years': '2016-17', 'degrees': t0."2016-17"}, {'years': '2017-18', 'degrees': t0."2017-18"}, {'years': '2018-19', 'degrees': t0."2018-19"}, {'years': '2019-20', 'degrees': t0."2019-20"}] ) AS __pivoted__ @@ -48,35 +48,35 @@ FROM ( LIMIT 10 UNION ALL SELECT - t5.field_of_study AS field_of_study, - t5.diff AS diff + t5.field_of_study, + t5.diff FROM ( SELECT - t4.field_of_study AS field_of_study, + t4.field_of_study, FIRST(t4.diff) AS diff FROM ( SELECT - t3.field_of_study AS field_of_study, - t3.years AS years, - t3.degrees AS degrees, - t3.earliest_degrees AS earliest_degrees, - t3.latest_degrees AS latest_degrees, + t3.field_of_study, + t3.years, + t3.degrees, + t3.earliest_degrees, + t3.latest_degrees, t3.latest_degrees - t3.earliest_degrees AS diff FROM ( SELECT - t2.field_of_study AS field_of_study, - t2.years AS years, - t2.degrees AS degrees, + t2.field_of_study, + t2.years, + t2.degrees, FIRST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, LAST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees FROM ( SELECT - t1.field_of_study AS field_of_study, + t1.field_of_study, t1.__pivoted__['years'] AS years, t1.__pivoted__['degrees'] AS degrees FROM ( SELECT - t0.field_of_study AS field_of_study, + t0.field_of_study, UNNEST( [{'years': '1970-71', 'degrees': t0."1970-71"}, {'years': '1975-76', 'degrees': t0."1975-76"}, {'years': '1980-81', 'degrees': t0."1980-81"}, {'years': '1985-86', 'degrees': t0."1985-86"}, {'years': '1990-91', 'degrees': t0."1990-91"}, {'years': '1995-96', 'degrees': t0."1995-96"}, {'years': '2000-01', 'degrees': t0."2000-01"}, {'years': '2005-06', 'degrees': t0."2005-06"}, {'years': '2010-11', 'degrees': t0."2010-11"}, {'years': '2011-12', 'degrees': t0."2011-12"}, {'years': '2012-13', 'degrees': t0."2012-13"}, {'years': '2013-14', 'degrees': t0."2013-14"}, {'years': '2014-15', 'degrees': t0."2014-15"}, {'years': '2015-16', 'degrees': t0."2015-16"}, {'years': '2016-17', 'degrees': t0."2016-17"}, {'years': '2017-18', 'degrees': t0."2017-18"}, {'years': '2018-19', 'degrees': t0."2018-19"}, {'years': '2019-20', 'degrees': t0."2019-20"}] ) AS __pivoted__ diff --git a/ibis/backends/tests/sql/conftest.py b/ibis/backends/tests/sql/conftest.py index 2fd23ed45cb3..04667e60e033 100644 --- a/ibis/backends/tests/sql/conftest.py +++ 
b/ibis/backends/tests/sql/conftest.py @@ -1,14 +1,9 @@ from __future__ import annotations import pytest -import sqlglot as sg import ibis - -pytest.importorskip("duckdb") - -from ibis.backends.duckdb import Backend as DuckDBBackend # noqa: E402 -from ibis.tests.expr.mocks import MockBackend # noqa: E402 +from ibis.tests.expr.mocks import MockBackend @pytest.fixture(scope="module") @@ -72,16 +67,8 @@ def bar_t(con): def to_sql(expr, *args, **kwargs) -> str: - if args: - raise TypeError("Unexpected positional arguments") - if kwargs: - raise TypeError("Unexpected keyword arguments") - - sql = DuckDBBackend.compiler.translate(expr.op(), params={}) - if isinstance(sql, sg.exp.Table): - sql = sg.select("*").from_(sql) - - return sql.sql(dialect="duckdb", pretty=True) + pytest.importorskip("duckdb") + return str(ibis.to_sql(expr, *args, dialect="duckdb", **kwargs)) @pytest.fixture(scope="module") diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql index f93e098292e7..4a8b9ef4cb44 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql @@ -1,9 +1,19 @@ -SELECT t0.* -FROM my_table t0 -WHERE (t0.`a` < 100) AND - (t0.`a` = ( - SELECT max(t0.`a`) AS `Max(a)` - FROM my_table t0 - WHERE t0.`a` < 100 -)) AND - (t0.`b` = 'a') \ No newline at end of file +SELECT + t0.a, + t0.b +FROM my_table AS t0 +WHERE + t0.a < CAST(100 AS TINYINT) + AND t0.a = ( + SELECT + MAX(t1.a) AS "Max(a)" + FROM ( + SELECT + t0.a, + t0.b + FROM my_table AS t0 + WHERE + t0.a < CAST(100 AS TINYINT) + ) AS t1 + ) + AND t0.b = 'a' \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql index 631a18bd4253..37c5668f835b 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql @@ -1,11 +1,18 @@ -WITH t0 AS ( - SELECT t1.*, t1.`b` * 2 AS `b2` - FROM my_table t1 -) -SELECT t0.`a`, t0.`b2` -FROM t0 -WHERE (t0.`a` < 100) AND - (t0.`a` = ( - SELECT max(t0.`a`) AS `Max(a)` - FROM t0 -)) \ No newline at end of file +SELECT + t0.a, + t0.b * CAST(2 AS TINYINT) AS b2 +FROM my_table AS t0 +WHERE + t0.a < CAST(100 AS TINYINT) + AND t0.a = ( + SELECT + MAX(t1.a) AS "Max(a)" + FROM ( + SELECT + t0.a, + t0.b * CAST(2 AS TINYINT) AS b2 + FROM my_table AS t0 + WHERE + t0.a < CAST(100 AS TINYINT) + ) AS t1 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql index 631a18bd4253..37c5668f835b 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql @@ -1,11 +1,18 @@ -WITH t0 AS ( - SELECT t1.*, t1.`b` * 2 AS `b2` - FROM my_table t1 -) -SELECT t0.`a`, t0.`b2` -FROM t0 -WHERE (t0.`a` < 100) AND - (t0.`a` = ( - SELECT max(t0.`a`) AS `Max(a)` - FROM t0 -)) \ No newline at end of file +SELECT + t0.a, + t0.b * CAST(2 AS TINYINT) AS b2 +FROM my_table AS t0 +WHERE + t0.a < CAST(100 AS TINYINT) + AND t0.a = ( + SELECT + MAX(t1.a) AS "Max(a)" + FROM ( + SELECT + t0.a, + t0.b * CAST(2 AS 
TINYINT) AS b2 + FROM my_table AS t0 + WHERE + t0.a < CAST(100 AS TINYINT) + ) AS t1 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql index 636796e7e04d..d38aa10366c4 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql @@ -2,6 +2,6 @@ SELECT DISTINCT * FROM ( SELECT - t0.string_col AS string_col + t0.string_col FROM functional_alltypes AS t0 ) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql index 2bdab32a6fc2..1e1635a607bf 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql @@ -1 +1,3 @@ -t0.int_col + CAST(4 AS TINYINT) \ No newline at end of file +SELECT + t0.int_col + CAST(4 AS TINYINT) AS "Add(int_col, 4)" +FROM int_col_table AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql index 20cc4cc3cdbb..0237f96353c1 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql @@ -1 +1,3 @@ -t0.int_col + CAST(4 AS TINYINT) AS foo \ No newline at end of file +SELECT + t0.int_col + CAST(4 AS TINYINT) AS foo +FROM int_col_table AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql index f6bd37af0d2f..1203afe54f42 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql @@ -1,14 +1,24 @@ SELECT - t1.string_col AS string_col, + t1.string_col, COUNT(DISTINCT t1.int_col) AS nunique FROM ( SELECT - * + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month FROM functional_alltypes AS t0 WHERE - ( - t0.bigint_col > CAST(0 AS TINYINT) - ) + t0.bigint_col > CAST(0 AS TINYINT) ) AS t1 GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql index f5405266b656..65eba6a3a4c8 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql @@ -1,29 +1,17 @@ SELECT - t5.key AS key + t3.key FROM ( SELECT - t1.string_col AS key, - CAST(t1.float_col AS DOUBLE) AS value - FROM ( - SELECT - * - FROM functional_alltypes AS t0 - WHERE - ( - t0.int_col > CAST(0 AS TINYINT) - ) - ) AS t1 + t0.string_col AS key, + CAST(t0.float_col AS DOUBLE) AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col > CAST(0 AS TINYINT) EXCEPT SELECT - t2.string_col AS 
key, - t2.double_col AS value - FROM ( - SELECT - * - FROM functional_alltypes AS t0 - WHERE - ( - t0.int_col <= CAST(0 AS TINYINT) - ) - ) AS t2 -) AS t5 \ No newline at end of file + t0.string_col AS key, + t0.double_col AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col <= CAST(0 AS TINYINT) +) AS t3 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py index aef66c38a37c..9da80d9792e9 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py @@ -4,6 +4,6 @@ t = ibis.table(name="t", schema={"a": "int64", "b": "string"}) f = t.filter(t.b == "m") agg = f.aggregate([f.a.sum().name("sum"), f.a.max()], by=[f.b]) -f1 = agg.filter(agg.Max(a) == 2) +f1 = agg.filter(agg["Max(a)"] == 2) result = f1.select(f1.b, f1.sum) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql index 0b54445c43ce..0043337a96ee 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql @@ -1,28 +1,21 @@ SELECT - t3.b AS b, - t3.sum AS sum + t2.b, + t2.sum FROM ( SELECT - * + t1.b, + SUM(t1.a) AS sum, + MAX(t1.a) AS "Max(a)" FROM ( SELECT - t1.b AS b, - SUM(t1.a) AS sum, - MAX(t1.a) AS "Max(a)" - FROM ( - SELECT - * - FROM t AS t0 - WHERE - ( - t0.b = 'm' - ) - ) AS t1 - GROUP BY - 1 - ) AS t2 - WHERE - ( - t2."Max(a)" = CAST(2 AS TINYINT) - ) -) AS t3 \ No newline at end of file + t0.a, + t0.b + FROM t AS t0 + WHERE + t0.b = 'm' + ) AS t1 + GROUP BY + 1 +) AS t2 +WHERE + t2."Max(a)" = CAST(2 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql index b2dc4a533237..6691834ce1b5 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql @@ -1,20 +1,14 @@ SELECT - t2.string_col AS string_col, - t2."CountStar()" AS "CountStar()" + t1.string_col, + t1."CountStar()" FROM ( SELECT - * - FROM ( - SELECT - t0.string_col AS string_col, - COUNT(*) AS "CountStar()", - MAX(t0.double_col) AS "Max(double_col)" - FROM functional_alltypes AS t0 - GROUP BY - 1 - ) AS t1 - WHERE - ( - t1."Max(double_col)" = CAST(1 AS TINYINT) - ) -) AS t2 \ No newline at end of file + t0.string_col, + COUNT(*) AS "CountStar()", + MAX(t0.double_col) AS "Max(double_col)" + FROM functional_alltypes AS t0 + GROUP BY + 1 +) AS t1 +WHERE + t1."Max(double_col)" = CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql index 382428d10e10..7c3cbc2fdf12 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql @@ -1,29 +1,17 @@ SELECT - t5.key AS key + t3.key FROM ( SELECT - t1.string_col AS key, - CAST(t1.float_col AS DOUBLE) AS value - FROM ( - SELECT - * - FROM 
functional_alltypes AS t0 - WHERE - ( - t0.int_col > CAST(0 AS TINYINT) - ) - ) AS t1 + t0.string_col AS key, + CAST(t0.float_col AS DOUBLE) AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col > CAST(0 AS TINYINT) INTERSECT SELECT - t2.string_col AS key, - t2.double_col AS value - FROM ( - SELECT - * - FROM functional_alltypes AS t0 - WHERE - ( - t0.int_col <= CAST(0 AS TINYINT) - ) - ) AS t2 -) AS t5 \ No newline at end of file + t0.string_col AS key, + t0.double_col AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col <= CAST(0 AS TINYINT) +) AS t3 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql index bcb0a9b1869f..cbb6ac1079a3 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql @@ -1,5 +1,5 @@ SELECT - t0.string_col AS string_col, + t0.string_col, COUNT(DISTINCT t0.int_col) AS int_card, COUNT(DISTINCT t0.smallint_col) AS smallint_card FROM functional_alltypes AS t0 diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql index 1fe424dcfbe9..45fa51a79cba 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql @@ -1,10 +1,11 @@ SELECT - * + t0.double_col, + t0.string_col, + t0.int_col, + t0.float_col FROM functional_alltypes AS t0 WHERE - ( - t0.double_col > CAST(3.14 AS DOUBLE) - ) + t0.double_col > CAST(3.14 AS DOUBLE) AND CONTAINS(t0.string_col, 'foo') AND ( ( diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql index d0e1564bcf77..c1410ecdac47 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql @@ -1,8 +1,18 @@ -SELECT t0.* -FROM my_table t0 -WHERE (t0.`a` < 100) AND - (t0.`a` = ( - SELECT max(t0.`a`) AS `Max(a)` - FROM my_table t0 - WHERE t0.`a` < 100 -)) \ No newline at end of file +SELECT + t0.a, + t0.b +FROM my_table AS t0 +WHERE + t0.a < CAST(100 AS TINYINT) + AND t0.a = ( + SELECT + MAX(t1.a) AS "Max(a)" + FROM ( + SELECT + t0.a, + t0.b + FROM my_table AS t0 + WHERE + t0.a < CAST(100 AS TINYINT) + ) AS t1 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/decompiled.py index a18437fa4d10..aef3bd85e809 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/decompiled.py @@ -11,9 +11,7 @@ }, ) param = ibis.param("timestamp") -proj = alltypes.select( - [alltypes.float_col, alltypes.timestamp_col, alltypes.int_col, alltypes.string_col] -).filter(alltypes.timestamp_col < param.name("my_param")) -agg = proj.group_by(proj.string_col).aggregate(proj.float_col.sum().name("foo")) +f = alltypes.filter(alltypes.timestamp_col < param.name("my_param")) +agg = f.aggregate([f.float_col.sum().name("foo")], 
by=[f.string_col]) result = agg.foo.count() diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql index 40f4523b6453..7ceaa3d33e79 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql @@ -1,11 +1,19 @@ -WITH t0 AS ( - SELECT t2.`float_col`, t2.`timestamp_col`, t2.`int_col`, t2.`string_col` - FROM alltypes t2 - WHERE t2.`timestamp_col` < '2014-01-01T00:00:00' -) -SELECT count(t1.`foo`) AS `Count(foo)` +SELECT + COUNT(t2.foo) AS "Count(foo)" FROM ( - SELECT t0.`string_col`, sum(t0.`float_col`) AS `foo` - FROM t0 - GROUP BY 1 -) t1 \ No newline at end of file + SELECT + t1.string_col, + SUM(t1.float_col) AS foo + FROM ( + SELECT + t0.float_col, + t0.timestamp_col, + t0.int_col, + t0.string_col + FROM alltypes AS t0 + WHERE + t0.timestamp_col < MAKE_TIMESTAMP(2014, 1, 1, 0, 0, 0.0) + ) AS t1 + GROUP BY + 1 +) AS t2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql index d0a0f1a458c6..4d00f47c36b0 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql @@ -1,25 +1,18 @@ SELECT - t1.string_col AS key, - CAST(t1.float_col AS DOUBLE) AS value + t3.key, + t3.value FROM ( SELECT - * + t0.string_col AS key, + CAST(t0.float_col AS DOUBLE) AS value FROM functional_alltypes AS t0 WHERE - ( - t0.int_col > CAST(0 AS TINYINT) - ) -) AS t1 -EXCEPT -SELECT - t2.string_col AS key, - t2.double_col AS value -FROM ( + t0.int_col > CAST(0 AS TINYINT) + EXCEPT SELECT - * + t0.string_col AS key, + t0.double_col AS value FROM functional_alltypes AS t0 WHERE - ( - t0.int_col <= CAST(0 AS TINYINT) - ) -) AS t2 \ No newline at end of file + t0.int_col <= CAST(0 AS TINYINT) +) AS t3 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/decompiled.py index 3b35a0e24ef2..76016cf2fc5f 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/decompiled.py @@ -21,5 +21,5 @@ ) result = functional_alltypes.select( - [functional_alltypes.string_col, functional_alltypes.int_col] + functional_alltypes.string_col, functional_alltypes.int_col ).distinct() diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql index f98b3697b64f..483b4fef6f49 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql @@ -2,7 +2,7 @@ SELECT DISTINCT * FROM ( SELECT - t0.string_col AS string_col, - t0.int_col AS int_col + t0.string_col, + t0.int_col FROM functional_alltypes AS t0 ) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py index 51188cc25f4e..143198197ad6 100644 --- 
a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py @@ -5,7 +5,10 @@ s = ibis.table(name="s", schema={"b": "string"}) t = ibis.table(name="t", schema={"a": "int64", "b": "string", "c": "timestamp"}) f = t.filter(t.c == lit) -p = f.select(f.a, f.b, lit.name("the_date")) -joinchain = p.inner_join(s, p.b == s.b) +joinchain = ( + f.select(f.a, f.b, lit.name("the_date")) + .inner_join(s, f.select(f.a, f.b, lit.name("the_date")).b == s.b) + .select(f.select(f.a, f.b, lit.name("the_date")).a) +) result = joinchain.filter(joinchain.a < 1.0) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql index f1828ac4c2f5..bf3eedb03752 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql @@ -1,27 +1,19 @@ SELECT - * + t6.a FROM ( SELECT - t3.a AS a + t4.a FROM ( SELECT - t2.a AS a, - t2.b AS b, + t0.a, + t0.b, MAKE_TIMESTAMP(2018, 1, 1, 0, 0, 0.0) AS the_date - FROM ( - SELECT - * - FROM t AS t1 - WHERE - ( - t1.c = MAKE_TIMESTAMP(2018, 1, 1, 0, 0, 0.0) - ) - ) AS t2 - ) AS t3 - INNER JOIN s AS t0 - ON t3.b = t0.b -) AS t5 + FROM t AS t0 + WHERE + t0.c = MAKE_TIMESTAMP(2018, 1, 1, 0, 0, 0.0) + ) AS t4 + INNER JOIN s AS t2 + ON t4.b = t2.b +) AS t6 WHERE - ( - t5.a < CAST(1.0 AS DOUBLE) - ) \ No newline at end of file + t6.a < CAST(1.0 AS DOUBLE) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql index a1a843860bae..72a05ef24e92 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql @@ -1,25 +1,18 @@ SELECT - t1.string_col AS key, - CAST(t1.float_col AS DOUBLE) AS value + t3.key, + t3.value FROM ( SELECT - * + t0.string_col AS key, + CAST(t0.float_col AS DOUBLE) AS value FROM functional_alltypes AS t0 WHERE - ( - t0.int_col > CAST(0 AS TINYINT) - ) -) AS t1 -INTERSECT -SELECT - t2.string_col AS key, - t2.double_col AS value -FROM ( + t0.int_col > CAST(0 AS TINYINT) + INTERSECT SELECT - * + t0.string_col AS key, + t0.double_col AS value FROM functional_alltypes AS t0 WHERE - ( - t0.int_col <= CAST(0 AS TINYINT) - ) -) AS t2 \ No newline at end of file + t0.int_col <= CAST(0 AS TINYINT) +) AS t3 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql index fe2052bf4317..0bf62fa423da 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql @@ -1,25 +1,18 @@ SELECT - t1.string_col AS key, - CAST(t1.float_col AS DOUBLE) AS value + t3.key, + t3.value FROM ( SELECT - * + t0.string_col AS key, + CAST(t0.float_col AS DOUBLE) AS value FROM functional_alltypes AS t0 WHERE - ( - t0.int_col > CAST(0 AS TINYINT) - ) -) AS t1 -UNION -SELECT - t2.string_col AS key, - t2.double_col AS value -FROM ( + t0.int_col > CAST(0 AS TINYINT) + UNION SELECT - * + t0.string_col AS key, + t0.double_col AS value FROM functional_alltypes AS t0 WHERE - ( - t0.int_col <= CAST(0 
AS TINYINT) - ) -) AS t2 \ No newline at end of file + t0.int_col <= CAST(0 AS TINYINT) +) AS t3 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql index 9380eee82c2a..dda59184ba53 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql @@ -1,11 +1,18 @@ SELECT - * -FROM t AS t0 -ORDER BY - t0.b ASC -UNION ALL -SELECT - * -FROM t AS t0 -ORDER BY - t0.b ASC \ No newline at end of file + t2.a, + t2.b +FROM ( + SELECT + t0.a, + t0.b + FROM t AS t0 + ORDER BY + t0.b ASC + UNION ALL + SELECT + t0.a, + t0.b + FROM t AS t0 + ORDER BY + t0.b ASC +) AS t2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql index 15ac865a88b0..b2fb8620109b 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql @@ -1,29 +1,17 @@ SELECT - t5.key AS key + t3.key FROM ( SELECT - t1.string_col AS key, - CAST(t1.float_col AS DOUBLE) AS value - FROM ( - SELECT - * - FROM functional_alltypes AS t0 - WHERE - ( - t0.int_col > CAST(0 AS TINYINT) - ) - ) AS t1 + t0.string_col AS key, + CAST(t0.float_col AS DOUBLE) AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col > CAST(0 AS TINYINT) UNION ALL SELECT - t2.string_col AS key, - t2.double_col AS value - FROM ( - SELECT - * - FROM functional_alltypes AS t0 - WHERE - ( - t0.int_col <= CAST(0 AS TINYINT) - ) - ) AS t2 -) AS t5 \ No newline at end of file + t0.string_col AS key, + t0.double_col AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col <= CAST(0 AS TINYINT) +) AS t3 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py index c807087cf122..2cf13f7c2cc1 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py @@ -15,6 +15,16 @@ }, ) -result = tpch_region.inner_join( - tpch_nation, tpch_region.r_regionkey == tpch_nation.n_regionkey -).count() +result = ( + tpch_region.inner_join( + tpch_nation, tpch_region.r_regionkey == tpch_nation.n_regionkey + ) + .select( + tpch_nation.n_nationkey, + tpch_nation.n_name, + tpch_nation.n_regionkey, + tpch_nation.n_comment, + tpch_region.r_name.name("region"), + ) + .count() +) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql index 6407e4987c29..3011cdb409b4 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql @@ -1 +1,13 @@ -COUNT(*) \ No newline at end of file +SELECT + COUNT(*) AS "CountStar()" +FROM ( + SELECT + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment, + t2.r_name AS region + FROM tpch_region AS t2 + INNER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey +) AS t5 \ No 
newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql index 6da69b8c5673..47945167c00a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql @@ -1,14 +1,13 @@ SELECT - * + t1.foo_id, + t1.total FROM ( SELECT - t0.foo_id AS foo_id, + t0.foo_id, SUM(t0.f) AS total FROM star1 AS t0 GROUP BY 1 ) AS t1 WHERE - ( - t1.total AS total > CAST(10 AS TINYINT) - ) \ No newline at end of file + t1.total > CAST(10 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql index 214ec1ede144..35e4fe0adc24 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql @@ -1,20 +1,14 @@ SELECT - t2.foo_id AS foo_id, - t2.total AS total + t1.foo_id, + t1.total FROM ( SELECT - * - FROM ( - SELECT - t0.foo_id AS foo_id, - SUM(t0.f) AS total, - COUNT(*) AS "CountStar()" - FROM star1 AS t0 - GROUP BY - 1 - ) AS t1 - WHERE - ( - t1."CountStar()" > CAST(100 AS TINYINT) - ) -) AS t2 \ No newline at end of file + t0.foo_id, + SUM(t0.f) AS total, + COUNT(*) AS "CountStar()" + FROM star1 AS t0 + GROUP BY + 1 +) AS t1 +WHERE + t1."CountStar()" > CAST(100 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql index 72adf2407ec1..a15f1a8cce7e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql @@ -1,16 +1,16 @@ SELECT - t3.foo_id AS foo_id, - SUM(t3.value1) AS total + t5.foo_id, + SUM(t5.value1) AS total FROM ( SELECT - t0.c AS c, - t0.f AS f, - t0.foo_id AS foo_id, - t0.bar_id AS bar_id, - t1.value1 AS value1 - FROM star1 AS t0 - INNER JOIN star2 AS t1 - ON t0.foo_id = t1.foo_id -) AS t3 + t2.c, + t2.f, + t2.foo_id, + t2.bar_id, + t3.value1 + FROM star1 AS t2 + INNER JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id +) AS t5 GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql index 3304bb7d330b..82d666c54c3f 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql @@ -1,31 +1,23 @@ SELECT - t2.g AS g, - SUM(t2.foo) AS "foo total" + t1.g, + SUM(t1.foo) AS "foo total" FROM ( SELECT - t1.a AS a, - t1.b AS b, - t1.c AS c, - t1.d AS d, - t1.e AS e, - t1.f AS f, - t1.g AS g, - t1.h AS h, - t1.i AS i, - t1.j AS j, - t1.k AS k, - t1.a + t1.b AS foo - FROM ( - SELECT - * - FROM alltypes AS t0 - WHERE - ( - t0.f > CAST(0 AS TINYINT) - ) AND ( - t0.g = 'bar' - ) - ) AS t1 -) AS t2 + t0.a, + t0.b, + t0.c, + t0.d, + t0.e, + t0.f, + t0.g, + t0.h, + t0.i, + t0.j, + t0.k, + 
t0.a + t0.b AS foo + FROM alltypes AS t0 + WHERE + t0.f > CAST(0 AS TINYINT) AND t0.g = 'bar' +) AS t1 GROUP BY - 1 + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql index a2bbff51e021..32772c5a969d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql @@ -1,33 +1,25 @@ SELECT - t2.g AS g, - SUM(t2.foo) AS "foo total" + t1.g, + SUM(t1.foo) AS "foo total" FROM ( SELECT - t1.a AS a, - t1.b AS b, - t1.c AS c, - t1.d AS d, - t1.e AS e, - t1.f AS f, - t1.g AS g, - t1.h AS h, - t1.i AS i, - t1.j AS j, - t1.k AS k, - t1.a + t1.b AS foo - FROM ( - SELECT - * - FROM alltypes AS t0 - WHERE - ( - t0.f > CAST(0 AS TINYINT) - ) AND ( - ( - t0.a + t0.b - ) < CAST(10 AS TINYINT) - ) - ) AS t1 -) AS t2 + t0.a, + t0.b, + t0.c, + t0.d, + t0.e, + t0.f, + t0.g, + t0.h, + t0.i, + t0.j, + t0.k, + t0.a + t0.b AS foo + FROM alltypes AS t0 + WHERE + t0.f > CAST(0 AS TINYINT) AND ( + t0.a + t0.b + ) < CAST(10 AS TINYINT) +) AS t1 GROUP BY - 1 + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql index 4809093f21a0..7407d5fcf78b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql @@ -1,24 +1,16 @@ SELECT - t1.a AS a, - t1.b AS b, - t1.c AS c, - t1.d AS d, - t1.e AS e, - t1.f AS f, - t1.g AS g, - t1.h AS h, - t1.i AS i, - t1.j AS j, - t1.k AS k, - t1.a + t1.b AS foo -FROM ( - SELECT - * - FROM alltypes AS t0 - WHERE - ( - t0.f > CAST(0 AS TINYINT) - ) AND ( - t0.g = 'bar' - ) -) AS t1 + t0.a, + t0.b, + t0.c, + t0.d, + t0.e, + t0.f, + t0.g, + t0.h, + t0.i, + t0.j, + t0.k, + t0.a + t0.b AS foo +FROM alltypes AS t0 +WHERE + t0.f > CAST(0 AS TINYINT) AND t0.g = 'bar' \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql index 0208ce9dfe03..0917ea29aae1 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql @@ -1,22 +1,16 @@ SELECT - t1.a AS a, - t1.b AS b, - t1.c AS c, - t1.d AS d, - t1.e AS e, - t1.f AS f, - t1.g AS g, - t1.h AS h, - t1.i AS i, - t1.j AS j, - t1.k AS k, - t1.a + t1.b AS foo -FROM ( - SELECT - * - FROM alltypes AS t0 - WHERE - ( - t0.f > CAST(0 AS TINYINT) - ) -) AS t1 + t0.a, + t0.b, + t0.c, + t0.d, + t0.e, + t0.f, + t0.g, + t0.h, + t0.i, + t0.j, + t0.k, + t0.a + t0.b AS foo +FROM alltypes AS t0 +WHERE + t0.f > CAST(0 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py index 1f1a500887e9..dc83d63276ea 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py +++ 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py @@ -9,4 +9,6 @@ name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} ) -result = star1.anti_join(star2, star1.foo_id == star2.foo_id) +result = star1.anti_join(star2, star1.foo_id == star2.foo_id).select( + star1.c, star1.f, star1.foo_id, star1.bar_id +) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql index 021e6eec101e..f2ef0d0d3f67 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql @@ -1,8 +1,8 @@ SELECT - t0.c AS c, - t0.f AS f, - t0.foo_id AS foo_id, - t0.bar_id AS bar_id -FROM star1 AS t0 -ANTI JOIN star2 AS t1 - ON t0.foo_id = t1.foo_id \ No newline at end of file + t2.c, + t2.f, + t2.foo_id, + t2.bar_id +FROM star1 AS t2 +ANTI JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql index fd6b1490e250..b110ecf4f3b8 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql @@ -1,9 +1,9 @@ SELECT - * + t0.dest, + t0.origin, + t0.arrdelay FROM airlines AS t0 WHERE ( - ( - CAST(t0.dest AS BIGINT) = CAST(0 AS TINYINT) - ) = TRUE - ) \ No newline at end of file + CAST(t0.dest AS BIGINT) = CAST(0 AS TINYINT) + ) = TRUE \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql index 59632deb743c..173b6323b243 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql @@ -1,17 +1,23 @@ -WITH t0 AS ( - SELECT t3.`arrdelay`, t3.`dest` - FROM airlines t3 -), -t1 AS ( - SELECT t0.*, avg(t0.`arrdelay`) OVER (PARTITION BY t0.`dest`) AS `dest_avg`, - t0.`arrdelay` - avg(t0.`arrdelay`) OVER (PARTITION BY t0.`dest`) AS `dev` - FROM t0 -) -SELECT t2.* +SELECT + t2.arrdelay, + t2.dest, + t2.dest_avg, + t2.dev FROM ( - SELECT t1.* - FROM t1 - WHERE t1.`dev` IS NOT NULL -) t2 -ORDER BY t2.`dev` DESC + SELECT + t1.arrdelay, + t1.dest, + AVG(t1.arrdelay) OVER (PARTITION BY t1.dest ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS dest_avg, + t1.arrdelay - AVG(t1.arrdelay) OVER (PARTITION BY t1.dest ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS dev + FROM ( + SELECT + t0.arrdelay, + t0.dest + FROM airlines AS t0 + ) AS t1 +) AS t2 +WHERE + NOT t2.dev IS NULL +ORDER BY + t2.dev DESC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql index fb3f43560215..88011a4c2cf9 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql @@ -1,55 +1,43 @@ SELECT - t5.c_name AS c_name, - t5.r_name AS r_name, - t5.n_name AS n_name -FROM ( - SELECT - t0.c_custkey AS c_custkey, - t0.c_name AS c_name, 
- t0.c_address AS c_address, - t0.c_nationkey AS c_nationkey, - t0.c_phone AS c_phone, - t0.c_acctbal AS c_acctbal, - t0.c_mktsegment AS c_mktsegment, - t0.c_comment AS c_comment, - t1.n_name AS n_name, - t2.r_name AS r_name - FROM tpch_customer AS t0 - INNER JOIN tpch_nation AS t1 - ON t0.c_nationkey = t1.n_nationkey - INNER JOIN tpch_region AS t2 - ON t1.n_regionkey = t2.r_regionkey -) AS t5 + t3.c_name, + t5.r_name, + t4.n_name +FROM tpch_customer AS t3 +INNER JOIN tpch_nation AS t4 + ON t3.c_nationkey = t4.n_nationkey +INNER JOIN tpch_region AS t5 + ON t4.n_regionkey = t5.r_regionkey SEMI JOIN ( SELECT - * + t9.n_name, + t9."Sum(Cast(c_acctbal, float64))" FROM ( SELECT - t5.n_name AS n_name, - SUM(CAST(t5.c_acctbal AS DOUBLE)) AS "Sum(Cast(c_acctbal, float64))" + t8.n_name, + SUM(CAST(t8.c_acctbal AS DOUBLE)) AS "Sum(Cast(c_acctbal, float64))" FROM ( SELECT - t0.c_custkey AS c_custkey, - t0.c_name AS c_name, - t0.c_address AS c_address, - t0.c_nationkey AS c_nationkey, - t0.c_phone AS c_phone, - t0.c_acctbal AS c_acctbal, - t0.c_mktsegment AS c_mktsegment, - t0.c_comment AS c_comment, - t1.n_name AS n_name, - t2.r_name AS r_name - FROM tpch_customer AS t0 - INNER JOIN tpch_nation AS t1 - ON t0.c_nationkey = t1.n_nationkey - INNER JOIN tpch_region AS t2 - ON t1.n_regionkey = t2.r_regionkey - ) AS t5 + t3.c_custkey, + t3.c_name, + t3.c_address, + t3.c_nationkey, + t3.c_phone, + t3.c_acctbal, + t3.c_mktsegment, + t3.c_comment, + t4.n_name, + t5.r_name + FROM tpch_customer AS t3 + INNER JOIN tpch_nation AS t4 + ON t3.c_nationkey = t4.n_nationkey + INNER JOIN tpch_region AS t5 + ON t4.n_regionkey = t5.r_regionkey + ) AS t8 GROUP BY 1 - ) AS t6 + ) AS t9 ORDER BY - t6."Sum(Cast(c_acctbal, float64))" DESC + t9."Sum(Cast(c_acctbal, float64))" DESC LIMIT 10 -) AS t8 - ON t5.n_name = t8.n_name \ No newline at end of file +) AS t12 + ON t4.n_name = t12.n_name \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql index 03a2bfc76996..4b25c0cd6d1e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql @@ -7,15 +7,15 @@ SELECT THEN t0.g ELSE CAST(NULL AS TEXT) END AS col2, - t0.a AS a, - t0.b AS b, - t0.c AS c, - t0.d AS d, - t0.e AS e, - t0.f AS f, - t0.g AS g, - t0.h AS h, - t0.i AS i, - t0.j AS j, - t0.k AS k + t0.a, + t0.b, + t0.c, + t0.d, + t0.e, + t0.f, + t0.g, + t0.h, + t0.i, + t0.j, + t0.k FROM alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql index 48625bcc8a00..df7349fd3de0 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql @@ -2,10 +2,11 @@ SELECT * FROM ( SELECT - * + t1.city, + t1."Count(city)" FROM ( SELECT - t0.city AS city, + t0.city, COUNT(t0.city) AS "Count(city)" FROM tbl AS t0 GROUP BY @@ -21,10 +22,11 @@ OFFSET ( COUNT(*) + CAST(-5 AS TINYINT) FROM ( SELECT - * + t1.city, + t1."Count(city)" FROM ( SELECT - t0.city AS city, + t0.city, COUNT(t0.city) AS "Count(city)" FROM tbl AS t0 GROUP BY diff --git 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql index fe43159d7cac..57f027a897ec 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql @@ -1,9 +1,14 @@ SELECT - CAST(t0.diag + CAST(1 AS TINYINT) AS INT) AS diag, - t0.status AS status -FROM aids2_one AS t0 -UNION ALL -SELECT - CAST(t1.diag + CAST(1 AS TINYINT) AS INT) AS diag, - t1.status AS status -FROM aids2_two AS t1 \ No newline at end of file + t4.diag, + t4.status +FROM ( + SELECT + CAST(t0.diag + CAST(1 AS TINYINT) AS INT) AS diag, + t0.status + FROM aids2_one AS t0 + UNION ALL + SELECT + CAST(t1.diag + CAST(1 AS TINYINT) AS INT) AS diag, + t1.status + FROM aids2_two AS t1 +) AS t4 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql index 720c3146efdf..6578a858c971 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql @@ -1,16 +1,16 @@ SELECT - t2.key1 AS key1, + t2.key1, SUM(t2.total) AS total FROM ( SELECT - t1.key1 AS key1, - t1.key2 AS key2, + t1.key1, + t1.key2, SUM(t1.total) AS total FROM ( SELECT - t0.key1 AS key1, - t0.key2 AS key2, - t0.key3 AS key3, + t0.key1, + t0.key2, + t0.key3, SUM(t0.value) AS total FROM foo_table AS t0 GROUP BY diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/decompiled.py index a4ca0e75b920..23f7c5d41601 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/decompiled.py @@ -1,11 +1,9 @@ import ibis -result = ( - ibis.table( - name="star1", - schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, - ) - .foo_id.endswith(ibis.literal("foo")) - .name("tmp") +star1 = ibis.table( + name="star1", + schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, ) + +result = star1.foo_id.endswith(ibis.literal("foo")).name("tmp") diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql index a7b8c0e5c185..8114e54d3695 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql @@ -1 +1,3 @@ -SUFFIX(t0.foo_id, 'foo') AS tmp \ No newline at end of file +SELECT + SUFFIX(t0.foo_id, 'foo') AS tmp +FROM star1 AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_exists_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_exists_subquery/out.sql new file mode 100644 index 000000000000..89f8c66d24dc --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_exists_subquery/out.sql @@ -0,0 +1,13 @@ +SELECT + t0.key1, + t0.key2, + t0.value1 +FROM t1 AS t0 +WHERE + EXISTS( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM t2 AS t1 + WHERE + t0.key1 = t1.key1 + ) \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql index 1ca28225ea6a..bffa5a6720b0 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql @@ -1,21 +1,14 @@ SELECT - * + t0.session_id, + t0.user_id, + t0.event_type, + t0.ts FROM events AS t0 WHERE EXISTS( - ( - SELECT - CAST(1 AS TINYINT) AS "1" - FROM ( - SELECT - * - FROM purchases AS t1 - WHERE - ( - t1.ts > '2015-08-15' - ) AND ( - t0.user_id = t1.user_id - ) - ) AS t2 - ) + SELECT + CAST(1 AS TINYINT) AS "1" + FROM purchases AS t1 + WHERE + t1.ts > '2015-08-15' AND t0.user_id = t1.user_id ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql index fbd2b7d3c76d..fb4bf6a1c3ff 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql @@ -1,7 +1,7 @@ SELECT - * + t0.color FROM t AS t0 WHERE LOWER(t0.color) LIKE '%de%' AND CONTAINS(LOWER(t0.color), 'de') - AND REGEXP_MATCHES(LOWER(t0.color), '.*ge.*', 's') + AND REGEXP_MATCHES(LOWER(t0.color), '.*ge.*', 's') \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql index 1ad66bd2d42e..2bdce97b5fa2 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql @@ -1,13 +1,15 @@ SELECT - t2.region AS region, - t2.total - t3.total AS diff + t4.region, + t4.total - t5.total AS diff FROM ( SELECT - * + t1.region, + t1.kind, + t1.total FROM ( SELECT - t0.region AS region, - t0.kind AS kind, + t0.region, + t0.kind, SUM(t0.amount) AS total FROM purchases AS t0 GROUP BY @@ -15,17 +17,17 @@ FROM ( 2 ) AS t1 WHERE - ( - t1.kind = 'foo' - ) -) AS t2 + t1.kind = 'foo' +) AS t4 INNER JOIN ( SELECT - * + t1.region, + t1.kind, + t1.total FROM ( SELECT - t0.region AS region, - t0.kind AS kind, + t0.region, + t0.kind, SUM(t0.amount) AS total FROM purchases AS t0 GROUP BY @@ -33,8 +35,6 @@ INNER JOIN ( 2 ) AS t1 WHERE - ( - t1.kind = 'bar' - ) -) AS t3 - ON t2.region = t3.region \ No newline at end of file + t1.kind = 'bar' +) AS t5 + ON t4.region = t5.region \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql index 5cb4f21a3d2c..4f1205ba038e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql @@ -1,21 +1,23 @@ SELECT - * + t0.c, + t0.f, + t0.foo_id, + t0.bar_id FROM star1 AS t0 WHERE - ( - t0.f > LN( - ( + t0.f > LN( + ( + SELECT + AVG(t1.f) AS "Mean(f)" + FROM ( SELECT - AVG(t1.f) AS "Mean(f)" - FROM ( - SELECT - * - FROM star1 AS t0 - WHERE - ( - t0.foo_id = 'foo' - ) - ) AS t1 - ) + t0.c, + t0.f, + t0.foo_id, + 
t0.bar_id + FROM star1 AS t0 + WHERE + t0.foo_id = 'foo' + ) AS t1 ) ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql index 4c5fa610555f..640d7f8d09a4 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql @@ -1,23 +1,25 @@ SELECT - * + t0.c, + t0.f, + t0.foo_id, + t0.bar_id FROM star1 AS t0 WHERE - ( - t0.f > ( - LN( - ( + t0.f > ( + LN( + ( + SELECT + AVG(t1.f) AS "Mean(f)" + FROM ( SELECT - AVG(t1.f) AS "Mean(f)" - FROM ( - SELECT - * - FROM star1 AS t0 - WHERE - ( - t0.foo_id = 'foo' - ) - ) AS t1 - ) - ) + CAST(1 AS TINYINT) - ) + t0.c, + t0.f, + t0.foo_id, + t0.bar_id + FROM star1 AS t0 + WHERE + t0.foo_id = 'foo' + ) AS t1 + ) + ) + CAST(1 AS TINYINT) ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/decompiled.py index 0d2f6cdef48c..37d25bd80aba 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/decompiled.py @@ -4,11 +4,8 @@ tbl = ibis.table( name="tbl", schema={"foo": "int32", "bar": "int64", "value": "float64"} ) -alias = (tbl.foo + tbl.bar).name("baz") -proj = tbl.select([tbl, alias]) +f = tbl.filter(tbl.value > 0) -result = ( - tbl.select([tbl, alias]) - .filter(tbl.value > 0) - .select([proj, (proj.foo * 2).name("qux")]) +result = f.select( + f.foo, f.bar, f.value, (f.foo + f.bar).name("baz"), (f.foo * 2).name("qux") ) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql index 6ddb56dc6476..bc1d0cc45118 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql @@ -1,7 +1,7 @@ SELECT - t0.foo AS foo, - t0.bar AS bar, - t0.value AS value, + t0.foo, + t0.bar, + t0.value, t0.foo + t0.bar AS baz, t0.foo * CAST(2 AS TINYINT) AS qux FROM tbl AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql index 8383b5c79c8a..4f8cda85cc5c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql @@ -1,15 +1,9 @@ SELECT - t1.foo AS foo, - t1.bar AS bar, - t1.value AS value, - t1.foo + t1.bar AS baz, - t1.foo * CAST(2 AS TINYINT) AS qux -FROM ( - SELECT - * - FROM tbl AS t0 - WHERE - ( - t0.value > CAST(0 AS TINYINT) - ) -) AS t1 \ No newline at end of file + t0.foo, + t0.bar, + t0.value, + t0.foo + t0.bar AS baz, + t0.foo * CAST(2 AS TINYINT) AS qux +FROM tbl AS t0 +WHERE + t0.value > CAST(0 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql index 73d0a7bfd20c..dc33ad4a62ff 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql @@ -1,13 +1,7 @@ SELECT - t1.x + CAST(1 AS TINYINT) AS x -FROM ( - SELECT - * - FROM t AS t0 - WHERE - ( - ( - t0.x + CAST(1 AS TINYINT) - ) > CAST(1 AS TINYINT) - ) -) AS t1 \ No newline at end of file + t0.x + CAST(1 AS TINYINT) AS x +FROM t AS t0 +WHERE + ( + t0.x + CAST(1 AS TINYINT) + ) > CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql index b8ceefcb67c9..2fd5fe2ddc07 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql @@ -1,11 +1,5 @@ SELECT CAST(1 AS TINYINT) AS a -FROM ( - SELECT - * - FROM t AS t0 - WHERE - ( - CAST(1 AS TINYINT) > CAST(1 AS TINYINT) - ) -) AS t1 \ No newline at end of file +FROM t AS t0 +WHERE + CAST(1 AS TINYINT) > CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py index 029cc24d6008..ce3143f2e407 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py @@ -9,7 +9,28 @@ name="third", schema={"key2": "string", "key3": "string", "value3": "float64"} ) fourth = ibis.table(name="fourth", schema={"key3": "string", "value4": "float64"}) -joinchain = first.inner_join(second, first.key1 == second.key1) -joinchain1 = third.inner_join(fourth, third.key3 == fourth.key3) -result = joinchain.inner_join(joinchain1, joinchain.key2 == joinchain1.key2) +result = ( + first.inner_join(second, first.key1 == second.key1) + .inner_join( + third.inner_join(fourth, third.key3 == fourth.key3).select( + third.key2, third.key3, third.value3, fourth.value4 + ), + first.key2 + == third.inner_join(fourth, third.key3 == fourth.key3) + .select(third.key2, third.key3, third.value3, fourth.value4) + .key2, + ) + .select( + first.key1, + first.key2, + first.value1, + second.value2, + third.inner_join(fourth, third.key3 == fourth.key3) + .select(third.key2, third.key3, third.value3, fourth.value4) + .value3, + third.inner_join(fourth, third.key3 == fourth.key3) + .select(third.key2, third.key3, third.value3, fourth.value4) + .value4, + ) +) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql index 86958930e55e..a66ce49c2bc5 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql @@ -1,28 +1,21 @@ SELECT - t6.key1 AS key1, - t6.key2 AS key2, - t6.value1 AS value1, - t6.value2 AS value2, - t7.value3 AS value3, - t7.value4 AS value4 -FROM ( - SELECT - t0.key1 AS key1, - t0.key2 AS key2, - 
t0.value1 AS value1, - t1.value2 AS value2 - FROM first AS t0 - INNER JOIN second AS t1 - ON t0.key1 = t1.key1 -) AS t6 + t4.key1, + t4.key2, + t4.value1, + t5.value2, + t11.value3, + t11.value4 +FROM first AS t4 +INNER JOIN second AS t5 + ON t4.key1 = t5.key1 INNER JOIN ( SELECT - t2.key2 AS key2, - t2.key3 AS key3, - t2.value3 AS value3, - t3.value4 AS value4 - FROM third AS t2 - INNER JOIN fourth AS t3 - ON t2.key3 = t3.key3 -) AS t7 - ON t6.key2 = t7.key2 \ No newline at end of file + t6.key2, + t6.key3, + t6.value3, + t7.value4 + FROM third AS t6 + INNER JOIN fourth AS t7 + ON t6.key3 = t7.key3 +) AS t11 + ON t4.key2 = t11.key2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql index 6b3adafa9878..879cd074a3ac 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql @@ -1,20 +1,28 @@ -WITH t0 AS ( - SELECT t2.* - FROM b t2 - WHERE (t2.`year` = 2016) AND - (t2.`month` = 2) AND - (t2.`day` = 29) -), -t1 AS ( - SELECT t2.* - FROM a t2 - WHERE (t2.`year` = 2016) AND - (t2.`month` = 2) AND - (t2.`day` = 29) -) -SELECT t1.`value_a`, t0.`value_b` -FROM t1 - LEFT OUTER JOIN t0 - ON (t1.`year` = t0.`year`) AND - (t1.`month` = t0.`month`) AND - (t1.`day` = t0.`day`) \ No newline at end of file +SELECT + t4.value_a, + t5.value_b +FROM ( + SELECT + t0.year, + t0.month, + t0.day, + t0.value_a + FROM a AS t0 + WHERE + t0.year = CAST(2016 AS SMALLINT) + AND t0.month = CAST(2 AS TINYINT) + AND t0.day = CAST(29 AS TINYINT) +) AS t4 +LEFT OUTER JOIN ( + SELECT + t1.year, + t1.month, + t1.day, + t1.value_b + FROM b AS t1 + WHERE + t1.year = CAST(2016 AS SMALLINT) + AND t1.month = CAST(2 AS TINYINT) + AND t1.day = CAST(29 AS TINYINT) +) AS t5 + ON t4.year = t5.year AND t4.month = t5.month AND t4.day = t5.day \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py index 44a6ed339c71..e2fefc7e6493 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py @@ -28,6 +28,26 @@ }, ) -result = tpch_nation.inner_join( - tpch_region, tpch_nation.n_regionkey == tpch_region.r_regionkey -).inner_join(tpch_customer, tpch_nation.n_nationkey == tpch_customer.c_nationkey) +result = ( + tpch_nation.inner_join( + tpch_region, tpch_nation.n_regionkey == tpch_region.r_regionkey + ) + .inner_join(tpch_customer, tpch_nation.n_nationkey == tpch_customer.c_nationkey) + .select( + tpch_nation.n_nationkey, + tpch_nation.n_name, + tpch_nation.n_regionkey, + tpch_nation.n_comment, + tpch_region.r_regionkey, + tpch_region.r_name, + tpch_region.r_comment, + tpch_customer.c_custkey, + tpch_customer.c_name, + tpch_customer.c_address, + tpch_customer.c_nationkey, + tpch_customer.c_phone, + tpch_customer.c_acctbal, + tpch_customer.c_mktsegment, + tpch_customer.c_comment, + ) +) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql index 7d32758ef61c..28ef4cadbb22 
100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql @@ -1,21 +1,21 @@ SELECT - t0.n_nationkey AS n_nationkey, - t0.n_name AS n_name, - t0.n_regionkey AS n_regionkey, - t0.n_comment AS n_comment, - t1.r_regionkey AS r_regionkey, - t1.r_name AS r_name, - t1.r_comment AS r_comment, - t2.c_custkey AS c_custkey, - t2.c_name AS c_name, - t2.c_address AS c_address, - t2.c_nationkey AS c_nationkey, - t2.c_phone AS c_phone, - t2.c_acctbal AS c_acctbal, - t2.c_mktsegment AS c_mktsegment, - t2.c_comment AS c_comment -FROM tpch_nation AS t0 -INNER JOIN tpch_region AS t1 - ON t0.n_regionkey = t1.r_regionkey -INNER JOIN tpch_customer AS t2 - ON t0.n_nationkey = t2.c_nationkey \ No newline at end of file + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment, + t4.r_regionkey, + t4.r_name, + t4.r_comment, + t5.c_custkey, + t5.c_name, + t5.c_address, + t5.c_nationkey, + t5.c_phone, + t5.c_acctbal, + t5.c_mktsegment, + t5.c_comment +FROM tpch_nation AS t3 +INNER JOIN tpch_region AS t4 + ON t3.n_regionkey = t4.r_regionkey +INNER JOIN tpch_customer AS t5 + ON t3.n_nationkey = t5.c_nationkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql index 3d73c83b16db..ccaf14d42229 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql @@ -1,23 +1,17 @@ SELECT - t1.c_custkey AS c_custkey, - t1.c_name AS c_name, - t1.c_address AS c_address, - t1.c_nationkey AS c_nationkey, - t1.c_phone AS c_phone, - t1.c_acctbal AS c_acctbal, - t1.c_mktsegment AS c_mktsegment, - t1.c_comment AS c_comment, - t4.n_nationkey AS n_nationkey, - t4.nation AS nation, - t4.region AS region -FROM ( - SELECT - t0.n_nationkey AS n_nationkey, - t0.n_name AS nation, - t2.r_name AS region - FROM tpch_nation AS t0 - INNER JOIN tpch_region AS t2 - ON t0.n_regionkey = t2.r_regionkey -) AS t4 -INNER JOIN tpch_customer AS t1 - ON t4.n_nationkey = t1.c_nationkey \ No newline at end of file + t5.c_custkey, + t5.c_name, + t5.c_address, + t5.c_nationkey, + t5.c_phone, + t5.c_acctbal, + t5.c_mktsegment, + t5.c_comment, + t3.n_nationkey, + t3.n_name AS nation, + t4.r_name AS region +FROM tpch_nation AS t3 +INNER JOIN tpch_region AS t4 + ON t3.n_regionkey = t4.r_regionkey +INNER JOIN tpch_customer AS t5 + ON t3.n_nationkey = t5.c_nationkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql index fe9f2b5b67f3..7b3d0ffe12ef 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql @@ -1,23 +1,31 @@ SELECT - t0.on AS on, - t0.by AS by, - t1.on AS on_right, - t1.by AS by_right, - t1.val AS val -FROM left AS t0 -LEFT OUTER JOIN right AS t1 - ON t0.by = t1.by + t6.on, + t6.by, + t6.on_right, + t6.by_right, + t6.val +FROM ( + SELECT + t2.on, + t2.by, + t3.on AS on_right, + t3.by AS by_right, + t3.val + FROM left AS t2 + LEFT OUTER JOIN right AS t3 + ON t2.by = t3.by 
+) AS t6 WHERE - t1.on = ( + t6.on_right = ( SELECT - MAX(t3.on) AS "Max(on)" + MAX(t4.on) AS "Max(on)" FROM ( SELECT - t1.on AS on, - t1.by AS by, - t1.val AS val + t1.on, + t1.by, + t1.val FROM right AS t1 WHERE t1.by = t0.by AND t1.on <= t0.on - ) AS t3 + ) AS t4 ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql index 136cb7d2c69d..1ea81d25a312 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql @@ -1,13 +1,13 @@ SELECT - t2.c AS c, - t2.f AS f, - t2.foo_id AS foo_id, - t2.bar_id AS bar_id + t4.c, + t4.f, + t4.foo_id, + t4.bar_id FROM ( SELECT * FROM star1 AS t0 LIMIT 100 -) AS t2 -INNER JOIN star2 AS t1 - ON t2.foo_id = t1.foo_id \ No newline at end of file +) AS t4 +INNER JOIN star2 AS t3 + ON t4.foo_id = t3.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql index 6a0ea118caef..e32be6672764 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql @@ -1,8 +1,27 @@ -WITH t0 AS ( - SELECT t2.* - FROM functional_alltypes t2 +SELECT + t2.id, + t2.bool_col, + t2.tinyint_col, + t2.smallint_col, + t2.int_col, + t2.bigint_col, + t2.float_col, + t2.double_col, + t2.date_string_col, + t2.string_col, + t2.timestamp_col, + t2.year, + t2.month +FROM ( + SELECT + * + FROM functional_alltypes AS t0 LIMIT 100 -) -SELECT t0.* -FROM t0 - INNER JOIN t0 t1 \ No newline at end of file +) AS t2 +INNER JOIN ( + SELECT + * + FROM functional_alltypes AS t0 + LIMIT 100 +) AS t4 + ON TRUE \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/decompiled.py index 80cbfb72f010..334f4b168218 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/decompiled.py @@ -27,34 +27,32 @@ functional_alltypes.tinyint_col < selfreference.timestamp_col.minute(), ) .select( - [ - functional_alltypes.id, - functional_alltypes.bool_col, - functional_alltypes.tinyint_col, - functional_alltypes.smallint_col, - functional_alltypes.int_col, - functional_alltypes.bigint_col, - functional_alltypes.float_col, - functional_alltypes.double_col, - functional_alltypes.date_string_col, - functional_alltypes.string_col, - functional_alltypes.timestamp_col, - functional_alltypes.year, - functional_alltypes.month, - selfreference.id.name("id_right"), - selfreference.bool_col.name("bool_col_right"), - selfreference.tinyint_col.name("tinyint_col_right"), - selfreference.smallint_col.name("smallint_col_right"), - selfreference.int_col.name("int_col_right"), - selfreference.bigint_col.name("bigint_col_right"), - selfreference.float_col.name("float_col_right"), - selfreference.double_col.name("double_col_right"), - selfreference.date_string_col.name("date_string_col_right"), - selfreference.string_col.name("string_col_right"), - selfreference.timestamp_col.name("timestamp_col_right"), - 
selfreference.year.name("year_right"), - selfreference.month.name("month_right"), - ] + functional_alltypes.id, + functional_alltypes.bool_col, + functional_alltypes.tinyint_col, + functional_alltypes.smallint_col, + functional_alltypes.int_col, + functional_alltypes.bigint_col, + functional_alltypes.float_col, + functional_alltypes.double_col, + functional_alltypes.date_string_col, + functional_alltypes.string_col, + functional_alltypes.timestamp_col, + functional_alltypes.year, + functional_alltypes.month, + selfreference.id.name("id_right"), + selfreference.bool_col.name("bool_col_right"), + selfreference.tinyint_col.name("tinyint_col_right"), + selfreference.smallint_col.name("smallint_col_right"), + selfreference.int_col.name("int_col_right"), + selfreference.bigint_col.name("bigint_col_right"), + selfreference.float_col.name("float_col_right"), + selfreference.double_col.name("double_col_right"), + selfreference.date_string_col.name("date_string_col_right"), + selfreference.string_col.name("string_col_right"), + selfreference.timestamp_col.name("timestamp_col_right"), + selfreference.year.name("year_right"), + selfreference.month.name("month_right"), ) .count() ) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql index 6407e4987c29..26824f377a3e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql @@ -1 +1,34 @@ -COUNT(*) \ No newline at end of file +SELECT + COUNT(*) AS "CountStar()" +FROM ( + SELECT + t1.id, + t1.bool_col, + t1.tinyint_col, + t1.smallint_col, + t1.int_col, + t1.bigint_col, + t1.float_col, + t1.double_col, + t1.date_string_col, + t1.string_col, + t1.timestamp_col, + t1.year, + t1.month, + t3.id AS id_right, + t3.bool_col AS bool_col_right, + t3.tinyint_col AS tinyint_col_right, + t3.smallint_col AS smallint_col_right, + t3.int_col AS int_col_right, + t3.bigint_col AS bigint_col_right, + t3.float_col AS float_col_right, + t3.double_col AS double_col_right, + t3.date_string_col AS date_string_col_right, + t3.string_col AS string_col_right, + t3.timestamp_col AS timestamp_col_right, + t3.year AS year_right, + t3.month AS month_right + FROM functional_alltypes AS t1 + INNER JOIN functional_alltypes AS t3 + ON t1.tinyint_col < EXTRACT('minute' FROM t3.timestamp_col) +) AS t5 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql index bf1ecb1da578..fd5640b69685 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql @@ -1,24 +1,22 @@ SELECT - t2.id AS left_id, - t2.desc AS left_desc, - t3.id AS right_id, - t3.desc AS right_desc + t4.id AS left_id, + t4.desc AS left_desc, + t5.id AS right_id, + t5.desc AS right_desc FROM ( SELECT - * + t0.id, + t0.desc FROM foo AS t0 WHERE - ( - t0.id < CAST(2 AS TINYINT) - ) -) AS t2 + t0.id < CAST(2 AS TINYINT) +) AS t4 LEFT OUTER JOIN ( SELECT - * + t1.id, + t1.desc FROM bar AS t1 WHERE - ( - t1.id < CAST(3 AS TINYINT) - ) -) AS t3 - ON t2.id = t3.id AND t2.desc = t3.desc \ No newline at end of file + t1.id < CAST(3 AS TINYINT) +) AS t5 + ON t4.id = t5.id AND 
t4.desc = t5.desc \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py index 894f52809414..00f53b84cbf4 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py @@ -10,6 +10,8 @@ ) star3 = ibis.table(name="star3", schema={"bar_id": "string", "value2": "float64"}) -result = star1.left_join(star2, star1.foo_id == star2.foo_id).inner_join( - star3, star1.bar_id == star3.bar_id +result = ( + star1.left_join(star2, star1.foo_id == star2.foo_id) + .inner_join(star3, star1.bar_id == star3.bar_id) + .select(star1.c, star1.f, star1.foo_id, star1.bar_id, star2.value1, star3.value2) ) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql index 79ad2e20002d..4d414f398697 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql @@ -1,12 +1,12 @@ SELECT - t0.c AS c, - t0.f AS f, - t0.foo_id AS foo_id, - t0.bar_id AS bar_id, - t1.value1 AS value1, - t2.value2 AS value2 -FROM star1 AS t0 -LEFT OUTER JOIN star2 AS t1 - ON t0.foo_id = t1.foo_id -INNER JOIN star3 AS t2 - ON t0.bar_id = t2.bar_id \ No newline at end of file + t3.c, + t3.f, + t3.foo_id, + t3.bar_id, + t4.value1, + t5.value2 +FROM star1 AS t3 +LEFT OUTER JOIN star2 AS t4 + ON t3.foo_id = t4.foo_id +INNER JOIN star3 AS t5 + ON t3.bar_id = t5.bar_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/decompiled.py index 3d256c68c24d..a2704f369e8a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/decompiled.py @@ -1,25 +1,23 @@ import ibis -result = ( - ibis.table( - name="functional_alltypes", - schema={ - "id": "int32", - "bool_col": "boolean", - "tinyint_col": "int8", - "smallint_col": "int16", - "int_col": "int32", - "bigint_col": "int64", - "float_col": "float32", - "double_col": "float64", - "date_string_col": "string", - "string_col": "string", - "timestamp_col": "timestamp", - "year": "int32", - "month": "int32", - }, - ) - .limit(20) - .limit(10) +functional_alltypes = ibis.table( + name="functional_alltypes", + schema={ + "id": "int32", + "bool_col": "boolean", + "tinyint_col": "int8", + "smallint_col": "int16", + "int_col": "int32", + "bigint_col": "int64", + "float_col": "float32", + "double_col": "float64", + "date_string_col": "string", + "string_col": "string", + "timestamp_col": "timestamp", + "year": "int32", + "month": "int32", + }, ) + +result = functional_alltypes.limit(20).limit(10) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql index b88ec7dcdd9d..7f7d28627c2f 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql @@ -6,4 +6,4 @@ FROM ( FROM functional_alltypes AS t0 LIMIT 20 ) AS t1 -LIMIT 10 +LIMIT 10 
\ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql index bbc32482815d..315083748ef8 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql @@ -1,8 +1,9 @@ SELECT - * + t2.string_col, + t2.nrows FROM ( SELECT - t0.string_col AS string_col, + t0.string_col, COUNT(*) AS nrows FROM functional_alltypes AS t0 GROUP BY diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql index ad8215827d46..c93dbd9ab8dc 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql @@ -1,6 +1,7 @@ -SELECT t0.`a`, t0.`b`, t0.`c` -FROM ( - SELECT t1.* - FROM foo t1 - WHERE t1.`a` > 0 -) t0 \ No newline at end of file +SELECT + t0.a, + t0.b, + t0.c +FROM foo AS t0 +WHERE + t0.a > CAST(0 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql index a37945ee2f13..fdc7cf00bcd2 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql @@ -1,11 +1,12 @@ SELECT - * + t0.job, + t0.dept_id, + t0.year, + t0.y FROM foo AS t0 WHERE - ( - t0.y > ( - SELECT - MAX(t1.x) AS "Max(x)" - FROM bar AS t1 - ) + t0.y > ( + SELECT + MAX(t1.x) AS "Max(x)" + FROM bar AS t1 ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql index 29e72c8336aa..dbf4aadac203 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql @@ -1,5 +1,5 @@ SELECT - t0.foo_id AS foo_id, + t0.foo_id, SUM(t0.f) AS total FROM star1 AS t0 GROUP BY diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql index 116832bf3c3a..be9b430bd3d3 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql @@ -1,6 +1,6 @@ SELECT - t0.foo_id AS foo_id, - t0.bar_id AS bar_id, + t0.foo_id, + t0.bar_id, SUM(t0.f) AS total FROM star1 AS t0 GROUP BY diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/decompiled.py index f811fdd901f7..948decc08bfc 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/decompiled.py +++ 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/decompiled.py @@ -1,7 +1,9 @@ import ibis -result = ibis.table( +star1 = ibis.table( name="star1", schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, -).count() +) + +result = star1.count() diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql index 6407e4987c29..a924af63f39d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql @@ -1 +1,3 @@ -COUNT(*) \ No newline at end of file +SELECT + COUNT(*) AS "CountStar()" +FROM star1 AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql index ec694656a01b..31e87b57f3ea 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql @@ -1,8 +1,9 @@ SELECT - * + t0.c, + t0.f, + t0.foo_id, + t0.bar_id FROM star1 AS t0 WHERE - ( - t0.f > CAST(0 AS TINYINT) - ) + t0.f > CAST(0 AS TINYINT) LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/decompiled.py index 736a1368dbe5..3b82e874ef59 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/decompiled.py @@ -1,7 +1,9 @@ import ibis -result = ibis.table( +star1 = ibis.table( name="star1", schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, -).limit(10) +) + +result = star1.limit(10) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/decompiled.py index 2767182490a8..f002a286656d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/decompiled.py @@ -1,9 +1,10 @@ import ibis -limit = ibis.table( +star1 = ibis.table( name="star1", schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, -).limit(10) +) +limit = star1.limit(10) result = limit.filter(limit.f > 0) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql index 431ba054f9f9..ab4dd6df7158 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql @@ -1,5 +1,8 @@ SELECT - * + t1.c, + t1.f, + t1.foo_id, + t1.bar_id FROM ( SELECT * @@ -7,6 +10,4 @@ FROM ( LIMIT 10 ) AS t1 WHERE - ( - t1.f > CAST(0 AS TINYINT) - ) \ No newline at end of file + t1.f > CAST(0 AS 
TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/decompiled.py index 97d5358b784c..46ad8f50727d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/decompiled.py @@ -1,7 +1,9 @@ import ibis -result = ibis.table( +star1 = ibis.table( name="star1", schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, -).limit(10, 5) +) + +result = star1.limit(10, 5) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/decompiled.py index 48689a1f4b93..676523775712 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/decompiled.py @@ -4,4 +4,4 @@ result = ibis.table( name="star1", schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, -).view() +) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql index a401619ae2d8..bb666f269b2a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql @@ -1,5 +1,8 @@ SELECT - * + t0.c, + t0.f, + t0.foo_id, + t0.bar_id FROM star1 AS t0 ORDER BY t0.f ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql index 374a6dbdd0e7..d262b49d64c1 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql @@ -1,30 +1,20 @@ SELECT - t3.r_name AS r_name, - t4.n_name AS n_name -FROM ( - SELECT - t0.r_regionkey AS r_regionkey, - t0.r_name AS r_name, - t0.r_comment AS r_comment, - t1.n_nationkey AS n_nationkey, - t1.n_name AS n_name, - t1.n_regionkey AS n_regionkey, - t1.n_comment AS n_comment - FROM tpch_region AS t0 - INNER JOIN tpch_nation AS t1 - ON t0.r_regionkey = t1.n_regionkey -) AS t3 + t7.r_name, + t7.n_name +FROM tpch_region AS t2 +INNER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey INNER JOIN ( SELECT - t0.r_regionkey AS r_regionkey, - t0.r_name AS r_name, - t0.r_comment AS r_comment, - t1.n_nationkey AS n_nationkey, - t1.n_name AS n_name, - t1.n_regionkey AS n_regionkey, - t1.n_comment AS n_comment - FROM tpch_region AS t0 - INNER JOIN tpch_nation AS t1 - ON t0.r_regionkey = t1.n_regionkey -) AS t4 - ON t3.r_regionkey = t4.r_regionkey \ No newline at end of file + t2.r_regionkey, + t2.r_name, + t2.r_comment, + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment + FROM tpch_region AS t2 + INNER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey +) AS t7 + ON t2.r_regionkey = t7.r_regionkey \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py index 02140f8bb8a7..6e73ee23967b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py @@ -9,4 +9,6 @@ name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} ) -result = star1.semi_join(star2, star1.foo_id == star2.foo_id) +result = star1.semi_join(star2, star1.foo_id == star2.foo_id).select( + star1.c, star1.f, star1.foo_id, star1.bar_id +) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql index 0cfc52ab309b..802ea0aad1c6 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql @@ -1,8 +1,8 @@ SELECT - t0.c AS c, - t0.f AS f, - t0.foo_id AS foo_id, - t0.bar_id AS bar_id -FROM star1 AS t0 -SEMI JOIN star2 AS t1 - ON t0.foo_id = t1.foo_id \ No newline at end of file + t2.c, + t2.f, + t2.foo_id, + t2.bar_id +FROM star1 AS t2 +SEMI JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py index 09d698686546..527f8071b328 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py @@ -11,4 +11,4 @@ result = star1.inner_join( star2, [star1.foo_id == star2.foo_id, star1.bar_id == star2.foo_id] -) +).select(star1.c, star1.f, star1.foo_id, star1.bar_id) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql index e1aed9698bc1..56f5488cdde3 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql @@ -1,8 +1,8 @@ SELECT - t0.c AS c, - t0.f AS f, - t0.foo_id AS foo_id, - t0.bar_id AS bar_id -FROM star1 AS t0 -INNER JOIN star2 AS t1 - ON t0.foo_id = t1.foo_id + t2.c, + t2.f, + t2.foo_id, + t2.bar_id +FROM star1 AS t2 +INNER JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql index 63cc978163fc..59916704c75d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql @@ -1,8 +1,8 @@ SELECT - t0.c AS c, - t0.f AS f, - t0.foo_id AS foo_id, - t0.bar_id AS bar_id -FROM star1 AS t0 -INNER JOIN star2 AS t1 - ON t0.foo_id = t1.foo_id AND t0.bar_id = t1.foo_id \ No newline at end of file + t2.c, + t2.f, + t2.foo_id, + t2.bar_id +FROM star1 AS t2 +INNER JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id AND t2.bar_id = t3.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql index 3cc6681236ae..0b9cd8c00921 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql @@ -1,8 +1,8 @@ SELECT - t0.c AS c, - t0.f AS f, - t0.foo_id AS foo_id, - t0.bar_id AS bar_id -FROM star1 AS t0 -LEFT OUTER JOIN star2 AS t1 - ON t0.foo_id = t1.foo_id + t2.c, + t2.f, + t2.foo_id, + t2.bar_id +FROM star1 AS t2 +LEFT OUTER JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql index 09d791f98b1e..91950bc952c5 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql @@ -1,8 +1,8 @@ SELECT - t0.c AS c, - t0.f AS f, - t0.foo_id AS foo_id, - t0.bar_id AS bar_id -FROM star1 AS t0 -FULL OUTER JOIN star2 AS t1 - ON t0.foo_id = t1.foo_id \ No newline at end of file + t2.c, + t2.f, + t2.foo_id, + t2.bar_id +FROM star1 AS t2 +FULL OUTER JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql index 8e97174b77d9..5877da115f7b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql @@ -1,16 +1,12 @@ SELECT - t2.b AS b, + t1.b, COUNT(*) AS b_count FROM ( SELECT - t1.b AS b - FROM ( - SELECT - * - FROM t AS t0 - ORDER BY - t0.a ASC - ) AS t1 -) AS t2 + t0.b + FROM t AS t0 + ORDER BY + t0.a ASC +) AS t1 GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql index b6331cb95031..b40324745e05 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql @@ -1,16 +1,12 @@ SELECT - t2.b AS b, + t1.b, COUNT(*) AS b_count FROM ( SELECT - t1.b AS b - FROM ( - SELECT - * - FROM t AS t0 - ORDER BY - t0.b ASC - ) AS t1 -) AS t2 + t0.b + FROM t AS t0 + ORDER BY + t0.b ASC +) AS t1 GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/decompiled.py index 2a3bfca845bd..18a6c4da1098 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/decompiled.py @@ -1,11 +1,9 @@ import ibis -result = ( - ibis.table( - name="star1", - schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, - ) - .foo_id.startswith(ibis.literal("foo")) - .name("tmp") +star1 = ibis.table( + name="star1", + schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, ) + +result = 
star1.foo_id.startswith(ibis.literal("foo")).name("tmp") diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql index bf7995d43b4e..05528dd5f869 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql @@ -1 +1,3 @@ -STARTS_WITH(t0.foo_id, 'foo') AS tmp \ No newline at end of file +SELECT + STARTS_WITH(t0.foo_id, 'foo') AS tmp +FROM star1 AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql index 67f775236f0f..ab07ecf75d0e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql @@ -1,60 +1,76 @@ SELECT - * + t11.c_custkey, + t11.c_name, + t11.c_address, + t11.c_nationkey, + t11.c_phone, + t11.c_acctbal, + t11.c_mktsegment, + t11.c_comment, + t11.region, + t11.amount, + t11.odate FROM ( SELECT - t2.c_custkey AS c_custkey, - t2.c_name AS c_name, - t2.c_address AS c_address, - t2.c_nationkey AS c_nationkey, - t2.c_phone AS c_phone, - t2.c_acctbal AS c_acctbal, - t2.c_mktsegment AS c_mktsegment, - t2.c_comment AS c_comment, - t0.r_name AS region, - t3.o_totalprice AS amount, - CAST(t3.o_orderdate AS TIMESTAMP) AS odate - FROM tpch_region AS t0 - INNER JOIN tpch_nation AS t1 - ON t0.r_regionkey = t1.n_regionkey - INNER JOIN tpch_customer AS t2 - ON t2.c_nationkey = t1.n_nationkey - INNER JOIN tpch_orders AS t3 - ON t3.o_custkey = t2.c_custkey -) AS t7 + t6.c_custkey, + t6.c_name, + t6.c_address, + t6.c_nationkey, + t6.c_phone, + t6.c_acctbal, + t6.c_mktsegment, + t6.c_comment, + t4.r_name AS region, + t7.o_totalprice AS amount, + CAST(t7.o_orderdate AS TIMESTAMP) AS odate + FROM tpch_region AS t4 + INNER JOIN tpch_nation AS t5 + ON t4.r_regionkey = t5.n_regionkey + INNER JOIN tpch_customer AS t6 + ON t6.c_nationkey = t5.n_nationkey + INNER JOIN tpch_orders AS t7 + ON t7.o_custkey = t6.c_custkey +) AS t11 WHERE - ( - t7.amount > ( + t11.amount > ( + SELECT + AVG(t13.amount) AS "Mean(amount)" + FROM ( SELECT - AVG(t9.amount) AS "Mean(amount)" + t12.c_custkey, + t12.c_name, + t12.c_address, + t12.c_nationkey, + t12.c_phone, + t12.c_acctbal, + t12.c_mktsegment, + t12.c_comment, + t12.region, + t12.amount, + t12.odate FROM ( SELECT - * - FROM ( - SELECT - t2.c_custkey AS c_custkey, - t2.c_name AS c_name, - t2.c_address AS c_address, - t2.c_nationkey AS c_nationkey, - t2.c_phone AS c_phone, - t2.c_acctbal AS c_acctbal, - t2.c_mktsegment AS c_mktsegment, - t2.c_comment AS c_comment, - t0.r_name AS region, - t3.o_totalprice AS amount, - CAST(t3.o_orderdate AS TIMESTAMP) AS odate - FROM tpch_region AS t0 - INNER JOIN tpch_nation AS t1 - ON t0.r_regionkey = t1.n_regionkey - INNER JOIN tpch_customer AS t2 - ON t2.c_nationkey = t1.n_nationkey - INNER JOIN tpch_orders AS t3 - ON t3.o_custkey = t2.c_custkey - ) AS t8 - WHERE - ( - t8.region = t7.region - ) - ) AS t9 - ) + t6.c_custkey, + t6.c_name, + t6.c_address, + t6.c_nationkey, + t6.c_phone, + t6.c_acctbal, + t6.c_mktsegment, + t6.c_comment, + t4.r_name AS region, + t7.o_totalprice AS amount, + CAST(t7.o_orderdate AS TIMESTAMP) AS odate + FROM tpch_region AS t4 + INNER JOIN tpch_nation AS t5 
+ ON t4.r_regionkey = t5.n_regionkey + INNER JOIN tpch_customer AS t6 + ON t6.c_nationkey = t5.n_nationkey + INNER JOIN tpch_orders AS t7 + ON t7.o_custkey = t6.c_custkey + ) AS t12 + WHERE + t12.region = t12.region + ) AS t13 ) LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql index 6829667d5210..404caac50463 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql @@ -1,11 +1,12 @@ SELECT - * + t0.c, + t0.f, + t0.foo_id, + t0.bar_id FROM star1 AS t0 WHERE - ( - t0.f > ( - SELECT - AVG(t0.f) AS "Mean(f)" - FROM star1 AS t0 - ) - ) + t0.f > ( + SELECT + AVG(t0.f) AS "Mean(f)" + FROM star1 AS t0 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql index d196e74c2bf7..fdbdef535ce4 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql @@ -1,19 +1,21 @@ SELECT - * + t0.c, + t0.f, + t0.foo_id, + t0.bar_id FROM star1 AS t0 WHERE - ( - t0.f > ( + t0.f > ( + SELECT + AVG(t1.f) AS "Mean(f)" + FROM ( SELECT - AVG(t1.f) AS "Mean(f)" - FROM ( - SELECT - * - FROM star1 AS t0 - WHERE - ( - t0.foo_id = 'foo' - ) - ) AS t1 - ) - ) + t0.c, + t0.f, + t0.foo_id, + t0.bar_id + FROM star1 AS t0 + WHERE + t0.foo_id = 'foo' + ) AS t1 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/decompiled.py index 9a11e7e90c51..a2a8f64d4fd2 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/decompiled.py @@ -17,11 +17,11 @@ "k": "time", }, ) -agg = alltypes.group_by([alltypes.a, alltypes.g]).aggregate( - alltypes.f.sum().name("metric") -) +agg = alltypes.aggregate([alltypes.f.sum().name("metric")], by=[alltypes.a, alltypes.g]) selfreference = agg.view() -proj = agg.inner_join(selfreference, agg.g == selfreference.g).select(agg) -union = proj.union(proj.view()) +joinchain = agg.inner_join(selfreference, agg.g == selfreference.g).select( + agg.a, agg.g, agg.metric +) +selfreference1 = joinchain.view() -result = union.select([union.a, union.g, union.metric]) +result = joinchain.union(selfreference1) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql index e7785e9117d3..57e0912af4a6 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql @@ -1,51 +1,57 @@ SELECT - t1.a AS a, - t1.g AS g, - t1.metric AS metric + t8.a, + t8.g, + t8.metric FROM ( SELECT - t0.a AS a, - t0.g AS g, - SUM(t0.f) AS metric - FROM alltypes AS t0 - GROUP BY - 1, - 2 -) AS t1 -INNER JOIN ( + t2.a, + t2.g, + t2.metric + FROM ( + SELECT + t0.a, + t0.g, + SUM(t0.f) AS metric 
+ FROM alltypes AS t0 + GROUP BY + 1, + 2 + ) AS t2 + INNER JOIN ( + SELECT + t0.a, + t0.g, + SUM(t0.f) AS metric + FROM alltypes AS t0 + GROUP BY + 1, + 2 + ) AS t4 + ON t2.g = t4.g + UNION ALL SELECT - t0.a AS a, - t0.g AS g, - SUM(t0.f) AS metric - FROM alltypes AS t0 - GROUP BY - 1, - 2 -) AS t2 - ON t1.g = t2.g -UNION ALL -SELECT - t1.a AS a, - t1.g AS g, - t1.metric AS metric -FROM ( - SELECT - t0.a AS a, - t0.g AS g, - SUM(t0.f) AS metric - FROM alltypes AS t0 - GROUP BY - 1, - 2 -) AS t1 -INNER JOIN ( - SELECT - t0.a AS a, - t0.g AS g, - SUM(t0.f) AS metric - FROM alltypes AS t0 - GROUP BY - 1, - 2 -) AS t2 - ON t1.g = t2.g \ No newline at end of file + t2.a, + t2.g, + t2.metric + FROM ( + SELECT + t0.a, + t0.g, + SUM(t0.f) AS metric + FROM alltypes AS t0 + GROUP BY + 1, + 2 + ) AS t2 + INNER JOIN ( + SELECT + t0.a, + t0.g, + SUM(t0.f) AS metric + FROM alltypes AS t0 + GROUP BY + 1, + 2 + ) AS t4 + ON t2.g = t4.g +) AS t8 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql index 496b5b45619e..57edded67515 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql @@ -1,41 +1,41 @@ SELECT - t4.g AS g, - MAX(t4.total - t4.total_right) AS metric + t6.g, + MAX(t6.total - t6.total_right) AS metric FROM ( SELECT - t1.g AS g, - t1.a AS a, - t1.b AS b, - t1.total AS total, - t2.g AS g_right, - t2.a AS a_right, - t2.b AS b_right, - t2.total AS total_right + t2.g, + t2.a, + t2.b, + t2.total, + t4.g AS g_right, + t4.a AS a_right, + t4.b AS b_right, + t4.total AS total_right FROM ( SELECT - t0.g AS g, - t0.a AS a, - t0.b AS b, + t0.g, + t0.a, + t0.b, SUM(t0.f) AS total FROM alltypes AS t0 GROUP BY 1, 2, 3 - ) AS t1 + ) AS t2 INNER JOIN ( SELECT - t0.g AS g, - t0.a AS a, - t0.b AS b, + t0.g, + t0.a, + t0.b, SUM(t0.f) AS total FROM alltypes AS t0 GROUP BY 1, 2, 3 - ) AS t2 - ON t1.a = t2.b -) AS t4 + ) AS t4 + ON t2.a = t4.b +) AS t6 GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql index 551821a247c4..5145b5c7361f 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql @@ -1,40 +1,45 @@ SELECT - t6.origin AS origin, + t8.origin, COUNT(*) AS "CountStar()" FROM ( SELECT - t1.dest AS dest, - t1.origin AS origin, - t1.arrdelay AS arrdelay + t2.dest, + t2.origin, + t2.arrdelay FROM ( SELECT - * + t0.dest, + t0.origin, + t0.arrdelay FROM airlines AS t0 WHERE t0.dest IN ('ORD', 'JFK', 'SFO') - ) AS t1 + ) AS t2 SEMI JOIN ( SELECT - * + t3.dest, + t3."Mean(arrdelay)" FROM ( SELECT - t1.dest AS dest, + t1.dest, AVG(t1.arrdelay) AS "Mean(arrdelay)" FROM ( SELECT - * + t0.dest, + t0.origin, + t0.arrdelay FROM airlines AS t0 WHERE t0.dest IN ('ORD', 'JFK', 'SFO') ) AS t1 GROUP BY 1 - ) AS t2 + ) AS t3 ORDER BY - t2."Mean(arrdelay)" DESC + t3."Mean(arrdelay)" DESC LIMIT 10 - ) AS t4 - ON t1.dest = t4.dest -) AS t6 + ) AS t6 + ON t2.dest = t6.dest +) AS t8 GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql index 077bcd8cb40b..6d86baa0190c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql @@ -1,23 +1,24 @@ SELECT - t0.foo AS foo, - t0.bar AS bar, - t0.city AS city, - t0.v1 AS v1, - t0.v2 AS v2 -FROM tbl AS t0 + t1.foo, + t1.bar, + t1.city, + t1.v1, + t1.v2 +FROM tbl AS t1 SEMI JOIN ( SELECT - * + t2.city, + t2."Mean(v2)" FROM ( SELECT - t0.city AS city, + t0.city, AVG(t0.v2) AS "Mean(v2)" FROM tbl AS t0 GROUP BY 1 - ) AS t1 + ) AS t2 ORDER BY - t1."Mean(v2)" DESC + t2."Mean(v2)" DESC LIMIT 10 -) AS t3 - ON t0.city = t3.city \ No newline at end of file +) AS t5 + ON t1.city = t5.city \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql index 6f15e53a0d5b..53e239ad98c8 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql @@ -1,23 +1,24 @@ SELECT - t0.foo AS foo, - t0.bar AS bar, - t0.city AS city, - t0.v1 AS v1, - t0.v2 AS v2 -FROM tbl AS t0 + t1.foo, + t1.bar, + t1.city, + t1.v1, + t1.v2 +FROM tbl AS t1 SEMI JOIN ( SELECT - * + t2.city, + t2."Count(city)" FROM ( SELECT - t0.city AS city, + t0.city, COUNT(t0.city) AS "Count(city)" FROM tbl AS t0 GROUP BY 1 - ) AS t1 + ) AS t2 ORDER BY - t1."Count(city)" DESC + t2."Count(city)" DESC LIMIT 10 -) AS t3 - ON t0.city = t3.city \ No newline at end of file +) AS t5 + ON t1.city = t5.city \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql index 237a4f77eb32..9dffcedd667b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql @@ -1,62 +1,50 @@ SELECT - t5.c_custkey AS c_custkey, - t5.c_name AS c_name, - t5.c_address AS c_address, - t5.c_nationkey AS c_nationkey, - t5.c_phone AS c_phone, - t5.c_acctbal AS c_acctbal, - t5.c_mktsegment AS c_mktsegment, - t5.c_comment AS c_comment, - t5.n_name AS n_name, - t5.r_name AS r_name -FROM ( - SELECT - t0.c_custkey AS c_custkey, - t0.c_name AS c_name, - t0.c_address AS c_address, - t0.c_nationkey AS c_nationkey, - t0.c_phone AS c_phone, - t0.c_acctbal AS c_acctbal, - t0.c_mktsegment AS c_mktsegment, - t0.c_comment AS c_comment, - t1.n_name AS n_name, - t2.r_name AS r_name - FROM tpch_customer AS t0 - INNER JOIN tpch_nation AS t1 - ON t0.c_nationkey = t1.n_nationkey - INNER JOIN tpch_region AS t2 - ON t1.n_regionkey = t2.r_regionkey -) AS t5 + t3.c_custkey, + t3.c_name, + t3.c_address, + t3.c_nationkey, + t3.c_phone, + t3.c_acctbal, + t3.c_mktsegment, + t3.c_comment, + t4.n_name, + t5.r_name +FROM tpch_customer AS t3 +INNER JOIN tpch_nation AS t4 + ON t3.c_nationkey = t4.n_nationkey +INNER JOIN tpch_region AS t5 + ON t4.n_regionkey = t5.r_regionkey SEMI JOIN ( SELECT - * + t9.n_name, + t9."Sum(c_acctbal)" FROM ( SELECT - t5.n_name AS n_name, - SUM(t5.c_acctbal) AS "Sum(c_acctbal)" + t8.n_name, + SUM(t8.c_acctbal) AS "Sum(c_acctbal)" FROM ( SELECT - t0.c_custkey AS c_custkey, - t0.c_name AS c_name, - t0.c_address AS c_address, - t0.c_nationkey AS 
c_nationkey, - t0.c_phone AS c_phone, - t0.c_acctbal AS c_acctbal, - t0.c_mktsegment AS c_mktsegment, - t0.c_comment AS c_comment, - t1.n_name AS n_name, - t2.r_name AS r_name - FROM tpch_customer AS t0 - INNER JOIN tpch_nation AS t1 - ON t0.c_nationkey = t1.n_nationkey - INNER JOIN tpch_region AS t2 - ON t1.n_regionkey = t2.r_regionkey - ) AS t5 + t3.c_custkey, + t3.c_name, + t3.c_address, + t3.c_nationkey, + t3.c_phone, + t3.c_acctbal, + t3.c_mktsegment, + t3.c_comment, + t4.n_name, + t5.r_name + FROM tpch_customer AS t3 + INNER JOIN tpch_nation AS t4 + ON t3.c_nationkey = t4.n_nationkey + INNER JOIN tpch_region AS t5 + ON t4.n_regionkey = t5.r_regionkey + ) AS t8 GROUP BY 1 - ) AS t6 + ) AS t9 ORDER BY - t6."Sum(c_acctbal)" DESC + t9."Sum(c_acctbal)" DESC LIMIT 10 -) AS t8 - ON t5.n_name = t8.n_name \ No newline at end of file +) AS t12 + ON t4.n_name = t12.n_name \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql index 55fd82ce48b9..fe46767993fe 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql @@ -1,8 +1,9 @@ SELECT - * + t1.dest, + t1."Mean(arrdelay)" FROM ( SELECT - t0.dest AS dest, + t0.dest, AVG(t0.arrdelay) AS "Mean(arrdelay)" FROM airlines AS t0 GROUP BY diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql index cf9c5a649d4b..aa75e2be0ae1 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql @@ -1,53 +1,53 @@ SELECT - t8.region AS region, - t8.year AS year, - t8.total - t9.total AS yoy_change + t13.region, + t13.year, + t13.total - t15.total AS yoy_change FROM ( SELECT - t7.region AS region, - EXTRACT('year' FROM t7.odate) AS year, - CAST(SUM(t7.amount) AS DOUBLE) AS total + t11.region, + EXTRACT('year' FROM t11.odate) AS year, + CAST(SUM(t11.amount) AS DOUBLE) AS total FROM ( SELECT - t0.r_name AS region, - t1.n_name AS nation, - t3.o_totalprice AS amount, - CAST(t3.o_orderdate AS TIMESTAMP) AS odate - FROM tpch_region AS t0 - INNER JOIN tpch_nation AS t1 - ON t0.r_regionkey = t1.n_regionkey - INNER JOIN tpch_customer AS t2 - ON t2.c_nationkey = t1.n_nationkey - INNER JOIN tpch_orders AS t3 - ON t3.o_custkey = t2.c_custkey - ) AS t7 + t4.r_name AS region, + t5.n_name AS nation, + t7.o_totalprice AS amount, + CAST(t7.o_orderdate AS TIMESTAMP) AS odate + FROM tpch_region AS t4 + INNER JOIN tpch_nation AS t5 + ON t4.r_regionkey = t5.n_regionkey + INNER JOIN tpch_customer AS t6 + ON t6.c_nationkey = t5.n_nationkey + INNER JOIN tpch_orders AS t7 + ON t7.o_custkey = t6.c_custkey + ) AS t11 GROUP BY 1, 2 -) AS t8 +) AS t13 INNER JOIN ( SELECT - t7.region AS region, - EXTRACT('year' FROM t7.odate) AS year, - CAST(SUM(t7.amount) AS DOUBLE) AS total + t11.region, + EXTRACT('year' FROM t11.odate) AS year, + CAST(SUM(t11.amount) AS DOUBLE) AS total FROM ( SELECT - t0.r_name AS region, - t1.n_name AS nation, - t3.o_totalprice AS amount, - CAST(t3.o_orderdate AS TIMESTAMP) AS odate - FROM tpch_region AS t0 - INNER JOIN tpch_nation AS t1 - ON t0.r_regionkey = t1.n_regionkey - INNER JOIN tpch_customer AS t2 - ON t2.c_nationkey = t1.n_nationkey - 
INNER JOIN tpch_orders AS t3 - ON t3.o_custkey = t2.c_custkey - ) AS t7 + t4.r_name AS region, + t5.n_name AS nation, + t7.o_totalprice AS amount, + CAST(t7.o_orderdate AS TIMESTAMP) AS odate + FROM tpch_region AS t4 + INNER JOIN tpch_nation AS t5 + ON t4.r_regionkey = t5.n_regionkey + INNER JOIN tpch_customer AS t6 + ON t6.c_nationkey = t5.n_nationkey + INNER JOIN tpch_orders AS t7 + ON t7.o_custkey = t6.c_custkey + ) AS t11 GROUP BY 1, 2 -) AS t9 - ON t8.year = ( - t9.year - CAST(1 AS TINYINT) +) AS t15 + ON t13.year = ( + t15.year - CAST(1 AS TINYINT) ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql index 6407e4987c29..102f8d7fc152 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql @@ -1 +1,26 @@ -COUNT(*) \ No newline at end of file +SELECT + COUNT(*) AS "CountStar()" +FROM ( + SELECT + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month + FROM functional_alltypes AS t0 + WHERE + t0.timestamp_col < ( + MAKE_TIMESTAMP(2010, 1, 1, 0, 0, 0.0) + INTERVAL '3' MONTH + ) + AND t0.timestamp_col < ( + CAST(CURRENT_TIMESTAMP AS TIMESTAMP) + INTERVAL '10' DAY + ) +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py index e44d7f2cff3a..434705fa8742 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py @@ -8,6 +8,8 @@ star2 = ibis.table( name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} ) -joinchain = star1.inner_join(star2, star1.foo_id == star2.foo_id) +joinchain = star1.inner_join(star2, star1.foo_id == star2.foo_id).select( + star1.c, star1.f, star1.foo_id, star1.bar_id, (star1.f - star2.value1).name("diff") +) result = joinchain.filter(joinchain.diff > 1) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql index 7168e81df0e5..98fedc20876d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql @@ -1,17 +1,19 @@ SELECT - * + t5.c, + t5.f, + t5.foo_id, + t5.bar_id, + t5.diff FROM ( SELECT - t0.c AS c, - t0.f AS f, - t0.foo_id AS foo_id, - t0.bar_id AS bar_id, - t0.f - t1.value1 AS diff - FROM star1 AS t0 - INNER JOIN star2 AS t1 - ON t0.foo_id = t1.foo_id -) AS t3 + t2.c, + t2.f, + t2.foo_id, + t2.bar_id, + t2.f - t3.value1 AS diff + FROM star1 AS t2 + INNER JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id +) AS t5 WHERE - ( - t3.diff > CAST(1 AS TINYINT) - ) \ No newline at end of file + t5.diff > CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql index 136c64d28e4e..34036b117531 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql @@ -1,8 +1,15 @@ SELECT - * + t0.a, + t0.b, + t0.c, + t0.d, + t0.e, + t0.f, + t0.g, + t0.h, + t0.i, + t0.j, + t0.k FROM alltypes AS t0 WHERE - ( - t0.a > CAST(0 AS TINYINT) - ) - AND t0.f BETWEEN CAST(0 AS TINYINT) AND CAST(1 AS TINYINT) \ No newline at end of file + t0.a > CAST(0 AS TINYINT) AND t0.f BETWEEN CAST(0 AS TINYINT) AND CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py index b89e0510e17a..45ea3c0684a0 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py @@ -8,6 +8,8 @@ star2 = ibis.table( name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} ) -joinchain = star1.inner_join(star2, star1.foo_id == star2.foo_id) +joinchain = star1.inner_join(star2, star1.foo_id == star2.foo_id).select( + star1.c, star1.f, star1.foo_id, star1.bar_id, star2.value1, star2.value3 +) result = joinchain.filter(joinchain.f > 0, joinchain.value3 < 1000) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql index 27f0563003e4..3f187d69a7fc 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql @@ -1,20 +1,21 @@ SELECT - * + t5.c, + t5.f, + t5.foo_id, + t5.bar_id, + t5.value1, + t5.value3 FROM ( SELECT - t0.c AS c, - t0.f AS f, - t0.foo_id AS foo_id, - t0.bar_id AS bar_id, - t1.value1 AS value1, - t1.value3 AS value3 - FROM star1 AS t0 - INNER JOIN star2 AS t1 - ON t0.foo_id = t1.foo_id -) AS t3 + t2.c, + t2.f, + t2.foo_id, + t2.bar_id, + t3.value1, + t3.value3 + FROM star1 AS t2 + INNER JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id +) AS t5 WHERE - ( - t3.f > CAST(0 AS TINYINT) - ) AND ( - t3.value3 < CAST(1000 AS SMALLINT) - ) \ No newline at end of file + t5.f > CAST(0 AS TINYINT) AND t5.value3 < CAST(1000 AS SMALLINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_count/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_count/out.sql index 214ec1ede144..35e4fe0adc24 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_count/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_count/out.sql @@ -1,20 +1,14 @@ SELECT - t2.foo_id AS foo_id, - t2.total AS total + t1.foo_id, + t1.total FROM ( SELECT - * - FROM ( - SELECT - t0.foo_id AS foo_id, - SUM(t0.f) AS total, - COUNT(*) AS "CountStar()" - FROM star1 AS t0 - GROUP BY - 1 - ) AS t1 - WHERE - ( - t1."CountStar()" > CAST(100 AS TINYINT) - ) -) AS t2 \ No newline at end of file + t0.foo_id, + SUM(t0.f) AS total, + COUNT(*) AS "CountStar()" + FROM star1 AS t0 + GROUP BY + 1 +) AS t1 +WHERE + t1."CountStar()" > CAST(100 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_sum/out.sql 
b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_sum/out.sql index 307170b0f208..47945167c00a 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_sum/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_sum/out.sql @@ -1,14 +1,13 @@ SELECT - * + t1.foo_id, + t1.total FROM ( SELECT - t0.foo_id AS foo_id, + t0.foo_id, SUM(t0.f) AS total FROM star1 AS t0 GROUP BY 1 ) AS t1 WHERE - ( - t1.total > CAST(10 AS TINYINT) - ) \ No newline at end of file + t1.total > CAST(10 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/single/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/single/out.sql index 29e72c8336aa..dbf4aadac203 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/single/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/single/out.sql @@ -1,5 +1,5 @@ SELECT - t0.foo_id AS foo_id, + t0.foo_id, SUM(t0.f) AS total FROM star1 AS t0 GROUP BY diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/two/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/two/out.sql index 116832bf3c3a..be9b430bd3d3 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/two/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/two/out.sql @@ -1,6 +1,6 @@ SELECT - t0.foo_id AS foo_id, - t0.bar_id AS bar_id, + t0.foo_id, + t0.bar_id, SUM(t0.f) AS total FROM star1 AS t0 GROUP BY diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_between/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_between/out.sql index a944dbd5c958..c4f686443cee 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_between/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_between/out.sql @@ -1 +1,3 @@ -t0.double_col BETWEEN CAST(5 AS TINYINT) AND CAST(10 AS TINYINT) AS tmp \ No newline at end of file +SELECT + t0.double_col BETWEEN CAST(5 AS TINYINT) AND CAST(10 AS TINYINT) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/and/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/and/out.sql index 6e3e0443a532..d2e722d4fa18 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/and/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/and/out.sql @@ -1,5 +1,7 @@ -( - t0.double_col > CAST(0 AS TINYINT) -) AND ( - t0.double_col < CAST(5 AS TINYINT) -) AS tmp \ No newline at end of file +SELECT + ( + t0.double_col > CAST(0 AS TINYINT) + ) AND ( + t0.double_col < CAST(5 AS TINYINT) + ) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/or/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/or/out.sql index b0f919bdac1a..4b0542464299 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/or/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/or/out.sql @@ -1,5 +1,7 @@ -( - t0.double_col < CAST(0 AS TINYINT) -) OR ( - t0.double_col > CAST(5 AS TINYINT) -) AS tmp \ No newline at end of file +SELECT + ( + t0.double_col < CAST(0 AS TINYINT) + ) OR ( + t0.double_col > CAST(5 AS TINYINT) + ) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_sql/test_coalesce/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_coalesce/out.sql index 233633ba658a..df5a9329fcb4 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_coalesce/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_coalesce/out.sql @@ -1,5 +1,7 @@ -COALESCE( - CASE WHEN t0.double_col > CAST(30 AS TINYINT) THEN t0.double_col ELSE NULL END, - NULL, - t0.float_col -) AS tmp \ No newline at end of file +SELECT + COALESCE( + CASE WHEN t0.double_col > CAST(30 AS TINYINT) THEN t0.double_col ELSE NULL END, + NULL, + t0.float_col + ) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/eq/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/eq/out.sql index 19a939098145..2ad44306e1cd 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/eq/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/eq/out.sql @@ -1 +1,3 @@ -t0.double_col = CAST(5 AS TINYINT) AS tmp \ No newline at end of file +SELECT + t0.double_col = CAST(5 AS TINYINT) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ge/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ge/out.sql index 1c278dc0cb73..8b722a819754 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ge/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ge/out.sql @@ -1 +1,3 @@ -t0.double_col >= CAST(5 AS TINYINT) AS tmp \ No newline at end of file +SELECT + t0.double_col >= CAST(5 AS TINYINT) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/gt/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/gt/out.sql index 4fbdd9ab3b1c..ca8c8d134d60 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/gt/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/gt/out.sql @@ -1 +1,3 @@ -t0.double_col > CAST(5 AS TINYINT) AS tmp \ No newline at end of file +SELECT + t0.double_col > CAST(5 AS TINYINT) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/le/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/le/out.sql index 41acc5d90cad..53bf7d0d2dbb 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/le/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/le/out.sql @@ -1 +1,3 @@ -t0.double_col <= CAST(5 AS TINYINT) AS tmp \ No newline at end of file +SELECT + t0.double_col <= CAST(5 AS TINYINT) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/lt/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/lt/out.sql index a6fb94a63f56..627be1840789 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/lt/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/lt/out.sql @@ -1 +1,3 @@ -t0.double_col < CAST(5 AS TINYINT) AS tmp \ No newline at end of file +SELECT + t0.double_col < CAST(5 AS TINYINT) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ne/out.sql 
b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ne/out.sql index 98a382b28167..685a418a8eb4 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ne/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ne/out.sql @@ -1 +1,3 @@ -t0.double_col <> CAST(5 AS TINYINT) AS tmp \ No newline at end of file +SELECT + t0.double_col <> CAST(5 AS TINYINT) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_cte_factor_distinct_but_equal/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_cte_factor_distinct_but_equal/out.sql index 4e808e0ff710..1aa27939686a 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_cte_factor_distinct_but_equal/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_cte_factor_distinct_but_equal/out.sql @@ -1,20 +1,20 @@ SELECT - t2.g AS g, - t2.metric AS metric + t3.g, + t3.metric FROM ( SELECT - t0.g AS g, + t0.g, SUM(t0.f) AS metric FROM alltypes AS t0 GROUP BY 1 -) AS t2 +) AS t3 INNER JOIN ( SELECT - t1.g AS g, + t1.g, SUM(t1.f) AS metric FROM alltypes AS t1 GROUP BY 1 -) AS t4 - ON t2.g = t4.g \ No newline at end of file +) AS t6 + ON t3.g = t6.g \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/count_distinct/out.sql index e2c1fc57b53f..37382bcf5149 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/count_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/count_distinct/out.sql @@ -1 +1,3 @@ -COUNT(DISTINCT t0.int_col) AS nunique \ No newline at end of file +SELECT + COUNT(DISTINCT t0.int_col) AS nunique +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/group_by_count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/group_by_count_distinct/out.sql index c4d51f354005..14b6c6d83477 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/group_by_count_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/group_by_count_distinct/out.sql @@ -1,5 +1,5 @@ SELECT - t0.string_col AS string_col, + t0.string_col, COUNT(DISTINCT t0.int_col) AS nunique FROM functional_alltypes AS t0 GROUP BY diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/projection_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/projection_distinct/out.sql index f98b3697b64f..483b4fef6f49 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/projection_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/projection_distinct/out.sql @@ -2,7 +2,7 @@ SELECT DISTINCT * FROM ( SELECT - t0.string_col AS string_col, - t0.int_col AS int_col + t0.string_col, + t0.int_col FROM functional_alltypes AS t0 ) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/single_column_projection_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/single_column_projection_distinct/out.sql index 636796e7e04d..d38aa10366c4 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/single_column_projection_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/single_column_projection_distinct/out.sql @@ -2,6 +2,6 @@ SELECT DISTINCT * FROM ( SELECT - t0.string_col AS 
string_col + t0.string_col FROM functional_alltypes AS t0 ) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e1.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e1.sql index c84bf63f1858..b0be257b254f 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e1.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e1.sql @@ -1,19 +1,13 @@ SELECT - * + t0.key1, + t0.key2, + t0.value1 FROM foo_t AS t0 WHERE EXISTS( - ( - SELECT - CAST(1 AS TINYINT) AS "1" - FROM ( - SELECT - * - FROM bar_t AS t1 - WHERE - ( - t0.key1 = t1.key1 - ) - ) AS t2 - ) + SELECT + CAST(1 AS TINYINT) AS "1" + FROM bar_t AS t1 + WHERE + t0.key1 = t1.key1 ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e2.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e2.sql index 83416d431936..f397c2b7251e 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e2.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e2.sql @@ -1,23 +1,17 @@ SELECT - * + t0.key1, + t0.key2, + t0.value1 FROM foo_t AS t0 WHERE EXISTS( - ( - SELECT - CAST(1 AS TINYINT) AS "1" - FROM ( - SELECT - * - FROM bar_t AS t1 - WHERE - ( - ( - t0.key1 = t1.key1 - ) AND ( - t1.key2 = 'foo' - ) - ) - ) AS t2 - ) + SELECT + CAST(1 AS TINYINT) AS "1" + FROM bar_t AS t1 + WHERE + ( + t0.key1 = t1.key1 + ) AND ( + t1.key2 = 'foo' + ) ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_filter_group_by_agg_with_same_name/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_filter_group_by_agg_with_same_name/out.sql index 13e53cf4bbd7..c0ba260a78bb 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_filter_group_by_agg_with_same_name/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_filter_group_by_agg_with_same_name/out.sql @@ -1,14 +1,13 @@ SELECT - * + t1.int_col, + t1.bigint_col FROM ( SELECT - t0.int_col AS int_col, + t0.int_col, SUM(t0.bigint_col) AS bigint_col FROM t AS t0 GROUP BY 1 ) AS t1 WHERE - ( - t1.bigint_col = CAST(60 AS TINYINT) - ) \ No newline at end of file + t1.bigint_col = CAST(60 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_gh_1045/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_gh_1045/out.sql new file mode 100644 index 000000000000..b9a81bb4916f --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_gh_1045/out.sql @@ -0,0 +1,37 @@ +SELECT + t5.t1_id1, + t5.t1_val1, + t10.id3, + t10.val2, + t10.dt, + t10.t3_val2, + t10.id2a, + t10.id2b, + t10.val2_right +FROM ( + SELECT + t0.id1 AS t1_id1, + t0.val1 AS t1_val1 + FROM test1 AS t0 +) AS t5 +LEFT OUTER JOIN ( + SELECT + t7.id3, + t7.val2, + t7.dt, + t7.t3_val2, + t3.id2a, + t3.id2b, + t3.val2 AS val2_right + FROM ( + SELECT + CAST(t1.id3 AS BIGINT) AS id3, + t1.val2, + t1.dt, + CAST(t1.id3 AS BIGINT) AS t3_val2 + FROM test3 AS t1 + ) AS t7 + INNER JOIN test2 AS t3 + ON t3.id2b = t7.id3 +) AS t10 + ON t5.t1_id1 = t10.id2a \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/isnull/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/isnull/out.sql index 96dd80efc156..d06c0383bb09 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/isnull/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/isnull/out.sql @@ -1 +1,3 @@ -t0.double_col IS NULL AS tmp \ No newline at end of file 
+SELECT + t0.double_col IS NULL AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql index b25cb9418af6..f33c3466083a 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql @@ -1 +1,3 @@ -t0.double_col IS NOT NULL AS tmp \ No newline at end of file +SELECT + NOT t0.double_col IS NULL AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_join_just_materialized/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_join_just_materialized/out.sql index 7d32758ef61c..28ef4cadbb22 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_join_just_materialized/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_join_just_materialized/out.sql @@ -1,21 +1,21 @@ SELECT - t0.n_nationkey AS n_nationkey, - t0.n_name AS n_name, - t0.n_regionkey AS n_regionkey, - t0.n_comment AS n_comment, - t1.r_regionkey AS r_regionkey, - t1.r_name AS r_name, - t1.r_comment AS r_comment, - t2.c_custkey AS c_custkey, - t2.c_name AS c_name, - t2.c_address AS c_address, - t2.c_nationkey AS c_nationkey, - t2.c_phone AS c_phone, - t2.c_acctbal AS c_acctbal, - t2.c_mktsegment AS c_mktsegment, - t2.c_comment AS c_comment -FROM tpch_nation AS t0 -INNER JOIN tpch_region AS t1 - ON t0.n_regionkey = t1.r_regionkey -INNER JOIN tpch_customer AS t2 - ON t0.n_nationkey = t2.c_nationkey \ No newline at end of file + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment, + t4.r_regionkey, + t4.r_name, + t4.r_comment, + t5.c_custkey, + t5.c_name, + t5.c_address, + t5.c_nationkey, + t5.c_phone, + t5.c_acctbal, + t5.c_mktsegment, + t5.c_comment +FROM tpch_nation AS t3 +INNER JOIN tpch_region AS t4 + ON t3.n_regionkey = t4.r_regionkey +INNER JOIN tpch_customer AS t5 + ON t3.n_nationkey = t5.c_nationkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner/out.sql new file mode 100644 index 000000000000..9289a835e37a --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner/out.sql @@ -0,0 +1,11 @@ +SELECT + t2.r_regionkey, + t2.r_name, + t2.r_comment, + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment +FROM tpch_region AS t2 +INNER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner_select/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner_select/out.sql new file mode 100644 index 000000000000..38534295064f --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner_select/out.sql @@ -0,0 +1,8 @@ +SELECT + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment +FROM tpch_region AS t2 +INNER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left/out.sql new file mode 100644 index 000000000000..7048d19d0ba4 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left/out.sql @@ -0,0 +1,11 @@ +SELECT + t2.r_regionkey, + t2.r_name, + t2.r_comment, + 
t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment +FROM tpch_region AS t2 +LEFT OUTER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left_select/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left_select/out.sql new file mode 100644 index 000000000000..26c408b5be1a --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left_select/out.sql @@ -0,0 +1,8 @@ +SELECT + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment +FROM tpch_region AS t2 +LEFT OUTER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer/out.sql new file mode 100644 index 000000000000..f14ac5c0d92e --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer/out.sql @@ -0,0 +1,11 @@ +SELECT + t2.r_regionkey, + t2.r_name, + t2.r_comment, + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment +FROM tpch_region AS t2 +FULL OUTER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer_select/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer_select/out.sql new file mode 100644 index 000000000000..1b339a3f247b --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer_select/out.sql @@ -0,0 +1,8 @@ +SELECT + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment +FROM tpch_region AS t2 +FULL OUTER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_limit_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_filter/out.sql index ec694656a01b..31e87b57f3ea 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_limit_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_filter/out.sql @@ -1,8 +1,9 @@ SELECT - * + t0.c, + t0.f, + t0.foo_id, + t0.bar_id FROM star1 AS t0 WHERE - ( - t0.f > CAST(0 AS TINYINT) - ) + t0.f > CAST(0 AS TINYINT) LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_limit_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_subquery/out.sql index 431ba054f9f9..ab4dd6df7158 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_limit_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_subquery/out.sql @@ -1,5 +1,8 @@ SELECT - * + t1.c, + t1.f, + t1.foo_id, + t1.bar_id FROM ( SELECT * @@ -7,6 +10,4 @@ FROM ( LIMIT 10 ) AS t1 WHERE - ( - t1.f > CAST(0 AS TINYINT) - ) \ No newline at end of file + t1.f > CAST(0 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/decompiled.py b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/decompiled.py index 27d91a4d745a..593c3faf221f 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/decompiled.py @@ -9,7 +9,9 @@ name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} ) agg = star1.aggregate([star1.f.sum().name("total")], by=[star1.foo_id]) -joinchain = agg.inner_join(star2, 
agg.foo_id == star2.foo_id) +joinchain = agg.inner_join(star2, agg.foo_id == star2.foo_id).select( + agg.foo_id, agg.total, star2.value1 +) f = joinchain.filter(joinchain.total > 100) result = f.order_by(f.total.desc()) diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql index 3b947354fb10..b6c7dab91969 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql @@ -1,28 +1,24 @@ SELECT - * + t6.foo_id, + t6.total, + t6.value1 FROM ( SELECT - * + t4.foo_id, + t4.total, + t2.value1 FROM ( SELECT - t2.foo_id AS foo_id, - t2.total AS total, - t1.value1 AS value1 - FROM ( - SELECT - t0.foo_id AS foo_id, - SUM(t0.f) AS total - FROM star1 AS t0 - GROUP BY - 1 - ) AS t2 - INNER JOIN star2 AS t1 - ON t2.foo_id = t1.foo_id + t0.foo_id, + SUM(t0.f) AS total + FROM star1 AS t0 + GROUP BY + 1 ) AS t4 - WHERE - ( - t4.total > CAST(100 AS TINYINT) - ) -) AS t5 + INNER JOIN star2 AS t2 + ON t4.foo_id = t2.foo_id +) AS t6 +WHERE + t6.total > CAST(100 AS TINYINT) ORDER BY - t5.total DESC \ No newline at end of file + t6.total DESC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql index 0a2d70812c52..d9c97bc180ca 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql @@ -1,20 +1,20 @@ SELECT - t0.x1 AS x1, - t0.y1 AS y1, - t1.x2 AS x2, - t6.x3 AS x3, - t6.y2 AS y2, - t6.x4 AS x4 -FROM t1 AS t0 -INNER JOIN t2 AS t1 - ON t0.x1 = t1.x2 + t4.x1, + t4.y1, + t5.x2, + t11.x3, + t11.y2, + t11.x4 +FROM t1 AS t4 +INNER JOIN t2 AS t5 + ON t4.x1 = t5.x2 INNER JOIN ( SELECT - t2.x3 AS x3, - t2.y2 AS y2, - t3.x4 AS x4 - FROM t3 AS t2 - INNER JOIN t4 AS t3 - ON t2.x3 = t3.x4 -) AS t6 - ON t0.y1 = t6.y2 \ No newline at end of file + t6.x3, + t6.y2, + t7.x4 + FROM t3 AS t6 + INNER JOIN t4 AS t7 + ON t6.x3 = t7.x4 +) AS t11 + ON t4.y1 = t11.y2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_mutate_filter_join_no_cross_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_mutate_filter_join_no_cross_join/out.sql index 28adfeef1952..82946b9a13bc 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_mutate_filter_join_no_cross_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_mutate_filter_join_no_cross_join/out.sql @@ -1,11 +1,5 @@ SELECT - t1.person_id AS person_id -FROM ( - SELECT - * - FROM person AS t0 - WHERE - ( - CAST(400 AS SMALLINT) <= CAST(40 AS TINYINT) - ) -) AS t1 \ No newline at end of file + t0.person_id +FROM person AS t0 +WHERE + CAST(400 AS SMALLINT) <= CAST(40 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_negate/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_negate/out.sql index 812bef8825b8..3ea20ad88c56 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_negate/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_negate/out.sql @@ -1,3 +1,5 @@ -NOT ( - t0.double_col > CAST(0 AS TINYINT) -) AS tmp \ No newline at end of file +SELECT + NOT ( + t0.double_col > CAST(0 AS TINYINT) + ) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql index d27c92e5a7a9..580263e1f156 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql @@ -1,24 +1,25 @@ SELECT - * + t7.ancestor_node_sort_order, + t7.n FROM ( SELECT - t4.ancestor_node_sort_order AS ancestor_node_sort_order, + t6.ancestor_node_sort_order, CAST(1 AS TINYINT) AS n FROM ( SELECT - t0.product_id AS product_id, - t2.ancestor_level_name AS ancestor_level_name, - t2.ancestor_level_number AS ancestor_level_number, - t2.ancestor_node_sort_order AS ancestor_node_sort_order, - t2.descendant_node_natural_key AS descendant_node_natural_key, - t2.product_level_name AS product_level_name - FROM facts AS t0 + t2.product_id, + t4.ancestor_level_name, + t4.ancestor_level_number, + t4.ancestor_node_sort_order, + t4.descendant_node_natural_key, + t4.product_level_name + FROM facts AS t2 INNER JOIN ( SELECT - t1.ancestor_level_name AS ancestor_level_name, - t1.ancestor_level_number AS ancestor_level_number, - t1.ancestor_node_sort_order AS ancestor_node_sort_order, - t1.descendant_node_natural_key AS descendant_node_natural_key, + t1.ancestor_level_name, + t1.ancestor_level_number, + t1.ancestor_node_sort_order, + t1.descendant_node_natural_key, CONCAT( LPAD('-', ( t1.ancestor_level_number - CAST(1 AS TINYINT) @@ -26,11 +27,11 @@ FROM ( t1.ancestor_level_name ) AS product_level_name FROM products AS t1 - ) AS t2 - ON t0.product_id = t2.descendant_node_natural_key - ) AS t4 + ) AS t4 + ON t2.product_id = t4.descendant_node_natural_key + ) AS t6 GROUP BY 1 -) AS t5 +) AS t7 ORDER BY - t5.ancestor_node_sort_order ASC \ No newline at end of file + t7.ancestor_node_sort_order ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql index 60077ed03a09..a3df1de479ac 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql @@ -1,51 +1,61 @@ SELECT - t9.customer_id AS customer_id, - t9.first_name AS first_name, - t9.last_name AS last_name, - t9.first_order AS first_order, - t9.most_recent_order AS most_recent_order, - t9.number_of_orders AS number_of_orders, - t11.total_amount AS customer_lifetime_value + t15.customer_id, + t15.first_name, + t15.last_name, + t15.first_order, + t15.most_recent_order, + t15.number_of_orders, + t13.total_amount AS customer_lifetime_value FROM ( SELECT - t0.customer_id AS customer_id, - t0.first_name AS first_name, - t0.last_name AS last_name, - t5.first_order AS first_order, - t5.most_recent_order AS most_recent_order, - t5.number_of_orders AS number_of_orders - FROM customers AS t0 - LEFT OUTER JOIN ( + t12.customer_id, + t12.first_name, + t12.last_name, + t12.first_order, + t12.most_recent_order, + t12.number_of_orders + FROM ( SELECT - t1.customer_id AS customer_id, - MIN(t1.order_date) AS first_order, - MAX(t1.order_date) AS most_recent_order, - COUNT(t1.order_id) AS number_of_orders - FROM orders AS t1 - GROUP BY - 1 - ) AS t5 - ON t0.customer_id = t5.customer_id -) AS t9 + t3.customer_id, + t3.first_name, + t3.last_name, + t8.customer_id AS customer_id_right, + t8.first_order, + t8.most_recent_order, + t8.number_of_orders + FROM customers AS t3 + LEFT OUTER JOIN ( + SELECT + 
t2.customer_id, + MIN(t2.order_date) AS first_order, + MAX(t2.order_date) AS most_recent_order, + COUNT(t2.order_id) AS number_of_orders + FROM orders AS t2 + GROUP BY + 1 + ) AS t8 + ON t3.customer_id = t8.customer_id + ) AS t12 +) AS t15 LEFT OUTER JOIN ( SELECT - t7.customer_id AS customer_id, - SUM(t7.amount) AS total_amount + t9.customer_id, + SUM(t9.amount) AS total_amount FROM ( SELECT - t2.payment_id AS payment_id, - t2.order_id AS order_id, - t2.payment_method AS payment_method, - t2.amount AS amount, - t3.order_id AS order_id_right, - t3.customer_id AS customer_id, - t3.order_date AS order_date, - t3.status AS status - FROM payments AS t2 - LEFT OUTER JOIN orders AS t3 - ON t2.order_id = t3.order_id - ) AS t7 + t4.payment_id, + t4.order_id, + t4.payment_method, + t4.amount, + t5.order_id AS order_id_right, + t5.customer_id, + t5.order_date, + t5.status + FROM payments AS t4 + LEFT OUTER JOIN orders AS t5 + ON t4.order_id = t5.order_id + ) AS t9 GROUP BY 1 -) AS t11 - ON t9.customer_id = t11.customer_id \ No newline at end of file +) AS t13 + ON t15.customer_id = t13.customer_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cross_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cross_join/out.sql index 0fe7408ed8c4..fc3f08afa72b 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cross_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cross_join/out.sql @@ -1,16 +1,16 @@ SELECT - t0.id AS id, - t0.personal AS personal, - t0.family AS family, - t1.taken AS taken, - t1.person AS person, - t1.quant AS quant, - t1.reading AS reading, - t2.id AS id_right, - t2.site AS site, - t2.dated AS dated -FROM person AS t0 -INNER JOIN survey AS t1 - ON t0.id = t1.person -INNER JOIN visited AS t2 - ON t2.id = t1.taken \ No newline at end of file + t3.id, + t3.personal, + t3.family, + t4.taken, + t4.person, + t4.quant, + t4.reading, + t5.id AS id_right, + t5.site, + t5.dated +FROM person AS t3 +INNER JOIN survey AS t4 + ON t3.id = t4.person +INNER JOIN visited AS t5 + ON t5.id = t4.taken \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_not_exists/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_not_exists/out.sql index 7c62bce20d7f..548a1efef2ec 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_not_exists/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_not_exists/out.sql @@ -1,21 +1,15 @@ SELECT - * + t0.key1, + t0.key2, + t0.value1 FROM foo_t AS t0 WHERE NOT ( EXISTS( - ( - SELECT - CAST(1 AS TINYINT) AS "1" - FROM ( - SELECT - * - FROM bar_t AS t1 - WHERE - ( - t0.key1 = t1.key1 - ) - ) AS t2 - ) + SELECT + CAST(1 AS TINYINT) AS "1" + FROM bar_t AS t1 + WHERE + t0.key1 = t1.key1 ) ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/column/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/column/out.sql index a401619ae2d8..bb666f269b2a 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/column/out.sql @@ -1,5 +1,8 @@ SELECT - * + t0.c, + t0.f, + t0.foo_id, + t0.bar_id FROM star1 AS t0 ORDER BY t0.f ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/random/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/random/out.sql index bd2fd25215ee..356b091282c5 100644 --- 
a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/random/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/random/out.sql @@ -1,5 +1,8 @@ SELECT - * + t0.c, + t0.f, + t0.foo_id, + t0.bar_id FROM star1 AS t0 ORDER BY RANDOM() ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by_expr/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by_expr/out.sql index 51532051bd3f..99a46813f652 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by_expr/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by_expr/out.sql @@ -1,13 +1,8 @@ SELECT - * -FROM ( - SELECT - * - FROM t AS t0 - WHERE - ( - t0.a = CAST(1 AS TINYINT) - ) -) AS t1 + t0.a, + t0.b +FROM t AS t0 +WHERE + t0.a = CAST(1 AS TINYINT) ORDER BY - CONCAT(t1.b, 'a') ASC \ No newline at end of file + CONCAT(t0.b, 'a') ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_searched_case/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_searched_case/out.sql index ae63fb4838b3..31c40c343501 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_searched_case/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_searched_case/out.sql @@ -1,7 +1,9 @@ -CASE - WHEN t0.f > CAST(0 AS TINYINT) - THEN t0.d * CAST(2 AS TINYINT) - WHEN t0.c < CAST(0 AS TINYINT) - THEN t0.a * CAST(2 AS TINYINT) - ELSE CAST(NULL AS BIGINT) -END AS tmp \ No newline at end of file +SELECT + CASE + WHEN t0.f > CAST(0 AS TINYINT) + THEN t0.d * CAST(2 AS TINYINT) + WHEN t0.c < CAST(0 AS TINYINT) + THEN t0.a * CAST(2 AS TINYINT) + ELSE CAST(NULL AS BIGINT) + END AS tmp +FROM alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/anti.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/anti.sql index 3e0c659089b2..c598c1264a74 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/anti.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/anti.sql @@ -1,21 +1,25 @@ SELECT - * + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month FROM functional_alltypes AS t0 WHERE NOT ( EXISTS( - ( - SELECT - CAST(1 AS TINYINT) AS "1" - FROM ( - SELECT - * - FROM functional_alltypes AS t1 - WHERE - ( - t0.string_col = t1.string_col - ) - ) AS t2 - ) + SELECT + CAST(1 AS TINYINT) AS "1" + FROM functional_alltypes AS t1 + WHERE + t0.string_col = t1.string_col ) ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/semi.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/semi.sql index 0f4cb117f736..16089afced58 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/semi.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/semi.sql @@ -1,19 +1,23 @@ SELECT - * + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month FROM functional_alltypes AS t0 WHERE EXISTS( - ( - SELECT - CAST(1 AS TINYINT) AS "1" - FROM ( - SELECT - * - FROM functional_alltypes AS t1 - WHERE - ( - t0.string_col = 
t1.string_col - ) - ) AS t2 - ) + SELECT + CAST(1 AS TINYINT) AS "1" + FROM functional_alltypes AS t1 + WHERE + t0.string_col = t1.string_col ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_join/out.sql index de06d3805e28..e9c93029c637 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_join/out.sql @@ -1,8 +1,8 @@ SELECT - t0.c AS c, - t0.f AS f, - t0.foo_id AS foo_id, - t0.bar_id AS bar_id -FROM star1 AS t0 -INNER JOIN star1 AS t1 - ON t0.foo_id = t1.bar_id \ No newline at end of file + t1.c, + t1.f, + t1.foo_id, + t1.bar_id +FROM star1 AS t1 +INNER JOIN star1 AS t3 + ON t1.foo_id = t3.bar_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_simple_case/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_simple_case/out.sql index 7dfc00dcf063..3575c8d6e653 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_simple_case/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_simple_case/out.sql @@ -1 +1,3 @@ -CASE t0.g WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS tmp \ No newline at end of file +SELECT + CASE t0.g WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS tmp +FROM alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_sort_aggregation_translation_failure/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_sort_aggregation_translation_failure/out.sql index 0f520487cbb8..e8f9420b4263 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_sort_aggregation_translation_failure/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_sort_aggregation_translation_failure/out.sql @@ -1,8 +1,9 @@ SELECT - * + t1.string_col, + t1.foo FROM ( SELECT - t0.string_col AS string_col, + t0.string_col, MAX(t0.double_col) AS foo FROM functional_alltypes AS t0 GROUP BY diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_subquery_aliased/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_subquery_aliased/out.sql index ab9ad3623170..e0f8941f3527 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_subquery_aliased/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_subquery_aliased/out.sql @@ -1,14 +1,14 @@ SELECT - t2.foo_id AS foo_id, - t2.total AS total, - t1.value1 AS value1 + t4.foo_id, + t4.total, + t2.value1 FROM ( SELECT - t0.foo_id AS foo_id, + t0.foo_id, SUM(t0.f) AS total FROM star1 AS t0 GROUP BY 1 -) AS t2 -INNER JOIN star2 AS t1 - ON t2.foo_id = t1.foo_id \ No newline at end of file +) AS t4 +INNER JOIN star2 AS t2 + ON t4.foo_id = t2.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql index 3ded1e24f732..0b1767f7a740 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql @@ -1,19 +1,21 @@ SELECT - * + t0.job, + t0.dept_id, + t0.year, + t0.y FROM foo AS t0 WHERE - ( - t0.y > ( + t0.y > ( + SELECT + AVG(t2.y) AS "Mean(y)" + FROM ( SELECT - AVG(t2.y) AS "Mean(y)" - FROM ( - SELECT - * - FROM foo AS t1 - WHERE - ( - t0.dept_id = t1.dept_id - ) - ) AS t2 - ) + t1.job, + 
t1.dept_id, + t1.year, + t1.y + FROM foo AS t1 + WHERE + t1.dept_id = t1.dept_id + ) AS t2 ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql index 631fc089852e..9f3f24cd76eb 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql @@ -1,33 +1,31 @@ SELECT - * + t9.p_partkey, + t9.ps_supplycost FROM ( SELECT - t0.p_partkey AS p_partkey, - t1.ps_supplycost AS ps_supplycost - FROM part AS t0 - INNER JOIN partsupp AS t1 - ON t0.p_partkey = t1.ps_partkey -) AS t5 + t3.p_partkey, + t4.ps_supplycost + FROM part AS t3 + INNER JOIN partsupp AS t4 + ON t3.p_partkey = t4.ps_partkey +) AS t9 WHERE - ( - t5.ps_supplycost = ( + t9.ps_supplycost = ( + SELECT + MIN(t11.ps_supplycost) AS "Min(ps_supplycost)" + FROM ( SELECT - MIN(t7.ps_supplycost) AS "Min(ps_supplycost)" + t10.ps_partkey, + t10.ps_supplycost FROM ( SELECT - * - FROM ( - SELECT - t1.ps_partkey AS ps_partkey, - t1.ps_supplycost AS ps_supplycost - FROM partsupp AS t1 - INNER JOIN supplier AS t2 - ON t2.s_suppkey = t1.ps_suppkey - ) AS t6 - WHERE - ( - t6.ps_partkey = t5.p_partkey - ) - ) AS t7 - ) + t5.ps_partkey, + t5.ps_supplycost + FROM partsupp AS t5 + INNER JOIN supplier AS t6 + ON t6.s_suppkey = t5.ps_suppkey + ) AS t10 + WHERE + t10.ps_partkey = t9.p_partkey + ) AS t11 ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/out.sql index 2c8cebaa0a08..e1914ac959bd 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/out.sql @@ -1,11 +1,10 @@ SELECT - * + t0.c, + t0.f, + t0.foo_id, + t0.bar_id FROM star1 AS t0 WHERE - ( - t0.f > CAST(0 AS TINYINT) - ) AND ( - t0.c < ( - t0.f * CAST(2 AS TINYINT) - ) + t0.f > CAST(0 AS TINYINT) AND t0.c < ( + t0.f * CAST(2 AS TINYINT) ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_uncorrelated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_uncorrelated_subquery/out.sql index 5240818910ca..d64e69ba3894 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_where_uncorrelated_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_uncorrelated_subquery/out.sql @@ -1,9 +1,12 @@ SELECT - * + t0.job, + t0.dept_id, + t0.year, + t0.y FROM foo AS t0 WHERE - t0.job IN (( + t0.job IN ( SELECT - t1.job AS job + t1.job FROM bar AS t1 - )) \ No newline at end of file + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/test_compiler.py b/ibis/backends/tests/sql/test_compiler.py index ec7b264ffbf3..bc789dba86ba 100644 --- a/ibis/backends/tests/sql/test_compiler.py +++ b/ibis/backends/tests/sql/test_compiler.py @@ -5,7 +5,6 @@ import pytest import ibis - from ibis.backends.tests.sql.conftest import to_sql from ibis.tests.util import assert_decompile_roundtrip, schemas_eq @@ -206,7 +205,7 @@ def test_subquery_where_location(snapshot): .aggregate(foo=lambda t: t.float_col.sum()) .foo.count() ) - out = Compiler.to_sql(expr, params={param: "20140101"}) + out = to_sql(expr, params={param: "20140101"}) snapshot.assert_match(out, 
"out.sql") # params get different auto incremented counter identifiers assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) diff --git a/ibis/backends/tests/sql/test_select_sql.py b/ibis/backends/tests/sql/test_select_sql.py index bca97afebb72..2ffaad9120f2 100644 --- a/ibis/backends/tests/sql/test_select_sql.py +++ b/ibis/backends/tests/sql/test_select_sql.py @@ -6,7 +6,7 @@ import ibis from ibis import _ from ibis.backends.tests.sql.conftest import to_sql -from ibis.tests.util import assert_decompile_roundtrip +from ibis.tests.util import assert_decompile_roundtrip, schemas_eq pytestmark = pytest.mark.duckdb @@ -35,7 +35,9 @@ id="limit_then_filter", ), param(lambda star1, **_: star1.count(), id="aggregate_table_count_metric"), - param(lambda star1, **_: star1.view(), id="self_reference_simple"), + # TODO: this is automatically simplified to `t`, so it's probably not a + # useful test to roundtrip *just* a call to view + # param(lambda star1, **_: star1.view(), id="self_reference_simple"), param(lambda t, **_: t, id="test_physical_table_reference_translate"), ], ) @@ -48,11 +50,7 @@ def test_select_sql(alltypes, star1, expr_fn, snapshot): def test_nameless_table(snapshot): # Generate a unique table name when we haven't passed on nameless = ibis.table([("key", "string")]) - assert to_sql(nameless) == f"SELECT t0.*\nFROM {nameless.op().name} t0" - - expr = ibis.table([("key", "string")], name="baz") - snapshot.assert_match(to_sql(expr), "out.sql") - assert_decompile_roundtrip(expr, snapshot) + assert nameless.op().name is not None def test_simple_joins(star1, star2, snapshot): @@ -248,19 +246,11 @@ def test_fuse_projections(snapshot): def test_projection_filter_fuse(projection_fuse_filter, snapshot): expr1, expr2, expr3 = projection_fuse_filter - sql1 = Compiler.to_sql(expr1) - sql2 = Compiler.to_sql(expr2) + sql1 = ibis.to_sql(expr1) + sql2 = ibis.to_sql(expr2) assert sql1 == sql2 - # ideally sql1 == sql3 but the projection logic has been a mess for a long - # time and causes bugs like - # - # https://github.com/ibis-project/ibis/issues/4003 - # - # so we're conservative in fusing projections and filters - # - # even though it may seem obvious what to do, it's not snapshot.assert_match(to_sql(expr3), "out.sql") @@ -427,15 +417,14 @@ def test_scalar_subquery_different_table(foo, bar, snapshot): snapshot.assert_match(to_sql(expr), "out.sql") -# TODO(kszucs): should do snapshot testing instead -# def test_exists_subquery_repr(t1, t2): -# # GH #660 +def test_exists_subquery(t1, t2, snapshot): + # GH #660 -# cond = t1.key1 == t2.key1 -# expr = t1[cond.any()] -# stmt = get_query(expr) + cond = t1.key1 == t2.key1 + expr = t1[cond.any()] -# repr(stmt.where[0]) + snapshot.assert_match(to_sql(expr), "out.sql") + assert repr(expr) def test_filter_inside_exists(snapshot): @@ -523,10 +512,6 @@ def test_join_filtered_tables_no_pushdown(snapshot): joined = tbl_a_filter.left_join(tbl_b_filter, ["year", "month", "day"]) result = joined[tbl_a_filter.value_a, tbl_b_filter.value_b] - join_op = result.op().table - assert join_op.left == tbl_a_filter.op() - assert join_op.right == tbl_b_filter.op() - snapshot.assert_match(to_sql(result), "out.sql") diff --git a/ibis/backends/tests/sql/test_sql.py b/ibis/backends/tests/sql/test_sql.py index ac4d07876d7e..d4bb8fe8033e 100644 --- a/ibis/backends/tests/sql/test_sql.py +++ b/ibis/backends/tests/sql/test_sql.py @@ -141,10 +141,7 @@ def test_named_expr(functional_alltypes, snapshot): ], ids=["inner", "left", "outer", "inner_select", "left_select", 
"outer_select"], ) -def test_joins(tpch_region, tpch_nation, expr_fn, snapshot): - region = tpch_region - nation = tpch_nation - +def test_joins(region, nation, expr_fn, snapshot): expr = expr_fn(region, nation) snapshot.assert_match(to_sql(expr), "out.sql") @@ -160,15 +157,12 @@ def test_join_just_materialized(nation, region, customer, snapshot): snapshot.assert_match(to_sql(joined), "out.sql") -def test_full_outer_join(tpch_region, tpch_nation): +def test_full_outer_join(region, nation): """Testing full outer join separately due to previous issue with outer join resulting in left outer join (issue #1773)""" - region = tpch_region - nation = tpch_nation - predicate = region.r_regionkey == nation.n_regionkey joined = region.outer_join(nation, predicate) - joined_sql_str = str(joined.compile()) + joined_sql_str = to_sql(joined) assert "full" in joined_sql_str.lower() assert "left" not in joined_sql_str.lower() diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 61633cafbc02..190a5d560537 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -772,7 +772,16 @@ def mean_and_std(v): id="collect", marks=[ pytest.mark.notimpl( - ["impala", "mysql", "sqlite", "mssql", "druid", "oracle", "exasol"], + [ + "impala", + "mysql", + "sqlite", + "datafusion", + "mssql", + "druid", + "oracle", + "exasol", + ], raises=com.OperationNotDefinedError, ), pytest.mark.broken( @@ -855,7 +864,7 @@ def test_reduction_ops( id="cond", marks=[ pytest.mark.notyet( - ["mysql"], + ["snowflake", "mysql"], raises=com.UnsupportedOperationError, reason="backend does not support filtered count distinct with more than one column", ), @@ -1021,7 +1030,7 @@ def test_quantile( id="covar_pop", marks=[ pytest.mark.notimpl( - ["dask", "polars", "druid"], + ["dask", "pandas", "polars", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1041,7 +1050,7 @@ def test_quantile( id="covar_samp", marks=[ pytest.mark.notimpl( - ["dask", "polars", "druid"], + ["dask", "pandas", "polars", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1061,7 +1070,7 @@ def test_quantile( id="corr_pop", marks=[ pytest.mark.notimpl( - ["dask", "druid"], + ["dask", "pandas", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1091,7 +1100,7 @@ def test_quantile( id="corr_samp", marks=[ pytest.mark.notimpl( - ["dask", "druid"], + ["dask", "pandas", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1131,7 +1140,7 @@ def test_quantile( id="covar_pop_bool", marks=[ pytest.mark.notimpl( - ["dask", "polars", "druid"], + ["dask", "pandas", "polars", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1155,7 +1164,7 @@ def test_quantile( id="corr_pop_bool", marks=[ pytest.mark.notimpl( - ["dask", "druid"], + ["dask", "pandas", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1685,17 +1694,16 @@ def test_grouped_case(backend, con): ["datafusion", "mssql", "polars", "exasol"], raises=com.OperationNotDefinedError ) @pytest.mark.broken( - ["dask"], + ["dask", "pandas"], reason="Dask and Pandas do not windowize this operation correctly", raises=AssertionError, ) @pytest.mark.notyet(["impala", "flink"], raises=com.UnsupportedOperationError) -@pytest.mark.notyet(["clickhouse"], raises=ClickhouseDatabaseError) -@pytest.mark.notyet(["druid", "trino"], raises=sa.exc.ProgrammingError) -@pytest.mark.notyet(["snowflake"], 
raises=SnowflakeProgrammingError) -@pytest.mark.notyet("mysql", raises=sa.exc.NotSupportedError) -@pytest.mark.notyet("oracle", raises=sa.exc.DatabaseError) -@pytest.mark.notyet("pyspark", raises=PysparkAnalysisException) +@pytest.mark.notyet(["clickhouse"], raises=ClickHouseDatabaseError) +@pytest.mark.notyet(["druid", "trino", "snowflake"], raises=sa.exc.ProgrammingError) +@pytest.mark.notyet(["mysql"], raises=sa.exc.NotSupportedError) +@pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError) +@pytest.mark.notyet(["pyspark"], raises=PySparkAnalysisException) def test_group_concat_over_window(backend, con): input_df = pd.DataFrame( { diff --git a/ibis/backends/tests/test_api.py b/ibis/backends/tests/test_api.py index 0b4d5ad07007..903bbff3ff39 100644 --- a/ibis/backends/tests/test_api.py +++ b/ibis/backends/tests/test_api.py @@ -119,7 +119,7 @@ def test_limit_chain(alltypes, expr_fn): "expr_fn", [ param(lambda t: t, id="alltypes table"), - param(lambda t: t.join(t.view(), t.id == t.view().int_col), id="self join"), + param(lambda t: t.join(t.view(), [("id", "int_col")]), id="self join"), ], ) def test_unbind(alltypes, expr_fn): diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 00a7b670ce80..7be4e9f68861 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -97,6 +97,7 @@ def test_array_concat_variadic(con): # Issues #2370 +@pytest.mark.notimpl(["datafusion"], raises=BaseException) @pytest.mark.notimpl(["flink"], raises=com.OperationNotDefinedError) @pytest.mark.notyet( ["postgres", "trino"], @@ -150,7 +151,7 @@ def test_np_array_literal(con): @pytest.mark.parametrize("idx", range(3)) -@pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["polars", "datafusion"], raises=com.OperationNotDefinedError) def test_array_index(con, idx): arr = [1, 2, 3] expr = ibis.literal(arr) @@ -531,7 +532,7 @@ def test_array_filter(con, input, output): @builtin_array @pytest.mark.notimpl( - ["mssql", "polars", "postgres"], + ["mssql", "pandas", "polars", "postgres"], raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) diff --git a/ibis/backends/tests/test_benchmarks.py b/ibis/backends/tests/test_benchmarks.py index a38760628e5a..42ea66e8f0f0 100644 --- a/ibis/backends/tests/test_benchmarks.py +++ b/ibis/backends/tests/test_benchmarks.py @@ -717,7 +717,6 @@ def test_repr_join(benchmark, customers, orders, orders_items, products): @pytest.mark.parametrize("overwrite", [True, False], ids=["overwrite", "no_overwrite"]) def test_insert_duckdb(benchmark, overwrite, tmp_path): pytest.importorskip("duckdb") - pytest.importorskip("duckdb_engine") n_rows = int(1e4) table_name = "t" @@ -806,7 +805,6 @@ def test_duckdb_to_pyarrow(benchmark, sql, ddb) -> None: def test_ibis_duckdb_to_pyarrow(benchmark, sql, ddb) -> None: pytest.importorskip("duckdb") - pytest.importorskip("duckdb_engine") con = ibis.duckdb.connect(ddb, read_only=True) @@ -876,7 +874,6 @@ def test_big_join_expr(benchmark, src, diff): def test_big_join_execute(benchmark, nrels): pytest.importorskip("duckdb") - pytest.importorskip("duckdb_engine") con = ibis.duckdb.connect() diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 4b8b2dfce135..d06e8a31374c 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -313,7 +313,7 @@ def test_rename_table(con, temp_table, temp_table_orig): assert temp_table_orig not in 
con.list_tables() -@mark.notimpl(["polars", "druid"]) +@mark.notimpl(["datafusion", "polars", "druid"]) @mark.never(["impala", "pyspark"], reason="No non-nullable datatypes") @mark.notyet( ["trino"], reason="trino doesn't support NOT NULL in its in-memory catalog" @@ -917,12 +917,10 @@ def test_self_join_memory_table(backend, con, monkeypatch): t = ibis.memtable({"x": [1, 2], "y": [2, 1], "z": ["a", "b"]}) t_view = t.view() expr = t.join(t_view, t.x == t_view.y).select("x", "y", "z", "z_right") - result = con.execute(expr).sort_values("x").reset_index(drop=True) expected = pd.DataFrame( {"x": [1, 2], "y": [2, 1], "z": ["a", "b"], "z_right": ["b", "a"]} ) - backend.assert_frame_equal(result, expected) @@ -1461,7 +1459,7 @@ def gen_test_name(con: BaseBackend) -> str: @mark.notimpl( - ["polars"], + ["datafusion", "polars"], raises=NotImplementedError, reason="overwriting not implemented in ibis for this backend", ) diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index e6392d99632c..a50fcb2b7e98 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -10,7 +10,12 @@ import ibis import ibis.expr.datatypes as dt from ibis import util -from ibis.backends.tests.errors import PyDeltaTableError, PySparkAnalysisException +from ibis.backends.tests.errors import ( + DuckDBNotImplementedException, + DuckDBParserException, + PyDeltaTableError, + PySparkAnalysisException, +) from ibis.formats.pyarrow import PyArrowType limit = [ @@ -127,7 +132,7 @@ def test_column_to_pyarrow_table_schema(awards_players): assert array.type == pa.string() or array.type == pa.large_string() -@pytest.mark.notimpl(["dask", "datafusion", "flink"]) +@pytest.mark.notimpl(["pandas", "dask", "datafusion", "flink"]) @pytest.mark.notyet( ["clickhouse"], raises=AssertionError, @@ -142,7 +147,7 @@ def test_table_pyarrow_batch_chunk_size(awards_players): util.consume(batch_reader) -@pytest.mark.notimpl(["dask", "datafusion", "flink"]) +@pytest.mark.notimpl(["pandas", "dask", "datafusion", "flink"]) @pytest.mark.notyet( ["clickhouse"], raises=AssertionError, @@ -345,8 +350,10 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): marks=[ pytest.mark.notyet(["impala"], reason="precision not supported"), pytest.mark.notyet(["duckdb"], reason="precision is out of range"), - pytest.mark.notyet(["druid", "trino"], raises=sa.exc.ProgrammingError), - pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError), + pytest.mark.notyet( + ["druid", "mssql", "snowflake", "trino"], + raises=sa.exc.ProgrammingError, + ), pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError), pytest.mark.notyet(["mysql"], raises=sa.exc.OperationalError), pytest.mark.notyet( @@ -394,11 +401,6 @@ def test_to_pyarrow_decimal(backend, dtype, pyarrow_dtype): reason="read_delta not yet implemented", ) @pytest.mark.notyet(["clickhouse"], raises=Exception) -@pytest.mark.notyet( - ["snowflake"], - raises=Exception, - reason="deltalake doesn't support nanosecond timestamps", -) @pytest.mark.notyet(["mssql", "pandas"], raises=PyDeltaTableError) @pytest.mark.notyet( ["druid"], diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 308dd19b1a09..4bdba3c9f13c 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -142,7 +142,6 @@ def test_isna(backend, alltypes, col, filt): [ "bigquery", "clickhouse", - "datafusion", "duckdb", "impala", "postgres", @@ -196,7 +195,9 @@ def test_coalesce(con, expr, 
expected): # TODO(dask) - identicalTo - #2553 -@pytest.mark.notimpl(["clickhouse", "dask", "pyspark", "mssql", "druid", "exasol"]) +@pytest.mark.notimpl( + ["clickhouse", "datafusion", "dask", "pyspark", "mssql", "druid", "exasol"] +) def test_identical_to(backend, alltypes, sorted_df): sorted_alltypes = alltypes.order_by("id") df = sorted_df @@ -622,7 +623,7 @@ def test_isin_notin(backend, alltypes, df, ibis_op, pandas_op): reason="dask doesn't support Series as isin/notin argument", raises=NotImplementedError, ) -@pytest.mark.notimpl(["druid"]) +@pytest.mark.notimpl(["datafusion", "druid"]) @pytest.mark.parametrize( ("ibis_op", "pandas_op"), [ @@ -640,13 +641,11 @@ def test_isin_notin(backend, alltypes, df, ibis_op, pandas_op): _.string_col.notin(_.string_col), lambda df: ~df.string_col.isin(df.string_col), id="notin_col", - marks=[pytest.mark.notimpl(["datafusion"])], ), param( (_.bigint_col + 1).notin(_.string_col.length() + 1), lambda df: ~(df.bigint_col.add(1)).isin(df.string_col.str.len().add(1)), id="notin_expr", - marks=[pytest.mark.notimpl(["datafusion"])], ), ], ) @@ -763,7 +762,8 @@ def test_select_filter_select(backend, alltypes, df): backend.assert_series_equal(result, expected) -@pytest.mark.broken(["mssql"], raises=sa.exc.OperationalError) +@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) +@pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError) def test_between(backend, alltypes, df): expr = alltypes.double_col.between(5, 10) result = expr.execute().rename("double_col") @@ -898,7 +898,7 @@ def test_isin_uncorrelated( @pytest.mark.broken(["polars"], reason="incorrect answer") -@pytest.mark.notimpl(["pyspark", "druid", "exasol"]) +@pytest.mark.notimpl(["datafusion", "pyspark", "druid", "exasol"]) @pytest.mark.notyet(["dask"], reason="not supported by the backend") def test_isin_uncorrelated_filter( backend, batting, awards_players, batting_df, awards_players_df @@ -1011,8 +1011,13 @@ def test_memtable_column_naming_mismatch(backend, con, monkeypatch, df, columns) ibis.memtable(df, columns=columns) +@pytest.mark.xfail( + raises=com.IntegrityError, reason="inner join convenience not implemented" +) @pytest.mark.notimpl( - ["dask", "pandas", "polars"], raises=NotImplementedError, reason="not a SQL backend" + ["dask", "datafusion", "pandas", "polars"], + raises=NotImplementedError, + reason="not a SQL backend", ) @pytest.mark.notimpl( ["pyspark"], reason="pyspark doesn't generate SQL", raises=NotImplementedError @@ -1300,6 +1305,7 @@ def test_hash_consistent(backend, alltypes): "pyspark", "risingwave", "sqlite", + "clickhouse", ] ) def test_hashbytes(backend, alltypes): @@ -1361,6 +1367,7 @@ def hash_256(col): "pandas", "dask", "bigquery", + "datafusion", "druid", "impala", "mssql", @@ -1369,9 +1376,9 @@ def hash_256(col): "postgres", "risingwave", "pyspark", + "snowflake", "sqlite", "exasol", - "snowflake", ] ) @pytest.mark.parametrize( @@ -1393,7 +1400,6 @@ def hash_256(col): reason="raises TrinoUserError", ), pytest.mark.broken(["polars"], reason="casts to 1672531200000000000"), - pytest.mark.broken(["datafusion"], reason="casts to 1672531200000000"), ], ), ], @@ -1417,9 +1423,9 @@ def test_try_cast_expected(con, from_val, to_type, expected): "postgres", "risingwave", "pyspark", + "snowflake", "sqlite", "exasol", - "snowflake", ] ) @pytest.mark.parametrize( @@ -1489,9 +1495,9 @@ def test_try_cast_table(backend, con): "postgres", "risingwave", "pyspark", + "snowflake", "sqlite", "exasol", - "snowflake", ] ) @pytest.mark.parametrize( @@ -1676,15 
+1682,10 @@ def test_static_table_slice(backend, slc, expected_count_fn): ids=str, ) @pytest.mark.notyet( - ["mysql", "trino"], + ["mysql", "snowflake", "trino"], raises=sa.exc.ProgrammingError, reason="backend doesn't support dynamic limit/offset", ) -@pytest.mark.notyet( - ["snowflake"], - raises=SnowflakeProgrammingError, - reason="backend doesn't support dynamic limit/offset", -) @pytest.mark.notimpl( ["mssql"], raises=sa.exc.CompileError, @@ -1734,7 +1735,7 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): @pytest.mark.notyet( - ["mysql", "trino"], + ["mysql", "snowflake", "trino"], raises=sa.exc.ProgrammingError, reason="backend doesn't support dynamic limit/offset", ) @@ -1742,11 +1743,6 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): ["exasol"], raises=sa.exc.CompileError, ) -@pytest.mark.notyet( - ["snowflake"], - raises=SnowflakeProgrammingError, - reason="backend doesn't support dynamic limit/offset", -) @pytest.mark.notyet( ["clickhouse"], raises=ClickHouseDatabaseError, diff --git a/ibis/backends/tests/test_interactive.py b/ibis/backends/tests/test_interactive.py index 111be16de5ee..8014ab7f1fa2 100644 --- a/ibis/backends/tests/test_interactive.py +++ b/ibis/backends/tests/test_interactive.py @@ -15,93 +15,80 @@ import pytest +import ibis from ibis import config -def test_interactive_execute_on_repr(con): - table = con.table("functional_alltypes") - expr = table.bigint_col.sum() - with config.option_context("interactive", True): - repr(expr) +@pytest.fixture +def queries(monkeypatch): + queries = [] + monkeypatch.setattr(ibis.options, "verbose", True) + monkeypatch.setattr(ibis.options, "verbose_log", queries.append) + monkeypatch.setattr(ibis.options, "interactive", True) + return queries + - assert len(con.executed_queries) > 0 +@pytest.fixture(scope="module") +def table(con): + return con.table("functional_alltypes") -def test_repr_png_is_none_in_interactive(con): - table = con.table("functional_alltypes") +def test_interactive_execute_on_repr(table, queries, snapshot): + repr(table.bigint_col.sum()) + snapshot.assert_match(queries[0], "out.sql") + +def test_repr_png_is_none_in_interactive(table): with config.option_context("interactive", True): assert table._repr_png_() is None -def test_repr_png_is_not_none_in_not_interactive(con): +def test_repr_png_is_not_none_in_not_interactive(table): pytest.importorskip("ibis.expr.visualize") - table = con.table("functional_alltypes") - with config.option_context("interactive", False), config.option_context( "graphviz_repr", True ): assert table._repr_png_() is not None -def test_default_limit(con, snapshot): - table = con.table("functional_alltypes").select("id", "bool_col") - - with config.option_context("interactive", True): - repr(table) - - snapshot.assert_match(con.executed_queries[0], "out.sql") - +def test_default_limit(table, snapshot, queries): + repr(table.select("id", "bool_col")) -def test_respect_set_limit(con, snapshot): - table = con.table("functional_alltypes").select("id", "bool_col").limit(10) + snapshot.assert_match(queries[0], "out.sql") - with config.option_context("interactive", True): - repr(table) - snapshot.assert_match(con.executed_queries[0], "out.sql") +def test_respect_set_limit(table, snapshot, queries): + repr(table.select("id", "bool_col").limit(10)) + snapshot.assert_match(queries[0], "out.sql") -def test_disable_query_limit(con, snapshot): - table = con.table("functional_alltypes").select("id", "bool_col") - with config.option_context("interactive", True): - with 
config.option_context("sql.default_limit", None): - repr(table) +def test_disable_query_limit(table, snapshot, queries): + assert ibis.options.sql.default_limit is None - snapshot.assert_match(con.executed_queries[0], "out.sql") + with config.option_context("sql.default_limit", 10): + assert ibis.options.sql.default_limit == 10 + repr(table.select("id", "bool_col")) + snapshot.assert_match(queries[0], "out.sql") -def test_interactive_non_compilable_repr_not_fail(con): - # #170 - table = con.table("functional_alltypes") - expr = table.string_col.topk(3) +def test_interactive_non_compilable_repr_does_not_fail(table): + """https://github.com/ibis-project/ibis/issues/170""" + repr(table.string_col.topk(3)) - # it works! - with config.option_context("interactive", True): - repr(expr) +def test_histogram_repr_no_query_execute(table, queries): + tier = table.double_col.histogram(10).name("bucket") + expr = table.group_by(tier).size() + expr._repr() -def test_histogram_repr_no_query_execute(con): - t = con.table("functional_alltypes") - tier = t.double_col.histogram(10).name("bucket") - expr = t.group_by(tier).size() - with config.option_context("interactive", True): - expr._repr() - assert con.executed_queries == [] - + assert not queries -def test_compile_no_execute(con): - t = con.table("functional_alltypes") - t.double_col.sum().compile() - assert con.executed_queries == [] +def test_isin_rule_suppressed_exception_repr_not_fail(table): + bool_clause = table["string_col"].notin(["1", "4", "7"]) + expr = table[bool_clause]["string_col"].value_counts() -def test_isin_rule_suppressed_exception_repr_not_fail(con): - with config.option_context("interactive", True): - t = con.table("functional_alltypes") - bool_clause = t["string_col"].notin(["1", "4", "7"]) - expr = t[bool_clause]["string_col"].value_counts() - repr(expr) + repr(expr) diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index e46196aa2378..a0b8f0aa1a88 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -289,7 +289,7 @@ def test_join_with_pandas_non_null_typed_columns(batting, awards_players): reason="polars doesn't support join predicates", ) @pytest.mark.notimpl( - ["dask"], + ["dask", "pandas"], raises=TypeError, reason="dask and pandas don't support join predicates", ) diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 047c9322ca1f..2e345252a2e7 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -18,7 +18,7 @@ from ibis import literal as L from ibis.backends.tests.errors import ( ArrowNotImplementedError, - DuckDBConversionException, + DuckDBParserException, ExaQueryError, GoogleBadRequest, ImpalaHiveServer2Error, @@ -27,11 +27,6 @@ from ibis.expr import datatypes as dt from ibis.tests.util import assert_equal -try: - from snowflake.connector.errors import ProgrammingError as SnowflakeProgrammingError -except ImportError: - SnowflakeProgrammingError = None - @pytest.mark.parametrize( ("expr", "expected_types"), @@ -259,7 +254,7 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? 
{ "bigquery": decimal.Decimal("1.1"), - "snowflake": decimal.Decimal("1.1"), + "snowflake": "1.1", "sqlite": 1.1, "trino": 1.1, "dask": decimal.Decimal("1.1"), @@ -277,7 +272,7 @@ def test_numeric_literal(con, backend, expr, expected_types): }, { "bigquery": "NUMERIC", - "snowflake": "DECIMAL", + "snowflake": "VARCHAR", "sqlite": "real", "trino": "decimal(2,1)", "duckdb": "DECIMAL(18,3)", @@ -313,7 +308,7 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? { "bigquery": decimal.Decimal("1.1"), - "snowflake": decimal.Decimal("1.1"), + "snowflake": "1.100000000", "sqlite": 1.1, "trino": 1.1, "duckdb": decimal.Decimal("1.100000000"), @@ -333,7 +328,7 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": "NUMERIC", "clickhouse": "Decimal(38, 9)", - "snowflake": "DECIMAL", + "snowflake": "VARCHAR", "sqlite": "real", "trino": "decimal(2,1)", "duckdb": "DECIMAL(38,9)", @@ -364,6 +359,7 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? { "bigquery": decimal.Decimal("1.1"), + "snowflake": "1.10000000000000000000000000000000000000", "sqlite": 1.1, "trino": 1.1, "dask": decimal.Decimal("1.1"), @@ -382,6 +378,7 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": "BIGNUMERIC", "clickhouse": "Decimal(76, 38)", + "snowflake": "VARCHAR", "sqlite": "real", "trino": "decimal(2,1)", "duckdb": "DECIMAL(18,3)", @@ -412,11 +409,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "The precision can be up to 38 in Flink", raises=ValueError, ), - pytest.mark.broken( - ["snowflake"], - "Invalid number precision: 76. Must be between 0 and 38.", - raises=SnowflakeProgrammingError, - ), ], id="decimal-big", ), @@ -425,6 +417,7 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? { "bigquery": float("inf"), + "snowflake": "Infinity", "sqlite": float("inf"), "postgres": float("nan"), "risingwave": float("nan"), @@ -436,6 +429,7 @@ def test_numeric_literal(con, backend, expr, expected_types): }, { "bigquery": "FLOAT64", + "snowflake": "VARCHAR", "sqlite": "real", "trino": "decimal(2,1)", "postgres": "numeric", @@ -456,11 +450,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "query_id=20230128_024107_01084_y8zm3)", raises=sa.exc.ProgrammingError, ), - pytest.mark.broken( - ["snowflake"], - "snowflake.connector.errors.ProgrammingError: 100038 (22018): Numeric value 'Infinity' is not recognized", - raises=SnowflakeProgrammingError, - ), pytest.mark.broken( ["pyspark"], "An error occurred while calling z:org.apache.spark.sql.functions.lit.", @@ -507,6 +496,7 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? 
{ "bigquery": float("-inf"), + "snowflake": "-Infinity", "sqlite": float("-inf"), "postgres": float("nan"), "risingwave": float("nan"), @@ -518,6 +508,7 @@ def test_numeric_literal(con, backend, expr, expected_types): }, { "bigquery": "FLOAT64", + "snowflake": "VARCHAR", "sqlite": "real", "trino": "decimal(2,1)", "postgres": "numeric", @@ -538,11 +529,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "query_id=20230128_024107_01084_y8zm3)", raises=sa.exc.ProgrammingError, ), - pytest.mark.broken( - ["snowflake"], - "snowflake.connector.errors.ProgrammingError: 100038 (22018): Numeric value '-Infinity' is not recognized", - raises=SnowflakeProgrammingError, - ), pytest.mark.broken( ["pyspark"], "An error occurred while calling z:org.apache.spark.sql.functions.lit.", @@ -589,6 +575,7 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? { "bigquery": float("nan"), + "snowflake": "NaN", "sqlite": None, "postgres": float("nan"), "risingwave": float("nan"), @@ -600,6 +587,7 @@ def test_numeric_literal(con, backend, expr, expected_types): }, { "bigquery": "FLOAT64", + "snowflake": "VARCHAR", "sqlite": "null", "trino": "decimal(2,1)", "postgres": "numeric", @@ -620,11 +608,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "query_id=20230128_024107_01084_y8zm3)", raises=sa.exc.ProgrammingError, ), - pytest.mark.broken( - ["snowflake"], - "snowflake.connector.errors.ProgrammingError: 100038 (22018): Numeric value 'NaN' is not recognized", - raises=SnowflakeProgrammingError, - ), pytest.mark.broken( ["pyspark"], "An error occurred while calling z:org.apache.spark.sql.functions.lit.", @@ -644,6 +627,14 @@ def test_numeric_literal(con, backend, expr, expected_types): "[SQL: SELECT %(param_1)s AS [Decimal('NaN')]]", raises=(sa.exc.ProgrammingError, KeyError), ), + pytest.mark.broken( + ["mssql"], + "(pydruid.db.exceptions.ProgrammingError) Plan validation failed " + "(org.apache.calcite.tools.ValidationException): " + "org.apache.calcite.runtime.CalciteContextException: From line 1, column 8 to line 1, column 10: Column 'NaN' not found in any table" + "[SQL: SELECT NaN AS \"Decimal('NaN')\"]", + raises=sa.exc.ProgrammingError, + ), pytest.mark.broken( ["druid"], "(pydruid.db.exceptions.ProgrammingError) Plan validation failed " @@ -784,14 +775,28 @@ def test_decimal_literal(con, backend, expr, expected_types, expected_result): param( operator.methodcaller("isnan"), np.isnan, - marks=pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), + marks=[ + pytest.mark.notimpl( + ["exasol"], + raises=com.OperationNotDefinedError, + ), + ], id="isnan", ), param( operator.methodcaller("isinf"), np.isinf, id="isinf", - marks=pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), + marks=[ + pytest.mark.notimpl( + ["exasol"], + raises=com.OperationNotDefinedError, + ), + pytest.mark.notimpl( + ["datafusion"], + raises=com.OperationNotDefinedError, + ), + ], ), ], ) @@ -1437,7 +1442,7 @@ def test_floating_mod(backend, alltypes, df): ) @pytest.mark.notyet(["mssql"], raises=(sa.exc.OperationalError, sa.exc.DataError)) @pytest.mark.notyet(["postgres"], raises=sa.exc.DataError) -@pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) +@pytest.mark.notyet(["snowflake"], raises=sa.exc.ProgrammingError) @pytest.mark.notimpl(["exasol"], raises=(sa.exc.DBAPIError, com.IbisTypeError)) def test_divide_by_zero(backend, alltypes, df, column, denominator): expr = alltypes[column] / denominator @@ 
-1488,7 +1493,6 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "pyspark", "polars", "flink", - "snowflake", ], reason="Not SQLAlchemy backends", ) @@ -1599,8 +1603,7 @@ def test_random(con): @pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) def test_clip(backend, alltypes, df, ibis_func, pandas_func): result = ibis_func(alltypes.int_col).execute() - raw_expected = pandas_func(df.int_col) - expected = raw_expected.astype(result.dtype) + expected = pandas_func(df.int_col).astype(result.dtype) # Names won't match in the PySpark backend since PySpark # gives 'tmp' name when executing a Column backend.assert_series_equal(result, expected, check_names=False) diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index b7aa81c43dd1..910ac4cb97f6 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -37,7 +37,7 @@ def test_floating_scalar_parameter(backend, alltypes, df, column, raw_value): ("start_string", "end_string"), [("2009-03-01", "2010-07-03"), ("2014-12-01", "2017-01-05")], ) -@pytest.mark.notimpl(["mssql", "trino", "druid"]) +@pytest.mark.notimpl(["datafusion", "mssql", "trino", "druid"]) @pytest.mark.broken(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notimpl( ["risingwave"], @@ -65,7 +65,9 @@ def test_timestamp_accepts_date_literals(alltypes): assert expr.compile(params=params) is not None -@pytest.mark.notimpl(["dask", "impala", "pyspark", "druid", "oracle", "exasol"]) +@pytest.mark.notimpl( + ["dask", "impala", "pandas", "pyspark", "druid", "oracle", "exasol"] +) @pytest.mark.never( ["mysql", "sqlite", "mssql"], reason="backend will never implement array types" ) diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 0654ea12cb77..eb8aae905935 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -39,7 +39,8 @@ reason="Not a SQL backend", ) no_sql_extraction = pytest.mark.notimpl( - ["pyspark", "polars"], reason="Not clear how to extract SQL from the backend" + ["datafusion", "pyspark", "polars"], + reason="Not clear how to extract SQL from the backend", ) @@ -61,7 +62,9 @@ def test_literal(backend, expr): assert ibis.to_sql(expr, dialect=backend.name()) -@pytest.mark.never(["pandas", "dask", "polars", "pyspark"], reason="not SQL") +@pytest.mark.never( + ["pandas", "dask", "datafusion", "polars", "pyspark"], reason="not SQL" +) @pytest.mark.xfail_version( mssql=["sqlalchemy>=2"], reason="sqlalchemy 2 prefixes literals with `N`" ) @@ -87,6 +90,9 @@ def test_group_by_has_index(backend, snapshot): snapshot.assert_match(sql, "out.sql") +@pytest.mark.xfail( + raises=exc.IntegrityError, reason="inner join convenience not implemented" +) @pytest.mark.never(["pandas", "dask", "polars", "pyspark"], reason="not SQL") def test_cte_refs_in_topo_order(backend, snapshot): mr0 = ibis.table(schema=ibis.schema(dict(key="int")), name="leaf") @@ -100,7 +106,9 @@ def test_cte_refs_in_topo_order(backend, snapshot): snapshot.assert_match(sql, "out.sql") -@pytest.mark.never(["pandas", "dask", "polars", "pyspark"], reason="not SQL") +@pytest.mark.never( + ["pandas", "dask", "datafusion", "polars", "pyspark"], reason="not SQL" +) def test_isin_bug(con, snapshot): t = ibis.table(dict(x="int"), name="t") good = t[t.x > 2].x @@ -109,7 +117,7 @@ def test_isin_bug(con, snapshot): @pytest.mark.never( - ["pandas", "dask", "polars", "pyspark"], + ["pandas", "dask", "datafusion", "polars", "pyspark"], reason="not SQL", 
raises=NotImplementedError, ) @@ -117,7 +125,7 @@ def test_isin_bug(con, snapshot): ["sqlite", "mysql", "druid", "impala", "mssql"], reason="no unnest support upstream" ) @pytest.mark.notimpl( - ["oracle", "flink", "datafusion"], + ["oracle", "flink"], reason="unnest not yet implemented", raises=exc.OperationNotDefinedError, ) diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index 13a4ab0b3268..83366c24bdb1 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -147,7 +147,9 @@ def uses_java_re(t): lambda t: t.string_col.str.contains("6.*"), id="like", marks=[ - pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError), + pytest.mark.notimpl( + ["datafusion", "polars"], raises=com.OperationNotDefinedError + ), pytest.mark.broken( ["mssql"], reason="mssql doesn't allow like outside of filters", @@ -160,7 +162,9 @@ def uses_java_re(t): lambda t: t.string_col.str.contains("6%"), id="complex_like_escape", marks=[ - pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError), + pytest.mark.notimpl( + ["datafusion", "polars"], raises=com.OperationNotDefinedError + ), pytest.mark.broken( ["mssql"], reason="mssql doesn't allow like outside of filters", @@ -173,7 +177,9 @@ def uses_java_re(t): lambda t: t.string_col.str.contains("6%.*"), id="complex_like_escape_match", marks=[ - pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError), + pytest.mark.notimpl( + ["datafusion", "polars"], raises=com.OperationNotDefinedError + ), pytest.mark.broken( ["mssql"], reason="mssql doesn't allow like outside of filters", @@ -187,7 +193,8 @@ def uses_java_re(t): id="ilike", marks=[ pytest.mark.notimpl( - ["pyspark", "polars"], raises=com.OperationNotDefinedError + ["datafusion", "pyspark", "polars"], + raises=com.OperationNotDefinedError, ), pytest.mark.broken( ["mssql"], @@ -823,6 +830,7 @@ def uses_java_re(t): marks=pytest.mark.notimpl( [ "dask", + "datafusion", "impala", "mysql", "sqlite", diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 019207c6f24b..78e4ce67208e 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -642,7 +642,10 @@ def test_timestamp_truncate(backend, alltypes, df, unit): @pytest.mark.broken( ["polars", "druid"], reason="snaps to the UNIX epoch", raises=AssertionError ) -@pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl( + ["datafusion", "oracle"], + raises=com.OperationNotDefinedError, +) @pytest.mark.broken( ["druid"], raises=AttributeError, @@ -1024,6 +1027,7 @@ def convert_to_offset(x): "dask", "impala", "mysql", + "pandas", "postgres", "risingwave", "snowflake", @@ -1645,6 +1649,12 @@ def test_interval_add_cast_column(backend, alltypes, df): ), "%Y%m%d", marks=[ + pytest.mark.notimpl( + [ + "pandas", + ], + raises=com.OperationNotDefinedError, + ), pytest.mark.notimpl( [ "pyspark", @@ -1749,7 +1759,7 @@ def test_strftime(backend, alltypes, df, expr_fn, pandas_pattern): reason="PySpark backend does not support timestamp from unix time with unit us. 
Supported unit is s.", ), pytest.mark.notimpl( - ["mssql", "clickhouse", "duckdb"], + ["duckdb", "mssql", "clickhouse"], raises=com.UnsupportedOperationError, reason="`us` unit is not supported!", ), @@ -1766,12 +1776,12 @@ def test_strftime(backend, alltypes, df, expr_fn, pandas_pattern): pytest.mark.notimpl( ["pyspark"], raises=com.UnsupportedArgumentError, - reason="PySpark backend does not support timestamp from unix time with unit ns. Supported unit is s.", + reason="PySpark backend does not support timestamp from unix time with unit ms. Supported unit is s.", ), pytest.mark.notimpl( ["duckdb", "mssql", "clickhouse"], raises=com.UnsupportedOperationError, - reason="`ns` unit is not supported!", + reason="`ms` unit is not supported!", ), pytest.mark.notimpl( ["flink"], @@ -1821,7 +1831,7 @@ def test_integer_to_timestamp(backend, con, unit): "(snowflake.connector.errors.ProgrammingError) 100096 (22007): " "Can't parse '11/01/10' as timestamp with format '%m/%d/%y'" ), - raises=SnowflakeProgrammingError, + raises=sa.exc.ProgrammingError, ), pytest.mark.never( ["flink"], @@ -2036,7 +2046,10 @@ def test_now_from_projection(alltypes): } -@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl( + ["pandas", "datafusion", "dask", "pyspark"], + raises=com.OperationNotDefinedError, +) @pytest.mark.notimpl( ["druid"], raises=sa.exc.ProgrammingError, reason="SQL parse failed" ) @@ -2084,7 +2097,10 @@ def test_date_literal(con, backend): } -@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl( + ["pandas", "datafusion", "dask", "pyspark"], + raises=com.OperationNotDefinedError, +) @pytest.mark.notimpl( ["druid"], raises=sa.exc.ProgrammingError, @@ -2123,7 +2139,8 @@ def test_timestamp_literal(con, backend): @pytest.mark.notimpl( - ["pandas", "mysql", "dask", "pyspark"], raises=com.OperationNotDefinedError + ["pandas", "datafusion", "mysql", "dask", "pyspark"], + raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( ["mysql"], @@ -2250,12 +2267,14 @@ def test_time_literal(con, backend): @pytest.mark.broken( ["sqlite"], raises=AssertionError, reason="SQLite returns Timedelta from execution" ) -@pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl( + ["dask", "datafusion", "pandas"], raises=com.OperationNotDefinedError +) @pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.parametrize( "microsecond", [ - param(0, id="second"), + 0, param( 561021, marks=[ @@ -2278,9 +2297,9 @@ def test_time_literal(con, backend): ), ), ], - id="subsecond", ), ], + ids=["second", "subsecond"], ) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) def test_extract_time_from_timestamp(con, microsecond): @@ -2307,8 +2326,9 @@ def test_extract_time_from_timestamp(con, microsecond): @pytest.mark.broken( ["snowflake"], - "interval literal is not supported in this form.", - raises=SnowflakeProgrammingError, + "(snowflake.connector.errors.ProgrammingError) 001007 (22023): SQL compilation error:" + "invalid type [CAST(INTERVAL_LITERAL('second', '1') AS VARIANT)] for parameter 'TO_VARIANT'", + raises=sa.exc.ProgrammingError, ) @pytest.mark.broken( ["druid"], @@ -2369,7 +2389,10 @@ def test_interval_literal(con, backend): assert con.execute(expr.typeof()) == INTERVAL_BACKEND_TYPES[backend_name] -@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl( + ["pandas", "datafusion", "dask", "pyspark"], 
+ raises=com.OperationNotDefinedError, +) @pytest.mark.broken( ["mysql"], raises=sa.exc.ProgrammingError, @@ -2404,7 +2427,10 @@ def test_date_column_from_ymd(backend, con, alltypes, df): backend.assert_series_equal(golden, result.timestamp_col) -@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl( + ["pandas", "datafusion", "dask", "pyspark"], + raises=com.OperationNotDefinedError, +) @pytest.mark.broken( ["druid"], raises=AttributeError, @@ -2846,7 +2872,7 @@ def test_delta(con, start, end, unit, expected): ), pytest.mark.notimpl( ["snowflake"], - raises=SnowflakeProgrammingError, + raises=sa.exc.ProgrammingError, reason="snowflake doesn't support sub-second interval precision", ), pytest.mark.notimpl( diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 036b424dad0e..1b43317a52c9 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -31,12 +31,6 @@ ) -try: - from snowflake.connector.errors import ProgrammingError as SnowflakeProgrammingError -except ImportError: - SnowflakeProgrammingError = None - - # adapted from https://gist.github.com/xmnlab/2c1f93df1a6c6bde4e32c8579117e9cc def pandas_ntile(x, bucket: int): """Divide values into a number of buckets. @@ -116,6 +110,11 @@ def calc_zscore(s): reason="upstream is broken; returns all nulls", raises=AssertionError, ), + pytest.mark.broken( + ["datafusion"], + reason="Exception: Internal error: Expects default value to have Int64 type.", + raises=BaseException, + ), pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl( ["flink"], @@ -646,7 +645,6 @@ def test_grouped_unbounded_window( # 1) Grouped # 2) Ordered if `ordered` is True df = df.sort_values("id") if ordered else df - expected = df.assign(val=expected_fn(df.groupby("string_col"))) expected = expected.set_index("id").sort_index() @@ -663,7 +661,7 @@ def test_grouped_unbounded_window( ], ) @pytest.mark.broken(["snowflake"], raises=AssertionError) -@pytest.mark.broken(["dask", "mssql"], raises=AssertionError) +@pytest.mark.broken(["dask", "pandas", "mssql"], raises=AssertionError) @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["flink"], @@ -730,6 +728,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): True, id="ordered-mean", marks=[ + pytest.mark.broken(["pandas"], raises=AssertionError), pytest.mark.notimpl( ["dask"], raises=NotImplementedError, @@ -806,6 +805,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): ], raises=com.OperationNotDefinedError, ), + pytest.mark.broken(["pandas"], raises=AssertionError), pytest.mark.broken( ["dask"], raises=ValueError, @@ -875,6 +875,11 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): raises=AssertionError, ), pytest.mark.broken(["oracle"], raises=AssertionError), + pytest.mark.broken( + ["datafusion"], + raises=Exception, + reason="Exception: Internal error: Expects default value to have Int64 type.", + ), pytest.mark.notimpl( ["pyspark"], raises=PySparkAnalysisException, @@ -890,7 +895,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): pytest.mark.notyet( ["snowflake"], reason="backend requires ordering", - raises=SnowflakeProgrammingError, + raises=sa.exc.ProgrammingError, ), pytest.mark.notimpl( ["risingwave"], @@ -931,6 +936,11 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): ), raises=AssertionError, ), + pytest.mark.broken( + ["datafusion"], 
+ raises=Exception, + reason="Exception: Internal error: Expects default value to have Int64 type.", + ), pytest.mark.broken(["oracle"], raises=AssertionError), pytest.mark.notimpl( ["pyspark"], @@ -947,7 +957,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): pytest.mark.notyet( ["snowflake"], reason="backend requires ordering", - raises=SnowflakeProgrammingError, + raises=sa.exc.ProgrammingError, ), pytest.mark.notimpl( ["risingwave"], @@ -1056,11 +1066,7 @@ def test_ungrouped_unbounded_window( @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl( - ["snowflake"], - raises=SnowflakeProgrammingError, - reason="snowflake doesn't support sliding range windows", -) +@pytest.mark.notimpl(["snowflake"], raises=sa.exc.ProgrammingError) @pytest.mark.notimpl( ["impala"], raises=ImpalaHiveServer2Error, reason="limited RANGE support" ) @@ -1162,6 +1168,11 @@ def test_percent_rank_whole_table_no_order_by(backend, alltypes, df): @pytest.mark.broken( ["pandas"], reason="pandas returns incorrect results", raises=AssertionError ) +@pytest.mark.broken( + ["datafusion"], + reason="Exception: External error: Internal error: Expects default value to have Int64 type", + raises=Exception, +) def test_grouped_ordered_window_coalesce(backend, alltypes, df): t = alltypes expr = ( diff --git a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql index 31b9d111cde6..953b4dfeefc4 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql @@ -1,18 +1,18 @@ SELECT - t2.l_returnflag AS l_returnflag, - t2.l_linestatus AS l_linestatus, - t2.sum_qty AS sum_qty, - t2.sum_base_price AS sum_base_price, - t2.sum_disc_price AS sum_disc_price, - t2.sum_charge AS sum_charge, - t2.avg_qty AS avg_qty, - t2.avg_price AS avg_price, - t2.avg_disc AS avg_disc, - t2.count_order AS count_order + t2.l_returnflag, + t2.l_linestatus, + t2.sum_qty, + t2.sum_base_price, + t2.sum_disc_price, + t2.sum_charge, + t2.avg_qty, + t2.avg_price, + t2.avg_disc, + t2.count_order FROM ( SELECT - t1.l_returnflag AS l_returnflag, - t1.l_linestatus AS l_linestatus, + t1.l_returnflag, + t1.l_linestatus, SUM(t1.l_quantity) AS sum_qty, SUM(t1.l_extendedprice) AS sum_base_price, SUM(t1.l_extendedprice * ( @@ -33,22 +33,22 @@ FROM ( COUNT(*) AS count_order FROM ( SELECT - t0.l_orderkey AS l_orderkey, - t0.l_partkey AS l_partkey, - t0.l_suppkey AS l_suppkey, - t0.l_linenumber AS l_linenumber, - t0.l_quantity AS l_quantity, - t0.l_extendedprice AS l_extendedprice, - t0.l_discount AS l_discount, - t0.l_tax AS l_tax, - t0.l_returnflag AS l_returnflag, - t0.l_linestatus AS l_linestatus, - t0.l_shipdate AS l_shipdate, - t0.l_commitdate AS l_commitdate, - t0.l_receiptdate AS l_receiptdate, - t0.l_shipinstruct AS l_shipinstruct, - t0.l_shipmode AS l_shipmode, - t0.l_comment AS l_comment + t0.l_orderkey, + t0.l_partkey, + t0.l_suppkey, + t0.l_linenumber, + t0.l_quantity, + t0.l_extendedprice, + t0.l_discount, + t0.l_tax, + t0.l_returnflag, + t0.l_linestatus, + t0.l_shipdate, + t0.l_commitdate, + t0.l_receiptdate, + t0.l_shipinstruct, + t0.l_shipmode, + t0.l_comment FROM lineitem AS t0 WHERE t0.l_shipdate <= MAKE_DATE(1998, 9, 2) diff --git a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql index b8ea068fcd7b..9fa0195c56bb 
100644 --- a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql @@ -1,116 +1,116 @@ SELECT - t19.s_acctbal AS s_acctbal, - t19.s_name AS s_name, - t19.n_name AS n_name, - t19.p_partkey AS p_partkey, - t19.p_mfgr AS p_mfgr, - t19.s_address AS s_address, - t19.s_phone AS s_phone, - t19.s_comment AS s_comment + t21.s_acctbal, + t21.s_name, + t21.n_name, + t21.p_partkey, + t21.p_mfgr, + t21.s_address, + t21.s_phone, + t21.s_comment FROM ( SELECT - t0.p_partkey AS p_partkey, - t0.p_name AS p_name, - t0.p_mfgr AS p_mfgr, - t0.p_brand AS p_brand, - t0.p_type AS p_type, - t0.p_size AS p_size, - t0.p_container AS p_container, - t0.p_retailprice AS p_retailprice, - t0.p_comment AS p_comment, - t5.ps_partkey AS ps_partkey, - t5.ps_suppkey AS ps_suppkey, - t5.ps_availqty AS ps_availqty, - t5.ps_supplycost AS ps_supplycost, - t5.ps_comment AS ps_comment, - t6.s_suppkey AS s_suppkey, - t6.s_name AS s_name, - t6.s_address AS s_address, - t6.s_nationkey AS s_nationkey, - t6.s_phone AS s_phone, - t6.s_acctbal AS s_acctbal, - t6.s_comment AS s_comment, - t8.n_nationkey AS n_nationkey, - t8.n_name AS n_name, - t8.n_regionkey AS n_regionkey, - t8.n_comment AS n_comment, - t10.r_regionkey AS r_regionkey, - t10.r_name AS r_name, - t10.r_comment AS r_comment - FROM part AS t0 - INNER JOIN partsupp AS t5 - ON t0.p_partkey = t5.ps_partkey - INNER JOIN supplier AS t6 - ON t6.s_suppkey = t5.ps_suppkey - INNER JOIN nation AS t8 - ON t6.s_nationkey = t8.n_nationkey - INNER JOIN region AS t10 - ON t8.n_regionkey = t10.r_regionkey -) AS t19 + t5.p_partkey, + t5.p_name, + t5.p_mfgr, + t5.p_brand, + t5.p_type, + t5.p_size, + t5.p_container, + t5.p_retailprice, + t5.p_comment, + t6.ps_partkey, + t6.ps_suppkey, + t6.ps_availqty, + t6.ps_supplycost, + t6.ps_comment, + t8.s_suppkey, + t8.s_name, + t8.s_address, + t8.s_nationkey, + t8.s_phone, + t8.s_acctbal, + t8.s_comment, + t10.n_nationkey, + t10.n_name, + t10.n_regionkey, + t10.n_comment, + t12.r_regionkey, + t12.r_name, + t12.r_comment + FROM part AS t5 + INNER JOIN partsupp AS t6 + ON t5.p_partkey = t6.ps_partkey + INNER JOIN supplier AS t8 + ON t8.s_suppkey = t6.ps_suppkey + INNER JOIN nation AS t10 + ON t8.s_nationkey = t10.n_nationkey + INNER JOIN region AS t12 + ON t10.n_regionkey = t12.r_regionkey +) AS t21 WHERE - t19.p_size = CAST(15 AS TINYINT) - AND t19.p_type LIKE '%BRASS' - AND t19.r_name = 'EUROPE' - AND t19.ps_supplycost = ( + t21.p_size = CAST(15 AS TINYINT) + AND t21.p_type LIKE '%BRASS' + AND t21.r_name = 'EUROPE' + AND t21.ps_supplycost = ( SELECT - MIN(t21.ps_supplycost) AS "Min(ps_supplycost)" + MIN(t23.ps_supplycost) AS "Min(ps_supplycost)" FROM ( SELECT - t20.ps_partkey AS ps_partkey, - t20.ps_suppkey AS ps_suppkey, - t20.ps_availqty AS ps_availqty, - t20.ps_supplycost AS ps_supplycost, - t20.ps_comment AS ps_comment, - t20.s_suppkey AS s_suppkey, - t20.s_name AS s_name, - t20.s_address AS s_address, - t20.s_nationkey AS s_nationkey, - t20.s_phone AS s_phone, - t20.s_acctbal AS s_acctbal, - t20.s_comment AS s_comment, - t20.n_nationkey AS n_nationkey, - t20.n_name AS n_name, - t20.n_regionkey AS n_regionkey, - t20.n_comment AS n_comment, - t20.r_regionkey AS r_regionkey, - t20.r_name AS r_name, - t20.r_comment AS r_comment + t22.ps_partkey, + t22.ps_suppkey, + t22.ps_availqty, + t22.ps_supplycost, + t22.ps_comment, + t22.s_suppkey, + t22.s_name, + t22.s_address, + t22.s_nationkey, + t22.s_phone, + t22.s_acctbal, + t22.s_comment, + t22.n_nationkey, + 
t22.n_name, + t22.n_regionkey, + t22.n_comment, + t22.r_regionkey, + t22.r_name, + t22.r_comment FROM ( SELECT - t1.ps_partkey AS ps_partkey, - t1.ps_suppkey AS ps_suppkey, - t1.ps_availqty AS ps_availqty, - t1.ps_supplycost AS ps_supplycost, - t1.ps_comment AS ps_comment, - t7.s_suppkey AS s_suppkey, - t7.s_name AS s_name, - t7.s_address AS s_address, - t7.s_nationkey AS s_nationkey, - t7.s_phone AS s_phone, - t7.s_acctbal AS s_acctbal, - t7.s_comment AS s_comment, - t9.n_nationkey AS n_nationkey, - t9.n_name AS n_name, - t9.n_regionkey AS n_regionkey, - t9.n_comment AS n_comment, - t11.r_regionkey AS r_regionkey, - t11.r_name AS r_name, - t11.r_comment AS r_comment - FROM partsupp AS t1 - INNER JOIN supplier AS t7 - ON t7.s_suppkey = t1.ps_suppkey - INNER JOIN nation AS t9 - ON t7.s_nationkey = t9.n_nationkey - INNER JOIN region AS t11 - ON t9.n_regionkey = t11.r_regionkey - ) AS t20 + t7.ps_partkey, + t7.ps_suppkey, + t7.ps_availqty, + t7.ps_supplycost, + t7.ps_comment, + t9.s_suppkey, + t9.s_name, + t9.s_address, + t9.s_nationkey, + t9.s_phone, + t9.s_acctbal, + t9.s_comment, + t11.n_nationkey, + t11.n_name, + t11.n_regionkey, + t11.n_comment, + t13.r_regionkey, + t13.r_name, + t13.r_comment + FROM partsupp AS t7 + INNER JOIN supplier AS t9 + ON t9.s_suppkey = t7.ps_suppkey + INNER JOIN nation AS t11 + ON t9.s_nationkey = t11.n_nationkey + INNER JOIN region AS t13 + ON t11.n_regionkey = t13.r_regionkey + ) AS t22 WHERE - t20.r_name = 'EUROPE' AND t19.p_partkey = t20.ps_partkey - ) AS t21 + t22.r_name = 'EUROPE' AND t21.p_partkey = t22.ps_partkey + ) AS t23 ) ORDER BY - t19.s_acctbal DESC, - t19.n_name ASC, - t19.s_name ASC, - t19.p_partkey ASC + t21.s_acctbal DESC, + t21.n_name ASC, + t21.s_name ASC, + t21.p_partkey ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql index 90c48b774ef5..adb97afaf7f1 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql @@ -1,103 +1,103 @@ SELECT - t9.l_orderkey AS l_orderkey, - t9.revenue AS revenue, - t9.o_orderdate AS o_orderdate, - t9.o_shippriority AS o_shippriority + t10.l_orderkey, + t10.revenue, + t10.o_orderdate, + t10.o_shippriority FROM ( SELECT - t8.l_orderkey AS l_orderkey, - t8.o_orderdate AS o_orderdate, - t8.o_shippriority AS o_shippriority, - SUM(t8.l_extendedprice * ( - CAST(1 AS TINYINT) - t8.l_discount + t9.l_orderkey, + t9.o_orderdate, + t9.o_shippriority, + SUM(t9.l_extendedprice * ( + CAST(1 AS TINYINT) - t9.l_discount )) AS revenue FROM ( SELECT - t7.c_custkey AS c_custkey, - t7.c_name AS c_name, - t7.c_address AS c_address, - t7.c_nationkey AS c_nationkey, - t7.c_phone AS c_phone, - t7.c_acctbal AS c_acctbal, - t7.c_mktsegment AS c_mktsegment, - t7.c_comment AS c_comment, - t7.o_orderkey AS o_orderkey, - t7.o_custkey AS o_custkey, - t7.o_orderstatus AS o_orderstatus, - t7.o_totalprice AS o_totalprice, - t7.o_orderdate AS o_orderdate, - t7.o_orderpriority AS o_orderpriority, - t7.o_clerk AS o_clerk, - t7.o_shippriority AS o_shippriority, - t7.o_comment AS o_comment, - t7.l_orderkey AS l_orderkey, - t7.l_partkey AS l_partkey, - t7.l_suppkey AS l_suppkey, - t7.l_linenumber AS l_linenumber, - t7.l_quantity AS l_quantity, - t7.l_extendedprice AS l_extendedprice, - t7.l_discount AS l_discount, - t7.l_tax AS l_tax, - t7.l_returnflag AS l_returnflag, - t7.l_linestatus AS 
l_linestatus, - t7.l_shipdate AS l_shipdate, - t7.l_commitdate AS l_commitdate, - t7.l_receiptdate AS l_receiptdate, - t7.l_shipinstruct AS l_shipinstruct, - t7.l_shipmode AS l_shipmode, - t7.l_comment AS l_comment + t8.c_custkey, + t8.c_name, + t8.c_address, + t8.c_nationkey, + t8.c_phone, + t8.c_acctbal, + t8.c_mktsegment, + t8.c_comment, + t8.o_orderkey, + t8.o_custkey, + t8.o_orderstatus, + t8.o_totalprice, + t8.o_orderdate, + t8.o_orderpriority, + t8.o_clerk, + t8.o_shippriority, + t8.o_comment, + t8.l_orderkey, + t8.l_partkey, + t8.l_suppkey, + t8.l_linenumber, + t8.l_quantity, + t8.l_extendedprice, + t8.l_discount, + t8.l_tax, + t8.l_returnflag, + t8.l_linestatus, + t8.l_shipdate, + t8.l_commitdate, + t8.l_receiptdate, + t8.l_shipinstruct, + t8.l_shipmode, + t8.l_comment FROM ( SELECT - t0.c_custkey AS c_custkey, - t0.c_name AS c_name, - t0.c_address AS c_address, - t0.c_nationkey AS c_nationkey, - t0.c_phone AS c_phone, - t0.c_acctbal AS c_acctbal, - t0.c_mktsegment AS c_mktsegment, - t0.c_comment AS c_comment, - t3.o_orderkey AS o_orderkey, - t3.o_custkey AS o_custkey, - t3.o_orderstatus AS o_orderstatus, - t3.o_totalprice AS o_totalprice, - t3.o_orderdate AS o_orderdate, - t3.o_orderpriority AS o_orderpriority, - t3.o_clerk AS o_clerk, - t3.o_shippriority AS o_shippriority, - t3.o_comment AS o_comment, - t4.l_orderkey AS l_orderkey, - t4.l_partkey AS l_partkey, - t4.l_suppkey AS l_suppkey, - t4.l_linenumber AS l_linenumber, - t4.l_quantity AS l_quantity, - t4.l_extendedprice AS l_extendedprice, - t4.l_discount AS l_discount, - t4.l_tax AS l_tax, - t4.l_returnflag AS l_returnflag, - t4.l_linestatus AS l_linestatus, - t4.l_shipdate AS l_shipdate, - t4.l_commitdate AS l_commitdate, - t4.l_receiptdate AS l_receiptdate, - t4.l_shipinstruct AS l_shipinstruct, - t4.l_shipmode AS l_shipmode, - t4.l_comment AS l_comment - FROM customer AS t0 - INNER JOIN orders AS t3 - ON t0.c_custkey = t3.o_custkey - INNER JOIN lineitem AS t4 - ON t4.l_orderkey = t3.o_orderkey - ) AS t7 + t3.c_custkey, + t3.c_name, + t3.c_address, + t3.c_nationkey, + t3.c_phone, + t3.c_acctbal, + t3.c_mktsegment, + t3.c_comment, + t4.o_orderkey, + t4.o_custkey, + t4.o_orderstatus, + t4.o_totalprice, + t4.o_orderdate, + t4.o_orderpriority, + t4.o_clerk, + t4.o_shippriority, + t4.o_comment, + t5.l_orderkey, + t5.l_partkey, + t5.l_suppkey, + t5.l_linenumber, + t5.l_quantity, + t5.l_extendedprice, + t5.l_discount, + t5.l_tax, + t5.l_returnflag, + t5.l_linestatus, + t5.l_shipdate, + t5.l_commitdate, + t5.l_receiptdate, + t5.l_shipinstruct, + t5.l_shipmode, + t5.l_comment + FROM customer AS t3 + INNER JOIN orders AS t4 + ON t3.c_custkey = t4.o_custkey + INNER JOIN lineitem AS t5 + ON t5.l_orderkey = t4.o_orderkey + ) AS t8 WHERE - t7.c_mktsegment = 'BUILDING' - AND t7.o_orderdate < MAKE_DATE(1995, 3, 15) - AND t7.l_shipdate > MAKE_DATE(1995, 3, 15) - ) AS t8 + t8.c_mktsegment = 'BUILDING' + AND t8.o_orderdate < MAKE_DATE(1995, 3, 15) + AND t8.l_shipdate > MAKE_DATE(1995, 3, 15) + ) AS t9 GROUP BY 1, 2, 3 -) AS t9 +) AS t10 ORDER BY - t9.revenue DESC, - t9.o_orderdate ASC + t10.revenue DESC, + t10.o_orderdate ASC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql index f56cd81b6401..77ba19f9cc07 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql @@ -1,35 +1,33 @@ SELECT - 
t4.o_orderpriority AS o_orderpriority, - t4.order_count AS order_count + t4.o_orderpriority, + t4.order_count FROM ( SELECT - t3.o_orderpriority AS o_orderpriority, + t3.o_orderpriority, COUNT(*) AS order_count FROM ( SELECT - t0.o_orderkey AS o_orderkey, - t0.o_custkey AS o_custkey, - t0.o_orderstatus AS o_orderstatus, - t0.o_totalprice AS o_totalprice, - t0.o_orderdate AS o_orderdate, - t0.o_orderpriority AS o_orderpriority, - t0.o_clerk AS o_clerk, - t0.o_shippriority AS o_shippriority, - t0.o_comment AS o_comment + t0.o_orderkey, + t0.o_custkey, + t0.o_orderstatus, + t0.o_totalprice, + t0.o_orderdate, + t0.o_orderpriority, + t0.o_clerk, + t0.o_shippriority, + t0.o_comment FROM orders AS t0 WHERE EXISTS( - ( - SELECT - CAST(1 AS TINYINT) AS "1" - FROM lineitem AS t1 - WHERE - ( - t1.l_orderkey = t0.o_orderkey - ) AND ( - t1.l_commitdate < t1.l_receiptdate - ) - ) + SELECT + CAST(1 AS TINYINT) AS "1" + FROM lineitem AS t1 + WHERE + ( + t1.l_orderkey = t0.o_orderkey + ) AND ( + t1.l_commitdate < t1.l_receiptdate + ) ) AND t0.o_orderdate >= MAKE_DATE(1993, 7, 1) AND t0.o_orderdate < MAKE_DATE(1993, 10, 1) diff --git a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql index 6fed94b3b38c..ae3bbac7941f 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql @@ -1,129 +1,129 @@ SELECT - t18.n_name AS n_name, - t18.revenue AS revenue + t19.n_name, + t19.revenue FROM ( SELECT - t17.n_name AS n_name, - SUM(t17.l_extendedprice * ( - CAST(1 AS TINYINT) - t17.l_discount + t18.n_name, + SUM(t18.l_extendedprice * ( + CAST(1 AS TINYINT) - t18.l_discount )) AS revenue FROM ( SELECT - t16.c_custkey AS c_custkey, - t16.c_name AS c_name, - t16.c_address AS c_address, - t16.c_nationkey AS c_nationkey, - t16.c_phone AS c_phone, - t16.c_acctbal AS c_acctbal, - t16.c_mktsegment AS c_mktsegment, - t16.c_comment AS c_comment, - t16.o_orderkey AS o_orderkey, - t16.o_custkey AS o_custkey, - t16.o_orderstatus AS o_orderstatus, - t16.o_totalprice AS o_totalprice, - t16.o_orderdate AS o_orderdate, - t16.o_orderpriority AS o_orderpriority, - t16.o_clerk AS o_clerk, - t16.o_shippriority AS o_shippriority, - t16.o_comment AS o_comment, - t16.l_orderkey AS l_orderkey, - t16.l_partkey AS l_partkey, - t16.l_suppkey AS l_suppkey, - t16.l_linenumber AS l_linenumber, - t16.l_quantity AS l_quantity, - t16.l_extendedprice AS l_extendedprice, - t16.l_discount AS l_discount, - t16.l_tax AS l_tax, - t16.l_returnflag AS l_returnflag, - t16.l_linestatus AS l_linestatus, - t16.l_shipdate AS l_shipdate, - t16.l_commitdate AS l_commitdate, - t16.l_receiptdate AS l_receiptdate, - t16.l_shipinstruct AS l_shipinstruct, - t16.l_shipmode AS l_shipmode, - t16.l_comment AS l_comment, - t16.s_suppkey AS s_suppkey, - t16.s_name AS s_name, - t16.s_address AS s_address, - t16.s_nationkey AS s_nationkey, - t16.s_phone AS s_phone, - t16.s_acctbal AS s_acctbal, - t16.s_comment AS s_comment, - t16.n_nationkey AS n_nationkey, - t16.n_name AS n_name, - t16.n_regionkey AS n_regionkey, - t16.n_comment AS n_comment, - t16.r_regionkey AS r_regionkey, - t16.r_name AS r_name, - t16.r_comment AS r_comment + t17.c_custkey, + t17.c_name, + t17.c_address, + t17.c_nationkey, + t17.c_phone, + t17.c_acctbal, + t17.c_mktsegment, + t17.c_comment, + t17.o_orderkey, + t17.o_custkey, + t17.o_orderstatus, + t17.o_totalprice, + t17.o_orderdate, + t17.o_orderpriority, + 
t17.o_clerk, + t17.o_shippriority, + t17.o_comment, + t17.l_orderkey, + t17.l_partkey, + t17.l_suppkey, + t17.l_linenumber, + t17.l_quantity, + t17.l_extendedprice, + t17.l_discount, + t17.l_tax, + t17.l_returnflag, + t17.l_linestatus, + t17.l_shipdate, + t17.l_commitdate, + t17.l_receiptdate, + t17.l_shipinstruct, + t17.l_shipmode, + t17.l_comment, + t17.s_suppkey, + t17.s_name, + t17.s_address, + t17.s_nationkey, + t17.s_phone, + t17.s_acctbal, + t17.s_comment, + t17.n_nationkey, + t17.n_name, + t17.n_regionkey, + t17.n_comment, + t17.r_regionkey, + t17.r_name, + t17.r_comment FROM ( SELECT - t0.c_custkey AS c_custkey, - t0.c_name AS c_name, - t0.c_address AS c_address, - t0.c_nationkey AS c_nationkey, - t0.c_phone AS c_phone, - t0.c_acctbal AS c_acctbal, - t0.c_mktsegment AS c_mktsegment, - t0.c_comment AS c_comment, - t6.o_orderkey AS o_orderkey, - t6.o_custkey AS o_custkey, - t6.o_orderstatus AS o_orderstatus, - t6.o_totalprice AS o_totalprice, - t6.o_orderdate AS o_orderdate, - t6.o_orderpriority AS o_orderpriority, - t6.o_clerk AS o_clerk, - t6.o_shippriority AS o_shippriority, - t6.o_comment AS o_comment, - t7.l_orderkey AS l_orderkey, - t7.l_partkey AS l_partkey, - t7.l_suppkey AS l_suppkey, - t7.l_linenumber AS l_linenumber, - t7.l_quantity AS l_quantity, - t7.l_extendedprice AS l_extendedprice, - t7.l_discount AS l_discount, - t7.l_tax AS l_tax, - t7.l_returnflag AS l_returnflag, - t7.l_linestatus AS l_linestatus, - t7.l_shipdate AS l_shipdate, - t7.l_commitdate AS l_commitdate, - t7.l_receiptdate AS l_receiptdate, - t7.l_shipinstruct AS l_shipinstruct, - t7.l_shipmode AS l_shipmode, - t7.l_comment AS l_comment, - t8.s_suppkey AS s_suppkey, - t8.s_name AS s_name, - t8.s_address AS s_address, - t8.s_nationkey AS s_nationkey, - t8.s_phone AS s_phone, - t8.s_acctbal AS s_acctbal, - t8.s_comment AS s_comment, - t9.n_nationkey AS n_nationkey, - t9.n_name AS n_name, - t9.n_regionkey AS n_regionkey, - t9.n_comment AS n_comment, - t10.r_regionkey AS r_regionkey, - t10.r_name AS r_name, - t10.r_comment AS r_comment - FROM customer AS t0 - INNER JOIN orders AS t6 - ON t0.c_custkey = t6.o_custkey - INNER JOIN lineitem AS t7 - ON t7.l_orderkey = t6.o_orderkey - INNER JOIN supplier AS t8 - ON t7.l_suppkey = t8.s_suppkey - INNER JOIN nation AS t9 - ON t0.c_nationkey = t8.s_nationkey AND t8.s_nationkey = t9.n_nationkey - INNER JOIN region AS t10 - ON t9.n_regionkey = t10.r_regionkey - ) AS t16 + t6.c_custkey, + t6.c_name, + t6.c_address, + t6.c_nationkey, + t6.c_phone, + t6.c_acctbal, + t6.c_mktsegment, + t6.c_comment, + t7.o_orderkey, + t7.o_custkey, + t7.o_orderstatus, + t7.o_totalprice, + t7.o_orderdate, + t7.o_orderpriority, + t7.o_clerk, + t7.o_shippriority, + t7.o_comment, + t8.l_orderkey, + t8.l_partkey, + t8.l_suppkey, + t8.l_linenumber, + t8.l_quantity, + t8.l_extendedprice, + t8.l_discount, + t8.l_tax, + t8.l_returnflag, + t8.l_linestatus, + t8.l_shipdate, + t8.l_commitdate, + t8.l_receiptdate, + t8.l_shipinstruct, + t8.l_shipmode, + t8.l_comment, + t9.s_suppkey, + t9.s_name, + t9.s_address, + t9.s_nationkey, + t9.s_phone, + t9.s_acctbal, + t9.s_comment, + t10.n_nationkey, + t10.n_name, + t10.n_regionkey, + t10.n_comment, + t11.r_regionkey, + t11.r_name, + t11.r_comment + FROM customer AS t6 + INNER JOIN orders AS t7 + ON t6.c_custkey = t7.o_custkey + INNER JOIN lineitem AS t8 + ON t8.l_orderkey = t7.o_orderkey + INNER JOIN supplier AS t9 + ON t8.l_suppkey = t9.s_suppkey + INNER JOIN nation AS t10 + ON t6.c_nationkey = t9.s_nationkey AND t9.s_nationkey = t10.n_nationkey + INNER JOIN 
region AS t11 + ON t10.n_regionkey = t11.r_regionkey + ) AS t17 WHERE - t16.r_name = 'ASIA' - AND t16.o_orderdate >= MAKE_DATE(1994, 1, 1) - AND t16.o_orderdate < MAKE_DATE(1995, 1, 1) - ) AS t17 + t17.r_name = 'ASIA' + AND t17.o_orderdate >= MAKE_DATE(1994, 1, 1) + AND t17.o_orderdate < MAKE_DATE(1995, 1, 1) + ) AS t18 GROUP BY 1 -) AS t18 +) AS t19 ORDER BY - t18.revenue DESC \ No newline at end of file + t19.revenue DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql index d42e3466036d..eea01a0277a6 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql @@ -2,26 +2,26 @@ SELECT SUM(t1.l_extendedprice * t1.l_discount) AS revenue FROM ( SELECT - t0.l_orderkey AS l_orderkey, - t0.l_partkey AS l_partkey, - t0.l_suppkey AS l_suppkey, - t0.l_linenumber AS l_linenumber, - t0.l_quantity AS l_quantity, - t0.l_extendedprice AS l_extendedprice, - t0.l_discount AS l_discount, - t0.l_tax AS l_tax, - t0.l_returnflag AS l_returnflag, - t0.l_linestatus AS l_linestatus, - t0.l_shipdate AS l_shipdate, - t0.l_commitdate AS l_commitdate, - t0.l_receiptdate AS l_receiptdate, - t0.l_shipinstruct AS l_shipinstruct, - t0.l_shipmode AS l_shipmode, - t0.l_comment AS l_comment + t0.l_orderkey, + t0.l_partkey, + t0.l_suppkey, + t0.l_linenumber, + t0.l_quantity, + t0.l_extendedprice, + t0.l_discount, + t0.l_tax, + t0.l_returnflag, + t0.l_linestatus, + t0.l_shipdate, + t0.l_commitdate, + t0.l_receiptdate, + t0.l_shipinstruct, + t0.l_shipmode, + t0.l_comment FROM lineitem AS t0 WHERE t0.l_shipdate >= MAKE_DATE(1994, 1, 1) AND t0.l_shipdate < MAKE_DATE(1995, 1, 1) AND t0.l_discount BETWEEN CAST(0.05 AS DOUBLE) AND CAST(0.07 AS DOUBLE) AND t0.l_quantity < CAST(24 AS TINYINT) -) AS t1 +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql index 171bbd4b75d8..35411472de9a 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql @@ -1,51 +1,71 @@ -WITH t0 AS ( - SELECT - t6.n_name AS supp_nation, - t7.n_name AS cust_nation, - t3.l_shipdate AS l_shipdate, - t3.l_extendedprice AS l_extendedprice, - t3.l_discount AS l_discount, - CAST(EXTRACT(year FROM t3.l_shipdate) AS SMALLINT) AS l_year, - t3.l_extendedprice * ( - CAST(1 AS TINYINT) - t3.l_discount - ) AS volume - FROM main.supplier AS t2 - JOIN main.lineitem AS t3 - ON t2.s_suppkey = t3.l_suppkey - JOIN main.orders AS t4 - ON t4.o_orderkey = t3.l_orderkey - JOIN main.customer AS t5 - ON t5.c_custkey = t4.o_custkey - JOIN main.nation AS t6 - ON t2.s_nationkey = t6.n_nationkey - JOIN main.nation AS t7 - ON t5.c_nationkey = t7.n_nationkey -) SELECT - t1.supp_nation, - t1.cust_nation, - t1.l_year, - t1.revenue + t19.supp_nation, + t19.cust_nation, + t19.l_year, + t19.revenue FROM ( SELECT - t0.supp_nation AS supp_nation, - t0.cust_nation AS cust_nation, - t0.l_year AS l_year, - SUM(t0.volume) AS revenue - FROM t0 - WHERE - ( - t0.cust_nation = 'FRANCE' AND t0.supp_nation = 'GERMANY' - OR t0.cust_nation = 'GERMANY' - AND t0.supp_nation = 'FRANCE' - ) - AND t0.l_shipdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) + t18.supp_nation, + t18.cust_nation, + t18.l_year, + 
SUM(t18.volume) AS revenue + FROM ( + SELECT + t17.supp_nation, + t17.cust_nation, + t17.l_shipdate, + t17.l_extendedprice, + t17.l_discount, + t17.l_year, + t17.volume + FROM ( + SELECT + t9.n_name AS supp_nation, + t11.n_name AS cust_nation, + t6.l_shipdate, + t6.l_extendedprice, + t6.l_discount, + EXTRACT('year' FROM t6.l_shipdate) AS l_year, + t6.l_extendedprice * ( + CAST(1 AS TINYINT) - t6.l_discount + ) AS volume + FROM supplier AS t5 + INNER JOIN lineitem AS t6 + ON t5.s_suppkey = t6.l_suppkey + INNER JOIN orders AS t7 + ON t7.o_orderkey = t6.l_orderkey + INNER JOIN customer AS t8 + ON t8.c_custkey = t7.o_custkey + INNER JOIN nation AS t9 + ON t5.s_nationkey = t9.n_nationkey + INNER JOIN nation AS t11 + ON t8.c_nationkey = t11.n_nationkey + ) AS t17 + WHERE + ( + ( + ( + t17.cust_nation = 'FRANCE' + ) AND ( + t17.supp_nation = 'GERMANY' + ) + ) + OR ( + ( + t17.cust_nation = 'GERMANY' + ) AND ( + t17.supp_nation = 'FRANCE' + ) + ) + ) + AND t17.l_shipdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) + ) AS t18 GROUP BY 1, 2, 3 -) AS t1 +) AS t19 ORDER BY - t1.supp_nation ASC, - t1.cust_nation ASC, - t1.l_year ASC \ No newline at end of file + t19.supp_nation ASC, + t19.cust_nation ASC, + t19.l_year ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql index e588d3e1466f..97b1be133851 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql @@ -1,52 +1,52 @@ SELECT - t23.o_year AS o_year, - t23.mkt_share AS mkt_share + t25.o_year, + t25.mkt_share FROM ( SELECT - t22.o_year AS o_year, - SUM(t22.nation_volume) / SUM(t22.volume) AS mkt_share + t24.o_year, + SUM(t24.nation_volume) / SUM(t24.volume) AS mkt_share FROM ( SELECT - t21.o_year AS o_year, - t21.volume AS volume, - t21.nation AS nation, - t21.r_name AS r_name, - t21.o_orderdate AS o_orderdate, - t21.p_type AS p_type, - CASE WHEN t21.nation = 'BRAZIL' THEN t21.volume ELSE CAST(0 AS TINYINT) END AS nation_volume + t23.o_year, + t23.volume, + t23.nation, + t23.r_name, + t23.o_orderdate, + t23.p_type, + CASE WHEN t23.nation = 'BRAZIL' THEN t23.volume ELSE CAST(0 AS TINYINT) END AS nation_volume FROM ( SELECT - EXTRACT('year' FROM t9.o_orderdate) AS o_year, - t7.l_extendedprice * ( - CAST(1 AS TINYINT) - t7.l_discount + EXTRACT('year' FROM t10.o_orderdate) AS o_year, + t8.l_extendedprice * ( + CAST(1 AS TINYINT) - t8.l_discount ) AS volume, - t12.n_name AS nation, - t13.r_name AS r_name, - t9.o_orderdate AS o_orderdate, - t0.p_type AS p_type - FROM part AS t0 - INNER JOIN lineitem AS t7 - ON t0.p_partkey = t7.l_partkey - INNER JOIN supplier AS t8 - ON t8.s_suppkey = t7.l_suppkey - INNER JOIN orders AS t9 - ON t7.l_orderkey = t9.o_orderkey - INNER JOIN customer AS t10 - ON t9.o_custkey = t10.c_custkey - INNER JOIN nation AS t11 - ON t10.c_nationkey = t11.n_nationkey - INNER JOIN region AS t13 - ON t11.n_regionkey = t13.r_regionkey + t15.n_name AS nation, + t14.r_name, + t10.o_orderdate, + t7.p_type + FROM part AS t7 + INNER JOIN lineitem AS t8 + ON t7.p_partkey = t8.l_partkey + INNER JOIN supplier AS t9 + ON t9.s_suppkey = t8.l_suppkey + INNER JOIN orders AS t10 + ON t8.l_orderkey = t10.o_orderkey + INNER JOIN customer AS t11 + ON t10.o_custkey = t11.c_custkey INNER JOIN nation AS t12 - ON t8.s_nationkey = t12.n_nationkey - ) AS t21 + ON t11.c_nationkey = t12.n_nationkey + INNER 
JOIN region AS t14 + ON t12.n_regionkey = t14.r_regionkey + INNER JOIN nation AS t15 + ON t9.s_nationkey = t15.n_nationkey + ) AS t23 WHERE - t21.r_name = 'AMERICA' - AND t21.o_orderdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) - AND t21.p_type = 'ECONOMY ANODIZED STEEL' - ) AS t22 + t23.r_name = 'AMERICA' + AND t23.o_orderdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) + AND t23.p_type = 'ECONOMY ANODIZED STEEL' + ) AS t24 GROUP BY 1 -) AS t23 +) AS t25 ORDER BY - t23.o_year ASC \ No newline at end of file + t25.o_year ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql index 1b3de4b3fb5b..21489f03313d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql @@ -1,49 +1,49 @@ SELECT - t18.nation AS nation, - t18.o_year AS o_year, - t18.sum_profit AS sum_profit + t19.nation, + t19.o_year, + t19.sum_profit FROM ( SELECT - t17.nation AS nation, - t17.o_year AS o_year, - SUM(t17.amount) AS sum_profit + t18.nation, + t18.o_year, + SUM(t18.amount) AS sum_profit FROM ( SELECT - t16.amount AS amount, - t16.o_year AS o_year, - t16.nation AS nation, - t16.p_name AS p_name + t17.amount, + t17.o_year, + t17.nation, + t17.p_name FROM ( SELECT ( - t0.l_extendedprice * ( - CAST(1 AS TINYINT) - t0.l_discount + t6.l_extendedprice * ( + CAST(1 AS TINYINT) - t6.l_discount ) ) - ( - t7.ps_supplycost * t0.l_quantity + t8.ps_supplycost * t6.l_quantity ) AS amount, - EXTRACT('year' FROM t9.o_orderdate) AS o_year, - t10.n_name AS nation, - t8.p_name AS p_name - FROM lineitem AS t0 - INNER JOIN supplier AS t6 - ON t6.s_suppkey = t0.l_suppkey - INNER JOIN partsupp AS t7 - ON t7.ps_suppkey = t0.l_suppkey AND t7.ps_partkey = t0.l_partkey - INNER JOIN part AS t8 - ON t8.p_partkey = t0.l_partkey - INNER JOIN orders AS t9 - ON t9.o_orderkey = t0.l_orderkey - INNER JOIN nation AS t10 - ON t6.s_nationkey = t10.n_nationkey - ) AS t16 + EXTRACT('year' FROM t10.o_orderdate) AS o_year, + t11.n_name AS nation, + t9.p_name + FROM lineitem AS t6 + INNER JOIN supplier AS t7 + ON t7.s_suppkey = t6.l_suppkey + INNER JOIN partsupp AS t8 + ON t8.ps_suppkey = t6.l_suppkey AND t8.ps_partkey = t6.l_partkey + INNER JOIN part AS t9 + ON t9.p_partkey = t6.l_partkey + INNER JOIN orders AS t10 + ON t10.o_orderkey = t6.l_orderkey + INNER JOIN nation AS t11 + ON t7.s_nationkey = t11.n_nationkey + ) AS t17 WHERE - t16.p_name LIKE '%green%' - ) AS t17 + t17.p_name LIKE '%green%' + ) AS t18 GROUP BY 1, 2 -) AS t18 +) AS t19 ORDER BY - t18.nation ASC, - t18.o_year DESC \ No newline at end of file + t19.nation ASC, + t19.o_year DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql index 9fd9b9eec366..a08b8198283b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql @@ -1,115 +1,115 @@ SELECT - t12.c_custkey AS c_custkey, - t12.c_name AS c_name, - t12.revenue AS revenue, - t12.c_acctbal AS c_acctbal, - t12.n_name AS n_name, - t12.c_address AS c_address, - t12.c_phone AS c_phone, - t12.c_comment AS c_comment + t13.c_custkey, + t13.c_name, + t13.revenue, + t13.c_acctbal, + t13.n_name, + t13.c_address, + t13.c_phone, + t13.c_comment 
FROM ( SELECT - t11.c_custkey AS c_custkey, - t11.c_name AS c_name, - t11.c_acctbal AS c_acctbal, - t11.n_name AS n_name, - t11.c_address AS c_address, - t11.c_phone AS c_phone, - t11.c_comment AS c_comment, - SUM(t11.l_extendedprice * ( - CAST(1 AS TINYINT) - t11.l_discount + t12.c_custkey, + t12.c_name, + t12.c_acctbal, + t12.n_name, + t12.c_address, + t12.c_phone, + t12.c_comment, + SUM(t12.l_extendedprice * ( + CAST(1 AS TINYINT) - t12.l_discount )) AS revenue FROM ( SELECT - t10.c_custkey AS c_custkey, - t10.c_name AS c_name, - t10.c_address AS c_address, - t10.c_nationkey AS c_nationkey, - t10.c_phone AS c_phone, - t10.c_acctbal AS c_acctbal, - t10.c_mktsegment AS c_mktsegment, - t10.c_comment AS c_comment, - t10.o_orderkey AS o_orderkey, - t10.o_custkey AS o_custkey, - t10.o_orderstatus AS o_orderstatus, - t10.o_totalprice AS o_totalprice, - t10.o_orderdate AS o_orderdate, - t10.o_orderpriority AS o_orderpriority, - t10.o_clerk AS o_clerk, - t10.o_shippriority AS o_shippriority, - t10.o_comment AS o_comment, - t10.l_orderkey AS l_orderkey, - t10.l_partkey AS l_partkey, - t10.l_suppkey AS l_suppkey, - t10.l_linenumber AS l_linenumber, - t10.l_quantity AS l_quantity, - t10.l_extendedprice AS l_extendedprice, - t10.l_discount AS l_discount, - t10.l_tax AS l_tax, - t10.l_returnflag AS l_returnflag, - t10.l_linestatus AS l_linestatus, - t10.l_shipdate AS l_shipdate, - t10.l_commitdate AS l_commitdate, - t10.l_receiptdate AS l_receiptdate, - t10.l_shipinstruct AS l_shipinstruct, - t10.l_shipmode AS l_shipmode, - t10.l_comment AS l_comment, - t10.n_nationkey AS n_nationkey, - t10.n_name AS n_name, - t10.n_regionkey AS n_regionkey, - t10.n_comment AS n_comment + t11.c_custkey, + t11.c_name, + t11.c_address, + t11.c_nationkey, + t11.c_phone, + t11.c_acctbal, + t11.c_mktsegment, + t11.c_comment, + t11.o_orderkey, + t11.o_custkey, + t11.o_orderstatus, + t11.o_totalprice, + t11.o_orderdate, + t11.o_orderpriority, + t11.o_clerk, + t11.o_shippriority, + t11.o_comment, + t11.l_orderkey, + t11.l_partkey, + t11.l_suppkey, + t11.l_linenumber, + t11.l_quantity, + t11.l_extendedprice, + t11.l_discount, + t11.l_tax, + t11.l_returnflag, + t11.l_linestatus, + t11.l_shipdate, + t11.l_commitdate, + t11.l_receiptdate, + t11.l_shipinstruct, + t11.l_shipmode, + t11.l_comment, + t11.n_nationkey, + t11.n_name, + t11.n_regionkey, + t11.n_comment FROM ( SELECT - t0.c_custkey AS c_custkey, - t0.c_name AS c_name, - t0.c_address AS c_address, - t0.c_nationkey AS c_nationkey, - t0.c_phone AS c_phone, - t0.c_acctbal AS c_acctbal, - t0.c_mktsegment AS c_mktsegment, - t0.c_comment AS c_comment, - t4.o_orderkey AS o_orderkey, - t4.o_custkey AS o_custkey, - t4.o_orderstatus AS o_orderstatus, - t4.o_totalprice AS o_totalprice, - t4.o_orderdate AS o_orderdate, - t4.o_orderpriority AS o_orderpriority, - t4.o_clerk AS o_clerk, - t4.o_shippriority AS o_shippriority, - t4.o_comment AS o_comment, - t5.l_orderkey AS l_orderkey, - t5.l_partkey AS l_partkey, - t5.l_suppkey AS l_suppkey, - t5.l_linenumber AS l_linenumber, - t5.l_quantity AS l_quantity, - t5.l_extendedprice AS l_extendedprice, - t5.l_discount AS l_discount, - t5.l_tax AS l_tax, - t5.l_returnflag AS l_returnflag, - t5.l_linestatus AS l_linestatus, - t5.l_shipdate AS l_shipdate, - t5.l_commitdate AS l_commitdate, - t5.l_receiptdate AS l_receiptdate, - t5.l_shipinstruct AS l_shipinstruct, - t5.l_shipmode AS l_shipmode, - t5.l_comment AS l_comment, - t6.n_nationkey AS n_nationkey, - t6.n_name AS n_name, - t6.n_regionkey AS n_regionkey, - t6.n_comment AS n_comment - FROM 
customer AS t0 - INNER JOIN orders AS t4 - ON t0.c_custkey = t4.o_custkey - INNER JOIN lineitem AS t5 - ON t5.l_orderkey = t4.o_orderkey - INNER JOIN nation AS t6 - ON t0.c_nationkey = t6.n_nationkey - ) AS t10 + t4.c_custkey, + t4.c_name, + t4.c_address, + t4.c_nationkey, + t4.c_phone, + t4.c_acctbal, + t4.c_mktsegment, + t4.c_comment, + t5.o_orderkey, + t5.o_custkey, + t5.o_orderstatus, + t5.o_totalprice, + t5.o_orderdate, + t5.o_orderpriority, + t5.o_clerk, + t5.o_shippriority, + t5.o_comment, + t6.l_orderkey, + t6.l_partkey, + t6.l_suppkey, + t6.l_linenumber, + t6.l_quantity, + t6.l_extendedprice, + t6.l_discount, + t6.l_tax, + t6.l_returnflag, + t6.l_linestatus, + t6.l_shipdate, + t6.l_commitdate, + t6.l_receiptdate, + t6.l_shipinstruct, + t6.l_shipmode, + t6.l_comment, + t7.n_nationkey, + t7.n_name, + t7.n_regionkey, + t7.n_comment + FROM customer AS t4 + INNER JOIN orders AS t5 + ON t4.c_custkey = t5.o_custkey + INNER JOIN lineitem AS t6 + ON t6.l_orderkey = t5.o_orderkey + INNER JOIN nation AS t7 + ON t4.c_nationkey = t7.n_nationkey + ) AS t11 WHERE - t10.o_orderdate >= MAKE_DATE(1993, 10, 1) - AND t10.o_orderdate < MAKE_DATE(1994, 1, 1) - AND t10.l_returnflag = 'R' - ) AS t11 + t11.o_orderdate >= MAKE_DATE(1993, 10, 1) + AND t11.o_orderdate < MAKE_DATE(1994, 1, 1) + AND t11.l_returnflag = 'R' + ) AS t12 GROUP BY 1, 2, @@ -118,7 +118,7 @@ FROM ( 5, 6, 7 -) AS t12 +) AS t13 ORDER BY - t12.revenue DESC + t13.revenue DESC LIMIT 20 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql index 79d4720321ab..c5d401180d41 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql @@ -1,109 +1,109 @@ SELECT - t15.ps_partkey AS ps_partkey, - t15.value AS value + t10.ps_partkey, + t10.value FROM ( SELECT - t13.ps_partkey AS ps_partkey, - SUM(t13.ps_supplycost * t13.ps_availqty) AS value + t9.ps_partkey, + SUM(t9.ps_supplycost * t9.ps_availqty) AS value FROM ( SELECT - t11.ps_partkey AS ps_partkey, - t11.ps_suppkey AS ps_suppkey, - t11.ps_availqty AS ps_availqty, - t11.ps_supplycost AS ps_supplycost, - t11.ps_comment AS ps_comment, - t11.s_suppkey AS s_suppkey, - t11.s_name AS s_name, - t11.s_address AS s_address, - t11.s_nationkey AS s_nationkey, - t11.s_phone AS s_phone, - t11.s_acctbal AS s_acctbal, - t11.s_comment AS s_comment, - t11.n_nationkey AS n_nationkey, - t11.n_name AS n_name, - t11.n_regionkey AS n_regionkey, - t11.n_comment AS n_comment + t8.ps_partkey, + t8.ps_suppkey, + t8.ps_availqty, + t8.ps_supplycost, + t8.ps_comment, + t8.s_suppkey, + t8.s_name, + t8.s_address, + t8.s_nationkey, + t8.s_phone, + t8.s_acctbal, + t8.s_comment, + t8.n_nationkey, + t8.n_name, + t8.n_regionkey, + t8.n_comment FROM ( SELECT - t0.ps_partkey AS ps_partkey, - t0.ps_suppkey AS ps_suppkey, - t0.ps_availqty AS ps_availqty, - t0.ps_supplycost AS ps_supplycost, - t0.ps_comment AS ps_comment, - t3.s_suppkey AS s_suppkey, - t3.s_name AS s_name, - t3.s_address AS s_address, - t3.s_nationkey AS s_nationkey, - t3.s_phone AS s_phone, - t3.s_acctbal AS s_acctbal, - t3.s_comment AS s_comment, - t5.n_nationkey AS n_nationkey, - t5.n_name AS n_name, - t5.n_regionkey AS n_regionkey, - t5.n_comment AS n_comment - FROM partsupp AS t0 - INNER JOIN supplier AS t3 - ON t0.ps_suppkey = t3.s_suppkey + t3.ps_partkey, + t3.ps_suppkey, + t3.ps_availqty, + t3.ps_supplycost, + t3.ps_comment, + 
t4.s_suppkey, + t4.s_name, + t4.s_address, + t4.s_nationkey, + t4.s_phone, + t4.s_acctbal, + t4.s_comment, + t5.n_nationkey, + t5.n_name, + t5.n_regionkey, + t5.n_comment + FROM partsupp AS t3 + INNER JOIN supplier AS t4 + ON t3.ps_suppkey = t4.s_suppkey INNER JOIN nation AS t5 - ON t5.n_nationkey = t3.s_nationkey - ) AS t11 + ON t5.n_nationkey = t4.s_nationkey + ) AS t8 WHERE - t11.n_name = 'GERMANY' - ) AS t13 + t8.n_name = 'GERMANY' + ) AS t9 GROUP BY 1 -) AS t15 +) AS t10 WHERE - t15.value > ( + t10.value > ( ( SELECT - SUM(t14.ps_supplycost * t14.ps_availqty) AS "Sum(Multiply(ps_supplycost, ps_availqty))" + SUM(t9.ps_supplycost * t9.ps_availqty) AS "Sum(Multiply(ps_supplycost, ps_availqty))" FROM ( SELECT - t12.ps_partkey AS ps_partkey, - t12.ps_suppkey AS ps_suppkey, - t12.ps_availqty AS ps_availqty, - t12.ps_supplycost AS ps_supplycost, - t12.ps_comment AS ps_comment, - t12.s_suppkey AS s_suppkey, - t12.s_name AS s_name, - t12.s_address AS s_address, - t12.s_nationkey AS s_nationkey, - t12.s_phone AS s_phone, - t12.s_acctbal AS s_acctbal, - t12.s_comment AS s_comment, - t12.n_nationkey AS n_nationkey, - t12.n_name AS n_name, - t12.n_regionkey AS n_regionkey, - t12.n_comment AS n_comment + t8.ps_partkey, + t8.ps_suppkey, + t8.ps_availqty, + t8.ps_supplycost, + t8.ps_comment, + t8.s_suppkey, + t8.s_name, + t8.s_address, + t8.s_nationkey, + t8.s_phone, + t8.s_acctbal, + t8.s_comment, + t8.n_nationkey, + t8.n_name, + t8.n_regionkey, + t8.n_comment FROM ( SELECT - t0.ps_partkey AS ps_partkey, - t0.ps_suppkey AS ps_suppkey, - t0.ps_availqty AS ps_availqty, - t0.ps_supplycost AS ps_supplycost, - t0.ps_comment AS ps_comment, - t4.s_suppkey AS s_suppkey, - t4.s_name AS s_name, - t4.s_address AS s_address, - t4.s_nationkey AS s_nationkey, - t4.s_phone AS s_phone, - t4.s_acctbal AS s_acctbal, - t4.s_comment AS s_comment, - t6.n_nationkey AS n_nationkey, - t6.n_name AS n_name, - t6.n_regionkey AS n_regionkey, - t6.n_comment AS n_comment - FROM partsupp AS t0 + t3.ps_partkey, + t3.ps_suppkey, + t3.ps_availqty, + t3.ps_supplycost, + t3.ps_comment, + t4.s_suppkey, + t4.s_name, + t4.s_address, + t4.s_nationkey, + t4.s_phone, + t4.s_acctbal, + t4.s_comment, + t5.n_nationkey, + t5.n_name, + t5.n_regionkey, + t5.n_comment + FROM partsupp AS t3 INNER JOIN supplier AS t4 - ON t0.ps_suppkey = t4.s_suppkey - INNER JOIN nation AS t6 - ON t6.n_nationkey = t4.s_nationkey - ) AS t12 + ON t3.ps_suppkey = t4.s_suppkey + INNER JOIN nation AS t5 + ON t5.n_nationkey = t4.s_nationkey + ) AS t8 WHERE - t12.n_name = 'GERMANY' - ) AS t14 + t8.n_name = 'GERMANY' + ) AS t9 ) * CAST(0.0001 AS DOUBLE) ) ORDER BY - t15.value DESC + t10.value DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql index 5dd65a2837f0..1b0c38b528aa 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql @@ -1,12 +1,12 @@ SELECT - t6.l_shipmode AS l_shipmode, - t6.high_line_count AS high_line_count, - t6.low_line_count AS low_line_count + t7.l_shipmode, + t7.high_line_count, + t7.low_line_count FROM ( SELECT - t5.l_shipmode AS l_shipmode, + t6.l_shipmode, SUM( - CASE t5.o_orderpriority + CASE t6.o_orderpriority WHEN '1-URGENT' THEN CAST(1 AS TINYINT) WHEN '2-HIGH' @@ -15,7 +15,7 @@ FROM ( END ) AS high_line_count, SUM( - CASE t5.o_orderpriority + CASE t6.o_orderpriority WHEN '1-URGENT' THEN CAST(0 AS 
TINYINT) WHEN '2-HIGH' @@ -25,71 +25,71 @@ FROM ( ) AS low_line_count FROM ( SELECT - t4.o_orderkey AS o_orderkey, - t4.o_custkey AS o_custkey, - t4.o_orderstatus AS o_orderstatus, - t4.o_totalprice AS o_totalprice, - t4.o_orderdate AS o_orderdate, - t4.o_orderpriority AS o_orderpriority, - t4.o_clerk AS o_clerk, - t4.o_shippriority AS o_shippriority, - t4.o_comment AS o_comment, - t4.l_orderkey AS l_orderkey, - t4.l_partkey AS l_partkey, - t4.l_suppkey AS l_suppkey, - t4.l_linenumber AS l_linenumber, - t4.l_quantity AS l_quantity, - t4.l_extendedprice AS l_extendedprice, - t4.l_discount AS l_discount, - t4.l_tax AS l_tax, - t4.l_returnflag AS l_returnflag, - t4.l_linestatus AS l_linestatus, - t4.l_shipdate AS l_shipdate, - t4.l_commitdate AS l_commitdate, - t4.l_receiptdate AS l_receiptdate, - t4.l_shipinstruct AS l_shipinstruct, - t4.l_shipmode AS l_shipmode, - t4.l_comment AS l_comment + t5.o_orderkey, + t5.o_custkey, + t5.o_orderstatus, + t5.o_totalprice, + t5.o_orderdate, + t5.o_orderpriority, + t5.o_clerk, + t5.o_shippriority, + t5.o_comment, + t5.l_orderkey, + t5.l_partkey, + t5.l_suppkey, + t5.l_linenumber, + t5.l_quantity, + t5.l_extendedprice, + t5.l_discount, + t5.l_tax, + t5.l_returnflag, + t5.l_linestatus, + t5.l_shipdate, + t5.l_commitdate, + t5.l_receiptdate, + t5.l_shipinstruct, + t5.l_shipmode, + t5.l_comment FROM ( SELECT - t0.o_orderkey AS o_orderkey, - t0.o_custkey AS o_custkey, - t0.o_orderstatus AS o_orderstatus, - t0.o_totalprice AS o_totalprice, - t0.o_orderdate AS o_orderdate, - t0.o_orderpriority AS o_orderpriority, - t0.o_clerk AS o_clerk, - t0.o_shippriority AS o_shippriority, - t0.o_comment AS o_comment, - t2.l_orderkey AS l_orderkey, - t2.l_partkey AS l_partkey, - t2.l_suppkey AS l_suppkey, - t2.l_linenumber AS l_linenumber, - t2.l_quantity AS l_quantity, - t2.l_extendedprice AS l_extendedprice, - t2.l_discount AS l_discount, - t2.l_tax AS l_tax, - t2.l_returnflag AS l_returnflag, - t2.l_linestatus AS l_linestatus, - t2.l_shipdate AS l_shipdate, - t2.l_commitdate AS l_commitdate, - t2.l_receiptdate AS l_receiptdate, - t2.l_shipinstruct AS l_shipinstruct, - t2.l_shipmode AS l_shipmode, - t2.l_comment AS l_comment - FROM orders AS t0 - INNER JOIN lineitem AS t2 - ON t0.o_orderkey = t2.l_orderkey - ) AS t4 + t2.o_orderkey, + t2.o_custkey, + t2.o_orderstatus, + t2.o_totalprice, + t2.o_orderdate, + t2.o_orderpriority, + t2.o_clerk, + t2.o_shippriority, + t2.o_comment, + t3.l_orderkey, + t3.l_partkey, + t3.l_suppkey, + t3.l_linenumber, + t3.l_quantity, + t3.l_extendedprice, + t3.l_discount, + t3.l_tax, + t3.l_returnflag, + t3.l_linestatus, + t3.l_shipdate, + t3.l_commitdate, + t3.l_receiptdate, + t3.l_shipinstruct, + t3.l_shipmode, + t3.l_comment + FROM orders AS t2 + INNER JOIN lineitem AS t3 + ON t2.o_orderkey = t3.l_orderkey + ) AS t5 WHERE - t4.l_shipmode IN ('MAIL', 'SHIP') - AND t4.l_commitdate < t4.l_receiptdate - AND t4.l_shipdate < t4.l_commitdate - AND t4.l_receiptdate >= MAKE_DATE(1994, 1, 1) - AND t4.l_receiptdate < MAKE_DATE(1995, 1, 1) - ) AS t5 + t5.l_shipmode IN ('MAIL', 'SHIP') + AND t5.l_commitdate < t5.l_receiptdate + AND t5.l_shipdate < t5.l_commitdate + AND t5.l_receiptdate >= MAKE_DATE(1994, 1, 1) + AND t5.l_receiptdate < MAKE_DATE(1995, 1, 1) + ) AS t6 GROUP BY 1 -) AS t6 +) AS t7 ORDER BY - t6.l_shipmode ASC \ No newline at end of file + t7.l_shipmode ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql 
index 72657a284609..58270b87504b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql @@ -1,45 +1,45 @@ SELECT - t6.c_count AS c_count, - t6.custdist AS custdist + t7.c_count, + t7.custdist FROM ( SELECT - t5.c_count AS c_count, + t6.c_count, COUNT(*) AS custdist FROM ( SELECT - t4.c_custkey AS c_custkey, - COUNT(t4.o_orderkey) AS c_count + t5.c_custkey, + COUNT(t5.o_orderkey) AS c_count FROM ( SELECT - t0.c_custkey AS c_custkey, - t0.c_name AS c_name, - t0.c_address AS c_address, - t0.c_nationkey AS c_nationkey, - t0.c_phone AS c_phone, - t0.c_acctbal AS c_acctbal, - t0.c_mktsegment AS c_mktsegment, - t0.c_comment AS c_comment, - t2.o_orderkey AS o_orderkey, - t2.o_custkey AS o_custkey, - t2.o_orderstatus AS o_orderstatus, - t2.o_totalprice AS o_totalprice, - t2.o_orderdate AS o_orderdate, - t2.o_orderpriority AS o_orderpriority, - t2.o_clerk AS o_clerk, - t2.o_shippriority AS o_shippriority, - t2.o_comment AS o_comment - FROM customer AS t0 - LEFT OUTER JOIN orders AS t2 - ON t0.c_custkey = t2.o_custkey AND NOT ( - t2.o_comment LIKE '%special%requests%' + t2.c_custkey, + t2.c_name, + t2.c_address, + t2.c_nationkey, + t2.c_phone, + t2.c_acctbal, + t2.c_mktsegment, + t2.c_comment, + t3.o_orderkey, + t3.o_custkey, + t3.o_orderstatus, + t3.o_totalprice, + t3.o_orderdate, + t3.o_orderpriority, + t3.o_clerk, + t3.o_shippriority, + t3.o_comment + FROM customer AS t2 + LEFT OUTER JOIN orders AS t3 + ON t2.c_custkey = t3.o_custkey AND NOT ( + t3.o_comment LIKE '%special%requests%' ) - ) AS t4 + ) AS t5 GROUP BY 1 - ) AS t5 + ) AS t6 GROUP BY 1 -) AS t6 +) AS t7 ORDER BY - t6.custdist DESC, - t6.c_count DESC \ No newline at end of file + t7.custdist DESC, + t7.c_count DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql index dc27f6c65550..42d6dbe835b4 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql @@ -2,74 +2,74 @@ SELECT ( SUM( CASE - WHEN t5.p_type LIKE 'PROMO%' - THEN t5.l_extendedprice * ( - CAST(1 AS TINYINT) - t5.l_discount + WHEN t6.p_type LIKE 'PROMO%' + THEN t6.l_extendedprice * ( + CAST(1 AS TINYINT) - t6.l_discount ) ELSE CAST(0 AS TINYINT) END ) * CAST(100 AS TINYINT) - ) / SUM(t5.l_extendedprice * ( - CAST(1 AS TINYINT) - t5.l_discount + ) / SUM(t6.l_extendedprice * ( + CAST(1 AS TINYINT) - t6.l_discount )) AS promo_revenue FROM ( SELECT - t4.l_orderkey AS l_orderkey, - t4.l_partkey AS l_partkey, - t4.l_suppkey AS l_suppkey, - t4.l_linenumber AS l_linenumber, - t4.l_quantity AS l_quantity, - t4.l_extendedprice AS l_extendedprice, - t4.l_discount AS l_discount, - t4.l_tax AS l_tax, - t4.l_returnflag AS l_returnflag, - t4.l_linestatus AS l_linestatus, - t4.l_shipdate AS l_shipdate, - t4.l_commitdate AS l_commitdate, - t4.l_receiptdate AS l_receiptdate, - t4.l_shipinstruct AS l_shipinstruct, - t4.l_shipmode AS l_shipmode, - t4.l_comment AS l_comment, - t4.p_partkey AS p_partkey, - t4.p_name AS p_name, - t4.p_mfgr AS p_mfgr, - t4.p_brand AS p_brand, - t4.p_type AS p_type, - t4.p_size AS p_size, - t4.p_container AS p_container, - t4.p_retailprice AS p_retailprice, - t4.p_comment AS p_comment + t5.l_orderkey, + t5.l_partkey, + t5.l_suppkey, + t5.l_linenumber, + t5.l_quantity, + t5.l_extendedprice, + t5.l_discount, + t5.l_tax, + 
t5.l_returnflag, + t5.l_linestatus, + t5.l_shipdate, + t5.l_commitdate, + t5.l_receiptdate, + t5.l_shipinstruct, + t5.l_shipmode, + t5.l_comment, + t5.p_partkey, + t5.p_name, + t5.p_mfgr, + t5.p_brand, + t5.p_type, + t5.p_size, + t5.p_container, + t5.p_retailprice, + t5.p_comment FROM ( SELECT - t0.l_orderkey AS l_orderkey, - t0.l_partkey AS l_partkey, - t0.l_suppkey AS l_suppkey, - t0.l_linenumber AS l_linenumber, - t0.l_quantity AS l_quantity, - t0.l_extendedprice AS l_extendedprice, - t0.l_discount AS l_discount, - t0.l_tax AS l_tax, - t0.l_returnflag AS l_returnflag, - t0.l_linestatus AS l_linestatus, - t0.l_shipdate AS l_shipdate, - t0.l_commitdate AS l_commitdate, - t0.l_receiptdate AS l_receiptdate, - t0.l_shipinstruct AS l_shipinstruct, - t0.l_shipmode AS l_shipmode, - t0.l_comment AS l_comment, - t2.p_partkey AS p_partkey, - t2.p_name AS p_name, - t2.p_mfgr AS p_mfgr, - t2.p_brand AS p_brand, - t2.p_type AS p_type, - t2.p_size AS p_size, - t2.p_container AS p_container, - t2.p_retailprice AS p_retailprice, - t2.p_comment AS p_comment - FROM lineitem AS t0 - INNER JOIN part AS t2 - ON t0.l_partkey = t2.p_partkey - ) AS t4 + t2.l_orderkey, + t2.l_partkey, + t2.l_suppkey, + t2.l_linenumber, + t2.l_quantity, + t2.l_extendedprice, + t2.l_discount, + t2.l_tax, + t2.l_returnflag, + t2.l_linestatus, + t2.l_shipdate, + t2.l_commitdate, + t2.l_receiptdate, + t2.l_shipinstruct, + t2.l_shipmode, + t2.l_comment, + t3.p_partkey, + t3.p_name, + t3.p_mfgr, + t3.p_brand, + t3.p_type, + t3.p_size, + t3.p_container, + t3.p_retailprice, + t3.p_comment + FROM lineitem AS t2 + INNER JOIN part AS t3 + ON t2.l_partkey = t3.p_partkey + ) AS t5 WHERE - t4.l_shipdate >= MAKE_DATE(1995, 9, 1) AND t4.l_shipdate < MAKE_DATE(1995, 10, 1) -) AS t5 \ No newline at end of file + t5.l_shipdate >= MAKE_DATE(1995, 9, 1) AND t5.l_shipdate < MAKE_DATE(1995, 10, 1) +) AS t6 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql index fc6c924aca22..afad257dc2f2 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql @@ -1,103 +1,103 @@ SELECT - t6.s_suppkey AS s_suppkey, - t6.s_name AS s_name, - t6.s_address AS s_address, - t6.s_phone AS s_phone, - t6.total_revenue AS total_revenue + t7.s_suppkey, + t7.s_name, + t7.s_address, + t7.s_phone, + t7.total_revenue FROM ( SELECT - t0.s_suppkey AS s_suppkey, - t0.s_name AS s_name, - t0.s_address AS s_address, - t0.s_nationkey AS s_nationkey, - t0.s_phone AS s_phone, - t0.s_acctbal AS s_acctbal, - t0.s_comment AS s_comment, - t4.l_suppkey AS l_suppkey, - t4.total_revenue AS total_revenue - FROM supplier AS t0 + t2.s_suppkey, + t2.s_name, + t2.s_address, + t2.s_nationkey, + t2.s_phone, + t2.s_acctbal, + t2.s_comment, + t5.l_suppkey, + t5.total_revenue + FROM supplier AS t2 INNER JOIN ( SELECT - t2.l_suppkey AS l_suppkey, - SUM(t2.l_extendedprice * ( - CAST(1 AS TINYINT) - t2.l_discount + t3.l_suppkey, + SUM(t3.l_extendedprice * ( + CAST(1 AS TINYINT) - t3.l_discount )) AS total_revenue FROM ( SELECT - t1.l_orderkey AS l_orderkey, - t1.l_partkey AS l_partkey, - t1.l_suppkey AS l_suppkey, - t1.l_linenumber AS l_linenumber, - t1.l_quantity AS l_quantity, - t1.l_extendedprice AS l_extendedprice, - t1.l_discount AS l_discount, - t1.l_tax AS l_tax, - t1.l_returnflag AS l_returnflag, - t1.l_linestatus AS l_linestatus, - t1.l_shipdate AS l_shipdate, - 
t1.l_commitdate AS l_commitdate, - t1.l_receiptdate AS l_receiptdate, - t1.l_shipinstruct AS l_shipinstruct, - t1.l_shipmode AS l_shipmode, - t1.l_comment AS l_comment + t1.l_orderkey, + t1.l_partkey, + t1.l_suppkey, + t1.l_linenumber, + t1.l_quantity, + t1.l_extendedprice, + t1.l_discount, + t1.l_tax, + t1.l_returnflag, + t1.l_linestatus, + t1.l_shipdate, + t1.l_commitdate, + t1.l_receiptdate, + t1.l_shipinstruct, + t1.l_shipmode, + t1.l_comment FROM lineitem AS t1 WHERE t1.l_shipdate >= MAKE_DATE(1996, 1, 1) AND t1.l_shipdate < MAKE_DATE(1996, 4, 1) - ) AS t2 + ) AS t3 GROUP BY 1 - ) AS t4 - ON t0.s_suppkey = t4.l_suppkey -) AS t6 + ) AS t5 + ON t2.s_suppkey = t5.l_suppkey +) AS t7 WHERE - t6.total_revenue = ( + t7.total_revenue = ( SELECT - MAX(t6.total_revenue) AS "Max(total_revenue)" + MAX(t7.total_revenue) AS "Max(total_revenue)" FROM ( SELECT - t0.s_suppkey AS s_suppkey, - t0.s_name AS s_name, - t0.s_address AS s_address, - t0.s_nationkey AS s_nationkey, - t0.s_phone AS s_phone, - t0.s_acctbal AS s_acctbal, - t0.s_comment AS s_comment, - t4.l_suppkey AS l_suppkey, - t4.total_revenue AS total_revenue - FROM supplier AS t0 + t2.s_suppkey, + t2.s_name, + t2.s_address, + t2.s_nationkey, + t2.s_phone, + t2.s_acctbal, + t2.s_comment, + t5.l_suppkey, + t5.total_revenue + FROM supplier AS t2 INNER JOIN ( SELECT - t2.l_suppkey AS l_suppkey, - SUM(t2.l_extendedprice * ( - CAST(1 AS TINYINT) - t2.l_discount + t3.l_suppkey, + SUM(t3.l_extendedprice * ( + CAST(1 AS TINYINT) - t3.l_discount )) AS total_revenue FROM ( SELECT - t1.l_orderkey AS l_orderkey, - t1.l_partkey AS l_partkey, - t1.l_suppkey AS l_suppkey, - t1.l_linenumber AS l_linenumber, - t1.l_quantity AS l_quantity, - t1.l_extendedprice AS l_extendedprice, - t1.l_discount AS l_discount, - t1.l_tax AS l_tax, - t1.l_returnflag AS l_returnflag, - t1.l_linestatus AS l_linestatus, - t1.l_shipdate AS l_shipdate, - t1.l_commitdate AS l_commitdate, - t1.l_receiptdate AS l_receiptdate, - t1.l_shipinstruct AS l_shipinstruct, - t1.l_shipmode AS l_shipmode, - t1.l_comment AS l_comment + t1.l_orderkey, + t1.l_partkey, + t1.l_suppkey, + t1.l_linenumber, + t1.l_quantity, + t1.l_extendedprice, + t1.l_discount, + t1.l_tax, + t1.l_returnflag, + t1.l_linestatus, + t1.l_shipdate, + t1.l_commitdate, + t1.l_receiptdate, + t1.l_shipinstruct, + t1.l_shipmode, + t1.l_comment FROM lineitem AS t1 WHERE t1.l_shipdate >= MAKE_DATE(1996, 1, 1) AND t1.l_shipdate < MAKE_DATE(1996, 4, 1) - ) AS t2 + ) AS t3 GROUP BY 1 - ) AS t4 - ON t0.s_suppkey = t4.l_suppkey - ) AS t6 + ) AS t5 + ON t2.s_suppkey = t5.l_suppkey + ) AS t7 ) ORDER BY - t6.s_suppkey ASC \ No newline at end of file + t7.s_suppkey ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql index b0634e8a2e27..711276ec20dd 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql @@ -1,73 +1,73 @@ SELECT - t8.p_brand AS p_brand, - t8.p_type AS p_type, - t8.p_size AS p_size, - t8.supplier_cnt AS supplier_cnt + t9.p_brand, + t9.p_type, + t9.p_size, + t9.supplier_cnt FROM ( SELECT - t7.p_brand AS p_brand, - t7.p_type AS p_type, - t7.p_size AS p_size, - COUNT(DISTINCT t7.ps_suppkey) AS supplier_cnt + t8.p_brand, + t8.p_type, + t8.p_size, + COUNT(DISTINCT t8.ps_suppkey) AS supplier_cnt FROM ( SELECT - t6.ps_partkey AS ps_partkey, - t6.ps_suppkey AS ps_suppkey, - t6.ps_availqty AS 
ps_availqty, - t6.ps_supplycost AS ps_supplycost, - t6.ps_comment AS ps_comment, - t6.p_partkey AS p_partkey, - t6.p_name AS p_name, - t6.p_mfgr AS p_mfgr, - t6.p_brand AS p_brand, - t6.p_type AS p_type, - t6.p_size AS p_size, - t6.p_container AS p_container, - t6.p_retailprice AS p_retailprice, - t6.p_comment AS p_comment + t7.ps_partkey, + t7.ps_suppkey, + t7.ps_availqty, + t7.ps_supplycost, + t7.ps_comment, + t7.p_partkey, + t7.p_name, + t7.p_mfgr, + t7.p_brand, + t7.p_type, + t7.p_size, + t7.p_container, + t7.p_retailprice, + t7.p_comment FROM ( SELECT - t0.ps_partkey AS ps_partkey, - t0.ps_suppkey AS ps_suppkey, - t0.ps_availqty AS ps_availqty, - t0.ps_supplycost AS ps_supplycost, - t0.ps_comment AS ps_comment, - t3.p_partkey AS p_partkey, - t3.p_name AS p_name, - t3.p_mfgr AS p_mfgr, - t3.p_brand AS p_brand, - t3.p_type AS p_type, - t3.p_size AS p_size, - t3.p_container AS p_container, - t3.p_retailprice AS p_retailprice, - t3.p_comment AS p_comment - FROM partsupp AS t0 - INNER JOIN part AS t3 - ON t3.p_partkey = t0.ps_partkey - ) AS t6 + t3.ps_partkey, + t3.ps_suppkey, + t3.ps_availqty, + t3.ps_supplycost, + t3.ps_comment, + t4.p_partkey, + t4.p_name, + t4.p_mfgr, + t4.p_brand, + t4.p_type, + t4.p_size, + t4.p_container, + t4.p_retailprice, + t4.p_comment + FROM partsupp AS t3 + INNER JOIN part AS t4 + ON t4.p_partkey = t3.ps_partkey + ) AS t7 WHERE - t6.p_brand <> 'Brand#45' + t7.p_brand <> 'Brand#45' AND NOT ( - t6.p_type LIKE 'MEDIUM POLISHED%' + t7.p_type LIKE 'MEDIUM POLISHED%' ) - AND t6.p_size IN (CAST(49 AS TINYINT), CAST(14 AS TINYINT), CAST(23 AS TINYINT), CAST(45 AS TINYINT), CAST(19 AS TINYINT), CAST(3 AS TINYINT), CAST(36 AS TINYINT), CAST(9 AS TINYINT)) + AND t7.p_size IN (CAST(49 AS TINYINT), CAST(14 AS TINYINT), CAST(23 AS TINYINT), CAST(45 AS TINYINT), CAST(19 AS TINYINT), CAST(3 AS TINYINT), CAST(36 AS TINYINT), CAST(9 AS TINYINT)) AND NOT ( - t6.ps_suppkey IN (( + t7.ps_suppkey IN ( SELECT - t2.s_suppkey AS s_suppkey + t2.s_suppkey FROM supplier AS t2 WHERE t2.s_comment LIKE '%Customer%Complaints%' - )) + ) ) - ) AS t7 + ) AS t8 GROUP BY 1, 2, 3 -) AS t8 +) AS t9 ORDER BY - t8.supplier_cnt DESC, - t8.p_brand ASC, - t8.p_type ASC, - t8.p_size ASC \ No newline at end of file + t9.supplier_cnt DESC, + t9.p_brand ASC, + t9.p_type ASC, + t9.p_size ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql index 601ac35cd886..905e5c095d3d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql @@ -1,92 +1,92 @@ SELECT - SUM(t7.l_extendedprice) / CAST(7.0 AS DOUBLE) AS avg_yearly + SUM(t8.l_extendedprice) / CAST(7.0 AS DOUBLE) AS avg_yearly FROM ( SELECT - t4.l_orderkey AS l_orderkey, - t4.l_partkey AS l_partkey, - t4.l_suppkey AS l_suppkey, - t4.l_linenumber AS l_linenumber, - t4.l_quantity AS l_quantity, - t4.l_extendedprice AS l_extendedprice, - t4.l_discount AS l_discount, - t4.l_tax AS l_tax, - t4.l_returnflag AS l_returnflag, - t4.l_linestatus AS l_linestatus, - t4.l_shipdate AS l_shipdate, - t4.l_commitdate AS l_commitdate, - t4.l_receiptdate AS l_receiptdate, - t4.l_shipinstruct AS l_shipinstruct, - t4.l_shipmode AS l_shipmode, - t4.l_comment AS l_comment, - t4.p_partkey AS p_partkey, - t4.p_name AS p_name, - t4.p_mfgr AS p_mfgr, - t4.p_brand AS p_brand, - t4.p_type AS p_type, - t4.p_size AS p_size, - t4.p_container AS 
p_container, - t4.p_retailprice AS p_retailprice, - t4.p_comment AS p_comment + t5.l_orderkey, + t5.l_partkey, + t5.l_suppkey, + t5.l_linenumber, + t5.l_quantity, + t5.l_extendedprice, + t5.l_discount, + t5.l_tax, + t5.l_returnflag, + t5.l_linestatus, + t5.l_shipdate, + t5.l_commitdate, + t5.l_receiptdate, + t5.l_shipinstruct, + t5.l_shipmode, + t5.l_comment, + t5.p_partkey, + t5.p_name, + t5.p_mfgr, + t5.p_brand, + t5.p_type, + t5.p_size, + t5.p_container, + t5.p_retailprice, + t5.p_comment FROM ( SELECT - t0.l_orderkey AS l_orderkey, - t0.l_partkey AS l_partkey, - t0.l_suppkey AS l_suppkey, - t0.l_linenumber AS l_linenumber, - t0.l_quantity AS l_quantity, - t0.l_extendedprice AS l_extendedprice, - t0.l_discount AS l_discount, - t0.l_tax AS l_tax, - t0.l_returnflag AS l_returnflag, - t0.l_linestatus AS l_linestatus, - t0.l_shipdate AS l_shipdate, - t0.l_commitdate AS l_commitdate, - t0.l_receiptdate AS l_receiptdate, - t0.l_shipinstruct AS l_shipinstruct, - t0.l_shipmode AS l_shipmode, - t0.l_comment AS l_comment, - t2.p_partkey AS p_partkey, - t2.p_name AS p_name, - t2.p_mfgr AS p_mfgr, - t2.p_brand AS p_brand, - t2.p_type AS p_type, - t2.p_size AS p_size, - t2.p_container AS p_container, - t2.p_retailprice AS p_retailprice, - t2.p_comment AS p_comment - FROM lineitem AS t0 - INNER JOIN part AS t2 - ON t2.p_partkey = t0.l_partkey - ) AS t4 + t2.l_orderkey, + t2.l_partkey, + t2.l_suppkey, + t2.l_linenumber, + t2.l_quantity, + t2.l_extendedprice, + t2.l_discount, + t2.l_tax, + t2.l_returnflag, + t2.l_linestatus, + t2.l_shipdate, + t2.l_commitdate, + t2.l_receiptdate, + t2.l_shipinstruct, + t2.l_shipmode, + t2.l_comment, + t3.p_partkey, + t3.p_name, + t3.p_mfgr, + t3.p_brand, + t3.p_type, + t3.p_size, + t3.p_container, + t3.p_retailprice, + t3.p_comment + FROM lineitem AS t2 + INNER JOIN part AS t3 + ON t3.p_partkey = t2.l_partkey + ) AS t5 WHERE - t4.p_brand = 'Brand#23' - AND t4.p_container = 'MED BOX' - AND t4.l_quantity < ( + t5.p_brand = 'Brand#23' + AND t5.p_container = 'MED BOX' + AND t5.l_quantity < ( ( SELECT - AVG(t5.l_quantity) AS "Mean(l_quantity)" + AVG(t6.l_quantity) AS "Mean(l_quantity)" FROM ( SELECT - t0.l_orderkey AS l_orderkey, - t0.l_partkey AS l_partkey, - t0.l_suppkey AS l_suppkey, - t0.l_linenumber AS l_linenumber, - t0.l_quantity AS l_quantity, - t0.l_extendedprice AS l_extendedprice, - t0.l_discount AS l_discount, - t0.l_tax AS l_tax, - t0.l_returnflag AS l_returnflag, - t0.l_linestatus AS l_linestatus, - t0.l_shipdate AS l_shipdate, - t0.l_commitdate AS l_commitdate, - t0.l_receiptdate AS l_receiptdate, - t0.l_shipinstruct AS l_shipinstruct, - t0.l_shipmode AS l_shipmode, - t0.l_comment AS l_comment + t0.l_orderkey, + t0.l_partkey, + t0.l_suppkey, + t0.l_linenumber, + t0.l_quantity, + t0.l_extendedprice, + t0.l_discount, + t0.l_tax, + t0.l_returnflag, + t0.l_linestatus, + t0.l_shipdate, + t0.l_commitdate, + t0.l_receiptdate, + t0.l_shipinstruct, + t0.l_shipmode, + t0.l_comment FROM lineitem AS t0 WHERE - t0.l_partkey = t4.p_partkey - ) AS t5 + t0.l_partkey = t5.p_partkey + ) AS t6 ) * CAST(0.2 AS DOUBLE) ) -) AS t7 +) AS t8 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql index 621c6423e037..9d3d4f821010 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql @@ -1,118 +1,118 @@ SELECT - t11.c_name AS c_name, - t11.c_custkey 
AS c_custkey, - t11.o_orderkey AS o_orderkey, - t11.o_orderdate AS o_orderdate, - t11.o_totalprice AS o_totalprice, - t11.sum_qty AS sum_qty + t12.c_name, + t12.c_custkey, + t12.o_orderkey, + t12.o_orderdate, + t12.o_totalprice, + t12.sum_qty FROM ( SELECT - t10.c_name AS c_name, - t10.c_custkey AS c_custkey, - t10.o_orderkey AS o_orderkey, - t10.o_orderdate AS o_orderdate, - t10.o_totalprice AS o_totalprice, - SUM(t10.l_quantity) AS sum_qty + t11.c_name, + t11.c_custkey, + t11.o_orderkey, + t11.o_orderdate, + t11.o_totalprice, + SUM(t11.l_quantity) AS sum_qty FROM ( SELECT - t8.c_custkey AS c_custkey, - t8.c_name AS c_name, - t8.c_address AS c_address, - t8.c_nationkey AS c_nationkey, - t8.c_phone AS c_phone, - t8.c_acctbal AS c_acctbal, - t8.c_mktsegment AS c_mktsegment, - t8.c_comment AS c_comment, - t8.o_orderkey AS o_orderkey, - t8.o_custkey AS o_custkey, - t8.o_orderstatus AS o_orderstatus, - t8.o_totalprice AS o_totalprice, - t8.o_orderdate AS o_orderdate, - t8.o_orderpriority AS o_orderpriority, - t8.o_clerk AS o_clerk, - t8.o_shippriority AS o_shippriority, - t8.o_comment AS o_comment, - t8.l_orderkey AS l_orderkey, - t8.l_partkey AS l_partkey, - t8.l_suppkey AS l_suppkey, - t8.l_linenumber AS l_linenumber, - t8.l_quantity AS l_quantity, - t8.l_extendedprice AS l_extendedprice, - t8.l_discount AS l_discount, - t8.l_tax AS l_tax, - t8.l_returnflag AS l_returnflag, - t8.l_linestatus AS l_linestatus, - t8.l_shipdate AS l_shipdate, - t8.l_commitdate AS l_commitdate, - t8.l_receiptdate AS l_receiptdate, - t8.l_shipinstruct AS l_shipinstruct, - t8.l_shipmode AS l_shipmode, - t8.l_comment AS l_comment + t9.c_custkey, + t9.c_name, + t9.c_address, + t9.c_nationkey, + t9.c_phone, + t9.c_acctbal, + t9.c_mktsegment, + t9.c_comment, + t9.o_orderkey, + t9.o_custkey, + t9.o_orderstatus, + t9.o_totalprice, + t9.o_orderdate, + t9.o_orderpriority, + t9.o_clerk, + t9.o_shippriority, + t9.o_comment, + t9.l_orderkey, + t9.l_partkey, + t9.l_suppkey, + t9.l_linenumber, + t9.l_quantity, + t9.l_extendedprice, + t9.l_discount, + t9.l_tax, + t9.l_returnflag, + t9.l_linestatus, + t9.l_shipdate, + t9.l_commitdate, + t9.l_receiptdate, + t9.l_shipinstruct, + t9.l_shipmode, + t9.l_comment FROM ( SELECT - t0.c_custkey AS c_custkey, - t0.c_name AS c_name, - t0.c_address AS c_address, - t0.c_nationkey AS c_nationkey, - t0.c_phone AS c_phone, - t0.c_acctbal AS c_acctbal, - t0.c_mktsegment AS c_mktsegment, - t0.c_comment AS c_comment, - t3.o_orderkey AS o_orderkey, - t3.o_custkey AS o_custkey, - t3.o_orderstatus AS o_orderstatus, - t3.o_totalprice AS o_totalprice, - t3.o_orderdate AS o_orderdate, - t3.o_orderpriority AS o_orderpriority, - t3.o_clerk AS o_clerk, - t3.o_shippriority AS o_shippriority, - t3.o_comment AS o_comment, - t4.l_orderkey AS l_orderkey, - t4.l_partkey AS l_partkey, - t4.l_suppkey AS l_suppkey, - t4.l_linenumber AS l_linenumber, - t4.l_quantity AS l_quantity, - t4.l_extendedprice AS l_extendedprice, - t4.l_discount AS l_discount, - t4.l_tax AS l_tax, - t4.l_returnflag AS l_returnflag, - t4.l_linestatus AS l_linestatus, - t4.l_shipdate AS l_shipdate, - t4.l_commitdate AS l_commitdate, - t4.l_receiptdate AS l_receiptdate, - t4.l_shipinstruct AS l_shipinstruct, - t4.l_shipmode AS l_shipmode, - t4.l_comment AS l_comment - FROM customer AS t0 - INNER JOIN orders AS t3 - ON t0.c_custkey = t3.o_custkey - INNER JOIN lineitem AS t4 - ON t3.o_orderkey = t4.l_orderkey - ) AS t8 + t3.c_custkey, + t3.c_name, + t3.c_address, + t3.c_nationkey, + t3.c_phone, + t3.c_acctbal, + t3.c_mktsegment, + t3.c_comment, + 
t4.o_orderkey, + t4.o_custkey, + t4.o_orderstatus, + t4.o_totalprice, + t4.o_orderdate, + t4.o_orderpriority, + t4.o_clerk, + t4.o_shippriority, + t4.o_comment, + t5.l_orderkey, + t5.l_partkey, + t5.l_suppkey, + t5.l_linenumber, + t5.l_quantity, + t5.l_extendedprice, + t5.l_discount, + t5.l_tax, + t5.l_returnflag, + t5.l_linestatus, + t5.l_shipdate, + t5.l_commitdate, + t5.l_receiptdate, + t5.l_shipinstruct, + t5.l_shipmode, + t5.l_comment + FROM customer AS t3 + INNER JOIN orders AS t4 + ON t3.c_custkey = t4.o_custkey + INNER JOIN lineitem AS t5 + ON t4.o_orderkey = t5.l_orderkey + ) AS t9 WHERE - t8.o_orderkey IN (( + t9.o_orderkey IN ( SELECT - t5.l_orderkey AS l_orderkey + t6.l_orderkey FROM ( SELECT - t2.l_orderkey AS l_orderkey, + t2.l_orderkey, SUM(t2.l_quantity) AS qty_sum FROM lineitem AS t2 GROUP BY 1 - ) AS t5 + ) AS t6 WHERE - t5.qty_sum > CAST(300 AS SMALLINT) - )) - ) AS t10 + t6.qty_sum > CAST(300 AS SMALLINT) + ) + ) AS t11 GROUP BY 1, 2, 3, 4, 5 -) AS t11 +) AS t12 ORDER BY - t11.o_totalprice DESC, - t11.o_orderdate ASC + t12.o_totalprice DESC, + t12.o_orderdate ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql index 288021f12b67..29adca6df1be 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql @@ -1,65 +1,65 @@ SELECT - SUM(t5.l_extendedprice * ( - CAST(1 AS TINYINT) - t5.l_discount + SUM(t6.l_extendedprice * ( + CAST(1 AS TINYINT) - t6.l_discount )) AS revenue FROM ( SELECT - t4.l_orderkey AS l_orderkey, - t4.l_partkey AS l_partkey, - t4.l_suppkey AS l_suppkey, - t4.l_linenumber AS l_linenumber, - t4.l_quantity AS l_quantity, - t4.l_extendedprice AS l_extendedprice, - t4.l_discount AS l_discount, - t4.l_tax AS l_tax, - t4.l_returnflag AS l_returnflag, - t4.l_linestatus AS l_linestatus, - t4.l_shipdate AS l_shipdate, - t4.l_commitdate AS l_commitdate, - t4.l_receiptdate AS l_receiptdate, - t4.l_shipinstruct AS l_shipinstruct, - t4.l_shipmode AS l_shipmode, - t4.l_comment AS l_comment, - t4.p_partkey AS p_partkey, - t4.p_name AS p_name, - t4.p_mfgr AS p_mfgr, - t4.p_brand AS p_brand, - t4.p_type AS p_type, - t4.p_size AS p_size, - t4.p_container AS p_container, - t4.p_retailprice AS p_retailprice, - t4.p_comment AS p_comment + t5.l_orderkey, + t5.l_partkey, + t5.l_suppkey, + t5.l_linenumber, + t5.l_quantity, + t5.l_extendedprice, + t5.l_discount, + t5.l_tax, + t5.l_returnflag, + t5.l_linestatus, + t5.l_shipdate, + t5.l_commitdate, + t5.l_receiptdate, + t5.l_shipinstruct, + t5.l_shipmode, + t5.l_comment, + t5.p_partkey, + t5.p_name, + t5.p_mfgr, + t5.p_brand, + t5.p_type, + t5.p_size, + t5.p_container, + t5.p_retailprice, + t5.p_comment FROM ( SELECT - t0.l_orderkey AS l_orderkey, - t0.l_partkey AS l_partkey, - t0.l_suppkey AS l_suppkey, - t0.l_linenumber AS l_linenumber, - t0.l_quantity AS l_quantity, - t0.l_extendedprice AS l_extendedprice, - t0.l_discount AS l_discount, - t0.l_tax AS l_tax, - t0.l_returnflag AS l_returnflag, - t0.l_linestatus AS l_linestatus, - t0.l_shipdate AS l_shipdate, - t0.l_commitdate AS l_commitdate, - t0.l_receiptdate AS l_receiptdate, - t0.l_shipinstruct AS l_shipinstruct, - t0.l_shipmode AS l_shipmode, - t0.l_comment AS l_comment, - t2.p_partkey AS p_partkey, - t2.p_name AS p_name, - t2.p_mfgr AS p_mfgr, - t2.p_brand AS p_brand, - t2.p_type AS p_type, - t2.p_size AS p_size, - 
t2.p_container AS p_container, - t2.p_retailprice AS p_retailprice, - t2.p_comment AS p_comment - FROM lineitem AS t0 - INNER JOIN part AS t2 - ON t2.p_partkey = t0.l_partkey - ) AS t4 + t2.l_orderkey, + t2.l_partkey, + t2.l_suppkey, + t2.l_linenumber, + t2.l_quantity, + t2.l_extendedprice, + t2.l_discount, + t2.l_tax, + t2.l_returnflag, + t2.l_linestatus, + t2.l_shipdate, + t2.l_commitdate, + t2.l_receiptdate, + t2.l_shipinstruct, + t2.l_shipmode, + t2.l_comment, + t3.p_partkey, + t3.p_name, + t3.p_mfgr, + t3.p_brand, + t3.p_type, + t3.p_size, + t3.p_container, + t3.p_retailprice, + t3.p_comment + FROM lineitem AS t2 + INNER JOIN part AS t3 + ON t3.p_partkey = t2.l_partkey + ) AS t5 WHERE ( ( @@ -69,24 +69,24 @@ FROM ( ( ( ( - t4.p_brand = 'Brand#12' + t5.p_brand = 'Brand#12' ) - AND t4.p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + AND t5.p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') ) AND ( - t4.l_quantity >= CAST(1 AS TINYINT) + t5.l_quantity >= CAST(1 AS TINYINT) ) ) AND ( - t4.l_quantity <= CAST(11 AS TINYINT) + t5.l_quantity <= CAST(11 AS TINYINT) ) ) - AND t4.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(5 AS TINYINT) + AND t5.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(5 AS TINYINT) ) - AND t4.l_shipmode IN ('AIR', 'AIR REG') + AND t5.l_shipmode IN ('AIR', 'AIR REG') ) AND ( - t4.l_shipinstruct = 'DELIVER IN PERSON' + t5.l_shipinstruct = 'DELIVER IN PERSON' ) ) OR ( @@ -96,24 +96,24 @@ FROM ( ( ( ( - t4.p_brand = 'Brand#23' + t5.p_brand = 'Brand#23' ) - AND t4.p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + AND t5.p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') ) AND ( - t4.l_quantity >= CAST(10 AS TINYINT) + t5.l_quantity >= CAST(10 AS TINYINT) ) ) AND ( - t4.l_quantity <= CAST(20 AS TINYINT) + t5.l_quantity <= CAST(20 AS TINYINT) ) ) - AND t4.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(10 AS TINYINT) + AND t5.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(10 AS TINYINT) ) - AND t4.l_shipmode IN ('AIR', 'AIR REG') + AND t5.l_shipmode IN ('AIR', 'AIR REG') ) AND ( - t4.l_shipinstruct = 'DELIVER IN PERSON' + t5.l_shipinstruct = 'DELIVER IN PERSON' ) ) ) @@ -124,24 +124,24 @@ FROM ( ( ( ( - t4.p_brand = 'Brand#34' + t5.p_brand = 'Brand#34' ) - AND t4.p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + AND t5.p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') ) AND ( - t4.l_quantity >= CAST(20 AS TINYINT) + t5.l_quantity >= CAST(20 AS TINYINT) ) ) AND ( - t4.l_quantity <= CAST(30 AS TINYINT) + t5.l_quantity <= CAST(30 AS TINYINT) ) ) - AND t4.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(15 AS TINYINT) + AND t5.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(15 AS TINYINT) ) - AND t4.l_shipmode IN ('AIR', 'AIR REG') + AND t5.l_shipmode IN ('AIR', 'AIR REG') ) AND ( - t4.l_shipinstruct = 'DELIVER IN PERSON' + t5.l_shipinstruct = 'DELIVER IN PERSON' ) ) -) AS t5 \ No newline at end of file +) AS t6 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql index 2cc90c2a16fa..111f26421e9a 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql @@ -1,68 +1,68 @@ SELECT - t9.s_name AS s_name, - t9.s_address AS s_address + t10.s_name, + t10.s_address FROM ( SELECT - t0.s_suppkey AS s_suppkey, - t0.s_name AS s_name, - t0.s_address AS s_address, - t0.s_nationkey AS s_nationkey, - t0.s_phone AS s_phone, - 
t0.s_acctbal AS s_acctbal, - t0.s_comment AS s_comment, - t5.n_nationkey AS n_nationkey, - t5.n_name AS n_name, - t5.n_regionkey AS n_regionkey, - t5.n_comment AS n_comment - FROM supplier AS t0 - INNER JOIN nation AS t5 - ON t0.s_nationkey = t5.n_nationkey -) AS t9 + t5.s_suppkey, + t5.s_name, + t5.s_address, + t5.s_nationkey, + t5.s_phone, + t5.s_acctbal, + t5.s_comment, + t6.n_nationkey, + t6.n_name, + t6.n_regionkey, + t6.n_comment + FROM supplier AS t5 + INNER JOIN nation AS t6 + ON t5.s_nationkey = t6.n_nationkey +) AS t10 WHERE - t9.n_name = 'CANADA' - AND t9.s_suppkey IN (( + t10.n_name = 'CANADA' + AND t10.s_suppkey IN ( SELECT - t1.ps_suppkey AS ps_suppkey + t1.ps_suppkey FROM partsupp AS t1 WHERE - t1.ps_partkey IN (( + t1.ps_partkey IN ( SELECT - t3.p_partkey AS p_partkey + t3.p_partkey FROM part AS t3 WHERE t3.p_name LIKE 'forest%' - )) + ) AND t1.ps_availqty > ( ( SELECT - SUM(t7.l_quantity) AS "Sum(l_quantity)" + SUM(t8.l_quantity) AS "Sum(l_quantity)" FROM ( SELECT - t4.l_orderkey AS l_orderkey, - t4.l_partkey AS l_partkey, - t4.l_suppkey AS l_suppkey, - t4.l_linenumber AS l_linenumber, - t4.l_quantity AS l_quantity, - t4.l_extendedprice AS l_extendedprice, - t4.l_discount AS l_discount, - t4.l_tax AS l_tax, - t4.l_returnflag AS l_returnflag, - t4.l_linestatus AS l_linestatus, - t4.l_shipdate AS l_shipdate, - t4.l_commitdate AS l_commitdate, - t4.l_receiptdate AS l_receiptdate, - t4.l_shipinstruct AS l_shipinstruct, - t4.l_shipmode AS l_shipmode, - t4.l_comment AS l_comment + t4.l_orderkey, + t4.l_partkey, + t4.l_suppkey, + t4.l_linenumber, + t4.l_quantity, + t4.l_extendedprice, + t4.l_discount, + t4.l_tax, + t4.l_returnflag, + t4.l_linestatus, + t4.l_shipdate, + t4.l_commitdate, + t4.l_receiptdate, + t4.l_shipinstruct, + t4.l_shipmode, + t4.l_comment FROM lineitem AS t4 WHERE t4.l_partkey = t1.ps_partkey AND t4.l_suppkey = t1.ps_suppkey AND t4.l_shipdate >= MAKE_DATE(1994, 1, 1) AND t4.l_shipdate < MAKE_DATE(1995, 1, 1) - ) AS t7 + ) AS t8 ) * CAST(0.5 AS DOUBLE) ) - )) + ) ORDER BY - t9.s_name ASC + t10.s_name ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql index 282d2c3c05e2..72dd9ea9697b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql @@ -1,78 +1,74 @@ SELECT - t16.s_name AS s_name, - t16.numwait AS numwait + t17.s_name, + t17.numwait FROM ( SELECT - t15.s_name AS s_name, + t16.s_name, COUNT(*) AS numwait FROM ( SELECT - t12.l1_orderkey AS l1_orderkey, - t12.o_orderstatus AS o_orderstatus, - t12.l_receiptdate AS l_receiptdate, - t12.l_commitdate AS l_commitdate, - t12.l1_suppkey AS l1_suppkey, - t12.s_name AS s_name, - t12.n_name AS n_name + t13.l1_orderkey, + t13.o_orderstatus, + t13.l_receiptdate, + t13.l_commitdate, + t13.l1_suppkey, + t13.s_name, + t13.n_name FROM ( SELECT - t4.l_orderkey AS l1_orderkey, - t7.o_orderstatus AS o_orderstatus, - t4.l_receiptdate AS l_receiptdate, - t4.l_commitdate AS l_commitdate, - t4.l_suppkey AS l1_suppkey, - t0.s_name AS s_name, - t8.n_name AS n_name - FROM supplier AS t0 - INNER JOIN lineitem AS t4 - ON t0.s_suppkey = t4.l_suppkey - INNER JOIN orders AS t7 - ON t7.o_orderkey = t4.l_orderkey - INNER JOIN nation AS t8 - ON t0.s_nationkey = t8.n_nationkey - ) AS t12 + t5.l_orderkey AS l1_orderkey, + t8.o_orderstatus, + t5.l_receiptdate, + t5.l_commitdate, + t5.l_suppkey AS 
l1_suppkey, + t4.s_name, + t9.n_name + FROM supplier AS t4 + INNER JOIN lineitem AS t5 + ON t4.s_suppkey = t5.l_suppkey + INNER JOIN orders AS t8 + ON t8.o_orderkey = t5.l_orderkey + INNER JOIN nation AS t9 + ON t4.s_nationkey = t9.n_nationkey + ) AS t13 WHERE - t12.o_orderstatus = 'F' - AND t12.l_receiptdate > t12.l_commitdate - AND t12.n_name = 'SAUDI ARABIA' + t13.o_orderstatus = 'F' + AND t13.l_receiptdate > t13.l_commitdate + AND t13.n_name = 'SAUDI ARABIA' AND EXISTS( - ( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM lineitem AS t6 + WHERE + ( + t6.l_orderkey = t13.l1_orderkey + ) AND ( + t6.l_suppkey <> t13.l1_suppkey + ) + ) + AND NOT ( + EXISTS( SELECT CAST(1 AS TINYINT) AS "1" - FROM lineitem AS t5 + FROM lineitem AS t7 WHERE ( - t5.l_orderkey = t12.l1_orderkey - ) AND ( - t5.l_suppkey <> t12.l1_suppkey - ) - ) - ) - AND NOT ( - EXISTS( - ( - SELECT - CAST(1 AS TINYINT) AS "1" - FROM lineitem AS t6 - WHERE ( - ( - t6.l_orderkey = t12.l1_orderkey - ) AND ( - t6.l_suppkey <> t12.l1_suppkey - ) - ) - AND ( - t6.l_receiptdate > t6.l_commitdate + t7.l_orderkey = t13.l1_orderkey + ) AND ( + t7.l_suppkey <> t13.l1_suppkey ) - ) + ) + AND ( + t7.l_receiptdate > t7.l_commitdate + ) ) ) - ) AS t15 + ) AS t16 GROUP BY 1 -) AS t16 +) AS t17 ORDER BY - t16.numwait DESC, - t16.s_name ASC + t17.numwait DESC, + t17.s_name ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql index e8d7d1723ac7..323185ab0e0d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql @@ -1,10 +1,10 @@ SELECT - t6.cntrycode AS cntrycode, - t6.numcust AS numcust, - t6.totacctbal AS totacctbal + t6.cntrycode, + t6.numcust, + t6.totacctbal FROM ( SELECT - t5.cntrycode AS cntrycode, + t5.cntrycode, COUNT(*) AS numcust, SUM(t5.c_acctbal) AS totacctbal FROM ( @@ -14,7 +14,7 @@ FROM ( THEN SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT), CAST(2 AS TINYINT)) END AS cntrycode, - t0.c_acctbal AS c_acctbal + t0.c_acctbal FROM customer AS t0 WHERE CASE @@ -27,14 +27,14 @@ FROM ( AVG(t3.c_acctbal) AS "Mean(c_acctbal)" FROM ( SELECT - t0.c_custkey AS c_custkey, - t0.c_name AS c_name, - t0.c_address AS c_address, - t0.c_nationkey AS c_nationkey, - t0.c_phone AS c_phone, - t0.c_acctbal AS c_acctbal, - t0.c_mktsegment AS c_mktsegment, - t0.c_comment AS c_comment + t0.c_custkey, + t0.c_name, + t0.c_address, + t0.c_nationkey, + t0.c_phone, + t0.c_acctbal, + t0.c_mktsegment, + t0.c_comment FROM customer AS t0 WHERE t0.c_acctbal > CAST(0.0 AS DOUBLE) @@ -47,13 +47,11 @@ FROM ( ) AND NOT ( EXISTS( - ( - SELECT - CAST(1 AS TINYINT) AS "1" - FROM orders AS t1 - WHERE - t1.o_custkey = t0.c_custkey - ) + SELECT + CAST(1 AS TINYINT) AS "1" + FROM orders AS t1 + WHERE + t1.o_custkey = t0.c_custkey ) ) ) AS t5 @@ -61,4 +59,4 @@ FROM ( 1 ) AS t6 ORDER BY - t6.cntrycode ASC + t6.cntrycode ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/test_h08.py b/ibis/backends/tests/tpch/test_h08.py index 971a83c4c352..3ab657c3bec7 100644 --- a/ibis/backends/tests/tpch/test_h08.py +++ b/ibis/backends/tests/tpch/test_h08.py @@ -8,6 +8,11 @@ @tpch_test +@pytest.mark.notimpl( + ["snowflake"], + raises=AssertionError, + reason="ibis doesn't preserve decimal types in aggregations", +) @pytest.mark.xfail_version( 
trino=["sqlalchemy>=2"], reason="slightly different code is generated for sqlalchemy 2 for aggregations", diff --git a/ibis/backends/tests/tpch/test_h11.py b/ibis/backends/tests/tpch/test_h11.py index e13ba99179f9..75439d06c8ce 100644 --- a/ibis/backends/tests/tpch/test_h11.py +++ b/ibis/backends/tests/tpch/test_h11.py @@ -1,11 +1,18 @@ from __future__ import annotations +import pytest + import ibis from .conftest import tpch_test @tpch_test +@pytest.mark.broken( + ["snowflake"], + reason="ibis generates incorrect code for the right-hand-side of the exists statement", + raises=AssertionError, +) def test_tpc_h11(partsupp, supplier, nation): NATION = "GERMANY" FRACTION = 0.0001 diff --git a/ibis/backends/tests/tpch/test_h14.py b/ibis/backends/tests/tpch/test_h14.py index f72bbcaf6c2b..cb57d9911577 100644 --- a/ibis/backends/tests/tpch/test_h14.py +++ b/ibis/backends/tests/tpch/test_h14.py @@ -8,6 +8,11 @@ @tpch_test +@pytest.mark.notimpl( + ["snowflake"], + raises=AssertionError, + reason="ibis doesn't preserve decimal types in aggregations", +) @pytest.mark.xfail_version( trino=["sqlalchemy>=2"], reason="slightly different code is generated for sqlalchemy 2 for aggregations", diff --git a/ibis/backends/tests/tpch/test_h17.py b/ibis/backends/tests/tpch/test_h17.py index 0d112d048c91..fbe50eb78f7e 100644 --- a/ibis/backends/tests/tpch/test_h17.py +++ b/ibis/backends/tests/tpch/test_h17.py @@ -6,6 +6,11 @@ @tpch_test +@pytest.mark.notimpl( + ["snowflake"], + raises=AssertionError, + reason="ibis doesn't preserve decimal types in aggregations", +) @pytest.mark.xfail_version( trino=["sqlalchemy>=2"], reason="slightly different code is generated for sqlalchemy 2 for aggregations", diff --git a/ibis/backends/tests/tpch/test_h21.py b/ibis/backends/tests/tpch/test_h21.py index 487b574bb615..f8aea4314c81 100644 --- a/ibis/backends/tests/tpch/test_h21.py +++ b/ibis/backends/tests/tpch/test_h21.py @@ -1,11 +1,19 @@ from __future__ import annotations +import pytest +import sqlalchemy as sa + import ibis from .conftest import tpch_test @tpch_test +@pytest.mark.broken( + ["snowflake"], + reason="ibis generates overlapping aliases", + raises=sa.exc.CompileError, +) def test_tpc_h21(supplier, lineitem, orders, nation): """Suppliers Who Kept Orders Waiting Query (Q21) diff --git a/ibis/backends/tests/tpch/test_h22.py b/ibis/backends/tests/tpch/test_h22.py index d505436c4927..f18da1d2930b 100644 --- a/ibis/backends/tests/tpch/test_h22.py +++ b/ibis/backends/tests/tpch/test_h22.py @@ -1,9 +1,16 @@ from __future__ import annotations +import pytest + from .conftest import tpch_test @tpch_test +@pytest.mark.broken( + ["snowflake"], + reason="ibis generates incorrect code for the right-hand-side of the exists statement", + raises=AssertionError, +) def test_tpc_h22(customer, orders): """Global Sales Opportunity Query (Q22) diff --git a/ibis/examples/tests/test_examples.py b/ibis/examples/tests/test_examples.py index b17da0ed3227..c9d0b9567dc4 100644 --- a/ibis/examples/tests/test_examples.py +++ b/ibis/examples/tests/test_examples.py @@ -11,7 +11,6 @@ pytestmark = pytest.mark.examples duckdb = pytest.importorskip("duckdb") -pytest.importorskip("pins") # large files or files that are used elsewhere ignored = frozenset( @@ -95,27 +94,3 @@ def test_table_name_arg(): name = f"penguins-{uuid.uuid4().hex}" t = ibis.examples.penguins.fetch(backend=con, table_name=name) assert t.get_name() == name - - -@pytest.mark.pandas -@pytest.mark.duckdb -@pytest.mark.backend -@skip_linux_nix -@pytest.mark.parametrize( - ("example", 
"columns"), - [ - ("ml_latest_small_links", ["movieId", "imdbId", "tmdbId"]), - ("band_instruments", ["name", "plays"]), - ( - "AwardsManagers", - ["player_id", "award_id", "year_id", "lg_id", "tie", "notes"], - ), - ], - ids=["parquet", "csv", "csv-all-null"], -) -@pytest.mark.parametrize("backend_name", ["duckdb", "polars", "pandas"]) -def test_load_example(backend_name, example, columns): - pytest.importorskip(backend_name) - con = getattr(ibis, backend_name).connect() - t = getattr(ibis.examples, example).fetch(backend=con) - assert t.columns == columns diff --git a/ibis/expr/decompile.py b/ibis/expr/decompile.py index 3b27c166852a..6c96648b0ff9 100644 --- a/ibis/expr/decompile.py +++ b/ibis/expr/decompile.py @@ -132,7 +132,10 @@ def _wrap_alias(values, rendered): for k, v in values.items(): text = rendered[k] if v.name != k: - text = f"{text}.name({k!r})" + if isinstance(v, ops.Binary): + text = f"({text}).name({k!r})" + else: + text = f"{text}.name({k!r})" result.append(text) return result @@ -189,6 +192,11 @@ def self_reference(op, parent, identifier): return f"{parent}.view()" +@translate.register(ops.Distinct) +def distinct(op, parent): + return f"{parent}.distinct()" + + @translate.register(ops.JoinTable) def join_table(op, parent, index): return parent @@ -202,7 +210,12 @@ def join_link(op, table, predicates, how): @translate.register(ops.JoinChain) def join(op, first, rest, values): calls = "".join(rest) - return f"{first}{calls}" + pieces = [f"{first}{calls}"] + if values: + values = _wrap_alias(op.values, values) + pieces.append(f"select({_inline(values)})") + result = ".".join(pieces) + return result @translate.register(ops.Set) @@ -224,7 +237,9 @@ def limit(op, parent, n, offset): @translate.register(ops.Field) def table_column(op, rel, name): - return f"{rel}.{name}" + if name.isidentifier(): + return f"{rel}.{name}" + return f"{rel}[{name!r}]" @translate.register(ops.SortKey) @@ -337,10 +352,11 @@ def isin(op, value, options): class CodeContext: always_assign = ( ops.ScalarParameter, - ops.UnboundTable, ops.Aggregate, + ops.PhysicalTable, ops.SelfReference, ) + always_ignore = ( ops.JoinTable, ops.Field, diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index a4c37ce2912b..5ab1ee37c595 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -20,7 +20,6 @@ from ibis.expr.operations.sortkeys import SortKey # noqa: TCH001 from ibis.expr.schema import Schema from ibis.formats import TableProxy # noqa: TCH001 -from ibis.util import gen_name T = TypeVar("T") @@ -111,13 +110,10 @@ def __init__(self, rel, **kwargs): ) super().__init__(rel=rel, **kwargs) - @attribute - def name(self): - return self.rel.schema.names[0] - @attribute def value(self): - return self.rel.values[self.name] + name = self.rel.schema.names[0] + return self.rel.values[name] @attribute def relations(self): @@ -208,12 +204,6 @@ def __init__(self, parent, identifier): identifier = next(self._uid_counter) super().__init__(parent=parent, identifier=identifier) - @attribute - def name(self) -> str: - if (name := getattr(self.parent, "name", None)) is not None: - return f"{name}_ref" - return gen_name("self_ref") - JoinKind = Literal[ "inner", @@ -427,6 +417,18 @@ def schema(self): return backend._get_schema_using_query(self.query) +@public +class View(PhysicalTable): + """A view created from an expression.""" + + child: Relation + name: str + + @attribute + def schema(self): + return self.child.schema + + @public class DummyTable(Relation): values: 
FrozenDict[str, Value] diff --git a/ibis/expr/rewrites.py b/ibis/expr/rewrites.py index 12a314379d9a..5cac1708e89b 100644 --- a/ibis/expr/rewrites.py +++ b/ibis/expr/rewrites.py @@ -1,8 +1,13 @@ """Some common rewrite functions to be shared between backends.""" from __future__ import annotations +import functools +from collections.abc import Mapping + import toolz +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis.common.deferred import Item, _, deferred, var from ibis.common.exceptions import ExpressionError @@ -22,6 +27,85 @@ def peel_join_field(_): return _.rel.values[_.name] +@replace(p.Alias(p.ScalarParameter)) +def unwrap_scalar_parameter(_): + """Replace aliased scalar parameters with the parameter itself.""" + return _.arg + + +def replace_scalar_parameter(params): + """Replace scalar parameters with their values.""" + + @replace(p.ScalarParameter) + def repl(_): + return ops.Literal(value=params[_], dtype=_.dtype) + + return repl + + +@replace(p.FillNa) +def rewrite_fillna(_): + """Rewrite FillNa expressions to use more common operations.""" + if isinstance(_.replacements, Mapping): + mapping = _.replacements + else: + mapping = { + name: _.replacements + for name, type in _.parent.schema.items() + if type.nullable + } + + if not mapping: + return _.parent + + selections = [] + for name in _.parent.schema.names: + col = ops.TableColumn(_.parent, name) + if (value := mapping.get(name)) is not None: + col = ops.Alias(ops.Coalesce((col, value)), name) + selections.append(col) + + return ops.Project(_.parent, selections) + + +@replace(p.DropNa) +def rewrite_dropna(_): + """Rewrite DropNa expressions to use more common operations.""" + if _.subset is None: + columns = [ops.TableColumn(_.parent, name) for name in _.parent.schema.names] + else: + columns = _.subset + + if columns: + preds = [ + functools.reduce( + ops.And if _.how == "any" else ops.Or, + [ops.NotNull(c) for c in columns], + ) + ] + elif _.how == "all": + preds = [ops.Literal(False, dtype=dt.bool)] + else: + return _.parent + + return ops.Filter(_.parent, tuple(preds)) + + +@replace(p.Sample) +def rewrite_sample(_): + """Rewrite Sample as `t.filter(random() <= fraction)`. + + Errors as unsupported if a `seed` is specified. 
+ """ + + if _.seed is not None: + raise com.UnsupportedOperationError( + "`Table.sample` with a random seed is unsupported" + ) + + return ops.Filter(_.parent, (ops.LessEqual(ops.RandomScalar(), _.fraction),)) + + @replace(ops.Analytic) def project_wrap_analytic(_, rel): # Wrap analytic functions in a window function @@ -118,6 +202,43 @@ def rewrite_window_input(value, frame): return node.replace(window_merge_frames, filter=p.Value, context=context) +@replace(p.InValues(..., ())) +def empty_in_values_right_side(_): + """Replace checks against an empty right side with `False`.""" + return ops.Literal(False, dtype=dt.bool) + + +@replace( + p.WindowFunction( + p.PercentRank(y) | p.RankBase(y) | p.CumeDist(y) | p.NTile(y), + p.WindowFrame(..., order_by=()) >> _.copy(order_by=(y,)), + ) +) +def add_order_by_to_empty_ranking_window_functions(_): + """Add an ORDER BY clause to rank window functions that don't have one.""" + return _ + + +@replace( + p.WindowFunction(p.RankBase | p.NTile) + | p.StringFind + | p.FindInSet + | p.ArrayPosition +) +def one_to_zero_index(_, **__): + """Subtract one from one-index functions.""" + return ops.Subtract(_, 1) + + +@replace(ops.NthValue) +def add_one_to_nth_value_input(_, **__): + if isinstance(_.nth, ops.Literal): + nth = ops.Literal(_.nth.value + 1, dtype=_.nth.dtype) + else: + nth = ops.Add(_.nth, 1) + return _.copy(nth=nth) + + # TODO(kszucs): schema comparison should be updated to not distinguish between # different column order @replace(p.Project(y @ p.Relation) & Check(_.schema == y.schema)) diff --git a/ibis/expr/tests/snapshots/test_format/test_format_dummy_table/repr.txt b/ibis/expr/tests/snapshots/test_format/test_format_dummy_table/repr.txt index 0563c0ba6211..fbda1a87cc5f 100644 --- a/ibis/expr/tests/snapshots/test_format/test_format_dummy_table/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_format_dummy_table/repr.txt @@ -1,2 +1,2 @@ DummyTable - foo: [1] \ No newline at end of file + foo: Array([1]) \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_aggregation_with_multiple_joins/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_aggregation_with_multiple_joins/decompiled.py index 499385aab514..379751112619 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_aggregation_with_multiple_joins/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_aggregation_with_multiple_joins/decompiled.py @@ -18,8 +18,21 @@ call_outcome = ibis.table( name="call_outcome", schema={"outcome_text": "string", "id": "int64"} ) -joinchain = employee.inner_join(call, employee.id == call.employee_id).inner_join( - call_outcome, call.call_outcome_id == call_outcome.id +joinchain = ( + employee.inner_join(call, employee.id == call.employee_id) + .inner_join(call_outcome, call.call_outcome_id == call_outcome.id) + .select( + employee.first_name, + employee.last_name, + employee.id, + call.start_time, + call.end_time, + call.employee_id, + call.call_outcome_id, + call.call_attempts, + call_outcome.outcome_text, + call_outcome.id.name("id_right"), + ) ) result = joinchain.aggregate( diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation_with_join/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation_with_join/decompiled.py index 0b23d1687445..42fdcbfec8e7 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation_with_join/decompiled.py +++ 
b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation_with_join/decompiled.py @@ -15,7 +15,16 @@ "call_attempts": "int64", }, ) -joinchain = employee.left_join(call, employee.id == call.employee_id) +joinchain = employee.left_join(call, employee.id == call.employee_id).select( + employee.first_name, + employee.last_name, + employee.id, + call.start_time, + call.end_time, + call.employee_id, + call.call_outcome_id, + call.call_attempts, +) result = joinchain.aggregate( [joinchain.call_attempts.sum().name("attempts")], by=[joinchain.id] diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/inner/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/inner/decompiled.py index 8439fd762875..290fddcfd9d3 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/inner/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/inner/decompiled.py @@ -15,7 +15,16 @@ "call_attempts": "int64", }, ) -joinchain = employee.inner_join(call, employee.id == call.employee_id) +joinchain = employee.inner_join(call, employee.id == call.employee_id).select( + employee.first_name, + employee.last_name, + employee.id, + call.start_time, + call.end_time, + call.employee_id, + call.call_outcome_id, + call.call_attempts, +) f = joinchain.filter(joinchain.id < 5) s = f.order_by(f.id.desc()) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/left/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/left/decompiled.py index 3e375cd052d2..4b3a6f52c08b 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/left/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/left/decompiled.py @@ -15,7 +15,16 @@ "call_attempts": "int64", }, ) -joinchain = employee.left_join(call, employee.id == call.employee_id) +joinchain = employee.left_join(call, employee.id == call.employee_id).select( + employee.first_name, + employee.last_name, + employee.id, + call.start_time, + call.end_time, + call.employee_id, + call.call_outcome_id, + call.call_attempts, +) f = joinchain.filter(joinchain.id < 5) s = f.order_by(f.id.desc()) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/right/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/right/decompiled.py index e9a8b2082dc1..f7f22e528a2a 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/right/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/right/decompiled.py @@ -15,7 +15,16 @@ "call_attempts": "int64", }, ) -joinchain = employee.right_join(call, employee.id == call.employee_id) +joinchain = employee.right_join(call, employee.id == call.employee_id).select( + employee.first_name, + employee.last_name, + employee.id, + call.start_time, + call.end_time, + call.employee_id, + call.call_outcome_id, + call.call_attempts, +) f = joinchain.filter(joinchain.id < 5) s = f.order_by(f.id.desc()) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_join_with_filter/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_join_with_filter/decompiled.py index 3e375cd052d2..4b3a6f52c08b 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_join_with_filter/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_join_with_filter/decompiled.py @@ -15,7 +15,16 @@ "call_attempts": "int64", }, ) -joinchain = employee.left_join(call, employee.id == call.employee_id) +joinchain = 
employee.left_join(call, employee.id == call.employee_id).select( + employee.first_name, + employee.last_name, + employee.id, + call.start_time, + call.end_time, + call.employee_id, + call.call_outcome_id, + call.call_attempts, +) f = joinchain.filter(joinchain.id < 5) s = f.order_by(f.id.desc()) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_multiple_joins/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_multiple_joins/decompiled.py index d6df17717b27..68a7ecaed136 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_multiple_joins/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_multiple_joins/decompiled.py @@ -19,6 +19,19 @@ name="call_outcome", schema={"outcome_text": "string", "id": "int64"} ) -result = employee.inner_join(call, employee.id == call.employee_id).inner_join( - call_outcome, call.call_outcome_id == call_outcome.id +result = ( + employee.inner_join(call, employee.id == call.employee_id) + .inner_join(call_outcome, call.call_outcome_id == call_outcome.id) + .select( + employee.first_name, + employee.last_name, + employee.id, + call.start_time, + call.end_time, + call.employee_id, + call.call_outcome_id, + call.call_attempts, + call_outcome.outcome_text, + call_outcome.id.name("id_right"), + ) ) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_scalar_subquery/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_scalar_subquery/decompiled.py index e651a29b1ad9..6080950c3d24 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_scalar_subquery/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_scalar_subquery/decompiled.py @@ -13,4 +13,13 @@ ) agg = call.aggregate([call.call_attempts.mean().name("mean")]) -result = call.inner_join(agg, [agg.mean < call.call_attempts, ibis.literal(True)]) +result = call.inner_join( + agg, [agg.mean < call.call_attempts, ibis.literal(True)] +).select( + call.start_time, + call.end_time, + call.employee_id, + call.call_outcome_id, + call.call_attempts, + agg.mean, +) diff --git a/ibis/expr/tests/test_format.py b/ibis/expr/tests/test_format.py index 87186e24728e..cec850521eab 100644 --- a/ibis/expr/tests/test_format.py +++ b/ibis/expr/tests/test_format.py @@ -362,11 +362,7 @@ def test_format_literal(literal, typ, output): def test_format_dummy_table(snapshot): -<<<<<<< HEAD - t = ops.DummyTable([ibis.array([1]).cast("array").name("foo")]).to_expr() -======= - t = ops.DummyTable({"foo": ibis.array([1], type="array")}).to_expr() ->>>>>>> 2189ab71b (refactor(ir): split the relational operations) + t = ops.DummyTable({"foo": ibis.array([1]).cast("array")}).to_expr() result = fmt(t) snapshot.assert_match(result, "repr.txt") diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py index 4afaca288980..0b2aa972e948 100644 --- a/ibis/expr/types/joins.py +++ b/ibis/expr/types/joins.py @@ -185,6 +185,11 @@ def join( ) preds = flatten_predicates(list(preds)) + # if there are no predicates, default to every row matching unless the + # join is a cross join, because a cross join already has this behavior + if not preds and how != "cross": + preds.append(ops.Literal(True, dtype="bool")) + # calculate the fields based in lname and rname, this should be a best # effort to avoid collisions, but does not raise if there are any # if no disambiaution happens using a final .select() call, then diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py index 7badf372de8a..4f83af05d320 100644 --- a/ibis/formats/pandas.py +++ 
b/ibis/formats/pandas.py @@ -3,6 +3,7 @@ import contextlib import datetime import warnings +from importlib.util import find_spec as _find_spec import numpy as np import pandas as pd @@ -24,6 +25,8 @@ "Install pandas >= 1.5.0 for interop with pandas and arrow dtype support" ) +geospatial_supported = _find_spec("geopandas") is not None + class PandasType(NumpyType): @classmethod @@ -120,6 +123,23 @@ def convert_table(cls, df, schema): # return data with the schema's columns which may be different than the # input columns df.columns = schema.names + + if geospatial_supported: + from geopandas import GeoDataFrame + from geopandas.array import GeometryDtype + + if ( + # pluck out the first geometry column if it exists + geom := next( + ( + name + for name, c in df.items() + if isinstance(c.dtype, GeometryDtype) + ), + None, + ) + ) is not None: + return GeoDataFrame(df, geometry=geom) return df @classmethod @@ -143,7 +163,11 @@ def convert_scalar(cls, obj, dtype): @classmethod def convert_GeoSpatial(cls, s, dtype, pandas_type): - return s + import geopandas as gpd + + if isinstance(s.dtype, gpd.array.GeometryDtype): + return gpd.GeoSeries(s) + return gpd.GeoSeries.from_wkb(s) convert_Point = ( convert_LineString diff --git a/ibis/tests/expr/mocks.py b/ibis/tests/expr/mocks.py index cfb7e7e4aa5c..bab44651beef 100644 --- a/ibis/tests/expr/mocks.py +++ b/ibis/tests/expr/mocks.py @@ -47,6 +47,11 @@ def list_tables(self): def list_databases(self): return ["mockdb"] + def _to_sql(self, expr, **kwargs): + import ibis + + return ibis.to_sql(expr, dialect="duckdb", **kwargs) + def fetch_from_cursor(self, cursor, schema): pass diff --git a/ibis/tests/util.py b/ibis/tests/util.py index f51dfca04ab7..47df8e59ebf9 100644 --- a/ibis/tests/util.py +++ b/ibis/tests/util.py @@ -30,7 +30,9 @@ def assert_pickle_roundtrip(obj): def schemas_eq(left: ir.Expr, right: ir.Expr) -> bool: - assert left.as_table().schema().equals(right.as_table().schema()) + left_schema = left.as_table().schema() + right_schema = right.as_table().schema() + return left_schema == right_schema def assert_decompile_roundtrip( diff --git a/pyproject.toml b/pyproject.toml index a6d43f6d04a8..aa0d269b24b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,6 @@ datafusion = { version = ">=0.6,<35", optional = true } db-dtypes = { version = ">=0.3,<2", optional = true } deltalake = { version = ">=0.9.0,<1", optional = true } duckdb = { version = ">=0.8.1,<1", optional = true } -duckdb-engine = { version = ">=0.1.8,<1", optional = true } geoalchemy2 = { version = ">=0.6.3,<1", optional = true } geopandas = { version = ">=0.6,<1", optional = true } google-cloud-bigquery = { version = ">=3,<4", optional = true } @@ -154,7 +153,6 @@ all = [ "datafusion", "db-dtypes", "duckdb", - "duckdb-engine", "deltalake", "geoalchemy2", "geopandas", @@ -188,11 +186,11 @@ bigquery = [ "google-cloud-bigquery-storage", "pydata-google-auth", ] -clickhouse = ["clickhouse-connect", "sqlalchemy"] +clickhouse = ["clickhouse-connect"] dask = ["dask", "regex"] datafusion = ["datafusion"] druid = ["pydruid", "sqlalchemy"] -duckdb = ["duckdb", "duckdb-engine", "sqlalchemy", "sqlalchemy-views"] +duckdb = ["duckdb"] exasol = ["sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views"] flink = [] geospatial = ["geoalchemy2", "geopandas", "shapely"] @@ -304,9 +302,6 @@ filterwarnings = [ 'ignore:`np\.bool` is a deprecated alias for the builtin `bool`:DeprecationWarning', # numpy, coming from a pandas call 'ignore:In the future `np\.bool` will be defined as the corresponding 
NumPy scalar:FutureWarning', - # duckdb-engine - 'ignore:Dialect .+ does \*not\* support Decimal:', - "ignore:duckdb-engine doesn't yet support reflection on indices:", # druid 'ignore:Dialect druid.rest will not make use of SQL compilation caching:', # ibis From 31252a9b3f0efc44de916b65e7db5e9a902b15c0 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sun, 24 Dec 2023 09:32:50 -0500 Subject: [PATCH 019/161] feat(datafusion): port to new sqlglot backend --- .github/workflows/ibis-backends.yml | 8 +- ibis/backends/base/sqlglot/compiler.py | 4 + ibis/backends/base/sqlglot/datatypes.py | 7 + ibis/backends/datafusion/__init__.py | 236 ++--- ibis/backends/datafusion/compiler.py | 560 ++++++++++++ ibis/backends/datafusion/compiler/__init__.py | 0 ibis/backends/datafusion/compiler/core.py | 128 --- .../backends/datafusion/compiler/relations.py | 211 ----- ibis/backends/datafusion/compiler/values.py | 818 ------------------ .../test_default_limit/datafusion/out.sql | 5 + .../datafusion/out.sql | 5 + .../datafusion/out.sql | 3 + .../test_respect_set_limit/datafusion/out.sql | 10 + .../datafusion/out.sql | 44 +- .../test_sql/test_isin_bug/datafusion/out.sql | 18 +- ibis/backends/tests/test_aggregation.py | 11 +- ibis/backends/tests/test_array.py | 3 +- ibis/backends/tests/test_client.py | 12 +- ibis/backends/tests/test_generic.py | 18 +- ibis/backends/tests/test_numeric.py | 9 +- ibis/backends/tests/test_param.py | 2 +- ibis/backends/tests/test_sql.py | 16 +- ibis/backends/tests/test_string.py | 15 +- ibis/backends/tests/test_temporal.py | 36 +- ibis/backends/tests/test_window.py | 40 - 25 files changed, 806 insertions(+), 1413 deletions(-) create mode 100644 ibis/backends/datafusion/compiler.py delete mode 100644 ibis/backends/datafusion/compiler/__init__.py delete mode 100644 ibis/backends/datafusion/compiler/core.py delete mode 100644 ibis/backends/datafusion/compiler/relations.py delete mode 100644 ibis/backends/datafusion/compiler/values.py create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/datafusion/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/datafusion/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/datafusion/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/datafusion/out.sql diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index e8cd07907ee8..90208910fa1b 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -85,10 +85,10 @@ jobs: # title: SQLite # extras: # - sqlite - # - name: datafusion - # title: Datafusion - # extras: - # - datafusion + - name: datafusion + title: Datafusion + extras: + - datafusion # - name: polars # title: Polars # extras: diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index 8c9879501726..ecba280b7867 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -555,6 +555,10 @@ def visit_RegexExtract(self, op, *, arg, pattern, index): def visit_StringConcat(self, op, *, arg): return self.f.concat(*arg) + @visit_node.register(ops.StringJoin) + def visit_StringJoin(self, op, *, sep, arg): + return self.f.concat_ws(sep, *arg) + @visit_node.register(ops.StringSQLLike) def visit_StringSQLLike(self, op, *, arg, pattern, escape): return arg.like(pattern) diff --git 
a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index 00ef53c7ba20..e3244246e6fe 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -360,6 +360,13 @@ class PostgresType(SqlglotType): ) +class DataFusionType(PostgresType): + unknown_type_strings = { + "utf8": dt.string, + "float64": dt.float64, + } + + class MySQLType(SqlglotType): dialect = "mysql" diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index 0685885b78bb..d47d5fc7c317 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -1,8 +1,8 @@ from __future__ import annotations +import contextlib import inspect import typing -from contextlib import suppress from pathlib import Path from typing import TYPE_CHECKING, Any @@ -11,9 +11,7 @@ import pyarrow.dataset as ds import pyarrow_hotfix # noqa: F401 import sqlglot as sg -from sqlglot import exp, transforms -from sqlglot.dialects import Postgres -from sqlglot.dialects.dialect import rename_func +import sqlglot.expressions as sge import ibis import ibis.common.exceptions as com @@ -21,12 +19,13 @@ import ibis.expr.operations as ops import ibis.expr.schema as sch import ibis.expr.types as ir -from ibis.backends.base import BaseBackend, CanCreateDatabase, CanCreateSchema -from ibis.backends.base.sqlglot import STAR, C -from ibis.backends.datafusion.compiler.core import translate +from ibis.backends.base import CanCreateDatabase, CanCreateSchema +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.base.sqlglot.compiler import C +from ibis.backends.datafusion.compiler import DataFusionCompiler from ibis.expr.operations.udf import InputType from ibis.formats.pyarrow import PyArrowType -from ibis.util import gen_name, log, normalize_filename +from ibis.util import gen_name, normalize_filename try: from datafusion import ExecutionContext as SessionContext @@ -39,44 +38,18 @@ SessionConfig = None if TYPE_CHECKING: - from collections.abc import Mapping + from collections.abc import Iterator, Mapping import pandas as pd -_exclude_exp = (exp.Pow, exp.ArrayContains) - -def _lower_unit(self, expr): - value = expr.this.sql(dialect=self.dialect) - unit = expr.unit.this.lower() - return f"INTERVAL '{value} {unit}'" - - -# the DataFusion dialect was created to skip the power function to operator transformation -# in the future this could be used to optimize sqlglot for datafusion -class DataFusion(Postgres): - class Generator(Postgres.Generator): - TRANSFORMS = { - exp: trans - for exp, trans in Postgres.Generator.TRANSFORMS.items() - if exp not in _exclude_exp - } | { - exp.Select: transforms.preprocess( - [ - transforms.eliminate_qualify, - ] - ), - exp.IsNan: rename_func("isnan"), - exp.Interval: _lower_unit, - } - - -class Backend(BaseBackend, CanCreateDatabase, CanCreateSchema): +class Backend(SQLGlotBackend, CanCreateDatabase, CanCreateSchema): name = "datafusion" dialect = "datafusion" builder = None supports_in_memory_tables = True supports_arrays = True + compiler = DataFusionCompiler() @property def version(self): @@ -119,6 +92,43 @@ def do_connect( for name, path in config.items(): self.register(path, table_name=name) + self._temp_views = set() + + @contextlib.contextmanager + def _safe_raw_sql(self, sql: sge.Statement) -> Any: + yield self.raw_sql(sql) + + def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: + name = gen_name("datafusion_metadata_view") + table = sg.table(name, 
quoted=self.compiler.quoted) + src = sge.Create( + this=table, + kind="VIEW", + expression=sg.parse_one(query, read="datafusion"), + properties=sge.Properties(expressions=[sge.TemporaryProperty()]), + ) + self.raw_sql(src) + + try: + result = ( + self.raw_sql(f"DESCRIBE {table.sql(self.name)}") + .to_arrow_table() + .to_pydict() + ) + finally: + self.drop_view(name) + return ( + ( + name, + self.compiler.type_mapper.from_string( + type_string, nullable=is_nullable == "YES" + ), + ) + for name, type_string, is_nullable in zip( + result["column_name"], result["data_type"], result["is_nullable"] + ) + ) + def _register_builtin_udfs(self): from ibis.backends.datafusion import udfs @@ -172,14 +182,6 @@ def _compile_elementwise_udf(self, udf_node): name=udf_node.func.__name__, ) - def _log(self, sql: str) -> None: - """Log `sql`. - - This method can be implemented by subclasses. Logging occurs when - `ibis.options.verbose` is `True`. - """ - log(sql) - def raw_sql(self, query: str | sg.exp.Expression) -> Any: """Execute a SQL string `query` against the database. @@ -190,7 +192,7 @@ def raw_sql(self, query: str | sg.exp.Expression) -> Any: kwargs Backend specific query arguments """ - with suppress(AttributeError): + with contextlib.suppress(AttributeError): query = query.sql(dialect=self.dialect, pretty=True) self._log(query) return self.con.sql(query) @@ -253,26 +255,21 @@ def list_tables( """List the available tables.""" return self._filter_with_like(self.con.tables(), like) - def table(self, name: str, schema: sch.Schema | None = None) -> ir.Table: - """Get an ibis expression representing a DataFusion table. + def get_schema( + self, table_name: str, schema: str | None = None, database: str | None = None + ) -> sch.Schema: + if database is not None: + catalog = self.con.catalog(database) + else: + catalog = self.con.catalog() - Parameters - ---------- - name - The name of the table to retrieve - schema - An optional schema for the table + if schema is not None: + database = catalog.database(schema) + else: + database = catalog.database() - Returns - ------- - Table - A table expression - """ - catalog = self.con.catalog() - database = catalog.database() - table = database.table(name) - schema = sch.schema(table.schema) - return ops.DatabaseTable(name, schema, self).to_expr() + table = database.table(table_name) + return sch.schema(table.schema) def register( self, @@ -520,51 +517,86 @@ def execute(self, expr: ir.Expr, **kwargs: Any): batch_reader.read_pandas(timestamp_as_object=True) ) - def _to_sqlglot( - self, expr: ir.Expr, limit: str | None = None, params=None, **_: Any - ): - """Compile an Ibis expression to a sqlglot object.""" - table_expr = expr.as_table() - - if limit == "default": - limit = ibis.options.sql.default_limit - if limit is not None: - table_expr = table_expr.limit(limit) - - if params is None: - params = {} + def create_table( + self, + name: str, + obj: pd.DataFrame | pa.Table | ir.Table | None = None, + *, + schema: sch.Schema | None = None, + database: str | None = None, + temp: bool = False, + overwrite: bool = False, + ) -> ir.Table: + """Create a table in DataFusion. 
- sql = translate(table_expr.op(), params=params) - assert not isinstance(sql, sg.exp.Subquery) + Parameters + ---------- + name + Name of the table to create + obj + The data with which to populate the table; optional, but at least + one of `obj` or `schema` must be specified + schema + The schema of the table to create; optional, but at least one of + `obj` or `schema` must be specified + database + The name of the database in which to create the table; if not + passed, the current database is used. + temp + Create a temporary table + overwrite + If `True`, replace the table if it already exists, otherwise fail + if the table exists + """ + if obj is None and schema is None: + raise ValueError("Either `obj` or `schema` must be specified") + + column_defs = [ + sg.exp.ColumnDef( + this=sg.to_identifier(name, quoted=self.compiler.quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [ + sg.exp.ColumnConstraint(kind=sg.exp.NotNullColumnConstraint()) + ] + ), + ) + for name, typ in (schema or {}).items() + ] - if isinstance(sql, sg.exp.Table): - sql = sg.select(STAR).from_(sql) + target = sg.table(name, db=database, quoted=self.compiler.quoted) - assert not isinstance(sql, sg.exp.Subquery) - return sql + if column_defs: + target = sg.exp.Schema(this=target, expressions=column_defs) - def compile( - self, expr: ir.Expr, limit: str | None = None, params=None, **kwargs: Any - ): - """Compile an Ibis expression to a DataFusion SQL string.""" - return self._to_sqlglot(expr, limit=limit, params=params, **kwargs).sql( - dialect=self.dialect, pretty=True - ) + properties = [] - @classmethod - def has_operation(cls, operation: type[ops.Value]) -> bool: - from ibis.backends.datafusion.compiler.values import translate_val + if temp: + properties.append(sg.exp.TemporaryProperty()) - return translate_val.dispatch(operation) is not translate_val.dispatch(object) + if obj is not None: + if not isinstance(obj, ir.Expr): + table = ibis.memtable(obj) + else: + table = obj - def create_table(self, *_, **__) -> ir.Table: - raise NotImplementedError(self.name) + self._run_pre_execute_hooks(table) - def create_view(self, *_, **__) -> ir.Table: - raise NotImplementedError(self.name) + query = self._to_sqlglot(table) + else: + query = None + + create_stmt = sg.exp.Create( + kind="TABLE", + this=target, + replace=overwrite, + properties=sg.exp.Properties(expressions=properties), + expression=query, + ) - def drop_table(self, *_, **__) -> ir.Table: - raise NotImplementedError(self.name) + with self._safe_raw_sql(create_stmt): + pass - def drop_view(self, *_, **__) -> ir.Table: - raise NotImplementedError(self.name) + return self.table(name, schema=database) diff --git a/ibis/backends/datafusion/compiler.py b/ibis/backends/datafusion/compiler.py new file mode 100644 index 000000000000..92d3fd70a66f --- /dev/null +++ b/ibis/backends/datafusion/compiler.py @@ -0,0 +1,560 @@ +from __future__ import annotations + +import calendar +import math +from functools import partial, singledispatchmethod +from itertools import starmap + +import sqlglot as sg +from sqlglot import exp, transforms +from sqlglot.dialects import Postgres +from sqlglot.dialects.dialect import rename_func + +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis.backends.base.sqlglot.compiler import ( + FALSE, + NULL, + STAR, + SQLGlotCompiler, + paren, +) +from ibis.backends.base.sqlglot.datatypes import DataFusionType +from 
ibis.common.temporal import IntervalUnit, TimestampUnit +from ibis.expr.operations.udf import InputType +from ibis.expr.rewrites import rewrite_sample +from ibis.formats.pyarrow import PyArrowType + + +class DataFusion(Postgres): + class Generator(Postgres.Generator): + TRANSFORMS = Postgres.Generator.TRANSFORMS.copy() | { + exp.Select: transforms.preprocess([transforms.eliminate_qualify]), + exp.Pow: rename_func("pow"), + exp.IsNan: rename_func("isnan"), + exp.CurrentTimestamp: rename_func("now"), + exp.Split: rename_func("string_to_array"), + exp.Array: rename_func("make_array"), + exp.ArrayContains: rename_func("array_has"), + exp.ArraySize: rename_func("array_length"), + } + + +class DataFusionCompiler(SQLGlotCompiler): + __slots__ = () + + dialect = "datafusion" + type_mapper = DataFusionType + quoted = True + rewrites = (rewrite_sample, *SQLGlotCompiler.rewrites) + + def _aggregate(self, funcname: str, *args, where): + expr = self.f[funcname](*args) + if where is not None: + return sg.exp.Filter(this=expr, expression=sg.exp.Where(this=where)) + return expr + + def _to_timestamp(self, value, target_dtype, literal=False): + tz = ( + f'Some("{timezone}")' + if (timezone := target_dtype.timezone) is not None + else "None" + ) + unit = ( + target_dtype.unit.name.capitalize() + if target_dtype.scale is not None + else "Microsecond" + ) + str_value = str(value) if literal else value + return self.f.arrow_cast(str_value, f"Timestamp({unit}, {tz})") + + @singledispatchmethod + def visit_node(self, op, **kw): + return super().visit_node(op, **kw) + + @visit_node.register(ops.Literal) + def visit_Literal(self, op, *, value, dtype, **kw): + if value is None: + return super().visit_node(op, value=value, dtype=dtype, **kw) + elif dtype.is_decimal(): + return self.cast( + sg.exp.convert(str(value)), + dt.Decimal(precision=dtype.precision or 38, scale=dtype.scale or 9), + ) + elif dtype.is_numeric(): + if isinstance(value, float): + if math.isinf(value): + return self.cast("+Inf", dt.float64) + elif math.isnan(value): + return self.cast("NaN", dt.float64) + return sg.exp.convert(value) + elif dtype.is_interval(): + if dtype.unit.short in ("ms", "us", "ns"): + raise com.UnsupportedOperationError( + "DataFusion doesn't support subsecond interval resolutions" + ) + + return sg.exp.Interval( + this=sg.exp.convert(str(value)), + unit=sg.exp.var(dtype.unit.plural.lower()), + ) + elif dtype.is_timestamp(): + return self._to_timestamp(value, dtype, literal=True) + elif dtype.is_date(): + return self.f.date_trunc("day", value.isoformat()) + elif dtype.is_binary(): + return sg.exp.HexString(this=value.hex()) + else: + return super().visit_node(op, value=value, dtype=dtype, **kw) + + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + if to.is_interval(): + unit = to.unit.name.lower() + return sg.cast( + self.f.concat(self.cast(arg, dt.string), f" {unit}"), "interval" + ) + if to.is_timestamp(): + return self._to_timestamp(arg, to) + if to.is_decimal(): + return self.f.arrow_cast(arg, f"{PyArrowType.from_ibis(to)}".capitalize()) + return self.cast(arg, to) + + @visit_node.register(ops.Substring) + def visit_Substring(self, op, *, arg, start, length): + start = self.if_(start < 0, self.f.length(arg) + start + 1, start + 1) + if length is not None: + return self.f.substr(arg, start, length) + return self.f.substr(arg, start) + + @visit_node.register(ops.Variance) + def visit_Variance(self, op, *, arg, how, where): + if how == "sample": + return self.agg.var_samp(arg, where=where) + elif how 
== "pop": + return self.agg.var_pop(arg, where=where) + else: + raise ValueError(f"Unrecognized how value: {how}") + + @visit_node.register(ops.StandardDev) + def visit_StandardDev(self, op, *, arg, how, where): + if how == "sample": + return self.agg.stddev_samp(arg, where=where) + elif how == "pop": + return self.agg.stddev_pop(arg, where=where) + else: + raise ValueError(f"Unrecognized how value: {how}") + + @visit_node.register(ops.ScalarUDF) + def visit_ScalarUDF(self, op, **kw): + input_type = op.__input_type__ + if input_type in (InputType.PYARROW, InputType.BUILTIN): + return self.f[op.__full_name__](*kw.values()) + else: + raise NotImplementedError( + f"DataFusion only supports PyArrow UDFs: got a {input_type.name.lower()} UDF" + ) + + @visit_node.register(ops.ElementWiseVectorizedUDF) + def visit_ElementWiseVectorizedUDF( + self, op, *, func, func_args, input_type, return_type + ): + return self.f[func.__name__](*func_args) + + @visit_node.register(ops.RegexExtract) + def visit_RegexExtract(self, op, *, arg, pattern, index): + if not isinstance(op.index, ops.Literal): + raise ValueError( + "re_extract `index` expressions must be literals. " + "Arbitrary expressions are not supported in the DataFusion backend" + ) + return self.f.regexp_match(arg, self.f.concat("(", pattern, ")"))[index] + + # @visit_node.register(ops.RegexReplace) + # def regex_replace(self, op, *, arg, pattern, replacement): + # return self.f.regexp_replace(arg, pattern, replacement, sg.exp.convert("g")) + + @visit_node.register(ops.StringFind) + def visit_StringFind(self, op, *, arg, substr, start, end): + if end is not None: + raise NotImplementedError("`end` not yet implemented") + + if start is not None: + pos = self.f.strpos(self.f.substr(arg, start + 1), substr) + return self.f.coalesce(self.f.nullif(pos + start, start), 0) + + return self.f.strpos(arg, substr) + + @visit_node.register(ops.RegexSearch) + def visit_RegexSearch(self, op, *, arg, pattern): + return self.if_( + sg.or_(arg.is_(NULL), pattern.is_(NULL)), + NULL, + self.f.coalesce( + # null is returned for non-matching patterns, so coalesce to false + # because that is the desired behavior for ops.RegexSearch + self.f.array_length(self.f.regexp_match(arg, pattern)) > 0, + FALSE, + ), + ) + + @visit_node.register(ops.StringContains) + def visit_StringContains(self, op, *, haystack, needle): + return self.f.strpos(haystack, needle) > sg.exp.convert(0) + + @visit_node.register(ops.ExtractFragment) + def visit_ExtractFragment(self, op, *, arg): + return self.f.extract_url_field(arg, "fragment") + + @visit_node.register(ops.ExtractProtocol) + def visit_ExtractProtocol(self, op, *, arg): + return self.f.extract_url_field(arg, "scheme") + + @visit_node.register(ops.ExtractAuthority) + def visit_ExtractAuthority(self, op, *, arg): + return self.f.extract_url_field(arg, "netloc") + + @visit_node.register(ops.ExtractPath) + def visit_ExtractPath(self, op, *, arg): + return self.f.extract_url_field(arg, "path") + + @visit_node.register(ops.ExtractHost) + def visit_ExtractHost(self, op, *, arg): + return self.f.extract_url_field(arg, "hostname") + + @visit_node.register(ops.ExtractQuery) + def visit_ExtractQuery(self, op, *, arg, key): + if key is not None: + return self.f.extract_query_param(arg, key) + return self.f.extract_query(arg) + + @visit_node.register(ops.ExtractUserInfo) + def visit_ExtractUserInfo(self, op, *, arg): + return self.f.extract_user_info(arg) + + @visit_node.register(ops.ExtractYear) + @visit_node.register(ops.ExtractMonth) + 
@visit_node.register(ops.ExtractQuarter) + @visit_node.register(ops.ExtractDay) + def visit_ExtractYearMonthQuarterDay(self, op, *, arg): + skip = len("Extract") + part = type(op).__name__[skip:].lower() + return self.f.date_part(part, arg) + + @visit_node.register(ops.ExtractDayOfYear) + def visit_ExtractDayOfYear(self, op, *, arg): + return self.f.date_part("doy", arg) + + @visit_node.register(ops.DayOfWeekIndex) + def visit_DayOfWeekIndex(self, op, *, arg): + return (self.f.date_part("dow", arg) + 6) % 7 + + @visit_node.register(ops.DayOfWeekName) + def visit_DayOfWeekName(self, op, *, arg): + return sg.exp.Case( + this=paren(self.f.date_part("dow", arg) + 6) % 7, + ifs=list(starmap(self.if_, enumerate(calendar.day_name))), + ) + + @visit_node.register(ops.Date) + def visit_Date(self, op, *, arg): + return self.f.date_trunc("day", arg) + + @visit_node.register(ops.ExtractWeekOfYear) + def visit_ExtractWeekOfYear(self, op, *, arg): + return self.f.date_part("week", arg) + + @visit_node.register(ops.TimestampTruncate) + def visit_TimestampTruncate(self, op, *, arg, unit): + if unit in ( + IntervalUnit.MILLISECOND, + IntervalUnit.MICROSECOND, + IntervalUnit.NANOSECOND, + ): + raise com.UnsupportedOperationError( + f"The function is not defined for time unit {unit}" + ) + + return self.f.date_trunc(unit.name.lower(), arg) + + @visit_node.register(ops.ExtractEpochSeconds) + def visit_ExtractEpochSeconds(self, op, *, arg): + if op.arg.dtype.is_date(): + return self.f.extract_epoch_seconds_date(arg) + elif op.arg.dtype.is_timestamp(): + return self.f.extract_epoch_seconds_timestamp(arg) + else: + raise com.OperationNotDefinedError( + f"The function is not defined for {op.arg.dtype}" + ) + + @visit_node.register(ops.ExtractMinute) + def visit_ExtractMinute(self, op, *, arg): + if op.arg.dtype.is_date(): + return self.f.date_part("minute", arg) + elif op.arg.dtype.is_time(): + return self.f.extract_minute_time(arg) + elif op.arg.dtype.is_timestamp(): + return self.f.extract_minute_timestamp(arg) + else: + raise com.OperationNotDefinedError( + f"The function is not defined for {op.arg.dtype}" + ) + + @visit_node.register(ops.ExtractMillisecond) + def visit_ExtractMillisecond(self, op, *, arg): + if op.arg.dtype.is_time(): + return self.f.extract_millisecond_time(arg) + elif op.arg.dtype.is_timestamp(): + return self.f.extract_millisecond_timestamp(arg) + else: + raise com.OperationNotDefinedError( + f"The function is not defined for {op.arg.dtype}" + ) + + @visit_node.register(ops.ExtractHour) + def visit_ExtractHour(self, op, *, arg): + if op.arg.dtype.is_date() or op.arg.dtype.is_timestamp(): + return self.f.date_part("hour", arg) + elif op.arg.dtype.is_time(): + return self.f.extract_hour_time(arg) + else: + raise com.OperationNotDefinedError( + f"The function is not defined for {op.arg.dtype}" + ) + + @visit_node.register(ops.ExtractSecond) + def visit_ExtractSecond(self, op, *, arg): + if op.arg.dtype.is_date() or op.arg.dtype.is_timestamp(): + return self.f.extract_second_timestamp(arg) + elif op.arg.dtype.is_time(): + return self.f.extract_second_time(arg) + else: + raise com.OperationNotDefinedError( + f"The function is not defined for {op.arg.dtype}" + ) + + @visit_node.register(ops.ArrayRepeat) + def visit_ArrayRepeat(self, op, *, arg, times): + return self.f.flatten(self.f.array_repeat(arg, times)) + + @visit_node.register(ops.ArrayPosition) + def visit_ArrayPosition(self, op, *, arg, other): + return self.f.coalesce(self.f.array_position(arg, other), 0) + + 
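The `(date_part('dow', arg) + 6) % 7` expression used by `visit_DayOfWeekIndex` and `visit_DayOfWeekName` above shifts the week so that Monday maps to 0. A minimal Python sketch of that remapping, not part of the patch, assuming DataFusion's `date_part('dow', ...)` follows the PostgreSQL convention (Sunday=0 through Saturday=6):

import calendar

def remap_dow(pg_dow: int) -> int:
    # Convert a Sunday=0-based day-of-week to the Monday=0-based index ibis exposes
    return (pg_dow + 6) % 7

assert remap_dow(1) == 0                             # Monday -> 0
assert remap_dow(0) == 6                             # Sunday -> 6
assert calendar.day_name[remap_dow(5)] == "Friday"   # matches the Case branches built from calendar.day_name
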
@visit_node.register(ops.Covariance) + def visit_Covariance(self, op, *, left, right, how, where): + x = op.left + if x.dtype.is_boolean(): + left = self.cast(left, dt.float64) + + y = op.right + if y.dtype.is_boolean(): + right = self.cast(right, dt.float64) + + if how == "sample": + return self.agg.covar_samp(left, right, where=where) + elif how == "pop": + return self.agg.covar_pop(left, right, where=where) + else: + raise ValueError(f"Unrecognized how = `{how}` value") + + @visit_node.register(ops.Correlation) + def visit_Correlation(self, op, *, left, right, where, how): + x = op.left + if x.dtype.is_boolean(): + left = self.cast(left, dt.float64) + + y = op.right + if y.dtype.is_boolean(): + right = self.cast(right, dt.float64) + + return self.agg.corr(left, right, where=where) + + @visit_node.register(ops.IsNan) + def visit_IsNan(self, op, *, arg): + return sg.and_(arg.is_(sg.not_(NULL)), self.f.isnan(arg)) + + @visit_node.register(ops.ArrayStringJoin) + def visit_ArrayStringJoin(self, op, *, sep, arg): + return self.f.array_join(arg, sep) + + @visit_node.register(ops.FindInSet) + def visit_FindInSet(self, op, *, needle, values): + return self.f.coalesce( + self.f.array_position(self.f.make_array(*values), needle), 0 + ) + + @visit_node.register(ops.TimestampFromUNIX) + def visit_TimestampFromUNIX(self, op, *, arg, unit): + if unit == TimestampUnit.SECOND: + return self.f.from_unixtime(arg) + elif unit in ( + TimestampUnit.MILLISECOND, + TimestampUnit.MICROSECOND, + TimestampUnit.NANOSECOND, + ): + return self.f.arrow_cast(arg, f"Timestamp({unit.name.capitalize()}, None)") + else: + raise com.UnsupportedOperationError(f"Unsupported unit {unit}") + + @visit_node.register(ops.DateFromYMD) + def visit_DateFromYMD(self, op, *, year, month, day): + return self.cast( + self.f.concat( + self.f.lpad(self.cast(self.cast(year, dt.int64), dt.string), 4, "0"), + "-", + self.f.lpad(self.cast(self.cast(month, dt.int64), dt.string), 2, "0"), + "-", + self.f.lpad(self.cast(self.cast(day, dt.int64), dt.string), 2, "0"), + ), + dt.date, + ) + + @visit_node.register(ops.TimestampFromYMDHMS) + def visit_TimestampFromYMDHMS( + self, op, *, year, month, day, hours, minutes, seconds, **_ + ): + return self.f.to_timestamp_micros( + self.f.concat( + self.f.lpad(self.cast(self.cast(year, dt.int64), dt.string), 4, "0"), + "-", + self.f.lpad(self.cast(self.cast(month, dt.int64), dt.string), 2, "0"), + "-", + self.f.lpad(self.cast(self.cast(day, dt.int64), dt.string), 2, "0"), + "T", + self.f.lpad(self.cast(self.cast(hours, dt.int64), dt.string), 2, "0"), + ":", + self.f.lpad(self.cast(self.cast(minutes, dt.int64), dt.string), 2, "0"), + ":", + self.f.lpad(self.cast(self.cast(seconds, dt.int64), dt.string), 2, "0"), + ".000000Z", + ) + ) + + @visit_node.register(ops.IsInf) + def visit_IsInf(self, op, *, arg): + return sg.and_(sg.not_(self.f.isnan(arg)), self.f.abs(arg).eq(self.POS_INF)) + + @visit_node.register(ops.ArrayIndex) + def visit_ArrayIndex(self, op, *, arg, index): + return self.f.array_element(arg, index + self.cast(index >= 0, op.index.dtype)) + + @visit_node.register(ops.Arbitrary) + @visit_node.register(ops.ArgMax) + @visit_node.register(ops.ArgMin) + @visit_node.register(ops.ArrayDistinct) + @visit_node.register(ops.ArrayFilter) + @visit_node.register(ops.ArrayFlatten) + @visit_node.register(ops.ArrayIntersect) + @visit_node.register(ops.ArrayMap) + @visit_node.register(ops.ArraySort) + @visit_node.register(ops.ArrayUnion) + @visit_node.register(ops.ArrayZip) + 
@visit_node.register(ops.BitwiseNot) + @visit_node.register(ops.Clip) + @visit_node.register(ops.CountDistinctStar) + @visit_node.register(ops.DateDelta) + @visit_node.register(ops.Greatest) + @visit_node.register(ops.GroupConcat) + @visit_node.register(ops.IntervalFromInteger) + @visit_node.register(ops.Least) + @visit_node.register(ops.MultiQuantile) + @visit_node.register(ops.Quantile) + @visit_node.register(ops.RowID) + @visit_node.register(ops.Strftime) + @visit_node.register(ops.TimeDelta) + @visit_node.register(ops.TimestampBucket) + @visit_node.register(ops.TimestampDelta) + @visit_node.register(ops.TimestampNow) + @visit_node.register(ops.TypeOf) + @visit_node.register(ops.Unnest) + @visit_node.register(ops.EndsWith) + @visit_node.register(ops.StringToTimestamp) + @visit_node.register(ops.Levenshtein) + def visit_Undefined(self, op, **_): + raise com.OperationNotDefinedError(type(op).__name__) + + @visit_node.register(ops.Aggregate) + def visit_Aggregate(self, op, *, parent, groups, metrics): + """Support `GROUP BY` expressions in `SELECT` since DataFusion does not.""" + quoted = self.quoted + metrics = tuple(starmap(self._dedup_name, metrics.items())) + + if groups: + # datafusion doesn't support count distinct aggregations alongside + # computed grouping keys so create a projection of the key and all + # existing columns first, followed by the usual group by + # + # analogous to a user calling mutate -> group_by + cols = list( + map( + partial( + sg.column, + table=sg.to_identifier(parent.alias, quoted=quoted), + quoted=quoted, + ), + # can't use set subtraction here since the schema keys' + # order matters and set subtraction doesn't preserve order + (k for k in op.parent.schema.keys() if k not in groups), + ) + ) + table = ( + sg.select(*cols, *starmap(self._dedup_name, groups.items())) + .from_(parent) + .subquery(parent.alias) + ) + + # datafusion lower cases all column names internally unless quoted so + # quoted=True is required here for correctness + by_names_quoted = tuple( + sg.column(key, table=getattr(value, "table", None), quoted=True) + for key, value in groups.items() + ) + selections = by_names_quoted + metrics + else: + selections = metrics or (STAR,) + table = parent + + sel = sg.select(*selections).from_(table) + + if groups: + sel = sel.group_by(*by_names_quoted) + + return sel + + +_SIMPLE_OPS = { + ops.ApproxMedian: "approx_median", + ops.ArrayRemove: "array_remove_all", + ops.BitAnd: "bit_and", + ops.BitOr: "bit_or", + ops.BitXor: "bit_xor", + ops.Cot: "cot", + ops.ExtractMicrosecond: "extract_microsecond", + ops.First: "first_value", + ops.LPad: "lpad", + ops.Last: "last_value", + ops.Median: "median", + ops.RPad: "rpad", + ops.StringAscii: "ascii", + ops.StringLength: "character_length", + ops.RegexSplit: "regex_split", +} + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @DataFusionCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) + + else: + + @DataFusionCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) + + setattr(DataFusionCompiler, f"visit_{_op.__name__}", _fmt) diff --git a/ibis/backends/datafusion/compiler/__init__.py b/ibis/backends/datafusion/compiler/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/ibis/backends/datafusion/compiler/core.py 
b/ibis/backends/datafusion/compiler/core.py deleted file mode 100644 index 9b9a28b20b3d..000000000000 --- a/ibis/backends/datafusion/compiler/core.py +++ /dev/null @@ -1,128 +0,0 @@ -"""Ibis expression to sqlglot compiler. - -The compiler is built with a few `singledispatch` functions: - - 1. `translate_rel` for compiling `ops.TableNode`s - 1. `translate_val` for compiling `ops.Value`s - -## `translate` - -### Node Implementation - -There's a single `ops.Node` implementation for `ops.TableNode`s instances. - -This function compiles each node in topological order. The topological sorting, -result caching, and iteration are all handled by -`ibis.expr.operations.core.Node.map`. -""" - -from __future__ import annotations - -import itertools -from typing import TYPE_CHECKING, Any - -import sqlglot as sg - -import ibis.expr.operations as ops -import ibis.expr.types as ir -from ibis.backends.datafusion.compiler.relations import translate_rel -from ibis.backends.datafusion.compiler.values import translate_val -from ibis.common.deferred import _ -from ibis.expr.analysis import c, find_first_base_table, p, x, y -from ibis.expr.rewrites import rewrite_dropna, rewrite_fillna, rewrite_sample - -if TYPE_CHECKING: - from collections.abc import Mapping - - -def _translate_node(node, **kwargs): - if isinstance(node, ops.Value): - return translate_val(node, **kwargs) - assert isinstance(node, ops.TableNode) - return translate_rel(node, **kwargs) - - -def translate(op: ops.TableNode, params: Mapping[ir.Value, Any]) -> sg.exp.Expression: - """Translate an ibis operation to a sqlglot expression. - - Parameters - ---------- - op - An ibis `TableNode` - params - A mapping of expressions to concrete values - - Returns - ------- - sqlglot.expressions.Expression - A sqlglot expression - """ - - gen_alias_index = itertools.count() - - def fn(node, _, **kwargs): - result = _translate_node(node, **kwargs) - - # don't alias root nodes or value ops - if node is op or isinstance(node, ops.Value): - return result - - assert isinstance(node, ops.TableNode) - - alias_index = next(gen_alias_index) - alias = f"t{alias_index:d}" - - try: - return result.subquery(alias) - except AttributeError: - return sg.alias(result, alias) - - # substitute parameters immediately to avoid having to define a - # ScalarParameter translation rule - # - # this lets us avoid threading `params` through every `translate_val` call - # only to be used in the one place it would be needed: the ScalarParameter - # `translate_val` rule - params = {param.op(): value for param, value in params.items()} - replace_literals = p.ScalarParameter(dtype=x) >> ( - lambda _, x: ops.Literal(value=params[_], dtype=x) - ) - - # replace the right side of InColumn into a scalar subquery for sql - # backends - replace_in_column_with_table_array_view = p.InColumn(..., y) >> _.copy( - options=c.TableArrayView( - c.Selection(table=lambda _, y: find_first_base_table(y), selections=(y,)) - ), - ) - - # replace any checks against an empty right side of the IN operation with - # `False` - replace_empty_in_values_with_false = p.InValues(..., ()) >> c.Literal( - False, dtype="bool" - ) - - # subtract one from one-based functions to convert to zero-based indexing - subtract_one_from_one_indexed_functions = ( - p.WindowFunction(p.RankBase | p.NTile) - | p.StringFind - | p.FindInSet - | p.ArrayPosition - ) >> c.Subtract(_, 1) - - add_one_to_nth_value_input = p.NthValue >> _.copy(nth=c.Add(_.nth, 1)) - - op = op.replace( - replace_literals - | replace_in_column_with_table_array_view 
- | replace_empty_in_values_with_false - | subtract_one_from_one_indexed_functions - | add_one_to_nth_value_input - | rewrite_fillna - | rewrite_dropna - | rewrite_sample - ) - - # apply translate rules in topological order - node = op.map(fn)[op] - return node.this if isinstance(node, sg.exp.Subquery) else node diff --git a/ibis/backends/datafusion/compiler/relations.py b/ibis/backends/datafusion/compiler/relations.py deleted file mode 100644 index e023e3fb15ac..000000000000 --- a/ibis/backends/datafusion/compiler/relations.py +++ /dev/null @@ -1,211 +0,0 @@ -from __future__ import annotations - -import functools - -import sqlglot as sg - -import ibis.common.exceptions as com -import ibis.expr.operations as ops -from ibis.backends.base.sqlglot import STAR - - -@functools.singledispatch -def translate_rel(op, **_): - """Translate a value expression into sqlglot.""" - raise com.OperationNotDefinedError(f"No translation rule for {type(op)}") - - -@translate_rel.register(ops.DummyTable) -def dummy_table(op, *, values, **_): - return sg.select(*values) - - -@translate_rel.register -def _physical_table(op: ops.PhysicalTable, **_): - return sg.table(op.name) - - -@translate_rel.register(ops.DatabaseTable) -def table(op, *, name, namespace, **_): - return sg.table(name, db=namespace.schema, catalog=namespace.database) - - -@translate_rel.register(ops.SelfReference) -def _self_ref(op, *, table, **_): - return sg.alias(table, op.name) - - -_JOIN_TYPES = { - ops.InnerJoin: "inner", - ops.LeftJoin: "left", - ops.RightJoin: "right", - ops.OuterJoin: "full", - ops.LeftAntiJoin: "left anti", - ops.LeftSemiJoin: "left semi", -} - - -@translate_rel.register -def _join(op: ops.Join, *, left, right, predicates, **_): - on = sg.and_(*predicates) if predicates else None - join_type = _JOIN_TYPES[type(op)] - try: - return left.join(right, join_type=join_type, on=on) - except AttributeError: - select_args = [f"{left.alias_or_name}.*"] - - # select from both the left and right side of the join if the join - # is not a filtering join (semi join or anti join); filtering joins - # only return the left side columns - if not isinstance(op, (ops.LeftSemiJoin, ops.LeftAntiJoin)): - select_args.append(f"{right.alias_or_name}.*") - return ( - sg.select(*select_args).from_(left).join(right, join_type=join_type, on=on) - ) - - -def replace_tables_with_star_selection(node, alias=None): - if isinstance(node, (sg.exp.Subquery, sg.exp.Table, sg.exp.CTE)): - return sg.exp.Column( - this=STAR, - table=sg.to_identifier(alias if alias is not None else node.alias_or_name), - ) - return node - - -@translate_rel.register -def _selection(op: ops.Selection, *, table, selections, predicates, sort_keys, **_): - # needs_alias should never be true here in explicitly, but it may get - # passed via a (recursive) call to translate_val - if isinstance(op.table, ops.Join) and not isinstance( - op.table, (ops.LeftSemiJoin, ops.LeftAntiJoin) - ): - args = table.this.args - from_ = args["from"] - (join,) = args["joins"] - else: - from_ = join = None - - alias = table.alias_or_name - selections = tuple( - replace_tables_with_star_selection( - node, - # replace the table name with the alias if the table is **not** a - # join, because we may be selecting from a subquery or an aliased - # table; otherwise we'll select from the _unaliased_ table or the - # _child_ table, which may have a different alias than the one we - # generated for the input table - alias if from_ is None and join is None else None, - ) - for node in selections - ) or (STAR,) - - 
sel = sg.select(*selections).from_(from_ if from_ is not None else table) - - if join is not None: - sel = sel.join(join) - - if predicates: - if join is not None: - sel = sg.select(STAR).from_(sel.subquery(alias)) - sel = sel.where(*predicates) - - if sort_keys: - sel = sel.order_by(*sort_keys) - - return sel - - -@translate_rel.register -def _limit(op: ops.Limit, *, table, n, offset, **_): - result = sg.select(STAR).from_(table) - - if n is not None: - if not isinstance(n, int): - limit = sg.select(n).from_(table).subquery() - else: - limit = n - result = result.limit(limit) - - if not isinstance(offset, int): - return result.offset( - sg.select(offset).from_(table).subquery().sql("clickhouse") - ) - - return result.offset(offset) if offset != 0 else result - - -@translate_rel.register(ops.Aggregation) -def _aggregation( - op: ops.Aggregation, *, table, metrics, by, having, predicates, sort_keys, **_ -): - if by: - # datafusion doesn't support count distinct aggregations alongside - # computed grouping keys so create a projection of the key and all - # existing columns first, followed by the usual group by - # - # analogous to a user calling mutate -> group_by - by_names = frozenset(b.alias_or_name for b in by) - cols = [ - sg.column( - name, - table=sg.to_identifier(table.alias_or_name, quoted=True), - quoted=True, - ) - for name in op.table.schema.keys() - by_names - ] - table = sg.select(*cols, *by).from_(table).subquery() - - # datafusion lower cases all column names internally unless quoted so - # quoted=True is required here for correctness - by_names_quoted = tuple( - sg.column(b.alias_or_name, table=getattr(b, "table", None), quoted=True) - for b in by - ) - selections = by_names_quoted + metrics - else: - selections = metrics or (STAR,) - - sel = sg.select(*selections).from_(table) - - if by: - sel = sel.group_by(*by_names_quoted) - - if predicates: - sel = sel.where(*predicates) - - if having: - sel = sel.having(*having) - - if sort_keys: - sel = sel.order_by(*sort_keys) - - return sel - - -_SET_OP_FUNC = { - ops.Union: sg.union, - ops.Intersection: sg.intersect, - ops.Difference: sg.except_, -} - - -@translate_rel.register -def _set_op(op: ops.SetOp, *, left, right, distinct: bool = False, **_): - if isinstance(left, sg.exp.Table): - left = sg.select(STAR).from_(left) - - if isinstance(right, sg.exp.Table): - right = sg.select(STAR).from_(right) - - func = _SET_OP_FUNC[type(op)] - - left = left.args.get("this", left) - right = right.args.get("this", right) - - return func(left, right, distinct=distinct) - - -@translate_rel.register -def _distinct(op: ops.Distinct, *, table, **_): - return sg.select(STAR).distinct().from_(table) diff --git a/ibis/backends/datafusion/compiler/values.py b/ibis/backends/datafusion/compiler/values.py deleted file mode 100644 index 60f75d66ac0f..000000000000 --- a/ibis/backends/datafusion/compiler/values.py +++ /dev/null @@ -1,818 +0,0 @@ -from __future__ import annotations - -import functools -import math -import operator -from typing import Any - -import sqlglot as sg - -import ibis.common.exceptions as com -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -from ibis.backends.base.sqlglot import ( - FALSE, - NULL, - AggGen, - F, - interval, - make_cast, - paren, - parenthesize, -) -from ibis.backends.base.sqlglot.datatypes import PostgresType -from ibis.common.temporal import IntervalUnit, TimestampUnit -from ibis.expr.operations.udf import InputType -from ibis.formats.pyarrow import PyArrowType - - -def _aggregate(funcname, 
*args, where): - expr = F[funcname](*args) - if where is not None: - return sg.exp.Filter(this=expr, expression=sg.exp.Where(this=where)) - return expr - - -@functools.singledispatch -def translate_val(op, **_): - """Translate a value expression into sqlglot.""" - raise com.OperationNotDefinedError(f"No translation rule for {type(op)}") - - -agg = AggGen(aggfunc=_aggregate) -cast = make_cast(PostgresType) -if_ = F["if"] - -_simple_ops = { - ops.Abs: "abs", - ops.Ln: "ln", - ops.Log2: "log2", - ops.Log10: "log10", - ops.Sqrt: "sqrt", - ops.Reverse: "reverse", - ops.Strip: "trim", - ops.LStrip: "ltrim", - ops.RStrip: "rtrim", - ops.Lowercase: "lower", - ops.Uppercase: "upper", - ops.StringLength: "character_length", - ops.Capitalize: "initcap", - ops.Repeat: "repeat", - ops.LPad: "lpad", - ops.RPad: "rpad", - ops.Count: "count", - ops.Min: "min", - ops.Max: "max", - ops.Mean: "avg", - ops.Median: "median", - ops.ApproxMedian: "approx_median", - ops.Acos: "acos", - ops.Asin: "asin", - ops.Atan: "atan", - ops.Cos: "cos", - ops.Sin: "sin", - ops.Tan: "tan", - ops.Exp: "exp", - ops.Power: "power", - ops.RandomScalar: "random", - ops.Translate: "translate", - ops.StringAscii: "ascii", - ops.StartsWith: "starts_with", - ops.StrRight: "right", - ops.StringReplace: "replace", - ops.Sign: "sign", - ops.ExtractMicrosecond: "extract_microsecond", - ops.RowNumber: "row_number", - ops.Any: "bool_or", - ops.All: "bool_and", - ops.BitOr: "bit_or", - ops.BitXor: "bit_xor", - ops.BitAnd: "bit_and", - ops.ApproxCountDistinct: "approx_distinct", - ops.BitwiseAnd: "bit_and", - ops.Lag: "lag", - ops.Lead: "lead", - ops.First: "first_value", - ops.Last: "last_value", - ops.DenseRank: "dense_rank", - ops.PercentRank: "percent_rank", - ops.NTile: "ntile", - ops.MinRank: "rank", - ops.CumeDist: "cume_dist", - ops.NthValue: "nth_value", - ops.Cot: "cot", - ops.Atan2: "atan2", - ops.Radians: "radians", - ops.Degrees: "degrees", - ops.NullIf: "nullif", - ops.Pi: "pi", - ops.ArrayContains: "array_contains", - ops.ArrayLength: "array_length", - ops.ArrayRemove: "array_remove_all", - ops.RegexSplit: "regex_split", -} - -for _op, _name in _simple_ops.items(): - assert isinstance(type(_op), type), type(_op) - if issubclass(_op, ops.Reduction): - - @translate_val.register(_op) - def _fmt(_, _name: str = _name, *, where, **kw): - return agg[_name](*kw.values(), where=where) - - else: - - @translate_val.register(_op) - def _fmt(_, _name: str = _name, **kw): - return F[_name](*kw.values()) - - -del _fmt, _name, _op - -_binary_infix_ops = { - # Binary operations - ops.Add: operator.add, - ops.Subtract: operator.sub, - ops.Multiply: operator.mul, - ops.Modulus: operator.mod, - # Comparisons - ops.Equals: sg.exp.Condition.eq, - ops.NotEquals: sg.exp.Condition.neq, - ops.GreaterEqual: operator.ge, - ops.Greater: operator.gt, - ops.LessEqual: operator.le, - ops.Less: operator.lt, - # Boolean comparisons - ops.And: operator.and_, - ops.Or: operator.or_, - ops.Xor: F.xor, - ops.DateAdd: operator.add, - ops.DateSub: operator.sub, - ops.DateDiff: operator.sub, - ops.TimestampDiff: operator.sub, - ops.TimestampSub: operator.sub, - ops.TimestampAdd: operator.add, - ops.IntervalAdd: operator.add, - ops.IntervalSubtract: operator.sub, -} - - -def _binary_infix(func): - def formatter(op, *, left, right, **_): - return func(parenthesize(op.left, left), parenthesize(op.right, right)) - - return formatter - - -for _op, _func in _binary_infix_ops.items(): - translate_val.register(_op)(_binary_infix(_func)) - -del _op, _func - - 
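Both the `_simple_ops` registration loop being removed above and the `_SIMPLE_OPS` loop in the new `DataFusionCompiler` bind the operation name as a default argument (`_name: str = _name`). A minimal sketch, independent of ibis, of why that binding is needed when generating functions in a loop:

handlers_bad, handlers_good = {}, {}
for name in ["upper", "lower"]:
    # late binding: the closure looks up `name` when called, so every entry sees "lower"
    handlers_bad[name] = lambda s: getattr(s, name)()
    # default argument: the current value of `name` is captured at definition time
    handlers_good[name] = lambda s, _name=name: getattr(s, _name)()

assert handlers_bad["upper"]("abc") == "abc"    # wrong method resolved
assert handlers_good["upper"]("abc") == "ABC"   # correct
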
-@translate_val.register(ops.Alias) -def alias(op, *, arg, name, **_): - return arg.as_(name) - - -def _to_timestamp(value, target_dtype, literal=False): - tz = ( - f'Some("{timezone}")' - if (timezone := target_dtype.timezone) is not None - else "None" - ) - unit = ( - target_dtype.unit.name.capitalize() - if target_dtype.scale is not None - else "Microsecond" - ) - str_value = str(value) if literal else value - return F.arrow_cast(str_value, f"Timestamp({unit}, {tz})") - - -@translate_val.register(ops.Literal) -def _literal(op, *, value, dtype, **kw): - if value is None and dtype.nullable: - if dtype.is_null(): - return NULL - return cast(NULL, dtype) - elif dtype.is_boolean(): - return sg.exp.convert(bool(value)) - elif dtype.is_inet(): - # treat inet as strings for now - return sg.exp.convert(str(value)) - elif dtype.is_decimal(): - return cast( - sg.exp.convert(str(value)), - dt.Decimal(precision=dtype.precision or 38, scale=dtype.scale or 9), - ) - elif dtype.is_string() or dtype.is_macaddr(): - return sg.exp.convert(str(value)) - elif dtype.is_numeric(): - if isinstance(value, float) and math.isinf(value): - return sg.exp.Literal.number("'+Inf'::double") - return sg.exp.convert(value) - elif dtype.is_interval(): - if dtype.unit.short in {"ms", "us", "ns"}: - raise com.UnsupportedOperationError( - "DataFusion doesn't support subsecond interval resolutions" - ) - - return interval(value, unit=dtype.unit.plural.lower()) - elif dtype.is_timestamp(): - return _to_timestamp(value, dtype, literal=True) - elif dtype.is_date(): - return F.date_trunc("day", value.isoformat()) - elif dtype.is_time(): - return cast(sg.exp.convert(str(value)), dt.time()) - elif dtype.is_array(): - vtype = dtype.value_type - values = [ - _literal(ops.Literal(v, dtype=vtype), value=v, dtype=vtype, **kw) - for v in value - ] - return F.array(*values) - elif dtype.is_map(): - vtype = dtype.value_type - keys = [] - values = [] - - for k, v in value.items(): - keys.append(sg.exp.convert(k)) - values.append( - _literal(ops.Literal(v, dtype=vtype), value=v, dtype=vtype, **kw) - ) - - return F.map(F.array(*keys), F.array(*values)) - elif dtype.is_struct(): - fields = [ - _literal(ops.Literal(v, dtype=ftype), value=v, dtype=ftype, **kw) - for ftype, v in zip(dtype.types, value.values()) - ] - return cast(sg.exp.Struct.from_arg_list(fields), dtype) - elif dtype.is_binary(): - return sg.exp.HexString(this=value.hex()) - else: - raise NotImplementedError(f"Unsupported type: {dtype!r}") - - -@translate_val.register(ops.Cast) -def _cast(op, *, arg, to, **_): - if to.is_interval(): - unit_name = to.unit.name.lower() - return sg.cast(F.concat(sg.cast(arg, "text"), f" {unit_name}"), "interval") - if to.is_timestamp(): - return _to_timestamp(arg, to) - if to.is_decimal(): - return F.arrow_cast(arg, f"{PyArrowType.from_ibis(to)}".capitalize()) - return cast(arg, to) - - -@translate_val.register(ops.TableColumn) -def column(op, *, table, name, **_): - return sg.column(name, table=table.alias_or_name, quoted=True) - - -@translate_val.register -def sort_key(op: ops.SortKey, *, expr, ascending: bool, **_): - return sg.exp.Ordered(this=expr, desc=not ascending) - - -@translate_val.register(ops.Not) -def invert(op, *, arg, **_): - if isinstance(arg, sg.exp.Filter): - return sg.exp.Filter( - this=_de_morgan_law(arg.this), expression=arg.expression - ) # transform the not expression using _de_morgan_law - return sg.not_(paren(arg)) - - -def _de_morgan_law(logical_op: sg.exp.Expression): - if isinstance(logical_op, sg.exp.LogicalAnd): - 
return sg.exp.LogicalOr(this=sg.not_(paren(logical_op.this))) - if isinstance(logical_op, sg.exp.LogicalOr): - return sg.exp.LogicalAnd(this=sg.not_(paren(logical_op.this))) - return None - - -@translate_val.register(ops.Ceil) -@translate_val.register(ops.Floor) -def ceil_floor(op, *, arg, **_): - return cast(F[type(op).__name__.lower()](arg), dt.int64) - - -@translate_val.register(ops.Round) -def round(op, *, arg, digits, **_): - if digits is not None: - return F.round(arg, digits) - return F.round(arg) - - -@translate_val.register(ops.Substring) -def substring(op, *, arg, start, length, **_): - start = if_(start < 0, F.length(arg) + start + 1, start + 1) - if length is not None: - return F.substr(arg, start, length) - return F.substr(arg, start) - - -@translate_val.register(ops.Divide) -def div(op, *, left, right, **_): - return cast(left, dt.float64) / cast(right, dt.float64) - - -@translate_val.register(ops.FloorDivide) -def floordiv(op, *, left, right, **_): - return F.floor(left / right) - - -@translate_val.register(ops.CountDistinct) -def count_distinct(op, *, arg, where, **_): - return agg.count(sg.exp.Distinct(expressions=[arg]), where=where) - - -@translate_val.register(ops.CountStar) -def count_star(op, *, where, **_): - return agg.count(1, where=where) - - -@translate_val.register(ops.Sum) -def sum(op, *, arg, where, **_): - if op.arg.dtype.is_boolean(): - arg = cast(arg, dt.int64) - return agg.sum(arg, where=where) - - -@translate_val.register(ops.Variance) -def variance(op, *, arg, how, where, **_): - if how == "sample": - return agg.var_samp(arg, where=where) - elif how == "pop": - return agg.var_pop(arg, where=where) - else: - raise ValueError(f"Unrecognized how value: {how}") - - -@translate_val.register(ops.StandardDev) -def stddev(op, *, arg, how, where, **_): - if how == "sample": - return agg.stddev_samp(arg, where=where) - elif how == "pop": - return agg.stddev_pop(arg, where=where) - else: - raise ValueError(f"Unrecognized how value: {how}") - - -@translate_val.register(ops.InValues) -def in_values(op, *, value, options, **_): - return parenthesize(op.value, value).isin(*options) - - -@translate_val.register(ops.Negate) -def negate(op, *, arg, **_): - return -paren(arg) - - -@translate_val.register(ops.Coalesce) -def coalesce(op, *, arg, **_): - return F.coalesce(*arg) - - -@translate_val.register(ops.Log) -def log(op, *, arg, base, **_): - return F.log(base, arg) - - -@translate_val.register(ops.E) -def e(op, **_): - return F.exp(1) - - -@translate_val.register(ops.ScalarUDF) -def scalar_udf(op, **kw): - input_type = op.__input_type__ - if input_type in (InputType.PYARROW, InputType.BUILTIN): - return F[op.__full_name__](*kw.values()) - else: - raise NotImplementedError( - f"DataFusion only supports PyArrow UDFs: got a {input_type.name.lower()} UDF" - ) - - -@translate_val.register(ops.ElementWiseVectorizedUDF) -def elementwise_udf(op, *, func, func_args, **_): - return F[func.__name__](*func_args) - - -@translate_val.register(ops.AggUDF) -def agg_udf(op, *, where, **kw): - return agg[op.__full_name__](*kw.values(), where=where) - - -@translate_val.register(ops.StringConcat) -def string_concat(op, *, arg, **_): - return F.concat(*arg) - - -@translate_val.register(ops.RegexExtract) -def regex_extract(op, *, arg, pattern, index, **_): - if not isinstance(op.index, ops.Literal): - raise ValueError( - "re_extract `index` expressions must be literals. 
" - "Arbitrary expressions are not supported in the DataFusion backend" - ) - return F.regexp_match(arg, F.concat("(", pattern, ")"))[index] - - -@translate_val.register(ops.RegexReplace) -def regex_replace(op, *, arg, pattern, replacement, **_): - return F.regexp_replace(arg, pattern, replacement, sg.exp.convert("g")) - - -@translate_val.register(ops.StringFind) -def string_find(op, *, arg, substr, start, end, **_): - if end is not None: - raise NotImplementedError("`end` not yet implemented") - - if start is not None: - pos = F.strpos(F.substr(arg, start + 1), substr) - return F.coalesce(F.nullif(pos + start, start), 0) - - return F.strpos(arg, substr) - - -@translate_val.register(ops.RegexSearch) -def regex_search(op, *, arg, pattern, **_): - return if_( - sg.or_(arg.is_(NULL), pattern.is_(NULL)), - NULL, - F.coalesce( - # null is returned for non-matching patterns, so coalesce to false - # because that is the desired behavior for ops.RegexSearch - F.array_length(F.regexp_match(arg, pattern)) > 0, - FALSE, - ), - ) - - -@translate_val.register(ops.StringContains) -def string_contains(op, *, haystack, needle, **_): - return F.strpos(haystack, needle) > sg.exp.convert(0) - - -@translate_val.register(ops.StringJoin) -def string_join(op, *, sep, arg, **_): - if not isinstance(op.sep, ops.Literal): - raise ValueError( - "join `sep` expressions must be literals. " - "Arbitrary expressions are not supported in the DataFusion backend" - ) - - return F.concat_ws(sep, *arg) - - -@translate_val.register(ops.ExtractFragment) -def _(op, *, arg, **_): - return F.extract_url_field(arg, "fragment") - - -@translate_val.register(ops.ExtractProtocol) -def extract_protocol(op, *, arg, **_): - return F.extract_url_field(arg, "scheme") - - -@translate_val.register(ops.ExtractAuthority) -def extract_authority(op, *, arg, **_): - return F.extract_url_field(arg, "netloc") - - -@translate_val.register(ops.ExtractPath) -def extract_path(op, *, arg, **_): - return F.extract_url_field(arg, "path") - - -@translate_val.register(ops.ExtractHost) -def extract_host(op, *, arg, **_): - return F.extract_url_field(arg, "hostname") - - -@translate_val.register(ops.ExtractQuery) -def extract_query(op, *, arg, key, **_): - if key is not None: - return F.extract_query_param(arg, key) - return F.extract_query(arg) - - -@translate_val.register(ops.ExtractUserInfo) -def extract_user_info(op, *, arg, **_): - return F.extract_user_info(arg) - - -@translate_val.register(ops.ExtractYear) -@translate_val.register(ops.ExtractMonth) -@translate_val.register(ops.ExtractQuarter) -@translate_val.register(ops.ExtractDay) -def extract(op, *, arg, **_): - skip = len("Extract") - part = type(op).__name__[skip:].lower() - return F.date_part(part, arg) - - -@translate_val.register(ops.ExtractDayOfYear) -def extract_day_of_the_year(op, *, arg, **_): - return F.date_part("doy", arg) - - -@translate_val.register(ops.DayOfWeekIndex) -def extract_day_of_the_week_index(op, *, arg, **_): - return (F.date_part("dow", arg) + 6) % 7 - - -_DOW_INDEX_NAME = { - 0: "Monday", - 1: "Tuesday", - 2: "Wednesday", - 3: "Thursday", - 4: "Friday", - 5: "Saturday", - 6: "Sunday", -} - - -@translate_val.register(ops.DayOfWeekName) -def extract_day_of_the_week_name(op, *, arg, **_): - cases, results = zip(*_DOW_INDEX_NAME.items()) - - return sg.exp.Case( - this=paren((F.date_part("dow", arg) + 6) % 7), - ifs=list(map(if_, cases, results)), - ) - - -@translate_val.register(ops.Date) -def date(op, *, arg, **_): - return F.date_trunc("day", arg) - - 
-@translate_val.register(ops.ExtractWeekOfYear) -def extract_week_of_year(op, *, arg, **_): - return F.date_part("week", arg) - - -@translate_val.register(ops.TimestampTruncate) -def timestamp_truncate(op, *, arg, **_): - unit = op.unit - if unit in ( - IntervalUnit.MILLISECOND, - IntervalUnit.MICROSECOND, - IntervalUnit.NANOSECOND, - ): - raise com.UnsupportedOperationError( - f"The function is not defined for time unit {unit}" - ) - - return F.date_trunc(unit.name.lower(), arg) - - -@translate_val.register(ops.ExtractEpochSeconds) -def extract_epoch_seconds(op, *, arg, **_): - if op.arg.dtype.is_date(): - return F.extract_epoch_seconds_date(arg) - elif op.arg.dtype.is_timestamp(): - return F.extract_epoch_seconds_timestamp(arg) - else: - raise com.OperationNotDefinedError( - f"The function is not defined for {op.arg.dtype}" - ) - - -@translate_val.register(ops.ExtractMinute) -def extract_minute(op, *, arg, **_): - if op.arg.dtype.is_date(): - return F.date_part("minute", arg) - elif op.arg.dtype.is_time(): - return F.extract_minute_time(arg) - elif op.arg.dtype.is_timestamp(): - return F.extract_minute_timestamp(arg) - else: - raise com.OperationNotDefinedError( - f"The function is not defined for {op.arg.dtype}" - ) - - -@translate_val.register(ops.ExtractMillisecond) -def extract_millisecond(op, *, arg, **_): - if op.arg.dtype.is_time(): - return F.extract_millisecond_time(arg) - elif op.arg.dtype.is_timestamp(): - return F.extract_millisecond_timestamp(arg) - else: - raise com.OperationNotDefinedError( - f"The function is not defined for {op.arg.dtype}" - ) - - -@translate_val.register(ops.ExtractHour) -def extract_hour(op, *, arg, **_): - if op.arg.dtype.is_date() or op.arg.dtype.is_timestamp(): - return F.date_part("hour", arg) - elif op.arg.dtype.is_time(): - return F.extract_hour_time(arg) - else: - raise com.OperationNotDefinedError( - f"The function is not defined for {op.arg.dtype}" - ) - - -@translate_val.register(ops.ExtractSecond) -def extract_second(op, *, arg, **_): - if op.arg.dtype.is_date() or op.arg.dtype.is_timestamp(): - return F.extract_second_timestamp(arg) - elif op.arg.dtype.is_time(): - return F.extract_second_time(arg) - else: - raise com.OperationNotDefinedError( - f"The function is not defined for {op.arg.dtype}" - ) - - -@translate_val.register(ops.TableArrayView) -def _table_array_view(op, *, table, **_): - return table.args["this"].subquery() - - -@translate_val.register(ops.BitwiseAnd) -def _bitwise_and(op, *, left, right, **_): - return sg.exp.BitwiseAnd(this=left, expression=right) - - -@translate_val.register(ops.BitwiseOr) -def _bitwise_and(op, *, left, right, **_): - return sg.exp.BitwiseOr(this=left, expression=right) - - -@translate_val.register(ops.BitwiseXor) -def _bitwise_and(op, *, left, right, **_): - return sg.exp.BitwiseXor(this=left, expression=right) - - -@translate_val.register(ops.BitwiseLeftShift) -def _bitwise_and(op, *, left, right, **_): - return sg.exp.BitwiseLeftShift(this=left, expression=right) - - -@translate_val.register(ops.BitwiseRightShift) -def _bitwise_and(op, *, left, right, **_): - return sg.exp.BitwiseRightShift(this=left, expression=right) - - -@translate_val.register(ops.RowsWindowFrame) -@translate_val.register(ops.RangeWindowFrame) -def _window_frame(op, *, group_by, order_by, start, end, max_lookback=None, **_): - if max_lookback is not None: - raise NotImplementedError( - "`max_lookback` is not supported in the ClickHouse backend" - ) - - if start is None: - start = {} - - start_value = start.get("value", 
"UNBOUNDED") - start_side = start.get("side", "PRECEDING") - - if end is None: - end = {} - - end_value = end.get("value", "UNBOUNDED") - end_side = end.get("side", "FOLLOWING") - - spec = sg.exp.WindowSpec( - kind=op.how.upper(), - start=start_value, - start_side=start_side, - end=end_value, - end_side=end_side, - over="OVER", - ) - - order = sg.exp.Order(expressions=order_by) if order_by else None - - # TODO: bit of a hack to return a partial, but similar to `WindowBoundary` - # there's no sqlglot expression that corresponds to _only_ this information - return functools.partial( - sg.exp.Window, partition_by=group_by, order=order, spec=spec - ) - - -@translate_val.register(ops.WindowFunction) -def _window(op: ops.WindowFunction, *, func, frame, **_: Any): - # frame is a partial call to sg.exp.Window - return frame(this=func) - - -@translate_val.register(ops.WindowBoundary) -def _window_boundary(op, *, value, preceding, **_): - # TODO: bit of a hack to return a dict, but there's no sqlglot expression - # that corresponds to _only_ this information - return {"value": value, "side": "preceding" if preceding else "following"} - - -@translate_val.register(ops.SimpleCase) -@translate_val.register(ops.SearchedCase) -def _case(op, *, base=None, cases, results, default, **_): - return sg.exp.Case(this=base, ifs=list(map(if_, cases, results)), default=default) - - -@translate_val.register(ops.IfElse) -def _if_else(op, *, bool_expr, true_expr, false_null_expr, **_): - return if_(bool_expr, true_expr, false_null_expr) - - -@translate_val.register(ops.NotNull) -def _not_null(op, *, arg, **_): - return sg.not_(arg.is_(NULL)) - - -@translate_val.register(ops.Array) -def array_column(op, *, exprs, **_): - return F.make_array(*exprs) - - -@translate_val.register(ops.ArrayRepeat) -def array_repeat(op, *, arg, times, **_): - return F.flatten(F.array_repeat(arg, times)) - - -@translate_val.register(ops.ArrayConcat) -def array_concat(op, *, arg, **_): - return F.array_concat(*arg) - - -@translate_val.register(ops.ArrayPosition) -def array_position(op, *, arg, other, **_): - return F.coalesce(F.array_position(arg, other), 0) - - -@translate_val.register(ops.Covariance) -def covariance(op, *, left, right, how, where, **_): - x = op.left - if x.dtype.is_boolean(): - left = cast(left, dt.float64) - - y = op.right - if y.dtype.is_boolean(): - right = cast(right, dt.float64) - - if how == "sample": - return agg["covar_samp"](left, right, where=where) - elif how == "pop": - return agg["covar_pop"](left, right, where=where) - else: - raise ValueError(f"Unrecognized how = `{how}` value") - - -@translate_val.register(ops.Correlation) -def correlation(op, *, left, right, where, **_): - x = op.left - if x.dtype.is_boolean(): - left = cast(left, dt.float64) - - y = op.right - if y.dtype.is_boolean(): - right = cast(right, dt.float64) - - return agg["corr"](left, right, where=where) - - -@translate_val.register(ops.IsNull) -def is_null(op, *, arg, **_): - return arg.is_(NULL) - - -@translate_val.register(ops.IsNan) -def is_nan(op, *, arg, **_): - return F.isnan(F.coalesce(arg, sg.exp.Literal.number("'NaN'::double"))) - - -@translate_val.register(ops.ArrayStringJoin) -def array_string_join(op, *, sep, arg, **_): - return F.array_join(arg, sep) - - -@translate_val.register(ops.FindInSet) -def array_string_find(op, *, needle, values, **_): - return F.coalesce(F.array_position(F.make_array(*values), needle), 0) - - -@translate_val.register(ops.TimestampFromUNIX) -def timestamp_from_unix(op, *, arg, unit, **_): - if unit == 
TimestampUnit.SECOND: - return F.from_unixtime(arg) - elif unit in ( - TimestampUnit.MILLISECOND, - TimestampUnit.MICROSECOND, - TimestampUnit.NANOSECOND, - ): - return F.arrow_cast(arg, f"Timestamp({unit.name.capitalize()}, None)") - else: - raise com.UnsupportedOperationError(f"Unsupported unit {unit}") diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/datafusion/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/datafusion/out.sql new file mode 100644 index 000000000000..b309cd65374d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/datafusion/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/datafusion/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/datafusion/out.sql new file mode 100644 index 000000000000..b309cd65374d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/datafusion/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/datafusion/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/datafusion/out.sql new file mode 100644 index 000000000000..6bd0ba8c995d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/datafusion/out.sql @@ -0,0 +1,3 @@ +SELECT + SUM("t0"."bigint_col") AS "Sum(bigint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/datafusion/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/datafusion/out.sql new file mode 100644 index 000000000000..97338646649f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/datafusion/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + "t0"."id", + "t0"."bool_col" + FROM "functional_alltypes" AS "t0" + LIMIT 10 +) AS "t2" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/datafusion/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/datafusion/out.sql index d3969647c9ea..703ef7e85d34 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/datafusion/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/datafusion/out.sql @@ -1,22 +1,28 @@ SELECT - CASE "t0"."continent" - WHEN 'NA' - THEN 'North America' - WHEN 'SA' - THEN 'South America' - WHEN 'EU' - THEN 'Europe' - WHEN 'AF' - THEN 'Africa' - WHEN 'AS' - THEN 'Asia' - WHEN 'OC' - THEN 'Oceania' - WHEN 'AN' - THEN 'Antarctica' - ELSE 'Unknown continent' - END AS "cont", + "cont", SUM("t0"."population") AS "total_pop" -FROM "countries" AS "t0" +FROM ( + SELECT + "t0"."continent", + "t0"."population", + CASE "t0"."continent" + WHEN 'NA' + THEN 'North America' + WHEN 'SA' + THEN 'South America' + WHEN 'EU' + THEN 'Europe' + WHEN 'AF' + THEN 'Africa' + WHEN 'AS' + THEN 'Asia' + WHEN 'OC' + THEN 'Oceania' + WHEN 'AN' + THEN 'Antarctica' + ELSE 'Unknown continent' + END AS "cont" + FROM "countries" AS "t0" +) AS t0 GROUP BY - 1 \ No newline at end of file + "cont" \ No newline at end of file diff 
--git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/datafusion/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/datafusion/out.sql index b5362bf67adc..c1611d8cecc3 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/datafusion/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/datafusion/out.sql @@ -1,15 +1,9 @@ SELECT - "t0"."x" IN (( + "t0"."x" IN ( SELECT - "t1"."x" AS "x" - FROM ( - SELECT - * - FROM "t" AS "t0" - WHERE - ( - "t0"."x" > 2 - ) - ) AS "t1" - )) AS "InSubquery(x)" + "t0"."x" + FROM "t" AS "t0" + WHERE + "t0"."x" > 2 + ) AS "InSubquery(x)" FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 190a5d560537..b1b1ae048108 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -772,16 +772,7 @@ def mean_and_std(v): id="collect", marks=[ pytest.mark.notimpl( - [ - "impala", - "mysql", - "sqlite", - "datafusion", - "mssql", - "druid", - "oracle", - "exasol", - ], + ["impala", "mysql", "sqlite", "mssql", "druid", "oracle", "exasol"], raises=com.OperationNotDefinedError, ), pytest.mark.broken( diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 7be4e9f68861..ea41cbb89956 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -97,7 +97,6 @@ def test_array_concat_variadic(con): # Issues #2370 -@pytest.mark.notimpl(["datafusion"], raises=BaseException) @pytest.mark.notimpl(["flink"], raises=com.OperationNotDefinedError) @pytest.mark.notyet( ["postgres", "trino"], @@ -151,7 +150,7 @@ def test_np_array_literal(con): @pytest.mark.parametrize("idx", range(3)) -@pytest.mark.notimpl(["polars", "datafusion"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) def test_array_index(con, idx): arr = [1, 2, 3] expr = ibis.literal(arr) diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index d06e8a31374c..2acddb1fab4d 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -87,7 +87,7 @@ def _create_temp_table_with_schema(backend, con, temp_table_name, schema, data=N ), ], ) -@pytest.mark.notimpl(["dask", "datafusion", "druid", "impala"]) +@pytest.mark.notimpl(["dask", "druid", "impala"]) @pytest.mark.notimpl( ["flink"], reason="Flink backend supports creating only TEMPORARY VIEW for in-memory data.", @@ -214,7 +214,7 @@ def test_sql(backend, con): } -@mark.notimpl(["datafusion", "druid"]) +@mark.notimpl(["druid"]) @pytest.mark.notimpl( ["flink"], raises=com.IbisError, @@ -313,7 +313,7 @@ def test_rename_table(con, temp_table, temp_table_orig): assert temp_table_orig not in con.list_tables() -@mark.notimpl(["datafusion", "polars", "druid"]) +@mark.notimpl(["polars", "druid"]) @mark.never(["impala", "pyspark"], reason="No non-nullable datatypes") @mark.notyet( ["trino"], reason="trino doesn't support NOT NULL in its in-memory catalog" @@ -340,7 +340,7 @@ def test_nullable_input_output(con, temp_table): assert t.schema().types[2].nullable -@mark.notimpl(["datafusion", "druid", "polars"]) +@mark.notimpl(["druid", "polars"]) @pytest.mark.broken( ["flink"], raises=ValueError, @@ -932,7 +932,7 @@ def test_self_join_memory_table(backend, con, monkeypatch): ], ids=["python", "pandas"], ) -@pytest.mark.notimpl(["dask", "datafusion", "druid"]) +@pytest.mark.notimpl(["dask", "druid"]) @pytest.mark.notimpl( ["flink"], 
reason="Flink backend supports creating only TEMPORARY VIEW for in-memory data.", @@ -1459,7 +1459,7 @@ def gen_test_name(con: BaseBackend) -> str: @mark.notimpl( - ["datafusion", "polars"], + ["polars"], raises=NotImplementedError, reason="overwriting not implemented in ibis for this backend", ) diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 4bdba3c9f13c..cd340aaeb1de 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -142,6 +142,7 @@ def test_isna(backend, alltypes, col, filt): [ "bigquery", "clickhouse", + "datafusion", "duckdb", "impala", "postgres", @@ -195,9 +196,7 @@ def test_coalesce(con, expr, expected): # TODO(dask) - identicalTo - #2553 -@pytest.mark.notimpl( - ["clickhouse", "datafusion", "dask", "pyspark", "mssql", "druid", "exasol"] -) +@pytest.mark.notimpl(["clickhouse", "dask", "pyspark", "mssql", "druid", "exasol"]) def test_identical_to(backend, alltypes, sorted_df): sorted_alltypes = alltypes.order_by("id") df = sorted_df @@ -623,7 +622,7 @@ def test_isin_notin(backend, alltypes, df, ibis_op, pandas_op): reason="dask doesn't support Series as isin/notin argument", raises=NotImplementedError, ) -@pytest.mark.notimpl(["datafusion", "druid"]) +@pytest.mark.notimpl(["druid"]) @pytest.mark.parametrize( ("ibis_op", "pandas_op"), [ @@ -641,11 +640,13 @@ def test_isin_notin(backend, alltypes, df, ibis_op, pandas_op): _.string_col.notin(_.string_col), lambda df: ~df.string_col.isin(df.string_col), id="notin_col", + marks=[pytest.mark.notimpl(["datafusion"])], ), param( (_.bigint_col + 1).notin(_.string_col.length() + 1), lambda df: ~(df.bigint_col.add(1)).isin(df.string_col.str.len().add(1)), id="notin_expr", + marks=[pytest.mark.notimpl(["datafusion"])], ), ], ) @@ -762,7 +763,6 @@ def test_select_filter_select(backend, alltypes, df): backend.assert_series_equal(result, expected) -@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) @pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError) def test_between(backend, alltypes, df): expr = alltypes.double_col.between(5, 10) @@ -898,7 +898,7 @@ def test_isin_uncorrelated( @pytest.mark.broken(["polars"], reason="incorrect answer") -@pytest.mark.notimpl(["datafusion", "pyspark", "druid", "exasol"]) +@pytest.mark.notimpl(["pyspark", "druid", "exasol"]) @pytest.mark.notyet(["dask"], reason="not supported by the backend") def test_isin_uncorrelated_filter( backend, batting, awards_players, batting_df, awards_players_df @@ -1367,7 +1367,6 @@ def hash_256(col): "pandas", "dask", "bigquery", - "datafusion", "druid", "impala", "mssql", @@ -1400,6 +1399,7 @@ def hash_256(col): reason="raises TrinoUserError", ), pytest.mark.broken(["polars"], reason="casts to 1672531200000000000"), + pytest.mark.broken(["datafusion"], reason="casts to 1672531200000000"), ], ), ], @@ -1887,9 +1887,7 @@ def test_substitute(backend): @pytest.mark.notimpl( - ["dask", "datafusion", "pandas", "polars"], - raises=NotImplementedError, - reason="not a SQL backend", + ["dask", "pandas", "polars"], raises=NotImplementedError, reason="not a SQL backend" ) @pytest.mark.notimpl( ["pyspark"], reason="pyspark doesn't generate SQL", raises=NotImplementedError diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 2e345252a2e7..27c4aa9b5110 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -788,14 +788,7 @@ def test_decimal_literal(con, backend, expr, expected_types, expected_result): 
np.isinf, id="isinf", marks=[ - pytest.mark.notimpl( - ["exasol"], - raises=com.OperationNotDefinedError, - ), - pytest.mark.notimpl( - ["datafusion"], - raises=com.OperationNotDefinedError, - ), + pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) ], ), ], diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index 910ac4cb97f6..8266186481b2 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -37,7 +37,7 @@ def test_floating_scalar_parameter(backend, alltypes, df, column, raw_value): ("start_string", "end_string"), [("2009-03-01", "2010-07-03"), ("2014-12-01", "2017-01-05")], ) -@pytest.mark.notimpl(["datafusion", "mssql", "trino", "druid"]) +@pytest.mark.notimpl(["mssql", "trino", "druid"]) @pytest.mark.broken(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notimpl( ["risingwave"], diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index eb8aae905935..0ef9d20ba505 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -39,8 +39,7 @@ reason="Not a SQL backend", ) no_sql_extraction = pytest.mark.notimpl( - ["datafusion", "pyspark", "polars"], - reason="Not clear how to extract SQL from the backend", + ["pyspark", "polars"], reason="Not clear how to extract SQL from the backend" ) @@ -62,9 +61,7 @@ def test_literal(backend, expr): assert ibis.to_sql(expr, dialect=backend.name()) -@pytest.mark.never( - ["pandas", "dask", "datafusion", "polars", "pyspark"], reason="not SQL" -) +@pytest.mark.never(["pandas", "dask", "polars", "pyspark"], reason="not SQL") @pytest.mark.xfail_version( mssql=["sqlalchemy>=2"], reason="sqlalchemy 2 prefixes literals with `N`" ) @@ -106,9 +103,7 @@ def test_cte_refs_in_topo_order(backend, snapshot): snapshot.assert_match(sql, "out.sql") -@pytest.mark.never( - ["pandas", "dask", "datafusion", "polars", "pyspark"], reason="not SQL" -) +@pytest.mark.never(["pandas", "dask", "polars", "pyspark"], reason="not SQL") def test_isin_bug(con, snapshot): t = ibis.table(dict(x="int"), name="t") good = t[t.x > 2].x @@ -117,10 +112,13 @@ def test_isin_bug(con, snapshot): @pytest.mark.never( - ["pandas", "dask", "datafusion", "polars", "pyspark"], + ["pandas", "dask", "polars", "pyspark"], reason="not SQL", raises=NotImplementedError, ) +@pytest.mark.notyet( + ["datafusion"], reason="no unnest support", raises=exc.OperationNotDefinedError +) @pytest.mark.notyet( ["sqlite", "mysql", "druid", "impala", "mssql"], reason="no unnest support upstream" ) diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index 83366c24bdb1..d441b39896f2 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -147,9 +147,7 @@ def uses_java_re(t): lambda t: t.string_col.str.contains("6.*"), id="like", marks=[ - pytest.mark.notimpl( - ["datafusion", "polars"], raises=com.OperationNotDefinedError - ), + pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError), pytest.mark.broken( ["mssql"], reason="mssql doesn't allow like outside of filters", @@ -162,9 +160,7 @@ def uses_java_re(t): lambda t: t.string_col.str.contains("6%"), id="complex_like_escape", marks=[ - pytest.mark.notimpl( - ["datafusion", "polars"], raises=com.OperationNotDefinedError - ), + pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError), pytest.mark.broken( ["mssql"], reason="mssql doesn't allow like outside of filters", @@ -177,9 +173,7 @@ def uses_java_re(t): lambda t: 
t.string_col.str.contains("6%.*"), id="complex_like_escape_match", marks=[ - pytest.mark.notimpl( - ["datafusion", "polars"], raises=com.OperationNotDefinedError - ), + pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError), pytest.mark.broken( ["mssql"], reason="mssql doesn't allow like outside of filters", @@ -193,7 +187,7 @@ def uses_java_re(t): id="ilike", marks=[ pytest.mark.notimpl( - ["datafusion", "pyspark", "polars"], + ["pyspark", "polars"], raises=com.OperationNotDefinedError, ), pytest.mark.broken( @@ -830,7 +824,6 @@ def uses_java_re(t): marks=pytest.mark.notimpl( [ "dask", - "datafusion", "impala", "mysql", "sqlite", diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 78e4ce67208e..eb1330d8c19c 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -643,7 +643,7 @@ def test_timestamp_truncate(backend, alltypes, df, unit): ["polars", "druid"], reason="snaps to the UNIX epoch", raises=AssertionError ) @pytest.mark.notimpl( - ["datafusion", "oracle"], + ["oracle"], raises=com.OperationNotDefinedError, ) @pytest.mark.broken( @@ -2046,10 +2046,7 @@ def test_now_from_projection(alltypes): } -@pytest.mark.notimpl( - ["pandas", "datafusion", "dask", "pyspark"], - raises=com.OperationNotDefinedError, -) +@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["druid"], raises=sa.exc.ProgrammingError, reason="SQL parse failed" ) @@ -2097,10 +2094,7 @@ def test_date_literal(con, backend): } -@pytest.mark.notimpl( - ["pandas", "datafusion", "dask", "pyspark"], - raises=com.OperationNotDefinedError, -) +@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["druid"], raises=sa.exc.ProgrammingError, @@ -2139,8 +2133,7 @@ def test_timestamp_literal(con, backend): @pytest.mark.notimpl( - ["pandas", "datafusion", "mysql", "dask", "pyspark"], - raises=com.OperationNotDefinedError, + ["pandas", "mysql", "dask", "pyspark"], raises=com.OperationNotDefinedError ) @pytest.mark.notimpl( ["mysql"], @@ -2167,6 +2160,13 @@ def test_timestamp_literal(con, backend): "PST8PDT", "2022-02-04 08:20:00PST", # The time zone for Berkeley, California. 
id="iso", + marks=[ + pytest.mark.broken( + ["datafusion"], + raises=AssertionError, + reason="timezones don't seem to work", + ), + ], ), ], ) @@ -2267,9 +2267,7 @@ def test_time_literal(con, backend): @pytest.mark.broken( ["sqlite"], raises=AssertionError, reason="SQLite returns Timedelta from execution" ) -@pytest.mark.notimpl( - ["dask", "datafusion", "pandas"], raises=com.OperationNotDefinedError -) +@pytest.mark.notimpl(["dask", "pandas"], raises=com.OperationNotDefinedError) @pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.parametrize( "microsecond", @@ -2389,10 +2387,7 @@ def test_interval_literal(con, backend): assert con.execute(expr.typeof()) == INTERVAL_BACKEND_TYPES[backend_name] -@pytest.mark.notimpl( - ["pandas", "datafusion", "dask", "pyspark"], - raises=com.OperationNotDefinedError, -) +@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["mysql"], raises=sa.exc.ProgrammingError, @@ -2427,10 +2422,7 @@ def test_date_column_from_ymd(backend, con, alltypes, df): backend.assert_series_equal(golden, result.timestamp_col) -@pytest.mark.notimpl( - ["pandas", "datafusion", "dask", "pyspark"], - raises=com.OperationNotDefinedError, -) +@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["druid"], raises=AttributeError, diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 1b43317a52c9..d85cfbcbd4be 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -88,11 +88,6 @@ def calc_zscore(s): id="lag", marks=[ pytest.mark.notimpl(["dask"], raises=NotImplementedError), - pytest.mark.broken( - ["datafusion"], - raises=Exception, - reason="Exception: Internal error: Expects default value to have Int64 type.", - ), pytest.mark.notimpl( ["flink"], raises=Py4JJavaError, @@ -110,11 +105,6 @@ def calc_zscore(s): reason="upstream is broken; returns all nulls", raises=AssertionError, ), - pytest.mark.broken( - ["datafusion"], - reason="Exception: Internal error: Expects default value to have Int64 type.", - raises=BaseException, - ), pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl( ["flink"], @@ -851,11 +841,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): True, id="ordered-lag", marks=[ - pytest.mark.broken( - ["datafusion"], - raises=Exception, - reason="Exception: Internal error: Expects default value to have Int64 type.", - ), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -875,11 +860,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): raises=AssertionError, ), pytest.mark.broken(["oracle"], raises=AssertionError), - pytest.mark.broken( - ["datafusion"], - raises=Exception, - reason="Exception: Internal error: Expects default value to have Int64 type.", - ), pytest.mark.notimpl( ["pyspark"], raises=PySparkAnalysisException, @@ -910,11 +890,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): True, id="ordered-lead", marks=[ - pytest.mark.broken( - ["datafusion"], - raises=Exception, - reason="Exception: Internal error: Expects default value to have Int64 type.", - ), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -936,11 +911,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): ), raises=AssertionError, ), - pytest.mark.broken( - ["datafusion"], - raises=Exception, - reason="Exception: Internal error: Expects default value to have 
Int64 type.", - ), pytest.mark.broken(["oracle"], raises=AssertionError), pytest.mark.notimpl( ["pyspark"], @@ -1168,11 +1138,6 @@ def test_percent_rank_whole_table_no_order_by(backend, alltypes, df): @pytest.mark.broken( ["pandas"], reason="pandas returns incorrect results", raises=AssertionError ) -@pytest.mark.broken( - ["datafusion"], - reason="Exception: External error: Internal error: Expects default value to have Int64 type", - raises=Exception, -) def test_grouped_ordered_window_coalesce(backend, alltypes, df): t = alltypes expr = ( @@ -1207,11 +1172,6 @@ def agg(df): @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) -@pytest.mark.broken( - ["datafusion"], - raises=Exception, - reason="Exception: Internal error: Expects default value to have Int64 type.", -) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, From 8bd248a9794922254dbb15dd50de18d8804ae72d Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 26 Dec 2023 12:14:30 -0500 Subject: [PATCH 020/161] refactor(compilers): consolidate StringJoin impl --- ibis/backends/clickhouse/compiler.py | 4 ---- ibis/backends/duckdb/compiler.py | 4 ---- 2 files changed, 8 deletions(-) diff --git a/ibis/backends/clickhouse/compiler.py b/ibis/backends/clickhouse/compiler.py index 7d04c531d77f..44760bf8ed8e 100644 --- a/ibis/backends/clickhouse/compiler.py +++ b/ibis/backends/clickhouse/compiler.py @@ -395,10 +395,6 @@ def visit_StringSplit(self, op, *, arg, delimiter): delimiter, self.cast(arg, dt.String(nullable=False)) ) - @visit_node.register(ops.StringJoin) - def visit_StringJoin(self, op, *, sep, arg): - return self.f.arrayStringConcat(self.f.array(*arg), sep) - @visit_node.register(ops.Capitalize) def visit_Capitalize(self, op, *, arg): return self.f.concat( diff --git a/ibis/backends/duckdb/compiler.py b/ibis/backends/duckdb/compiler.py index 35fa4729f07b..c84ee4b0d162 100644 --- a/ibis/backends/duckdb/compiler.py +++ b/ibis/backends/duckdb/compiler.py @@ -196,10 +196,6 @@ def visit_CountDistinctStar(self, op, *, where, arg): ) return self.agg.count(sge.Distinct(expressions=[row]), where=where) - @visit_node.register(ops.StringJoin) - def visit_StringJoin(self, op, *, arg, sep): - return self.f.list_aggr(self.f.array(*arg), "string_agg", sep) - @visit_node.register(ops.ExtractMillisecond) def visit_ExtractMillisecond(self, op, *, arg): return self.f.mod(self.f.extract("ms", arg), 1_000) From bce36bd238f151ddd629650f86b7688e02ffd141 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 28 Dec 2023 22:47:34 +0100 Subject: [PATCH 021/161] feat(common): add `Dispatched` base class for convenient visitor pattern implementation --- ibis/common/dispatch.py | 68 ++++++++++++++++++++++++++ ibis/common/tests/test_dispatch.py | 77 +++++++++++++++++++++++++++++- 2 files changed, 144 insertions(+), 1 deletion(-) diff --git a/ibis/common/dispatch.py b/ibis/common/dispatch.py index e9cf71c20109..28999b452e37 100644 --- a/ibis/common/dispatch.py +++ b/ibis/common/dispatch.py @@ -2,6 +2,7 @@ import abc import functools +import inspect import re from collections import defaultdict @@ -91,3 +92,70 @@ def call(arg, *args, **kwargs): call.register = register return call + + +class _MultiDict(dict): + """A dictionary that allows multiple values for a single key.""" + + def __setitem__(self, key, value): + if key in self: + self[key].append(value) + else: + super().__setitem__(key, [value]) + + +class DispatchedMeta(type): + """Metaclass that allows
multiple implementations of a method to be defined.""" + + def __new__(cls, name, bases, dct): + namespace = {} + for key, value in dct.items(): + if len(value) == 1: + # there is just a single attribute so pick that + namespace[key] = value[0] + elif all(inspect.isfunction(v) for v in value): + # multiple functions are defined with the same name, so create + # a dispatcher function + first, *rest = value + func = functools.singledispatchmethod(first) + for impl in rest: + func.register(impl) + namespace[key] = func + elif all(isinstance(v, classmethod) for v in value): + first, *rest = value + func = functools.singledispatchmethod(first.__func__) + for v in rest: + func.register(v.__func__) + namespace[key] = classmethod(func) + elif all(isinstance(v, staticmethod) for v in value): + first, *rest = value + func = functools.singledispatch(first.__func__) + for v in rest: + func.register(v.__func__) + namespace[key] = staticmethod(func) + else: + raise TypeError(f"Multiple attributes are defined with name {key}") + + return type.__new__(cls, name, bases, namespace) + + @classmethod + def __prepare__(cls, name, bases): + return _MultiDict() + + +class Dispatched(metaclass=DispatchedMeta): + """Base class supporting multiple implementations of a method. + + Methods with the same name can be defined multiple times. The first method + defined is the default implementation, and subsequent methods are registered + as implementations for specific types of the first argument. + + The constructed methods are equivalent as if they were defined with + `functools.singledispatchmethod` but without the need to use the decorator + syntax. The recommended application of this class is to implement visitor + patterns. + + Besides ordinary methods, classmethods and staticmethods are also supported. + The implementation can be extended to overload multiple arguments by using + `multimethod` instead of `singledispatchmethod` as the dispatcher. 
+ """ diff --git a/ibis/common/tests/test_dispatch.py b/ibis/common/tests/test_dispatch.py index 4b3a34ff5b7c..5f34c533851d 100644 --- a/ibis/common/tests/test_dispatch.py +++ b/ibis/common/tests/test_dispatch.py @@ -3,7 +3,9 @@ import collections import decimal -from ibis.common.dispatch import lazy_singledispatch +from ibis.common.dispatch import Dispatched, lazy_singledispatch + +# ruff: noqa: F811 def test_lazy_singledispatch(): @@ -118,3 +120,76 @@ def _(a): assert foo({}) == "mapping" assert foo(mydict()) == "mydict" # concrete takes precedence assert foo(sum) == "callable" + + +class Visitor(Dispatched): + def a(self): + return "a" + + def b(self, x: int): + return "b_int" + + def b(self, x: str): + return "b_str" + + @classmethod + def c(cls, x: int, **kwargs): + return "c_int" + + @classmethod + def c(cls, x: str, a=0, b=1): + return "c_str" + + def d(self, x: int): + return "d_int" + + def d(self, x: str): + return "d_str" + + @staticmethod + def e(x: int): + return "e_int" + + @staticmethod + def e(x: str): + return "e_str" + + +class Subvisitor(Visitor): + def b(self, x): + return super().b(x) + + def b(self, x: float): + return "b_float" + + @classmethod + def c(cls, x): + return super().c(x) + + @classmethod + def c(cls, s: float): + return "c_float" + + +def test_dispatched(): + v = Visitor() + assert v.a == v.a + assert v.b(1) == "b_int" + assert v.b("1") == "b_str" + assert v.d(1) == "d_int" + assert v.d("1") == "d_str" + + w = Subvisitor() + assert w.b(1) == "b_int" + assert w.b(1.1) == "b_float" + + assert Visitor.c(1, a=0, b=0) == "c_int" + assert Visitor.c("1") == "c_str" + + assert Visitor.e("1") == "e_str" + assert Visitor.e(1) == "e_int" + + assert Subvisitor.c(1) == "c_int" + assert Subvisitor.c(1.1) == "c_float" + + assert Subvisitor.e(1) == "e_int" From ac25d9ecc0a90bf53af05e2bb237aab918d7a88c Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 29 Dec 2023 10:02:04 -0500 Subject: [PATCH 022/161] refactor(duckdb): remove the need for a specialized `_to_geodataframe` method --- ibis/backends/duckdb/__init__.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index b1df75561970..1088c8037b73 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -1383,26 +1383,6 @@ def _fetch_from_cursor( ) return DuckDBPandasData.convert_table(df, schema) - # TODO(gforsyth): this may not need to be specialized in the future - @staticmethod - def _to_geodataframe(df, schema): - """Convert `df` to a `GeoDataFrame`. - - Required libraries for geospatial support must be installed and - a geospatial column is present in the dataframe. 
- """ - import geopandas as gpd - - geom_col = None - for name, dtype in schema.items(): - if dtype.is_geospatial(): - if not geom_col: - geom_col = name - df[name] = gpd.GeoSeries.from_wkb(df[name]) - if geom_col: - df = gpd.GeoDataFrame(df, geometry=geom_col) - return df - def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: with self._safe_raw_sql(f"DESCRIBE {query}") as cur: rows = cur.fetch_arrow_table() From a3cca0b20903bb161416b880e41dc0c899ef81e9 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 29 Dec 2023 10:07:53 -0500 Subject: [PATCH 023/161] fix(duckdb): ensure that create_schema and create_database are actually tested --- ibis/backends/duckdb/__init__.py | 10 ++++++---- ibis/backends/tests/test_client.py | 4 ++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 1088c8037b73..44e4fe78c7f4 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -491,8 +491,9 @@ def create_schema( "DuckDB cannot create a schema in another database." ) - name = sg.to_identifier(database, quoted=True) - return sge.Create(this=name, kind="SCHEMA", replace=force) + name = sg.table(name, catalog=database, quoted=self.compiler.quoted) + with self._safe_raw_sql(sge.Create(this=name, kind="SCHEMA", replace=force)): + pass def drop_schema( self, name: str, database: str | None = None, force: bool = False @@ -502,8 +503,9 @@ def drop_schema( "DuckDB cannot drop a schema in another database." ) - name = sg.to_identifier(database, quoted=True) - return sge.Drop(this=name, kind="SCHEMA", replace=force) + name = sg.table(name, catalog=database, quoted=self.compiler.quoted) + with self._safe_raw_sql(sge.Drop(this=name, kind="SCHEMA", replace=force)): + pass def register( self, diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 2acddb1fab4d..96f1575e8e83 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -1502,13 +1502,17 @@ def test_overwrite(ddl_con, monkeypatch): def test_create_database(con_create_database): database = gen_name("test_create_database") con_create_database.create_database(database) + assert database in con_create_database.list_databases() con_create_database.drop_database(database) + assert database not in con_create_database.list_databases() def test_create_schema(con_create_schema): schema = gen_name("test_create_schema") con_create_schema.create_schema(schema) + assert schema in con_create_schema.list_schemas() con_create_schema.drop_schema(schema) + assert schema not in con_create_schema.list_schemas() @pytest.mark.notimpl( From dffbda11eab44d8f1f236d0f51b1906208ac1932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 29 Dec 2023 15:17:30 +0100 Subject: [PATCH 024/161] refactor(ir): stricter scalar subquery integrity checks --- ibis/expr/operations/relations.py | 34 +++++++++++++++---------------- ibis/expr/rewrites.py | 4 ++-- ibis/expr/tests/test_newrels.py | 8 ++++++++ 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index 5ab1ee37c595..54ed7ab11fc9 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -112,8 +112,8 @@ def __init__(self, rel, **kwargs): @attribute def value(self): - name = self.rel.schema.names[0] - return self.rel.values[name] + (value,) = 
self.rel.values.values() + return value @attribute def relations(self): @@ -127,12 +127,13 @@ def dtype(self): @public class ScalarSubquery(Subquery): def __init__(self, rel): - from ibis.expr.rewrites import ReductionValue + from ibis.expr.operations import Reduction super().__init__(rel=rel) - if not self.value.find(ReductionValue, filter=Value): + if not isinstance(self.value, Reduction): raise IntegrityError( - f"Subquery {self.value!r} is not scalar, it must be turned into a scalar subquery first" + f"Subquery {self.value!r} is not a reduction, only " + "reductions can be used as scalar subqueries" ) @@ -146,8 +147,8 @@ class InSubquery(Subquery): needle: Value dtype = dt.boolean - def __init__(self, **kwargs): - super().__init__(**kwargs) + def __init__(self, rel, needle): + super().__init__(rel=rel, needle=needle) if not rlz.comparable(self.value, self.needle): raise IntegrityError( f"Subquery {self.needle!r} is not comparable to {self.value!r}" @@ -275,12 +276,13 @@ class Filter(Simple): predicates: VarTuple[Value[dt.Boolean]] def __init__(self, parent, predicates): - from ibis.expr.rewrites import ReductionValue + from ibis.expr.rewrites import ReductionLike for pred in predicates: - if pred.find(ReductionValue, filter=Value): + if pred.find(ReductionLike, filter=Value): raise IntegrityError( - f"Cannot add {pred!r} to filter, it is a reduction" + f"Cannot add {pred!r} to filter, it is a reduction which " + "must be converted to a scalar subquery first" ) if pred.relations and parent not in pred.relations: raise IntegrityError( @@ -291,6 +293,8 @@ def __init__(self, parent, predicates): @public class Limit(Simple): + # TODO(kszucs): dynamic limit should contain ScalarSubqueries rather than + # plain scalar values n: typing.Union[int, Scalar[dt.Integer], None] = None offset: typing.Union[int, Scalar[dt.Integer]] = 0 @@ -324,6 +328,7 @@ class Set(Relation): left: Relation right: Relation distinct: bool = False + values = FrozenDict() def __init__(self, left, right, **kwargs): # convert to dictionary first, to get key-unordered comparison semantics @@ -336,10 +341,6 @@ def __init__(self, left, right, **kwargs): right = Project(right, cols) super().__init__(left=left, right=right, **kwargs) - @attribute - def values(self): - return FrozenDict() - @attribute def schema(self): return self.left.schema @@ -363,10 +364,7 @@ class Difference(Set): @public class PhysicalTable(Relation): name: str - - @attribute - def values(self): - return FrozenDict() + values = FrozenDict() @public diff --git a/ibis/expr/rewrites.py b/ibis/expr/rewrites.py index 5cac1708e89b..d0d3cd013e7b 100644 --- a/ibis/expr/rewrites.py +++ b/ibis/expr/rewrites.py @@ -140,10 +140,10 @@ def rewrite_project_input(value, relation): ) -ReductionValue = p.Reduction | p.Field(p.Aggregate(groups={})) +ReductionLike = p.Reduction | p.Field(p.Aggregate(groups={})) -@replace(ReductionValue) +@replace(ReductionLike) def filter_wrap_reduction(_): # Wrap reductions or fields referencing an aggregation without a group by - # which are scalar fields - in a scalar subquery. 
In the latter case we diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py index 906284c1a32c..b904fb0f193a 100644 --- a/ibis/expr/tests/test_newrels.py +++ b/ibis/expr/tests/test_newrels.py @@ -139,11 +139,19 @@ def test_select_windowizing_analytic_function(): def test_subquery_integrity_check(): t = ibis.table(name="t", schema={"a": "int64", "b": "string"}) + agg = t.agg([t.a.sum(), t.a.mean()]) msg = "Subquery must have exactly one column, got 2" + with pytest.raises(IntegrityError, match=msg): + ops.ScalarSubquery(agg) with pytest.raises(IntegrityError, match=msg): ops.ScalarSubquery(t) + agg = t.agg(t.a.sum() + 1) + msg = "is not a reduction" + with pytest.raises(IntegrityError, match=msg): + ops.ScalarSubquery(agg) + def test_select_turns_scalar_reduction_into_subquery(): arr = ibis.literal([1, 2, 3]) From 5f89cc03256d62a0be65bacd99ed678096980c7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 29 Dec 2023 16:50:17 +0100 Subject: [PATCH 025/161] feat(common): add a memory efficient `Node.map()` implementation Alternative implementation of `map` to reduce memory usage. While `map` keeps all the results in memory until the end of the traversal, the new `map_clear()` method removes intermediate results as soon as they are not needed anymore. --- ibis/common/graph.py | 56 +++++++++++++++++++++++++++++++-- ibis/common/tests/test_graph.py | 24 +++++++++++++- 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/ibis/common/graph.py b/ibis/common/graph.py index b8bbe932dd95..89f81af4d8cd 100644 --- a/ibis/common/graph.py +++ b/ibis/common/graph.py @@ -245,15 +245,67 @@ def map(self, fn: Callable, filter: Optional[Finder] = None) -> dict[Node, Any]: A mapping of nodes to their results. """ results: dict[Node, Any] = {} - for node in Graph.from_bfs(self, filter=filter).toposort(): + + graph, _ = Graph.from_bfs(self, filter=filter).toposort() + for node in graph: # minor optimization to directly recurse into the children kwargs = { k: _recursive_lookup(v, results) for k, v in zip(node.__argnames__, node.__args__) } results[node] = fn(node, results, **kwargs) + return results + @experimental + def map_clear( + self, fn: Callable, filter: Optional[Finder] = None + ) -> dict[Node, Any]: + """Apply a function to all nodes in the graph more memory efficiently. + + Alternative implementation of `map` to reduce memory usage. While `map` keeps + all the results in memory until the end of the traversal, this method removes + intermediate results as soon as they are not needed anymore. + + Prefer this method over `map` if the results consume significant amount of + memory and if the intermediate results are not needed. + + Parameters + ---------- + fn + Function to apply to each node. It receives the node as the first argument, + the results as the second and the results of the children as keyword + arguments. + filter + Pattern-like object to filter out nodes from the traversal. The traversal + will only visit nodes that match the given pattern and stop otherwise. + + Returns + ------- + In contrast to `map`, this method returns the result of the root node only since + the rest of the results are already discarded. 
+ """ + results: dict[Node, Any] = {} + + graph, dependents = Graph.from_bfs(self, filter=filter).toposort() + dependents = {k: set(v) for k, v in dependents.items()} + + for node, dependencies in graph.items(): + kwargs = { + k: _recursive_lookup(v, results) + for k, v in zip(node.__argnames__, node.__args__) + } + results[node] = fn(node, results, **kwargs) + + # remove the results belonging to the dependencies if they are not + # needed by other nodes during the rest of the traversal + for dependency in dependencies: + dependents[dependency].remove(node) + if not dependents[dependency]: + del results[dependency] + + return results[self] + # TODO(kszucs): perhaps rename it to find_all() for better clarity def find( self, @@ -489,7 +541,7 @@ def toposort(self) -> Self: if any(in_degree.values()): raise ValueError("cycle detected in the graph") - return result + return result, dependents # these could be callables instead diff --git a/ibis/common/tests/test_graph.py b/ibis/common/tests/test_graph.py index 50839529aa9a..787926660a9e 100644 --- a/ibis/common/tests/test_graph.py +++ b/ibis/common/tests/test_graph.py @@ -101,8 +101,9 @@ def test_invert(): def test_toposort(): - g = Graph(A).toposort() + g, dependents = Graph(A).toposort() assert list(g.keys()) == [C, D, E, B, A] + assert dependents == Graph(A).invert() def test_toposort_cycle_detection(): @@ -427,3 +428,24 @@ def test_node_find_topmost_dont_traverse_the_same_node_twice(): result = E.find_topmost(If(_.name == "G")) expected = [G] assert result == expected + + +def test_map_clear(): + Z = MyNode(name="Z", children=[A]) + result_sequence = {} + + def record_result_keys(node, results, **kwargs): + result_sequence[node] = tuple(results.keys()) + return node + + expected_result_sequence = { + C: (), + D: (C,), + E: (C, D), + B: (C, D, E), + A: (C, B), + Z: (A,), + } + result = Z.map_clear(record_result_keys) + assert result == Z + assert result_sequence == expected_result_sequence From 1726e6ddc4845ee446576f0415891c8a2a2c2669 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 29 Dec 2023 17:54:33 +0100 Subject: [PATCH 026/161] fix(common): intermediate result removal fails if there are duplicated dependencies --- ibis/common/graph.py | 2 +- ibis/common/tests/test_graph.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/ibis/common/graph.py b/ibis/common/graph.py index 89f81af4d8cd..c27dcc480f01 100644 --- a/ibis/common/graph.py +++ b/ibis/common/graph.py @@ -299,7 +299,7 @@ def map_clear( # remove the results belonging to the dependencies if they are not # needed by other nodes during the rest of the traversal - for dependency in dependencies: + for dependency in set(dependencies): dependents[dependency].remove(node) if not dependents[dependency]: del results[dependency] diff --git a/ibis/common/tests/test_graph.py b/ibis/common/tests/test_graph.py index 787926660a9e..59d39a4f7733 100644 --- a/ibis/common/tests/test_graph.py +++ b/ibis/common/tests/test_graph.py @@ -431,7 +431,9 @@ def test_node_find_topmost_dont_traverse_the_same_node_twice(): def test_map_clear(): - Z = MyNode(name="Z", children=[A]) + Z = MyNode(name="Z", children=[A, A]) + Y = MyNode(name="Y", children=[A]) + X = MyNode(name="X", children=[Z, Y]) result_sequence = {} def record_result_keys(node, results, **kwargs): @@ -445,7 +447,9 @@ def record_result_keys(node, results, **kwargs): B: (C, D, E), A: (C, B), Z: (A,), + Y: (A, Z), + X: (Z, Y), } - result = Z.map_clear(record_result_keys) - assert result 
== Z + assert result_sequence == expected_result_sequence From d547d33139a93677597b167201c95dedf6eeaeb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 29 Dec 2023 15:28:23 +0100 Subject: [PATCH 027/161] refactor(api): revamp `asof` join predicates Previously the `ASOF` join API was imprecise. The backends supporting `asof` joins require exactly one nearest match (inequality) predicate along with an arbitrary number of ordinary join predicates, see [ClickHouse ASOF](https://clickhouse.com/docs/en/sql-reference/statements/select/join#asof-join-usage), [DuckDB ASOF](https://duckdb.org/docs/guides/sql_features/asof_join.html#asof-joins-with-the-using-keyword) and [Pandas ASOF](https://pandas.pydata.org/docs/reference/api/pandas.merge_asof.html). This change alters the API to `table.asof_join(left, right, on, predicates, ...)` where `on` is the nearest match predicate, defaulting to `left[on] <= right[on]` when an expression is not given. I kept the `by` argument for compatibility reasons, but we should phase that out in favor of `predicates`. Also ensure that all the join methods of `ir.Join` have the exact same docstrings as `ir.Table`. BREAKING CHANGE: the `on` parameter of `table.asof_join()` now accepts only a single predicate; use `predicates` to supply additional join predicates. --- ibis/backends/clickhouse/tests/test_select.py | 2 +- ibis/expr/operations/relations.py | 2 +- .../test_format/test_asof_join/repr.txt | 2 +- ibis/expr/tests/test_format.py | 2 +- ibis/expr/tests/test_newrels.py | 35 ++++- ibis/expr/types/joins.py | 145 +++++++++++++++--- ibis/expr/types/relations.py | 57 ++----- ibis/tests/expr/test_table.py | 14 +- 8 files changed, 178 insertions(+), 81 deletions(-) diff --git a/ibis/backends/clickhouse/tests/test_select.py b/ibis/backends/clickhouse/tests/test_select.py index b74ce39ff621..ffb4aeb4c355 100644 --- a/ibis/backends/clickhouse/tests/test_select.py +++ b/ibis/backends/clickhouse/tests/test_select.py @@ -371,9 +371,9 @@ def test_join_with_external_table(alltypes, df): def test_asof_join(time_left, time_right): expr = time_left.asof_join( time_right, + on=time_left["time"] >= time_right["time"], predicates=[ time_left["key"] == time_right["key"], - time_left["time"] >= time_right["time"], ], ).drop("time_right") result = expr.execute() diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index 54ed7ab11fc9..67c7bec36fdc 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -259,7 +259,7 @@ def schema(self): def to_expr(self): import ibis.expr.types as ir - return ir.JoinExpr(self) + return ir.Join(self) @public diff --git a/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt b/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt index 6c43f0adfc6b..6524ea7fb5b7 100644 --- a/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt @@ -8,7 +8,7 @@ r1 := UnboundTable: right JoinChain[r0] JoinLink[asof, r1] - r0.time1 == r1.time2 + r0.time1 <= r1.time2 JoinLink[inner, r1] r0.value == r1.value2 values: diff --git a/ibis/expr/tests/test_format.py b/ibis/expr/tests/test_format.py index cec850521eab..0ea0b4d94576 100644 --- a/ibis/expr/tests/test_format.py +++ b/ibis/expr/tests/test_format.py @@ -311,7 +311,7 @@ def test_fillna(snapshot): def test_asof_join(snapshot): left = ibis.table([("time1", "int32"), ("value", "double")],
name="left") right = ibis.table([("time2", "int32"), ("value2", "double")], name="right") - joined = left.asof_join(right, [("time1", "time2")]).inner_join( + joined = left.asof_join(right, ("time1", "time2")).inner_join( right, left.value == right.value2 ) diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py index b904fb0f193a..e7398755595f 100644 --- a/ibis/expr/tests/test_newrels.py +++ b/ibis/expr/tests/test_newrels.py @@ -487,14 +487,14 @@ def test_join(): t2 = ibis.table(name="t2", schema={"c": "int64", "d": "string"}) joined = t1.join(t2, [t1.a == t2.c]) - assert isinstance(joined, ir.JoinExpr) + assert isinstance(joined, ir.Join) assert isinstance(joined.op(), JoinChain) - assert isinstance(joined.op().to_expr(), ir.JoinExpr) + assert isinstance(joined.op().to_expr(), ir.Join) result = joined._finish() assert isinstance(joined, ir.TableExpr) assert isinstance(joined.op(), JoinChain) - assert isinstance(joined.op().to_expr(), ir.JoinExpr) + assert isinstance(joined.op().to_expr(), ir.Join) with join_tables(t1, t2) as (t1, t2): assert result.op() == JoinChain( @@ -1264,3 +1264,32 @@ def test_join_between_joins(): }, ) assert expr.op() == expected + + +def test_join_method_docstrings(): + t1 = ibis.table(name="t1", schema={"a": "int64", "b": "string"}) + t2 = ibis.table(name="t2", schema={"c": "int64", "d": "string"}) + joined = t1.join(t2, [t1.a == t2.c]) + + assert isinstance(t1, ir.Table) + assert isinstance(joined, ir.Join) + assert isinstance(joined, ir.Table) + + method_names = [ + "select", + "join", + "inner_join", + "left_join", + "outer_join", + "semi_join", + "anti_join", + "asof_join", + "cross_join", + "right_join", + "any_inner_join", + "any_left_join", + ] + for method in method_names: + join_method = getattr(joined, method) + table_method = getattr(t1, method) + assert join_method.__doc__ == table_method.__doc__ diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py index 0b2aa972e948..919515d9c1bc 100644 --- a/ibis/expr/types/joins.py +++ b/ibis/expr/types/joins.py @@ -1,22 +1,25 @@ -from ibis.expr.types.relations import ( - bind, - dereference_values, - unwrap_aliases, -) +from __future__ import annotations + +import functools from public import public -import ibis.expr.operations as ops -from ibis.expr.types import Table, ValueExpr from typing import Any, Optional from collections.abc import Iterator, Mapping + +import ibis +import ibis.expr.operations as ops + +from ibis import util +from ibis.expr.types import Table, ValueExpr from ibis.common.deferred import Deferred from ibis.expr.analysis import flatten_predicates -from ibis.expr.operations.relations import JoinKind from ibis.common.exceptions import ExpressionError, IntegrityError -from ibis import util -import functools -from ibis.expr.types.relations import dereference_mapping -import ibis - +from ibis.expr.types.relations import ( + bind, + dereference_values, + dereference_mapping, + unwrap_aliases, +) +from ibis.expr.operations.relations import JoinKind from ibis.expr.rewrites import peel_join_field @@ -91,9 +94,12 @@ def dereference_value(pred, deref_left, deref_right): return pred.replace(deref_both, filter=ops.Value) -def prepare_predicates(left, right, predicates, deref_left, deref_right): +def prepare_predicates( + left, right, predicates, deref_left, deref_right, comparison=ops.Equals +): """Bind and dereference predicates to the left and right tables.""" + left, right = left.to_expr(), right.to_expr() for pred in util.promote_list(predicates): if pred is True or 
pred is False: yield ops.Literal(pred, dtype="bool") @@ -120,7 +126,7 @@ def prepare_predicates(left, right, predicates, deref_left, deref_right): left_value, right_value = dereference_sides( left_value.op(), right_value.op(), deref_left, deref_right ) - yield ops.Equals(left_value, right_value).to_expr() + yield comparison(left_value, right_value) def finished(method): @@ -134,10 +140,18 @@ def wrapper(self, *args, **kwargs): @public -class JoinExpr(Table): +class Join(Table): __slots__ = ("_collisions",) def __init__(self, arg, collisions=None): + assert isinstance(arg, ops.Node) + if not isinstance(arg, ops.JoinChain): + # coerce the input node to a join chain operation by first wrapping + # the input relation in a JoinTable so that we can join the same + # table with itself multiple times and to enable optimization + # passes later on + arg = ops.JoinTable(arg, index=0) + arg = ops.JoinChain(arg, rest=(), values=arg.fields) super().__init__(arg) object.__setattr__(self, "_collisions", collisions or set()) @@ -147,7 +161,8 @@ def _finish(self) -> Table: raise IntegrityError(f"Name collisions: {self._collisions}") return Table(self.op()) - def join( + @functools.wraps(Table.join) + def join( # noqa: D102 self, right, predicates: Any, @@ -156,10 +171,10 @@ def join( lname: str = "", rname: str = "{name}_right", ): - """Join with another table.""" import pyarrow as pa import pandas as pd + # TODO(kszucs): factor out to a helper function if isinstance(right, (pd.DataFrame, pa.Table)): right = ibis.memtable(right) elif not isinstance(right, Table): @@ -169,6 +184,8 @@ def join( if how == "left_semi": how = "semi" + elif how == "asof": + raise IbisInputError("use table.asof_join(...) instead") left = self.op() right = ops.JoinTable(right, index=left.length) @@ -177,17 +194,17 @@ def join( # bind and dereference the predicates preds = prepare_predicates( - left.to_expr(), - right.to_expr(), + left, + right, predicates, deref_left=subs_left, deref_right=subs_right, ) preds = flatten_predicates(list(preds)) - - # if there are no predicates, default to every row matching unless the - # join is a cross join, because a cross join already has this behavior if not preds and how != "cross": + # if there are no predicates, default to every row matching unless + # the join is a cross join, because a cross join already has this + # behavior preds.append(ops.Literal(True, dtype="bool")) # calculate the fields based in lname and rname, this should be a best @@ -205,8 +222,83 @@ def join( # return with a new JoinExpr wrapping the new join chain return self.__class__(left, collisions=collisions) - def select(self, *args, **kwargs): - """Select expressions.""" + @functools.wraps(Table.asof_join) + def asof_join( # noqa: D102 + self: Table, + right: Table, + on, + predicates=(), + by=(), + tolerance=None, + *, + lname: str = "", + rname: str = "{name}_right", + ): + predicates = util.promote_list(predicates) + util.promote_list(by) + if tolerance is not None: + if not isinstance(on, str): + raise TypeError( + "tolerance can only be specified when predicates is a string" + ) + # construct a predicate with two sides from the two tables + predicates.append(self[on] <= right[on] + tolerance) + + left = self.op() + right = ops.JoinTable(right, index=left.length) + subs_left = dereference_mapping_left(left) + subs_right = dereference_mapping_right(right) + + # TODO(kszucs): add extra validation for `on` with clear error messages + preds = list( + prepare_predicates( + left, + right, + [on], + deref_left=subs_left, + 
deref_right=subs_right, + comparison=ops.LessEqual, + ) + ) + preds += flatten_predicates( + list( + prepare_predicates( + left, + right, + predicates, + deref_left=subs_left, + deref_right=subs_right, + comparison=ops.Equals, + ) + ) + ) + values, collisions = disambiguate_fields( + "asof", left.values, right.fields, lname, rname + ) + + # construct a new join link and add it to the join chain + link = ops.JoinLink("asof", table=right, predicates=preds) + left = left.copy(rest=left.rest + (link,), values=values) + + # return with a new JoinExpr wrapping the new join chain + return self.__class__(left, collisions=collisions) + + @functools.wraps(Table.cross_join) + def cross_join( # noqa: D102 + self: Table, + right: Table, + *rest: Table, + lname: str = "", + rname: str = "{name}_right", + ): + left = self.join(right, how="cross", predicates=(), lname=lname, rname=rname) + for right in rest: + left = left.join( + right, how="cross", predicates=(), lname=lname, rname=rname + ) + return left + + @functools.wraps(Table.select) + def select(self, *args, **kwargs): # noqa: D102 chain = self.op() values = bind(self, (args, kwargs)) values = unwrap_aliases(values) @@ -245,3 +337,6 @@ def select(self, *args, **kwargs): unbind = finished(Table.unbind) union = finished(Table.union) view = finished(Table.view) + + +public(JoinExpr=Join) diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index 1058d12941f6..cbc9285e6877 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -2991,28 +2991,17 @@ def join( │ 106782 │ Leonardo DiCaprio │ 5989 │ Leonardo DiCaprio │ └─────────┴───────────────────┴───────────────┴───────────────────┘ """ - from ibis.expr.types.joins import JoinExpr - - left = left.op() - if isinstance(left, ops.JoinChain): - # if the left side is already a join chain, we can reuse it, for - # example in the `a.join(b)[fields].join(c)` expression the first - # join followed by a projection `a.join(b)[...]` constructs a - # `ir.Table(ops.JoinChain())` expression, which we can reuse here - expr = left.to_expr() - else: - # all participants of the join must be wrapped in JoinTable nodes - # so that we can join the same table with itself multiple times and - # to enable optimization passes later on - left = ops.JoinTable(left, index=0) - expr = ops.JoinChain(left, rest=(), values=left.fields).to_expr() + from ibis.expr.types.joins import Join - return expr.join(right, predicates, how=how, lname=lname, rname=rname) + return Join(left.op()).join( + right, predicates, how=how, lname=lname, rname=rname + ) def asof_join( left: Table, right: Table, - predicates: str | ir.BooleanColumn | Sequence[str | ir.BooleanColumn] = (), + on: str | ir.BooleanColumn, + predicates: str | ir.Column | Sequence[str | ir.Column] = (), by: str | ir.Column | Sequence[str | ir.Column] = (), tolerance: str | ir.IntervalScalar | None = None, *, @@ -3032,10 +3021,10 @@ def asof_join( Table expression right Table expression + on + Closest match inequality condition predicates - Join expressions - by - column to group by before joining + Additional join predicates tolerance Amount of time to look behind when joining lname @@ -3050,22 +3039,11 @@ def asof_join( Table Table expression """ - if by: - # `by` is an argument that comes from pandas, which for pandas was - # a convenient and fast way to perform a standard join before the - # asof join, so we implement the equivalent behavior here for - # consistency across backends. 
- left = left.join(right, by, lname=lname, rname=rname) - - if tolerance is not None: - if not isinstance(predicates, str): - raise TypeError( - "tolerance can only be specified when predicates is a string" - ) - left_key, right_key = left[predicates], right[predicates] - predicates = [left_key == right_key, left_key - right_key <= tolerance] + from ibis.expr.types.joins import Join - return left.join(right, predicates, how="asof", lname=lname, rname=rname) + return Join(left.op()).asof_join( + right, on, predicates, by=by, tolerance=tolerance, lname=lname, rname=rname + ) def cross_join( left: Table, @@ -3141,12 +3119,9 @@ def cross_join( >>> expr.count() 344 """ - left = left.join(right, how="cross", predicates=(), lname=lname, rname=rname) - for right in rest: - left = left.join( - right, how="cross", predicates=(), lname=lname, rname=rname - ) - return left + from ibis.expr.types.joins import Join + + return Join(left.op()).cross_join(right, *rest, lname=lname, rname=rname) inner_join = _regular_join_method("inner_join", "inner") left_join = _regular_join_method("left_join", "left") diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index b2ee00a2ec27..cc97eeefa316 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -923,7 +923,7 @@ def test_asof_join_with_by(): r2 = join_without_by.op().rest[0].table.to_expr() expected = ops.JoinChain( first=r1, - rest=[ops.JoinLink("asof", r2, [r1.time == r2.time])], + rest=[ops.JoinLink("asof", r2, [r1.time <= r2.time])], values={ "time": r1.time, "key": r1.key, @@ -936,12 +936,11 @@ def test_asof_join_with_by(): assert join_without_by.op() == expected join_with_by = api.asof_join(left, right, "time", by="key") - with join_tables(left, right, right) as (r1, r2, r3): + with join_tables(left, right) as (r1, r2): expected = ops.JoinChain( first=r1, rest=[ - ops.JoinLink("inner", r2, [r1.key == r2.key]), - ops.JoinLink("asof", r3, [r1.time == r3.time]), + ops.JoinLink("asof", r2, [r1.time <= r2.time, r1.key == r2.key]), ], values={ "time": r1.time, @@ -950,7 +949,6 @@ def test_asof_join_with_by(): "time_right": r2.time, "key_right": r2.key, "value2": r2.value2, - "value2_right": r3.value2, }, ) assert join_with_by.op() == expected @@ -975,8 +973,8 @@ def test_asof_join_with_by(): ], ) def test_asof_join_with_tolerance(ibis_interval, timedelta_interval): - left = ibis.table([("time", "int32"), ("key", "int32"), ("value", "double")]) - right = ibis.table([("time", "int32"), ("key", "int32"), ("value2", "double")]) + left = ibis.table([("time", "timestamp"), ("key", "int32"), ("value", "double")]) + right = ibis.table([("time", "timestamp"), ("key", "int32"), ("value2", "double")]) for interval in [ibis_interval, timedelta_interval]: joined = api.asof_join(left, right, "time", tolerance=interval) @@ -987,7 +985,7 @@ def test_asof_join_with_tolerance(ibis_interval, timedelta_interval): ops.JoinLink( "asof", r2, - [r1.time == r2.time, (r1.time - r2.time) <= interval], + [r1.time <= r2.time, r1.time <= (r2.time + interval)], ) ], values={ From 1d1c541bca80485fa9ce1060f18980131bc27d99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 2 Jan 2024 23:01:11 +0100 Subject: [PATCH 028/161] fix(ir): self reference fields were incorrectly dereferenced to the parent relation --- .../out.sql | 2 +- .../out.sql | 2 +- .../test_where_correlated_subquery/out.sql | 2 +- ibis/expr/operations/relations.py | 4 +++ ibis/expr/tests/test_dereference.py | 25 +++++++++++++++++++ 5 files changed, 
32 insertions(+), 3 deletions(-) create mode 100644 ibis/expr/tests/test_dereference.py diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql index d262b49d64c1..38b614333084 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql @@ -1,5 +1,5 @@ SELECT - t7.r_name, + t2.r_name, t7.n_name FROM tpch_region AS t2 INNER JOIN tpch_nation AS t3 diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql index ab07ecf75d0e..f9302546380a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql @@ -70,7 +70,7 @@ WHERE ON t7.o_custkey = t6.c_custkey ) AS t12 WHERE - t12.region = t12.region + t12.region = t11.region ) AS t13 ) LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql index 0b1767f7a740..ee5a1da42bd3 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql @@ -16,6 +16,6 @@ WHERE t1.y FROM foo AS t1 WHERE - t1.dept_id = t1.dept_id + t0.dept_id = t1.dept_id ) AS t2 ) \ No newline at end of file diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index 67c7bec36fdc..65627c0d506b 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -205,6 +205,10 @@ def __init__(self, parent, identifier): identifier = next(self._uid_counter) super().__init__(parent=parent, identifier=identifier) + @attribute + def values(self): + return FrozenDict() + JoinKind = Literal[ "inner", diff --git a/ibis/expr/tests/test_dereference.py b/ibis/expr/tests/test_dereference.py new file mode 100644 index 000000000000..8f827d218f50 --- /dev/null +++ b/ibis/expr/tests/test_dereference.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import ibis +from ibis.expr.types.relations import dereference_mapping + +t = ibis.table( + [ + ("int_col", "int32"), + ("double_col", "double"), + ("string_col", "string"), + ], + name="t", +) + + +def dereference_expect(expected): + return {k.op(): v.op() for k, v in expected.items()} + + +def test_dereference_mapping_self_reference(): + v = t.view() + + mapping = dereference_mapping([v.op()]) + expected = dereference_expect({}) + assert mapping == expected From 78d38c7d6b6828db9a81f218219315d45da8f835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 2 Jan 2024 17:43:44 +0100 Subject: [PATCH 029/161] fix(rewrites): add missing filter arguments for `node.replace()` calls --- ibis/backends/base/sqlglot/rewrites.py | 6 +++--- ibis/expr/rewrites.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ibis/backends/base/sqlglot/rewrites.py b/ibis/backends/base/sqlglot/rewrites.py index ca999208aa39..2214c0652e23 100644 --- a/ibis/backends/base/sqlglot/rewrites.py +++ 
b/ibis/backends/base/sqlglot/rewrites.py @@ -104,13 +104,13 @@ def merge_select_select(_): if v.find((Window, ops.Unnest), filter=ops.Value): return _ for v in _.predicates: - if v.find(ops.ExistsSubquery, filter=ops.Value): + if v.find((ops.ExistsSubquery, ops.InSubquery), filter=ops.Value): return _ subs = {ops.Field(_.parent, k): v for k, v in _.parent.values.items()} - selections = {k: v.replace(subs) for k, v in _.selections.items()} + selections = {k: v.replace(subs, filter=ops.Value) for k, v in _.selections.items()} predicates = tuple(p.replace(subs, filter=ops.Value) for p in _.predicates) - sort_keys = tuple(s.replace(subs) for s in _.sort_keys) + sort_keys = tuple(s.replace(subs, filter=ops.Value) for s in _.sort_keys) return Select( _.parent.parent, diff --git a/ibis/expr/rewrites.py b/ibis/expr/rewrites.py index d0d3cd013e7b..840cb59459e7 100644 --- a/ibis/expr/rewrites.py +++ b/ibis/expr/rewrites.py @@ -254,25 +254,25 @@ def complete_reprojection(_, y): @replace(p.Project(y @ p.Project)) def subsequent_projects(_, y): rule = p.Field(y, name) >> Item(y.values, name) - values = {k: v.replace(rule) for k, v in _.values.items()} + values = {k: v.replace(rule, filter=ops.Value) for k, v in _.values.items()} return ops.Project(y.parent, values) @replace(p.Filter(y @ p.Filter)) def subsequent_filters(_, y): rule = p.Field(y, name) >> d.Field(y.parent, name) - preds = tuple(v.replace(rule) for v in _.predicates) + preds = tuple(v.replace(rule, filter=ops.Value) for v in _.predicates) return ops.Filter(y.parent, y.predicates + preds) @replace(p.Filter(y @ p.Project)) def reorder_filter_project(_, y): rule = p.Field(y, name) >> Item(y.values, name) - preds = tuple(v.replace(rule) for v in _.predicates) + preds = tuple(v.replace(rule, filter=ops.Value) for v in _.predicates) inner = ops.Filter(y.parent, preds) rule = p.Field(y.parent, name) >> d.Field(inner, name) - projs = {k: v.replace(rule) for k, v in y.values.items()} + projs = {k: v.replace(rule, filter=ops.Value) for k, v in y.values.items()} return ops.Project(inner, projs) From 2f7640fb8d584c5bb8442121021f0aab0dd73a83 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 26 Dec 2023 09:09:51 -0500 Subject: [PATCH 030/161] refactor(snowflake): use sqlglot for the snowflake backend --- .github/renovate.json | 5 +- .github/workflows/ibis-backends.yml | 2 +- ibis/backends/base/sqlglot/compiler.py | 15 +- ibis/backends/base/sqlglot/datatypes.py | 20 + ibis/backends/base/sqlglot/rewrites.py | 10 + ibis/backends/conftest.py | 1 - ibis/backends/datafusion/compiler.py | 4 - ibis/backends/duckdb/compiler.py | 5 + ibis/backends/duckdb/tests/test_client.py | 1 - ibis/backends/oracle/__init__.py | 7 +- ibis/backends/snowflake/__init__.py | 760 +++++++++--------- ibis/backends/snowflake/compiler.py | 676 ++++++++++++++++ ibis/backends/snowflake/datatypes.py | 78 -- ibis/backends/snowflake/registry.py | 584 -------------- ibis/backends/snowflake/tests/conftest.py | 83 +- ibis/backends/snowflake/tests/test_client.py | 21 +- .../snowflake/tests/test_datatypes.py | 68 +- ibis/backends/snowflake/tests/test_udf.py | 12 +- ibis/backends/tests/errors.py | 5 + .../test_default_limit/snowflake/out.sql | 5 + .../snowflake/out.sql | 5 + .../snowflake/out.sql | 3 + .../test_respect_set_limit/snowflake/out.sql | 10 + .../test_sql/test_isin_bug/snowflake/out.sql | 6 +- ibis/backends/tests/test_aggregation.py | 10 +- ibis/backends/tests/test_benchmarks.py | 1 - ibis/backends/tests/test_client.py | 3 +- 
ibis/backends/tests/test_export.py | 5 +- ibis/backends/tests/test_generic.py | 15 +- ibis/backends/tests/test_interactive.py | 4 +- ibis/backends/tests/test_numeric.py | 43 +- ibis/backends/tests/test_temporal.py | 25 +- ibis/backends/tests/test_window.py | 7 +- .../test_h01/test_tpc_h01/snowflake/h01.sql | 24 +- .../test_h02/test_tpc_h02/snowflake/h02.sql | 206 ++--- .../test_h03/test_tpc_h03/snowflake/h03.sql | 176 ++-- .../test_h04/test_tpc_h04/snowflake/h04.sql | 70 +- .../test_h05/test_tpc_h05/snowflake/h05.sql | 236 +++--- .../test_h07/test_tpc_h07/snowflake/h07.sql | 81 +- .../test_h08/test_tpc_h08/snowflake/h08.sql | 80 +- .../test_h09/test_tpc_h09/snowflake/h09.sql | 66 +- .../test_h10/test_tpc_h10/snowflake/h10.sql | 210 ++--- .../test_h11/test_tpc_h11/snowflake/h11.sql | 172 ++-- .../test_h12/test_tpc_h12/snowflake/h12.sql | 136 ++-- .../test_h13/test_tpc_h13/snowflake/h13.sql | 62 +- .../test_h14/test_tpc_h14/snowflake/h14.sql | 122 +-- .../test_h15/test_tpc_h15/snowflake/h15.sql | 84 +- .../test_h16/test_tpc_h16/snowflake/h16.sql | 102 +-- .../test_h17/test_tpc_h17/snowflake/h17.sql | 156 ++-- .../test_h18/test_tpc_h18/snowflake/h18.sql | 188 ++--- .../test_h19/test_tpc_h19/snowflake/h19.sql | 156 ++-- .../test_h20/test_tpc_h20/snowflake/h20.sql | 68 +- .../test_h21/test_tpc_h21/snowflake/h21.sql | 146 ++-- .../test_h22/test_tpc_h22/snowflake/h22.sql | 49 +- ibis/backends/tests/tpch/test_h08.py | 5 - ibis/backends/tests/tpch/test_h11.py | 7 - ibis/backends/tests/tpch/test_h14.py | 5 - ibis/backends/tests/tpch/test_h17.py | 5 - ibis/backends/tests/tpch/test_h21.py | 8 - ibis/backends/tests/tpch/test_h22.py | 7 - pyproject.toml | 16 +- 61 files changed, 2610 insertions(+), 2532 deletions(-) create mode 100644 ibis/backends/snowflake/compiler.py delete mode 100644 ibis/backends/snowflake/datatypes.py delete mode 100644 ibis/backends/snowflake/registry.py create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/snowflake/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/snowflake/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/snowflake/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/snowflake/out.sql diff --git a/.github/renovate.json b/.github/renovate.json index 8f6329da3b2a..9fbc4f63a7bd 100644 --- a/.github/renovate.json +++ b/.github/renovate.json @@ -82,10 +82,7 @@ "addLabels": ["pyspark"] }, { - "matchPackagePatterns": [ - "snowflake-connector-python", - "snowflake-sqlalchemy" - ], + "matchPackagePatterns": ["snowflake-connector-python"], "addLabels": ["snowflake"] }, { diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index 90208910fa1b..ff7caf83b69d 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -621,7 +621,7 @@ jobs: # - run: python -m pip install --upgrade pip 'poetry==1.7.1' # # - name: remove deps that are not compatible with sqlalchemy 2 - # run: poetry remove snowflake-sqlalchemy sqlalchemy-exasol + # run: poetry remove sqlalchemy-exasol # # - name: add sqlalchemy 2 # run: poetry add --lock --optional 'sqlalchemy>=2,<3' diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index ecba280b7867..fbfdd0082157 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -50,9 +50,13 @@ def __getitem__(self, key: str) -> partial: 
class FuncGen: - __slots__ = () + __slots__ = ("namespace",) + + def __init__(self, namespace: str | None = None) -> None: + self.namespace = namespace def __getattr__(self, name: str) -> partial: + name = ".".join(filter(None, (self.namespace, name))) return lambda *args, **kwargs: sg.func(name, *map(sge.convert, args), **kwargs) def __getitem__(self, key: str) -> partial: @@ -413,15 +417,10 @@ def visit_Time(self, op, *, arg): @visit_node.register(ops.TimestampNow) def visit_TimestampNow(self, op): - """DuckDB current timestamp defaults to timestamp + tz.""" - return self.cast(sge.CurrentTimestamp(), dt.timestamp) + return sge.CurrentTimestamp() @visit_node.register(ops.Strftime) def visit_Strftime(self, op, *, arg, format_str): - if not isinstance(op.format_str, ops.Literal): - raise com.UnsupportedOperationError( - f"{self.dialect} `format_str` must be a literal `str`; got {type(op.format_str)}" - ) return sge.TimeToStr(this=arg, format=format_str) @visit_node.register(ops.ExtractEpochSeconds) @@ -541,7 +540,7 @@ def visit_StringFind(self, op, *, arg, substr, start, end): @visit_node.register(ops.RegexSearch) def visit_RegexSearch(self, op, *, arg, pattern): - return self.f.regexp_matches(arg, pattern, "s") + return sge.RegexpLike(this=arg, expression=pattern, flag=sge.convert("s")) @visit_node.register(ops.RegexReplace) def visit_RegexReplace(self, op, *, arg, pattern, replacement): diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index e3244246e6fe..46a7c996c996 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -460,6 +460,10 @@ class OracleType(SqlglotType): class SnowflakeType(SqlglotType): dialect = "snowflake" + + default_decimal_precision = 38 + default_decimal_scale = 9 + default_temporal_scale = 9 @classmethod @@ -478,6 +482,22 @@ def _from_sqlglot_ARRAY(cls, value_type=None) -> dt.Array: assert value_type is None return dt.Array(dt.json, nullable=cls.default_nullable) + @classmethod + def _from_ibis_JSON(cls, dtype: dt.JSON) -> sge.DataType: + return sge.DataType(this=sge.DataType.Type.VARIANT) + + @classmethod + def _from_ibis_Array(cls, dtype: dt.Array) -> sge.DataType: + return sge.DataType(this=sge.DataType.Type.ARRAY, nested=True) + + @classmethod + def _from_ibis_Map(cls, dtype: dt.Map) -> sge.DataType: + return sge.DataType(this=sge.DataType.Type.OBJECT, nested=True) + + @classmethod + def _from_ibis_Struct(cls, dtype: dt.Struct) -> sge.DataType: + return sge.DataType(this=sge.DataType.Type.OBJECT, nested=True) + class SQLiteType(SqlglotType): dialect = "sqlite" diff --git a/ibis/backends/base/sqlglot/rewrites.py b/ibis/backends/base/sqlglot/rewrites.py index 2214c0652e23..4b8341329980 100644 --- a/ibis/backends/base/sqlglot/rewrites.py +++ b/ibis/backends/base/sqlglot/rewrites.py @@ -88,6 +88,16 @@ def window_function_to_window(_): ) +@replace(p.Log2) +def replace_log2(_): + return ops.Log(_.arg, base=2) + + +@replace(p.Log10) +def replace_log10(_): + return ops.Log(_.arg, base=10) + + @replace(Object(Select, Object(Select))) def merge_select_select(_): """Merge subsequent Select relations into one. 
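[editorial note, not part of the upstream patch] A minimal sketch of what the new
SnowflakeType mappings above are expected to produce. The module path and the
`to_string` helper are taken from elsewhere in this diff; the rendered strings are
what the mappings suggest, not verified output:

    import ibis.expr.datatypes as dt
    from ibis.backends.base.sqlglot.datatypes import SnowflakeType

    # nested ibis types all land on Snowflake's semi-structured types
    SnowflakeType.to_string(dt.Array(dt.int64))           # roughly "ARRAY"
    SnowflakeType.to_string(dt.Map(dt.string, dt.int64))  # roughly "OBJECT"
    SnowflakeType.to_string(dt.Struct({"a": dt.int64}))   # roughly "OBJECT"
    SnowflakeType.to_string(dt.json)                      # roughly "VARIANT"
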
diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index e177b32324dd..2dfabcc8e06d 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -541,7 +541,6 @@ def ddl_con(ddl_backend): "oracle", "postgres", "risingwave", - "snowflake", "sqlite", "trino", ) diff --git a/ibis/backends/datafusion/compiler.py b/ibis/backends/datafusion/compiler.py index 92d3fd70a66f..3166c1c3b4d3 100644 --- a/ibis/backends/datafusion/compiler.py +++ b/ibis/backends/datafusion/compiler.py @@ -171,10 +171,6 @@ def visit_RegexExtract(self, op, *, arg, pattern, index): ) return self.f.regexp_match(arg, self.f.concat("(", pattern, ")"))[index] - # @visit_node.register(ops.RegexReplace) - # def regex_replace(self, op, *, arg, pattern, replacement): - # return self.f.regexp_replace(arg, pattern, replacement, sg.exp.convert("g")) - @visit_node.register(ops.StringFind) def visit_StringFind(self, op, *, arg, substr, start, end): if end is not None: diff --git a/ibis/backends/duckdb/compiler.py b/ibis/backends/duckdb/compiler.py index c84ee4b0d162..6afe634d4280 100644 --- a/ibis/backends/duckdb/compiler.py +++ b/ibis/backends/duckdb/compiler.py @@ -335,6 +335,11 @@ def visit_GeoConvert(self, op, *, arg, source, target): def visit_HexDigest(self, op, *, arg, how): return self.f[how](arg) + @visit_node.register(ops.TimestampNow) + def visit_TimestampNow(self, op): + """DuckDB current timestamp defaults to timestamp + tz.""" + return self.cast(super().visit_TimestampNow(op), dt.timestamp) + _SIMPLE_OPS = { ops.ArrayPosition: "list_indexof", diff --git a/ibis/backends/duckdb/tests/test_client.py b/ibis/backends/duckdb/tests/test_client.py index 08cee6fb0954..e01467aa5f65 100644 --- a/ibis/backends/duckdb/tests/test_client.py +++ b/ibis/backends/duckdb/tests/test_client.py @@ -189,7 +189,6 @@ def test_insert(con): assert t.count().execute() == 2 -@pytest.mark.xfail(reason="snowflake backend not yet rewritten") def test_to_other_sql(con, snapshot): pytest.importorskip("snowflake.connector") diff --git a/ibis/backends/oracle/__init__.py b/ibis/backends/oracle/__init__.py index ea8e34d5bc34..cd5ca715f28e 100644 --- a/ibis/backends/oracle/__init__.py +++ b/ibis/backends/oracle/__init__.py @@ -16,11 +16,10 @@ # Wow, this is truly horrible # Get out your clippers, it's time to shave a yak. # -# 1. snowflake-sqlalchemy doesn't support sqlalchemy 2.0 -# 2. oracledb is only supported in sqlalchemy 2.0 -# 3. Ergo, module hacking is required to avoid doing a silly amount of work +# 1. oracledb is only supported in sqlalchemy 2.0 +# 2. Ergo, module hacking is required to avoid doing a silly amount of work # to create multiple lockfiles or port snowflake away from sqlalchemy -# 4. Also the version needs to be spoofed to be >= 7 or else the cx_Oracle +# 3. 
Also the version needs to be spoofed to be >= 7 or else the cx_Oracle # dialect barfs oracledb.__version__ = oracledb.version = "7" diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index 0409365478f4..aa1258f24b7b 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -9,22 +9,22 @@ import json import os import platform -import re import shutil import sys import tempfile import textwrap import warnings +from operator import itemgetter from pathlib import Path from typing import TYPE_CHECKING, Any +from urllib.parse import parse_qs, urlparse from urllib.request import urlretrieve import pyarrow as pa import pyarrow_hotfix # noqa: F401 -import sqlalchemy as sa import sqlglot as sg +import sqlglot.expressions as sge from packaging.version import parse as vparse -from sqlalchemy.ext.compiler import compiles import ibis import ibis.common.exceptions as com @@ -32,13 +32,11 @@ import ibis.expr.operations as ops import ibis.expr.types as ir from ibis import util -from ibis.backends.base import CanCreateDatabase -from ibis.backends.base.sql.alchemy import ( - AlchemyCanCreateSchema, - AlchemyCompiler, - AlchemyCrossSchemaBackend, - AlchemyExprTranslator, -) +from ibis.backends.base import CanCreateDatabase, CanCreateSchema +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.base.sqlglot.datatypes import SnowflakeType +from ibis.backends.snowflake.compiler import SnowflakeCompiler +from ibis.backends.snowflake.converter import SnowflakePandasData with warnings.catch_warnings(): if vparse(importlib.metadata.version("snowflake-connector-python")) >= vparse( @@ -49,11 +47,7 @@ message="You have an incompatible version of 'pyarrow' installed", category=UserWarning, ) - from snowflake.sqlalchemy import ARRAY, DOUBLE, OBJECT, URL, VARCHAR - - from ibis.backends.snowflake.converter import SnowflakePandasData - from ibis.backends.snowflake.datatypes import SnowflakeType - from ibis.backends.snowflake.registry import operation_registry + import snowflake.connector as sc if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Mapping @@ -63,42 +57,20 @@ import ibis.expr.schema as sch -class SnowflakeExprTranslator(AlchemyExprTranslator): - _registry = operation_registry - _rewrites = AlchemyExprTranslator._rewrites.copy() - _has_reduction_filter_syntax = False - _forbids_frame_clause = ( - *AlchemyExprTranslator._forbids_frame_clause, - ops.Lag, - ops.Lead, - ) - _require_order_by = (*AlchemyExprTranslator._require_order_by, ops.Reduction) - _dialect_name = "snowflake" - _quote_column_names = True - _quote_table_names = True - supports_unnest_in_select = False - type_mapper = SnowflakeType - - -class SnowflakeCompiler(AlchemyCompiler): - cheap_in_memory_tables = True - translator_class = SnowflakeExprTranslator - - _SNOWFLAKE_MAP_UDFS = { "ibis_udfs.public.object_merge": { - "inputs": {"obj1": OBJECT, "obj2": OBJECT}, - "returns": OBJECT, + "inputs": {"obj1": "OBJECT", "obj2": "OBJECT"}, + "returns": "OBJECT", "source": "return Object.assign(obj1, obj2)", }, "ibis_udfs.public.object_values": { - "inputs": {"obj": OBJECT}, - "returns": ARRAY, + "inputs": {"obj": "OBJECT"}, + "returns": "ARRAY", "source": "return Object.values(obj)", }, "ibis_udfs.public.array_zip": { - "inputs": {"arrays": ARRAY}, - "returns": ARRAY, + "inputs": {"arrays": "ARRAY"}, + "returns": "ARRAY", "source": """\ const longest = arrays.reduce((a, b) => a.length > b.length ? 
a : b, []); const keys = Array.from(Array(arrays.length).keys()).map(key => `f${key + 1}`); @@ -109,22 +81,16 @@ class SnowflakeCompiler(AlchemyCompiler): "ibis_udfs.public.array_repeat": { # Integer inputs are not allowed because JavaScript only supports # doubles - "inputs": {"value": ARRAY, "count": DOUBLE}, - "returns": ARRAY, + "inputs": {"value": "ARRAY", "count": "DOUBLE"}, + "returns": "ARRAY", "source": """return Array(count).fill(value).flat();""", }, - "ibis_udfs.public.regexp_split": { - "inputs": {"value": VARCHAR, "pattern": VARCHAR}, - "returns": ARRAY, - "source": """return value.split(new RegExp(pattern, "g"));""", - }, } -class Backend(AlchemyCrossSchemaBackend, CanCreateDatabase, AlchemyCanCreateSchema): +class Backend(SQLGlotBackend, CanCreateDatabase, CanCreateSchema): name = "snowflake" - compiler = SnowflakeCompiler - supports_create_or_replace = True + compiler = SnowflakeCompiler() supports_python_udfs = True _latest_udf_python_version = (3, 10) @@ -133,31 +99,85 @@ def _convert_kwargs(self, kwargs): with contextlib.suppress(KeyError): kwargs["account"] = kwargs.pop("host") + def _from_url(self, url: str, **kwargs): + """Connect to a backend using a URL `url`. + + Parameters + ---------- + url + URL with which to connect to a backend. + kwargs + Additional keyword arguments + + Returns + ------- + BaseBackend + A backend instance + """ + + url = urlparse(url) + database, schema = url.path[1:].split("/", 1) + query_params = parse_qs(url.query) + (warehouse,) = query_params.pop("warehouse", (None,)) + connect_args = { + "user": url.username, + "password": url.password or "", + "account": url.hostname, + "warehouse": warehouse, + "database": database or "", + "schema": schema or "", + } + + for name, value in query_params.items(): + if len(value) > 1: + connect_args[name] = value + elif len(value) == 1: + connect_args[name] = value[0] + else: + raise com.IbisError(f"Invalid URL parameter: {name}") + + session_parameters = kwargs.setdefault("session_parameters", {}) + + session_parameters["MULTI_STATEMENT_COUNT"] = 0 + session_parameters["JSON_INDENT"] = 0 + session_parameters["PYTHON_CONNECTOR_QUERY_RESULT_FORMAT"] = "arrow_force" + + kwargs.update(connect_args) + self._convert_kwargs(kwargs) + + if "database" in kwargs and not kwargs["database"]: + del kwargs["database"] + + if "schema" in kwargs and not kwargs["schema"]: + del kwargs["schema"] + + if "password" in kwargs and kwargs["password"] is None: + kwargs["password"] = "" + return self.connect(**kwargs) + @property def version(self) -> str: - return self._scalar_query(sa.select(sa.func.current_version())) + with self._safe_raw_sql(sg.select(sg.func("current_version"))) as cur: + (version,) = cur.fetchone() + return version @property def current_schema(self) -> str: - with self.con.connect() as con: - return con.connection.schema + return self.con.schema @property def current_database(self) -> str: - with self.con.connect() as con: - return con.connection.database - - def _compile_sqla_type(self, typ) -> str: - return sa.types.to_instance(typ).compile(dialect=self.con.dialect) + return self.con.database def _make_udf(self, name: str, defn) -> str: - dialect = self.con.dialect - quote = dialect.preparer(dialect).quote_identifier signature = ", ".join( - f"{quote(argname)} {self._compile_sqla_type(typ)}" + "{} {}".format( + sg.to_identifier(argname, quoted=self.compiler.quoted).sql(self.name), + typ, + ) for argname, typ in defn["inputs"].items() ) - return_type = self._compile_sqla_type(defn["returns"]) + 
return_type = defn["returns"] return f"""\ CREATE OR REPLACE FUNCTION {name}({signature}) RETURNS {return_type} @@ -167,17 +187,7 @@ def _make_udf(self, name: str, defn) -> str: AS $$ {defn["source"]} $$""" - def do_connect( - self, - user: str, - account: str, - database: str, - password: str | None = None, - authenticator: str | None = None, - connect_args: Mapping[str, Any] | None = None, - create_object_udfs: bool = True, - **kwargs: Any, - ): + def do_connect(self, create_object_udfs: bool = True, **kwargs: Any): """Connect to Snowflake. Parameters @@ -206,95 +216,71 @@ def do_connect( Enable object UDF extensions defined by ibis on the first connection to the database. connect_args - Additional arguments passed to the SQLAlchemy engine creation call. + Additional arguments passed to the DBAPI connection call. kwargs - Additional arguments passed to the SQLAlchemy URL constructor. - See https://docs.snowflake.com/en/developer-guide/python-connector/sqlalchemy#additional-connection-parameters - for more details + Additional arguments passed to the URL constructor. """ - dbparams = dict(zip(("database", "schema"), database.split("/", 1))) - if dbparams.get("schema") is None: - raise ValueError( - "Schema must be non-None. Pass the schema as part of the " - f"database e.g., {dbparams['database']}/my_schema" - ) - - # snowflake-connector-python does not handle `None` for password, but - # accepts the empty string - url = URL( - account=account, user=user, password=password or "", **dbparams, **kwargs - ) - if connect_args is None: - connect_args = {} - - session_parameters = connect_args.setdefault("session_parameters", {}) + connect_args = kwargs.copy() + session_parameters = connect_args.pop("session_parameters", {}) # enable multiple SQL statements by default - session_parameters.setdefault("MULTI_STATEMENT_COUNT", "0") + session_parameters.setdefault("MULTI_STATEMENT_COUNT", 0) # don't format JSON output by default - session_parameters.setdefault("JSON_INDENT", "0") + session_parameters.setdefault("JSON_INDENT", 0) # overwrite session parameters that are required for ibis + snowflake # to work session_parameters.update( dict( # Use Arrow for query results - PYTHON_CONNECTOR_QUERY_RESULT_FORMAT="ARROW", + PYTHON_CONNECTOR_QUERY_RESULT_FORMAT="arrow_force", # JSON output must be strict for null versus undefined - STRICT_JSON_OUTPUT="TRUE", + STRICT_JSON_OUTPUT=True, # Timezone must be UTC TIMEZONE="UTC", ), ) - if authenticator is not None: - connect_args.setdefault("authenticator", authenticator) - - engine = sa.create_engine( - url, connect_args=connect_args, poolclass=sa.pool.StaticPool - ) - - @sa.event.listens_for(engine, "connect") - def connect(dbapi_connection, connection_record): - """Register UDFs on a `"connect"` event.""" - if create_object_udfs: - with dbapi_connection.cursor() as cur: - database, schema = cur.execute( - "SELECT CURRENT_DATABASE(), CURRENT_SCHEMA()" - ).fetchone() - try: - cur.execute("CREATE DATABASE IF NOT EXISTS ibis_udfs") - # snowflake activates a database on creation, so reset - # it back to the original database and schema - cur.execute(f"USE SCHEMA {database}.{schema}") - for name, defn in _SNOWFLAKE_MAP_UDFS.items(): - cur.execute(self._make_udf(name, defn)) - except Exception as e: # noqa: BLE001 - warnings.warn( - f"Unable to create map UDFs, some functionality will not work: {e}" - ) - - super().do_connect(engine) - - def normalize_name(name): - if name is None: - return None - elif not name: - return "" - elif name.lower() == name: - return 
sa.sql.quoted_name(name, quote=True) - else: - return name + con = sc.connect(**connect_args, session_parameters=session_parameters) + + if create_object_udfs: + database = con.database + schema = con.schema + dialect = self.name + create_stmt = sge.Create( + kind="DATABASE", this="ibis_udfs", exists=True + ).sql(dialect) + use_stmt = sge.Use( + kind="SCHEMA", + this=sg.table(schema, db=database, quoted=self.compiler.quoted), + ).sql(dialect) + + stmts = [ + create_stmt, + # snowflake activates a database on creation, so reset it back + # to the original database and schema + use_stmt, + *itertools.starmap(self._make_udf, _SNOWFLAKE_MAP_UDFS.items()), + ] - self.con.dialect.normalize_name = normalize_name + stmt = ";\n".join(stmts) + with contextlib.closing(con.cursor()) as cur: + try: + cur.execute(stmt) + except Exception as e: # noqa: BLE001 + warnings.warn( + f"Unable to create Ibis UDFs, some functionality will not work: {e}" + ) + self.con = con + self._temp_views: set[str] = set() def _get_udf_source(self, udf_node: ops.ScalarUDF): name = type(udf_node).__name__ signature = ", ".join( - f"{name} {self._compile_type(arg.dtype)}" + f"{name} {self.compiler.type_mapper.to_string(arg.dtype)}" for name, arg in zip(udf_node.argnames, udf_node.args) ) - return_type = self._compile_type(udf_node.dtype) + return_type = SnowflakeType.to_string(udf_node.dtype) lines, _ = inspect.getsourcelines(udf_node.__func__) source = textwrap.dedent( "".join( @@ -339,6 +325,29 @@ def _get_udf_source(self, udf_node: ops.ScalarUDF): "COMMENT = '{comment}'", ) + def _define_udf_translation_rules(self, expr): + """No-op, these are defined in the compiler.""" + + def _register_udfs(self, expr: ir.Expr) -> None: + udf_sources = [] + for udf_node in expr.op().find(ops.ScalarUDF): + compile_func = getattr( + self, f"_compile_{udf_node.__input_type__.name.lower()}_udf" + ) + if sql := compile_func(udf_node): + udf_sources.append(sql) + if udf_sources: + # define every udf in one execution to avoid the overhead of db + # round trips per udf + with self._safe_raw_sql(";\n".join(udf_sources)): + pass + + def _compile_builtin_udf(self, udf_node: ops.ScalarUDF) -> None: + """No op.""" + + def _compile_pyarrow_udf(self, udf_node: ops.ScalarUDF) -> None: + raise NotImplementedError("pyarrow UDFs are not supported in Snowflake") + def _compile_python_udf(self, udf_node: ops.ScalarUDF) -> str: return """\ {preamble} @@ -377,16 +386,15 @@ def to_pyarrow( *, params: Mapping[ir.Scalar, Any] | None = None, limit: int | str | None = None, - **_: Any, + **kwargs: Any, ) -> pa.Table: from ibis.backends.snowflake.converter import SnowflakePyArrowData self._run_pre_execute_hooks(expr) - query_ast = self.compiler.to_ast_ensure_limit(expr, limit, params=params) - sql = query_ast.compile() - with self.begin() as con: - res = con.execute(sql).cursor.fetch_arrow_all() + sql = self.compile(expr, limit=limit, params=params, **kwargs) + with self._safe_raw_sql(sql) as cur: + res = cur.fetch_arrow_all() target_schema = expr.as_table().schema().to_pyarrow() if res is None: @@ -394,10 +402,11 @@ def to_pyarrow( return expr.__pyarrow_result__(res, data_mapper=SnowflakePyArrowData) - def fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: - if (table := cursor.cursor.fetch_arrow_all()) is None: + def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: + if (table := cursor.fetch_arrow_all()) is None: table = schema.to_pyarrow().empty_table() df = table.to_pandas(timestamp_as_object=True) + df.columns = 
list(schema.names) return SnowflakePandasData.convert_table(df, schema) def to_pandas_batches( @@ -406,20 +415,18 @@ def to_pandas_batches( *, params: Mapping[ir.Scalar, Any] | None = None, limit: int | str | None = None, - **_: Any, + **kwargs: Any, ) -> Iterator[pd.DataFrame | pd.Series | Any]: self._run_pre_execute_hooks(expr) - query_ast = self.compiler.to_ast_ensure_limit(expr, limit, params=params) - sql = query_ast.compile() + sql = self.compile(expr, limit=limit, params=params, **kwargs) target_schema = expr.as_table().schema() converter = functools.partial( SnowflakePandasData.convert_table, schema=target_schema ) - with self.begin() as con, contextlib.closing(con.execute(sql)) as cur: + with self._safe_raw_sql(sql) as cur: yield from map( - expr.__pandas_result__, - map(converter, cur.cursor.fetch_pandas_batches()), + expr.__pandas_result__, map(converter, cur.fetch_pandas_batches()) ) def to_pyarrow_batches( @@ -429,11 +436,10 @@ def to_pyarrow_batches( params: Mapping[ir.Scalar, Any] | None = None, limit: int | str | None = None, chunk_size: int = 1_000_000, - **_: Any, + **kwargs: Any, ) -> pa.ipc.RecordBatchReader: self._run_pre_execute_hooks(expr) - query_ast = self.compiler.to_ast_ensure_limit(expr, limit, params=params) - sql = query_ast.compile() + sql = self.compile(expr, limit=limit, params=params, **kwargs) target_schema = expr.as_table().schema().to_pyarrow() return pa.RecordBatchReader.from_batches( @@ -446,30 +452,48 @@ def to_pyarrow_batches( def _make_batch_iter( self, sql: str, *, target_schema: sch.Schema, chunk_size: int ) -> Iterator[pa.RecordBatch]: - with self.begin() as con, contextlib.closing(con.execute(sql)) as cur: + with self._safe_raw_sql(sql) as cur: yield from itertools.chain.from_iterable( t.rename_columns(target_schema.names) .cast(target_schema) .to_batches(max_chunksize=chunk_size) - for t in cur.cursor.fetch_arrow_batches() + for t in cur.fetch_arrow_batches() ) + def get_schema( + self, table_name: str, schema: str | None = None, database: str | None = None + ) -> Iterable[tuple[str, dt.DataType]]: + table = sg.table( + table_name, db=schema, catalog=database, quoted=self.compiler.quoted + ) + with self._safe_raw_sql(sge.Describe(kind="TABLE", this=table)) as cur: + result = cur.fetchall() + + type_mapper = self.compiler.type_mapper + return ibis.schema( + { + name: type_mapper.from_string(typ, nullable=nullable == "Y") + for name, typ, _, nullable, *_ in result + } + ) + def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]: - with self.begin() as con: - con.exec_driver_sql(query) - result = con.exec_driver_sql("DESC RESULT last_query_id()").mappings().all() + dialect = self.name + sql = sge.Describe(kind="RESULT", this=self.compiler.f.last_query_id()).sql( + dialect + ) + with self._safe_raw_sql(sg.parse_one(query, read=dialect).limit(0)) as cur: + rows = cur.execute(sql).fetchall() - for field in result: - name = field["name"] - type_string = field["type"] - is_nullable = field["null?"] == "Y" - yield name, SnowflakeType.from_string(type_string, nullable=is_nullable) + type_mapper = self.compiler.type_mapper + return ( + (name, type_mapper.from_string(type_name, nullable=nullable == "Y")) + for name, type_name, _, nullable, *_ in rows + ) def list_databases(self, like: str | None = None) -> list[str]: - with self.begin() as con: - databases = [ - row["name"] for row in con.exec_driver_sql("SHOW DATABASES").mappings() - ] + with self._safe_raw_sql("SHOW DATABASES") as con: + databases = list(map(itemgetter(1), con)) return 
self._filter_with_like(databases, like) def list_schemas( @@ -478,10 +502,11 @@ def list_schemas( query = "SHOW SCHEMAS" if database is not None: - query += f" IN {self._quote(database)}" + db = sg.to_identifier(database, quoted=self.compiler.quoted).sql(self.name) + query += f" IN {db}" - with self.begin() as con: - schemata = [row["name"] for row in con.exec_driver_sql(query).mappings()] + with self._safe_raw_sql(query) as con: + schemata = list(map(itemgetter(1), con)) return self._filter_with_like(schemata, like) @@ -511,12 +536,18 @@ def list_tables( """ if database is not None and schema is None: - raise com.IbisInputError( - f"{self.name} cannot list tables only using `database` specifier. " - "Include a `schema` argument." + util.warn_deprecated( + "database", + instead=( + f"{self.name} cannot list tables only using `database` specifier. " + "Include a `schema` argument." + ), + as_of="7.1", + removed_in="8.0", ) + database = sg.parse_one(database, into=sge.Table).sql(dialect=self.name) elif database is None and schema is not None: - database = sg.parse_one(schema, into=sg.exp.Table).sql(dialect=self.name) + database = sg.parse_one(schema, into=sge.Table).sql(dialect=self.name) else: database = ( sg.table(schema, db=database, quoted=True).sql(dialect=self.name) @@ -530,12 +561,10 @@ def list_tables( tables_query += f" IN {database}" views_query += f" IN {database}" - with self.begin() as con: + with self.con.cursor() as cur: # TODO: considering doing this with a single query using information_schema - tables = [ - row["name"] for row in con.exec_driver_sql(tables_query).mappings() - ] - views = [row["name"] for row in con.exec_driver_sql(views_query).mappings()] + tables = list(map(itemgetter(1), cur.execute(tables_query))) + views = list(map(itemgetter(1), cur.execute(views_query))) return self._filter_with_like(tables + views, like=like) @@ -544,8 +573,8 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: raw_name = op.name - with self.begin() as con: - if con.exec_driver_sql(f"SHOW TABLES LIKE '{raw_name}'").scalar() is None: + with self.con.cursor() as con: + if not con.execute(f"SHOW TABLES LIKE '{raw_name}'").fetchone(): tmpdir = tempfile.TemporaryDirectory() try: path = os.path.join(tmpdir.name, f"{raw_name}.parquet") @@ -559,25 +588,23 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: with contextlib.suppress(Exception): shutil.rmtree(tmpdir.name) - def _get_temp_view_definition( - self, name: str, definition: sa.sql.compiler.Compiled - ) -> str: - yield f"CREATE OR REPLACE TEMPORARY VIEW {name} AS {definition}" - def create_database(self, name: str, force: bool = False) -> None: - create_stmt = sg.exp.Create( - kind="DATABASE", this=sg.to_identifier(name, quoted=True), exists=force - ).sql(self.name) - current_ident = sg.table( - self.current_schema, db=self.current_database, quoted=True + current_database = self.current_database + current_schema = self.current_schema + quoted = self.compiler.quoted + create_stmt = sge.Create( + this=sg.to_identifier(name, quoted=quoted), kind="DATABASE", exists=force + ) + use_stmt = sge.Use( + kind="SCHEMA", + this=sg.table(current_schema, db=current_database, quoted=quoted), ).sql(self.name) - with self.begin() as con: - con.exec_driver_sql(create_stmt) + with self._safe_raw_sql(create_stmt) as cur: # Snowflake automatically switches to the new database after creating # it per # https://docs.snowflake.com/en/sql-reference/sql/create-database#general-usage-notes # so we switch back to the original 
database and schema - con.exec_driver_sql(f"USE SCHEMA {current_ident}") + cur.execute(use_stmt) def drop_database(self, name: str, force: bool = False) -> None: current_database = self.current_database @@ -585,28 +612,53 @@ def drop_database(self, name: str, force: bool = False) -> None: raise com.UnsupportedOperationError( "Dropping the current database is not supported because its behavior is undefined" ) - drop_stmt = sg.exp.Drop( - kind="DATABASE", this=sg.to_identifier(name, quoted=True), exists=force - ).sql(self.name) - with self.begin() as con: - con.exec_driver_sql(drop_stmt) + drop_stmt = sge.Drop( + this=sg.to_identifier(name, quoted=self.compiler.quoted), + kind="DATABASE", + exists=force, + ) + with self._safe_raw_sql(drop_stmt): + pass def create_schema( self, name: str, database: str | None = None, force: bool = False ) -> None: - create_stmt = sg.exp.Create( - kind="SCHEMA", this=sg.table(name, db=database, quoted=True), exists=force - ).sql(self.name) - current_ident = sg.table( - self.current_schema, db=self.current_database, quoted=True + current_database = self.current_database + current_schema = self.current_schema + quoted = self.compiler.quoted + create_stmt = sge.Create( + this=sg.table(name, db=database, quoted=quoted), kind="SCHEMA", exists=force + ) + use_stmt = sge.Use( + kind="SCHEMA", + this=sg.table(current_schema, db=current_database, quoted=quoted), ).sql(self.name) - with self.begin() as con: - con.exec_driver_sql(create_stmt) + with self._safe_raw_sql(create_stmt) as cur: # Snowflake automatically switches to the new schema after creating # it per # https://docs.snowflake.com/en/sql-reference/sql/create-schema#usage-notes # so we switch back to the original schema - con.exec_driver_sql(f"USE SCHEMA {current_ident}") + cur.execute(use_stmt) + + @contextlib.contextmanager + def _safe_raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: + with contextlib.suppress(AttributeError): + query = query.sql(dialect=self.name) + + with contextlib.closing(self.raw_sql(query, **kwargs)) as cur: + yield cur + + def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: + with contextlib.suppress(AttributeError): + query = query.sql(dialect=self.name) + cur = self.con.cursor() + try: + cur.execute(query, **kwargs) + except Exception: + cur.close() + raise + else: + return cur def drop_schema( self, name: str, database: str | None = None, force: bool = False @@ -618,11 +670,13 @@ def drop_schema( "Dropping the current schema is not supported because its behavior is undefined" ) - drop_stmt = sg.exp.Drop( - kind="SCHEMA", this=sg.table(name, db=database, quoted=True), exists=force - ).sql(self.name) - with self.begin() as con: - con.exec_driver_sql(drop_stmt) + drop_stmt = sge.Drop( + this=sg.table(name, db=database, quoted=self.compiler.quoted), + kind="SCHEMA", + exists=force, + ) + with self._safe_raw_sql(drop_stmt): + pass def create_table( self, @@ -661,31 +715,40 @@ def create_table( if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") - create_stmt = "CREATE" - - if overwrite: - create_stmt += " OR REPLACE" - - if temp: - create_stmt += " TEMPORARY" + quoted = self.compiler.quoted if database is None: - ident = sg.table(name, quoted=True) + target = sg.table(name, quoted=quoted) catalog = db = database else: - db = sg.parse_one(database, into=sg.exp.Table, read=self.name) + db = sg.parse_one(database, into=sge.Table, read=self.name) catalog = db.db db = db.name - ident = sg.table(name, db=db, 
catalog=catalog, quoted=True) + target = sg.table(name, db=db, catalog=catalog, quoted=quoted) + + column_defs = [ + sge.ColumnDef( + this=sg.to_identifier(name, quoted=quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [sge.ColumnConstraint(kind=sge.NotNullColumnConstraint())] + ), + ) + for name, typ in (schema or {}).items() + ] - create_stmt += f" TABLE {ident.sql(self.name)}" + if column_defs: + target = sge.Schema(this=target, expressions=column_defs) - if schema is not None: - schema_sql = ", ".join( - f"{name} {SnowflakeType.to_string(typ) + ' NOT NULL' * (not typ.nullable)}" - for name, typ in zip(map(self._quote, schema.keys()), schema.values()) - ) - create_stmt += f" ({schema_sql})" + properties = [] + + if temp: + properties.append(sge.TemporaryProperty()) + + if comment is not None: + properties.append(sge.SchemaCommentProperty(this=sge.convert(comment))) if obj is not None: if not isinstance(obj, ir.Expr): @@ -695,36 +758,23 @@ def create_table( self._run_pre_execute_hooks(table) - query = self.compile(table).compile( - dialect=self.con.dialect, compile_kwargs=dict(literal_binds=True) - ) - create_stmt += f" AS {query}" + query = self._to_sqlglot(table) + else: + query = None - if comment is not None: - create_stmt += f" COMMENT '{comment}'" + create_stmt = sge.Create( + kind="TABLE", + this=target, + replace=overwrite, + properties=sge.Properties(expressions=properties), + expression=query, + ) - with self.begin() as con: - con.exec_driver_sql(create_stmt) + with self._safe_raw_sql(create_stmt): + pass return self.table(name, schema=db, database=catalog) - def drop_table( - self, - name: str, - database: str | None = None, - schema: str | None = None, - force: bool = False, - ) -> None: - """Drop a table from Snowflake.""" - drop_stmt = sg.exp.Drop( - kind="TABLE", - this=sg.table(name, db=schema, catalog=database, quoted=True), - exists=force, - ).sql(self.name) - - with self.begin() as con: - con.exec_driver_sql(drop_stmt) - def read_csv( self, path: str | Path, table_name: str | None = None, **kwargs: Any ) -> ir.Table: @@ -751,7 +801,8 @@ def read_csv( # https://docs.snowflake.com/en/sql-reference/sql/put#optional-parameters threads = min((os.cpu_count() or 2) // 2, 99) table = table_name or ibis.util.gen_name("read_csv_snowflake") - qtable = self._quote(table) + quoted = self.compiler.quoted + qtable = sg.to_identifier(table, quoted=quoted) parse_header = header = kwargs.pop("parse_header", True) skip_header = kwargs.pop("skip_header", True) @@ -766,66 +817,68 @@ def read_csv( f"{name.upper()} = {value!r}" for name, value in kwargs.items() ) - with self.begin() as con: + stmts = [ # create a temporary stage for the file - con.exec_driver_sql(f"CREATE TEMP STAGE {stage}") - + f"CREATE TEMP STAGE {stage}", # create a temporary file format for CSV schema inference - create_infer_fmt = ( + ( f"CREATE TEMP FILE FORMAT {file_format} TYPE = CSV PARSE_HEADER = {str(header).upper()}" + options - ) - con.exec_driver_sql(create_infer_fmt) + ), + ] + with self._safe_raw_sql(";\n".join(stmts)) as cur: + # copy the local file to the stage if str(path).startswith("https://"): with tempfile.NamedTemporaryFile() as tmp: - urlretrieve(path, filename=tmp.name) + tmpname = tmp.name + urlretrieve(path, filename=tmpname) tmp.flush() - con.exec_driver_sql( - f"PUT 'file://{tmp.name}' @{stage} PARALLEL = {threads:d} AUTO_COMPRESS = TRUE" + cur.execute( + f"PUT 'file://{tmpname}' @{stage} PARALLEL = {threads:d}" ) else: - 
con.exec_driver_sql( - f"PUT 'file://{Path(path).absolute()}' @{stage} PARALLEL = {threads:d} AUTO_COMPRESS = TRUE" + cur.execute( + f"PUT 'file://{Path(path).absolute()}' @{stage} PARALLEL = {threads:d}" ) # handle setting up the schema in python because snowflake is # broken for csv globs: it cannot parse the result of the following # query in USING TEMPLATE - fields = json.loads( - con.exec_driver_sql( - f""" - SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*)) - WITHIN GROUP (ORDER BY ORDER_ID ASC) - FROM TABLE( - INFER_SCHEMA( - LOCATION => '@{stage}', - FILE_FORMAT => '{file_format}' - ) + (info,) = cur.execute( + f""" + SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*)) + WITHIN GROUP (ORDER BY ORDER_ID ASC) + FROM TABLE( + INFER_SCHEMA( + LOCATION => '@{stage}', + FILE_FORMAT => '{file_format}' ) - """ - ).scalar() - ) - fields = [ - (self._quote(field["COLUMN_NAME"]), field["TYPE"], field["NULLABLE"]) - for field in fields - ] + ) + """ + ).fetchone() columns = ", ".join( - f"{quoted_name} {typ}{' NOT NULL' * (not nullable)}" - for quoted_name, typ, nullable in fields + "{} {}{}".format( + sg.to_identifier(field["COLUMN_NAME"], quoted=quoted).sql( + self.name + ), + field["TYPE"], + " NOT NULL" if not field["NULLABLE"] else "", + ) + for field in json.loads(info) ) - # create a temporary table using the stage and format inferred - # from the CSV - con.exec_driver_sql(f"CREATE TEMP TABLE {qtable} ({columns})") - - # load the CSV into the table - con.exec_driver_sql( + stmts = [ + # create a temporary table using the stage and format inferred + # from the CSV + f"CREATE TEMP TABLE {qtable} ({columns})", + # load the CSV into the table f""" COPY INTO {qtable} FROM @{stage} FILE_FORMAT = (TYPE = CSV SKIP_HEADER = {int(header)}{options}) - """ - ) + """, + ] + cur.execute(";\n".join(stmts)) return self.table(table) @@ -853,7 +906,7 @@ def read_json( stage = util.gen_name("read_json_stage") file_format = util.gen_name("read_json_format") table = table_name or util.gen_name("read_json_snowflake") - qtable = self._quote(table) + qtable = sg.to_identifier(table, quoted=self.compiler.quoted) threads = min((os.cpu_count() or 2) // 2, 99) kwargs.setdefault("strip_outer_array", True) @@ -863,41 +916,39 @@ def read_json( f"{name.upper()} = {value!r}" for name, value in kwargs.items() ) - with self.begin() as con: - con.exec_driver_sql( - f"CREATE TEMP FILE FORMAT {file_format} TYPE = JSON" + options - ) + stmts = [ + f"CREATE TEMP FILE FORMAT {file_format} TYPE = JSON" + options, + f"CREATE TEMP STAGE {stage} FILE_FORMAT = {file_format}", + ] - con.exec_driver_sql( - f"CREATE TEMP STAGE {stage} FILE_FORMAT = {file_format}" - ) - con.exec_driver_sql( + with self._safe_raw_sql(";\n".join(stmts)) as cur: + cur.execute( f"PUT 'file://{Path(path).absolute()}' @{stage} PARALLEL = {threads:d}" ) - - con.exec_driver_sql( - f""" - CREATE TEMP TABLE {qtable} - USING TEMPLATE ( - SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*)) - WITHIN GROUP (ORDER BY ORDER_ID ASC) - FROM TABLE( - INFER_SCHEMA( - LOCATION => '@{stage}', - FILE_FORMAT => '{file_format}' + cur.execute( + ";\n".join( + [ + f""" + CREATE TEMP TABLE {qtable} + USING TEMPLATE ( + SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*)) + WITHIN GROUP (ORDER BY ORDER_ID ASC) + FROM TABLE( + INFER_SCHEMA( + LOCATION => '@{stage}', + FILE_FORMAT => '{file_format}' + ) + ) ) - ) + """, + # load the JSON file into the table + f""" + COPY INTO {qtable} + FROM @{stage} + MATCH_BY_COLUMN_NAME = {str(match_by_column_name).upper()} + """, + ] ) - """ - ) - - # load the JSON file into the table - 
con.exec_driver_sql( - f""" - COPY INTO {qtable} - FROM @{stage} - MATCH_BY_COLUMN_NAME = {str(match_by_column_name).upper()} - """ ) return self.table(table) @@ -934,7 +985,8 @@ def read_parquet( stage = util.gen_name("read_parquet_stage") table = table_name or util.gen_name("read_parquet_snowflake") - qtable = self._quote(table) + quoted = self.compiler.quoted + qtable = sg.to_identifier(table, quoted=quoted) threads = min((os.cpu_count() or 2) // 2, 99) options = " " * bool(kwargs) + " ".join( @@ -947,11 +999,16 @@ def read_parquet( # see # https://community.snowflake.com/s/article/How-to-load-logical-type-TIMESTAMP-data-from-Parquet-files-into-Snowflake names_types = [ - (name, SnowflakeType.to_string(typ), typ.nullable, typ.is_timestamp()) + ( + name, + self.compiler.type_mapper.to_string(typ), + typ.nullable, + typ.is_timestamp(), + ) for name, typ in schema.items() ] snowflake_schema = ", ".join( - f"{self._quote(col)} {typ}{' NOT NULL' * (not nullable)}" + f"{sg.to_identifier(col, quoted=quoted)} {typ}{' NOT NULL' * (not nullable)}" for col, typ, nullable, _ in names_types ) cols = ", ".join( @@ -959,17 +1016,15 @@ def read_parquet( for col, typ, _, is_timestamp in names_types ) - with self.begin() as con: - con.exec_driver_sql( - f"CREATE TEMP STAGE {stage} FILE_FORMAT = (TYPE = PARQUET{options})" - ) - con.exec_driver_sql( - f"PUT 'file://{abspath}' @{stage} PARALLEL = {threads:d}" - ) - con.exec_driver_sql(f"CREATE TEMP TABLE {qtable} ({snowflake_schema})") - con.exec_driver_sql( - f"COPY INTO {qtable} FROM (SELECT {cols} FROM @{stage})" - ) + stmts = [ + f"CREATE TEMP STAGE {stage} FILE_FORMAT = (TYPE = PARQUET{options})", + f"CREATE TEMP TABLE {qtable} ({snowflake_schema})", + ] + + query = ";\n".join(stmts) + with self._safe_raw_sql(query) as cur: + cur.execute(f"PUT 'file://{abspath}' @{stage} PARALLEL = {threads:d}") + cur.execute(f"COPY INTO {qtable} FROM (SELECT {cols} FROM @{stage})") return self.table(table) @@ -1007,23 +1062,8 @@ def insert( columns=[sg.column(col, quoted=True) for col in obj.columns], dialect=self.name, ) - with self.begin() as con: + with self.begin() as cur: if overwrite: - con.exec_driver_sql(f"TRUNCATE TABLE {table.sql(self.name)}") - - con.exec_driver_sql(query.sql(self.name)) - - -@compiles(sa.sql.Join, "snowflake") -def compile_join(element, compiler, **kw): - """Override compilation of LATERAL joins. - - Snowflake doesn't support lateral joins with ON clauses as of - https://docs.snowflake.com/en/release-notes/bcr-bundles/2023_04/bcr-1057 - even if they are trivial boolean literals. 
- """ - result = compiler.visit_join(element, **kw) + cur.execute(f"TRUNCATE TABLE {table.sql(self.name)}") - if element.right._is_lateral: - return re.sub(r"^(.+) ON true$", r"\1", result, flags=re.IGNORECASE | re.DOTALL) - return result + cur.execute(query.sql(self.name)) diff --git a/ibis/backends/snowflake/compiler.py b/ibis/backends/snowflake/compiler.py new file mode 100644 index 000000000000..a61bd7c7ef84 --- /dev/null +++ b/ibis/backends/snowflake/compiler.py @@ -0,0 +1,676 @@ +from __future__ import annotations + +import itertools +from functools import partial, singledispatchmethod + +import sqlglot as sg +import sqlglot.expressions as sge +from public import public +from sqlglot import exp +from sqlglot.dialects import Snowflake +from sqlglot.dialects.dialect import rename_func + +import ibis +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis import util +from ibis.backends.base.sqlglot.compiler import NULL, C, FuncGen, SQLGlotCompiler +from ibis.backends.base.sqlglot.datatypes import SnowflakeType +from ibis.backends.base.sqlglot.rewrites import replace_log2, replace_log10 +from ibis.common.patterns import replace +from ibis.expr.analysis import p, x, y + +Snowflake.Generator.TRANSFORMS |= { + exp.ApproxDistinct: rename_func("approx_count_distinct"), + exp.Levenshtein: rename_func("editdistance"), +} + + +@replace(p.WindowFunction(p.First(x, y))) +def rewrite_first(_, x, y): + if y is not None: + raise com.UnsupportedOperationError( + "`first` aggregate over window does not support `where`" + ) + return _.copy(func=ops.FirstValue(x)) + + +@replace(p.WindowFunction(p.Last(x, y))) +def rewrite_last(_, x, y): + if y is not None: + raise com.UnsupportedOperationError( + "`last` aggregate over window does not support `where`" + ) + return _.copy(func=ops.LastValue(x)) + + +@replace(p.WindowFunction(frame=x @ p.WindowFrame(order_by=()))) +def rewrite_empty_order_by_window(_, x): + return _.copy(frame=x.copy(order_by=(ibis.NA,))) + + +@replace(p.WindowFunction(p.RowNumber | p.NTile, x)) +def exclude_unsupported_window_frame_from_row_number(_, x): + return ops.Subtract(_.copy(frame=x.copy(start=None, end=None)), 1) + + +@replace( + p.WindowFunction( + p.Lag | p.Lead | p.PercentRank | p.CumeDist | p.Any | p.All, + x @ p.WindowFrame(start=None), + ) +) +def exclude_unsupported_window_frame_from_ops(_, x): + return _.copy(frame=x.copy(start=None, end=None)) + + +@replace(p.ToJSONMap | p.ToJSONArray) +def replace_to_json(_): + return ops.Cast(_.arg, to=_.dtype) + + +class SnowflakeFuncGen(FuncGen): + udf = FuncGen(namespace="ibis_udfs.public") + + +@public +class SnowflakeCompiler(SQLGlotCompiler): + __slots__ = () + + dialect = "snowflake" + quoted = True + type_mapper = SnowflakeType + no_limit_value = NULL + rewrites = ( + replace_to_json, + exclude_unsupported_window_frame_from_row_number, + exclude_unsupported_window_frame_from_ops, + rewrite_first, + rewrite_last, + rewrite_empty_order_by_window, + replace_log2, + replace_log10, + *SQLGlotCompiler.rewrites, + ) + + def __init__(self): + super().__init__() + self.f = SnowflakeFuncGen() + + def _aggregate(self, funcname: str, *args, where): + if where is not None: + args = [self.if_(where, arg, NULL) for arg in args] + + func = self.f[funcname] + return func(*args) + + @singledispatchmethod + def visit_node(self, op, **kw): + return super().visit_node(op, **kw) + + @visit_node.register(ops.Literal) + def visit_Literal(self, op, *, value, dtype): + if value is None: + 
return super().visit_Literal(op, value=value, dtype=dtype) + elif dtype.is_string(): + # sqlglot doesn't escape backslashes in strings + return sge.convert(value.replace("\\", "\\\\")) + elif dtype.is_timestamp(): + args = ( + value.year, + value.month, + value.day, + value.hour, + value.minute, + value.second, + value.microsecond * 1_000, + ) + if value.tzinfo is not None: + return self.f.timestamp_tz_from_parts(*args, dtype.timezone) + else: + # workaround sqlglot not supporting more than 6 arguments + return sge.Anonymous( + this=sg.to_identifier("timestamp_from_parts"), + expressions=list(map(sge.convert, args)), + ) + elif dtype.is_time(): + nanos = value.microsecond * 1_000 + return self.f.time_from_parts(value.hour, value.minute, value.second, nanos) + elif dtype.is_map(): + key_type = dtype.key_type + value_type = dtype.value_type + + pairs = [] + + for k, v in value.items(): + pairs.append( + self.visit_Literal( + ops.Literal(k, key_type), value=k, dtype=key_type + ) + ) + pairs.append( + self.visit_Literal( + ops.Literal(v, value_type), value=v, dtype=value_type + ) + ) + + return self.f.object_construct_keep_null(*pairs) + elif dtype.is_struct(): + pairs = [] + for k, v in value.items(): + pairs.append(k) + pairs.append( + self.visit_Literal( + ops.Literal(v, dtype[k]), value=v, dtype=dtype[k] + ) + ) + return self.f.object_construct_keep_null(*pairs) + elif dtype.is_uuid(): + return sge.convert(str(value)) + elif dtype.is_binary(): + return sge.HexString(this=value.hex()) + return super().visit_node(op, value=value, dtype=dtype) + + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + if to.is_struct() or to.is_map(): + return self.if_(self.f.is_object(arg), arg, NULL) + elif to.is_array(): + return self.if_(self.f.is_array(arg), arg, NULL) + return self.cast(arg, to) + + @visit_node.register(ops.IsNan) + def visit_IsNan(self, op, *, arg): + return arg.eq(self.NAN) + + @visit_node.register(ops.IsInf) + def visit_IsInf(self, op, *, arg): + return arg.isin(self.POS_INF, self.NEG_INF) + + @visit_node.register(ops.JSONGetItem) + def visit_JSONGetItem(self, op, *, arg, index): + return self.f.get(arg, index) + + @visit_node.register(ops.StringFind) + def visit_StringFind(self, op, *, arg, substr, start, end): + args = [substr, arg] + if start is not None: + args.append(start + 1) + return self.f.position(*args) + + @visit_node.register(ops.RegexSplit) + def visit_RegexSplit(self, op, *, arg, pattern): + return self.f.udf.regexp_split(arg, pattern) + + @visit_node.register(ops.Map) + def visit_Map(self, op, *, keys, values): + return self.if_( + sg.and_(self.f.is_array(keys), self.f.is_array(values)), + self.f.udf.object_from_arrays(keys, values), + NULL, + ) + + @visit_node.register(ops.MapKeys) + def visit_MapKeys(self, op, *, arg): + return self.if_(self.f.is_object(arg), self.f.object_keys(arg), NULL) + + @visit_node.register(ops.MapValues) + def visit_MapValues(self, op, *, arg): + return self.if_(self.f.is_object(arg), self.f.udf.object_values(arg), NULL) + + @visit_node.register(ops.MapGet) + def visit_MapGet(self, op, *, arg, key, default): + dtype = op.dtype + expr = self.f.coalesce(self.f.get(arg, key), self.f.to_variant(default)) + if dtype.is_json() or dtype.is_null(): + return expr + return self.cast(expr, dtype) + + @visit_node.register(ops.MapContains) + def visit_MapContains(self, op, *, arg, key): + return self.f.array_contains( + self.if_(self.f.is_object(arg), self.f.object_keys(arg), NULL), + self.f.to_variant(key), + ) + + 
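+    # [editorial note, not part of the upstream patch] Snowflake has no MAP
+    # type, so the map operations in this compiler model ibis maps as
+    # semi-structured OBJECT values: constructors and key/value accessors are
+    # wrapped in IS_OBJECT(...) guards so that non-object input yields NULL
+    # rather than erroring, while MapGet above coalesces GET(...) with
+    # TO_VARIANT(default). For example, `m.keys()` on a map column is expected
+    # to compile to roughly IFF(IS_OBJECT("m"), OBJECT_KEYS("m"), NULL).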
@visit_node.register(ops.MapMerge) + def visit_MapMerge(self, op, *, left, right): + return self.if_( + sg.and_(self.f.is_object(left), self.f.is_object(right)), + self.f.udf.object_merge(left, right), + NULL, + ) + + @visit_node.register(ops.MapLength) + def visit_MapLength(self, op, *, arg): + return self.if_( + self.f.is_object(arg), self.f.array_size(self.f.object_keys(arg)), NULL + ) + + @visit_node.register(ops.Log) + def visit_Log(self, op, *, arg, base): + return self.f.log(base, arg, dialect=self.dialect) + + @visit_node.register(ops.RandomScalar) + def visit_RandomScalar(self, op): + return self.f.uniform( + self.f.to_double(0.0), self.f.to_double(1.0), self.f.random() + ) + + @visit_node.register(ops.ApproxMedian) + def visit_ApproxMedian(self, op, *, arg, where): + return self.agg.approx_percentile(arg, 0.5, where=where) + + @visit_node.register(ops.TimeDelta) + def visit_TimeDelta(self, op, *, part, left, right): + return self.f.timediff(part, right, left, dialect=self.dialect) + + @visit_node.register(ops.DateDelta) + def visit_DateDelta(self, op, *, part, left, right): + return self.f.datediff(part, right, left, dialect=self.dialect) + + @visit_node.register(ops.TimestampDelta) + def visit_TimestampDelta(self, op, *, part, left, right): + return self.f.timestampdiff(part, right, left, dialect=self.dialect) + + @visit_node.register(ops.TimestampAdd) + @visit_node.register(ops.DateAdd) + def visit_TimestampDateAdd(self, op, *, left, right): + if not isinstance(op.right, ops.Literal): + raise com.OperationNotDefinedError( + f"right side of {type(op).__name__} operation must be an interval literal" + ) + return sg.exp.Add(this=left, expression=right) + + @visit_node.register(ops.IntegerRange) + def visit_IntegerRange(self, op, *, start, stop, step): + return self.if_( + step.neq(0), self.f.array_generate_range(start, stop, step), self.f.array() + ) + + @visit_node.register(ops.StructColumn) + def visit_StructColumn(self, op, *, names, values): + return self.f.object_construct_keep_null( + *itertools.chain.from_iterable(zip(names, values)) + ) + + @visit_node.register(ops.StructField) + def visit_StructField(self, op, *, arg, field): + return self.cast(self.f.get(arg, field), op.dtype) + + @visit_node.register(ops.RegexSearch) + def visit_RegexSearch(self, op, *, arg, pattern): + return sge.RegexpLike( + this=arg, + expression=self.f.concat(".*", pattern, ".*"), + flag=sge.convert("cs"), + ) + + @visit_node.register(ops.RegexReplace) + def visit_RegexReplace(self, op, *, arg, pattern, replacement): + return sge.RegexpReplace(this=arg, expression=pattern, replacement=replacement) + + @visit_node.register(ops.TypeOf) + def visit_TypeOf(self, op, *, arg): + return self.f.typeof(self.f.to_variant(arg)) + + @visit_node.register(ops.ArrayRepeat) + def visit_ArrayRepeat(self, op, *, arg, times): + return self.f.udf.array_repeat(arg, times) + + @visit_node.register(ops.ArrayUnion) + def visit_ArrayUnion(self, op, *, left, right): + return self.f.array_distinct(self.f.array_cat(left, right)) + + @visit_node.register(ops.ArrayContains) + def visit_ArrayContains(self, op, *, arg, other): + return self.f.array_contains(arg, self.f.to_variant(other)) + + @visit_node.register(ops.ArrayCollect) + def visit_ArrayCollect(self, op, *, arg, where): + return self.agg.array_agg( + self.f.ifnull(arg, self.f.parse_json("null")), where=where + ) + + @visit_node.register(ops.ArrayConcat) + def visit_ArrayConcat(self, op, *, arg): + # array_cat only accepts two arguments + return 
self.f.array_flatten(self.f.array(*arg)) + + @visit_node.register(ops.ArrayPosition) + def visit_ArrayPosition(self, op, *, arg, other): + # snowflake is zero-based here, so we don't need to subtract 1 from the + # result + return self.f.coalesce( + self.f.array_position(self.f.to_variant(other), arg) + 1, 0 + ) + + @visit_node.register(ops.RegexExtract) + def visit_RegexExtract(self, op, *, arg, pattern, index): + # https://docs.snowflake.com/en/sql-reference/functions/regexp_substr + return sge.RegexpExtract( + this=arg, + expression=pattern, + position=sge.convert(1), + group=index, + parameters=sge.convert("ce"), + ) + + @visit_node.register(ops.ArrayZip) + def visit_ArrayZip(self, op, *, arg): + return self.f.udf.array_zip(self.f.array(*arg)) + + @visit_node.register(ops.DayOfWeekName) + def visit_DayOfWeekName(self, op, *, arg): + return sge.Case( + this=self.f.dayname(arg), + ifs=[ + self.if_("Sun", "Sunday"), + self.if_("Mon", "Monday"), + self.if_("Tue", "Tuesday"), + self.if_("Wed", "Wednesday"), + self.if_("Thu", "Thursday"), + self.if_("Fri", "Friday"), + self.if_("Sat", "Saturday"), + ], + default=NULL, + ) + + @visit_node.register(ops.TimestampFromUNIX) + def visit_TimestampFromUNIX(self, op, *, arg, unit): + timestamp_units_to_scale = {"s": 0, "ms": 3, "us": 6, "ns": 9} + return self.f.to_timestamp(arg, timestamp_units_to_scale[unit.short]) + + @visit_node.register(ops.First) + def visit_First(self, op, *, arg, where): + return self.f.get(self.agg.array_agg(arg, where=where), 0) + + @visit_node.register(ops.Last) + def visit_Last(self, op, *, arg, where): + expr = self.agg.array_agg(arg, where=where) + return self.f.get(expr, self.f.array_size(expr) - 1) + + @visit_node.register(ops.GroupConcat) + def visit_GroupConcat(self, op, *, arg, where, sep): + if where is None: + return self.f.listagg(arg, sep) + + return self.if_( + self.f.count_if(where) > 0, + self.f.listagg(self.if_(where, arg, NULL), sep), + NULL, + ) + + @visit_node.register(ops.TimestampBucket) + def visit_TimestampBucket(self, op, *, arg, interval, offset): + if offset is not None: + raise com.UnsupportedOperationError( + "`offset` is not supported in the Snowflake backend for timestamp bucketing" + ) + + interval = op.interval + if not isinstance(interval, ops.Literal): + raise com.UnsupportedOperationError( + f"Interval must be a literal for the Snowflake backend, got {type(interval)}" + ) + + return self.f.time_slice(arg, interval.value, interval.dtype.unit.name) + + @visit_node.register(ops.Arbitrary) + def visit_Arbitrary(self, op, *, arg, how, where): + if how == "first": + return self.f.get(self.agg.array_agg(arg, where=where), 0) + elif how == "last": + expr = self.agg.array_agg(arg, where=where) + return self.f.get(expr, self.f.array_size(expr) - 1) + else: + raise com.UnsupportedOperationError("how must be 'first' or 'last'") + + @visit_node.register(ops.ArraySlice) + def visit_ArraySlice(self, op, *, arg, start, stop): + if start is None: + start = 0 + + if stop is None: + stop = self.f.array_size(arg) + return self.f.array_slice(arg, start, stop) + + @visit_node.register(ops.ExtractEpochSeconds) + def visit_ExtractExtractEpochSeconds(self, op, *, arg): + return self.f.extract("epoch", arg) + + @visit_node.register(ops.ExtractMicrosecond) + def visit_ExtractMicrosecond(self, op, *, arg): + return self.f.extract("epoch_microsecond", arg) % 1_000_000 + + @visit_node.register(ops.ExtractMillisecond) + def visit_ExtractMillisecond(self, op, *, arg): + return self.f.extract("epoch_millisecond", arg) % 
1_000 + + @visit_node.register(ops.ExtractQuery) + def visit_ExtractQuery(self, op, *, arg, key): + parsed_url = self.f.parse_url(arg, 1) + if key is not None: + r = self.f.get(self.f.get(parsed_url, "parameters"), key) + else: + r = self.f.get(parsed_url, "query") + return self.f.nullif(self.f.as_varchar(r), "") + + @visit_node.register(ops.ExtractProtocol) + def visit_ExtractProtocol(self, op, *, arg): + return self.f.nullif( + self.f.as_varchar(self.f.get(self.f.parse_url(arg, 1), "scheme")), "" + ) + + @visit_node.register(ops.ExtractAuthority) + def visit_ExtractAuthority(self, op, *, arg): + return self.f.concat_ws( + ":", + self.f.as_varchar(self.f.get(self.f.parse_url(arg, 1), "host")), + self.f.as_varchar(self.f.get(self.f.parse_url(arg, 1), "port")), + ) + + @visit_node.register(ops.ExtractFile) + def visit_ExtractFile(self, op, *, arg): + return self.f.concat_ws( + "?", + self.visit_ExtractPath(op, arg=arg), + self.visit_ExtractQuery(op, arg=arg, key=None), + ) + + @visit_node.register(ops.ExtractPath) + def visit_ExtractPath(self, op, *, arg): + return self.f.concat( + "/", self.f.as_varchar(self.f.get(self.f.parse_url(arg, 1), "path")) + ) + + @visit_node.register(ops.ExtractFragment) + def visit_ExtractFragment(self, op, *, arg): + return self.f.nullif( + self.f.as_varchar(self.f.get(self.f.parse_url(arg, 1), "fragment")), "" + ) + + @visit_node.register(ops.Unnest) + def visit_Unnest(self, op, *, arg): + sep = sge.convert(util.guid()) + split = self.f.split( + self.f.array_to_string(self.f.nullif(arg, self.f.array()), sep), sep + ) + expr = self.f.nullif(sge.Explode(this=split), "") + return self.cast(expr, op.dtype) + + @visit_node.register(ops.Quantile) + def visit_Quantile(self, op, *, arg, quantile, where): + # can't use `self.agg` here because `quantile` must be a constant and + # the agg method filters using `where` for every argument which turns + # the constant into an expression + if where is not None: + arg = self.if_(where, arg, NULL) + return self.f.percentile_cont(arg, quantile) + + @visit_node.register(ops.CountStar) + def visit_CountStar(self, op, *, arg, where): + if where is None: + return super().visit_node(op, arg=arg, where=where) + return self.f.count_if(where) + + @visit_node.register(ops.CountDistinct) + def visit_CountDistinct(self, op, *, arg, where): + if where is not None: + arg = self.if_(where, arg, NULL) + return self.f.count(sge.Distinct(expressions=[arg])) + + @visit_node.register(ops.CountDistinctStar) + def visit_CountDistinctStar(self, op, *, arg, where): + columns = op.arg.schema.names + quoted = self.quoted + col = partial(sg.column, quoted=quoted) + if where is None: + expressions = list(map(col, columns)) + else: + # any null columns will cause the entire row not to be counted + expressions = [self.if_(where, col(name), NULL) for name in columns] + return self.f.count(sge.Distinct(expressions=expressions)) + + @visit_node.register(ops.Xor) + def visit_Xor(self, op, *, left, right): + # boolxor accepts numerics ... and returns a boolean? wtf? 
+ return self.f.boolxor(self.cast(left, dt.int8), self.cast(right, dt.int8)) + + @visit_node.register(ops.WindowBoundary) + def visit_WindowBoundary(self, op, *, value, preceding): + if not isinstance(op.value, ops.Literal): + raise com.OperationNotDefinedError( + "Expressions in window bounds are not supported by Snowflake" + ) + return super().visit_WindowBoundary(op, value=value, preceding=preceding) + + @visit_node.register(ops.Correlation) + def visit_Correlation(self, op, *, left, right, how, where): + if how == "sample": + raise com.UnsupportedOperationError( + f"{self.dialect} only implements `pop` correlation coefficient" + ) + + # TODO: rewrite rule? + if (left_type := op.left.dtype).is_boolean(): + left = self.cast(left, dt.Int32(nullable=left_type.nullable)) + + if (right_type := op.right.dtype).is_boolean(): + right = self.cast(right, dt.Int32(nullable=right_type.nullable)) + + return self.agg.corr(left, right, where=where) + + @visit_node.register(ops.TimestampRange) + def visit_TimestampRange(self, op, *, start, stop, step): + raw_step = op.step + + if not isinstance(raw_step, ops.Literal): + raise com.UnsupportedOperationError("`step` argument must be a literal") + + unit = raw_step.dtype.unit.name.lower() + step = raw_step.value + + value_type = op.dtype.value_type + + if step == 0: + return self.f.array() + + return ( + sg.select( + self.f.array_agg( + self.f.replace( + # conversion to varchar is necessary to control + # the timestamp format + # + # otherwise, since timestamps in arrays become strings + # anyway due to lack of parameterized type support in + # Snowflake the format depends on a session parameter + self.f.to_varchar( + self.f.dateadd(unit, C.value, start, dialect=self.dialect), + 'YYYY-MM-DD"T"HH24:MI:SS.FF6' + + (value_type.timezone is not None) * "TZH:TZM", + ), + # timezones are always hour:minute offsets from UTC, not + # named, so replacing "Z" shouldn't be an issue + "Z", + "+00:00", + ), + ) + ) + .from_( + sge.Table( + this=sge.Unnest( + expressions=[ + self.f.array_generate_range( + 0, + self.f.datediff( + unit, start, stop, dialect=self.dialect + ), + step, + ) + ] + ) + ) + ) + .subquery() + ) + + @visit_node.register(ops.ArrayMap) + @visit_node.register(ops.ArrayFilter) + @visit_node.register(ops.RowID) + @visit_node.register(ops.MultiQuantile) + @visit_node.register(ops.IntervalFromInteger) + @visit_node.register(ops.IntervalAdd) + @visit_node.register(ops.TimestampDiff) + @visit_node.register(ops.TryCast) + def visit_Undefined(self, op, **_): + raise com.OperationNotDefinedError(type(op).__name__) + + +_SIMPLE_OPS = { + ops.ArrayDistinct: "array_distinct", + ops.ArrayFlatten: "array_flatten", + ops.ArrayIndex: "get", + ops.ArrayIntersect: "array_intersection", + ops.ArrayRemove: "array_remove", + ops.BitAnd: "bitand_agg", + ops.BitOr: "bitor_agg", + ops.BitXor: "bitxor_agg", + ops.BitwiseAnd: "bitand", + ops.BitwiseLeftShift: "bitshiftleft", + ops.BitwiseNot: "bitnot", + ops.BitwiseOr: "bitor", + ops.BitwiseRightShift: "bitshiftright", + ops.BitwiseXor: "bitxor", + ops.EndsWith: "endswith", + ops.Hash: "hash", + ops.LPad: "lpad", + ops.Median: "median", + ops.Mode: "mode", + ops.RPad: "rpad", + ops.StringAscii: "ascii", + ops.StringToTimestamp: "to_timestamp_tz", + ops.TimeFromHMS: "time_from_parts", + ops.TimestampFromYMDHMS: "timestamp_from_parts", +} + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @SnowflakeCompiler.visit_node.register(_op) + def _fmt(self, 
op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) + + else: + + @SnowflakeCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) + + setattr(SnowflakeCompiler, f"visit_{_op.__name__}", _fmt) + + +del _op, _name, _fmt diff --git a/ibis/backends/snowflake/datatypes.py b/ibis/backends/snowflake/datatypes.py deleted file mode 100644 index f93b0cc9b855..000000000000 --- a/ibis/backends/snowflake/datatypes.py +++ /dev/null @@ -1,78 +0,0 @@ -from __future__ import annotations - -import sqlalchemy.types as sat -from snowflake.sqlalchemy import ( - ARRAY, - OBJECT, - TIMESTAMP_LTZ, - TIMESTAMP_NTZ, - TIMESTAMP_TZ, - VARIANT, -) -from sqlalchemy.ext.compiler import compiles - -import ibis.expr.datatypes as dt -from ibis.backends.base.sql.alchemy.datatypes import AlchemyType -from ibis.backends.base.sqlglot.datatypes import SnowflakeType as SqlglotSnowflakeType - - -@compiles(sat.NullType, "snowflake") -def compiles_nulltype(element, compiler, **kw): - return "VARIANT" - - -class SnowflakeType(AlchemyType): - dialect = "snowflake" - - @classmethod - def from_ibis(cls, dtype): - if dtype.is_array(): - return ARRAY - elif dtype.is_map() or dtype.is_struct(): - return OBJECT - elif dtype.is_json(): - return VARIANT - elif dtype.is_timestamp(): - if dtype.timezone is None: - return TIMESTAMP_NTZ - else: - return TIMESTAMP_TZ - elif dtype.is_string(): - # 16MB - return sat.VARCHAR(2**24) - elif dtype.is_binary(): - # 8MB - return sat.VARBINARY(2**23) - else: - return super().from_ibis(dtype) - - @classmethod - def to_ibis(cls, typ, nullable=True): - if isinstance(typ, (sat.REAL, sat.FLOAT, sat.Float)): - return dt.Float64(nullable=nullable) - elif isinstance(typ, TIMESTAMP_NTZ): - return dt.Timestamp(timezone=None, nullable=nullable) - elif isinstance(typ, (TIMESTAMP_LTZ, TIMESTAMP_TZ)): - return dt.Timestamp(timezone="UTC", nullable=nullable) - elif isinstance(typ, ARRAY): - return dt.Array(dt.json, nullable=nullable) - elif isinstance(typ, OBJECT): - return dt.Map(dt.string, dt.json, nullable=nullable) - elif isinstance(typ, VARIANT): - return dt.JSON(nullable=nullable) - elif isinstance(typ, sat.Numeric): - if (scale := typ.scale) == 0: - # kind of a lie, should be int128 because 38 digits - return dt.Int64(nullable=nullable) - else: - return dt.Decimal( - precision=typ.precision or 38, - scale=scale or 0, - nullable=nullable, - ) - else: - return super().to_ibis(typ, nullable=nullable) - - @classmethod - def from_string(cls, type_string, nullable=True): - return SqlglotSnowflakeType.from_string(type_string, nullable=nullable) diff --git a/ibis/backends/snowflake/registry.py b/ibis/backends/snowflake/registry.py deleted file mode 100644 index 7e97330d4481..000000000000 --- a/ibis/backends/snowflake/registry.py +++ /dev/null @@ -1,584 +0,0 @@ -from __future__ import annotations - -import functools -import itertools - -import numpy as np -import sqlalchemy as sa -from snowflake.sqlalchemy import ARRAY, OBJECT, VARIANT -from sqlalchemy.ext.compiler import compiles -from sqlalchemy.sql.elements import Cast - -import ibis.common.exceptions as com -import ibis.expr.operations as ops -from ibis import util -from ibis.backends.base.sql.alchemy.registry import ( - fixed_arity, - geospatial_functions, - get_col, - get_sqla_table, - reduction, - unary, - varargs, -) -from ibis.backends.postgres.registry import _literal as _postgres_literal -from ibis.backends.postgres.registry import operation_registry as 
_operation_registry - -operation_registry = { - op: _operation_registry[op] - for op in _operation_registry.keys() - geospatial_functions.keys() -} - - -def _literal(t, op): - value = op.value - dtype = op.dtype - - if value is None: - return sa.null() - - if dtype.is_floating(): - if np.isnan(value): - return _SF_NAN - - if np.isinf(value): - return _SF_NEG_INF if value < 0 else _SF_POS_INF - elif dtype.is_timestamp(): - args = ( - value.year, - value.month, - value.day, - value.hour, - value.minute, - value.second, - value.microsecond * 1_000, - ) - if value.tzinfo is not None: - return sa.func.timestamp_tz_from_parts(*args, dtype.timezone) - else: - return sa.func.timestamp_from_parts(*args) - elif dtype.is_date(): - return sa.func.date_from_parts(value.year, value.month, value.day) - elif dtype.is_time(): - nanos = value.microsecond * 1_000 - return sa.func.time_from_parts(value.hour, value.minute, value.second, nanos) - elif dtype.is_array(): - return sa.func.array_construct(*value) - elif dtype.is_map() or dtype.is_struct(): - return sa.func.object_construct_keep_null( - *itertools.chain.from_iterable(value.items()) - ) - elif dtype.is_uuid(): - return sa.literal(str(value)) - return _postgres_literal(t, op) - - -def _table_column(t, op): - ctx = t.context - table = op.table - - sa_table = get_sqla_table(ctx, table) - out_expr = get_col(sa_table, op) - - if (dtype := op.dtype).is_timestamp() and (timezone := dtype.timezone) is not None: - out_expr = sa.func.convert_timezone(timezone, out_expr).label(op.name) - - # If the column does not originate from the table set in the current SELECT - # context, we should format as a subquery - if t.permit_subquery and ctx.is_foreign_expr(table): - return sa.select(out_expr) - - return out_expr - - -def _string_find(t, op): - args = [t.translate(op.substr), t.translate(op.arg)] - if (start := op.start) is not None: - args.append(t.translate(start) + 1) - return sa.func.position(*args) - 1 - - -def _round(t, op): - args = [t.translate(op.arg)] - if (digits := op.digits) is not None: - args.append(t.translate(digits)) - return sa.func.round(*args) - - -def _day_of_week_name(arg): - return sa.case( - ("Sun", "Sunday"), - ("Mon", "Monday"), - ("Tue", "Tuesday"), - ("Wed", "Wednesday"), - ("Thu", "Thursday"), - ("Fri", "Friday"), - ("Sat", "Saturday"), - value=sa.func.dayname(arg), - else_=None, - ) - - -def _extract_url_query(t, op): - parsed_url = sa.func.parse_url(t.translate(op.arg), 1) - - if (key := op.key) is not None: - r = sa.func.get(sa.func.get(parsed_url, "parameters"), t.translate(key)) - else: - r = sa.func.get(parsed_url, "query") - - return sa.func.nullif(sa.func.as_varchar(r), "") - - -def _array_slice(t, op): - arg = t.translate(op.arg) - - if (start := op.start) is not None: - start = t.translate(start) - else: - start = 0 - - if (stop := op.stop) is not None: - stop = t.translate(stop) - else: - stop = sa.func.array_size(arg) - - return sa.func.array_slice(t.translate(op.arg), start, stop) - - -def _nth_value(t, op): - if not isinstance(nth := op.nth, ops.Literal): - raise TypeError(f"`nth` argument must be a literal Python int, got {type(nth)}") - return sa.func.nth_value(t.translate(op.arg), nth.value + 1) - - -def _arbitrary(t, op): - if (how := op.how) == "first": - return t._reduction(lambda x: sa.func.get(sa.func.array_agg(x), 0), op) - elif how == "last": - return t._reduction( - lambda x: sa.func.get( - sa.func.array_agg(x), sa.func.array_size(sa.func.array_agg(x)) - 1 - ), - op, - ) - else: - raise 
com.UnsupportedOperationError("how must be 'first' or 'last'") - - -@compiles(Cast, "snowflake") -def compiles_cast(element, compiler, **kw): - typ = compiler.visit_typeclause(element, **kw) - if typ in ("OBJECT", "ARRAY"): - arg = compiler.process(element.clause, **kw) - return f"IFF(IS_{typ}({arg}), {arg}, NULL)" - return compiler.visit_cast(element, **kw) - - -@compiles(sa.TEXT, "snowflake") -@compiles(sa.VARCHAR, "snowflake") -def compiles_string(element, compiler, **kw): - return "VARCHAR" - - -@compiles(OBJECT, "snowflake") -@compiles(ARRAY, "snowflake") -@compiles(VARIANT, "snowflake") -def compiles_object_type(element, compiler, **kw): - return type(element).__name__.upper() - - -def _unnest(t, op): - arg = t.translate(op.arg) - # HACK: https://community.snowflake.com/s/question/0D50Z000086MVhnSAG/has-anyone-found-a-way-to-unnest-an-array-without-loosing-the-null-values - sep = util.guid() - col = sa.func.nullif( - sa.func.split_to_table( - sa.func.array_to_string( - sa.func.nullif(arg, sa.func.array_construct()), sep - ), - sep, - ) - .table_valued("value") # seq, index, value is supported but we only need value - .lateral() - .c["value"], - "", - ) - return sa.cast( - sa.func.coalesce(sa.func.try_parse_json(col), sa.func.to_variant(col)), - type_=t.get_sqla_type(op.dtype), - ) - - -def _group_concat(t, op): - if (where := op.where) is None: - return sa.func.listagg(t.translate(op.arg), t.translate(op.sep)) - - where_sa = t.translate(where) - arg_sa = sa.func.iff(where_sa, t.translate(op.arg), None) - - return sa.func.iff( - sa.func.count_if(arg_sa != sa.null()) != 0, - sa.func.listagg(arg_sa, t.translate(op.sep)), - None, - ) - - -def _array_zip(t, op): - return sa.type_coerce( - sa.func.ibis_udfs.public.array_zip( - sa.func.array_construct(*map(t.translate, op.arg)) - ), - t.get_sqla_type(op.dtype), - ) - - -def _regex_extract(t, op): - arg = t.translate(op.arg) - pattern = t.translate(op.pattern) - index = t.translate(op.index) - # https://docs.snowflake.com/en/sql-reference/functions/regexp_substr - return sa.func.regexp_substr(arg, pattern, 1, 1, "ce", index) - - -def _map_get(t, op): - arg = op.arg - key = op.key - default = op.default - dtype = op.dtype - sqla_type = t.get_sqla_type(dtype) - expr = sa.func.coalesce( - sa.func.get(t.translate(arg), t.translate(key)), - sa.func.to_variant(t.translate(default)), - type_=sqla_type, - ) - if dtype.is_json() or dtype.is_null(): - return expr - - # cast if ibis thinks the value type is not JSON - # - # this ensures that we can get deserialized map values even though maps are - # always JSON in the value type inside snowflake - return sa.cast(expr, sqla_type) - - -def _timestamp_bucket(t, op): - if op.offset is not None: - raise com.UnsupportedOperationError( - "`offset` is not supported in the Snowflake backend for timestamp bucketing" - ) - - interval = op.interval - - if not isinstance(interval, ops.Literal): - raise com.UnsupportedOperationError( - f"Interval must be a literal for the Snowflake backend, got {type(interval)}" - ) - - return sa.func.time_slice( - t.translate(op.arg), interval.value, interval.dtype.unit.name - ) - - -class _flatten(sa.sql.functions.GenericFunction): - def __init__(self, arg, *, type: sa.types.TypeEngine) -> None: - super().__init__(arg) - self.type = sa.sql.sqltypes.TableValueType( - sa.Column("index", sa.BIGINT()), sa.Column("value", type) - ) - - -@compiles(_flatten, "snowflake") -def compiles_flatten(element, compiler, **kw): - (arg,) = element.clauses.clauses - return f"TABLE(FLATTEN(INPUT => 
{compiler.process(arg, **kw)}, MODE => 'ARRAY'))" - - -def _timestamp_range(t, op): - if not isinstance(op.step, ops.Literal): - raise com.UnsupportedOperationError("`step` argument must be a literal") - - start = t.translate(op.start) - stop = t.translate(op.stop) - - unit = op.step.dtype.unit.name.lower() - step = op.step.value - - value_type = op.dtype.value_type - - f = _flatten( - sa.func.array_generate_range(0, sa.func.datediff(unit, start, stop), step), - type=t.get_sqla_type(op.start.dtype), - ).alias("f") - return sa.func.iff( - step != 0, - sa.select( - sa.func.array_agg( - sa.func.replace( - # conversion to varchar is necessary to control - # the timestamp format - # - # otherwise, since timestamps in arrays become strings - # anyway due to lack of parameterized type support in - # Snowflake the format depends on a session parameter - sa.func.to_varchar( - sa.func.dateadd(unit, f.c.value, start), - 'YYYY-MM-DD"T"HH24:MI:SS.FF6' - + (value_type.timezone is not None) * "TZH:TZM", - ), - # timezones are always hour:minute offsets from UTC, not - # named, so replacing "Z" shouldn't be an issue - "Z", - "+00:00", - ), - ) - ) - .select_from(f) - .scalar_subquery(), - sa.func.array_construct(), - ) - - -_TIMESTAMP_UNITS_TO_SCALE = {"s": 0, "ms": 3, "us": 6, "ns": 9} - -_SF_POS_INF = sa.func.to_double("Inf") -_SF_NEG_INF = sa.func.to_double("-Inf") -_SF_NAN = sa.func.to_double("NaN") - -operation_registry.update( - { - ops.JSONGetItem: fixed_arity(sa.func.get, 2), - ops.StringFind: _string_find, - ops.Map: fixed_arity( - lambda keys, values: sa.func.iff( - sa.func.is_array(keys) & sa.func.is_array(values), - sa.func.arrays_to_object(keys, values), - sa.null(), - ), - 2, - ), - ops.MapKeys: unary( - lambda arg: sa.func.iff( - sa.func.is_object(arg), sa.func.object_keys(arg), sa.null() - ) - ), - ops.MapValues: unary( - lambda arg: sa.func.iff( - sa.func.is_object(arg), - sa.func.ibis_udfs.public.object_values(arg), - sa.null(), - ) - ), - ops.MapGet: _map_get, - ops.MapContains: fixed_arity( - lambda arg, key: sa.func.array_contains( - sa.func.to_variant(key), - sa.func.iff( - sa.func.is_object(arg), sa.func.object_keys(arg), sa.null() - ), - ), - 2, - ), - ops.MapMerge: fixed_arity( - lambda a, b: sa.func.iff( - sa.func.is_object(a) & sa.func.is_object(b), - sa.func.ibis_udfs.public.object_merge(a, b), - sa.null(), - ), - 2, - ), - ops.MapLength: unary( - lambda arg: sa.func.array_size( - sa.func.iff(sa.func.is_object(arg), sa.func.object_keys(arg), sa.null()) - ) - ), - ops.BitwiseAnd: fixed_arity(sa.func.bitand, 2), - ops.BitwiseNot: unary(sa.func.bitnot), - ops.BitwiseOr: fixed_arity(sa.func.bitor, 2), - ops.BitwiseXor: fixed_arity(sa.func.bitxor, 2), - ops.BitwiseLeftShift: fixed_arity(sa.func.bitshiftleft, 2), - ops.BitwiseRightShift: fixed_arity(sa.func.bitshiftright, 2), - ops.Ln: unary(sa.func.ln), - ops.Log2: unary(lambda arg: sa.func.log(2, arg)), - ops.Log10: unary(lambda arg: sa.func.log(10, arg)), - ops.Log: fixed_arity(lambda arg, base: sa.func.log(base, arg), 2), - ops.IsInf: unary(lambda arg: arg.in_((_SF_POS_INF, _SF_NEG_INF))), - ops.IsNan: unary(lambda arg: arg == _SF_NAN), - ops.Literal: _literal, - ops.Round: _round, - ops.Modulus: fixed_arity(sa.func.mod, 2), - ops.Mode: reduction(sa.func.mode), - ops.IfElse: fixed_arity(sa.func.iff, 3), - # numbers - ops.RandomScalar: fixed_arity( - lambda: sa.func.uniform( - sa.func.to_double(0.0), sa.func.to_double(1.0), sa.func.random() - ), - 0, - ), - # time and dates - ops.TimeFromHMS: fixed_arity(sa.func.time_from_parts, 
3), - # columns - ops.DayOfWeekName: unary(_day_of_week_name), - ops.ExtractProtocol: unary( - lambda arg: sa.func.nullif( - sa.func.as_varchar(sa.func.get(sa.func.parse_url(arg, 1), "scheme")), "" - ) - ), - ops.ExtractAuthority: unary( - lambda arg: sa.func.concat_ws( - ":", - sa.func.as_varchar(sa.func.get(sa.func.parse_url(arg, 1), "host")), - sa.func.as_varchar(sa.func.get(sa.func.parse_url(arg, 1), "port")), - ) - ), - ops.ExtractFile: unary( - lambda arg: sa.func.concat_ws( - "?", - "/" - + sa.func.as_varchar(sa.func.get(sa.func.parse_url(arg, 1), "path")), - sa.func.as_varchar(sa.func.get(sa.func.parse_url(arg, 1), "query")), - ) - ), - ops.ExtractPath: unary( - lambda arg: ( - "/" + sa.func.as_varchar(sa.func.get(sa.func.parse_url(arg, 1), "path")) - ) - ), - ops.ExtractQuery: _extract_url_query, - ops.ExtractFragment: unary( - lambda arg: sa.func.nullif( - sa.func.as_varchar(sa.func.get(sa.func.parse_url(arg, 1), "fragment")), - "", - ) - ), - ops.ArrayIndex: fixed_arity(sa.func.get, 2), - ops.ArrayLength: fixed_arity(sa.func.array_size, 1), - ops.ArrayConcat: varargs( - lambda *args: functools.reduce(sa.func.array_cat, args) - ), - ops.Array: lambda t, op: sa.func.array_construct(*map(t.translate, op.exprs)), - ops.ArraySlice: _array_slice, - ops.ArrayCollect: reduction( - lambda arg: sa.func.array_agg( - sa.func.ifnull(arg, sa.func.parse_json("null")), type_=ARRAY - ) - ), - ops.ArrayContains: fixed_arity( - lambda arr, el: sa.func.array_contains(sa.func.to_variant(el), arr), 2 - ), - ops.ArrayPosition: fixed_arity( - # snowflake is zero-based here, so we don't need to subtract 1 from the result - lambda lst, el: sa.func.coalesce( - sa.func.array_position(sa.func.to_variant(el), lst), -1 - ), - 2, - ), - ops.ArrayDistinct: fixed_arity(sa.func.array_distinct, 1), - ops.ArrayUnion: fixed_arity( - lambda left, right: sa.func.array_distinct(sa.func.array_cat(left, right)), - 2, - ), - ops.ArrayRemove: fixed_arity(sa.func.array_remove, 2), - ops.ArrayIntersect: fixed_arity(sa.func.array_intersection, 2), - ops.ArrayZip: _array_zip, - ops.ArraySort: unary(sa.func.array_sort), - ops.ArrayRepeat: fixed_arity(sa.func.ibis_udfs.public.array_repeat, 2), - ops.ArrayFlatten: fixed_arity(sa.func.array_flatten, 1), - ops.StringSplit: fixed_arity(sa.func.split, 2), - # snowflake typeof only accepts VARIANT, so we cast - ops.TypeOf: unary(lambda arg: sa.func.typeof(sa.func.to_variant(arg))), - ops.All: reduction(sa.func.booland_agg), - ops.Any: reduction(sa.func.boolor_agg), - ops.BitAnd: reduction(sa.func.bitand_agg), - ops.BitOr: reduction(sa.func.bitor_agg), - ops.BitXor: reduction(sa.func.bitxor_agg), - ops.DateFromYMD: fixed_arity(sa.func.date_from_parts, 3), - ops.StringToTimestamp: fixed_arity(sa.func.to_timestamp_tz, 2), - ops.RegexExtract: _regex_extract, - ops.RegexSearch: fixed_arity( - lambda arg, pattern: sa.func.regexp_instr(arg, pattern) != 0, 2 - ), - ops.RegexReplace: fixed_arity(sa.func.regexp_replace, 3), - ops.ExtractMicrosecond: fixed_arity( - lambda arg: sa.cast( - sa.extract("epoch_microsecond", arg) % 1000000, sa.SMALLINT - ), - 1, - ), - ops.ExtractMillisecond: fixed_arity( - lambda arg: sa.cast( - sa.extract("epoch_millisecond", arg) % 1000, sa.SMALLINT - ), - 1, - ), - ops.TimestampFromYMDHMS: fixed_arity(sa.func.timestamp_from_parts, 6), - ops.TimestampFromUNIX: lambda t, op: sa.func.to_timestamp( - t.translate(op.arg), _TIMESTAMP_UNITS_TO_SCALE[op.unit.short] - ), - ops.StructField: lambda t, op: sa.cast( - sa.func.get(t.translate(op.arg), op.field), 
t.get_sqla_type(op.dtype) - ), - ops.NthValue: _nth_value, - ops.Arbitrary: _arbitrary, - ops.First: reduction(lambda x: sa.func.get(sa.func.array_agg(x), 0)), - ops.Last: reduction( - lambda x: sa.func.get( - sa.func.array_agg(x), sa.func.array_size(sa.func.array_agg(x)) - 1 - ) - ), - ops.StructColumn: lambda t, op: sa.func.object_construct_keep_null( - *itertools.chain.from_iterable(zip(op.names, map(t.translate, op.values))) - ), - ops.Unnest: _unnest, - ops.ArgMin: reduction(sa.func.min_by), - ops.ArgMax: reduction(sa.func.max_by), - ops.ToJSONArray: lambda t, op: t.translate(ops.Cast(op.arg, op.dtype)), - ops.ToJSONMap: lambda t, op: t.translate(ops.Cast(op.arg, op.dtype)), - ops.StartsWith: fixed_arity(sa.func.startswith, 2), - ops.EndsWith: fixed_arity(sa.func.endswith, 2), - ops.GroupConcat: _group_concat, - ops.Hash: unary(sa.func.hash), - ops.ApproxMedian: reduction(lambda x: sa.func.approx_percentile(x, 0.5)), - ops.Median: reduction(sa.func.median), - ops.TableColumn: _table_column, - ops.Levenshtein: fixed_arity(sa.func.editdistance, 2), - ops.TimeDelta: fixed_arity( - lambda part, left, right: sa.func.timediff(part, right, left), 3 - ), - ops.DateDelta: fixed_arity( - lambda part, left, right: sa.func.datediff(part, right, left), 3 - ), - ops.TimestampDelta: fixed_arity( - lambda part, left, right: sa.func.timestampdiff(part, right, left), 3 - ), - ops.TimestampBucket: _timestamp_bucket, - ops.IntegerRange: fixed_arity( - lambda start, stop, step: sa.func.iff( - step != 0, - sa.func.array_generate_range(start, stop, step), - sa.func.array_construct(), - ), - 3, - ), - ops.TimestampRange: _timestamp_range, - ops.RegexSplit: fixed_arity(sa.func.ibis_udfs.public.regexp_split, 2), - } -) - -_invalid_operations = { - # ibis.expr.operations.array - ops.ArrayMap, - ops.ArrayFilter, - # ibis.expr.operations.reductions - ops.MultiQuantile, - # ibis.expr.operations.strings - ops.FindInSet, - # ibis.expr.operations.temporal - ops.IntervalFromInteger, - ops.TimestampDiff, -} - -operation_registry = { - k: v for k, v in operation_registry.items() if k not in _invalid_operations -} diff --git a/ibis/backends/snowflake/tests/conftest.py b/ibis/backends/snowflake/tests/conftest.py index 1ff557bb8063..6d041a3c5a8b 100644 --- a/ibis/backends/snowflake/tests/conftest.py +++ b/ibis/backends/snowflake/tests/conftest.py @@ -3,19 +3,21 @@ import concurrent.futures import os import tempfile +from contextlib import closing from pathlib import Path from typing import TYPE_CHECKING, Any +from urllib.parse import parse_qs, urlparse from urllib.request import urlretrieve import pyarrow.parquet as pq import pyarrow_hotfix # noqa: F401 import pytest -import sqlalchemy as sa +import snowflake.connector as sc import sqlglot as sg import ibis +from ibis.backends.base.sqlglot.datatypes import SnowflakeType from ibis.backends.conftest import TEST_TABLES -from ibis.backends.snowflake.datatypes import SnowflakeType from ibis.backends.tests.base import BackendTest from ibis.formats.pyarrow import PyArrowSchema @@ -54,8 +56,8 @@ def copy_into(con, data_dir: Path, table: str) -> None: f"$1:{name}{'::VARCHAR' * typ.is_timestamp()}::{SnowflakeType.to_string(typ)}" for name, typ in schema.items() ) - con.exec_driver_sql(f"PUT {file.as_uri()} @{stage}/{file.name}") - con.exec_driver_sql( + con.execute(f"PUT {file.as_uri()} @{stage}/{file.name}") + con.execute( f""" COPY INTO {table} FROM (SELECT {columns} FROM @{stage}/{file.name}) @@ -67,7 +69,7 @@ def copy_into(con, data_dir: Path, table: str) -> None: class 
TestConf(BackendTest): supports_map = True default_identifier_case_fn = staticmethod(str.upper) - deps = ("snowflake.connector", "snowflake.sqlalchemy") + deps = ("snowflake.connector",) supports_tpch = True def load_tpch(self) -> None: @@ -76,7 +78,8 @@ def load_tpch(self) -> None: def _tpch_table(self, name: str): t = self.connection.table( self.default_identifier_case_fn(name), - schema="SNOWFLAKE_SAMPLE_DATA.TPCH_SF1", + database="SNOWFLAKE_SAMPLE_DATA", + schema="TPCH_SF1", ) return t.rename("snake_case") @@ -97,42 +100,50 @@ def add_catalog_and_schema(node): def _load_data(self, **_: Any) -> None: """Load test data into a Snowflake backend instance.""" - snowflake_url = _get_url() - raw_url = sa.engine.make_url(snowflake_url) - _, schema = raw_url.database.rsplit("/", 1) - url = raw_url.set(database="") - con = sa.create_engine( - url, connect_args={"session_parameters": {"MULTI_STATEMENT_COUNT": "0"}} - ) - - dbschema = f"ibis_testing.{schema}" - - with con.begin() as c: - c.exec_driver_sql( - f"""\ -CREATE DATABASE IF NOT EXISTS ibis_testing; -USE DATABASE ibis_testing; -CREATE SCHEMA IF NOT EXISTS {dbschema}; -USE SCHEMA {dbschema}; -CREATE TEMP STAGE ibis_testing; -CREATE STAGE IF NOT EXISTS models; -{self.script_dir.joinpath("snowflake.sql").read_text()}""" + url = urlparse(_get_url()) + db, schema = url.path[1:].split("/", 1) + (warehouse,) = parse_qs(url.query)["warehouse"] + connect_args = { + "user": url.username, + "password": url.password, + "account": url.hostname, + "warehouse": warehouse, + } + + session_parameters = { + "MULTI_STATEMENT_COUNT": 0, + "JSON_INDENT": 0, + "PYTHON_CONNECTOR_QUERY_RESULT_FORMAT": "arrow_force", + } + + dbschema = f"{db}.{schema}" + + with closing( + sc.connect(**connect_args, session_parameters=session_parameters) + ) as con, closing(con.cursor()) as c: + c.execute( + f""" + CREATE DATABASE IF NOT EXISTS {db}; + CREATE SCHEMA IF NOT EXISTS {dbschema}; + USE {dbschema}; + CREATE TEMP STAGE {db}; + CREATE STAGE IF NOT EXISTS models; + {self.script_dir.joinpath("snowflake.sql").read_text()} + """ ) - with tempfile.TemporaryDirectory() as d: - path, _ = urlretrieve( - "https://storage.googleapis.com/ibis-testing-data/model.joblib", - os.path.join(d, "model.joblib"), - ) + with tempfile.TemporaryDirectory() as d: + path, _ = urlretrieve( + "https://storage.googleapis.com/ibis-testing-data/model.joblib", + os.path.join(d, "model.joblib"), + ) - assert os.path.exists(path) - assert os.path.getsize(path) > 0 + assert os.path.exists(path) + assert os.path.getsize(path) - with con.begin() as c: - c.exec_driver_sql(f"PUT {Path(path).as_uri()} @MODELS") + c.execute(f"PUT {Path(path).as_uri()} @MODELS") - with con.begin() as c: # not much we can do to make this faster, but running these in # multiple threads seems to save about 2x with concurrent.futures.ThreadPoolExecutor() as exe: diff --git a/ibis/backends/snowflake/tests/test_client.py b/ibis/backends/snowflake/tests/test_client.py index 9a46c02bdd96..8a0de2b0d9b6 100644 --- a/ibis/backends/snowflake/tests/test_client.py +++ b/ibis/backends/snowflake/tests/test_client.py @@ -42,11 +42,8 @@ def temp_schema(con, temp_db): def test_cross_db_access(con, temp_db, temp_schema): table = gen_name("tmp_table") - with con.begin() as c: - c.exec_driver_sql( - f'CREATE TABLE "{temp_db}"."{temp_schema}"."{table}" ("x" INT)' - ) - t = con.table(table, schema=f"{temp_db}.{temp_schema}") + con.raw_sql(f'CREATE TABLE "{temp_db}"."{temp_schema}"."{table}" ("x" INT)').close() + t = con.table(table, schema=temp_schema, 
database=temp_db) assert t.schema() == ibis.schema(dict(x="int")) assert t.execute().empty @@ -55,7 +52,7 @@ def test_cross_db_create_table(con, temp_db, temp_schema): table_name = gen_name("tmp_table") data = pd.DataFrame({"key": list("abc"), "value": [[1], [2], [3]]}) table = con.create_table(table_name, data, database=f"{temp_db}.{temp_schema}") - queried_table = con.table(table_name, schema=f"{temp_db}.{temp_schema}") + queried_table = con.table(table_name, database=temp_db, schema=temp_schema) tm.assert_frame_equal(table.execute(), data) tm.assert_frame_equal(queried_table.execute(), data) @@ -156,14 +153,12 @@ def test_drop_current_db_not_allowed(db_con): assert db_con.current_database == cur_db - with db_con.begin() as c: - c.exec_driver_sql(f'USE DATABASE "{database}"') + db_con.raw_sql(f'USE DATABASE "{database}"').close() with pytest.raises(com.UnsupportedOperationError, match="behavior is undefined"): db_con.drop_database(database) - with db_con.begin() as c: - c.exec_driver_sql(f"USE DATABASE {cur_db}") + db_con.raw_sql(f'USE DATABASE "{cur_db}"').close() db_con.drop_database(database) @@ -176,14 +171,12 @@ def test_drop_current_schema_not_allowed(schema_con): assert schema_con.current_schema == cur_schema - with schema_con.begin() as c: - c.exec_driver_sql(f'USE SCHEMA "{schema}"') + schema_con.raw_sql(f'USE SCHEMA "{schema}"').close() with pytest.raises(com.UnsupportedOperationError, match="behavior is undefined"): schema_con.drop_schema(schema) - with schema_con.begin() as c: - c.exec_driver_sql(f"USE SCHEMA {cur_schema}") + schema_con.raw_sql(f'USE SCHEMA "{cur_schema}"').close() schema_con.drop_schema(schema) diff --git a/ibis/backends/snowflake/tests/test_datatypes.py b/ibis/backends/snowflake/tests/test_datatypes.py index a740e3a15aff..c6e9ec17422a 100644 --- a/ibis/backends/snowflake/tests/test_datatypes.py +++ b/ibis/backends/snowflake/tests/test_datatypes.py @@ -5,7 +5,7 @@ import ibis import ibis.expr.datatypes as dt -from ibis.backends.snowflake.datatypes import SnowflakeType +from ibis.backends.base.sqlglot.datatypes import SnowflakeType from ibis.backends.snowflake.tests.conftest import _get_url from ibis.util import gen_name @@ -87,70 +87,32 @@ def con(): ) def test_extract_type_from_table_query(con, snowflake_type, ibis_type): name = gen_name("test_extract_type_from_table") - with con.begin() as c: - c.exec_driver_sql(f'CREATE TEMP TABLE "{name}" ("a" {snowflake_type})') - + query = f'CREATE TEMP TABLE "{name}" ("a" {snowflake_type})' + con.raw_sql(query).close() expected_schema = ibis.schema(dict(a=ibis_type)) t = con.sql(f'SELECT "a" FROM "{name}"') assert t.schema() == expected_schema -broken_timestamps = pytest.mark.xfail( - raises=AssertionError, - reason=( - "snowflake-sqlalchemy timestamp types are broken and do not preserve scale " - "information" - ), -) - - @pytest.mark.parametrize( ("snowflake_type", "ibis_type"), [ - # what the result SHOULD be - param("DATETIME", dt.Timestamp(scale=9), marks=broken_timestamps), - param("TIMESTAMP", dt.Timestamp(scale=9), marks=broken_timestamps), - param("TIMESTAMP(3)", dt.Timestamp(scale=3), marks=broken_timestamps), - param( - "TIMESTAMP_LTZ", - dt.Timestamp(timezone="UTC", scale=9), - marks=broken_timestamps, - ), - param( - "TIMESTAMP_LTZ(3)", - dt.Timestamp(timezone="UTC", scale=3), - marks=broken_timestamps, - ), - param("TIMESTAMP_NTZ", dt.Timestamp(scale=9), marks=broken_timestamps), - param("TIMESTAMP_NTZ(3)", dt.Timestamp(scale=3), marks=broken_timestamps), - param( - "TIMESTAMP_TZ", - 
dt.Timestamp(timezone="UTC", scale=9), - marks=broken_timestamps, - ), - param( - "TIMESTAMP_TZ(3)", - dt.Timestamp(timezone="UTC", scale=3), - marks=broken_timestamps, - ), - # what the result ACTUALLY is - ("DATETIME", dt.timestamp), - ("TIMESTAMP", dt.timestamp), - ("TIMESTAMP(3)", dt.timestamp), - ("TIMESTAMP_LTZ", dt.Timestamp(timezone="UTC")), - ("TIMESTAMP_LTZ(3)", dt.Timestamp(timezone="UTC")), - ("TIMESTAMP_NTZ", dt.timestamp), - ("TIMESTAMP_NTZ(3)", dt.timestamp), - ("TIMESTAMP_TZ", dt.Timestamp(timezone="UTC")), - ("TIMESTAMP_TZ(3)", dt.Timestamp(timezone="UTC")), + param("DATETIME", dt.Timestamp(scale=9)), + param("TIMESTAMP", dt.Timestamp(scale=9)), + param("TIMESTAMP(3)", dt.Timestamp(scale=3)), + param("TIMESTAMP_LTZ", dt.Timestamp(timezone="UTC", scale=9)), + param("TIMESTAMP_LTZ(3)", dt.Timestamp(timezone="UTC", scale=3)), + param("TIMESTAMP_NTZ", dt.Timestamp(scale=9)), + param("TIMESTAMP_NTZ(3)", dt.Timestamp(scale=3)), + param("TIMESTAMP_TZ", dt.Timestamp(timezone="UTC", scale=9)), + param("TIMESTAMP_TZ(3)", dt.Timestamp(timezone="UTC", scale=3)), ], ) -def test_extract_timestamp_from_table_sqlalchemy(con, snowflake_type, ibis_type): - """snowflake-sqlalchemy doesn't preserve timestamp scale information""" +def test_extract_timestamp_from_table(con, snowflake_type, ibis_type): name = gen_name("test_extract_type_from_table") - with con.begin() as c: - c.exec_driver_sql(f'CREATE TEMP TABLE "{name}" ("a" {snowflake_type})') + query = f'CREATE TEMP TABLE "{name}" ("a" {snowflake_type})' + con.raw_sql(query).close() expected_schema = ibis.schema(dict(a=ibis_type)) diff --git a/ibis/backends/snowflake/tests/test_udf.py b/ibis/backends/snowflake/tests/test_udf.py index 4ee17d757684..b685933b5c66 100644 --- a/ibis/backends/snowflake/tests/test_udf.py +++ b/ibis/backends/snowflake/tests/test_udf.py @@ -40,8 +40,8 @@ def test_builtin_scalar_udf(con, func, args): expr = func(*args) query = f"SELECT {func.__name__}({', '.join(map(repr, args))})" - with con.begin() as c: - expected = c.exec_driver_sql(query).scalar() + with con._safe_raw_sql(query) as cur: + [(expected,)] = cur.fetchall() assert con.execute(expr) == expected @@ -57,8 +57,8 @@ def test_compress(con, func, pyargs, snowargs): expr = func(*pyargs) query = f"SELECT compress({', '.join(snowargs)})" - with con.begin() as c: - expected = c.exec_driver_sql(query).scalar() + with con._safe_raw_sql(query) as cur: + [(expected,)] = cur.fetchall() assert con.execute(expr) == expected @@ -88,8 +88,8 @@ def test_builtin_agg_udf(con): ) ) """ - with con.begin() as c: - expected = c.exec_driver_sql(query).cursor.fetch_pandas_all() + with con._safe_raw_sql(query) as cur: + expected = cur.fetch_pandas_all() tm.assert_frame_equal(result, expected) diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py index 230ccfe1d37a..3384d7c2bd9e 100644 --- a/ibis/backends/tests/errors.py +++ b/ibis/backends/tests/errors.py @@ -69,3 +69,8 @@ from deltalake import PyDeltaTableError except ImportError: PyDeltaTableError = None + +try: + from snowflake.connector.errors import ProgrammingError as SnowflakeProgrammingError +except ImportError: + SnowflakeProgrammingError = None diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/snowflake/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/snowflake/out.sql new file mode 100644 index 000000000000..22183d00d7e6 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/snowflake/out.sql @@ -0,0 +1,5 @@ 
+SELECT + "t0"."id", + "t0"."bool_col" +FROM "FUNCTIONAL_ALLTYPES" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/snowflake/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/snowflake/out.sql new file mode 100644 index 000000000000..22183d00d7e6 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/snowflake/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "FUNCTIONAL_ALLTYPES" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/snowflake/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/snowflake/out.sql new file mode 100644 index 000000000000..e93bc2f62780 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/snowflake/out.sql @@ -0,0 +1,3 @@ +SELECT + SUM("t0"."bigint_col") AS "Sum(bigint_col)" +FROM "FUNCTIONAL_ALLTYPES" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/snowflake/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/snowflake/out.sql new file mode 100644 index 000000000000..6fe48f48c2a8 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/snowflake/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + "t0"."id", + "t0"."bool_col" + FROM "FUNCTIONAL_ALLTYPES" AS "t0" + LIMIT 10 +) AS "t2" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/snowflake/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/snowflake/out.sql index 92e386ef62a4..c1611d8cecc3 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/snowflake/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/snowflake/out.sql @@ -1,9 +1,9 @@ SELECT - "t0"."x" IN (( + "t0"."x" IN ( SELECT - "t0"."x" AS "x" + "t0"."x" FROM "t" AS "t0" WHERE "t0"."x" > 2 - )) AS "InSubquery(x)" + ) AS "InSubquery(x)" FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index b1b1ae048108..4b0a4f7cc056 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -21,6 +21,7 @@ PolarsInvalidOperationError, Py4JError, PySparkAnalysisException, + SnowflakeProgrammingError, ) from ibis.legacy.udf.vectorized import reduction @@ -855,7 +856,7 @@ def test_reduction_ops( id="cond", marks=[ pytest.mark.notyet( - ["snowflake", "mysql"], + ["mysql"], raises=com.UnsupportedOperationError, reason="backend does not support filtered count distinct with more than one column", ), @@ -1277,7 +1278,7 @@ def test_median(alltypes, df): @pytest.mark.notimpl(["dask"], raises=(AssertionError, NotImplementedError, TypeError)) @pytest.mark.notyet( ["snowflake"], - raises=sa.exc.ProgrammingError, + raises=SnowflakeProgrammingError, reason="doesn't support median of strings", ) @pytest.mark.notyet(["polars"], raises=PolarsInvalidOperationError) @@ -1318,7 +1319,7 @@ def test_string_quantile(alltypes, func): @pytest.mark.broken(["druid"], raises=AttributeError) @pytest.mark.notyet( ["snowflake"], - raises=sa.exc.ProgrammingError, + raises=SnowflakeProgrammingError, reason="doesn't support median of dates", ) @pytest.mark.notimpl(["dask"], raises=(AssertionError, 
NotImplementedError, TypeError)) @@ -1691,7 +1692,8 @@ def test_grouped_case(backend, con): ) @pytest.mark.notyet(["impala", "flink"], raises=com.UnsupportedOperationError) @pytest.mark.notyet(["clickhouse"], raises=ClickHouseDatabaseError) -@pytest.mark.notyet(["druid", "trino", "snowflake"], raises=sa.exc.ProgrammingError) +@pytest.mark.notyet(["druid", "trino"], raises=sa.exc.ProgrammingError) +@pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) @pytest.mark.notyet(["mysql"], raises=sa.exc.NotSupportedError) @pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notyet(["pyspark"], raises=PySparkAnalysisException) diff --git a/ibis/backends/tests/test_benchmarks.py b/ibis/backends/tests/test_benchmarks.py index 42ea66e8f0f0..3234d3c8693f 100644 --- a/ibis/backends/tests/test_benchmarks.py +++ b/ibis/backends/tests/test_benchmarks.py @@ -730,7 +730,6 @@ def test_insert_duckdb(benchmark, overwrite, tmp_path): def test_snowflake_medium_sized_to_pandas(benchmark): pytest.importorskip("snowflake.connector") - pytest.importorskip("snowflake.sqlalchemy") if (url := os.environ.get("SNOWFLAKE_URL")) is None: pytest.skip("SNOWFLAKE_URL environment variable not set") diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 96f1575e8e83..974c0e9f56a9 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -318,7 +318,6 @@ def test_rename_table(con, temp_table, temp_table_orig): @mark.notyet( ["trino"], reason="trino doesn't support NOT NULL in its in-memory catalog" ) -@mark.broken(["snowflake"], reason="snowflake shows not nullable column as nullable") @pytest.mark.notimpl( ["flink"], raises=com.IbisError, @@ -602,7 +601,7 @@ def test_list_databases(alchemy_con): @pytest.mark.never( - ["bigquery", "postgres", "risingwave", "mssql", "mysql", "snowflake", "oracle"], + ["bigquery", "postgres", "risingwave", "mssql", "mysql", "oracle"], reason="backend does not support client-side in-memory tables", raises=(sa.exc.OperationalError, TypeError, sa.exc.InterfaceError), ) diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index a50fcb2b7e98..6e417032603d 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -15,6 +15,7 @@ DuckDBParserException, PyDeltaTableError, PySparkAnalysisException, + SnowflakeProgrammingError, ) from ibis.formats.pyarrow import PyArrowType @@ -351,9 +352,9 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): pytest.mark.notyet(["impala"], reason="precision not supported"), pytest.mark.notyet(["duckdb"], reason="precision is out of range"), pytest.mark.notyet( - ["druid", "mssql", "snowflake", "trino"], - raises=sa.exc.ProgrammingError, + ["druid", "mssql", "trino"], raises=sa.exc.ProgrammingError ), + pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError), pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError), pytest.mark.notyet(["mysql"], raises=sa.exc.OperationalError), pytest.mark.notyet( diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index cd340aaeb1de..e84a5eb97f02 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -23,6 +23,7 @@ GoogleBadRequest, ImpalaHiveServer2Error, Py4JJavaError, + SnowflakeProgrammingError, ) from ibis.common.annotations import ValidationError @@ -1682,10 +1683,15 @@ def test_static_table_slice(backend, slc, expected_count_fn): ids=str, ) 
@pytest.mark.notyet( - ["mysql", "snowflake", "trino"], + ["mysql", "trino"], raises=sa.exc.ProgrammingError, reason="backend doesn't support dynamic limit/offset", ) +@pytest.mark.notyet( + ["snowflake"], + raises=SnowflakeProgrammingError, + reason="backend doesn't support dynamic limit/offset", +) @pytest.mark.notimpl( ["mssql"], raises=sa.exc.CompileError, @@ -1735,10 +1741,15 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): @pytest.mark.notyet( - ["mysql", "snowflake", "trino"], + ["mysql", "trino"], raises=sa.exc.ProgrammingError, reason="backend doesn't support dynamic limit/offset", ) +@pytest.mark.notyet( + ["snowflake"], + raises=SnowflakeProgrammingError, + reason="backend doesn't support dynamic limit/offset", +) @pytest.mark.notimpl( ["exasol"], raises=sa.exc.CompileError, diff --git a/ibis/backends/tests/test_interactive.py b/ibis/backends/tests/test_interactive.py index 8014ab7f1fa2..bfa3f6adffe1 100644 --- a/ibis/backends/tests/test_interactive.py +++ b/ibis/backends/tests/test_interactive.py @@ -29,8 +29,8 @@ def queries(monkeypatch): @pytest.fixture(scope="module") -def table(con): - return con.table("functional_alltypes") +def table(backend): + return backend.functional_alltypes def test_interactive_execute_on_repr(table, queries, snapshot): diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 27c4aa9b5110..2a15dd5f8f3d 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -23,6 +23,7 @@ GoogleBadRequest, ImpalaHiveServer2Error, Py4JError, + SnowflakeProgrammingError, ) from ibis.expr import datatypes as dt from ibis.tests.util import assert_equal @@ -254,7 +255,7 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? { "bigquery": decimal.Decimal("1.1"), - "snowflake": "1.1", + "snowflake": 1.1, "sqlite": 1.1, "trino": 1.1, "dask": decimal.Decimal("1.1"), @@ -272,7 +273,7 @@ def test_numeric_literal(con, backend, expr, expected_types): }, { "bigquery": "NUMERIC", - "snowflake": "VARCHAR", + "snowflake": "DECIMAL", "sqlite": "real", "trino": "decimal(2,1)", "duckdb": "DECIMAL(18,3)", @@ -308,7 +309,7 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? { "bigquery": decimal.Decimal("1.1"), - "snowflake": "1.100000000", + "snowflake": 1.1, "sqlite": 1.1, "trino": 1.1, "duckdb": decimal.Decimal("1.100000000"), @@ -328,7 +329,7 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": "NUMERIC", "clickhouse": "Decimal(38, 9)", - "snowflake": "VARCHAR", + "snowflake": "DECIMAL", "sqlite": "real", "trino": "decimal(2,1)", "duckdb": "DECIMAL(38,9)", @@ -359,7 +360,6 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? 
{ "bigquery": decimal.Decimal("1.1"), - "snowflake": "1.10000000000000000000000000000000000000", "sqlite": 1.1, "trino": 1.1, "dask": decimal.Decimal("1.1"), @@ -378,7 +378,6 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": "BIGNUMERIC", "clickhouse": "Decimal(76, 38)", - "snowflake": "VARCHAR", "sqlite": "real", "trino": "decimal(2,1)", "duckdb": "DECIMAL(18,3)", @@ -386,10 +385,8 @@ def test_numeric_literal(con, backend, expr, expected_types): "risingwave": "numeric", }, marks=[ - pytest.mark.notimpl( - ["exasol"], - raises=ExaQueryError, - ), + pytest.mark.notimpl(["exasol"], raises=ExaQueryError), + pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError), pytest.mark.broken( ["impala"], "impala.error.HiveServer2Error: AnalysisException: Syntax error in line 1:" @@ -417,7 +414,6 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? { "bigquery": float("inf"), - "snowflake": "Infinity", "sqlite": float("inf"), "postgres": float("nan"), "risingwave": float("nan"), @@ -429,7 +425,6 @@ def test_numeric_literal(con, backend, expr, expected_types): }, { "bigquery": "FLOAT64", - "snowflake": "VARCHAR", "sqlite": "real", "trino": "decimal(2,1)", "postgres": "numeric", @@ -488,6 +483,11 @@ def test_numeric_literal(con, backend, expr, expected_types): "Infinity is not supported in Flink SQL", raises=ValueError, ), + pytest.mark.notyet( + ["snowflake"], + "infinity is not allowed as a decimal value", + raises=SnowflakeProgrammingError, + ), ], id="decimal-infinity+", ), @@ -496,7 +496,6 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? { "bigquery": float("-inf"), - "snowflake": "-Infinity", "sqlite": float("-inf"), "postgres": float("nan"), "risingwave": float("nan"), @@ -508,7 +507,6 @@ def test_numeric_literal(con, backend, expr, expected_types): }, { "bigquery": "FLOAT64", - "snowflake": "VARCHAR", "sqlite": "real", "trino": "decimal(2,1)", "postgres": "numeric", @@ -567,6 +565,11 @@ def test_numeric_literal(con, backend, expr, expected_types): "Infinity is not supported in Flink SQL", raises=ValueError, ), + pytest.mark.notyet( + ["snowflake"], + "infinity is not allowed as a decimal value", + raises=SnowflakeProgrammingError, + ), ], id="decimal-infinity-", ), @@ -575,7 +578,7 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? 
{ "bigquery": float("nan"), - "snowflake": "NaN", + "snowflake": float("nan"), "sqlite": None, "postgres": float("nan"), "risingwave": float("nan"), @@ -587,7 +590,7 @@ def test_numeric_literal(con, backend, expr, expected_types): }, { "bigquery": "FLOAT64", - "snowflake": "VARCHAR", + "snowflake": "DOUBLE", "sqlite": "null", "trino": "decimal(2,1)", "postgres": "numeric", @@ -654,6 +657,11 @@ def test_numeric_literal(con, backend, expr, expected_types): "NaN is not supported in Flink SQL", raises=ValueError, ), + pytest.mark.notyet( + ["snowflake"], + "NaN is not allowed as a decimal value", + raises=SnowflakeProgrammingError, + ), ], id="decimal-NaN", ), @@ -1435,7 +1443,7 @@ def test_floating_mod(backend, alltypes, df): ) @pytest.mark.notyet(["mssql"], raises=(sa.exc.OperationalError, sa.exc.DataError)) @pytest.mark.notyet(["postgres"], raises=sa.exc.DataError) -@pytest.mark.notyet(["snowflake"], raises=sa.exc.ProgrammingError) +@pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) @pytest.mark.notimpl(["exasol"], raises=(sa.exc.DBAPIError, com.IbisTypeError)) def test_divide_by_zero(backend, alltypes, df, column, denominator): expr = alltypes[column] / denominator @@ -1486,6 +1494,7 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "pyspark", "polars", "flink", + "snowflake", ], reason="Not SQLAlchemy backends", ) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index eb1330d8c19c..baff2a018e18 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -28,6 +28,7 @@ PolarsPanicException, Py4JJavaError, PySparkIllegalArgumentException, + SnowflakeProgrammingError, ) from ibis.common.annotations import ValidationError @@ -1638,17 +1639,12 @@ def test_interval_add_cast_column(backend, alltypes, df): param( lambda t: ( t.mutate(suffix="%d") - .select( - [ - lambda t: t.timestamp_col.strftime("%Y%m" + t.suffix).name( - "formatted" - ) - ] - ) + .select(formatted=lambda t: t.timestamp_col.strftime("%Y%m" + t.suffix)) .formatted ), "%Y%m%d", marks=[ + pytest.mark.notimpl(["pandas"], raises=com.OperationNotDefinedError), pytest.mark.notimpl( [ "pandas", @@ -1666,7 +1662,6 @@ def test_interval_add_cast_column(backend, alltypes, df): [ "postgres", "risingwave", - "snowflake", ], raises=AttributeError, reason="Neither 'concat' object nor 'Comparator' object has an attribute 'value'", @@ -1684,14 +1679,6 @@ def test_interval_add_cast_column(backend, alltypes, df): raises=AttributeError, reason="'StringConcat' object has no attribute 'value'", ), - pytest.mark.notyet( - ["duckdb"], - raises=com.UnsupportedOperationError, - reason=( - "DuckDB format_str must be a literal `str`; got " - "" - ), - ), pytest.mark.notimpl( ["druid"], raises=AttributeError, @@ -1831,7 +1818,7 @@ def test_integer_to_timestamp(backend, con, unit): "(snowflake.connector.errors.ProgrammingError) 100096 (22007): " "Can't parse '11/01/10' as timestamp with format '%m/%d/%y'" ), - raises=sa.exc.ProgrammingError, + raises=SnowflakeProgrammingError, ), pytest.mark.never( ["flink"], @@ -2326,7 +2313,7 @@ def test_extract_time_from_timestamp(con, microsecond): ["snowflake"], "(snowflake.connector.errors.ProgrammingError) 001007 (22023): SQL compilation error:" "invalid type [CAST(INTERVAL_LITERAL('second', '1') AS VARIANT)] for parameter 'TO_VARIANT'", - raises=sa.exc.ProgrammingError, + raises=SnowflakeProgrammingError, ) @pytest.mark.broken( ["druid"], @@ -2864,7 +2851,7 @@ def test_delta(con, start, end, unit, 
expected): ), pytest.mark.notimpl( ["snowflake"], - raises=sa.exc.ProgrammingError, + raises=SnowflakeProgrammingError, reason="snowflake doesn't support sub-second interval precision", ), pytest.mark.notimpl( diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index d85cfbcbd4be..28ae24cfd19c 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -18,6 +18,7 @@ ImpalaHiveServer2Error, Py4JJavaError, PySparkAnalysisException, + SnowflakeProgrammingError, ) from ibis.legacy.udf.vectorized import analytic, reduction @@ -875,7 +876,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): pytest.mark.notyet( ["snowflake"], reason="backend requires ordering", - raises=sa.exc.ProgrammingError, + raises=SnowflakeProgrammingError, ), pytest.mark.notimpl( ["risingwave"], @@ -927,7 +928,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): pytest.mark.notyet( ["snowflake"], reason="backend requires ordering", - raises=sa.exc.ProgrammingError, + raises=SnowflakeProgrammingError, ), pytest.mark.notimpl( ["risingwave"], @@ -1036,7 +1037,7 @@ def test_ungrouped_unbounded_window( @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["snowflake"], raises=sa.exc.ProgrammingError) +@pytest.mark.notimpl(["snowflake"], raises=SnowflakeProgrammingError) @pytest.mark.notimpl( ["impala"], raises=ImpalaHiveServer2Error, reason="limited RANGE support" ) diff --git a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql index 9f1cf92c38e2..369c228deff0 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql @@ -1,18 +1,18 @@ SELECT - "t2"."l_returnflag" AS "l_returnflag", - "t2"."l_linestatus" AS "l_linestatus", - "t2"."sum_qty" AS "sum_qty", - "t2"."sum_base_price" AS "sum_base_price", - "t2"."sum_disc_price" AS "sum_disc_price", - "t2"."sum_charge" AS "sum_charge", - "t2"."avg_qty" AS "avg_qty", - "t2"."avg_price" AS "avg_price", - "t2"."avg_disc" AS "avg_disc", - "t2"."count_order" AS "count_order" + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."sum_qty", + "t2"."sum_base_price", + "t2"."sum_disc_price", + "t2"."sum_charge", + "t2"."avg_qty", + "t2"."avg_price", + "t2"."avg_disc", + "t2"."count_order" FROM ( SELECT - "t1"."l_returnflag" AS "l_returnflag", - "t1"."l_linestatus" AS "l_linestatus", + "t1"."l_returnflag", + "t1"."l_linestatus", SUM("t1"."l_quantity") AS "sum_qty", SUM("t1"."l_extendedprice") AS "sum_base_price", SUM("t1"."l_extendedprice" * ( diff --git a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql index 01cfa3a33d2e..e4d7499a9126 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql @@ -1,42 +1,42 @@ SELECT - "t24"."s_acctbal" AS "s_acctbal", - "t24"."s_name" AS "s_name", - "t24"."n_name" AS "n_name", - "t24"."p_partkey" AS "p_partkey", - "t24"."p_mfgr" AS "p_mfgr", - "t24"."s_address" AS "s_address", - "t24"."s_phone" AS "s_phone", - "t24"."s_comment" AS "s_comment" + "t26"."s_acctbal", + "t26"."s_name", + "t26"."n_name", + "t26"."p_partkey", + "t26"."p_mfgr", + "t26"."s_address", + "t26"."s_phone", + "t26"."s_comment" FROM ( 
SELECT - "t5"."p_partkey" AS "p_partkey", - "t5"."p_name" AS "p_name", - "t5"."p_mfgr" AS "p_mfgr", - "t5"."p_brand" AS "p_brand", - "t5"."p_type" AS "p_type", - "t5"."p_size" AS "p_size", - "t5"."p_container" AS "p_container", - "t5"."p_retailprice" AS "p_retailprice", - "t5"."p_comment" AS "p_comment", - "t10"."ps_partkey" AS "ps_partkey", - "t10"."ps_suppkey" AS "ps_suppkey", - "t10"."ps_availqty" AS "ps_availqty", - "t10"."ps_supplycost" AS "ps_supplycost", - "t10"."ps_comment" AS "ps_comment", - "t11"."s_suppkey" AS "s_suppkey", - "t11"."s_name" AS "s_name", - "t11"."s_address" AS "s_address", - "t11"."s_nationkey" AS "s_nationkey", - "t11"."s_phone" AS "s_phone", - "t11"."s_acctbal" AS "s_acctbal", - "t11"."s_comment" AS "s_comment", - "t13"."n_nationkey" AS "n_nationkey", - "t13"."n_name" AS "n_name", - "t13"."n_regionkey" AS "n_regionkey", - "t13"."n_comment" AS "n_comment", - "t15"."r_regionkey" AS "r_regionkey", - "t15"."r_name" AS "r_name", - "t15"."r_comment" AS "r_comment" + "t10"."p_partkey", + "t10"."p_name", + "t10"."p_mfgr", + "t10"."p_brand", + "t10"."p_type", + "t10"."p_size", + "t10"."p_container", + "t10"."p_retailprice", + "t10"."p_comment", + "t11"."ps_partkey", + "t11"."ps_suppkey", + "t11"."ps_availqty", + "t11"."ps_supplycost", + "t11"."ps_comment", + "t13"."s_suppkey", + "t13"."s_name", + "t13"."s_address", + "t13"."s_nationkey", + "t13"."s_phone", + "t13"."s_acctbal", + "t13"."s_comment", + "t15"."n_nationkey", + "t15"."n_name", + "t15"."n_regionkey", + "t15"."n_comment", + "t17"."r_regionkey", + "t17"."r_name", + "t17"."r_comment" FROM ( SELECT "t0"."P_PARTKEY" AS "p_partkey", @@ -49,7 +49,7 @@ FROM ( "t0"."P_RETAILPRICE" AS "p_retailprice", "t0"."P_COMMENT" AS "p_comment" FROM "PART" AS "t0" - ) AS "t5" + ) AS "t10" INNER JOIN ( SELECT "t1"."PS_PARTKEY" AS "ps_partkey", @@ -58,8 +58,8 @@ FROM ( "t1"."PS_SUPPLYCOST" AS "ps_supplycost", "t1"."PS_COMMENT" AS "ps_comment" FROM "PARTSUPP" AS "t1" - ) AS "t10" - ON "t5"."p_partkey" = "t10"."ps_partkey" + ) AS "t11" + ON "t10"."p_partkey" = "t11"."ps_partkey" INNER JOIN ( SELECT "t2"."S_SUPPKEY" AS "s_suppkey", @@ -70,8 +70,8 @@ FROM ( "t2"."S_ACCTBAL" AS "s_acctbal", "t2"."S_COMMENT" AS "s_comment" FROM "SUPPLIER" AS "t2" - ) AS "t11" - ON "t11"."s_suppkey" = "t10"."ps_suppkey" + ) AS "t13" + ON "t13"."s_suppkey" = "t11"."ps_suppkey" INNER JOIN ( SELECT "t3"."N_NATIONKEY" AS "n_nationkey", @@ -79,66 +79,66 @@ FROM ( "t3"."N_REGIONKEY" AS "n_regionkey", "t3"."N_COMMENT" AS "n_comment" FROM "NATION" AS "t3" - ) AS "t13" - ON "t11"."s_nationkey" = "t13"."n_nationkey" + ) AS "t15" + ON "t13"."s_nationkey" = "t15"."n_nationkey" INNER JOIN ( SELECT "t4"."R_REGIONKEY" AS "r_regionkey", "t4"."R_NAME" AS "r_name", "t4"."R_COMMENT" AS "r_comment" FROM "REGION" AS "t4" - ) AS "t15" - ON "t13"."n_regionkey" = "t15"."r_regionkey" -) AS "t24" + ) AS "t17" + ON "t15"."n_regionkey" = "t17"."r_regionkey" +) AS "t26" WHERE - "t24"."p_size" = 15 - AND "t24"."p_type" LIKE '%BRASS' - AND "t24"."r_name" = 'EUROPE' - AND "t24"."ps_supplycost" = ( + "t26"."p_size" = 15 + AND "t26"."p_type" LIKE '%BRASS' + AND "t26"."r_name" = 'EUROPE' + AND "t26"."ps_supplycost" = ( SELECT - MIN("t26"."ps_supplycost") AS "Min(ps_supplycost)" + MIN("t28"."ps_supplycost") AS "Min(ps_supplycost)" FROM ( SELECT - "t25"."ps_partkey" AS "ps_partkey", - "t25"."ps_suppkey" AS "ps_suppkey", - "t25"."ps_availqty" AS "ps_availqty", - "t25"."ps_supplycost" AS "ps_supplycost", - "t25"."ps_comment" AS "ps_comment", - "t25"."s_suppkey" AS "s_suppkey", - "t25"."s_name" AS 
"s_name", - "t25"."s_address" AS "s_address", - "t25"."s_nationkey" AS "s_nationkey", - "t25"."s_phone" AS "s_phone", - "t25"."s_acctbal" AS "s_acctbal", - "t25"."s_comment" AS "s_comment", - "t25"."n_nationkey" AS "n_nationkey", - "t25"."n_name" AS "n_name", - "t25"."n_regionkey" AS "n_regionkey", - "t25"."n_comment" AS "n_comment", - "t25"."r_regionkey" AS "r_regionkey", - "t25"."r_name" AS "r_name", - "t25"."r_comment" AS "r_comment" + "t27"."ps_partkey", + "t27"."ps_suppkey", + "t27"."ps_availqty", + "t27"."ps_supplycost", + "t27"."ps_comment", + "t27"."s_suppkey", + "t27"."s_name", + "t27"."s_address", + "t27"."s_nationkey", + "t27"."s_phone", + "t27"."s_acctbal", + "t27"."s_comment", + "t27"."n_nationkey", + "t27"."n_name", + "t27"."n_regionkey", + "t27"."n_comment", + "t27"."r_regionkey", + "t27"."r_name", + "t27"."r_comment" FROM ( SELECT - "t6"."ps_partkey" AS "ps_partkey", - "t6"."ps_suppkey" AS "ps_suppkey", - "t6"."ps_availqty" AS "ps_availqty", - "t6"."ps_supplycost" AS "ps_supplycost", - "t6"."ps_comment" AS "ps_comment", - "t12"."s_suppkey" AS "s_suppkey", - "t12"."s_name" AS "s_name", - "t12"."s_address" AS "s_address", - "t12"."s_nationkey" AS "s_nationkey", - "t12"."s_phone" AS "s_phone", - "t12"."s_acctbal" AS "s_acctbal", - "t12"."s_comment" AS "s_comment", - "t14"."n_nationkey" AS "n_nationkey", - "t14"."n_name" AS "n_name", - "t14"."n_regionkey" AS "n_regionkey", - "t14"."n_comment" AS "n_comment", - "t16"."r_regionkey" AS "r_regionkey", - "t16"."r_name" AS "r_name", - "t16"."r_comment" AS "r_comment" + "t12"."ps_partkey", + "t12"."ps_suppkey", + "t12"."ps_availqty", + "t12"."ps_supplycost", + "t12"."ps_comment", + "t14"."s_suppkey", + "t14"."s_name", + "t14"."s_address", + "t14"."s_nationkey", + "t14"."s_phone", + "t14"."s_acctbal", + "t14"."s_comment", + "t16"."n_nationkey", + "t16"."n_name", + "t16"."n_regionkey", + "t16"."n_comment", + "t18"."r_regionkey", + "t18"."r_name", + "t18"."r_comment" FROM ( SELECT "t1"."PS_PARTKEY" AS "ps_partkey", @@ -147,7 +147,7 @@ WHERE "t1"."PS_SUPPLYCOST" AS "ps_supplycost", "t1"."PS_COMMENT" AS "ps_comment" FROM "PARTSUPP" AS "t1" - ) AS "t6" + ) AS "t12" INNER JOIN ( SELECT "t2"."S_SUPPKEY" AS "s_suppkey", @@ -158,8 +158,8 @@ WHERE "t2"."S_ACCTBAL" AS "s_acctbal", "t2"."S_COMMENT" AS "s_comment" FROM "SUPPLIER" AS "t2" - ) AS "t12" - ON "t12"."s_suppkey" = "t6"."ps_suppkey" + ) AS "t14" + ON "t14"."s_suppkey" = "t12"."ps_suppkey" INNER JOIN ( SELECT "t3"."N_NATIONKEY" AS "n_nationkey", @@ -167,24 +167,24 @@ WHERE "t3"."N_REGIONKEY" AS "n_regionkey", "t3"."N_COMMENT" AS "n_comment" FROM "NATION" AS "t3" - ) AS "t14" - ON "t12"."s_nationkey" = "t14"."n_nationkey" + ) AS "t16" + ON "t14"."s_nationkey" = "t16"."n_nationkey" INNER JOIN ( SELECT "t4"."R_REGIONKEY" AS "r_regionkey", "t4"."R_NAME" AS "r_name", "t4"."R_COMMENT" AS "r_comment" FROM "REGION" AS "t4" - ) AS "t16" - ON "t14"."n_regionkey" = "t16"."r_regionkey" - ) AS "t25" + ) AS "t18" + ON "t16"."n_regionkey" = "t18"."r_regionkey" + ) AS "t27" WHERE - "t25"."r_name" = 'EUROPE' AND "t24"."p_partkey" = "t25"."ps_partkey" - ) AS "t26" + "t27"."r_name" = 'EUROPE' AND "t26"."p_partkey" = "t27"."ps_partkey" + ) AS "t28" ) ORDER BY - "t24"."s_acctbal" DESC NULLS LAST, - "t24"."n_name" ASC, - "t24"."s_name" ASC, - "t24"."p_partkey" ASC + "t26"."s_acctbal" DESC NULLS LAST, + "t26"."n_name" ASC, + "t26"."s_name" ASC, + "t26"."p_partkey" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql 
b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql index 13a8f7da2bd2..0c8dab8a1e70 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql @@ -1,86 +1,86 @@ SELECT - "t12"."l_orderkey" AS "l_orderkey", - "t12"."revenue" AS "revenue", - "t12"."o_orderdate" AS "o_orderdate", - "t12"."o_shippriority" AS "o_shippriority" + "t13"."l_orderkey", + "t13"."revenue", + "t13"."o_orderdate", + "t13"."o_shippriority" FROM ( SELECT - "t11"."l_orderkey" AS "l_orderkey", - "t11"."o_orderdate" AS "o_orderdate", - "t11"."o_shippriority" AS "o_shippriority", - SUM("t11"."l_extendedprice" * ( - 1 - "t11"."l_discount" + "t12"."l_orderkey", + "t12"."o_orderdate", + "t12"."o_shippriority", + SUM("t12"."l_extendedprice" * ( + 1 - "t12"."l_discount" )) AS "revenue" FROM ( SELECT - "t10"."c_custkey" AS "c_custkey", - "t10"."c_name" AS "c_name", - "t10"."c_address" AS "c_address", - "t10"."c_nationkey" AS "c_nationkey", - "t10"."c_phone" AS "c_phone", - "t10"."c_acctbal" AS "c_acctbal", - "t10"."c_mktsegment" AS "c_mktsegment", - "t10"."c_comment" AS "c_comment", - "t10"."o_orderkey" AS "o_orderkey", - "t10"."o_custkey" AS "o_custkey", - "t10"."o_orderstatus" AS "o_orderstatus", - "t10"."o_totalprice" AS "o_totalprice", - "t10"."o_orderdate" AS "o_orderdate", - "t10"."o_orderpriority" AS "o_orderpriority", - "t10"."o_clerk" AS "o_clerk", - "t10"."o_shippriority" AS "o_shippriority", - "t10"."o_comment" AS "o_comment", - "t10"."l_orderkey" AS "l_orderkey", - "t10"."l_partkey" AS "l_partkey", - "t10"."l_suppkey" AS "l_suppkey", - "t10"."l_linenumber" AS "l_linenumber", - "t10"."l_quantity" AS "l_quantity", - "t10"."l_extendedprice" AS "l_extendedprice", - "t10"."l_discount" AS "l_discount", - "t10"."l_tax" AS "l_tax", - "t10"."l_returnflag" AS "l_returnflag", - "t10"."l_linestatus" AS "l_linestatus", - "t10"."l_shipdate" AS "l_shipdate", - "t10"."l_commitdate" AS "l_commitdate", - "t10"."l_receiptdate" AS "l_receiptdate", - "t10"."l_shipinstruct" AS "l_shipinstruct", - "t10"."l_shipmode" AS "l_shipmode", - "t10"."l_comment" AS "l_comment" + "t11"."c_custkey", + "t11"."c_name", + "t11"."c_address", + "t11"."c_nationkey", + "t11"."c_phone", + "t11"."c_acctbal", + "t11"."c_mktsegment", + "t11"."c_comment", + "t11"."o_orderkey", + "t11"."o_custkey", + "t11"."o_orderstatus", + "t11"."o_totalprice", + "t11"."o_orderdate", + "t11"."o_orderpriority", + "t11"."o_clerk", + "t11"."o_shippriority", + "t11"."o_comment", + "t11"."l_orderkey", + "t11"."l_partkey", + "t11"."l_suppkey", + "t11"."l_linenumber", + "t11"."l_quantity", + "t11"."l_extendedprice", + "t11"."l_discount", + "t11"."l_tax", + "t11"."l_returnflag", + "t11"."l_linestatus", + "t11"."l_shipdate", + "t11"."l_commitdate", + "t11"."l_receiptdate", + "t11"."l_shipinstruct", + "t11"."l_shipmode", + "t11"."l_comment" FROM ( SELECT - "t3"."c_custkey" AS "c_custkey", - "t3"."c_name" AS "c_name", - "t3"."c_address" AS "c_address", - "t3"."c_nationkey" AS "c_nationkey", - "t3"."c_phone" AS "c_phone", - "t3"."c_acctbal" AS "c_acctbal", - "t3"."c_mktsegment" AS "c_mktsegment", - "t3"."c_comment" AS "c_comment", - "t6"."o_orderkey" AS "o_orderkey", - "t6"."o_custkey" AS "o_custkey", - "t6"."o_orderstatus" AS "o_orderstatus", - "t6"."o_totalprice" AS "o_totalprice", - "t6"."o_orderdate" AS "o_orderdate", - "t6"."o_orderpriority" AS "o_orderpriority", - "t6"."o_clerk" AS "o_clerk", - "t6"."o_shippriority" AS "o_shippriority", - 
"t6"."o_comment" AS "o_comment", - "t7"."l_orderkey" AS "l_orderkey", - "t7"."l_partkey" AS "l_partkey", - "t7"."l_suppkey" AS "l_suppkey", - "t7"."l_linenumber" AS "l_linenumber", - "t7"."l_quantity" AS "l_quantity", - "t7"."l_extendedprice" AS "l_extendedprice", - "t7"."l_discount" AS "l_discount", - "t7"."l_tax" AS "l_tax", - "t7"."l_returnflag" AS "l_returnflag", - "t7"."l_linestatus" AS "l_linestatus", - "t7"."l_shipdate" AS "l_shipdate", - "t7"."l_commitdate" AS "l_commitdate", - "t7"."l_receiptdate" AS "l_receiptdate", - "t7"."l_shipinstruct" AS "l_shipinstruct", - "t7"."l_shipmode" AS "l_shipmode", - "t7"."l_comment" AS "l_comment" + "t6"."c_custkey", + "t6"."c_name", + "t6"."c_address", + "t6"."c_nationkey", + "t6"."c_phone", + "t6"."c_acctbal", + "t6"."c_mktsegment", + "t6"."c_comment", + "t7"."o_orderkey", + "t7"."o_custkey", + "t7"."o_orderstatus", + "t7"."o_totalprice", + "t7"."o_orderdate", + "t7"."o_orderpriority", + "t7"."o_clerk", + "t7"."o_shippriority", + "t7"."o_comment", + "t8"."l_orderkey", + "t8"."l_partkey", + "t8"."l_suppkey", + "t8"."l_linenumber", + "t8"."l_quantity", + "t8"."l_extendedprice", + "t8"."l_discount", + "t8"."l_tax", + "t8"."l_returnflag", + "t8"."l_linestatus", + "t8"."l_shipdate", + "t8"."l_commitdate", + "t8"."l_receiptdate", + "t8"."l_shipinstruct", + "t8"."l_shipmode", + "t8"."l_comment" FROM ( SELECT "t0"."C_CUSTKEY" AS "c_custkey", @@ -92,7 +92,7 @@ FROM ( "t0"."C_MKTSEGMENT" AS "c_mktsegment", "t0"."C_COMMENT" AS "c_comment" FROM "CUSTOMER" AS "t0" - ) AS "t3" + ) AS "t6" INNER JOIN ( SELECT "t1"."O_ORDERKEY" AS "o_orderkey", @@ -105,8 +105,8 @@ FROM ( "t1"."O_SHIPPRIORITY" AS "o_shippriority", "t1"."O_COMMENT" AS "o_comment" FROM "ORDERS" AS "t1" - ) AS "t6" - ON "t3"."c_custkey" = "t6"."o_custkey" + ) AS "t7" + ON "t6"."c_custkey" = "t7"."o_custkey" INNER JOIN ( SELECT "t2"."L_ORDERKEY" AS "l_orderkey", @@ -126,20 +126,20 @@ FROM ( "t2"."L_SHIPMODE" AS "l_shipmode", "t2"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t2" - ) AS "t7" - ON "t7"."l_orderkey" = "t6"."o_orderkey" - ) AS "t10" + ) AS "t8" + ON "t8"."l_orderkey" = "t7"."o_orderkey" + ) AS "t11" WHERE - "t10"."c_mktsegment" = 'BUILDING' - AND "t10"."o_orderdate" < DATEFROMPARTS(1995, 3, 15) - AND "t10"."l_shipdate" > DATEFROMPARTS(1995, 3, 15) - ) AS "t11" + "t11"."c_mktsegment" = 'BUILDING' + AND "t11"."o_orderdate" < DATEFROMPARTS(1995, 3, 15) + AND "t11"."l_shipdate" > DATEFROMPARTS(1995, 3, 15) + ) AS "t12" GROUP BY 1, 2, 3 -) AS "t12" +) AS "t13" ORDER BY - "t12"."revenue" DESC NULLS LAST, - "t12"."o_orderdate" ASC + "t13"."revenue" DESC NULLS LAST, + "t13"."o_orderdate" ASC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql index 67291d6a3632..fe25f373c30c 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql @@ -1,42 +1,52 @@ SELECT - "t4"."o_orderpriority" AS "o_orderpriority", - "t4"."order_count" AS "order_count" + "t5"."o_orderpriority", + "t5"."order_count" FROM ( SELECT - "t3"."o_orderpriority" AS "o_orderpriority", + "t4"."o_orderpriority", COUNT(*) AS "order_count" FROM ( SELECT - "t0"."O_ORDERKEY" AS "o_orderkey", - "t0"."O_CUSTKEY" AS "o_custkey", - "t0"."O_ORDERSTATUS" AS "o_orderstatus", - "t0"."O_TOTALPRICE" AS "o_totalprice", - "t0"."O_ORDERDATE" AS "o_orderdate", - "t0"."O_ORDERPRIORITY" AS 
"o_orderpriority", - "t0"."O_CLERK" AS "o_clerk", - "t0"."O_SHIPPRIORITY" AS "o_shippriority", - "t0"."O_COMMENT" AS "o_comment" - FROM "ORDERS" AS "t0" + "t2"."o_orderkey", + "t2"."o_custkey", + "t2"."o_orderstatus", + "t2"."o_totalprice", + "t2"."o_orderdate", + "t2"."o_orderpriority", + "t2"."o_clerk", + "t2"."o_shippriority", + "t2"."o_comment" + FROM ( + SELECT + "t0"."O_ORDERKEY" AS "o_orderkey", + "t0"."O_CUSTKEY" AS "o_custkey", + "t0"."O_ORDERSTATUS" AS "o_orderstatus", + "t0"."O_TOTALPRICE" AS "o_totalprice", + "t0"."O_ORDERDATE" AS "o_orderdate", + "t0"."O_ORDERPRIORITY" AS "o_orderpriority", + "t0"."O_CLERK" AS "o_clerk", + "t0"."O_SHIPPRIORITY" AS "o_shippriority", + "t0"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t0" + ) AS "t2" WHERE EXISTS( - ( - SELECT - 1 AS "1" - FROM "LINEITEM" AS "t1" - WHERE - ( - "t1"."L_ORDERKEY" = "t0"."O_ORDERKEY" - ) - AND ( - "t1"."L_COMMITDATE" < "t1"."L_RECEIPTDATE" - ) - ) + SELECT + 1 AS "1" + FROM "LINEITEM" AS "t1" + WHERE + ( + "t1"."L_ORDERKEY" = "t2"."o_orderkey" + ) + AND ( + "t1"."L_COMMITDATE" < "t1"."L_RECEIPTDATE" + ) ) - AND "t0"."O_ORDERDATE" >= DATEFROMPARTS(1993, 7, 1) - AND "t0"."O_ORDERDATE" < DATEFROMPARTS(1993, 10, 1) - ) AS "t3" + AND "t2"."o_orderdate" >= DATEFROMPARTS(1993, 7, 1) + AND "t2"."o_orderdate" < DATEFROMPARTS(1993, 10, 1) + ) AS "t4" GROUP BY 1 -) AS "t4" +) AS "t5" ORDER BY - "t4"."o_orderpriority" ASC \ No newline at end of file + "t5"."o_orderpriority" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql index 4ec8d7241f0d..b1a604664723 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql @@ -1,110 +1,110 @@ SELECT - "t24"."n_name" AS "n_name", - "t24"."revenue" AS "revenue" + "t25"."n_name", + "t25"."revenue" FROM ( SELECT - "t23"."n_name" AS "n_name", - SUM("t23"."l_extendedprice" * ( - 1 - "t23"."l_discount" + "t24"."n_name", + SUM("t24"."l_extendedprice" * ( + 1 - "t24"."l_discount" )) AS "revenue" FROM ( SELECT - "t22"."c_custkey" AS "c_custkey", - "t22"."c_name" AS "c_name", - "t22"."c_address" AS "c_address", - "t22"."c_nationkey" AS "c_nationkey", - "t22"."c_phone" AS "c_phone", - "t22"."c_acctbal" AS "c_acctbal", - "t22"."c_mktsegment" AS "c_mktsegment", - "t22"."c_comment" AS "c_comment", - "t22"."o_orderkey" AS "o_orderkey", - "t22"."o_custkey" AS "o_custkey", - "t22"."o_orderstatus" AS "o_orderstatus", - "t22"."o_totalprice" AS "o_totalprice", - "t22"."o_orderdate" AS "o_orderdate", - "t22"."o_orderpriority" AS "o_orderpriority", - "t22"."o_clerk" AS "o_clerk", - "t22"."o_shippriority" AS "o_shippriority", - "t22"."o_comment" AS "o_comment", - "t22"."l_orderkey" AS "l_orderkey", - "t22"."l_partkey" AS "l_partkey", - "t22"."l_suppkey" AS "l_suppkey", - "t22"."l_linenumber" AS "l_linenumber", - "t22"."l_quantity" AS "l_quantity", - "t22"."l_extendedprice" AS "l_extendedprice", - "t22"."l_discount" AS "l_discount", - "t22"."l_tax" AS "l_tax", - "t22"."l_returnflag" AS "l_returnflag", - "t22"."l_linestatus" AS "l_linestatus", - "t22"."l_shipdate" AS "l_shipdate", - "t22"."l_commitdate" AS "l_commitdate", - "t22"."l_receiptdate" AS "l_receiptdate", - "t22"."l_shipinstruct" AS "l_shipinstruct", - "t22"."l_shipmode" AS "l_shipmode", - "t22"."l_comment" AS "l_comment", - "t22"."s_suppkey" AS "s_suppkey", - "t22"."s_name" AS "s_name", - 
"t22"."s_address" AS "s_address", - "t22"."s_nationkey" AS "s_nationkey", - "t22"."s_phone" AS "s_phone", - "t22"."s_acctbal" AS "s_acctbal", - "t22"."s_comment" AS "s_comment", - "t22"."n_nationkey" AS "n_nationkey", - "t22"."n_name" AS "n_name", - "t22"."n_regionkey" AS "n_regionkey", - "t22"."n_comment" AS "n_comment", - "t22"."r_regionkey" AS "r_regionkey", - "t22"."r_name" AS "r_name", - "t22"."r_comment" AS "r_comment" + "t23"."c_custkey", + "t23"."c_name", + "t23"."c_address", + "t23"."c_nationkey", + "t23"."c_phone", + "t23"."c_acctbal", + "t23"."c_mktsegment", + "t23"."c_comment", + "t23"."o_orderkey", + "t23"."o_custkey", + "t23"."o_orderstatus", + "t23"."o_totalprice", + "t23"."o_orderdate", + "t23"."o_orderpriority", + "t23"."o_clerk", + "t23"."o_shippriority", + "t23"."o_comment", + "t23"."l_orderkey", + "t23"."l_partkey", + "t23"."l_suppkey", + "t23"."l_linenumber", + "t23"."l_quantity", + "t23"."l_extendedprice", + "t23"."l_discount", + "t23"."l_tax", + "t23"."l_returnflag", + "t23"."l_linestatus", + "t23"."l_shipdate", + "t23"."l_commitdate", + "t23"."l_receiptdate", + "t23"."l_shipinstruct", + "t23"."l_shipmode", + "t23"."l_comment", + "t23"."s_suppkey", + "t23"."s_name", + "t23"."s_address", + "t23"."s_nationkey", + "t23"."s_phone", + "t23"."s_acctbal", + "t23"."s_comment", + "t23"."n_nationkey", + "t23"."n_name", + "t23"."n_regionkey", + "t23"."n_comment", + "t23"."r_regionkey", + "t23"."r_name", + "t23"."r_comment" FROM ( SELECT - "t6"."c_custkey" AS "c_custkey", - "t6"."c_name" AS "c_name", - "t6"."c_address" AS "c_address", - "t6"."c_nationkey" AS "c_nationkey", - "t6"."c_phone" AS "c_phone", - "t6"."c_acctbal" AS "c_acctbal", - "t6"."c_mktsegment" AS "c_mktsegment", - "t6"."c_comment" AS "c_comment", - "t12"."o_orderkey" AS "o_orderkey", - "t12"."o_custkey" AS "o_custkey", - "t12"."o_orderstatus" AS "o_orderstatus", - "t12"."o_totalprice" AS "o_totalprice", - "t12"."o_orderdate" AS "o_orderdate", - "t12"."o_orderpriority" AS "o_orderpriority", - "t12"."o_clerk" AS "o_clerk", - "t12"."o_shippriority" AS "o_shippriority", - "t12"."o_comment" AS "o_comment", - "t13"."l_orderkey" AS "l_orderkey", - "t13"."l_partkey" AS "l_partkey", - "t13"."l_suppkey" AS "l_suppkey", - "t13"."l_linenumber" AS "l_linenumber", - "t13"."l_quantity" AS "l_quantity", - "t13"."l_extendedprice" AS "l_extendedprice", - "t13"."l_discount" AS "l_discount", - "t13"."l_tax" AS "l_tax", - "t13"."l_returnflag" AS "l_returnflag", - "t13"."l_linestatus" AS "l_linestatus", - "t13"."l_shipdate" AS "l_shipdate", - "t13"."l_commitdate" AS "l_commitdate", - "t13"."l_receiptdate" AS "l_receiptdate", - "t13"."l_shipinstruct" AS "l_shipinstruct", - "t13"."l_shipmode" AS "l_shipmode", - "t13"."l_comment" AS "l_comment", - "t14"."s_suppkey" AS "s_suppkey", - "t14"."s_name" AS "s_name", - "t14"."s_address" AS "s_address", - "t14"."s_nationkey" AS "s_nationkey", - "t14"."s_phone" AS "s_phone", - "t14"."s_acctbal" AS "s_acctbal", - "t14"."s_comment" AS "s_comment", - "t15"."n_nationkey" AS "n_nationkey", - "t15"."n_name" AS "n_name", - "t15"."n_regionkey" AS "n_regionkey", - "t15"."n_comment" AS "n_comment", - "t16"."r_regionkey" AS "r_regionkey", - "t16"."r_name" AS "r_name", - "t16"."r_comment" AS "r_comment" + "t12"."c_custkey", + "t12"."c_name", + "t12"."c_address", + "t12"."c_nationkey", + "t12"."c_phone", + "t12"."c_acctbal", + "t12"."c_mktsegment", + "t12"."c_comment", + "t13"."o_orderkey", + "t13"."o_custkey", + "t13"."o_orderstatus", + "t13"."o_totalprice", + "t13"."o_orderdate", + "t13"."o_orderpriority", 
+ "t13"."o_clerk", + "t13"."o_shippriority", + "t13"."o_comment", + "t14"."l_orderkey", + "t14"."l_partkey", + "t14"."l_suppkey", + "t14"."l_linenumber", + "t14"."l_quantity", + "t14"."l_extendedprice", + "t14"."l_discount", + "t14"."l_tax", + "t14"."l_returnflag", + "t14"."l_linestatus", + "t14"."l_shipdate", + "t14"."l_commitdate", + "t14"."l_receiptdate", + "t14"."l_shipinstruct", + "t14"."l_shipmode", + "t14"."l_comment", + "t15"."s_suppkey", + "t15"."s_name", + "t15"."s_address", + "t15"."s_nationkey", + "t15"."s_phone", + "t15"."s_acctbal", + "t15"."s_comment", + "t16"."n_nationkey", + "t16"."n_name", + "t16"."n_regionkey", + "t16"."n_comment", + "t17"."r_regionkey", + "t17"."r_name", + "t17"."r_comment" FROM ( SELECT "t0"."C_CUSTKEY" AS "c_custkey", @@ -116,7 +116,7 @@ FROM ( "t0"."C_MKTSEGMENT" AS "c_mktsegment", "t0"."C_COMMENT" AS "c_comment" FROM "CUSTOMER" AS "t0" - ) AS "t6" + ) AS "t12" INNER JOIN ( SELECT "t1"."O_ORDERKEY" AS "o_orderkey", @@ -129,8 +129,8 @@ FROM ( "t1"."O_SHIPPRIORITY" AS "o_shippriority", "t1"."O_COMMENT" AS "o_comment" FROM "ORDERS" AS "t1" - ) AS "t12" - ON "t6"."c_custkey" = "t12"."o_custkey" + ) AS "t13" + ON "t12"."c_custkey" = "t13"."o_custkey" INNER JOIN ( SELECT "t2"."L_ORDERKEY" AS "l_orderkey", @@ -150,8 +150,8 @@ FROM ( "t2"."L_SHIPMODE" AS "l_shipmode", "t2"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t2" - ) AS "t13" - ON "t13"."l_orderkey" = "t12"."o_orderkey" + ) AS "t14" + ON "t14"."l_orderkey" = "t13"."o_orderkey" INNER JOIN ( SELECT "t3"."S_SUPPKEY" AS "s_suppkey", @@ -162,8 +162,8 @@ FROM ( "t3"."S_ACCTBAL" AS "s_acctbal", "t3"."S_COMMENT" AS "s_comment" FROM "SUPPLIER" AS "t3" - ) AS "t14" - ON "t13"."l_suppkey" = "t14"."s_suppkey" + ) AS "t15" + ON "t14"."l_suppkey" = "t15"."s_suppkey" INNER JOIN ( SELECT "t4"."N_NATIONKEY" AS "n_nationkey", @@ -171,25 +171,25 @@ FROM ( "t4"."N_REGIONKEY" AS "n_regionkey", "t4"."N_COMMENT" AS "n_comment" FROM "NATION" AS "t4" - ) AS "t15" - ON "t6"."c_nationkey" = "t14"."s_nationkey" - AND "t14"."s_nationkey" = "t15"."n_nationkey" + ) AS "t16" + ON "t12"."c_nationkey" = "t15"."s_nationkey" + AND "t15"."s_nationkey" = "t16"."n_nationkey" INNER JOIN ( SELECT "t5"."R_REGIONKEY" AS "r_regionkey", "t5"."R_NAME" AS "r_name", "t5"."R_COMMENT" AS "r_comment" FROM "REGION" AS "t5" - ) AS "t16" - ON "t15"."n_regionkey" = "t16"."r_regionkey" - ) AS "t22" + ) AS "t17" + ON "t16"."n_regionkey" = "t17"."r_regionkey" + ) AS "t23" WHERE - "t22"."r_name" = 'ASIA' - AND "t22"."o_orderdate" >= DATEFROMPARTS(1994, 1, 1) - AND "t22"."o_orderdate" < DATEFROMPARTS(1995, 1, 1) - ) AS "t23" + "t23"."r_name" = 'ASIA' + AND "t23"."o_orderdate" >= DATEFROMPARTS(1994, 1, 1) + AND "t23"."o_orderdate" < DATEFROMPARTS(1995, 1, 1) + ) AS "t24" GROUP BY 1 -) AS "t24" +) AS "t25" ORDER BY - "t24"."revenue" DESC NULLS LAST \ No newline at end of file + "t25"."revenue" DESC NULLS LAST \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql index 69d9b1af31d1..48269d09259c 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql @@ -1,24 +1,33 @@ SELECT - * + "t24"."supp_nation", + "t24"."cust_nation", + "t24"."l_year", + "t24"."revenue" FROM ( SELECT - "t17"."supp_nation" AS "supp_nation", - "t17"."cust_nation" AS "cust_nation", - "t17"."l_year" AS "l_year", - SUM("t17"."volume") AS 
"revenue" + "t23"."supp_nation", + "t23"."cust_nation", + "t23"."l_year", + SUM("t23"."volume") AS "revenue" FROM ( SELECT - * + "t22"."supp_nation", + "t22"."cust_nation", + "t22"."l_shipdate", + "t22"."l_extendedprice", + "t22"."l_discount", + "t22"."l_year", + "t22"."volume" FROM ( SELECT - "t9"."n_name" AS "supp_nation", - "t10"."n_name" AS "cust_nation", - "t6"."l_shipdate" AS "l_shipdate", - "t6"."l_extendedprice" AS "l_extendedprice", - "t6"."l_discount" AS "l_discount", - DATE_PART('year', "t6"."l_shipdate") AS "l_year", - "t6"."l_extendedprice" * ( - 1 - "t6"."l_discount" + "t14"."n_name" AS "supp_nation", + "t16"."n_name" AS "cust_nation", + "t11"."l_shipdate", + "t11"."l_extendedprice", + "t11"."l_discount", + DATE_PART('year', "t11"."l_shipdate") AS "l_year", + "t11"."l_extendedprice" * ( + 1 - "t11"."l_discount" ) AS "volume" FROM ( SELECT @@ -30,7 +39,7 @@ FROM ( "t0"."S_ACCTBAL" AS "s_acctbal", "t0"."S_COMMENT" AS "s_comment" FROM "SUPPLIER" AS "t0" - ) AS "t5" + ) AS "t10" INNER JOIN ( SELECT "t1"."L_ORDERKEY" AS "l_orderkey", @@ -50,8 +59,8 @@ FROM ( "t1"."L_SHIPMODE" AS "l_shipmode", "t1"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t1" - ) AS "t6" - ON "t5"."s_suppkey" = "t6"."l_suppkey" + ) AS "t11" + ON "t10"."s_suppkey" = "t11"."l_suppkey" INNER JOIN ( SELECT "t2"."O_ORDERKEY" AS "o_orderkey", @@ -64,8 +73,8 @@ FROM ( "t2"."O_SHIPPRIORITY" AS "o_shippriority", "t2"."O_COMMENT" AS "o_comment" FROM "ORDERS" AS "t2" - ) AS "t7" - ON "t7"."o_orderkey" = "t6"."l_orderkey" + ) AS "t12" + ON "t12"."o_orderkey" = "t11"."l_orderkey" INNER JOIN ( SELECT "t3"."C_CUSTKEY" AS "c_custkey", @@ -77,8 +86,8 @@ FROM ( "t3"."C_MKTSEGMENT" AS "c_mktsegment", "t3"."C_COMMENT" AS "c_comment" FROM "CUSTOMER" AS "t3" - ) AS "t8" - ON "t8"."c_custkey" = "t7"."o_custkey" + ) AS "t13" + ON "t13"."c_custkey" = "t12"."o_custkey" INNER JOIN ( SELECT "t4"."N_NATIONKEY" AS "n_nationkey", @@ -86,8 +95,8 @@ FROM ( "t4"."N_REGIONKEY" AS "n_regionkey", "t4"."N_COMMENT" AS "n_comment" FROM "NATION" AS "t4" - ) AS "t9" - ON "t5"."s_nationkey" = "t9"."n_nationkey" + ) AS "t14" + ON "t10"."s_nationkey" = "t14"."n_nationkey" INNER JOIN ( SELECT "t4"."N_NATIONKEY" AS "n_nationkey", @@ -95,34 +104,34 @@ FROM ( "t4"."N_REGIONKEY" AS "n_regionkey", "t4"."N_COMMENT" AS "n_comment" FROM "NATION" AS "t4" - ) AS "t10" - ON "t8"."c_nationkey" = "t10"."n_nationkey" - ) AS "t16" + ) AS "t16" + ON "t13"."c_nationkey" = "t16"."n_nationkey" + ) AS "t22" WHERE ( ( ( - "t16"."cust_nation" = 'FRANCE' + "t22"."cust_nation" = 'FRANCE' ) AND ( - "t16"."supp_nation" = 'GERMANY' + "t22"."supp_nation" = 'GERMANY' ) ) OR ( ( - "t16"."cust_nation" = 'GERMANY' + "t22"."cust_nation" = 'GERMANY' ) AND ( - "t16"."supp_nation" = 'FRANCE' + "t22"."supp_nation" = 'FRANCE' ) ) ) - AND "t16"."l_shipdate" BETWEEN DATEFROMPARTS(1995, 1, 1) AND DATEFROMPARTS(1996, 12, 31) - ) AS "t17" + AND "t22"."l_shipdate" BETWEEN DATEFROMPARTS(1995, 1, 1) AND DATEFROMPARTS(1996, 12, 31) + ) AS "t23" GROUP BY 1, 2, 3 -) AS "t18" +) AS "t24" ORDER BY - "t18"."supp_nation" ASC, - "t18"."cust_nation" ASC, - "t18"."l_year" ASC \ No newline at end of file + "t24"."supp_nation" ASC, + "t24"."cust_nation" ASC, + "t24"."l_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql index 800d7a74c645..8d25f3b2df17 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql +++ 
b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql @@ -1,29 +1,29 @@ SELECT - "t30"."o_year" AS "o_year", - "t30"."mkt_share" AS "mkt_share" + "t32"."o_year", + "t32"."mkt_share" FROM ( SELECT - "t29"."o_year" AS "o_year", - SUM("t29"."nation_volume") / SUM("t29"."volume") AS "mkt_share" + "t31"."o_year", + SUM("t31"."nation_volume") / SUM("t31"."volume") AS "mkt_share" FROM ( SELECT - "t28"."o_year" AS "o_year", - "t28"."volume" AS "volume", - "t28"."nation" AS "nation", - "t28"."r_name" AS "r_name", - "t28"."o_orderdate" AS "o_orderdate", - "t28"."p_type" AS "p_type", - CASE WHEN "t28"."nation" = 'BRAZIL' THEN "t28"."volume" ELSE 0 END AS "nation_volume" + "t30"."o_year", + "t30"."volume", + "t30"."nation", + "t30"."r_name", + "t30"."o_orderdate", + "t30"."p_type", + CASE WHEN "t30"."nation" = 'BRAZIL' THEN "t30"."volume" ELSE 0 END AS "nation_volume" FROM ( SELECT - DATE_PART('year', "t16"."o_orderdate") AS "o_year", - "t14"."l_extendedprice" * ( - 1 - "t14"."l_discount" + DATE_PART('year', "t17"."o_orderdate") AS "o_year", + "t15"."l_extendedprice" * ( + 1 - "t15"."l_discount" ) AS "volume", - "t19"."n_name" AS "nation", - "t20"."r_name" AS "r_name", - "t16"."o_orderdate" AS "o_orderdate", - "t7"."p_type" AS "p_type" + "t22"."n_name" AS "nation", + "t21"."r_name", + "t17"."o_orderdate", + "t14"."p_type" FROM ( SELECT "t0"."P_PARTKEY" AS "p_partkey", @@ -36,7 +36,7 @@ FROM ( "t0"."P_RETAILPRICE" AS "p_retailprice", "t0"."P_COMMENT" AS "p_comment" FROM "PART" AS "t0" - ) AS "t7" + ) AS "t14" INNER JOIN ( SELECT "t1"."L_ORDERKEY" AS "l_orderkey", @@ -56,8 +56,8 @@ FROM ( "t1"."L_SHIPMODE" AS "l_shipmode", "t1"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t1" - ) AS "t14" - ON "t7"."p_partkey" = "t14"."l_partkey" + ) AS "t15" + ON "t14"."p_partkey" = "t15"."l_partkey" INNER JOIN ( SELECT "t2"."S_SUPPKEY" AS "s_suppkey", @@ -68,8 +68,8 @@ FROM ( "t2"."S_ACCTBAL" AS "s_acctbal", "t2"."S_COMMENT" AS "s_comment" FROM "SUPPLIER" AS "t2" - ) AS "t15" - ON "t15"."s_suppkey" = "t14"."l_suppkey" + ) AS "t16" + ON "t16"."s_suppkey" = "t15"."l_suppkey" INNER JOIN ( SELECT "t3"."O_ORDERKEY" AS "o_orderkey", @@ -82,8 +82,8 @@ FROM ( "t3"."O_SHIPPRIORITY" AS "o_shippriority", "t3"."O_COMMENT" AS "o_comment" FROM "ORDERS" AS "t3" - ) AS "t16" - ON "t14"."l_orderkey" = "t16"."o_orderkey" + ) AS "t17" + ON "t15"."l_orderkey" = "t17"."o_orderkey" INNER JOIN ( SELECT "t4"."C_CUSTKEY" AS "c_custkey", @@ -95,8 +95,8 @@ FROM ( "t4"."C_MKTSEGMENT" AS "c_mktsegment", "t4"."C_COMMENT" AS "c_comment" FROM "CUSTOMER" AS "t4" - ) AS "t17" - ON "t16"."o_custkey" = "t17"."c_custkey" + ) AS "t18" + ON "t17"."o_custkey" = "t18"."c_custkey" INNER JOIN ( SELECT "t5"."N_NATIONKEY" AS "n_nationkey", @@ -104,16 +104,16 @@ FROM ( "t5"."N_REGIONKEY" AS "n_regionkey", "t5"."N_COMMENT" AS "n_comment" FROM "NATION" AS "t5" - ) AS "t18" - ON "t17"."c_nationkey" = "t18"."n_nationkey" + ) AS "t19" + ON "t18"."c_nationkey" = "t19"."n_nationkey" INNER JOIN ( SELECT "t6"."R_REGIONKEY" AS "r_regionkey", "t6"."R_NAME" AS "r_name", "t6"."R_COMMENT" AS "r_comment" FROM "REGION" AS "t6" - ) AS "t20" - ON "t18"."n_regionkey" = "t20"."r_regionkey" + ) AS "t21" + ON "t19"."n_regionkey" = "t21"."r_regionkey" INNER JOIN ( SELECT "t5"."N_NATIONKEY" AS "n_nationkey", @@ -121,16 +121,16 @@ FROM ( "t5"."N_REGIONKEY" AS "n_regionkey", "t5"."N_COMMENT" AS "n_comment" FROM "NATION" AS "t5" - ) AS "t19" - ON "t15"."s_nationkey" = "t19"."n_nationkey" - ) AS "t28" + ) AS "t22" + ON "t16"."s_nationkey" = "t22"."n_nationkey" + ) 
AS "t30" WHERE - "t28"."r_name" = 'AMERICA' - AND "t28"."o_orderdate" BETWEEN DATEFROMPARTS(1995, 1, 1) AND DATEFROMPARTS(1996, 12, 31) - AND "t28"."p_type" = 'ECONOMY ANODIZED STEEL' - ) AS "t29" + "t30"."r_name" = 'AMERICA' + AND "t30"."o_orderdate" BETWEEN DATEFROMPARTS(1995, 1, 1) AND DATEFROMPARTS(1996, 12, 31) + AND "t30"."p_type" = 'ECONOMY ANODIZED STEEL' + ) AS "t31" GROUP BY 1 -) AS "t30" +) AS "t32" ORDER BY - "t30"."o_year" ASC \ No newline at end of file + "t32"."o_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql index 2ae1e1172403..a57563a10289 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql @@ -1,30 +1,30 @@ SELECT - "t24"."nation" AS "nation", - "t24"."o_year" AS "o_year", - "t24"."sum_profit" AS "sum_profit" + "t25"."nation", + "t25"."o_year", + "t25"."sum_profit" FROM ( SELECT - "t23"."nation" AS "nation", - "t23"."o_year" AS "o_year", - SUM("t23"."amount") AS "sum_profit" + "t24"."nation", + "t24"."o_year", + SUM("t24"."amount") AS "sum_profit" FROM ( SELECT - "t22"."amount" AS "amount", - "t22"."o_year" AS "o_year", - "t22"."nation" AS "nation", - "t22"."p_name" AS "p_name" + "t23"."amount", + "t23"."o_year", + "t23"."nation", + "t23"."p_name" FROM ( SELECT ( - "t6"."l_extendedprice" * ( - 1 - "t6"."l_discount" + "t12"."l_extendedprice" * ( + 1 - "t12"."l_discount" ) ) - ( - "t13"."ps_supplycost" * "t6"."l_quantity" + "t14"."ps_supplycost" * "t12"."l_quantity" ) AS "amount", - DATE_PART('year', "t15"."o_orderdate") AS "o_year", - "t16"."n_name" AS "nation", - "t14"."p_name" AS "p_name" + DATE_PART('year', "t16"."o_orderdate") AS "o_year", + "t17"."n_name" AS "nation", + "t15"."p_name" FROM ( SELECT "t0"."L_ORDERKEY" AS "l_orderkey", @@ -44,7 +44,7 @@ FROM ( "t0"."L_SHIPMODE" AS "l_shipmode", "t0"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t0" - ) AS "t6" + ) AS "t12" INNER JOIN ( SELECT "t1"."S_SUPPKEY" AS "s_suppkey", @@ -55,8 +55,8 @@ FROM ( "t1"."S_ACCTBAL" AS "s_acctbal", "t1"."S_COMMENT" AS "s_comment" FROM "SUPPLIER" AS "t1" - ) AS "t12" - ON "t12"."s_suppkey" = "t6"."l_suppkey" + ) AS "t13" + ON "t13"."s_suppkey" = "t12"."l_suppkey" INNER JOIN ( SELECT "t2"."PS_PARTKEY" AS "ps_partkey", @@ -65,8 +65,8 @@ FROM ( "t2"."PS_SUPPLYCOST" AS "ps_supplycost", "t2"."PS_COMMENT" AS "ps_comment" FROM "PARTSUPP" AS "t2" - ) AS "t13" - ON "t13"."ps_suppkey" = "t6"."l_suppkey" AND "t13"."ps_partkey" = "t6"."l_partkey" + ) AS "t14" + ON "t14"."ps_suppkey" = "t12"."l_suppkey" AND "t14"."ps_partkey" = "t12"."l_partkey" INNER JOIN ( SELECT "t3"."P_PARTKEY" AS "p_partkey", @@ -79,8 +79,8 @@ FROM ( "t3"."P_RETAILPRICE" AS "p_retailprice", "t3"."P_COMMENT" AS "p_comment" FROM "PART" AS "t3" - ) AS "t14" - ON "t14"."p_partkey" = "t6"."l_partkey" + ) AS "t15" + ON "t15"."p_partkey" = "t12"."l_partkey" INNER JOIN ( SELECT "t4"."O_ORDERKEY" AS "o_orderkey", @@ -93,8 +93,8 @@ FROM ( "t4"."O_SHIPPRIORITY" AS "o_shippriority", "t4"."O_COMMENT" AS "o_comment" FROM "ORDERS" AS "t4" - ) AS "t15" - ON "t15"."o_orderkey" = "t6"."l_orderkey" + ) AS "t16" + ON "t16"."o_orderkey" = "t12"."l_orderkey" INNER JOIN ( SELECT "t5"."N_NATIONKEY" AS "n_nationkey", @@ -102,16 +102,16 @@ FROM ( "t5"."N_REGIONKEY" AS "n_regionkey", "t5"."N_COMMENT" AS "n_comment" FROM "NATION" AS "t5" - ) AS "t16" - ON "t12"."s_nationkey" = 
"t16"."n_nationkey" - ) AS "t22" + ) AS "t17" + ON "t13"."s_nationkey" = "t17"."n_nationkey" + ) AS "t23" WHERE - "t22"."p_name" LIKE '%green%' - ) AS "t23" + "t23"."p_name" LIKE '%green%' + ) AS "t24" GROUP BY 1, 2 -) AS "t24" +) AS "t25" ORDER BY - "t24"."nation" ASC, - "t24"."o_year" DESC NULLS LAST \ No newline at end of file + "t25"."nation" ASC, + "t25"."o_year" DESC NULLS LAST \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql index a6f5f97cb8c8..49e710a61584 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql @@ -1,102 +1,102 @@ SELECT - "t16"."c_custkey" AS "c_custkey", - "t16"."c_name" AS "c_name", - "t16"."revenue" AS "revenue", - "t16"."c_acctbal" AS "c_acctbal", - "t16"."n_name" AS "n_name", - "t16"."c_address" AS "c_address", - "t16"."c_phone" AS "c_phone", - "t16"."c_comment" AS "c_comment" + "t17"."c_custkey", + "t17"."c_name", + "t17"."revenue", + "t17"."c_acctbal", + "t17"."n_name", + "t17"."c_address", + "t17"."c_phone", + "t17"."c_comment" FROM ( SELECT - "t15"."c_custkey" AS "c_custkey", - "t15"."c_name" AS "c_name", - "t15"."c_acctbal" AS "c_acctbal", - "t15"."n_name" AS "n_name", - "t15"."c_address" AS "c_address", - "t15"."c_phone" AS "c_phone", - "t15"."c_comment" AS "c_comment", - SUM("t15"."l_extendedprice" * ( - 1 - "t15"."l_discount" + "t16"."c_custkey", + "t16"."c_name", + "t16"."c_acctbal", + "t16"."n_name", + "t16"."c_address", + "t16"."c_phone", + "t16"."c_comment", + SUM("t16"."l_extendedprice" * ( + 1 - "t16"."l_discount" )) AS "revenue" FROM ( SELECT - "t14"."c_custkey" AS "c_custkey", - "t14"."c_name" AS "c_name", - "t14"."c_address" AS "c_address", - "t14"."c_nationkey" AS "c_nationkey", - "t14"."c_phone" AS "c_phone", - "t14"."c_acctbal" AS "c_acctbal", - "t14"."c_mktsegment" AS "c_mktsegment", - "t14"."c_comment" AS "c_comment", - "t14"."o_orderkey" AS "o_orderkey", - "t14"."o_custkey" AS "o_custkey", - "t14"."o_orderstatus" AS "o_orderstatus", - "t14"."o_totalprice" AS "o_totalprice", - "t14"."o_orderdate" AS "o_orderdate", - "t14"."o_orderpriority" AS "o_orderpriority", - "t14"."o_clerk" AS "o_clerk", - "t14"."o_shippriority" AS "o_shippriority", - "t14"."o_comment" AS "o_comment", - "t14"."l_orderkey" AS "l_orderkey", - "t14"."l_partkey" AS "l_partkey", - "t14"."l_suppkey" AS "l_suppkey", - "t14"."l_linenumber" AS "l_linenumber", - "t14"."l_quantity" AS "l_quantity", - "t14"."l_extendedprice" AS "l_extendedprice", - "t14"."l_discount" AS "l_discount", - "t14"."l_tax" AS "l_tax", - "t14"."l_returnflag" AS "l_returnflag", - "t14"."l_linestatus" AS "l_linestatus", - "t14"."l_shipdate" AS "l_shipdate", - "t14"."l_commitdate" AS "l_commitdate", - "t14"."l_receiptdate" AS "l_receiptdate", - "t14"."l_shipinstruct" AS "l_shipinstruct", - "t14"."l_shipmode" AS "l_shipmode", - "t14"."l_comment" AS "l_comment", - "t14"."n_nationkey" AS "n_nationkey", - "t14"."n_name" AS "n_name", - "t14"."n_regionkey" AS "n_regionkey", - "t14"."n_comment" AS "n_comment" + "t15"."c_custkey", + "t15"."c_name", + "t15"."c_address", + "t15"."c_nationkey", + "t15"."c_phone", + "t15"."c_acctbal", + "t15"."c_mktsegment", + "t15"."c_comment", + "t15"."o_orderkey", + "t15"."o_custkey", + "t15"."o_orderstatus", + "t15"."o_totalprice", + "t15"."o_orderdate", + "t15"."o_orderpriority", + "t15"."o_clerk", + "t15"."o_shippriority", + 
"t15"."o_comment", + "t15"."l_orderkey", + "t15"."l_partkey", + "t15"."l_suppkey", + "t15"."l_linenumber", + "t15"."l_quantity", + "t15"."l_extendedprice", + "t15"."l_discount", + "t15"."l_tax", + "t15"."l_returnflag", + "t15"."l_linestatus", + "t15"."l_shipdate", + "t15"."l_commitdate", + "t15"."l_receiptdate", + "t15"."l_shipinstruct", + "t15"."l_shipmode", + "t15"."l_comment", + "t15"."n_nationkey", + "t15"."n_name", + "t15"."n_regionkey", + "t15"."n_comment" FROM ( SELECT - "t4"."c_custkey" AS "c_custkey", - "t4"."c_name" AS "c_name", - "t4"."c_address" AS "c_address", - "t4"."c_nationkey" AS "c_nationkey", - "t4"."c_phone" AS "c_phone", - "t4"."c_acctbal" AS "c_acctbal", - "t4"."c_mktsegment" AS "c_mktsegment", - "t4"."c_comment" AS "c_comment", - "t8"."o_orderkey" AS "o_orderkey", - "t8"."o_custkey" AS "o_custkey", - "t8"."o_orderstatus" AS "o_orderstatus", - "t8"."o_totalprice" AS "o_totalprice", - "t8"."o_orderdate" AS "o_orderdate", - "t8"."o_orderpriority" AS "o_orderpriority", - "t8"."o_clerk" AS "o_clerk", - "t8"."o_shippriority" AS "o_shippriority", - "t8"."o_comment" AS "o_comment", - "t9"."l_orderkey" AS "l_orderkey", - "t9"."l_partkey" AS "l_partkey", - "t9"."l_suppkey" AS "l_suppkey", - "t9"."l_linenumber" AS "l_linenumber", - "t9"."l_quantity" AS "l_quantity", - "t9"."l_extendedprice" AS "l_extendedprice", - "t9"."l_discount" AS "l_discount", - "t9"."l_tax" AS "l_tax", - "t9"."l_returnflag" AS "l_returnflag", - "t9"."l_linestatus" AS "l_linestatus", - "t9"."l_shipdate" AS "l_shipdate", - "t9"."l_commitdate" AS "l_commitdate", - "t9"."l_receiptdate" AS "l_receiptdate", - "t9"."l_shipinstruct" AS "l_shipinstruct", - "t9"."l_shipmode" AS "l_shipmode", - "t9"."l_comment" AS "l_comment", - "t10"."n_nationkey" AS "n_nationkey", - "t10"."n_name" AS "n_name", - "t10"."n_regionkey" AS "n_regionkey", - "t10"."n_comment" AS "n_comment" + "t8"."c_custkey", + "t8"."c_name", + "t8"."c_address", + "t8"."c_nationkey", + "t8"."c_phone", + "t8"."c_acctbal", + "t8"."c_mktsegment", + "t8"."c_comment", + "t9"."o_orderkey", + "t9"."o_custkey", + "t9"."o_orderstatus", + "t9"."o_totalprice", + "t9"."o_orderdate", + "t9"."o_orderpriority", + "t9"."o_clerk", + "t9"."o_shippriority", + "t9"."o_comment", + "t10"."l_orderkey", + "t10"."l_partkey", + "t10"."l_suppkey", + "t10"."l_linenumber", + "t10"."l_quantity", + "t10"."l_extendedprice", + "t10"."l_discount", + "t10"."l_tax", + "t10"."l_returnflag", + "t10"."l_linestatus", + "t10"."l_shipdate", + "t10"."l_commitdate", + "t10"."l_receiptdate", + "t10"."l_shipinstruct", + "t10"."l_shipmode", + "t10"."l_comment", + "t11"."n_nationkey", + "t11"."n_name", + "t11"."n_regionkey", + "t11"."n_comment" FROM ( SELECT "t0"."C_CUSTKEY" AS "c_custkey", @@ -108,7 +108,7 @@ FROM ( "t0"."C_MKTSEGMENT" AS "c_mktsegment", "t0"."C_COMMENT" AS "c_comment" FROM "CUSTOMER" AS "t0" - ) AS "t4" + ) AS "t8" INNER JOIN ( SELECT "t1"."O_ORDERKEY" AS "o_orderkey", @@ -121,8 +121,8 @@ FROM ( "t1"."O_SHIPPRIORITY" AS "o_shippriority", "t1"."O_COMMENT" AS "o_comment" FROM "ORDERS" AS "t1" - ) AS "t8" - ON "t4"."c_custkey" = "t8"."o_custkey" + ) AS "t9" + ON "t8"."c_custkey" = "t9"."o_custkey" INNER JOIN ( SELECT "t2"."L_ORDERKEY" AS "l_orderkey", @@ -142,8 +142,8 @@ FROM ( "t2"."L_SHIPMODE" AS "l_shipmode", "t2"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t2" - ) AS "t9" - ON "t9"."l_orderkey" = "t8"."o_orderkey" + ) AS "t10" + ON "t10"."l_orderkey" = "t9"."o_orderkey" INNER JOIN ( SELECT "t3"."N_NATIONKEY" AS "n_nationkey", @@ -151,14 +151,14 @@ FROM ( "t3"."N_REGIONKEY" AS 
"n_regionkey", "t3"."N_COMMENT" AS "n_comment" FROM "NATION" AS "t3" - ) AS "t10" - ON "t4"."c_nationkey" = "t10"."n_nationkey" - ) AS "t14" + ) AS "t11" + ON "t8"."c_nationkey" = "t11"."n_nationkey" + ) AS "t15" WHERE - "t14"."o_orderdate" >= DATEFROMPARTS(1993, 10, 1) - AND "t14"."o_orderdate" < DATEFROMPARTS(1994, 1, 1) - AND "t14"."l_returnflag" = 'R' - ) AS "t15" + "t15"."o_orderdate" >= DATEFROMPARTS(1993, 10, 1) + AND "t15"."o_orderdate" < DATEFROMPARTS(1994, 1, 1) + AND "t15"."l_returnflag" = 'R' + ) AS "t16" GROUP BY 1, 2, @@ -167,7 +167,7 @@ FROM ( 5, 6, 7 -) AS "t16" +) AS "t17" ORDER BY - "t16"."revenue" DESC NULLS LAST + "t17"."revenue" DESC NULLS LAST LIMIT 20 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql index 5d311642834a..8c99ccb39f9c 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql @@ -1,46 +1,46 @@ SELECT - "t18"."ps_partkey" AS "ps_partkey", - "t18"."value" AS "value" + "t13"."ps_partkey", + "t13"."value" FROM ( SELECT - "t16"."ps_partkey" AS "ps_partkey", - SUM("t16"."ps_supplycost" * "t16"."ps_availqty") AS "value" + "t12"."ps_partkey", + SUM("t12"."ps_supplycost" * "t12"."ps_availqty") AS "value" FROM ( SELECT - "t14"."ps_partkey" AS "ps_partkey", - "t14"."ps_suppkey" AS "ps_suppkey", - "t14"."ps_availqty" AS "ps_availqty", - "t14"."ps_supplycost" AS "ps_supplycost", - "t14"."ps_comment" AS "ps_comment", - "t14"."s_suppkey" AS "s_suppkey", - "t14"."s_name" AS "s_name", - "t14"."s_address" AS "s_address", - "t14"."s_nationkey" AS "s_nationkey", - "t14"."s_phone" AS "s_phone", - "t14"."s_acctbal" AS "s_acctbal", - "t14"."s_comment" AS "s_comment", - "t14"."n_nationkey" AS "n_nationkey", - "t14"."n_name" AS "n_name", - "t14"."n_regionkey" AS "n_regionkey", - "t14"."n_comment" AS "n_comment" + "t11"."ps_partkey", + "t11"."ps_suppkey", + "t11"."ps_availqty", + "t11"."ps_supplycost", + "t11"."ps_comment", + "t11"."s_suppkey", + "t11"."s_name", + "t11"."s_address", + "t11"."s_nationkey", + "t11"."s_phone", + "t11"."s_acctbal", + "t11"."s_comment", + "t11"."n_nationkey", + "t11"."n_name", + "t11"."n_regionkey", + "t11"."n_comment" FROM ( SELECT - "t3"."ps_partkey" AS "ps_partkey", - "t3"."ps_suppkey" AS "ps_suppkey", - "t3"."ps_availqty" AS "ps_availqty", - "t3"."ps_supplycost" AS "ps_supplycost", - "t3"."ps_comment" AS "ps_comment", - "t6"."s_suppkey" AS "s_suppkey", - "t6"."s_name" AS "s_name", - "t6"."s_address" AS "s_address", - "t6"."s_nationkey" AS "s_nationkey", - "t6"."s_phone" AS "s_phone", - "t6"."s_acctbal" AS "s_acctbal", - "t6"."s_comment" AS "s_comment", - "t8"."n_nationkey" AS "n_nationkey", - "t8"."n_name" AS "n_name", - "t8"."n_regionkey" AS "n_regionkey", - "t8"."n_comment" AS "n_comment" + "t6"."ps_partkey", + "t6"."ps_suppkey", + "t6"."ps_availqty", + "t6"."ps_supplycost", + "t6"."ps_comment", + "t7"."s_suppkey", + "t7"."s_name", + "t7"."s_address", + "t7"."s_nationkey", + "t7"."s_phone", + "t7"."s_acctbal", + "t7"."s_comment", + "t8"."n_nationkey", + "t8"."n_name", + "t8"."n_regionkey", + "t8"."n_comment" FROM ( SELECT "t0"."PS_PARTKEY" AS "ps_partkey", @@ -49,7 +49,7 @@ FROM ( "t0"."PS_SUPPLYCOST" AS "ps_supplycost", "t0"."PS_COMMENT" AS "ps_comment" FROM "PARTSUPP" AS "t0" - ) AS "t3" + ) AS "t6" INNER JOIN ( SELECT "t1"."S_SUPPKEY" AS "s_suppkey", @@ -60,8 +60,8 @@ FROM ( "t1"."S_ACCTBAL" 
AS "s_acctbal", "t1"."S_COMMENT" AS "s_comment" FROM "SUPPLIER" AS "t1" - ) AS "t6" - ON "t3"."ps_suppkey" = "t6"."s_suppkey" + ) AS "t7" + ON "t6"."ps_suppkey" = "t7"."s_suppkey" INNER JOIN ( SELECT "t2"."N_NATIONKEY" AS "n_nationkey", @@ -70,55 +70,55 @@ FROM ( "t2"."N_COMMENT" AS "n_comment" FROM "NATION" AS "t2" ) AS "t8" - ON "t8"."n_nationkey" = "t6"."s_nationkey" - ) AS "t14" + ON "t8"."n_nationkey" = "t7"."s_nationkey" + ) AS "t11" WHERE - "t14"."n_name" = 'GERMANY' - ) AS "t16" + "t11"."n_name" = 'GERMANY' + ) AS "t12" GROUP BY 1 -) AS "t18" +) AS "t13" WHERE - "t18"."value" > ( + "t13"."value" > ( ( SELECT - SUM("t17"."ps_supplycost" * "t17"."ps_availqty") AS "Sum(Multiply(ps_supplycost, ps_availqty))" + SUM("t12"."ps_supplycost" * "t12"."ps_availqty") AS "Sum(Multiply(ps_supplycost, ps_availqty))" FROM ( SELECT - "t15"."ps_partkey" AS "ps_partkey", - "t15"."ps_suppkey" AS "ps_suppkey", - "t15"."ps_availqty" AS "ps_availqty", - "t15"."ps_supplycost" AS "ps_supplycost", - "t15"."ps_comment" AS "ps_comment", - "t15"."s_suppkey" AS "s_suppkey", - "t15"."s_name" AS "s_name", - "t15"."s_address" AS "s_address", - "t15"."s_nationkey" AS "s_nationkey", - "t15"."s_phone" AS "s_phone", - "t15"."s_acctbal" AS "s_acctbal", - "t15"."s_comment" AS "s_comment", - "t15"."n_nationkey" AS "n_nationkey", - "t15"."n_name" AS "n_name", - "t15"."n_regionkey" AS "n_regionkey", - "t15"."n_comment" AS "n_comment" + "t11"."ps_partkey", + "t11"."ps_suppkey", + "t11"."ps_availqty", + "t11"."ps_supplycost", + "t11"."ps_comment", + "t11"."s_suppkey", + "t11"."s_name", + "t11"."s_address", + "t11"."s_nationkey", + "t11"."s_phone", + "t11"."s_acctbal", + "t11"."s_comment", + "t11"."n_nationkey", + "t11"."n_name", + "t11"."n_regionkey", + "t11"."n_comment" FROM ( SELECT - "t3"."ps_partkey" AS "ps_partkey", - "t3"."ps_suppkey" AS "ps_suppkey", - "t3"."ps_availqty" AS "ps_availqty", - "t3"."ps_supplycost" AS "ps_supplycost", - "t3"."ps_comment" AS "ps_comment", - "t7"."s_suppkey" AS "s_suppkey", - "t7"."s_name" AS "s_name", - "t7"."s_address" AS "s_address", - "t7"."s_nationkey" AS "s_nationkey", - "t7"."s_phone" AS "s_phone", - "t7"."s_acctbal" AS "s_acctbal", - "t7"."s_comment" AS "s_comment", - "t9"."n_nationkey" AS "n_nationkey", - "t9"."n_name" AS "n_name", - "t9"."n_regionkey" AS "n_regionkey", - "t9"."n_comment" AS "n_comment" + "t6"."ps_partkey", + "t6"."ps_suppkey", + "t6"."ps_availqty", + "t6"."ps_supplycost", + "t6"."ps_comment", + "t7"."s_suppkey", + "t7"."s_name", + "t7"."s_address", + "t7"."s_nationkey", + "t7"."s_phone", + "t7"."s_acctbal", + "t7"."s_comment", + "t8"."n_nationkey", + "t8"."n_name", + "t8"."n_regionkey", + "t8"."n_comment" FROM ( SELECT "t0"."PS_PARTKEY" AS "ps_partkey", @@ -127,7 +127,7 @@ WHERE "t0"."PS_SUPPLYCOST" AS "ps_supplycost", "t0"."PS_COMMENT" AS "ps_comment" FROM "PARTSUPP" AS "t0" - ) AS "t3" + ) AS "t6" INNER JOIN ( SELECT "t1"."S_SUPPKEY" AS "s_suppkey", @@ -139,7 +139,7 @@ WHERE "t1"."S_COMMENT" AS "s_comment" FROM "SUPPLIER" AS "t1" ) AS "t7" - ON "t3"."ps_suppkey" = "t7"."s_suppkey" + ON "t6"."ps_suppkey" = "t7"."s_suppkey" INNER JOIN ( SELECT "t2"."N_NATIONKEY" AS "n_nationkey", @@ -147,13 +147,13 @@ WHERE "t2"."N_REGIONKEY" AS "n_regionkey", "t2"."N_COMMENT" AS "n_comment" FROM "NATION" AS "t2" - ) AS "t9" - ON "t9"."n_nationkey" = "t7"."s_nationkey" - ) AS "t15" + ) AS "t8" + ON "t8"."n_nationkey" = "t7"."s_nationkey" + ) AS "t11" WHERE - "t15"."n_name" = 'GERMANY' - ) AS "t17" + "t11"."n_name" = 'GERMANY' + ) AS "t12" ) * 0.0001 ) ORDER BY - "t18"."value" DESC 
NULLS LAST \ No newline at end of file + "t13"."value" DESC NULLS LAST \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql index 799ef5b12cd0..d0b4e47f354a 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql @@ -1,70 +1,70 @@ SELECT - "t8"."l_shipmode" AS "l_shipmode", - "t8"."high_line_count" AS "high_line_count", - "t8"."low_line_count" AS "low_line_count" + "t9"."l_shipmode", + "t9"."high_line_count", + "t9"."low_line_count" FROM ( SELECT - "t7"."l_shipmode" AS "l_shipmode", + "t8"."l_shipmode", SUM( - CASE "t7"."o_orderpriority" WHEN '1-URGENT' THEN 1 WHEN '2-HIGH' THEN 1 ELSE 0 END + CASE "t8"."o_orderpriority" WHEN '1-URGENT' THEN 1 WHEN '2-HIGH' THEN 1 ELSE 0 END ) AS "high_line_count", SUM( - CASE "t7"."o_orderpriority" WHEN '1-URGENT' THEN 0 WHEN '2-HIGH' THEN 0 ELSE 1 END + CASE "t8"."o_orderpriority" WHEN '1-URGENT' THEN 0 WHEN '2-HIGH' THEN 0 ELSE 1 END ) AS "low_line_count" FROM ( SELECT - "t6"."o_orderkey" AS "o_orderkey", - "t6"."o_custkey" AS "o_custkey", - "t6"."o_orderstatus" AS "o_orderstatus", - "t6"."o_totalprice" AS "o_totalprice", - "t6"."o_orderdate" AS "o_orderdate", - "t6"."o_orderpriority" AS "o_orderpriority", - "t6"."o_clerk" AS "o_clerk", - "t6"."o_shippriority" AS "o_shippriority", - "t6"."o_comment" AS "o_comment", - "t6"."l_orderkey" AS "l_orderkey", - "t6"."l_partkey" AS "l_partkey", - "t6"."l_suppkey" AS "l_suppkey", - "t6"."l_linenumber" AS "l_linenumber", - "t6"."l_quantity" AS "l_quantity", - "t6"."l_extendedprice" AS "l_extendedprice", - "t6"."l_discount" AS "l_discount", - "t6"."l_tax" AS "l_tax", - "t6"."l_returnflag" AS "l_returnflag", - "t6"."l_linestatus" AS "l_linestatus", - "t6"."l_shipdate" AS "l_shipdate", - "t6"."l_commitdate" AS "l_commitdate", - "t6"."l_receiptdate" AS "l_receiptdate", - "t6"."l_shipinstruct" AS "l_shipinstruct", - "t6"."l_shipmode" AS "l_shipmode", - "t6"."l_comment" AS "l_comment" + "t7"."o_orderkey", + "t7"."o_custkey", + "t7"."o_orderstatus", + "t7"."o_totalprice", + "t7"."o_orderdate", + "t7"."o_orderpriority", + "t7"."o_clerk", + "t7"."o_shippriority", + "t7"."o_comment", + "t7"."l_orderkey", + "t7"."l_partkey", + "t7"."l_suppkey", + "t7"."l_linenumber", + "t7"."l_quantity", + "t7"."l_extendedprice", + "t7"."l_discount", + "t7"."l_tax", + "t7"."l_returnflag", + "t7"."l_linestatus", + "t7"."l_shipdate", + "t7"."l_commitdate", + "t7"."l_receiptdate", + "t7"."l_shipinstruct", + "t7"."l_shipmode", + "t7"."l_comment" FROM ( SELECT - "t2"."o_orderkey" AS "o_orderkey", - "t2"."o_custkey" AS "o_custkey", - "t2"."o_orderstatus" AS "o_orderstatus", - "t2"."o_totalprice" AS "o_totalprice", - "t2"."o_orderdate" AS "o_orderdate", - "t2"."o_orderpriority" AS "o_orderpriority", - "t2"."o_clerk" AS "o_clerk", - "t2"."o_shippriority" AS "o_shippriority", - "t2"."o_comment" AS "o_comment", - "t4"."l_orderkey" AS "l_orderkey", - "t4"."l_partkey" AS "l_partkey", - "t4"."l_suppkey" AS "l_suppkey", - "t4"."l_linenumber" AS "l_linenumber", - "t4"."l_quantity" AS "l_quantity", - "t4"."l_extendedprice" AS "l_extendedprice", - "t4"."l_discount" AS "l_discount", - "t4"."l_tax" AS "l_tax", - "t4"."l_returnflag" AS "l_returnflag", - "t4"."l_linestatus" AS "l_linestatus", - "t4"."l_shipdate" AS "l_shipdate", - "t4"."l_commitdate" AS "l_commitdate", - "t4"."l_receiptdate" AS "l_receiptdate", - 
"t4"."l_shipinstruct" AS "l_shipinstruct", - "t4"."l_shipmode" AS "l_shipmode", - "t4"."l_comment" AS "l_comment" + "t4"."o_orderkey", + "t4"."o_custkey", + "t4"."o_orderstatus", + "t4"."o_totalprice", + "t4"."o_orderdate", + "t4"."o_orderpriority", + "t4"."o_clerk", + "t4"."o_shippriority", + "t4"."o_comment", + "t5"."l_orderkey", + "t5"."l_partkey", + "t5"."l_suppkey", + "t5"."l_linenumber", + "t5"."l_quantity", + "t5"."l_extendedprice", + "t5"."l_discount", + "t5"."l_tax", + "t5"."l_returnflag", + "t5"."l_linestatus", + "t5"."l_shipdate", + "t5"."l_commitdate", + "t5"."l_receiptdate", + "t5"."l_shipinstruct", + "t5"."l_shipmode", + "t5"."l_comment" FROM ( SELECT "t0"."O_ORDERKEY" AS "o_orderkey", @@ -77,7 +77,7 @@ FROM ( "t0"."O_SHIPPRIORITY" AS "o_shippriority", "t0"."O_COMMENT" AS "o_comment" FROM "ORDERS" AS "t0" - ) AS "t2" + ) AS "t4" INNER JOIN ( SELECT "t1"."L_ORDERKEY" AS "l_orderkey", @@ -97,18 +97,18 @@ FROM ( "t1"."L_SHIPMODE" AS "l_shipmode", "t1"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t1" - ) AS "t4" - ON "t2"."o_orderkey" = "t4"."l_orderkey" - ) AS "t6" + ) AS "t5" + ON "t4"."o_orderkey" = "t5"."l_orderkey" + ) AS "t7" WHERE - "t6"."l_shipmode" IN ('MAIL', 'SHIP') - AND "t6"."l_commitdate" < "t6"."l_receiptdate" - AND "t6"."l_shipdate" < "t6"."l_commitdate" - AND "t6"."l_receiptdate" >= DATEFROMPARTS(1994, 1, 1) - AND "t6"."l_receiptdate" < DATEFROMPARTS(1995, 1, 1) - ) AS "t7" + "t7"."l_shipmode" IN ('MAIL', 'SHIP') + AND "t7"."l_commitdate" < "t7"."l_receiptdate" + AND "t7"."l_shipdate" < "t7"."l_commitdate" + AND "t7"."l_receiptdate" >= DATEFROMPARTS(1994, 1, 1) + AND "t7"."l_receiptdate" < DATEFROMPARTS(1995, 1, 1) + ) AS "t8" GROUP BY 1 -) AS "t8" +) AS "t9" ORDER BY - "t8"."l_shipmode" ASC \ No newline at end of file + "t9"."l_shipmode" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql index fd4edd9dfb77..159d83733a08 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql @@ -1,33 +1,33 @@ SELECT - "t8"."c_count" AS "c_count", - "t8"."custdist" AS "custdist" + "t9"."c_count", + "t9"."custdist" FROM ( SELECT - "t7"."c_count" AS "c_count", + "t8"."c_count", COUNT(*) AS "custdist" FROM ( SELECT - "t6"."c_custkey" AS "c_custkey", - COUNT("t6"."o_orderkey") AS "c_count" + "t7"."c_custkey", + COUNT("t7"."o_orderkey") AS "c_count" FROM ( SELECT - "t2"."c_custkey" AS "c_custkey", - "t2"."c_name" AS "c_name", - "t2"."c_address" AS "c_address", - "t2"."c_nationkey" AS "c_nationkey", - "t2"."c_phone" AS "c_phone", - "t2"."c_acctbal" AS "c_acctbal", - "t2"."c_mktsegment" AS "c_mktsegment", - "t2"."c_comment" AS "c_comment", - "t4"."o_orderkey" AS "o_orderkey", - "t4"."o_custkey" AS "o_custkey", - "t4"."o_orderstatus" AS "o_orderstatus", - "t4"."o_totalprice" AS "o_totalprice", - "t4"."o_orderdate" AS "o_orderdate", - "t4"."o_orderpriority" AS "o_orderpriority", - "t4"."o_clerk" AS "o_clerk", - "t4"."o_shippriority" AS "o_shippriority", - "t4"."o_comment" AS "o_comment" + "t4"."c_custkey", + "t4"."c_name", + "t4"."c_address", + "t4"."c_nationkey", + "t4"."c_phone", + "t4"."c_acctbal", + "t4"."c_mktsegment", + "t4"."c_comment", + "t5"."o_orderkey", + "t5"."o_custkey", + "t5"."o_orderstatus", + "t5"."o_totalprice", + "t5"."o_orderdate", + "t5"."o_orderpriority", + "t5"."o_clerk", + "t5"."o_shippriority", + 
"t5"."o_comment" FROM ( SELECT "t0"."C_CUSTKEY" AS "c_custkey", @@ -39,7 +39,7 @@ FROM ( "t0"."C_MKTSEGMENT" AS "c_mktsegment", "t0"."C_COMMENT" AS "c_comment" FROM "CUSTOMER" AS "t0" - ) AS "t2" + ) AS "t4" LEFT OUTER JOIN ( SELECT "t1"."O_ORDERKEY" AS "o_orderkey", @@ -52,18 +52,18 @@ FROM ( "t1"."O_SHIPPRIORITY" AS "o_shippriority", "t1"."O_COMMENT" AS "o_comment" FROM "ORDERS" AS "t1" - ) AS "t4" - ON "t2"."c_custkey" = "t4"."o_custkey" + ) AS "t5" + ON "t4"."c_custkey" = "t5"."o_custkey" AND NOT ( - "t4"."o_comment" LIKE '%special%requests%' + "t5"."o_comment" LIKE '%special%requests%' ) - ) AS "t6" + ) AS "t7" GROUP BY 1 - ) AS "t7" + ) AS "t8" GROUP BY 1 -) AS "t8" +) AS "t9" ORDER BY - "t8"."custdist" DESC NULLS LAST, - "t8"."c_count" DESC NULLS LAST \ No newline at end of file + "t9"."custdist" DESC NULLS LAST, + "t9"."c_count" DESC NULLS LAST \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql index a417e7d42d34..46bd271c3296 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql @@ -1,67 +1,67 @@ SELECT ( SUM( - IFF("t7"."p_type" LIKE 'PROMO%', "t7"."l_extendedprice" * ( - 1 - "t7"."l_discount" + IFF("t8"."p_type" LIKE 'PROMO%', "t8"."l_extendedprice" * ( + 1 - "t8"."l_discount" ), 0) ) * 100 - ) / SUM("t7"."l_extendedprice" * ( - 1 - "t7"."l_discount" + ) / SUM("t8"."l_extendedprice" * ( + 1 - "t8"."l_discount" )) AS "promo_revenue" FROM ( SELECT - "t6"."l_orderkey" AS "l_orderkey", - "t6"."l_partkey" AS "l_partkey", - "t6"."l_suppkey" AS "l_suppkey", - "t6"."l_linenumber" AS "l_linenumber", - "t6"."l_quantity" AS "l_quantity", - "t6"."l_extendedprice" AS "l_extendedprice", - "t6"."l_discount" AS "l_discount", - "t6"."l_tax" AS "l_tax", - "t6"."l_returnflag" AS "l_returnflag", - "t6"."l_linestatus" AS "l_linestatus", - "t6"."l_shipdate" AS "l_shipdate", - "t6"."l_commitdate" AS "l_commitdate", - "t6"."l_receiptdate" AS "l_receiptdate", - "t6"."l_shipinstruct" AS "l_shipinstruct", - "t6"."l_shipmode" AS "l_shipmode", - "t6"."l_comment" AS "l_comment", - "t6"."p_partkey" AS "p_partkey", - "t6"."p_name" AS "p_name", - "t6"."p_mfgr" AS "p_mfgr", - "t6"."p_brand" AS "p_brand", - "t6"."p_type" AS "p_type", - "t6"."p_size" AS "p_size", - "t6"."p_container" AS "p_container", - "t6"."p_retailprice" AS "p_retailprice", - "t6"."p_comment" AS "p_comment" + "t7"."l_orderkey", + "t7"."l_partkey", + "t7"."l_suppkey", + "t7"."l_linenumber", + "t7"."l_quantity", + "t7"."l_extendedprice", + "t7"."l_discount", + "t7"."l_tax", + "t7"."l_returnflag", + "t7"."l_linestatus", + "t7"."l_shipdate", + "t7"."l_commitdate", + "t7"."l_receiptdate", + "t7"."l_shipinstruct", + "t7"."l_shipmode", + "t7"."l_comment", + "t7"."p_partkey", + "t7"."p_name", + "t7"."p_mfgr", + "t7"."p_brand", + "t7"."p_type", + "t7"."p_size", + "t7"."p_container", + "t7"."p_retailprice", + "t7"."p_comment" FROM ( SELECT - "t2"."l_orderkey" AS "l_orderkey", - "t2"."l_partkey" AS "l_partkey", - "t2"."l_suppkey" AS "l_suppkey", - "t2"."l_linenumber" AS "l_linenumber", - "t2"."l_quantity" AS "l_quantity", - "t2"."l_extendedprice" AS "l_extendedprice", - "t2"."l_discount" AS "l_discount", - "t2"."l_tax" AS "l_tax", - "t2"."l_returnflag" AS "l_returnflag", - "t2"."l_linestatus" AS "l_linestatus", - "t2"."l_shipdate" AS "l_shipdate", - "t2"."l_commitdate" AS "l_commitdate", - 
"t2"."l_receiptdate" AS "l_receiptdate", - "t2"."l_shipinstruct" AS "l_shipinstruct", - "t2"."l_shipmode" AS "l_shipmode", - "t2"."l_comment" AS "l_comment", - "t4"."p_partkey" AS "p_partkey", - "t4"."p_name" AS "p_name", - "t4"."p_mfgr" AS "p_mfgr", - "t4"."p_brand" AS "p_brand", - "t4"."p_type" AS "p_type", - "t4"."p_size" AS "p_size", - "t4"."p_container" AS "p_container", - "t4"."p_retailprice" AS "p_retailprice", - "t4"."p_comment" AS "p_comment" + "t4"."l_orderkey", + "t4"."l_partkey", + "t4"."l_suppkey", + "t4"."l_linenumber", + "t4"."l_quantity", + "t4"."l_extendedprice", + "t4"."l_discount", + "t4"."l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment", + "t5"."p_partkey", + "t5"."p_name", + "t5"."p_mfgr", + "t5"."p_brand", + "t5"."p_type", + "t5"."p_size", + "t5"."p_container", + "t5"."p_retailprice", + "t5"."p_comment" FROM ( SELECT "t0"."L_ORDERKEY" AS "l_orderkey", @@ -81,7 +81,7 @@ FROM ( "t0"."L_SHIPMODE" AS "l_shipmode", "t0"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t0" - ) AS "t2" + ) AS "t4" INNER JOIN ( SELECT "t1"."P_PARTKEY" AS "p_partkey", @@ -94,10 +94,10 @@ FROM ( "t1"."P_RETAILPRICE" AS "p_retailprice", "t1"."P_COMMENT" AS "p_comment" FROM "PART" AS "t1" - ) AS "t4" - ON "t2"."l_partkey" = "t4"."p_partkey" - ) AS "t6" + ) AS "t5" + ON "t4"."l_partkey" = "t5"."p_partkey" + ) AS "t7" WHERE - "t6"."l_shipdate" >= DATEFROMPARTS(1995, 9, 1) - AND "t6"."l_shipdate" < DATEFROMPARTS(1995, 10, 1) -) AS "t7" \ No newline at end of file + "t7"."l_shipdate" >= DATEFROMPARTS(1995, 9, 1) + AND "t7"."l_shipdate" < DATEFROMPARTS(1995, 10, 1) +) AS "t8" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql index c919360a42d2..014b7f420d8d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql @@ -1,20 +1,20 @@ SELECT - "t7"."s_suppkey" AS "s_suppkey", - "t7"."s_name" AS "s_name", - "t7"."s_address" AS "s_address", - "t7"."s_phone" AS "s_phone", - "t7"."total_revenue" AS "total_revenue" + "t8"."s_suppkey", + "t8"."s_name", + "t8"."s_address", + "t8"."s_phone", + "t8"."total_revenue" FROM ( SELECT - "t2"."s_suppkey" AS "s_suppkey", - "t2"."s_name" AS "s_name", - "t2"."s_address" AS "s_address", - "t2"."s_nationkey" AS "s_nationkey", - "t2"."s_phone" AS "s_phone", - "t2"."s_acctbal" AS "s_acctbal", - "t2"."s_comment" AS "s_comment", - "t5"."l_suppkey" AS "l_suppkey", - "t5"."total_revenue" AS "total_revenue" + "t3"."s_suppkey", + "t3"."s_name", + "t3"."s_address", + "t3"."s_nationkey", + "t3"."s_phone", + "t3"."s_acctbal", + "t3"."s_comment", + "t6"."l_suppkey", + "t6"."total_revenue" FROM ( SELECT "t0"."S_SUPPKEY" AS "s_suppkey", @@ -25,12 +25,12 @@ FROM ( "t0"."S_ACCTBAL" AS "s_acctbal", "t0"."S_COMMENT" AS "s_comment" FROM "SUPPLIER" AS "t0" - ) AS "t2" + ) AS "t3" INNER JOIN ( SELECT - "t3"."l_suppkey" AS "l_suppkey", - SUM("t3"."l_extendedprice" * ( - 1 - "t3"."l_discount" + "t4"."l_suppkey", + SUM("t4"."l_extendedprice" * ( + 1 - "t4"."l_discount" )) AS "total_revenue" FROM ( SELECT @@ -54,27 +54,27 @@ FROM ( WHERE "t1"."L_SHIPDATE" >= DATEFROMPARTS(1996, 1, 1) AND "t1"."L_SHIPDATE" < DATEFROMPARTS(1996, 4, 1) - ) AS "t3" + ) AS "t4" GROUP BY 1 - ) AS "t5" - ON "t2"."s_suppkey" = 
"t5"."l_suppkey" -) AS "t7" + ) AS "t6" + ON "t3"."s_suppkey" = "t6"."l_suppkey" +) AS "t8" WHERE - "t7"."total_revenue" = ( + "t8"."total_revenue" = ( SELECT - MAX("t7"."total_revenue") AS "Max(total_revenue)" + MAX("t8"."total_revenue") AS "Max(total_revenue)" FROM ( SELECT - "t2"."s_suppkey" AS "s_suppkey", - "t2"."s_name" AS "s_name", - "t2"."s_address" AS "s_address", - "t2"."s_nationkey" AS "s_nationkey", - "t2"."s_phone" AS "s_phone", - "t2"."s_acctbal" AS "s_acctbal", - "t2"."s_comment" AS "s_comment", - "t5"."l_suppkey" AS "l_suppkey", - "t5"."total_revenue" AS "total_revenue" + "t3"."s_suppkey", + "t3"."s_name", + "t3"."s_address", + "t3"."s_nationkey", + "t3"."s_phone", + "t3"."s_acctbal", + "t3"."s_comment", + "t6"."l_suppkey", + "t6"."total_revenue" FROM ( SELECT "t0"."S_SUPPKEY" AS "s_suppkey", @@ -85,12 +85,12 @@ WHERE "t0"."S_ACCTBAL" AS "s_acctbal", "t0"."S_COMMENT" AS "s_comment" FROM "SUPPLIER" AS "t0" - ) AS "t2" + ) AS "t3" INNER JOIN ( SELECT - "t3"."l_suppkey" AS "l_suppkey", - SUM("t3"."l_extendedprice" * ( - 1 - "t3"."l_discount" + "t4"."l_suppkey", + SUM("t4"."l_extendedprice" * ( + 1 - "t4"."l_discount" )) AS "total_revenue" FROM ( SELECT @@ -114,12 +114,12 @@ WHERE WHERE "t1"."L_SHIPDATE" >= DATEFROMPARTS(1996, 1, 1) AND "t1"."L_SHIPDATE" < DATEFROMPARTS(1996, 4, 1) - ) AS "t3" + ) AS "t4" GROUP BY 1 - ) AS "t5" - ON "t2"."s_suppkey" = "t5"."l_suppkey" - ) AS "t7" + ) AS "t6" + ON "t3"."s_suppkey" = "t6"."l_suppkey" + ) AS "t8" ) ORDER BY - "t7"."s_suppkey" ASC \ No newline at end of file + "t8"."s_suppkey" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql index 6ebdf27930bf..ed1cd26910ad 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql @@ -1,46 +1,46 @@ SELECT - "t10"."p_brand" AS "p_brand", - "t10"."p_type" AS "p_type", - "t10"."p_size" AS "p_size", - "t10"."supplier_cnt" AS "supplier_cnt" + "t11"."p_brand", + "t11"."p_type", + "t11"."p_size", + "t11"."supplier_cnt" FROM ( SELECT - "t9"."p_brand" AS "p_brand", - "t9"."p_type" AS "p_type", - "t9"."p_size" AS "p_size", - COUNT(DISTINCT "t9"."ps_suppkey") AS "supplier_cnt" + "t10"."p_brand", + "t10"."p_type", + "t10"."p_size", + COUNT(DISTINCT "t10"."ps_suppkey") AS "supplier_cnt" FROM ( SELECT - "t8"."ps_partkey" AS "ps_partkey", - "t8"."ps_suppkey" AS "ps_suppkey", - "t8"."ps_availqty" AS "ps_availqty", - "t8"."ps_supplycost" AS "ps_supplycost", - "t8"."ps_comment" AS "ps_comment", - "t8"."p_partkey" AS "p_partkey", - "t8"."p_name" AS "p_name", - "t8"."p_mfgr" AS "p_mfgr", - "t8"."p_brand" AS "p_brand", - "t8"."p_type" AS "p_type", - "t8"."p_size" AS "p_size", - "t8"."p_container" AS "p_container", - "t8"."p_retailprice" AS "p_retailprice", - "t8"."p_comment" AS "p_comment" + "t9"."ps_partkey", + "t9"."ps_suppkey", + "t9"."ps_availqty", + "t9"."ps_supplycost", + "t9"."ps_comment", + "t9"."p_partkey", + "t9"."p_name", + "t9"."p_mfgr", + "t9"."p_brand", + "t9"."p_type", + "t9"."p_size", + "t9"."p_container", + "t9"."p_retailprice", + "t9"."p_comment" FROM ( SELECT - "t3"."ps_partkey" AS "ps_partkey", - "t3"."ps_suppkey" AS "ps_suppkey", - "t3"."ps_availqty" AS "ps_availqty", - "t3"."ps_supplycost" AS "ps_supplycost", - "t3"."ps_comment" AS "ps_comment", - "t6"."p_partkey" AS "p_partkey", - "t6"."p_name" AS "p_name", - "t6"."p_mfgr" AS "p_mfgr", - 
"t6"."p_brand" AS "p_brand", - "t6"."p_type" AS "p_type", - "t6"."p_size" AS "p_size", - "t6"."p_container" AS "p_container", - "t6"."p_retailprice" AS "p_retailprice", - "t6"."p_comment" AS "p_comment" + "t5"."ps_partkey", + "t5"."ps_suppkey", + "t5"."ps_availqty", + "t5"."ps_supplycost", + "t5"."ps_comment", + "t7"."p_partkey", + "t7"."p_name", + "t7"."p_mfgr", + "t7"."p_brand", + "t7"."p_type", + "t7"."p_size", + "t7"."p_container", + "t7"."p_retailprice", + "t7"."p_comment" FROM ( SELECT "t0"."PS_PARTKEY" AS "ps_partkey", @@ -49,7 +49,7 @@ FROM ( "t0"."PS_SUPPLYCOST" AS "ps_supplycost", "t0"."PS_COMMENT" AS "ps_comment" FROM "PARTSUPP" AS "t0" - ) AS "t3" + ) AS "t5" INNER JOIN ( SELECT "t2"."P_PARTKEY" AS "p_partkey", @@ -62,32 +62,32 @@ FROM ( "t2"."P_RETAILPRICE" AS "p_retailprice", "t2"."P_COMMENT" AS "p_comment" FROM "PART" AS "t2" - ) AS "t6" - ON "t6"."p_partkey" = "t3"."ps_partkey" - ) AS "t8" + ) AS "t7" + ON "t7"."p_partkey" = "t5"."ps_partkey" + ) AS "t9" WHERE - "t8"."p_brand" <> 'Brand#45' + "t9"."p_brand" <> 'Brand#45' AND NOT ( - "t8"."p_type" LIKE 'MEDIUM POLISHED%' + "t9"."p_type" LIKE 'MEDIUM POLISHED%' ) - AND "t8"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) + AND "t9"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) AND NOT ( - "t8"."ps_suppkey" IN (( + "t9"."ps_suppkey" IN ( SELECT "t1"."S_SUPPKEY" AS "s_suppkey" FROM "SUPPLIER" AS "t1" WHERE "t1"."S_COMMENT" LIKE '%Customer%Complaints%' - )) + ) ) - ) AS "t9" + ) AS "t10" GROUP BY 1, 2, 3 -) AS "t10" +) AS "t11" ORDER BY - "t10"."supplier_cnt" DESC NULLS LAST, - "t10"."p_brand" ASC, - "t10"."p_type" ASC, - "t10"."p_size" ASC \ No newline at end of file + "t11"."supplier_cnt" DESC NULLS LAST, + "t11"."p_brand" ASC, + "t11"."p_type" ASC, + "t11"."p_size" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql index 6bd68abfdee8..67ed46944982 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql @@ -1,35 +1,59 @@ SELECT - SUM("t8"."l_extendedprice") / 7.0 AS "avg_yearly" + SUM("t10"."l_extendedprice") / 7.0 AS "avg_yearly" FROM ( SELECT - * + "t7"."l_orderkey", + "t7"."l_partkey", + "t7"."l_suppkey", + "t7"."l_linenumber", + "t7"."l_quantity", + "t7"."l_extendedprice", + "t7"."l_discount", + "t7"."l_tax", + "t7"."l_returnflag", + "t7"."l_linestatus", + "t7"."l_shipdate", + "t7"."l_commitdate", + "t7"."l_receiptdate", + "t7"."l_shipinstruct", + "t7"."l_shipmode", + "t7"."l_comment", + "t7"."p_partkey", + "t7"."p_name", + "t7"."p_mfgr", + "t7"."p_brand", + "t7"."p_type", + "t7"."p_size", + "t7"."p_container", + "t7"."p_retailprice", + "t7"."p_comment" FROM ( SELECT - "t2"."l_orderkey" AS "l_orderkey", - "t2"."l_partkey" AS "l_partkey", - "t2"."l_suppkey" AS "l_suppkey", - "t2"."l_linenumber" AS "l_linenumber", - "t2"."l_quantity" AS "l_quantity", - "t2"."l_extendedprice" AS "l_extendedprice", - "t2"."l_discount" AS "l_discount", - "t2"."l_tax" AS "l_tax", - "t2"."l_returnflag" AS "l_returnflag", - "t2"."l_linestatus" AS "l_linestatus", - "t2"."l_shipdate" AS "l_shipdate", - "t2"."l_commitdate" AS "l_commitdate", - "t2"."l_receiptdate" AS "l_receiptdate", - "t2"."l_shipinstruct" AS "l_shipinstruct", - "t2"."l_shipmode" AS "l_shipmode", - "t2"."l_comment" AS "l_comment", - "t3"."p_partkey" AS "p_partkey", - "t3"."p_name" AS "p_name", - "t3"."p_mfgr" AS "p_mfgr", - 
"t3"."p_brand" AS "p_brand", - "t3"."p_type" AS "p_type", - "t3"."p_size" AS "p_size", - "t3"."p_container" AS "p_container", - "t3"."p_retailprice" AS "p_retailprice", - "t3"."p_comment" AS "p_comment" + "t4"."l_orderkey", + "t4"."l_partkey", + "t4"."l_suppkey", + "t4"."l_linenumber", + "t4"."l_quantity", + "t4"."l_extendedprice", + "t4"."l_discount", + "t4"."l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment", + "t5"."p_partkey", + "t5"."p_name", + "t5"."p_mfgr", + "t5"."p_brand", + "t5"."p_type", + "t5"."p_size", + "t5"."p_container", + "t5"."p_retailprice", + "t5"."p_comment" FROM ( SELECT "t0"."L_ORDERKEY" AS "l_orderkey", @@ -49,7 +73,7 @@ FROM ( "t0"."L_SHIPMODE" AS "l_shipmode", "t0"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t0" - ) AS "t2" + ) AS "t4" INNER JOIN ( SELECT "t1"."P_PARTKEY" AS "p_partkey", @@ -62,50 +86,38 @@ FROM ( "t1"."P_RETAILPRICE" AS "p_retailprice", "t1"."P_COMMENT" AS "p_comment" FROM "PART" AS "t1" - ) AS "t3" - ON "t3"."p_partkey" = "t2"."l_partkey" - ) AS "t5" + ) AS "t5" + ON "t5"."p_partkey" = "t4"."l_partkey" + ) AS "t7" WHERE - ( - "t5"."p_brand" = 'Brand#23' - ) - AND ( - "t5"."p_container" = 'MED BOX' - ) - AND ( - "t5"."l_quantity" < ( - ( + "t7"."p_brand" = 'Brand#23' + AND "t7"."p_container" = 'MED BOX' + AND "t7"."l_quantity" < ( + ( + SELECT + AVG("t8"."l_quantity") AS "Mean(l_quantity)" + FROM ( SELECT - AVG("t6"."l_quantity") AS "Mean(l_quantity)" - FROM ( - SELECT - * - FROM ( - SELECT - "t0"."L_ORDERKEY" AS "l_orderkey", - "t0"."L_PARTKEY" AS "l_partkey", - "t0"."L_SUPPKEY" AS "l_suppkey", - "t0"."L_LINENUMBER" AS "l_linenumber", - "t0"."L_QUANTITY" AS "l_quantity", - "t0"."L_EXTENDEDPRICE" AS "l_extendedprice", - "t0"."L_DISCOUNT" AS "l_discount", - "t0"."L_TAX" AS "l_tax", - "t0"."L_RETURNFLAG" AS "l_returnflag", - "t0"."L_LINESTATUS" AS "l_linestatus", - "t0"."L_SHIPDATE" AS "l_shipdate", - "t0"."L_COMMITDATE" AS "l_commitdate", - "t0"."L_RECEIPTDATE" AS "l_receiptdate", - "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", - "t0"."L_SHIPMODE" AS "l_shipmode", - "t0"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t0" - ) AS "t2" - WHERE - ( - "t2"."l_partkey" = "t5"."p_partkey" - ) - ) AS "t6" - ) * 0.2 - ) + "t0"."L_ORDERKEY" AS "l_orderkey", + "t0"."L_PARTKEY" AS "l_partkey", + "t0"."L_SUPPKEY" AS "l_suppkey", + "t0"."L_LINENUMBER" AS "l_linenumber", + "t0"."L_QUANTITY" AS "l_quantity", + "t0"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t0"."L_DISCOUNT" AS "l_discount", + "t0"."L_TAX" AS "l_tax", + "t0"."L_RETURNFLAG" AS "l_returnflag", + "t0"."L_LINESTATUS" AS "l_linestatus", + "t0"."L_SHIPDATE" AS "l_shipdate", + "t0"."L_COMMITDATE" AS "l_commitdate", + "t0"."L_RECEIPTDATE" AS "l_receiptdate", + "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t0"."L_SHIPMODE" AS "l_shipmode", + "t0"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t0" + WHERE + "t0"."L_PARTKEY" = "t7"."p_partkey" + ) AS "t8" + ) * 0.2 ) -) AS "t8" \ No newline at end of file +) AS "t10" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql index e2a86eb98ccf..51c9e215cf29 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql @@ -1,88 +1,88 @@ SELECT - "t14"."c_name" AS "c_name", - 
"t14"."c_custkey" AS "c_custkey", - "t14"."o_orderkey" AS "o_orderkey", - "t14"."o_orderdate" AS "o_orderdate", - "t14"."o_totalprice" AS "o_totalprice", - "t14"."sum_qty" AS "sum_qty" + "t15"."c_name", + "t15"."c_custkey", + "t15"."o_orderkey", + "t15"."o_orderdate", + "t15"."o_totalprice", + "t15"."sum_qty" FROM ( SELECT - "t13"."c_name" AS "c_name", - "t13"."c_custkey" AS "c_custkey", - "t13"."o_orderkey" AS "o_orderkey", - "t13"."o_orderdate" AS "o_orderdate", - "t13"."o_totalprice" AS "o_totalprice", - SUM("t13"."l_quantity") AS "sum_qty" + "t14"."c_name", + "t14"."c_custkey", + "t14"."o_orderkey", + "t14"."o_orderdate", + "t14"."o_totalprice", + SUM("t14"."l_quantity") AS "sum_qty" FROM ( SELECT - "t11"."c_custkey" AS "c_custkey", - "t11"."c_name" AS "c_name", - "t11"."c_address" AS "c_address", - "t11"."c_nationkey" AS "c_nationkey", - "t11"."c_phone" AS "c_phone", - "t11"."c_acctbal" AS "c_acctbal", - "t11"."c_mktsegment" AS "c_mktsegment", - "t11"."c_comment" AS "c_comment", - "t11"."o_orderkey" AS "o_orderkey", - "t11"."o_custkey" AS "o_custkey", - "t11"."o_orderstatus" AS "o_orderstatus", - "t11"."o_totalprice" AS "o_totalprice", - "t11"."o_orderdate" AS "o_orderdate", - "t11"."o_orderpriority" AS "o_orderpriority", - "t11"."o_clerk" AS "o_clerk", - "t11"."o_shippriority" AS "o_shippriority", - "t11"."o_comment" AS "o_comment", - "t11"."l_orderkey" AS "l_orderkey", - "t11"."l_partkey" AS "l_partkey", - "t11"."l_suppkey" AS "l_suppkey", - "t11"."l_linenumber" AS "l_linenumber", - "t11"."l_quantity" AS "l_quantity", - "t11"."l_extendedprice" AS "l_extendedprice", - "t11"."l_discount" AS "l_discount", - "t11"."l_tax" AS "l_tax", - "t11"."l_returnflag" AS "l_returnflag", - "t11"."l_linestatus" AS "l_linestatus", - "t11"."l_shipdate" AS "l_shipdate", - "t11"."l_commitdate" AS "l_commitdate", - "t11"."l_receiptdate" AS "l_receiptdate", - "t11"."l_shipinstruct" AS "l_shipinstruct", - "t11"."l_shipmode" AS "l_shipmode", - "t11"."l_comment" AS "l_comment" + "t12"."c_custkey", + "t12"."c_name", + "t12"."c_address", + "t12"."c_nationkey", + "t12"."c_phone", + "t12"."c_acctbal", + "t12"."c_mktsegment", + "t12"."c_comment", + "t12"."o_orderkey", + "t12"."o_custkey", + "t12"."o_orderstatus", + "t12"."o_totalprice", + "t12"."o_orderdate", + "t12"."o_orderpriority", + "t12"."o_clerk", + "t12"."o_shippriority", + "t12"."o_comment", + "t12"."l_orderkey", + "t12"."l_partkey", + "t12"."l_suppkey", + "t12"."l_linenumber", + "t12"."l_quantity", + "t12"."l_extendedprice", + "t12"."l_discount", + "t12"."l_tax", + "t12"."l_returnflag", + "t12"."l_linestatus", + "t12"."l_shipdate", + "t12"."l_commitdate", + "t12"."l_receiptdate", + "t12"."l_shipinstruct", + "t12"."l_shipmode", + "t12"."l_comment" FROM ( SELECT - "t3"."c_custkey" AS "c_custkey", - "t3"."c_name" AS "c_name", - "t3"."c_address" AS "c_address", - "t3"."c_nationkey" AS "c_nationkey", - "t3"."c_phone" AS "c_phone", - "t3"."c_acctbal" AS "c_acctbal", - "t3"."c_mktsegment" AS "c_mktsegment", - "t3"."c_comment" AS "c_comment", - "t6"."o_orderkey" AS "o_orderkey", - "t6"."o_custkey" AS "o_custkey", - "t6"."o_orderstatus" AS "o_orderstatus", - "t6"."o_totalprice" AS "o_totalprice", - "t6"."o_orderdate" AS "o_orderdate", - "t6"."o_orderpriority" AS "o_orderpriority", - "t6"."o_clerk" AS "o_clerk", - "t6"."o_shippriority" AS "o_shippriority", - "t6"."o_comment" AS "o_comment", - "t7"."l_orderkey" AS "l_orderkey", - "t7"."l_partkey" AS "l_partkey", - "t7"."l_suppkey" AS "l_suppkey", - "t7"."l_linenumber" AS "l_linenumber", - "t7"."l_quantity" AS 
"l_quantity", - "t7"."l_extendedprice" AS "l_extendedprice", - "t7"."l_discount" AS "l_discount", - "t7"."l_tax" AS "l_tax", - "t7"."l_returnflag" AS "l_returnflag", - "t7"."l_linestatus" AS "l_linestatus", - "t7"."l_shipdate" AS "l_shipdate", - "t7"."l_commitdate" AS "l_commitdate", - "t7"."l_receiptdate" AS "l_receiptdate", - "t7"."l_shipinstruct" AS "l_shipinstruct", - "t7"."l_shipmode" AS "l_shipmode", - "t7"."l_comment" AS "l_comment" + "t6"."c_custkey", + "t6"."c_name", + "t6"."c_address", + "t6"."c_nationkey", + "t6"."c_phone", + "t6"."c_acctbal", + "t6"."c_mktsegment", + "t6"."c_comment", + "t7"."o_orderkey", + "t7"."o_custkey", + "t7"."o_orderstatus", + "t7"."o_totalprice", + "t7"."o_orderdate", + "t7"."o_orderpriority", + "t7"."o_clerk", + "t7"."o_shippriority", + "t7"."o_comment", + "t8"."l_orderkey", + "t8"."l_partkey", + "t8"."l_suppkey", + "t8"."l_linenumber", + "t8"."l_quantity", + "t8"."l_extendedprice", + "t8"."l_discount", + "t8"."l_tax", + "t8"."l_returnflag", + "t8"."l_linestatus", + "t8"."l_shipdate", + "t8"."l_commitdate", + "t8"."l_receiptdate", + "t8"."l_shipinstruct", + "t8"."l_shipmode", + "t8"."l_comment" FROM ( SELECT "t0"."C_CUSTKEY" AS "c_custkey", @@ -94,7 +94,7 @@ FROM ( "t0"."C_MKTSEGMENT" AS "c_mktsegment", "t0"."C_COMMENT" AS "c_comment" FROM "CUSTOMER" AS "t0" - ) AS "t3" + ) AS "t6" INNER JOIN ( SELECT "t1"."O_ORDERKEY" AS "o_orderkey", @@ -107,8 +107,8 @@ FROM ( "t1"."O_SHIPPRIORITY" AS "o_shippriority", "t1"."O_COMMENT" AS "o_comment" FROM "ORDERS" AS "t1" - ) AS "t6" - ON "t3"."c_custkey" = "t6"."o_custkey" + ) AS "t7" + ON "t6"."c_custkey" = "t7"."o_custkey" INNER JOIN ( SELECT "t2"."L_ORDERKEY" AS "l_orderkey", @@ -128,16 +128,16 @@ FROM ( "t2"."L_SHIPMODE" AS "l_shipmode", "t2"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t2" - ) AS "t7" - ON "t6"."o_orderkey" = "t7"."l_orderkey" - ) AS "t11" + ) AS "t8" + ON "t7"."o_orderkey" = "t8"."l_orderkey" + ) AS "t12" WHERE - "t11"."o_orderkey" IN (( + "t12"."o_orderkey" IN ( SELECT - "t8"."l_orderkey" AS "l_orderkey" + "t9"."l_orderkey" FROM ( SELECT - "t5"."l_orderkey" AS "l_orderkey", + "t5"."l_orderkey", SUM("t5"."l_quantity") AS "qty_sum" FROM ( SELECT @@ -161,19 +161,19 @@ FROM ( ) AS "t5" GROUP BY 1 - ) AS "t8" + ) AS "t9" WHERE - "t8"."qty_sum" > 300 - )) - ) AS "t13" + "t9"."qty_sum" > 300 + ) + ) AS "t14" GROUP BY 1, 2, 3, 4, 5 -) AS "t14" +) AS "t15" ORDER BY - "t14"."o_totalprice" DESC NULLS LAST, - "t14"."o_orderdate" ASC + "t15"."o_totalprice" DESC NULLS LAST, + "t15"."o_orderdate" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql index b6db87f2435c..2eaa6ff3f403 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql @@ -1,61 +1,61 @@ SELECT - SUM("t7"."l_extendedprice" * ( - 1 - "t7"."l_discount" + SUM("t8"."l_extendedprice" * ( + 1 - "t8"."l_discount" )) AS "revenue" FROM ( SELECT - "t6"."l_orderkey" AS "l_orderkey", - "t6"."l_partkey" AS "l_partkey", - "t6"."l_suppkey" AS "l_suppkey", - "t6"."l_linenumber" AS "l_linenumber", - "t6"."l_quantity" AS "l_quantity", - "t6"."l_extendedprice" AS "l_extendedprice", - "t6"."l_discount" AS "l_discount", - "t6"."l_tax" AS "l_tax", - "t6"."l_returnflag" AS "l_returnflag", - "t6"."l_linestatus" AS "l_linestatus", - "t6"."l_shipdate" AS "l_shipdate", - "t6"."l_commitdate" AS 
"l_commitdate", - "t6"."l_receiptdate" AS "l_receiptdate", - "t6"."l_shipinstruct" AS "l_shipinstruct", - "t6"."l_shipmode" AS "l_shipmode", - "t6"."l_comment" AS "l_comment", - "t6"."p_partkey" AS "p_partkey", - "t6"."p_name" AS "p_name", - "t6"."p_mfgr" AS "p_mfgr", - "t6"."p_brand" AS "p_brand", - "t6"."p_type" AS "p_type", - "t6"."p_size" AS "p_size", - "t6"."p_container" AS "p_container", - "t6"."p_retailprice" AS "p_retailprice", - "t6"."p_comment" AS "p_comment" + "t7"."l_orderkey", + "t7"."l_partkey", + "t7"."l_suppkey", + "t7"."l_linenumber", + "t7"."l_quantity", + "t7"."l_extendedprice", + "t7"."l_discount", + "t7"."l_tax", + "t7"."l_returnflag", + "t7"."l_linestatus", + "t7"."l_shipdate", + "t7"."l_commitdate", + "t7"."l_receiptdate", + "t7"."l_shipinstruct", + "t7"."l_shipmode", + "t7"."l_comment", + "t7"."p_partkey", + "t7"."p_name", + "t7"."p_mfgr", + "t7"."p_brand", + "t7"."p_type", + "t7"."p_size", + "t7"."p_container", + "t7"."p_retailprice", + "t7"."p_comment" FROM ( SELECT - "t2"."l_orderkey" AS "l_orderkey", - "t2"."l_partkey" AS "l_partkey", - "t2"."l_suppkey" AS "l_suppkey", - "t2"."l_linenumber" AS "l_linenumber", - "t2"."l_quantity" AS "l_quantity", - "t2"."l_extendedprice" AS "l_extendedprice", - "t2"."l_discount" AS "l_discount", - "t2"."l_tax" AS "l_tax", - "t2"."l_returnflag" AS "l_returnflag", - "t2"."l_linestatus" AS "l_linestatus", - "t2"."l_shipdate" AS "l_shipdate", - "t2"."l_commitdate" AS "l_commitdate", - "t2"."l_receiptdate" AS "l_receiptdate", - "t2"."l_shipinstruct" AS "l_shipinstruct", - "t2"."l_shipmode" AS "l_shipmode", - "t2"."l_comment" AS "l_comment", - "t4"."p_partkey" AS "p_partkey", - "t4"."p_name" AS "p_name", - "t4"."p_mfgr" AS "p_mfgr", - "t4"."p_brand" AS "p_brand", - "t4"."p_type" AS "p_type", - "t4"."p_size" AS "p_size", - "t4"."p_container" AS "p_container", - "t4"."p_retailprice" AS "p_retailprice", - "t4"."p_comment" AS "p_comment" + "t4"."l_orderkey", + "t4"."l_partkey", + "t4"."l_suppkey", + "t4"."l_linenumber", + "t4"."l_quantity", + "t4"."l_extendedprice", + "t4"."l_discount", + "t4"."l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment", + "t5"."p_partkey", + "t5"."p_name", + "t5"."p_mfgr", + "t5"."p_brand", + "t5"."p_type", + "t5"."p_size", + "t5"."p_container", + "t5"."p_retailprice", + "t5"."p_comment" FROM ( SELECT "t0"."L_ORDERKEY" AS "l_orderkey", @@ -75,7 +75,7 @@ FROM ( "t0"."L_SHIPMODE" AS "l_shipmode", "t0"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t0" - ) AS "t2" + ) AS "t4" INNER JOIN ( SELECT "t1"."P_PARTKEY" AS "p_partkey", @@ -88,9 +88,9 @@ FROM ( "t1"."P_RETAILPRICE" AS "p_retailprice", "t1"."P_COMMENT" AS "p_comment" FROM "PART" AS "t1" - ) AS "t4" - ON "t4"."p_partkey" = "t2"."l_partkey" - ) AS "t6" + ) AS "t5" + ON "t5"."p_partkey" = "t4"."l_partkey" + ) AS "t7" WHERE ( ( @@ -100,24 +100,24 @@ FROM ( ( ( ( - "t6"."p_brand" = 'Brand#12' + "t7"."p_brand" = 'Brand#12' ) - AND "t6"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + AND "t7"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') ) AND ( - "t6"."l_quantity" >= 1 + "t7"."l_quantity" >= 1 ) ) AND ( - "t6"."l_quantity" <= 11 + "t7"."l_quantity" <= 11 ) ) - AND "t6"."p_size" BETWEEN 1 AND 5 + AND "t7"."p_size" BETWEEN 1 AND 5 ) - AND "t6"."l_shipmode" IN ('AIR', 'AIR REG') + AND "t7"."l_shipmode" IN ('AIR', 'AIR REG') ) AND ( - "t6"."l_shipinstruct" = 'DELIVER IN PERSON' + "t7"."l_shipinstruct" = 
'DELIVER IN PERSON' ) ) OR ( @@ -127,24 +127,24 @@ FROM ( ( ( ( - "t6"."p_brand" = 'Brand#23' + "t7"."p_brand" = 'Brand#23' ) - AND "t6"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + AND "t7"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') ) AND ( - "t6"."l_quantity" >= 10 + "t7"."l_quantity" >= 10 ) ) AND ( - "t6"."l_quantity" <= 20 + "t7"."l_quantity" <= 20 ) ) - AND "t6"."p_size" BETWEEN 1 AND 10 + AND "t7"."p_size" BETWEEN 1 AND 10 ) - AND "t6"."l_shipmode" IN ('AIR', 'AIR REG') + AND "t7"."l_shipmode" IN ('AIR', 'AIR REG') ) AND ( - "t6"."l_shipinstruct" = 'DELIVER IN PERSON' + "t7"."l_shipinstruct" = 'DELIVER IN PERSON' ) ) ) @@ -155,24 +155,24 @@ FROM ( ( ( ( - "t6"."p_brand" = 'Brand#34' + "t7"."p_brand" = 'Brand#34' ) - AND "t6"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + AND "t7"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') ) AND ( - "t6"."l_quantity" >= 20 + "t7"."l_quantity" >= 20 ) ) AND ( - "t6"."l_quantity" <= 30 + "t7"."l_quantity" <= 30 ) ) - AND "t6"."p_size" BETWEEN 1 AND 15 + AND "t7"."p_size" BETWEEN 1 AND 15 ) - AND "t6"."l_shipmode" IN ('AIR', 'AIR REG') + AND "t7"."l_shipmode" IN ('AIR', 'AIR REG') ) AND ( - "t6"."l_shipinstruct" = 'DELIVER IN PERSON' + "t7"."l_shipinstruct" = 'DELIVER IN PERSON' ) ) -) AS "t7" \ No newline at end of file +) AS "t8" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql index 3b49410ea996..4d18b54ab6c0 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql @@ -1,19 +1,19 @@ SELECT - "t12"."s_name" AS "s_name", - "t12"."s_address" AS "s_address" + "t13"."s_name", + "t13"."s_address" FROM ( SELECT - "t5"."s_suppkey" AS "s_suppkey", - "t5"."s_name" AS "s_name", - "t5"."s_address" AS "s_address", - "t5"."s_nationkey" AS "s_nationkey", - "t5"."s_phone" AS "s_phone", - "t5"."s_acctbal" AS "s_acctbal", - "t5"."s_comment" AS "s_comment", - "t7"."n_nationkey" AS "n_nationkey", - "t7"."n_name" AS "n_name", - "t7"."n_regionkey" AS "n_regionkey", - "t7"."n_comment" AS "n_comment" + "t8"."s_suppkey", + "t8"."s_name", + "t8"."s_address", + "t8"."s_nationkey", + "t8"."s_phone", + "t8"."s_acctbal", + "t8"."s_comment", + "t9"."n_nationkey", + "t9"."n_name", + "t9"."n_regionkey", + "t9"."n_comment" FROM ( SELECT "t0"."S_SUPPKEY" AS "s_suppkey", @@ -24,7 +24,7 @@ FROM ( "t0"."S_ACCTBAL" AS "s_acctbal", "t0"."S_COMMENT" AS "s_comment" FROM "SUPPLIER" AS "t0" - ) AS "t5" + ) AS "t8" INNER JOIN ( SELECT "t2"."N_NATIONKEY" AS "n_nationkey", @@ -32,27 +32,35 @@ FROM ( "t2"."N_REGIONKEY" AS "n_regionkey", "t2"."N_COMMENT" AS "n_comment" FROM "NATION" AS "t2" - ) AS "t7" - ON "t5"."s_nationkey" = "t7"."n_nationkey" -) AS "t12" + ) AS "t9" + ON "t8"."s_nationkey" = "t9"."n_nationkey" +) AS "t13" WHERE - "t12"."n_name" = 'CANADA' - AND "t12"."s_suppkey" IN (( + "t13"."n_name" = 'CANADA' + AND "t13"."s_suppkey" IN ( SELECT - "t1"."PS_SUPPKEY" AS "ps_suppkey" - FROM "PARTSUPP" AS "t1" + "t6"."ps_suppkey" + FROM ( + SELECT + "t1"."PS_PARTKEY" AS "ps_partkey", + "t1"."PS_SUPPKEY" AS "ps_suppkey", + "t1"."PS_AVAILQTY" AS "ps_availqty", + "t1"."PS_SUPPLYCOST" AS "ps_supplycost", + "t1"."PS_COMMENT" AS "ps_comment" + FROM "PARTSUPP" AS "t1" + ) AS "t6" WHERE - "t1"."PS_PARTKEY" IN (( + "t6"."ps_partkey" IN ( SELECT "t3"."P_PARTKEY" AS "p_partkey" FROM 
"PART" AS "t3" WHERE "t3"."P_NAME" LIKE 'forest%' - )) - AND "t1"."PS_AVAILQTY" > ( + ) + AND "t6"."ps_availqty" > ( ( SELECT - SUM("t9"."l_quantity") AS "Sum(l_quantity)" + SUM("t11"."l_quantity") AS "Sum(l_quantity)" FROM ( SELECT "t4"."L_ORDERKEY" AS "l_orderkey", @@ -73,13 +81,13 @@ WHERE "t4"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t4" WHERE - "t4"."L_PARTKEY" = "t1"."PS_PARTKEY" - AND "t4"."L_SUPPKEY" = "t1"."PS_SUPPKEY" + "t4"."L_PARTKEY" = "t6"."ps_partkey" + AND "t4"."L_SUPPKEY" = "t6"."ps_suppkey" AND "t4"."L_SHIPDATE" >= DATEFROMPARTS(1994, 1, 1) AND "t4"."L_SHIPDATE" < DATEFROMPARTS(1995, 1, 1) - ) AS "t9" + ) AS "t11" ) * 0.5 ) - )) + ) ORDER BY - "t12"."s_name" ASC \ No newline at end of file + "t13"."s_name" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql index 89f5d7d5071c..5b785b3ea5c9 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql @@ -1,28 +1,28 @@ SELECT - "t20"."s_name" AS "s_name", - "t20"."numwait" AS "numwait" + "t21"."s_name", + "t21"."numwait" FROM ( SELECT - "t19"."s_name" AS "s_name", + "t20"."s_name", COUNT(*) AS "numwait" FROM ( SELECT - "t16"."l1_orderkey" AS "l1_orderkey", - "t16"."o_orderstatus" AS "o_orderstatus", - "t16"."l_receiptdate" AS "l_receiptdate", - "t16"."l_commitdate" AS "l_commitdate", - "t16"."l1_suppkey" AS "l1_suppkey", - "t16"."s_name" AS "s_name", - "t16"."n_name" AS "n_name" + "t17"."l1_orderkey", + "t17"."o_orderstatus", + "t17"."l_receiptdate", + "t17"."l_commitdate", + "t17"."l1_suppkey", + "t17"."s_name", + "t17"."n_name" FROM ( SELECT - "t8"."l_orderkey" AS "l1_orderkey", - "t11"."o_orderstatus" AS "o_orderstatus", - "t8"."l_receiptdate" AS "l_receiptdate", - "t8"."l_commitdate" AS "l_commitdate", - "t8"."l_suppkey" AS "l1_suppkey", - "t4"."s_name" AS "s_name", - "t12"."n_name" AS "n_name" + "t9"."l_orderkey" AS "l1_orderkey", + "t12"."o_orderstatus", + "t9"."l_receiptdate", + "t9"."l_commitdate", + "t9"."l_suppkey" AS "l1_suppkey", + "t8"."s_name", + "t13"."n_name" FROM ( SELECT "t0"."S_SUPPKEY" AS "s_suppkey", @@ -33,7 +33,7 @@ FROM ( "t0"."S_ACCTBAL" AS "s_acctbal", "t0"."S_COMMENT" AS "s_comment" FROM "SUPPLIER" AS "t0" - ) AS "t4" + ) AS "t8" INNER JOIN ( SELECT "t1"."L_ORDERKEY" AS "l_orderkey", @@ -53,8 +53,8 @@ FROM ( "t1"."L_SHIPMODE" AS "l_shipmode", "t1"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t1" - ) AS "t8" - ON "t4"."s_suppkey" = "t8"."l_suppkey" + ) AS "t9" + ON "t8"."s_suppkey" = "t9"."l_suppkey" INNER JOIN ( SELECT "t2"."O_ORDERKEY" AS "o_orderkey", @@ -67,8 +67,8 @@ FROM ( "t2"."O_SHIPPRIORITY" AS "o_shippriority", "t2"."O_COMMENT" AS "o_comment" FROM "ORDERS" AS "t2" - ) AS "t11" - ON "t11"."o_orderkey" = "t8"."l_orderkey" + ) AS "t12" + ON "t12"."o_orderkey" = "t9"."l_orderkey" INNER JOIN ( SELECT "t3"."N_NATIONKEY" AS "n_nationkey", @@ -76,15 +76,46 @@ FROM ( "t3"."N_REGIONKEY" AS "n_regionkey", "t3"."N_COMMENT" AS "n_comment" FROM "NATION" AS "t3" - ) AS "t12" - ON "t4"."s_nationkey" = "t12"."n_nationkey" - ) AS "t16" + ) AS "t13" + ON "t8"."s_nationkey" = "t13"."n_nationkey" + ) AS "t17" WHERE - "t16"."o_orderstatus" = 'F' - AND "t16"."l_receiptdate" > "t16"."l_commitdate" - AND "t16"."n_name" = 'SAUDI ARABIA' + "t17"."o_orderstatus" = 'F' + AND "t17"."l_receiptdate" > "t17"."l_commitdate" + AND "t17"."n_name" = 'SAUDI ARABIA' AND 
EXISTS( - ( + SELECT + 1 AS "1" + FROM ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + ) AS "t10" + WHERE + ( + "t10"."l_orderkey" = "t17"."l1_orderkey" + ) + AND ( + "t10"."l_suppkey" <> "t17"."l1_suppkey" + ) + ) + AND NOT ( + EXISTS( SELECT 1 AS "1" FROM ( @@ -106,61 +137,26 @@ FROM ( "t1"."L_SHIPMODE" AS "l_shipmode", "t1"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t1" - ) AS "t9" + ) AS "t11" WHERE ( - "t9"."l_orderkey" = "t16"."l1_orderkey" - ) - AND ( - "t9"."l_suppkey" <> "t16"."l1_suppkey" - ) - ) - ) - AND NOT ( - EXISTS( - ( - SELECT - 1 AS "1" - FROM ( - SELECT - "t1"."L_ORDERKEY" AS "l_orderkey", - "t1"."L_PARTKEY" AS "l_partkey", - "t1"."L_SUPPKEY" AS "l_suppkey", - "t1"."L_LINENUMBER" AS "l_linenumber", - "t1"."L_QUANTITY" AS "l_quantity", - "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", - "t1"."L_DISCOUNT" AS "l_discount", - "t1"."L_TAX" AS "l_tax", - "t1"."L_RETURNFLAG" AS "l_returnflag", - "t1"."L_LINESTATUS" AS "l_linestatus", - "t1"."L_SHIPDATE" AS "l_shipdate", - "t1"."L_COMMITDATE" AS "l_commitdate", - "t1"."L_RECEIPTDATE" AS "l_receiptdate", - "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", - "t1"."L_SHIPMODE" AS "l_shipmode", - "t1"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t1" - ) AS "t10" - WHERE ( - ( - "t10"."l_orderkey" = "t16"."l1_orderkey" - ) - AND ( - "t10"."l_suppkey" <> "t16"."l1_suppkey" - ) + "t11"."l_orderkey" = "t17"."l1_orderkey" ) AND ( - "t10"."l_receiptdate" > "t10"."l_commitdate" + "t11"."l_suppkey" <> "t17"."l1_suppkey" ) - ) + ) + AND ( + "t11"."l_receiptdate" > "t11"."l_commitdate" + ) ) ) - ) AS "t19" + ) AS "t20" GROUP BY 1 -) AS "t20" +) AS "t21" ORDER BY - "t20"."numwait" DESC NULLS LAST, - "t20"."s_name" ASC + "t21"."numwait" DESC NULLS LAST, + "t21"."s_name" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql index a8e109a09208..ec0032c198a4 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql @@ -1,20 +1,31 @@ SELECT - "t6"."cntrycode" AS "cntrycode", - "t6"."numcust" AS "numcust", - "t6"."totacctbal" AS "totacctbal" + "t7"."cntrycode", + "t7"."numcust", + "t7"."totacctbal" FROM ( SELECT - "t5"."cntrycode" AS "cntrycode", + "t6"."cntrycode", COUNT(*) AS "numcust", - SUM("t5"."c_acctbal") AS "totacctbal" + SUM("t6"."c_acctbal") AS "totacctbal" FROM ( SELECT - IFF(0 >= 0, SUBSTRING("t0"."C_PHONE", 0 + 1, 2), SUBSTRING("t0"."C_PHONE", 0, 2)) AS "cntrycode", - "t0"."C_ACCTBAL" AS "c_acctbal" - FROM "CUSTOMER" AS "t0" + IFF(0 >= 0, SUBSTRING("t2"."c_phone", 0 + 1, 2), SUBSTRING("t2"."c_phone", 0, 2)) AS "cntrycode", + "t2"."c_acctbal" + FROM ( + SELECT + "t0"."C_CUSTKEY" AS "c_custkey", + "t0"."C_NAME" AS "c_name", + "t0"."C_ADDRESS" AS "c_address", + "t0"."C_NATIONKEY" AS 
"c_nationkey", + "t0"."C_PHONE" AS "c_phone", + "t0"."C_ACCTBAL" AS "c_acctbal", + "t0"."C_MKTSEGMENT" AS "c_mktsegment", + "t0"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t0" + ) AS "t2" WHERE - IFF(0 >= 0, SUBSTRING("t0"."C_PHONE", 0 + 1, 2), SUBSTRING("t0"."C_PHONE", 0, 2)) IN ('13', '31', '23', '29', '30', '18', '17') - AND "t0"."C_ACCTBAL" > ( + IFF(0 >= 0, SUBSTRING("t2"."c_phone", 0 + 1, 2), SUBSTRING("t2"."c_phone", 0, 2)) IN ('13', '31', '23', '29', '30', '18', '17') + AND "t2"."c_acctbal" > ( SELECT AVG("t3"."c_acctbal") AS "Mean(c_acctbal)" FROM ( @@ -35,18 +46,16 @@ FROM ( ) AND NOT ( EXISTS( - ( - SELECT - 1 AS "1" - FROM "ORDERS" AS "t1" - WHERE - "t1"."O_CUSTKEY" = "t0"."C_CUSTKEY" - ) + SELECT + 1 AS "1" + FROM "ORDERS" AS "t1" + WHERE + "t1"."O_CUSTKEY" = "t2"."c_custkey" ) ) - ) AS "t5" + ) AS "t6" GROUP BY 1 -) AS "t6" +) AS "t7" ORDER BY - "t6"."cntrycode" ASC \ No newline at end of file + "t7"."cntrycode" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/test_h08.py b/ibis/backends/tests/tpch/test_h08.py index 3ab657c3bec7..971a83c4c352 100644 --- a/ibis/backends/tests/tpch/test_h08.py +++ b/ibis/backends/tests/tpch/test_h08.py @@ -8,11 +8,6 @@ @tpch_test -@pytest.mark.notimpl( - ["snowflake"], - raises=AssertionError, - reason="ibis doesn't preserve decimal types in aggregations", -) @pytest.mark.xfail_version( trino=["sqlalchemy>=2"], reason="slightly different code is generated for sqlalchemy 2 for aggregations", diff --git a/ibis/backends/tests/tpch/test_h11.py b/ibis/backends/tests/tpch/test_h11.py index 75439d06c8ce..e13ba99179f9 100644 --- a/ibis/backends/tests/tpch/test_h11.py +++ b/ibis/backends/tests/tpch/test_h11.py @@ -1,18 +1,11 @@ from __future__ import annotations -import pytest - import ibis from .conftest import tpch_test @tpch_test -@pytest.mark.broken( - ["snowflake"], - reason="ibis generates incorrect code for the right-hand-side of the exists statement", - raises=AssertionError, -) def test_tpc_h11(partsupp, supplier, nation): NATION = "GERMANY" FRACTION = 0.0001 diff --git a/ibis/backends/tests/tpch/test_h14.py b/ibis/backends/tests/tpch/test_h14.py index cb57d9911577..f72bbcaf6c2b 100644 --- a/ibis/backends/tests/tpch/test_h14.py +++ b/ibis/backends/tests/tpch/test_h14.py @@ -8,11 +8,6 @@ @tpch_test -@pytest.mark.notimpl( - ["snowflake"], - raises=AssertionError, - reason="ibis doesn't preserve decimal types in aggregations", -) @pytest.mark.xfail_version( trino=["sqlalchemy>=2"], reason="slightly different code is generated for sqlalchemy 2 for aggregations", diff --git a/ibis/backends/tests/tpch/test_h17.py b/ibis/backends/tests/tpch/test_h17.py index fbe50eb78f7e..0d112d048c91 100644 --- a/ibis/backends/tests/tpch/test_h17.py +++ b/ibis/backends/tests/tpch/test_h17.py @@ -6,11 +6,6 @@ @tpch_test -@pytest.mark.notimpl( - ["snowflake"], - raises=AssertionError, - reason="ibis doesn't preserve decimal types in aggregations", -) @pytest.mark.xfail_version( trino=["sqlalchemy>=2"], reason="slightly different code is generated for sqlalchemy 2 for aggregations", diff --git a/ibis/backends/tests/tpch/test_h21.py b/ibis/backends/tests/tpch/test_h21.py index f8aea4314c81..487b574bb615 100644 --- a/ibis/backends/tests/tpch/test_h21.py +++ b/ibis/backends/tests/tpch/test_h21.py @@ -1,19 +1,11 @@ from __future__ import annotations -import pytest -import sqlalchemy as sa - import ibis from .conftest import tpch_test @tpch_test -@pytest.mark.broken( - ["snowflake"], - reason="ibis generates overlapping aliases", - 
raises=sa.exc.CompileError, -) def test_tpc_h21(supplier, lineitem, orders, nation): """Suppliers Who Kept Orders Waiting Query (Q21) diff --git a/ibis/backends/tests/tpch/test_h22.py b/ibis/backends/tests/tpch/test_h22.py index f18da1d2930b..d505436c4927 100644 --- a/ibis/backends/tests/tpch/test_h22.py +++ b/ibis/backends/tests/tpch/test_h22.py @@ -1,16 +1,9 @@ from __future__ import annotations -import pytest - from .conftest import tpch_test @tpch_test -@pytest.mark.broken( - ["snowflake"], - reason="ibis generates incorrect code for the right-hand-side of the exists statement", - raises=AssertionError, -) def test_tpc_h22(customer, orders): """Global Sales Opportunity Query (Q22) diff --git a/pyproject.toml b/pyproject.toml index aa0d269b24b8..1d047429fb62 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,13 +84,9 @@ pyspark = { version = ">=3,<3.4", optional = true } # pyspark is heavily broken # used to support posix regexen in the pandas, dask and sqlite backends regex = { version = ">=2021.7.6", optional = true } shapely = { version = ">=2,<3", optional = true } -# include an explicit dependency on `snowflake-connector-python` because the -# lack of lower bound on this dependency as specified in `snowflake-sqlalchemy` -# appears to cause poetry's solver to get stuck -# -# also, we don't support arbitrarily old versions of this library +# we don't support arbitrarily old versions of this library due to security +# issues with versions <3.0.2 snowflake-connector-python = { version = ">=3.0.2,<4,!=3.3.0b1", optional = true } -snowflake-sqlalchemy = { version = ">=1.4.1,<2", optional = true } sqlalchemy = { version = ">=1.4,<3", optional = true } sqlalchemy-exasol = { version = ">=4.6.0", optional = true } sqlalchemy-views = { version = ">=0.3.1,<1", optional = true } @@ -173,7 +169,6 @@ all = [ "regex", "shapely", "snowflake-connector-python", - "snowflake-sqlalchemy", "sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views", @@ -208,12 +203,7 @@ risingwave = [ "sqlalchemy-risingwave", ] pyspark = ["pyspark", "sqlalchemy", "packaging"] -snowflake = [ - "snowflake-connector-python", - "snowflake-sqlalchemy", - "sqlalchemy-views", - "packaging", -] +snowflake = ["snowflake-connector-python", "packaging"] sqlite = ["regex", "sqlalchemy", "sqlalchemy-views"] trino = ["trino", "sqlalchemy", "sqlalchemy-views"] # non-backend extras From 24abf1ee0ca70b91f762322573cad847585c5623 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 2 Jan 2024 17:18:09 +0100 Subject: [PATCH 031/161] refactor(common): support union types as well as forward references in the dispatch utilities --- ibis/common/dispatch.py | 143 +++++++++++++++++++---------- ibis/common/patterns.py | 34 ++++--- ibis/common/tests/test_dispatch.py | 40 +++++++- ibis/common/typing.py | 19 +++- ibis/expr/api.py | 59 +++++------- ibis/util.py | 14 +++ 6 files changed, 208 insertions(+), 101 deletions(-) diff --git a/ibis/common/dispatch.py b/ibis/common/dispatch.py index 28999b452e37..2b5c4bd5ee0e 100644 --- a/ibis/common/dispatch.py +++ b/ibis/common/dispatch.py @@ -6,7 +6,14 @@ import re from collections import defaultdict -from ibis.util import import_object +from ibis.common.typing import ( + Union, + UnionType, + evaluate_annotations, + get_args, + get_origin, +) +from ibis.util import import_object, unalias_package def normalize(r: str | re.Pattern): @@ -26,71 +33,113 @@ def normalize(r: str | re.Pattern): return re.compile("^" + r.lstrip("^").rstrip("$") + "$") -def lazy_singledispatch(func): - 
"""A `singledispatch` implementation that supports lazily registering implementations.""" - - lookup = {object: func} - abc_lookup = {} - lazy_lookup = defaultdict(dict) +class SingleDispatch: + def __init__(self, func, typ=None): + self.lookup = {} + self.abc_lookup = {} + self.lazy_lookup = defaultdict(dict) + self.func = func + self.add(func, typ) + + def add(self, func, typ=None): + if typ is None: + annots = getattr(func, "__annotations__", {}) + typehints = evaluate_annotations(annots, func.__module__, best_effort=True) + if typehints: + typ, *_ = typehints.values() + if get_origin(typ) in (Union, UnionType): + for t in get_args(typ): + self.add(func, t) + else: + self.add(func, typ) + else: + self.add(func, object) + elif isinstance(typ, tuple): + for t in typ: + self.add(func, t) + elif isinstance(typ, abc.ABCMeta): + if typ in self.abc_lookup: + raise TypeError(f"{typ} is already registered") + self.abc_lookup[typ] = func + elif isinstance(typ, str): + package, rest = typ.split(".", 1) + package = unalias_package(package) + typ = f"{package}.{rest}" + if typ in self.lazy_lookup[package]: + raise TypeError(f"{typ} is already registered") + self.lazy_lookup[package][typ] = func + else: + if typ in self.lookup: + raise TypeError(f"{typ} is already registered") + self.lookup[typ] = func + return func - def register(cls, func=None): - """Registers a new implementation for arguments of type `cls`.""" + def register(self, typ, func=None): + """Register a new implementation for arguments of type `cls`.""" def inner(func): - if isinstance(cls, tuple): - for t in cls: - register(t, func) - elif isinstance(cls, abc.ABCMeta): - abc_lookup[cls] = func - elif isinstance(cls, str): - module = cls.split(".", 1)[0] - lazy_lookup[module][cls] = func - else: - lookup[cls] = func + self.add(func, typ) return func return inner if func is None else inner(func) - def dispatch(cls): + def dispatch(self, typ): """Return the implementation for the given `cls`.""" - for cls2 in cls.__mro__: + for klass in typ.__mro__: # 1. Check for a concrete implementation try: - impl = lookup[cls2] + impl = self.lookup[klass] except KeyError: pass else: - if cls is not cls2: + if typ is not klass: # Cache implementation - lookup[cls] = impl + self.lookup[typ] = impl return impl # 2. Check lazy implementations - module = cls2.__module__.split(".", 1)[0] - if lazy := lazy_lookup.get(module): + package = klass.__module__.split(".", 1)[0] + if lazy := self.lazy_lookup.get(package): # Import all lazy implementations first before registering # (which should never fail), to ensure an error anywhere # doesn't result in a half-registered state. new = {import_object(name): func for name, func in lazy.items()} - lookup.update(new) + self.lookup.update(new) # drop lazy implementations, idempotent for thread safety - lazy_lookup.pop(module, None) - return dispatch(cls) + self.lazy_lookup.pop(package, None) + return self.dispatch(typ) # 3. 
Check for abcs - for abc_cls, impl in abc_lookup.items(): - if issubclass(cls, abc_cls): - lookup[cls] = impl + for abc_class, impl in self.abc_lookup.items(): + if issubclass(typ, abc_class): + self.lookup[typ] = impl return impl - # Can never get here, since a base `object` implementation is - # always registered - raise AssertionError("should never get here") # pragma: no cover + raise TypeError(f"Could not find implementation for {typ}") + + def __call__(self, arg, *args, **kwargs): + impl = self.dispatch(type(arg)) + return impl(arg, *args, **kwargs) + + def __get__(self, obj, cls=None): + def _method(*args, **kwargs): + method = self.dispatch(type(args[0])) + method = method.__get__(obj, cls) + return method(*args, **kwargs) + + functools.update_wrapper(_method, self.func) + return _method + + +def lazy_singledispatch(func): + """A `singledispatch` implementation that supports lazily registering implementations.""" + + dispatcher = SingleDispatch(func, object) @functools.wraps(func) def call(arg, *args, **kwargs): - return dispatch(type(arg))(arg, *args, **kwargs) - - call.dispatch = dispatch - call.register = register + impl = dispatcher.dispatch(type(arg)) + return impl(arg, *args, **kwargs) + call.dispatch = dispatcher.dispatch + call.register = dispatcher.register return call @@ -117,21 +166,21 @@ def __new__(cls, name, bases, dct): # multiple functions are defined with the same name, so create # a dispatcher function first, *rest = value - func = functools.singledispatchmethod(first) + func = SingleDispatch(first) for impl in rest: - func.register(impl) + func.add(impl) namespace[key] = func elif all(isinstance(v, classmethod) for v in value): first, *rest = value - func = functools.singledispatchmethod(first.__func__) - for v in rest: - func.register(v.__func__) + func = SingleDispatch(first.__func__) + for impl in rest: + func.add(impl.__func__) namespace[key] = classmethod(func) elif all(isinstance(v, staticmethod) for v in value): first, *rest = value - func = functools.singledispatch(first.__func__) - for v in rest: - func.register(v.__func__) + func = SingleDispatch(first.__func__) + for impl in rest: + func.add(impl.__func__) namespace[key] = staticmethod(func) else: raise TypeError(f"Multiple attributes are defined with name {key}") diff --git a/ibis/common/patterns.py b/ibis/common/patterns.py index 2e48c697d5f1..3b4085868aa6 100644 --- a/ibis/common/patterns.py +++ b/ibis/common/patterns.py @@ -31,7 +31,6 @@ _, # noqa: F401 resolver, ) -from ibis.common.dispatch import lazy_singledispatch from ibis.common.typing import ( Coercible, CoercionError, @@ -42,7 +41,7 @@ get_bound_typevars, get_type_params, ) -from ibis.util import is_iterable, promote_tuple +from ibis.util import import_object, is_iterable, unalias_package T_co = TypeVar("T_co", covariant=True) @@ -719,21 +718,28 @@ class LazyInstanceOf(Slotted, Pattern): The types to check against. """ - __slots__ = ("types", "check") - types: tuple[type, ...] 
- check: Callable + __fields__ = ("qualname", "package") + __slots__ = ("qualname", "package", "loaded") + qualname: str + package: str + loaded: type - def __init__(self, types): - types = promote_tuple(types) - check = lazy_singledispatch(lambda x: False) - check.register(types, lambda x: True) - super().__init__(types=types, check=check) + def __init__(self, qualname): + package = unalias_package(qualname.split(".", 1)[0]) + super().__init__(qualname=qualname, package=package) def match(self, value, context): - if self.check(value): - return value - else: - return NoMatch + if hasattr(self, "loaded"): + return value if isinstance(value, self.loaded) else NoMatch + + for klass in type(value).__mro__: + package = klass.__module__.split(".", 1)[0] + if package == self.package: + typ = import_object(self.qualname) + object.__setattr__(self, "loaded", typ) + return value if isinstance(value, typ) else NoMatch + + return NoMatch class CoercedTo(Slotted, Pattern, Generic[T_co]): diff --git a/ibis/common/tests/test_dispatch.py b/ibis/common/tests/test_dispatch.py index 5f34c533851d..5cce61447fc1 100644 --- a/ibis/common/tests/test_dispatch.py +++ b/ibis/common/tests/test_dispatch.py @@ -2,10 +2,14 @@ import collections import decimal +from typing import TYPE_CHECKING, Union from ibis.common.dispatch import Dispatched, lazy_singledispatch # ruff: noqa: F811 +if TYPE_CHECKING: + import pandas as pd + import pyarrow as pa def test_lazy_singledispatch(): @@ -122,6 +126,14 @@ def _(a): assert foo(sum) == "callable" +class A: + pass + + +class B: + pass + + class Visitor(Dispatched): def a(self): return "a" @@ -132,6 +144,9 @@ def b(self, x: int): def b(self, x: str): return "b_str" + def b(self, x: Union[A, B]): + return "b_union" + @classmethod def c(cls, x: int, **kwargs): return "c_int" @@ -154,6 +169,15 @@ def e(x: int): def e(x: str): return "e_str" + def f(self, df: dict): + return "f_dict" + + def f(self, df: pd.DataFrame): + return "f_pandas" + + def f(self, df: pa.Table): + return "f_pyarrow" + class Subvisitor(Visitor): def b(self, x): @@ -173,9 +197,11 @@ def c(cls, s: float): def test_dispatched(): v = Visitor() - assert v.a == v.a + assert v.a() == "a" assert v.b(1) == "b_int" assert v.b("1") == "b_str" + assert v.b(A()) == "b_union" + assert v.b(B()) == "b_union" assert v.d(1) == "d_int" assert v.d("1") == "d_str" @@ -193,3 +219,15 @@ def test_dispatched(): assert Subvisitor.c(1.1) == "c_float" assert Subvisitor.e(1) == "e_int" + + +def test_dispatched_lazy(): + import pyarrow as pa + + empty_pyarrow_table = pa.Table.from_arrays([]) + empty_pandas_table = empty_pyarrow_table.to_pandas() + + v = Visitor() + assert v.f({}) == "f_dict" + assert v.f(empty_pyarrow_table) == "f_pyarrow" + assert v.f(empty_pandas_table) == "f_pandas" diff --git a/ibis/common/typing.py b/ibis/common/typing.py index 170ca2bd2b40..0ae48a2fc7c9 100644 --- a/ibis/common/typing.py +++ b/ibis/common/typing.py @@ -172,6 +172,7 @@ def evaluate_annotations( annots: dict[str, str], module_name: str, class_name: Optional[str] = None, + best_effort: bool = False, ) -> dict[str, Any]: """Evaluate type annotations that are strings. @@ -185,6 +186,8 @@ def evaluate_annotations( class_name The name of the class that the annotations are defined in, hence providing Self type. + best_effort + Whether to ignore errors when evaluating type annotations. 
Returns ------- @@ -202,10 +205,18 @@ def evaluate_annotations( localns = None else: localns = dict(Self=f"{module_name}.{class_name}") - return { - k: eval(v, globalns, localns) if isinstance(v, str) else v # noqa: PGH001 - for k, v in annots.items() - } + + result = {} + for k, v in annots.items(): + if isinstance(v, str): + try: + v = eval(v, globalns, localns) # noqa: PGH001 + except NameError: + if not best_effort: + raise + result[k] = v + + return result def format_typehint(typ: Any) -> str: diff --git a/ibis/expr/api.py b/ibis/expr/api.py index 85015156e8d9..c570c49bebed 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -345,17 +345,6 @@ def table( return ops.UnboundTable(name=name, schema=schema).to_expr() -@lazy_singledispatch -def _memtable( - data, - *, - columns: Iterable[str] | None = None, - schema: SupportsSchema | None = None, - name: str | None = None, -): - raise NotImplementedError(type(data)) - - def memtable( data, *, @@ -443,33 +432,13 @@ def memtable( return _memtable(data, name=name, schema=schema, columns=columns) -@_memtable.register("pyarrow.Table") -def _memtable_from_pyarrow_table( - data: pa.Table, - *, - name: str | None = None, - schema: SupportsSchema | None = None, - columns: Iterable[str] | None = None, -): - from ibis.formats.pyarrow import PyArrowTableProxy - - if columns is not None: - assert schema is None, "if `columns` is not `None` then `schema` must be `None`" - schema = sch.Schema(dict(zip(columns, sch.infer(data).values()))) - return ops.InMemoryTable( - name=name if name is not None else util.gen_name("pyarrow_memtable"), - schema=sch.infer(data) if schema is None else schema, - data=PyArrowTableProxy(data), - ).to_expr() - - -@_memtable.register(object) -def _memtable_from_dataframe( +@lazy_singledispatch +def _memtable( data: pd.DataFrame | Any, *, - name: str | None = None, - schema: SupportsSchema | None = None, columns: Iterable[str] | None = None, + schema: SupportsSchema | None = None, + name: str | None = None, ) -> Table: import pandas as pd @@ -515,6 +484,26 @@ def _memtable_from_dataframe( return op.to_expr() +@_memtable.register("pyarrow.Table") +def _memtable_from_pyarrow_table( + data: pa.Table, + *, + name: str | None = None, + schema: SupportsSchema | None = None, + columns: Iterable[str] | None = None, +): + from ibis.formats.pyarrow import PyArrowTableProxy + + if columns is not None: + assert schema is None, "if `columns` is not `None` then `schema` must be `None`" + schema = sch.Schema(dict(zip(columns, sch.infer(data).values()))) + return ops.InMemoryTable( + name=name if name is not None else util.gen_name("pyarrow_memtable"), + schema=sch.infer(data) if schema is None else schema, + data=PyArrowTableProxy(data), + ).to_expr() + + def _deferred_method_call(expr, method_name): method = operator.methodcaller(method_name) if isinstance(expr, str): diff --git a/ibis/util.py b/ibis/util.py index 9b95a12d2c47..244ed74087ba 100644 --- a/ibis/util.py +++ b/ibis/util.py @@ -494,6 +494,20 @@ def backend_entry_points() -> list[importlib.metadata.EntryPoint]: return sorted(eps) +_common_package_aliases = { + "pa": "pyarrow", + "pd": "pandas", + "np": "numpy", + "sk": "sklearn", + "sp": "scipy", + "tf": "tensorflow", +} + + +def unalias_package(name: str) -> str: + return _common_package_aliases.get(name, name) + + def import_object(qualname: str) -> Any: """Attempt to import an object given its full qualname. 
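
For readers following along, the dispatch refactor in the patch above is easiest to digest from a usage sketch. The snippet below is distilled from the `ibis/common/tests/test_dispatch.py` changes in this same patch; the `Visitor`, `Positive`, `Negative`, and `describe` names are illustrative placeholders rather than anything defined in the library, and the snippet is a rough sketch of how annotation-driven dispatch, union annotations, and lazily imported forward references are intended to behave, not authoritative documentation.

from __future__ import annotations

from typing import TYPE_CHECKING, Union

from ibis.common.dispatch import Dispatched, lazy_singledispatch

if TYPE_CHECKING:
    import pandas as pd

# ruff: noqa: F811


class Positive:
    pass


class Negative:
    pass


class Visitor(Dispatched):
    # repeated definitions of the same name are collected into a single
    # dispatcher keyed on the annotation of the first argument
    def handle(self, x: int):
        return "int"

    def handle(self, x: Union[Positive, Negative]):
        # a union annotation registers the implementation for every member type
        return "union"

    def handle(self, x: pd.DataFrame):
        # "pd.DataFrame" cannot be evaluated at class definition time, so it is
        # kept as a string, de-aliased to "pandas.DataFrame", and registered
        # lazily; pandas is only imported if a DataFrame is actually dispatched on
        return "dataframe"


@lazy_singledispatch
def describe(obj):
    return "something else"


@describe.register("pyarrow.Table")
def _describe_pyarrow(obj):
    # string registration defers the pyarrow import until a Table shows up
    return "a pyarrow table"


v = Visitor()
assert v.handle(1) == "int"
assert v.handle(Positive()) == "union"
assert describe(3.14) == "something else"

Compared to `functools.singledispatchmethod`, the point of the custom `SingleDispatch` introduced here appears to be exactly these two behaviors: union annotations fan out to each member type, and forward references that fail to evaluate degrade to lazy, string-keyed registrations instead of raising at import time.
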
From b89273eb51815daec2f883202252cc4dc108e17b Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Wed, 3 Jan 2024 11:12:20 -0500 Subject: [PATCH 032/161] ci(snowflake): enable for `the-epic-split` branch --- .github/workflows/ibis-backends-cloud.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ibis-backends-cloud.yml b/.github/workflows/ibis-backends-cloud.yml index 3219fd3fd431..7ba51ae8bd37 100644 --- a/.github/workflows/ibis-backends-cloud.yml +++ b/.github/workflows/ibis-backends-cloud.yml @@ -11,6 +11,7 @@ on: - ".envrc" branches: - main + - the-epic-split permissions: # this allows extractions/setup-just to list releases for `just` at a higher @@ -42,8 +43,8 @@ jobs: - "3.9" - "3.11" backend: - - name: bigquery - title: BigQuery + # - name: bigquery + # title: BigQuery - name: snowflake title: Snowflake steps: From 5ee9c26973f51cd6f7e9b842533bd0b4c8704725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 2 Jan 2024 19:44:42 +0100 Subject: [PATCH 033/161] fir(ir): `asof` join `tolerance` parameter should post-filter and post-join instead of adding a predicate --- .../test_format/test_asof_join/repr.txt | 2 +- ibis/expr/tests/test_dereference.py | 23 ++++- ibis/expr/tests/test_newrels.py | 21 ++++ ibis/expr/types/joins.py | 99 +++++++++++-------- ibis/expr/types/relations.py | 9 +- ibis/tests/expr/test_table.py | 41 ++++---- 6 files changed, 130 insertions(+), 65 deletions(-) diff --git a/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt b/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt index 6524ea7fb5b7..a29fc083cf3a 100644 --- a/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_asof_join/repr.txt @@ -8,7 +8,7 @@ r1 := UnboundTable: right JoinChain[r0] JoinLink[asof, r1] - r0.time1 <= r1.time2 + r0.time1 >= r1.time2 JoinLink[inner, r1] r0.value == r1.value2 values: diff --git a/ibis/expr/tests/test_dereference.py b/ibis/expr/tests/test_dereference.py index 8f827d218f50..af569fab4a04 100644 --- a/ibis/expr/tests/test_dereference.py +++ b/ibis/expr/tests/test_dereference.py @@ -17,9 +17,30 @@ def dereference_expect(expected): return {k.op(): v.op() for k, v in expected.items()} +def test_dereference_project(): + p = t.projection([t.int_col, t.double_col]) + + mapping = dereference_mapping([p.op()]) + expected = dereference_expect( + { + p.int_col: p.int_col, + p.double_col: p.double_col, + t.int_col: p.int_col, + t.double_col: p.double_col, + } + ) + assert mapping == expected + + def test_dereference_mapping_self_reference(): v = t.view() mapping = dereference_mapping([v.op()]) - expected = dereference_expect({}) + expected = dereference_expect( + { + v.int_col: v.int_col, + v.double_col: v.double_col, + v.string_col: v.string_col, + } + ) assert mapping == expected diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py index e7398755595f..7ba9b4b3a327 100644 --- a/ibis/expr/tests/test_newrels.py +++ b/ibis/expr/tests/test_newrels.py @@ -1266,6 +1266,27 @@ def test_join_between_joins(): assert expr.op() == expected +def test_join_with_filtered_join_of_left(): + t1 = ibis.table(name="t1", schema={"a": "int64", "b": "string"}) + t2 = ibis.table(name="t2", schema={"a": "int64", "b": "string"}) + + joined = t1.left_join(t2, [t1.a == t2.a]).filter(t1.a < 5) + expr = t1.left_join(joined, [t1.a == joined.a]).select(t1) + + with join_tables(t1, joined) as (r1, r2): + 
expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("left", r2, [r1.a == r2.a]), + ], + values={ + "a": r1.a, + "b": r1.b, + }, + ) + assert expr.op() == expected + + def test_join_method_docstrings(): t1 = ibis.table(name="t1", schema={"a": "int64", "b": "string"}) t2 = ibis.table(name="t2", schema={"c": "int64", "d": "string"}) diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py index 919515d9c1bc..861c3e8095c2 100644 --- a/ibis/expr/types/joins.py +++ b/ibis/expr/types/joins.py @@ -9,7 +9,7 @@ import ibis.expr.operations as ops from ibis import util -from ibis.expr.types import Table, ValueExpr +from ibis.expr.types import Table, Value from ibis.common.deferred import Deferred from ibis.expr.analysis import flatten_predicates from ibis.common.exceptions import ExpressionError, IntegrityError @@ -88,7 +88,7 @@ def dereference_binop(pred, deref_left, deref_right): def dereference_value(pred, deref_left, deref_right): deref_both = {**deref_left, **deref_right} - if isinstance(pred, ops.Binary) and pred.left == pred.right: + if isinstance(pred, ops.Binary) and pred.left.relations == pred.right.relations: return dereference_binop(pred, deref_left, deref_right) else: return pred.replace(deref_both, filter=ops.Value) @@ -103,13 +103,14 @@ def prepare_predicates( for pred in util.promote_list(predicates): if pred is True or pred is False: yield ops.Literal(pred, dtype="bool") - elif isinstance(pred, ValueExpr): - node = pred.op() - yield dereference_value(node, deref_left, deref_right) + elif isinstance(pred, Value): + for node in flatten_predicates(pred.op()): + yield dereference_value(node, deref_left, deref_right) elif isinstance(pred, Deferred): # resolve deferred expressions on the left table - node = pred.resolve(left).op() - yield dereference_value(node, deref_left, deref_right) + pred = pred.resolve(left).op() + for node in flatten_predicates(pred): + yield dereference_value(node, deref_left, deref_right) else: if isinstance(pred, tuple): if len(pred) != 2: @@ -193,14 +194,15 @@ def join( # noqa: D102 subs_right = dereference_mapping_right(right) # bind and dereference the predicates - preds = prepare_predicates( - left, - right, - predicates, - deref_left=subs_left, - deref_right=subs_right, + preds = list( + prepare_predicates( + left, + right, + predicates, + deref_left=subs_left, + deref_right=subs_right, + ) ) - preds = flatten_predicates(list(preds)) if not preds and how != "cross": # if there are no predicates, default to every row matching unless # the join is a cross join, because a cross join already has this @@ -236,12 +238,33 @@ def asof_join( # noqa: D102 ): predicates = util.promote_list(predicates) + util.promote_list(by) if tolerance is not None: - if not isinstance(on, str): - raise TypeError( - "tolerance can only be specified when predicates is a string" - ) - # construct a predicate with two sides from the two tables - predicates.append(self[on] <= right[on] + tolerance) + if isinstance(on, str): + # self is always a JoinChain so reference one of the join tables + left_on = self.op().values[on].to_expr() + right_on = right[on] + on = left_on >= right_on + elif isinstance(on, Value): + node = on.op() + if not isinstance(node, ops.Binary): + raise InputTypeError("`on` must be a comparison expression") + left_on = node.left.to_expr() + right_on = node.right.to_expr() + else: + raise TypeError("`on` must be a string or a ValueExpr") + + joined = self.asof_join( + right, on=on, predicates=predicates, lname=lname, rname=rname + ) + filtered = 
joined.filter( + left_on <= right_on + tolerance, left_on >= right_on - tolerance + ) + right_on = right_on.op().replace({right.op(): filtered.op()}).to_expr() + + result = self.left_join( + filtered, predicates=[left_on == right_on] + predicates + ) + values = {**self.op().values, **filtered.op().values} + return result.select(values) left = self.op() right = ops.JoinTable(right, index=left.length) @@ -249,28 +272,24 @@ def asof_join( # noqa: D102 subs_right = dereference_mapping_right(right) # TODO(kszucs): add extra validation for `on` with clear error messages - preds = list( - prepare_predicates( - left, - right, - [on], - deref_left=subs_left, - deref_right=subs_right, - comparison=ops.LessEqual, - ) + (on,) = prepare_predicates( + left, + right, + [on], + deref_left=subs_left, + deref_right=subs_right, + comparison=ops.GreaterEqual, ) - preds += flatten_predicates( - list( - prepare_predicates( - left, - right, - predicates, - deref_left=subs_left, - deref_right=subs_right, - comparison=ops.Equals, - ) - ) + predicates = prepare_predicates( + left, + right, + predicates, + deref_left=subs_left, + deref_right=subs_right, + comparison=ops.Equals, ) + preds = [on, *predicates] + values, collisions = disambiguate_fields( "asof", left.values, right.fields, lname, rname ) diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index cbc9285e6877..3fb93b478736 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -155,8 +155,14 @@ def unwrap_aliases(values: Iterator[ir.Value]) -> Mapping[str, ir.Value]: def dereference_mapping(parents): - mapping = {} parents = util.promote_list(parents) + mapping = {} + + for parent in parents: + # do not defereference fields referencing the requested parents + for k, v in parent.fields.items(): + mapping[v] = v + for parent in parents: for k, v in parent.values.items(): if isinstance(v, ops.Field): @@ -171,6 +177,7 @@ def dereference_mapping(parents): elif v.relations and v not in mapping: # do not dereference literal expressions mapping[v] = ops.Field(parent, k) + return mapping diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index cc97eeefa316..bed1cb84168e 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -923,7 +923,7 @@ def test_asof_join_with_by(): r2 = join_without_by.op().rest[0].table.to_expr() expected = ops.JoinChain( first=r1, - rest=[ops.JoinLink("asof", r2, [r1.time <= r2.time])], + rest=[ops.JoinLink("asof", r2, [r1.time >= r2.time])], values={ "time": r1.time, "key": r1.key, @@ -940,7 +940,7 @@ def test_asof_join_with_by(): expected = ops.JoinChain( first=r1, rest=[ - ops.JoinLink("asof", r2, [r1.time <= r2.time, r1.key == r2.key]), + ops.JoinLink("asof", r2, [r1.time >= r2.time, r1.key == r2.key]), ], values={ "time": r1.time, @@ -978,26 +978,23 @@ def test_asof_join_with_tolerance(ibis_interval, timedelta_interval): for interval in [ibis_interval, timedelta_interval]: joined = api.asof_join(left, right, "time", tolerance=interval) - with join_tables(left, right) as (r1, r2): - expected = ops.JoinChain( - first=r1, - rest=[ - ops.JoinLink( - "asof", - r2, - [r1.time <= r2.time, r1.time <= (r2.time + interval)], - ) - ], - values={ - "time": r1.time, - "key": r1.key, - "value": r1.value, - "time_right": r2.time, - "key_right": r2.key, - "value2": r2.value2, - }, - ) - assert joined.op() == expected + + asof = left.asof_join(right, "time") + filt = asof.filter( + [ + asof.time <= asof.time_right + interval, + asof.time >= asof.time_right - 
interval, + ] + ) + join = left.left_join(filt, [left.time == filt.time]) + expected = join.select( + left, + time_right=filt.time_right, + key_right=filt.key_right, + value2=filt.value2, + ) + + assert joined.equals(expected) def test_equijoin_schema_merge(): From 218061e440693e34fb810fe91a2dc2b8b4e2e788 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Wed, 3 Jan 2024 15:50:18 +0100 Subject: [PATCH 034/161] feat(duckdb): support `asof` joins including `tolerance` parameter --- ibis/backends/base/sqlglot/compiler.py | 4 +- ibis/backends/tests/test_asof_join.py | 147 +++++++++++++++++++++++++ ibis/backends/tests/test_join.py | 4 +- ibis/expr/tests/test_dereference.py | 2 +- ibis/expr/types/joins.py | 12 ++ 5 files changed, 164 insertions(+), 5 deletions(-) create mode 100644 ibis/backends/tests/test_asof_join.py diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index fbfdd0082157..687d21f83ed3 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -868,14 +868,14 @@ def visit_JoinLink(self, op, *, how, table, predicates): "anti": "left", "cross": None, "outer": "full", - "asof": "left", + "asof": "asof", "any_left": "left", "any_inner": None, } kinds = { "any_left": "any", "any_inner": "any", - "asof": "asof", + "asof": "left", "inner": "inner", "left": "outer", "right": "outer", diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py new file mode 100644 index 000000000000..ad7678665678 --- /dev/null +++ b/ibis/backends/tests/test_asof_join.py @@ -0,0 +1,147 @@ +from __future__ import annotations + +import operator + +import pandas as pd +import pandas.testing as tm +import pytest + +import ibis + + +@pytest.fixture(scope="module") +def time_df1(): + return pd.DataFrame( + { + "time": pd.to_datetime([1, 2, 3, 4], unit="s"), + "value": [1.1, 2.2, 3.3, 4.4], + "group": ["a", "a", "a", "a"], + } + ) + + +@pytest.fixture(scope="module") +def time_df2(): + return pd.DataFrame( + { + "time": pd.to_datetime([2, 4], unit="s"), + "other_value": [1.2, 2.0], + "group": ["a", "a"], + } + ) + + +@pytest.fixture(scope="module") +def time_keyed_df1(): + return pd.DataFrame( + { + "time": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=6) + ), + "key": [1, 2, 3, 1, 2, 3], + "value": [1.2, 1.4, 2.0, 4.0, 8.0, 16.0], + } + ) + + +@pytest.fixture(scope="module") +def time_keyed_df2(): + return pd.DataFrame( + { + "time": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", freq="3D", periods=3) + ), + "key": [1, 2, 3], + "other_value": [1.1, 1.2, 2.2], + } + ) + + +@pytest.fixture(scope="module") +def time_left(time_df1): + return ibis.memtable(time_df1) + + +@pytest.fixture(scope="module") +def time_right(time_df2): + return ibis.memtable(time_df2) + + +@pytest.fixture(scope="module") +def time_keyed_left(time_keyed_df1): + return ibis.memtable(time_keyed_df1) + + +@pytest.fixture(scope="module") +def time_keyed_right(time_keyed_df2): + return ibis.memtable(time_keyed_df2) + + +@pytest.mark.parametrize( + ("direction", "op"), + [ + ("backward", operator.ge), + ("forward", operator.le), + ], +) +@pytest.mark.notimpl(["datafusion", "snowflake"]) +def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): + on = op(time_left["time"], time_right["time"]) + expr = time_left.asof_join(time_right, on=on, predicates="group") + + result = con.execute(expr) + expected = pd.merge_asof( + time_df1, time_df2, on="time", 
by="group", direction=direction + ) + + result = result.sort_values(["group", "time"]).reset_index(drop=True) + expected = expected.sort_values(["group", "time"]).reset_index(drop=True) + + tm.assert_frame_equal(result[expected.columns], expected) + with pytest.raises(AssertionError): + tm.assert_series_equal(result["time"], result["time_right"]) + + +@pytest.mark.parametrize( + ("direction", "op"), + [ + ("backward", operator.ge), + ("forward", operator.le), + ], +) +@pytest.mark.broken( + ["clickhouse"], raises=AssertionError, reason="`time` is truncated to seconds" +) +@pytest.mark.notimpl(["datafusion", "snowflake"]) +def test_keyed_asof_join_with_tolerance( + con, + time_keyed_left, + time_keyed_right, + time_keyed_df1, + time_keyed_df2, + direction, + op, +): + on = op(time_keyed_left["time"], time_keyed_right["time"]) + expr = time_keyed_left.asof_join( + time_keyed_right, on=on, by="key", tolerance=ibis.interval(days=2) + ) + + result = con.execute(expr) + expected = pd.merge_asof( + time_keyed_df1, + time_keyed_df2, + on="time", + by="key", + tolerance=pd.Timedelta("2D"), + direction=direction, + ) + + result = result.sort_values(["key", "time"]).reset_index(drop=True) + expected = expected.sort_values(["key", "time"]).reset_index(drop=True) + + tm.assert_frame_equal(result[expected.columns], expected) + with pytest.raises(AssertionError): + tm.assert_series_equal(result["time"], result["time_right"]) + with pytest.raises(AssertionError): + tm.assert_series_equal(result["key"], result["key_right"]) diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index a0b8f0aa1a88..cfe287eca026 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -289,9 +289,9 @@ def test_join_with_pandas_non_null_typed_columns(batting, awards_players): reason="polars doesn't support join predicates", ) @pytest.mark.notimpl( - ["dask", "pandas"], + ["dask"], raises=TypeError, - reason="dask and pandas don't support join predicates", + reason="dask doesn't support join predicates", ) @pytest.mark.notimpl( ["exasol"], diff --git a/ibis/expr/tests/test_dereference.py b/ibis/expr/tests/test_dereference.py index af569fab4a04..e19234f92084 100644 --- a/ibis/expr/tests/test_dereference.py +++ b/ibis/expr/tests/test_dereference.py @@ -18,7 +18,7 @@ def dereference_expect(expected): def test_dereference_project(): - p = t.projection([t.int_col, t.double_col]) + p = t.select([t.int_col, t.double_col]) mapping = dereference_mapping([p.op()]) expected = dereference_expect( diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py index 861c3e8095c2..957d16f4253f 100644 --- a/ibis/expr/types/joins.py +++ b/ibis/expr/types/joins.py @@ -238,6 +238,15 @@ def asof_join( # noqa: D102 ): predicates = util.promote_list(predicates) + util.promote_list(by) if tolerance is not None: + # `tolerance` parameter is mimicking the pandas API, but we express + # it at the expression level by a sequence of operations: + # 1. perform the `asof` join with the `on` an `predicates` parameters + # where the `on` parameter is an inequality predicate + # 2. filter the asof join result using the `tolerance` parameter and + # the `on` parameter + # 3. 
perform a left join between the original left table and the + # filtered asof join result using the `on` parameter but this + # time as an equality predicate if isinstance(on, str): # self is always a JoinChain so reference one of the join tables left_on = self.op().values[on].to_expr() @@ -260,6 +269,9 @@ def asof_join( # noqa: D102 ) right_on = right_on.op().replace({right.op(): filtered.op()}).to_expr() + # without joining twice the table would not contain the rows from + # the left table that do not match any row from the right table + # given the tolerance filter result = self.left_join( filtered, predicates=[left_on == right_on] + predicates ) From e266585b0d95872dcf70a1020efd58c6e71d3ad2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 4 Jan 2024 12:31:07 +0100 Subject: [PATCH 035/161] ci: remove `merge_group` (#7899) --- .github/workflows/check-generated-files.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/check-generated-files.yml b/.github/workflows/check-generated-files.yml index 86cce46341dd..49102ff564af 100644 --- a/.github/workflows/check-generated-files.yml +++ b/.github/workflows/check-generated-files.yml @@ -13,7 +13,6 @@ on: - poetry.lock - pyproject.toml - requirements-dev.txt - merge_group: concurrency: group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} From 52c5051da58d3e84a6af194565fea34400777b61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 4 Jan 2024 12:46:03 +0100 Subject: [PATCH 036/161] refactor(pandas): port the pandas backend with an improved execution model (#7797) Since we need to reimplement/port all of the backends for #7752, I took an attempt at reimplementing the pandas backend using a new execution engine. Previously the pandas backend was implemented using a top-down execution model and each operation was executing using a multidispatched function. While it served us well for a long time, it had a few drawbacks: - it was often hard to understand what was going on due to the complex preparation steps and various execution hooks - the multidispatched functions were hard to debug, additionally they supported a wide variety of inputs making the implementation rather bulky - due to the previous reaon, several inputs combinations were not supported, e.g. 
value operations with multiple columnar inputs - the `Scope` object was used to pass around the execution context which was created for each operation separately and the results were not reusable even though the same operation was executed multiple times The new execution model has changed in several ways: - there is a rewrite layer before execution which lowers the input expression to a form closer to the pandas execution model, this makes it much easier to implement the operations and also makes the input "plan" inspectable - the execution is now topologically sorted and executed in a bottom-up manner; the intermediate results are reused, making the execution more efficient while also aggressively cleaned up as soon as they are not needed anymore to reduce the memory usage - the execute function is now single-dispatched making the implementation easier to locate and debug - the inputs now broadcasted to columnar shape so that the same implementation can be used for multiple input shape combinations, this removes several special cases from the implementation in exchange of a negligible performance overhead - there are helper utilities making it easier to implement compute kernels for the various value operations: `rowwise`, `columnwise`, `elementwise`, `serieswise`; if there are multiple implementations available for a given operation, the most efficient one is selected based on the input shapes The new backend implementation has a higher feature coverage while the implementation is one third of the size of the previous one. BREAKING CHANGE: the `timecontext` feature is not supported anymore --- .github/workflows/ibis-backends.yml | 8 +- ibis/backends/base/df/__init__.py | 0 ibis/backends/base/df/scope.py | 211 --- ibis/backends/base/df/timecontext.py | 304 ---- .../dask/tests/execution/test_join.py | 1 + ibis/backends/pandas/__init__.py | 47 +- ibis/backends/pandas/aggcontext.py | 710 -------- ibis/backends/pandas/convert.py | 88 + ibis/backends/pandas/core.py | 605 ------- ibis/backends/pandas/dispatch.py | 110 -- ibis/backends/pandas/dispatcher.py | 113 -- ibis/backends/pandas/execution/__init__.py | 13 - ibis/backends/pandas/execution/arrays.py | 172 -- ibis/backends/pandas/execution/constants.py | 106 -- ibis/backends/pandas/execution/decimal.py | 135 -- ibis/backends/pandas/execution/generic.py | 1479 ----------------- ibis/backends/pandas/execution/join.py | 183 -- ibis/backends/pandas/execution/maps.py | 208 --- ibis/backends/pandas/execution/selection.py | 337 ---- ibis/backends/pandas/execution/strings.py | 560 ------- ibis/backends/pandas/execution/structs.py | 44 - ibis/backends/pandas/execution/temporal.py | 341 ---- ibis/backends/pandas/execution/timecontext.py | 93 -- ibis/backends/pandas/execution/util.py | 144 -- ibis/backends/pandas/execution/window.py | 526 ------ ibis/backends/pandas/executor.py | 761 +++++++++ ibis/backends/pandas/helpers.py | 211 +++ ibis/backends/pandas/kernels.py | 513 ++++++ ibis/backends/pandas/rewrites.py | 322 ++++ ibis/backends/pandas/tests/conftest.py | 286 ++++ .../pandas/tests/execution/__init__.py | 0 .../pandas/tests/execution/conftest.py | 289 ---- .../tests/execution/test_timecontext.py | 399 ----- ibis/backends/pandas/tests/test_aggcontext.py | 167 -- .../tests/{execution => }/test_arrays.py | 7 + .../pandas/tests/{execution => }/test_cast.py | 40 +- ibis/backends/pandas/tests/test_core.py | 65 +- ibis/backends/pandas/tests/test_dispatcher.py | 143 -- .../tests/{execution => }/test_functions.py | 35 +- 
ibis/backends/pandas/tests/test_helpers.py | 72 + .../pandas/tests/{execution => }/test_join.py | 69 +- .../pandas/tests/{execution => }/test_maps.py | 0 .../tests/{execution => }/test_operations.py | 6 +- .../tests/{execution => }/test_strings.py | 24 +- .../tests/{execution => }/test_structs.py | 6 +- .../tests/{execution => }/test_temporal.py | 5 +- ibis/backends/pandas/tests/test_udf.py | 42 +- .../tests/{execution => }/test_window.py | 48 +- ibis/backends/pandas/trace.py | 170 -- ibis/backends/pandas/udf.py | 145 -- ibis/backends/tests/test_aggregation.py | 19 +- ibis/backends/tests/test_array.py | 33 +- ibis/backends/tests/test_generic.py | 6 +- ibis/backends/tests/test_interactive.py | 4 + ibis/backends/tests/test_param.py | 4 +- ibis/backends/tests/test_string.py | 2 +- ibis/backends/tests/test_temporal.py | 10 +- ibis/backends/tests/test_timecontext.py | 4 +- ibis/backends/tests/test_vectorized_udf.py | 3 +- ibis/backends/tests/test_window.py | 28 +- ibis/expr/operations/reductions.py | 1 + ibis/formats/pandas.py | 24 +- ibis/formats/tests/test_dask.py | 9 - 63 files changed, 2517 insertions(+), 7993 deletions(-) delete mode 100644 ibis/backends/base/df/__init__.py delete mode 100644 ibis/backends/base/df/scope.py delete mode 100644 ibis/backends/base/df/timecontext.py delete mode 100644 ibis/backends/pandas/aggcontext.py create mode 100644 ibis/backends/pandas/convert.py delete mode 100644 ibis/backends/pandas/core.py delete mode 100644 ibis/backends/pandas/dispatch.py delete mode 100644 ibis/backends/pandas/dispatcher.py delete mode 100644 ibis/backends/pandas/execution/__init__.py delete mode 100644 ibis/backends/pandas/execution/arrays.py delete mode 100644 ibis/backends/pandas/execution/constants.py delete mode 100644 ibis/backends/pandas/execution/decimal.py delete mode 100644 ibis/backends/pandas/execution/generic.py delete mode 100644 ibis/backends/pandas/execution/join.py delete mode 100644 ibis/backends/pandas/execution/maps.py delete mode 100644 ibis/backends/pandas/execution/selection.py delete mode 100644 ibis/backends/pandas/execution/strings.py delete mode 100644 ibis/backends/pandas/execution/structs.py delete mode 100644 ibis/backends/pandas/execution/temporal.py delete mode 100644 ibis/backends/pandas/execution/timecontext.py delete mode 100644 ibis/backends/pandas/execution/util.py delete mode 100644 ibis/backends/pandas/execution/window.py create mode 100644 ibis/backends/pandas/executor.py create mode 100644 ibis/backends/pandas/helpers.py create mode 100644 ibis/backends/pandas/kernels.py create mode 100644 ibis/backends/pandas/rewrites.py delete mode 100644 ibis/backends/pandas/tests/execution/__init__.py delete mode 100644 ibis/backends/pandas/tests/execution/conftest.py delete mode 100644 ibis/backends/pandas/tests/execution/test_timecontext.py delete mode 100644 ibis/backends/pandas/tests/test_aggcontext.py rename ibis/backends/pandas/tests/{execution => }/test_arrays.py (96%) rename ibis/backends/pandas/tests/{execution => }/test_cast.py (80%) delete mode 100644 ibis/backends/pandas/tests/test_dispatcher.py rename ibis/backends/pandas/tests/{execution => }/test_functions.py (92%) create mode 100644 ibis/backends/pandas/tests/test_helpers.py rename ibis/backends/pandas/tests/{execution => }/test_join.py (89%) rename ibis/backends/pandas/tests/{execution => }/test_maps.py (100%) rename ibis/backends/pandas/tests/{execution => }/test_operations.py (99%) rename ibis/backends/pandas/tests/{execution => }/test_strings.py (89%) rename 
ibis/backends/pandas/tests/{execution => }/test_structs.py (95%) rename ibis/backends/pandas/tests/{execution => }/test_temporal.py (98%) rename ibis/backends/pandas/tests/{execution => }/test_window.py (93%) delete mode 100644 ibis/backends/pandas/trace.py diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index ff7caf83b69d..baffb490b3d0 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -77,10 +77,10 @@ jobs: # title: Dask # extras: # - dask - # - name: pandas - # title: Pandas - # extras: - # - pandas + - name: pandas + title: Pandas + extras: + - pandas # - name: sqlite # title: SQLite # extras: diff --git a/ibis/backends/base/df/__init__.py b/ibis/backends/base/df/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/ibis/backends/base/df/scope.py b/ibis/backends/base/df/scope.py deleted file mode 100644 index 1d41da93464d..000000000000 --- a/ibis/backends/base/df/scope.py +++ /dev/null @@ -1,211 +0,0 @@ -"""Module for scope. - -The motivation of Scope is to cache data for calculated ops. - -`scope` in Scope class is the main cache. It is a dictionary mapping -ibis node instances to concrete data, and the time context associate -with it (if any). - -When there are no time contexts associate with the cached result, getting -and setting values in Scope would be as simple as get and set in a normal -dictionary. With time contexts, we need the following logic for getting -and setting items in scope: - -Before setting the value op in scope we need to perform the following -check first: - -Test if `op` is in `scope` yet -- No, then put `op` in `scope`, set 'timecontext' to be the current -`timecontext` (None if `timecontext` is not present), set 'value' to be -the actual data. -- Yes, then get the time context stored in `scope` for `op` as -`old_timecontext`, and compare it with current `timecontext`: -If current `timecontext` is a subset of `_timecontext`, that means we -already cached a larger range of data. Do nothing and we will trim data in -later execution process. -If current `timecontext` is a superset of `old_timecontext`, that means we -need to update cache. Set 'value' to be the current data and set -'timecontext' to be the current `timecontext` for `op`. -If current `timecontext` is neither a subset nor a superset of -`old_timcontext`, but they overlap, or not overlap at all (For example -when there is a window that looks forward, over a window that looks -back), in this case, we should not trust the data stored either because -the data stored in scope doesn't cover the current time context. -For simplicity, we update cache in this case, instead of merge data of -different time contexts. -""" -from __future__ import annotations - -from collections import namedtuple -from typing import TYPE_CHECKING, Any - -import pandas as pd - -from ibis.backends.base.df.timecontext import TimeContextRelation, compare_timecontext - -if TYPE_CHECKING: - from collections.abc import Iterable - - from ibis.expr.operations import Node - -TimeContext = tuple[pd.Timestamp, pd.Timestamp] - -ScopeItem = namedtuple("ScopeItem", ["timecontext", "value"]) - - -class Scope: - def __init__( - self, - param: dict[Node, Any] | None = None, - timecontext: TimeContext | None = None, - ): - """Create a new scope. - - Associate None as timecontext by default. This is mostly used to - init a scope with a set of given params. 
- """ - self._items = ( - {op: ScopeItem(timecontext, value) for op, value in param.items()} - if param - else {} - ) - - def __contains__(self, op): - """Given an `op`, return if `op` is present in Scope. - - Note that this `__contain__` method doesn't take `timecontext` - as a parameter. This could be used to iterate all keys in - current scope, or any case that doesn't care about value, just - simply test if `op` is in scope or not. - When trying to get value in scope, use `get_value(op, timecontext)` - instead. Because the cached data could be trusted only if: - 1. `op` is in `scope`, and, - 2. The `timecontext` associated with `op` is a time context equal - to, or larger than the current time context. - """ - return op in self._items - - def __iter__(self): - return iter(self._items.keys()) - - def set_value(self, op: Node, timecontext: TimeContext | None, value: Any) -> None: - """Set values in scope. - - Given an `op`, `timecontext` and `value`, set `op` and - `(value, timecontext)` in scope. - - This method doesn't simply override and set, but takes time context - into consideration. - - If there is a value associated with the key, but time context is - smaller than the current time context we are going to set, `get_value` - will return None and we will proceed to set the new value in scope. - - Parameters - ---------- - op - Key in scope - timecontext - Time context - value - the cached result to save in scope, an object whose type may - differ in different backends. - """ - if self.get_value(op, timecontext) is None: - self._items[op] = ScopeItem(timecontext, value) - - def get_value(self, op: Node, timecontext: TimeContext | None = None) -> Any: - """Given a op and timecontext, get the result from scope. - - Parameters - ---------- - op - Key in scope - timecontext - Time context - - Returns - ------- - Any - The cached result, an object whose type may differ in different - backends. - """ - if op not in self: - return None - - # for ops without timecontext - if timecontext is None: - return self._items[op].value - else: - # For op with timecontext, there are some ops cannot use cached - # result with a different (larger) timecontext to get the - # correct result. - # For example, a groupby followed by count, if we use a larger or - # smaller dataset from cache, we will get an error in result. - # Such ops with global aggregation, ops whose result is - # depending on other rows in result Dataframe, cannot use cached - # result with different time context to optimize calculation. - # These are time context sensitive operations. Since these cases - # are rare in actual use case, we just enable optimization for - # all nodes for now. - cached_timecontext = self._items[op].timecontext - if cached_timecontext: - relation = compare_timecontext(timecontext, cached_timecontext) - if relation == TimeContextRelation.SUBSET: - return self._items[op].value - else: - return self._items[op].value - return None - - def merge_scope(self, other_scope: Scope, overwrite=False) -> Scope: - """Merge items in `other_scope` into this scope. - - Parameters - ---------- - other_scope - Scope to be merged with - overwrite - if `True`, force overwrite `value` if node already exists. - - Returns - ------- - Scope - a new Scope instance with items in two scopes merged. 
- """ - result = Scope() - - for op in self: - result._items[op] = self._items[op] - - for op in other_scope: - # if get_scope returns a not None value, then data is already - # cached in scope and it is at least a greater range than - # the current timecontext, so we drop the item. Otherwise - # add it into scope. - v = other_scope._items[op] - if overwrite or result.get_value(op, v.timecontext) is None: - result._items[op] = v - return result - - def merge_scopes(self, other_scopes: Iterable[Scope], overwrite=False) -> Scope: - """Merge items in `other_scopes` into this scope. - - Parameters - ---------- - other_scopes - scopes to be merged with - overwrite - if `True`, force overwrite value if node already exists. - - Returns - ------- - Scope - a new Scope instance with items in input scopes merged. - """ - result = Scope() - for op in self: - result._items[op] = self._items[op] - - for s in other_scopes: - result = result.merge_scope(s, overwrite) - return result diff --git a/ibis/backends/base/df/timecontext.py b/ibis/backends/base/df/timecontext.py deleted file mode 100644 index f84dd473bc4c..000000000000 --- a/ibis/backends/base/df/timecontext.py +++ /dev/null @@ -1,304 +0,0 @@ -"""Time context module. - -This is an implementation of time context extension without affecting the -existing SQL-like execution model for backends. - -Most of the execution is built on the foundation that "Data is uniquely -defined by the op tree". This is true in SQL analysis where there is no -ambiguity what the result of executing a Table is. - -In time series analysis, however, this is not necessarily True. We have defined -an extension to ibis execution for time series analysis where the result of -executing a Table is defined by the Table plus the time context are -associated with the execution. - -Time context specifies the temporal range of a query, it carries the start and -end datetimes. For example, a Table can represent the query select count(a) -from table, but the result of that is different with time context -("20190101", "20200101") vs ("20200101", "20210101"), because what data is in -"table" depends also on the time context. - -While data in scope is public and global for all nodes, `timecontext` is -intended to store 'local' time context data for each node in execution. i.e., -each subtree of an expr tree can have different time context. Which makes it -so that when executing each node, we also need to know the "local time context" -for that node. - -And we propose to store these data as 'timecontext', calculate in execution -pass it along to children nodes, in the ibis tree. See each backends for -implementation details. - -Time context adjustment algorithm - In an Ibis tree, time context is local for each node, and they should be - adjusted accordingly for some specific nodes. Those operations may - require extra data outside of the global time context that user defines. - For example, in asof_join, we need to look back extra `tolerance` daays - for the right table to get the data for joining. Similarly for window - operation with preceding and following. - Algorithm to calculate context adjustment are defined in this module - and could be used by multiple backends. 
-""" - -from __future__ import annotations - -import enum -import functools -from typing import TYPE_CHECKING, Any - -import pandas as pd - -import ibis.common.exceptions as com -import ibis.expr.operations as ops -from ibis import config - -TimeContext = tuple[pd.Timestamp, pd.Timestamp] - - -if TYPE_CHECKING: - from ibis.backends.base.df.scope import Scope - - -# In order to use time context feature, there must be a column of Timestamp -# type, and named as 'time' in Table. This TIME_COL constant will be -# used in filtering data from a table or columns of a table. It can be changed -# by running: -# -# ibis.config.options.context_adjustment.time_col = "other_time_col" - - -def get_time_col(): - return config.options.context_adjustment.time_col - - -class TimeContextRelation(enum.Enum): - """Enum to classify the relationship between two time contexts. - - Assume that we have two timecontext `c1 (begin1, end1)`, `c2(begin2, end2)`: - - - `SUBSET` means `c1` is a subset of `c2`, `begin1` is greater than or - equal to `begin2`, and `end1` is less than or equal to `end2`. - - `SUPERSET` means that `begin1` is earlier than `begin2`, and `end1` - is later than `end2`. - - If neither of the two contexts is a superset of each other, and they - share some time range in common, we called them `OVERLAP`. - - `NONOVERLAP` means the two contexts doesn't overlap at all, which - means `end1` is earlier than `begin2` or `end2` is earlier than - `begin1`. - """ - - SUBSET = 0 - SUPERSET = 1 - OVERLAP = 2 - NONOVERLAP = 3 - - -def compare_timecontext( - left_context: TimeContext, right_context: TimeContext -) -> TimeContextRelation: - """Compare two time contexts and return the relationship between them.""" - left_begin, left_end = left_context - right_begin, right_end = right_context - if right_begin <= left_begin and right_end >= left_end: - return TimeContextRelation.SUBSET - elif right_begin >= left_begin and right_end <= left_end: - return TimeContextRelation.SUPERSET - elif right_end < left_begin or left_end < right_begin: - return TimeContextRelation.NONOVERLAP - else: - return TimeContextRelation.OVERLAP - - -def canonicalize_context( - timecontext: TimeContext | None, -) -> TimeContext | None: - """Canonicalize a timecontext with type pandas.Timestamp for its begin and end time.""" - - SUPPORTS_TIMESTAMP_TYPE = pd.Timestamp - if not isinstance(timecontext, tuple) or len(timecontext) != 2: - raise com.IbisError(f"Timecontext {timecontext} should specify (begin, end)") - - begin, end = timecontext - - if not isinstance(begin, SUPPORTS_TIMESTAMP_TYPE): - raise com.IbisError( - f"begin time value {begin} of type {type(begin)} is not" - " of type pd.Timestamp" - ) - if not isinstance(end, SUPPORTS_TIMESTAMP_TYPE): - raise com.IbisError( - f"end time value {end} of type {type(begin)} is not of type pd.Timestamp" - ) - if begin > end: - raise com.IbisError( - f"begin time {begin} must be before or equal to end time {end}" - ) - return begin, end - - -def localize_context(timecontext: TimeContext, timezone: str) -> TimeContext: - """Localize tz-naive context.""" - begin, end = timecontext - if begin.tz is None: - begin = begin.tz_localize(timezone) - - if end.tz is None: - end = end.tz_localize(timezone) - - return begin, end - - -def construct_time_context_aware_series( - series: pd.Series, frame: pd.DataFrame -) -> pd.Series: - """Construct a Series by adding 'time' in its MultiIndex. - - In window execution, the result Series of udf may need - to be trimmed by timecontext. 
In order to do so, 'time' - must be added as an index to the Series. We extract - time column from the parent Dataframe `frame`. - See `trim_window_result` in execution/window.py for - trimming implementation. - - Examples - -------- - >>> import pandas as pd - >>> from ibis.backends.base.df.timecontext import ( - ... construct_time_context_aware_series, - ... ) - >>> df = pd.DataFrame( - ... { - ... "time": pd.Series(pd.date_range(start="2017-01-02", periods=3).values), - ... "id": [1, 2, 3], - ... "value": [1.1, 2.2, 3.3], - ... } - ... ) - >>> df - time id value - 0 2017-01-02 1 1.1 - 1 2017-01-03 2 2.2 - 2 2017-01-04 3 3.3 - >>> series = df["value"] - >>> series - 0 1.1 - 1 2.2 - 2 3.3 - Name: value, dtype: float64 - >>> construct_time_context_aware_series(series, df) # quartodoc: +SKIP # doctest: +SKIP - time - 0 2017-01-02 1.1 - 1 2017-01-03 2.2 - 2 2017-01-04 3.3 - Name: value, dtype: float64 - - The index will be a MultiIndex of the original RangeIndex - and a DateTimeIndex. - - >>> timed_series = construct_time_context_aware_series(series, df) - >>> timed_series # quartodoc: +SKIP # doctest: +SKIP - time - 0 2017-01-02 1.1 - 1 2017-01-03 2.2 - 2 2017-01-04 3.3 - Name: value, dtype: float64 - - >>> construct_time_context_aware_series( - ... timed_series, df - ... ) # quartodoc: +SKIP # doctest: +SKIP - time - 0 2017-01-02 1.1 - 1 2017-01-03 2.2 - 2 2017-01-04 3.3 - Name: value, dtype: float64 - The result is unchanged for a series already has 'time' as its index. - """ - time_col = get_time_col() - if time_col == frame.index.name: - time_index = frame.index - elif time_col in frame: - time_index = pd.Index(frame[time_col]) - else: - raise com.IbisError(f'"time" column not present in DataFrame {frame}') - if time_col not in series.index.names: - series.index = pd.MultiIndex.from_arrays( - list(map(series.index.get_level_values, range(series.index.nlevels))) - + [time_index], - names=series.index.names + [time_col], - ) - return series - - -@functools.singledispatch -def adjust_context(op: Any, scope: Scope, timecontext: TimeContext) -> TimeContext: - """Adjust the `timecontext` for `op`. - - Parameters - ---------- - op - Ibis operation. - scope - Incoming scope. - timecontext - Time context associated with the node. - - Returns - ------- - TimeContext - For `op` that is not of type Node, raise an error to avoid failing - silently since the default behavior is to return `timecontext`. 
- """ - raise com.IbisError(f"Unsupported input type for adjust context for {op}") - - -@adjust_context.register(ops.Node) -def adjust_context_node( - op: ops.Node, scope: Scope, timecontext: TimeContext -) -> TimeContext: - # For any node, by default, do not adjust time context - return timecontext - - -@adjust_context.register(ops.Alias) -def adjust_context_alias( - op: ops.Node, scope: Scope, timecontext: TimeContext -) -> TimeContext: - # For any node, by default, do not adjust time context - return adjust_context(op.arg, scope, timecontext) - - -@adjust_context.register(ops.AsOfJoin) -def adjust_context_asof_join( - op: ops.AsOfJoin, scope: Scope, timecontext: TimeContext -) -> TimeContext: - begin, end = timecontext - - if op.tolerance is not None: - from ibis.backends.pandas.execution import execute - - timedelta = execute(op.tolerance) - return (begin - timedelta, end) - - return timecontext - - -@adjust_context.register(ops.WindowFunction) -def adjust_context_window( - op: ops.WindowFunction, scope: Scope, timecontext: TimeContext -) -> TimeContext: - # TODO(kszucs): this file should be really moved to the pandas - # backend instead of the current central placement - from ibis.backends.pandas.execution import execute - - # adjust time context by preceding and following - begin, end = timecontext - - if op.frame.start is not None: - value = execute(op.frame.start.value) - if value: - begin = begin - value - - if op.frame.end is not None: - value = execute(op.frame.end.value) - if value: - end = end + value - - return (begin, end) diff --git a/ibis/backends/dask/tests/execution/test_join.py b/ibis/backends/dask/tests/execution/test_join.py index e9805c74c142..e76097b65cdd 100644 --- a/ibis/backends/dask/tests/execution/test_join.py +++ b/ibis/backends/dask/tests/execution/test_join.py @@ -96,6 +96,7 @@ def test_join_with_multiple_predicates(how, left, right, df1, df2): left, right.key3, right.other_value ] result = expr.execute().sort_values(by=["key"]).reset_index(drop=True) + expected = ( dd.merge(df1, df2, how=how, left_on=["key", "key2"], right_on=["key", "key3"]) .compute(scheduler="single-threaded") diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py index 4349400c50ab..881a460b7f5e 100644 --- a/ibis/backends/pandas/__init__.py +++ b/ibis/backends/pandas/__init__.py @@ -1,6 +1,5 @@ from __future__ import annotations -import importlib from functools import lru_cache from typing import TYPE_CHECKING, Any @@ -22,8 +21,6 @@ import pathlib from collections.abc import Mapping, MutableMapping -raise RuntimeError("Temporarily make the pandas backend dysfunctional") - class BasePandasBackend(BaseBackend): """Base class for backends based on pandas.""" @@ -51,9 +48,6 @@ def do_connect( >>> ibis.pandas.connect({"t": pd.DataFrame({"a": [1, 2, 3]})}) """ - # register dispatchers - from ibis.backends.pandas import execution, udf # noqa: F401 - self.dictionary = dictionary or {} self.schemas: MutableMapping[str, sch.Schema] = {} @@ -256,34 +250,13 @@ def _convert_object(cls, obj: Any) -> Any: @classmethod @lru_cache def _get_operations(cls): - backend = f"ibis.backends.{cls.name}" - - execution = importlib.import_module(f"{backend}.execution") - execute_node = execution.execute_node + from ibis.backends.pandas.kernels import supported_operations - # import UDF to pick up AnalyticVectorizedUDF and others - importlib.import_module(f"{backend}.udf") - - dispatch = importlib.import_module(f"{backend}.dispatch") - pre_execute = dispatch.pre_execute - - return 
frozenset( - op - for op, *_ in execute_node.funcs.keys() | pre_execute.funcs.keys() - if issubclass(op, ops.Value) - ) + return supported_operations @classmethod def has_operation(cls, operation: type[ops.Value]) -> bool: - # Pandas doesn't support geospatial ops, but the dispatcher implements - # a common base class that makes it appear that it does. Explicitly - # exclude these operations. - if issubclass(operation, (ops.GeoSpatialUnOp, ops.GeoSpatialBinOp)): - return False - op_classes = cls._get_operations() - return operation in op_classes or any( - issubclass(operation, op_impl) for op_impl in op_classes - ) + return operation in cls._get_operations() def _clean_up_cached_table(self, op): del self.dictionary[op.name] @@ -331,7 +304,7 @@ class Backend(BasePandasBackend): name = "pandas" def execute(self, query, params=None, limit="default", **kwargs): - from ibis.backends.pandas.core import execute_and_reset + from ibis.backends.pandas.executor import Executor if limit != "default" and limit is not None: raise ValueError( @@ -346,16 +319,10 @@ def execute(self, query, params=None, limit="default", **kwargs): ) ) - node = query.op() - - if params is None: - params = {} - else: - params = { - k.op() if isinstance(k, ir.Expr) else k: v for k, v in params.items() - } + params = params or {} + params = {k.op() if isinstance(k, ir.Expr) else k: v for k, v in params.items()} - return execute_and_reset(node, params=params, **kwargs) + return Executor.execute(query.op(), backend=self, params=params) def _load_into_cache(self, name, expr): self.create_table(name, expr.execute()) diff --git a/ibis/backends/pandas/aggcontext.py b/ibis/backends/pandas/aggcontext.py deleted file mode 100644 index 64a4f73bc686..000000000000 --- a/ibis/backends/pandas/aggcontext.py +++ /dev/null @@ -1,710 +0,0 @@ -"""Implements an object to describe the context of a window aggregation. - -For any particular aggregation such as ``sum``, ``mean``, etc we need to decide -based on the presence or absence of other expressions like ``group_by`` and -``order_by`` whether we should call a different method of aggregation. - -Here are the different aggregation contexts and the conditions under which they -are used. - -Note that in the pandas backend, only trailing and cumulative windows are -supported right now. - -No ``group_by`` or ``order_by``: ``context.Summarize()`` --------------------------------------------------------- -This is an aggregation on a column, repeated for every row in the table. - -SQL - -:: - - SELECT SUM(value) OVER () AS sum_value FROM t - -Pandas - -:: - >>> import pandas as pd - >>> import numpy as np - >>> df = pd.DataFrame( - ... { - ... "key": list("aabc"), - ... "value": np.random.randn(4), - ... "time": pd.date_range(periods=4, start="now"), - ... } - ... ) - >>> s = pd.Series(df.value.sum(), index=df.index, name="sum_value") - >>> s # quartodoc: +SKIP # doctest: +SKIP - -Ibis - -:: - - >>> import ibis - >>> schema = dict(time="timestamp", key="string", value="double") - >>> t = ibis.table(schema, name="t") - >>> t[ - ... t, t.value.sum().name("sum_value") - ... ].sum_value # quartodoc: +SKIP # doctest: +SKIP - - -``group_by``, no ``order_by``: ``context.Transform()`` ------------------------------------------------------- - -This performs an aggregation per group and repeats it across every row in the -group. - -SQL - -:: - - SELECT SUM(value) OVER (PARTITION BY key) AS sum_value - FROM t - -Pandas - -:: - - >>> import pandas as pd - >>> import numpy as np - >>> df = pd.DataFrame( - ... 
{ - ... "key": list("aabc"), - ... "value": np.random.randn(4), - ... "time": pd.date_range(periods=4, start="now"), - ... } - ... ) - >>> df.groupby("key").value.transform("sum") # quartodoc: +SKIP # doctest: +SKIP - -Ibis - -:: - - >>> import ibis - >>> schema = dict(time="timestamp", key="string", value="double") - >>> t = ibis.table(schema, name="t") - >>> t.value.sum().over( - ... ibis.window(group_by=t.key) - ... ) # quartodoc: +SKIP # doctest: +SKIP - -``order_by``, no ``group_by``: ``context.Cumulative()``/``context.Rolling()`` ------------------------------------------------------------------------------ - -Cumulative and trailing window operations. - -Cumulative -~~~~~~~~~~ - -Also called expanding. - -SQL - -:: - - SELECT SUM(value) OVER ( - ORDER BY time ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW - ) AS sum_value - FROM t - - -Pandas - -:: - - >>> import pandas as pd - >>> import numpy as np - >>> df = pd.DataFrame( - ... { - ... "key": list("aabc"), - ... "value": np.random.randn(4), - ... "time": pd.date_range(periods=4, start="now"), - ... } - ... ) - >>> df.sort_values("time").value.cumsum() # quartodoc: +SKIP # doctest: +SKIP - -Ibis - -:: - - >>> import ibis - >>> schema = dict(time="timestamp", key="string", value="double") - >>> t = ibis.table(schema, name="t") - >>> window = ibis.cumulative_window(order_by=t.time) - >>> t.value.sum().over(window) # quartodoc: +SKIP # doctest: +SKIP - -Moving -~~~~~~ - -Also called referred to as "rolling" in other libraries such as pandas. - -SQL - -:: - - SELECT SUM(value) OVER ( - ORDER BY time ROWS BETWEEN 3 PRECEDING AND CURRENT ROW - ) AS sum_value - FROM t - - -Pandas - -:: - - >>> import pandas as pd - >>> import numpy as np - >>> df = pd.DataFrame( - ... { - ... "key": list("aabc"), - ... "value": np.random.randn(4), - ... "time": pd.date_range(periods=4, start="now"), - ... } - ... ) - >>> df.sort_values("time").value.rolling( - ... 3 - ... ).sum() # quartodoc: +SKIP # doctest: +SKIP - -Ibis - -:: - - >>> import ibis - >>> schema = dict(time="timestamp", key="string", value="double") - >>> t = ibis.table(schema, name="t") - >>> window = ibis.trailing_window(3, order_by=t.time) - >>> t.value.sum().over(window) # quartodoc: +SKIP # doctest: +SKIP - - -``group_by`` and ``order_by``: ``context.Cumulative()``/``context.Rolling()`` ------------------------------------------------------------------------------ - -This performs a cumulative or rolling operation within a group. - -SQL - -:: - - SELECT SUM(value) OVER ( - PARTITION BY key ORDER BY time ROWS BETWEEN 4 PRECEDING AND CURRENT ROW - ) AS sum_value - FROM t - - -Pandas - -:: - - >>> import pandas as pd - >>> import numpy as np - >>> df = pd.DataFrame( - ... { - ... "key": list("aabc"), - ... "value": np.random.randn(4), - ... "time": pd.date_range(periods=4, start="now"), - ... } - ... ) - >>> sorter = lambda df: df.sort_values("time") - >>> gb = ( - ... df.groupby("key", group_keys=False) - ... .apply(sorter) - ... .reset_index(drop=True) - ... .groupby("key") - ... 
) - >>> rolling = gb.value.rolling(2) - >>> rolling.sum() # quartodoc: +SKIP # doctest: +SKIP - -Ibis - -:: - - >>> import ibis - >>> schema = dict(time="timestamp", key="string", value="double") - >>> t = ibis.table(schema, name="t") - >>> window = ibis.trailing_window(2, order_by=t.time, group_by=t.key) - >>> t.value.sum().over(window) # quartodoc: +SKIP # doctest: +SKIP -""" - -from __future__ import annotations - -import abc -import functools -import itertools -import operator -from typing import TYPE_CHECKING, Any, Callable - -import pandas as pd -from pandas.core.groupby import SeriesGroupBy - -import ibis -import ibis.common.exceptions as com -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -import ibis.util -from ibis.backends.base.df.timecontext import ( - construct_time_context_aware_series, - get_time_col, -) - -if TYPE_CHECKING: - from collections.abc import Iterator - - import numpy as np - - -class AggregationContext(abc.ABC): - __slots__ = ( - "parent", - "group_by", - "order_by", - "dtype", - "max_lookback", - "output_type", - ) - - def __init__( - self, - parent=None, - group_by=None, - order_by=None, - max_lookback=None, - output_type=None, - ): - self.parent = parent - self.group_by = group_by - self.order_by = order_by - self.dtype = None if output_type is None else output_type.to_pandas() - self.output_type = output_type - self.max_lookback = max_lookback - - @abc.abstractmethod - def agg(self, grouped_data, function, *args, **kwargs): - pass - - -def wrap_for_apply( - function: Callable, - args: tuple[Any, ...] | None = None, - kwargs: dict[str, Any] | None = None, -) -> Callable: - """Wrap a function for use with Pandas `apply`. - - Parameters - ---------- - function : Callable - A function to be used with Pandas `apply`. - args : Optional[Tuple[Any, ...]] - args to be passed to function when it is called by Pandas `apply` - kwargs : Optional[Dict[str, Any]] - kwargs to be passed to function when it is called by Pandas `apply` - """ - assert callable(function), f"function {function} is not callable" - - new_args: tuple[Any, ...] = () - if args is not None: - new_args = args - - new_kwargs: dict[str, Any] = {} - if kwargs is not None: - new_kwargs = kwargs - - @functools.wraps(function) - def wrapped_func( - data: Any, - function: Callable = function, - args: tuple[Any, ...] = new_args, - kwargs: dict[str, Any] = new_kwargs, - ) -> Callable: - return function(data, *args, **kwargs) - - return wrapped_func - - -def wrap_for_agg( - function: Callable, - args: tuple[Any, ...], - kwargs: dict[str, Any], -) -> Callable: - """Wrap a function for use with Pandas `agg`. - - This includes special logic that will force Pandas `agg` to always treat - the function as an aggregation function. Details: - - When passed a function, Pandas `agg` will either: - 1) Behave like Pandas `apply` and treat the function as a N->N mapping - function (i.e. calls the function once for every value in the Series - that `agg` is being called on), OR - 2) Treat the function as a N->1 aggregation function (i.e. calls the - function once on the entire Series) - Pandas `agg` will use behavior #1 unless an error is raised when doing so. - - We want to force Pandas `agg` to use behavior #2. To do this, we will wrap - the function with logic that checks that a Series is being passed in, and - raises a TypeError otherwise. When Pandas `agg` is attempting to use - behavior #1 but sees the TypeError, it will fall back to behavior #2. 
- - Parameters - ---------- - function : Callable - An aggregation function to be used with Pandas `agg`. - args : Tuple[Any, ...] - args to be passed to function when it is called by Pandas `agg` - kwargs : Dict[str, Any] - kwargs to be passed to function when it is called by Pandas `agg` - """ - assert callable(function), f"function {function} is not callable" - - @functools.wraps(function) - def wrapped_func( - data: Any, - function: Callable = function, - args: tuple[Any, ...] = args, - kwargs: dict[str, Any] = kwargs, - ) -> Callable: - # `data` will be a scalar here if Pandas `agg` is trying to behave like - # like Pandas `apply`. - if not isinstance(data, pd.Series): - # Force `agg` to NOT behave like `apply`. We want Pandas to use - # `function` as an aggregation function, not as a mapping function. - raise TypeError( - f"This function expects a Series, but saw an object of type " - f"{type(data)} instead." - ) - return function(data, *args, **kwargs) - - return wrapped_func - - -class Summarize(AggregationContext): - __slots__ = () - - def agg(self, grouped_data, function, *args, **kwargs): - if isinstance(function, str): - return getattr(grouped_data, function)(*args, **kwargs) - - if not callable(function): - raise TypeError(f"Object {function} is not callable or a string") - - if isinstance(grouped_data, pd.core.groupby.generic.SeriesGroupBy) and len( - grouped_data - ): - # `SeriesGroupBy.agg` does not allow np.arrays to be returned - # from UDFs. To avoid `SeriesGroupBy.agg`, we will call the - # aggregation function manually on each group. (#2768) - aggs = {} - for k, v in grouped_data: - func_args = [d.get_group(k) for d in args] - aggs[k] = function(v, *func_args, **kwargs) - grouped_col_name = v.name - return ( - pd.Series(aggs) - .rename(grouped_col_name) - .rename_axis(grouped_data.grouper.names) - ) - else: - return grouped_data.agg(wrap_for_agg(function, args, kwargs)) - - -class Transform(AggregationContext): - __slots__ = () - - def agg(self, grouped_data, function, *args, **kwargs): - # If this is a multi column UDF, then we cannot use - # "transform" here (Data must be 1-dimensional) - # Instead, we need to use "apply", which can return a non - # numeric type, e.g, tuple of two double. - if self.output_type.is_struct(): - res = grouped_data.apply(function, *args, **kwargs) - else: - res = grouped_data.transform(function, *args, **kwargs) - - # The result series uses the name of the input. We should - # unset it to avoid confusion, when result is not guaranteed - # to be the same series / have the same type after transform - res.name = None - return res - - -@functools.singledispatch -def compute_window_spec(dtype, obj): - raise com.IbisTypeError( - f"Unknown dtype type {dtype} and object {obj} for compute_window_spec" - ) - - -@compute_window_spec.register(dt.Integer) -def compute_window_spec_none(_, obj): - """Helper method only used for row-based windows. - - Window spec in ibis is an inclusive window bound. A bound of 0 - indicates the current row. Window spec in Pandas indicates window - size. Therefore, we must add 1 to the ibis window bound to get the - expected behavior. 
- """ - from ibis.backends.pandas.core import execute - - value = execute(obj) - return value + 1 - - -@compute_window_spec.register(dt.Interval) -def compute_window_spec_interval(_, obj): - from ibis.backends.pandas.core import execute - - value = execute(obj) - return pd.tseries.frequencies.to_offset(value) - - -def window_agg_built_in( - frame: pd.DataFrame, - windowed: pd.core.window.Window, - function: str, - max_lookback: ops.Literal, - *args: tuple[Any, ...], - **kwargs: dict[str, Any], -) -> pd.Series: - """Apply window aggregation with built-in aggregators.""" - assert isinstance(function, str) - method = operator.methodcaller(function, *args, **kwargs) - - if max_lookback is not None: - agg_method = method - - def sliced_agg(s): - return agg_method(s.iloc[-max_lookback.value :]) - - method = operator.methodcaller("apply", sliced_agg, raw=False) - - result = method(windowed) - index = result.index - result.index = pd.MultiIndex.from_arrays( - [frame.index] + list(map(index.get_level_values, range(index.nlevels))), - names=[frame.index.name] + index.names, - ) - return result - - -def create_window_input_iter( - grouped_data: SeriesGroupBy | pd.Series, - masked_window_lower_indices: pd.Series, - masked_window_upper_indices: pd.Series, -) -> Iterator[np.ndarray]: - # create a generator for each input series - # the generator will yield a slice of the - # input series for each valid window - data = getattr(grouped_data, "obj", grouped_data).values - lower_indices_array = masked_window_lower_indices.values - upper_indices_array = masked_window_upper_indices.values - for i in range(len(lower_indices_array)): - lower_index = lower_indices_array[i] - upper_index = upper_indices_array[i] - yield data[lower_index:upper_index] - - -def window_agg_udf( - grouped_data: SeriesGroupBy, - function: Callable, - window_lower_indices: pd.Series, - window_upper_indices: pd.Series, - mask: pd.Series, - result_index: pd.Index, - dtype: np.dtype, - max_lookback: int, - *args: tuple[Any, ...], - **kwargs: dict[str, Any], -) -> pd.Series: - """Apply window aggregation with UDFs. - - Notes - ----- - Use custom logic to computing rolling window UDF instead of - using pandas's rolling function. - This is because pandas's rolling function doesn't support - multi param UDFs. - """ - assert len(window_lower_indices) == len(window_upper_indices) - assert len(window_lower_indices) == len(mask) - - # Reset index here so we don't need to deal with mismatching - # indices - window_lower_indices = window_lower_indices.reset_index(drop=True) - window_upper_indices = window_upper_indices.reset_index(drop=True) - mask = mask.reset_index(drop=True) - - # Compute window indices and manually roll - # over the window. - - # If an window has only nan values, we output nan for - # the window result. This follows pandas rolling apply - # behavior. - - # The first input column is in grouped_data, but there may - # be additional input columns in args. 
- inputs = (grouped_data,) + args - - masked_window_lower_indices = window_lower_indices[mask].astype("i8") - masked_window_upper_indices = window_upper_indices[mask].astype("i8") - - input_iters = [ - create_window_input_iter( - arg, masked_window_lower_indices, masked_window_upper_indices - ) - if isinstance(arg, (pd.Series, SeriesGroupBy)) - else itertools.repeat(arg) - for arg in inputs - ] - - valid_result = pd.Series( - function(*(next(gen) for gen in input_iters)) - for i in range(len(masked_window_lower_indices)) - ) - - valid_result = pd.Series(valid_result) - valid_result.index = masked_window_lower_indices.index - result = pd.Series(index=mask.index, dtype=dtype) - result[mask] = valid_result - result.index = result_index - - return result - - -class Window(AggregationContext): - __slots__ = ("construct_window",) - - def __init__(self, kind, *args, **kwargs): - super().__init__( - parent=kwargs.pop("parent", None), - group_by=kwargs.pop("group_by", None), - order_by=kwargs.pop("order_by", None), - output_type=kwargs.pop("output_type"), - max_lookback=kwargs.pop("max_lookback", None), - ) - self.construct_window = operator.methodcaller(kind, *args, **kwargs) - - def agg( - self, - grouped_data: pd.Series | SeriesGroupBy, - function: str | Callable, - *args: Any, - **kwargs: Any, - ) -> pd.Series: - # avoid a pandas warning about numpy arrays being passed through - # directly - group_by = self.group_by - order_by = self.order_by - - assert group_by or order_by - - # Get the DataFrame from which the operand originated - # (passed in when constructing this context object in - # execute_node(ops.Window)) - parent = self.parent - frame = getattr(parent, "obj", parent) - obj = getattr(grouped_data, "obj", grouped_data) - name = obj.name - if frame[name] is not obj or name in group_by or name in order_by: - name = f"{name}_{ibis.util.guid()}" - frame = frame.assign(**{name: obj}) - - # set the index to our order_by keys and append it to the existing - # index - # TODO: see if we can do this in the caller, when the context - # is constructed rather than pulling out the data - columns = group_by + order_by + [name] - # Create a new frame to avoid mutating the original one - indexed_by_ordering = frame[columns].copy() - # placeholder column to compute window_sizes below - indexed_by_ordering["_placeholder"] = 0 - indexed_by_ordering = indexed_by_ordering.set_index(order_by) - - # regroup if needed - if group_by: - grouped_frame = indexed_by_ordering.groupby(group_by, group_keys=False) - else: - grouped_frame = indexed_by_ordering - grouped = grouped_frame[name] - - if callable(function): - # To compute the window_size, we need to construct a - # RollingGroupby and compute count using construct_window. - # However, if the RollingGroupby is not numeric, e.g., - # we are calling window UDF on a timestamp column, we - # cannot compute rolling count directly because: - # (1) windowed.count() will exclude NaN observations - # , which results in incorrect window sizes. - # (2) windowed.apply(len, raw=True) will include NaN - # observations, but doesn't work on non-numeric types. 
- # https://github.com/pandas-dev/pandas/issues/23002 - # To deal with this, we create a _placeholder column - - windowed_frame = self.construct_window(grouped_frame) - window_sizes = windowed_frame["_placeholder"].count().reset_index(drop=True) - mask = ~(window_sizes.isna()) - window_upper_indices = pd.Series(range(len(window_sizes))) + 1 - window_lower_indices = window_upper_indices - window_sizes - # The result Series of udf may need to be trimmed by - # timecontext. In order to do so, 'time' must be added - # as an index to the Series, if present. Here We extract - # time column from the parent Dataframe `frame`. - if get_time_col() in frame: - result_index = construct_time_context_aware_series(obj, frame).index - else: - result_index = obj.index - result = window_agg_udf( - grouped_data, - function, - window_lower_indices, - window_upper_indices, - mask, - result_index, - self.dtype, - self.max_lookback, - *args, - **kwargs, - ) - else: - # perform the per-group rolling operation - windowed = self.construct_window(grouped) - result = window_agg_built_in( - frame, - windowed, - function, - self.max_lookback, - *args, - **kwargs, - ) - try: - return result.astype(self.dtype, copy=False) - except (TypeError, ValueError): - return result - - -class Cumulative(Window): - __slots__ = () - - def __init__(self, *args, **kwargs): - super().__init__("expanding", *args, **kwargs) - - -class Moving(Window): - __slots__ = () - - def __init__(self, start, max_lookback, *args, **kwargs): - from ibis.backends.pandas.core import timedelta_types - - start = compute_window_spec(start.dtype, start.value) - if isinstance(start, timedelta_types + (pd.offsets.DateOffset,)): - closed = "both" - else: - closed = None - - super().__init__( - "rolling", - start, - *args, - max_lookback=max_lookback, - closed=closed, - min_periods=1, - **kwargs, - ) - - def short_circuit_method(self, grouped_data, function): - raise AttributeError("No short circuit method for rolling operations") diff --git a/ibis/backends/pandas/convert.py b/ibis/backends/pandas/convert.py new file mode 100644 index 000000000000..76528d3e9258 --- /dev/null +++ b/ibis/backends/pandas/convert.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +import pandas as pd +import pandas.api.types as pdt + +import ibis.expr.datatypes as dt +from ibis.formats.pandas import DataMapper, PandasType + + +class PandasConverter(DataMapper): + @classmethod + def convert_scalar(cls, obj, dtype): + series = pd.Series([obj]) + casted = cls.convert_column(series, dtype) + return casted[0] + + @classmethod + def convert_column(cls, obj, dtype): + pandas_type = PandasType.from_ibis(dtype) + + method_name = f"convert_{dtype.__class__.__name__}" + convert_method = getattr(cls, method_name, cls.convert_default) + + return convert_method(obj, dtype, pandas_type) + + @classmethod + def convert_default(cls, s, dtype, pandas_type): + if pandas_type == object: + func = lambda x: x if x is pd.NA else dt.normalize(dtype, x) + return s.map(func, na_action="ignore").astype(pandas_type) + else: + return s.astype(pandas_type) + + @classmethod + def convert_Integer(cls, s, dtype, pandas_type): + if pdt.is_datetime64_any_dtype(s.dtype): + return s.astype("int64").floordiv(int(1e9)).astype(pandas_type) + else: + return s.astype(pandas_type, errors="ignore") + + convert_SignedInteger = convert_UnsignedInteger = convert_Integer + convert_Int64 = convert_Int32 = convert_Int16 = convert_Int8 = convert_SignedInteger + convert_UInt64 = ( + convert_UInt32 + ) = convert_UInt16 = 
convert_UInt8 = convert_UnsignedInteger + + @classmethod + def convert_Floating(cls, s, dtype, pandas_type): + if pdt.is_datetime64_any_dtype(s.dtype): + return s.astype("int64").floordiv(int(1e9)).astype(pandas_type) + else: + return s.astype(pandas_type, errors="ignore") + + convert_Float64 = convert_Float32 = convert_Float16 = convert_Floating + + @classmethod + def convert_Timestamp(cls, s, dtype, pandas_type): + if isinstance(dtype, pd.DatetimeTZDtype): + return s.dt.tz_convert(dtype.timezone) + elif pdt.is_datetime64_dtype(s.dtype): + return s.dt.tz_localize(dtype.timezone) + elif pdt.is_numeric_dtype(s.dtype): + return pd.to_datetime(s, unit="s").dt.tz_localize(dtype.timezone) + else: + try: + return s.astype(pandas_type) + except TypeError: + try: + return pd.to_datetime(s).dt.tz_convert(dtype.timezone) + except TypeError: + return pd.to_datetime(s).dt.tz_localize(dtype.timezone) + + @classmethod + def convert_Date(cls, s, dtype, pandas_type): + if isinstance(s.dtype, pd.DatetimeTZDtype): + s = s.dt.tz_convert("UTC").dt.tz_localize(None) + elif pdt.is_numeric_dtype(s.dtype): + s = pd.to_datetime(s, unit="D") + else: + s = pd.to_datetime(s).astype(pandas_type, errors="ignore") + + return s.dt.normalize() + + @classmethod + def convert_String(cls, s, dtype, pandas_type): + # TODO(kszucs): should switch to the new pandas string type and convert + # object columns using s.convert_dtypes() method + return s.map(str, na_action="ignore").astype(object) diff --git a/ibis/backends/pandas/core.py b/ibis/backends/pandas/core.py deleted file mode 100644 index ef29b2bb29cc..000000000000 --- a/ibis/backends/pandas/core.py +++ /dev/null @@ -1,605 +0,0 @@ -"""The pandas backend. - -The pandas backend is a departure from the typical ibis backend in that it -doesn't compile to anything, and the execution of the ibis expression is under -the purview of ibis itself rather than executing SQL on a server. - -Design ------- -The pandas backend uses a technique called `multiple dispatch -`_, implemented in a -third-party open source library called `multipledispatch -`_. - -Multiple dispatch is a generalization of standard single-dispatch runtime -polymorphism to multiple arguments. - -Compilation ------------ -This is a no-op because we execute ibis expressions directly. - -Execution ---------- -Execution is divided into different dispatched functions, each arising from -a different use case. - -A top level function `execute` exists to provide the API for executing an ibis -expression against in-memory data. - -The general flow of execution is: - -:: - If the current operation is in scope: - return it - Else: - execute the arguments of the current node - - execute the current node with its executed arguments - -Specifically, execute is comprised of a series of steps that happen at -different times during the loop. - -1. ``compute_time_context`` ---------------------------- -First, at the beginning of the main execution loop, ``compute_time_context`` is -called. This function computes time contexts, and pass them to all children of -the current node. These time contexts could be used in later steps to get data. -This is essential for time series Table, and related operations that adjust -time context, such as window, asof_join, etc. - -By default, this function simply pass the unchanged time context to all -children nodes. - - -2. ``pre_execute`` ------------------- -Second, ``pre_execute`` is called. 
-This function serves a similar purpose to ``data_preload``, the key difference
-being that ``pre_execute`` is called *every time* there's a call to execute.
-
-By default this function does nothing.
-
-3. ``execute_node``
--------------------
-
-Then, when an expression is ready to be evaluated we call
-:func:`~ibis.backends.pandas.core.execute` on the expression's arguments and
-then :func:`~ibis.backends.pandas.dispatch.execute_node` on the expression
-with its now-materialized arguments.
-
-4. ``post_execute``
--------------------
-The final step--``post_execute``--is called immediately after the previous call
-to ``execute_node`` and takes the instance of the
-:class:`~ibis.expr.operations.Node` just computed and the result of the
-computation.
-
-The purpose of this function is to allow additional computation to happen in
-the context of the current level of the execution loop. That may sound vague,
-so let's look at an example.
-
-Let's say you want to take a three-day rolling average, and you want to include
-3 days of data prior to the first date of the input. You don't want to see that
-data in the result for a few reasons, one of which is that it would break the
-contract of window functions: given N rows of input there are N rows of output.
-
-Defining a ``post_execute`` rule for :class:`~ibis.expr.operations.Window`
-allows you to encode such logic. One might want to implement this using
-:class:`~ibis.expr.operations.ScalarParameter`, in which case the ``scope``
-passed to ``post_execute`` would be the bound values passed in at the time the
-``execute`` method was called.
-
-
-Scope
------
-Scope is used across the execution phases; it is a map from Ibis operators to
-actual data. It is used to cache data for calculated ops, as an optimization
-to reuse executed results.
-
-With time context included, the key is the op associated with each expression,
-and the scope value is another key-value map:
-- value: pd.DataFrame or pd.Series that is the result of executing the key op
-- timecontext: of type TimeContext, the time context associated with the data
-stored in value
-
-See ibis.common.scope for details about the implementation.
-""" - -from __future__ import annotations - -import datetime -import functools -import numbers -from typing import TYPE_CHECKING, Any, Callable - -import numpy as np -import pandas as pd -from multipledispatch import Dispatcher - -import ibis.common.exceptions as com -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -import ibis.util -from ibis.backends.base import BaseBackend -from ibis.backends.base.df.scope import Scope -from ibis.backends.base.df.timecontext import TimeContext, canonicalize_context -from ibis.backends.pandas import aggcontext as agg_ctx -from ibis.backends.pandas.dispatch import ( - execute_literal, - execute_node, - post_execute, - pre_execute, -) -from ibis.backends.pandas.trace import trace - -if TYPE_CHECKING: - from collections.abc import Iterable, Mapping - -integer_types = np.integer, int -floating_types = (numbers.Real,) -numeric_types = integer_types + floating_types -boolean_types = bool, np.bool_ -fixed_width_types = numeric_types + boolean_types -date_types = (datetime.date,) -time_types = (datetime.time,) -timestamp_types = pd.Timestamp, datetime.datetime, np.datetime64 -timedelta_types = pd.Timedelta, datetime.timedelta, np.timedelta64 -temporal_types = date_types + time_types + timestamp_types + timedelta_types -scalar_types = fixed_width_types + temporal_types -simple_types = scalar_types + (str, type(None)) - - -@functools.singledispatch -def is_computable_input(arg): - """All inputs are not computable without a specific override.""" - return False - - -@is_computable_input.register(BaseBackend) -@is_computable_input.register(ops.Node) -@is_computable_input.register(dt.DataType) -@is_computable_input.register(type(None)) -@is_computable_input.register(tuple) -def is_computable_input_arg(arg): - """Return whether `arg` is a valid computable argument.""" - return True - - -# Register is_computable_input for each scalar type (int, float, date, etc). -# We use consume here to avoid leaking the iteration variable into the module. -ibis.util.consume( - is_computable_input.register(t)(is_computable_input_arg) for t in scalar_types -) - - -def execute_with_scope( - node: ops.Node, - scope: Scope, - timecontext: TimeContext | None = None, - aggcontext: agg_ctx.AggregationContext | None = None, - clients=None, - **kwargs: Any, -): - """Execute an expression `expr`, with data provided in `scope`. - - Parameters - ---------- - node - The operation node to execute. - scope - A Scope class, with dictionary mapping `ibis.expr.operations.Node` - subclass instances to concrete data such as a pandas DataFrame. - timecontext - A tuple of (begin, end) that is passed from parent Node to children - see [timecontext.py](ibis/backends/pandas/execution/timecontext.py) for - detailed usage for this time context. - aggcontext - Aggregation context - clients - Iterable of clients - kwargs - Keyword arguments - """ - # Call pre_execute, to allow clients to intercept the expression before - # computing anything *and* before associating leaf nodes with data. This - # allows clients to provide their own data for each leaf. 
- if clients is None: - clients, _ = node.to_expr()._find_backends() - - if aggcontext is None: - aggcontext = agg_ctx.Summarize() - - pre_executed_scope = pre_execute( - node, - *clients, - scope=scope, - timecontext=timecontext, - aggcontext=aggcontext, - **kwargs, - ) - new_scope = scope.merge_scope(pre_executed_scope) - result = execute_until_in_scope( - node, - new_scope, - timecontext=timecontext, - aggcontext=aggcontext, - clients=clients, - # XXX: we *explicitly* pass in scope and not new_scope here so that - # post_execute sees the scope of execute_with_scope, not the scope of - # execute_until_in_scope - post_execute_=functools.partial( - post_execute, - scope=scope, - timecontext=timecontext, - aggcontext=aggcontext, - clients=clients, - **kwargs, - ), - **kwargs, - ).get_value(node, timecontext) - return result - - -@trace -def execute_until_in_scope( - node, - scope: Scope, - timecontext: TimeContext | None = None, - aggcontext: agg_ctx.AggregationContext | None = None, - clients: Iterable | None = None, - post_execute_: Callable | None = None, - **kwargs: Any, -) -> Scope: - """Execute until our op is in `scope`.""" - # these should never be None - assert aggcontext is not None, "aggcontext is None" - assert clients is not None, "clients is None" - assert post_execute_ is not None, "post_execute_ is None" - - # base case: our op has been computed (or is a leaf data node), so - # return the corresponding value - if scope.get_value(node, timecontext) is not None: - return scope - if isinstance(node, ops.Literal): - # special case literals to avoid the overhead of dispatching - # execute_node - return Scope( - { - node: execute_literal( - node, - node.value, - node.dtype, - aggcontext=aggcontext, - **kwargs, - ) - }, - timecontext, - ) - - # figure out what arguments we're able to compute on based on the - # expressions inputs. things like expressions, None, and scalar types are - # computable whereas ``list``s are not - computable_args = [ - arg for arg in get_node_arguments(node) if is_computable_input(arg) - ] - - # pre_executed_states is a list of states with same the length of - # computable_args, these states are passed to each arg - if timecontext: - arg_timecontexts = compute_time_context( - node, - num_args=len(computable_args), - timecontext=timecontext, - clients=clients, - scope=scope, - ) - else: - arg_timecontexts = [None] * len(computable_args) - - pre_executed_scope = pre_execute( - node, - *clients, - scope=scope, - timecontext=timecontext, - aggcontext=aggcontext, - **kwargs, - ) - - new_scope = scope.merge_scope(pre_executed_scope) - - # Short circuit: if pre_execute puts op in scope, then we don't need to - # execute its computable_args - if new_scope.get_value(node, timecontext) is not None: - return new_scope - - # recursively compute each node's arguments until we've changed type. - # compute_time_context should return with a list with the same length - # as computable_args, the two lists will be zipping together for - # further execution - if len(arg_timecontexts) != len(computable_args): - raise com.IbisError( - "arg_timecontexts differ with computable_arg in length " - f"for type:\n{type(node).__name__}." 
- ) - - scopes = [ - execute_until_in_scope( - arg, - new_scope, - timecontext=timecontext, - aggcontext=aggcontext, - post_execute_=post_execute_, - clients=clients, - **kwargs, - ) - if isinstance(arg, ops.Node) - else Scope({arg: arg}, timecontext) - for (arg, timecontext) in zip(computable_args, arg_timecontexts) - ] - - # if we're unable to find data then raise an exception - if not scopes and computable_args: - raise com.UnboundExpressionError(f"Unable to find data for node:\n{node!r}") - - # there should be exactly one dictionary per computable argument - assert len(computable_args) == len(scopes) - - new_scope = new_scope.merge_scopes(scopes) - # pass our computed arguments to this node's execute_node implementation - data = [ - new_scope.get_value(arg, timecontext) if isinstance(arg, ops.Node) else arg - for (arg, timecontext) in zip(computable_args, arg_timecontexts) - ] - result = execute_node( - node, - *data, - scope=scope, - timecontext=timecontext, - aggcontext=aggcontext, - clients=clients, - **kwargs, - ) - computed = post_execute_( - node, result, timecontext=timecontext, aggcontext=aggcontext, **kwargs - ) - return Scope({node: computed}, timecontext) - - -execute = Dispatcher("execute") - - -@execute.register(ops.Node) -@trace -def main_execute( - node: ops.Node, - params: Mapping[ops.Node, Any] | None = None, - scope: Scope | None = None, - timecontext: TimeContext | None = None, - aggcontext: agg_ctx.AggregationContext | None = None, - cache: Mapping[ops.Node, Any] | None = None, - **kwargs: Any, -): - """Execute an expression against data that are bound to it. - - If no data are bound, raise an Exception. - - Parameters - ---------- - node : ibis.expr.operations.Node - The operation node to execute - params : Mapping[ibis.expr.operations.Node, object] - The data that an unbound parameter in `node` maps to - scope : Mapping[ibis.expr.operations.Node, object] - Additional scope, mapping ibis operations to data - timecontext : Optional[TimeContext] - timecontext needed for execution - aggcontext : Optional[ibis.backends.pandas.aggcontext.AggregationContext] - An object indicating how to compute aggregations. For example, - a rolling mean needs to be computed differently than the mean of a - column. - cache - Mapping for storing computation results. - kwargs - Additional arguments that can potentially be used by individual node - execution - - Returns - ------- - result : Union[ - pandas.Series, pandas.DataFrame, ibis.backends.pandas.core.simple_types - ] - - Raises - ------ - ValueError - * If no data are bound to the input expression - """ - if scope is None: - scope = Scope() - - if timecontext is not None: - # convert timecontext to datetime type, if time strings are provided - timecontext = canonicalize_context(timecontext) - - if params is None: - params = {} - - if cache is None: - cache = {} - - scope = scope.merge_scope(Scope(params, timecontext)) - return execute_with_scope( - node, - scope, - timecontext=timecontext, - aggcontext=aggcontext, - cache=cache, - **kwargs, - ) - - -def execute_and_reset( - node, - params=None, - scope=None, - timecontext: TimeContext | None = None, - aggcontext=None, - **kwargs, -): - """Execute an expression against data that are bound to it. - - If no data are bound, raise an Exception. - - The difference between this function and - `ibis.backends.pandas.core.execute` is that this function resets the index - of the result, if the result has an index. 
- - Parameters - ---------- - node : ibis.expr.operations.Node - The operation node to execute - params : Mapping[ibis.expr.operation.Node, object] - The data that an unbound parameter in `node` maps to - scope : Mapping[ibis.expr.operations.Node, object] - Additional scope, mapping ibis operations to data - timecontext : Optional[TimeContext] - timecontext needed for execution - aggcontext : Optional[ibis.backends.pandas.aggcontext.AggregationContext] - An object indicating how to compute aggregations. For example, - a rolling mean needs to be computed differently than the mean of a - column. - kwargs : Dict[str, object] - Additional arguments that can potentially be used by individual node - execution - - Returns - ------- - pandas.Series | pandas.DataFrame | ibis.backends.pandas.core.simple_types - Result of execution - - Raises - ------ - ValueError - * If no data are bound to the input expression - """ - result = execute( - node, - params=params, - scope=scope, - timecontext=timecontext, - aggcontext=aggcontext, - **kwargs, - ) - return _apply_schema(node, result) - - -def _apply_schema(op: ops.Node, result: pd.DataFrame | pd.Series): - from ibis.formats.pandas import PandasData - - assert isinstance(op, ops.Node), type(op) - if isinstance(result, pd.DataFrame): - df = result.reset_index().loc[:, list(op.schema.names)] - return PandasData.convert_table(df, op.schema) - elif isinstance(result, pd.Series): - schema = op.to_expr().as_table().schema() - df = PandasData.convert_table(result.to_frame(), schema) - return df.iloc[:, 0].reset_index(drop=True) - else: - return result - - -compute_time_context = Dispatcher( - "compute_time_context", - doc="""Compute the time context for a node in execution. - -Notes ------ -For a given node, return with a list of timecontext that are going to be -passed to its children nodes. - -Time context is useful when data is not uniquely defined by op tree. For example, -a table `t` can represent the query `SELECT count(a) FROM table`, but the -result of that is different with time context `(pd.Timestamp("20190101"), -pd.Timestamp("20200101"))` vs `(pd.Timestamp("20200101"), -pd.Timestamp("20210101“))` because what data is in `table` also depends on -the time context. Such context may be different for different nodes, that is, -each node may have a different time context. - -This function computes attributes that are going to be used in execution and -passes these attributes to child nodes. - -Parameters ----------- -clients : List[ibis.backends.base.BaseBackend] - backends for execution -timecontext : Optional[TimeContext] - begin and end time context needed for execution - -Returns -------- -List[Optional[TimeContext]] - A list of timecontexts for children nodes of the current node. Note that - timecontext are calculated for children nodes of computable args only. - The length of the return list is same of the length of computable inputs. 
- See `computable_args` in `execute_until_in_scope` -""", -) - - -@compute_time_context.register(ops.Node) -def compute_time_context_default( - node: ops.Node, - scope: Scope, - timecontext: TimeContext | None = None, - **kwargs, -): - return [timecontext for arg in get_node_arguments(node) if is_computable_input(arg)] - - -get_node_arguments = Dispatcher("get_node_arguments") - - -@get_node_arguments.register(ops.Node) -def get_node_arguments_default(node): - return node.args - - -@get_node_arguments.register(ops.ScalarParameter) -def get_node_arguments_parameter(node): - return () - - -@get_node_arguments.register(ops.DatabaseTable) -def get_node_arguments_table(node): - return (node.source,) - - -@get_node_arguments.register(ops.DropNa) -def get_node_arguments_dropna(node): - return (node.table,) - - -@get_node_arguments.register(ops.Selection) -def get_node_arguments_selection(node): - return (node.table,) - - -@get_node_arguments.register(ops.Aggregation) -def get_node_arguments_aggregation(node): - return (node.table,) - - -@get_node_arguments.register(ops.WindowFunction) -def get_node_arguments_window(node): - return get_node_arguments(node.func)[:1] - - -@get_node_arguments.register( - ( - ops.ElementWiseVectorizedUDF, - ops.ReductionVectorizedUDF, - ops.AnalyticVectorizedUDF, - ) -) -def get_node_arguments_udf(node): - return node.func_args diff --git a/ibis/backends/pandas/dispatch.py b/ibis/backends/pandas/dispatch.py deleted file mode 100644 index b5e080ade3bb..000000000000 --- a/ibis/backends/pandas/dispatch.py +++ /dev/null @@ -1,110 +0,0 @@ -from __future__ import annotations - -from functools import partial - -from multipledispatch import Dispatcher - -import ibis.common.exceptions as com -import ibis.expr.operations as ops -from ibis.backends.base import BaseBackend -from ibis.backends.base.df.scope import Scope -from ibis.backends.pandas.trace import TraceTwoLevelDispatcher - -# Individual operation execution -execute_node = TraceTwoLevelDispatcher( - "execute_node", - doc=( - "Execute an individual operation given the operation and its computed " - "arguments" - ), -) - - -@execute_node.register(ops.Node, [object]) -def raise_unknown_op(node, *args, **kwargs): - signature = ", ".join(type(arg).__name__ for arg in args) - raise com.OperationNotDefinedError( - "Operation is not implemented for this backend with " - f"signature: execute_node({type(node).__name__}, {signature})" - ) - - -@execute_node.register(ops.TableNode) -def raise_unknown_table_node(node, **kwargs): - raise com.UnboundExpressionError( - f"Node of type {type(node).__name__!r} has no data bound to it. " - "You probably tried to execute an expression without a data " - "source." - ) - - -pre_execute = Dispatcher( - "pre_execute", - doc="""\ -Given a node, compute a (possibly partial) scope prior to standard execution. - -Notes ------ -This function is useful if parts of the tree structure need to be executed at -the same time or if there are other reasons to need to interrupt the regular -depth-first traversal of the tree. 
-""", -) - - -# Default returns an empty scope -@pre_execute.register(ops.Node) -@pre_execute.register(ops.Node, BaseBackend) -def pre_execute_default(node, *clients, **kwargs): - return Scope() - - -# Merge the results of all client pre-execution with scope -@pre_execute.register(ops.Node, [BaseBackend]) -def pre_execute_multiple_clients(node, *clients, scope=None, **kwargs): - scope = scope.merge_scopes( - list(map(partial(pre_execute, node, scope=scope, **kwargs), clients)) - ) - return scope - - -execute_literal = Dispatcher( - "execute_literal", - doc="""\ -Special case literal execution to avoid the dispatching overhead of -``execute_node``. - -Parameters ----------- -op : ibis.expr.operations.Node -value : object - The literal value of the object, e.g., int, float. -datatype : ibis.expr.datatypes.DataType - Used to specialize on expressions whose underlying value is of a different - type than its would-be type. For example, interval values are represented - by an integer. -""", -) - - -post_execute = Dispatcher( - "post_execute", - doc="""\ -Execute code on the result of a computation. - -Parameters ----------- -op : ibis.expr.operations.Node - The operation that was just executed -data : object - The result of the computation -""", -) - - -@post_execute.register(ops.Node, object) -def post_execute_default(op, data, **kwargs): - return data - - -execute = Dispatcher("execute") diff --git a/ibis/backends/pandas/dispatcher.py b/ibis/backends/pandas/dispatcher.py deleted file mode 100644 index 6240c0106c3f..000000000000 --- a/ibis/backends/pandas/dispatcher.py +++ /dev/null @@ -1,113 +0,0 @@ -from __future__ import annotations - -from multipledispatch import Dispatcher - - -class TwoLevelDispatcher(Dispatcher): - """A `multipledispatch.Dispatcher` with two levels of dispatching. - - The major change is that this class no longer trigger reorder in - dispatch_iter. Because the majority of the slowness is happening - in reorder, this implementation makes dispatch_iter faster. - Instead, this implementation will trigger reorder in the meta dispatcher - and second level dispatcher. Because the number of registered signatures - for each dispatcher is much smaller in this implementation (In pandas - backend, the number of signatures in one level implementation is - O(1000), and the max number of signatures for the meta dispatcher and - second level dispatcher is O(100)), the overall dispatch_iter is faster. - - This implementation consist of three Dispatcher instance: - - (1) This dispatcher, or the instance of this class itself. This class - inherits Dispatcher to avoid duplicating __call__, cache, ambiguities - detection, as well as properties like ordering and funcs. - - (2) First level dispatcher, aka, meta dispatcher. This is the dispatcher - is used to dispatch to the second level dispatcher using the type of the - first arg. - - (3) Second level dispatcher. This is the actual dispatcher used for linear - searching of matched function given type of args. - - Implementation notes: - - (1) register: - This method will now (a) create the second level dispatcher - if missing and register it with the meta dispatcher. (b) return a function - decorator that will register with all the second level dispatcher. Note - that multiple second level dispatcher could be registered with because this - is supported: - - @foo.register((C1, C2), ...) - - The decorator will also register with this dispatcher so that func and - ordering works properly. 
- - (2) dispatcher_iter - Instead of searching through self.ordering, this method now searches - through: - (a) dispatch_iter of the meta dispatcher (to find matching second level - dispatcher). - (b) for each second level dispatcher, searches through its dispatch_iter. - Because dispatch_iter of meta dispatcher and second level dispatcher - searches through registered functions in proper order (from subclasses to - base classes). - - (3) ambiguity detection, ordering, and funcs - Because this dispatcher has the same func and ordering property as - multipledispatch.Dispatcher. We can completely reuse the ambiguity - detection logic of Dispatcher. Note: - (a) we never actually linear search through ordering of this dispatcher - for dispatching. It's only used for ambiguity detection. - (b) deleting an entry from func of this dispatcher (i.e. del - dispatcher.func[A, B]) does not unregister it. Entries from the second - level dispatcher also needs to be deleted. This is OK because it is not - public API. - - Difference in behavior: - (1) ambiguity detection - Because this implementation doesn't not trigger total reorder of signatures - in dispatch_iter, ambiguity warning will trigger when user calls - "ordering", instead of "dispatch". - """ - - def __init__(self, name, doc=None): - super().__init__(name, doc) - self._meta_dispatcher = Dispatcher(f"{name}_meta") - - def register(self, *types, **kwargs): - type0 = types[0] - - if isinstance(type0, type): - type0 = [type0] - - dispatchers = [] - - for t in type0: - if (t,) in self._meta_dispatcher.funcs: - dispatcher = self._meta_dispatcher.funcs[(t,)] - else: - dispatcher = Dispatcher(f"{self.name}_{t.__name__}") - self._meta_dispatcher.register(t)(dispatcher) - - dispatchers.append((t, dispatcher)) - - def _(func): - self.add(types, func, **kwargs) - for t, dispatcher in dispatchers: - dispatcher.add((t, *types[1:]), func, **kwargs) - return func - - return _ - - def __delitem__(self, types): - del self.funcs[types] - del self._meta_dispatcher.funcs[types[:1]].funcs[types] - if not self._meta_dispatcher.funcs[types[:1]].funcs: - del self._meta_dispatcher.funcs[types[1:]] - - def dispatch_iter(self, *types): - for dispatcher in self._meta_dispatcher.dispatch_iter(types[0]): - func = dispatcher.dispatch(*types) - if func is not None: - yield func diff --git a/ibis/backends/pandas/execution/__init__.py b/ibis/backends/pandas/execution/__init__.py deleted file mode 100644 index 5a79d5166b93..000000000000 --- a/ibis/backends/pandas/execution/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from __future__ import annotations - -from ibis.backends.pandas.execution.arrays import * # noqa: F403 -from ibis.backends.pandas.execution.decimal import * # noqa: F403 -from ibis.backends.pandas.execution.generic import * # noqa: F403 -from ibis.backends.pandas.execution.join import * # noqa: F403 -from ibis.backends.pandas.execution.maps import * # noqa: F403 -from ibis.backends.pandas.execution.selection import * # noqa: F403 -from ibis.backends.pandas.execution.strings import * # noqa: F403 -from ibis.backends.pandas.execution.structs import * # noqa: F403 -from ibis.backends.pandas.execution.temporal import * # noqa: F403 -from ibis.backends.pandas.execution.timecontext import * # noqa: F403 -from ibis.backends.pandas.execution.window import * # noqa: F403 diff --git a/ibis/backends/pandas/execution/arrays.py b/ibis/backends/pandas/execution/arrays.py deleted file mode 100644 index 20461f022241..000000000000 --- a/ibis/backends/pandas/execution/arrays.py +++ 
/dev/null @@ -1,172 +0,0 @@ -from __future__ import annotations - -import itertools -import operator -from functools import partial -from typing import TYPE_CHECKING, Any - -import numpy as np -import pandas as pd -from pandas.core.groupby import SeriesGroupBy - -import ibis.expr.operations as ops -from ibis.backends.pandas.core import execute -from ibis.backends.pandas.dispatch import execute_node - -if TYPE_CHECKING: - from collections.abc import Collection - - -@execute_node.register(ops.Array, tuple) -def execute_array(op, cols, **kwargs): - vals = [execute(arg, **kwargs) for arg in cols] - length = next((len(v) for v in vals if isinstance(v, pd.Series)), None) - - if length is None: - return vals - - def ensure_series(v): - if isinstance(v, pd.Series): - return v - else: - return pd.Series(v, index=range(length)) - - # pd.concat() can only handle array-likes. - # If we're given a scalar, we need to broadcast it as a Series. - df = pd.concat([ensure_series(v) for v in vals], axis=1) - return df.apply(lambda row: np.array(row, dtype=object), axis=1) - - -@execute_node.register(ops.ArrayLength, pd.Series) -def execute_array_length(op, data, **kwargs): - return data.apply(len) - - -@execute_node.register(ops.ArrayLength, (list, np.ndarray)) -def execute_array_length_scalar(op, data, **kwargs): - return len(data) - - -@execute_node.register(ops.ArraySlice, pd.Series, int, (int, type(None))) -def execute_array_slice(op, data, start, stop, **kwargs): - return data.apply(operator.itemgetter(slice(start, stop))) - - -@execute_node.register(ops.ArraySlice, (list, np.ndarray), int, (int, type(None))) -def execute_array_slice_scalar(op, data, start, stop, **kwargs): - return data[start:stop] - - -@execute_node.register(ops.ArrayIndex, pd.Series, int) -def execute_array_index(op, data, index, **kwargs): - return data.apply( - lambda array, index=index: ( - array[index] if -len(array) <= index < len(array) else None - ) - ) - - -@execute_node.register(ops.ArrayIndex, (list, np.ndarray), int) -def execute_array_index_scalar(op, data, index, **kwargs): - try: - return data[index] - except IndexError: - return None - - -@execute_node.register(ops.ArrayContains, (list, np.ndarray), object) -def execute_node_contains_value_array(op, haystack, needle, **kwargs): - return needle in haystack - - -def _concat_iterables_to_series(*iters: Collection[Any]) -> pd.Series: - """Concatenate two collections to create a Series. - - The two collections are assumed to have the same length. - - Used for ArrayConcat implementation. - """ - first, *rest = iters - assert all(len(series) == len(first) for series in rest) - # Doing the iteration using `map` is much faster than doing the iteration - # using `Series.apply` due to Pandas-related overhead. - return pd.Series(map(lambda *args: np.concatenate(args), first, *rest)) - - -@execute_node.register(ops.ArrayConcat, tuple) -def execute_array_concat(op, args, **kwargs): - return execute_node(op, *map(partial(execute, **kwargs), args), **kwargs) - - -@execute_node.register(ops.ArrayConcat, pd.Series, pd.Series, [pd.Series]) -def execute_array_concat_series(op, first, second, *args, **kwargs): - return _concat_iterables_to_series(first, second, *args) - - -@execute_node.register( - ops.ArrayConcat, (list, np.ndarray), pd.Series, [(pd.Series, list, np.ndarray)] -) -def execute_array_concat_mixed_left(op, left, right, *args, **kwargs): - # ArrayConcat given a column (pd.Series) and a scalar (np.ndarray). - # We will broadcast the scalar to the length of the column. 
- # Broadcast `left` to the length of `right` - left = np.tile(left, (len(right), 1)) - return _concat_iterables_to_series(left, right) - - -@execute_node.register( - ops.ArrayConcat, pd.Series, (list, np.ndarray), [(pd.Series, list, np.ndarray)] -) -def execute_array_concat_mixed_right(op, left, right, *args, **kwargs): - # Broadcast `right` to the length of `left` - right = np.tile(right, (len(left), 1)) - return _concat_iterables_to_series(left, right) - - -@execute_node.register( - ops.ArrayConcat, (list, np.ndarray), (list, np.ndarray), [(list, np.ndarray)] -) -def execute_array_concat_scalar(op, left, right, *args, **kwargs): - return np.concatenate([left, right, *args]) - - -@execute_node.register(ops.ArrayRepeat, pd.Series, int) -def execute_array_repeat(op, data, n, **kwargs): - # Negative n will be treated as 0 (repeat will produce empty array) - n = max(n, 0) - return pd.Series(np.tile(arr, n) for arr in data) - - -@execute_node.register(ops.ArrayRepeat, (list, np.ndarray), int) -def execute_array_repeat_scalar(op, data, n, **kwargs): - # Negative n will be treated as 0 (repeat will produce empty array) - return np.tile(data, max(n, 0)) - - -@execute_node.register(ops.ArrayCollect, pd.Series, (type(None), pd.Series)) -def execute_array_collect(op, data, where, aggcontext=None, **kwargs): - return aggcontext.agg(data.loc[where] if where is not None else data, np.array) - - -@execute_node.register(ops.ArrayCollect, SeriesGroupBy, (type(None), pd.Series)) -def execute_array_collect_groupby(op, data, where, aggcontext=None, **kwargs): - return aggcontext.agg( - ( - data.obj.loc[where].groupby(data.grouping.grouper) - if where is not None - else data - ), - np.array, - ) - - -@execute_node.register(ops.Unnest, pd.Series) -def execute_unnest(op, data, **kwargs): - return data[data.map(lambda v: bool(len(v)), na_action="ignore")].explode() - - -@execute_node.register(ops.ArrayFlatten, pd.Series) -def execute_array_flatten(op, data, **kwargs): - return data.map( - lambda v: list(itertools.chain.from_iterable(v)), na_action="ignore" - ) diff --git a/ibis/backends/pandas/execution/constants.py b/ibis/backends/pandas/execution/constants.py deleted file mode 100644 index 0e543561a869..000000000000 --- a/ibis/backends/pandas/execution/constants.py +++ /dev/null @@ -1,106 +0,0 @@ -"""Constants for the pandas backend.""" - -from __future__ import annotations - -import operator - -import numpy as np -import pandas as pd - -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -import ibis.util - -JOIN_TYPES = { - ops.LeftJoin: "left", - ops.RightJoin: "right", - ops.InnerJoin: "inner", - ops.OuterJoin: "outer", -} - - -LEFT_JOIN_SUFFIX = f"_ibis_left_{ibis.util.guid()}" -RIGHT_JOIN_SUFFIX = f"_ibis_right_{ibis.util.guid()}" -JOIN_SUFFIXES = LEFT_JOIN_SUFFIX, RIGHT_JOIN_SUFFIX -ALTERNATE_SUFFIXES = { - LEFT_JOIN_SUFFIX: RIGHT_JOIN_SUFFIX, - RIGHT_JOIN_SUFFIX: LEFT_JOIN_SUFFIX, -} - - -IBIS_TYPE_TO_PANDAS_TYPE: dict[dt.DataType, type | str] = { - dt.float16: np.float16, - dt.float32: np.float32, - dt.float64: np.float64, - dt.float32: np.float32, - dt.float64: np.float64, - dt.int8: np.int8, - dt.int16: np.int16, - dt.int32: np.int32, - dt.int64: np.int64, - dt.string: str, - dt.timestamp: "datetime64[ns]", - dt.boolean: np.bool_, - dt.json: str, - dt.float16.copy(nullable=False): np.float16, - dt.float32.copy(nullable=False): np.float32, - dt.float64.copy(nullable=False): np.float64, - dt.float32.copy(nullable=False): np.float32, - dt.float64.copy(nullable=False): np.float64, - 
dt.int8.copy(nullable=False): np.int8, - dt.int16.copy(nullable=False): np.int16, - dt.int32.copy(nullable=False): np.int32, - dt.int64.copy(nullable=False): np.int64, - dt.string.copy(nullable=False): str, - dt.timestamp.copy(nullable=False): "datetime64[ns]", - dt.boolean.copy(nullable=False): np.bool_, - dt.json.copy(nullable=False): str, -} - - -IBIS_TO_PYTHON_LITERAL_TYPES = { - dt.boolean: bool, - dt.float64: float, - dt.float32: float, - dt.int64: int, - dt.int32: int, - dt.int16: int, - dt.int8: int, - dt.string: str, - dt.date: lambda x: pd.Timestamp(x).to_pydatetime().date(), - dt.boolean.copy(nullable=False): bool, - dt.float64.copy(nullable=False): float, - dt.float32.copy(nullable=False): float, - dt.int64.copy(nullable=False): int, - dt.int32.copy(nullable=False): int, - dt.int16.copy(nullable=False): int, - dt.int8.copy(nullable=False): int, - dt.string.copy(nullable=False): str, - dt.date.copy(nullable=False): lambda x: pd.Timestamp(x).to_pydatetime().date(), -} - - -BINARY_OPERATIONS = { - ops.Greater: operator.gt, - ops.Less: operator.lt, - ops.LessEqual: operator.le, - ops.GreaterEqual: operator.ge, - ops.Equals: operator.eq, - ops.NotEquals: operator.ne, - ops.And: operator.and_, - ops.Or: operator.or_, - ops.Xor: operator.xor, - ops.Add: operator.add, - ops.Subtract: operator.sub, - ops.Multiply: operator.mul, - ops.Divide: operator.truediv, - ops.FloorDivide: operator.floordiv, - ops.Modulus: operator.mod, - ops.Power: operator.pow, - ops.IdenticalTo: lambda x, y: (x == y) | (pd.isnull(x) & pd.isnull(y)), - ops.BitwiseXor: lambda x, y: np.bitwise_xor(x, y), - ops.BitwiseOr: lambda x, y: np.bitwise_or(x, y), - ops.BitwiseAnd: lambda x, y: np.bitwise_and(x, y), - ops.BitwiseLeftShift: lambda x, y: np.left_shift(x, y), - ops.BitwiseRightShift: lambda x, y: np.right_shift(x, y), -} diff --git a/ibis/backends/pandas/execution/decimal.py b/ibis/backends/pandas/execution/decimal.py deleted file mode 100644 index ac34bea4e8a2..000000000000 --- a/ibis/backends/pandas/execution/decimal.py +++ /dev/null @@ -1,135 +0,0 @@ -from __future__ import annotations - -import decimal -import math -import numbers - -import numpy as np -import pandas as pd - -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -from ibis.backends.pandas.dispatch import execute_node -from ibis.common.exceptions import OperationNotDefinedError - - -@execute_node.register(ops.Ln, decimal.Decimal) -def execute_decimal_natural_log(op, data, **kwargs): - try: - return data.ln() - except decimal.InvalidOperation: - return decimal.Decimal("NaN") - - -@execute_node.register(ops.Log, decimal.Decimal, decimal.Decimal) -def execute_decimal_log_with_decimal_base(op, data, base, **kwargs): - try: - return data.ln() / base.ln() - except decimal.InvalidOperation: - return decimal.Decimal("NaN") - - -@execute_node.register(ops.Log, decimal.Decimal, type(None)) -def execute_decimal_log_with_no_base(op, data, _, **kwargs): - return execute_decimal_natural_log(op, data, **kwargs) - - -@execute_node.register(ops.Log, decimal.Decimal, numbers.Real) -def execute_decimal_log_with_real_base(op, data, base, **kwargs): - return execute_node(op, data, decimal.Decimal(base), **kwargs) - - -@execute_node.register(ops.Log, decimal.Decimal, np.integer) -def execute_decimal_log_with_np_integer_base(op, data, base, **kwargs): - return execute_node(op, data, int(base), **kwargs) - - -@execute_node.register(ops.Log2, decimal.Decimal) -def execute_decimal_log2(op, data, **kwargs): - try: - return data.ln() / 
decimal.Decimal(2).ln() - except decimal.InvalidOperation: - return decimal.Decimal("NaN") - - -# While ops.Negate is a subclass of ops.Unary, multipledispatch will be -# faster if we provide types that can potentially match the types of inputs -# exactly -@execute_node.register((ops.Unary, ops.Negate), decimal.Decimal) -def execute_decimal_unary(op, data, **kwargs): - op_type = type(op) - operation_name = op_type.__name__.lower() - function = getattr( - decimal.Decimal, - operation_name, - None, - ) - if function is None: - math_function = getattr(math, operation_name, None) - if math_function is None: - raise OperationNotDefinedError(f"{op_type.__name__} not supported") - function = lambda x: decimal.Decimal(math_function(x)) - try: - return function(data) - except decimal.InvalidOperation: - return decimal.Decimal("NaN") - - -@execute_node.register(ops.Sign, decimal.Decimal) -def execute_decimal_sign(op, data, **kwargs): - return data if not data else decimal.Decimal(1).copy_sign(data) - - -@execute_node.register(ops.Abs, decimal.Decimal) -def execute_decimal_abs(op, data, **kwargs): - return abs(data) - - -@execute_node.register(ops.Round, decimal.Decimal, (np.integer, int)) -def execute_round_decimal(op, data, places, **kwargs): - # If we only allowed Python 3, we wouldn't have to implement any of this; - # we could just call round(data, places) :( - tuple_value = data.as_tuple() - precision = len(tuple_value.digits) - integer_part_length = precision + min(tuple_value.exponent, 0) - - if places < 0: - decimal_format_string = "0.{}E+{:d}".format( - "0" * (integer_part_length - 1 + places), - max(integer_part_length + places, abs(places)), - ) - else: - decimal_format_string = "{}.{}".format("0" * integer_part_length, "0" * places) - - places = decimal.Decimal(decimal_format_string) - return data.quantize(places) - - -@execute_node.register(ops.Round, decimal.Decimal, type(None)) -def execute_round_decimal_no_places(op, data, _, **kwargs): - return np.int64(round(data)) - - -@execute_node.register(ops.Cast, pd.Series, dt.Decimal) -def execute_cast_series_to_decimal(op, data, type, **kwargs): - precision = type.precision - scale = type.scale - context = decimal.Context(prec=precision) - places = context.create_decimal( - "{}.{}".format("0" * (precision - scale), "0" * scale) - ) - return data.apply( - lambda x, context=context, places=places: ( - context.create_decimal(x).quantize(places) - ) - ) - - -@execute_node.register(ops.E) -def execute_e(op, **kwargs): - return np.e - - -@execute_node.register(ops.Pi) -def execute_pi(op, **kwargs): - return np.pi diff --git a/ibis/backends/pandas/execution/generic.py b/ibis/backends/pandas/execution/generic.py deleted file mode 100644 index 7c8b53cc2f79..000000000000 --- a/ibis/backends/pandas/execution/generic.py +++ /dev/null @@ -1,1479 +0,0 @@ -"""Execution rules for generic ibis operations.""" - -from __future__ import annotations - -import collections -import contextlib -import datetime -import decimal -import functools -import math -import numbers -import operator -from collections.abc import Mapping, Sized - -import numpy as np -import pandas as pd -import pytz -import toolz -from pandas.core.groupby import DataFrameGroupBy, SeriesGroupBy - -import ibis.common.exceptions as com -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -import ibis.expr.types as ir -from ibis.backends.base.df.scope import Scope -from ibis.backends.base.df.timecontext import TimeContext, get_time_col -from ibis.backends.pandas import Backend as 
PandasBackend -from ibis.backends.pandas import aggcontext as agg_ctx -from ibis.backends.pandas.core import ( - boolean_types, - date_types, - execute, - fixed_width_types, - floating_types, - integer_types, - numeric_types, - scalar_types, - simple_types, - timedelta_types, - timestamp_types, -) -from ibis.backends.pandas.dispatch import execute_literal, execute_node -from ibis.backends.pandas.execution import constants -from ibis.backends.pandas.execution.util import coerce_to_output, get_grouping - - -# By default return the literal value -@execute_literal.register(ops.Literal, object, dt.DataType) -def execute_node_literal_value_datatype(op, value, datatype, **kwargs): - return value - - -# Because True and 1 hash to the same value, if we have True or False in scope -# keys while executing anything that should evaluate to 1 or 0 evaluates to -# True or False respectively. This is a hack to work around that by casting the -# bool to an integer. -@execute_literal.register(ops.Literal, object, dt.Integer) -def execute_node_literal_any_integer_datatype(op, value, datatype, **kwargs): - if value is None: - return value - return int(value) - - -@execute_literal.register(ops.Literal, object, dt.Boolean) -def execute_node_literal_any_boolean_datatype(op, value, datatype, **kwargs): - if value is None: - return value - return bool(value) - - -@execute_literal.register(ops.Literal, object, dt.Floating) -def execute_node_literal_any_floating_datatype(op, value, datatype, **kwargs): - if value is None: - return value - return float(value) - - -@execute_literal.register(ops.Literal, object, dt.Array) -def execute_node_literal_any_array_datatype(op, value, datatype, **kwargs): - if value is None: - return value - return np.array(value) - - -@execute_literal.register(ops.Literal, dt.DataType) -def execute_node_literal_datatype(op, datatype, **kwargs): - return op.value - - -@execute_literal.register( - ops.Literal, (*timedelta_types, str, *integer_types, type(None)), dt.Interval -) -def execute_interval_literal(op, value, dtype, **kwargs): - if value is None: - return pd.NaT - return pd.Timedelta(value, dtype.unit.short) - - -@execute_node.register(ops.Limit, pd.DataFrame, integer_types, integer_types) -def execute_limit_frame(op, data, nrows: int, offset: int, **kwargs): - return data.iloc[offset : offset + nrows] - - -@execute_node.register(ops.Limit, pd.DataFrame, type(None), integer_types) -def execute_limit_frame_no_limit(op, data, nrows: None, offset: int, **kwargs): - return data.iloc[offset:] - - -@execute_node.register(ops.Cast, SeriesGroupBy, dt.DataType) -def execute_cast_series_group_by(op, data, type, **kwargs): - result = execute_cast_series_generic(op, data.obj, type, **kwargs) - return result.groupby(get_grouping(data.grouper.groupings), group_keys=False) - - -@execute_node.register(ops.Cast, pd.Series, dt.DataType) -def execute_cast_series_generic(op, data, type, **kwargs): - out = data.astype(constants.IBIS_TYPE_TO_PANDAS_TYPE[type]) - if type.is_integer(): - if op.arg.dtype.is_timestamp(): - return out.floordiv(int(1e9)) - elif op.arg.dtype.is_date(): - return out.floordiv(int(24 * 60 * 60 * 1e9)) - return out - - -@execute_node.register(ops.Cast, pd.Series, dt.Array) -def execute_cast_series_array(op, data, type, **kwargs): - value_type = type.value_type - numpy_type = constants.IBIS_TYPE_TO_PANDAS_TYPE.get(value_type, None) - if numpy_type is None: - raise ValueError( - "Array value type must be a primitive type " - "(e.g., number, string, or timestamp)" - ) - - def 
cast_to_array(array, numpy_type=numpy_type): - elems = [ - el if el is None else np.array(el, dtype=numpy_type).item() for el in array - ] - try: - return np.array(elems, dtype=numpy_type) - except TypeError: - return np.array(elems) - - return data.map(cast_to_array) - - -@execute_node.register(ops.Cast, list, dt.Array) -def execute_cast_list_array(op, data, type, **kwargs): - value_type = type.value_type - numpy_type = constants.IBIS_TYPE_TO_PANDAS_TYPE.get(value_type, None) - if numpy_type is None: - raise ValueError( - "Array value type must be a primitive type " - "(e.g., number, string, or timestamp)" - ) - - def cast_to_array(array, numpy_type=numpy_type): - elems = [ - el if el is None else np.array(el, dtype=numpy_type).item() for el in array - ] - try: - return np.array(elems, dtype=numpy_type) - except TypeError: - return np.array(elems) - - return cast_to_array(data) - - -@execute_node.register(ops.Cast, pd.Series, dt.Timestamp) -def execute_cast_series_timestamp(op, data, type, **kwargs): - arg = op.arg - from_type = arg.dtype - - if from_type.equals(type): # noop cast - return data - - tz = type.timezone - - if from_type.is_timestamp(): - from_tz = from_type.timezone - if tz is None and from_tz is None: - return data - elif tz is None or from_tz is None: - return data.dt.tz_localize(tz) - elif tz is not None and from_tz is not None: - return data.dt.tz_convert(tz) - elif from_type.is_date(): - return data if tz is None else data.dt.tz_localize(tz) - - if from_type.is_string() or from_type.is_integer(): - if from_type.is_integer(): - timestamps = pd.to_datetime(data.values, unit="s") - else: - timestamps = pd.to_datetime(data.values) - if getattr(timestamps.dtype, "tz", None) is not None: - method_name = "tz_convert" - else: - method_name = "tz_localize" - method = getattr(timestamps, method_name) - timestamps = method(tz) - return pd.Series(timestamps, index=data.index, name=data.name) - - raise TypeError(f"Don't know how to cast {from_type} to {type}") - - -def _normalize(values, original_index, name, timezone=None): - index = pd.DatetimeIndex(values, tz=timezone) - return pd.Series(index.normalize(), index=original_index, name=name) - - -@execute_node.register(ops.Cast, pd.Series, dt.Date) -def execute_cast_series_date(op, data, type, **kwargs): - arg = op.args[0] - from_type = arg.dtype - - if from_type.equals(type): - return data - - if from_type.is_timestamp(): - return _normalize( - data.values, data.index, data.name, timezone=from_type.timezone - ) - - if from_type.is_string(): - values = data.values - datetimes = pd.to_datetime(values) - with contextlib.suppress(TypeError): - datetimes = datetimes.tz_convert(None) - dates = _normalize(datetimes, data.index, data.name) - return pd.Series(dates, index=data.index, name=data.name) - - if from_type.is_integer(): - return pd.Series( - pd.to_datetime(data.values, unit="D").values, - index=data.index, - name=data.name, - ) - - raise TypeError(f"Don't know how to cast {from_type} to {type}") - - -@execute_node.register(ops.SortKey, pd.Series, bool) -def execute_sort_key_series(op, data, _, **kwargs): - return data - - -def call_numpy_ufunc(func, op, data, **kwargs): - if getattr(data, "dtype", None) == np.dtype(np.object_): - return data.apply(functools.partial(execute_node, op, **kwargs)) - if func is None: - raise com.OperationNotDefinedError(f"{type(op).__name__} not supported") - return func(data) - - -@execute_node.register(ops.Negate, fixed_width_types + timedelta_types) -def execute_obj_negate(op, data, **kwargs): - 
return -data - - -@execute_node.register(ops.Negate, pd.Series) -def execute_series_negate(op, data, **kwargs): - return call_numpy_ufunc(np.negative, op, data, **kwargs) - - -@execute_node.register(ops.Negate, SeriesGroupBy) -def execute_series_group_by_negate(op, data, **kwargs): - return execute_series_negate(op, data.obj, **kwargs).groupby( - get_grouping(data.grouper.groupings), group_keys=False - ) - - -@execute_node.register(ops.Unary, pd.Series) -def execute_series_unary_op(op, data, **kwargs): - op_type = type(op) - if op_type == ops.BitwiseNot: - function = np.bitwise_not - else: - function = getattr(np, op_type.__name__.lower()) - return call_numpy_ufunc(function, op, data, **kwargs) - - -@execute_node.register(ops.Acos, (pd.Series, *numeric_types)) -def execute_series_acos(_, data, **kwargs): - return np.arccos(data) - - -@execute_node.register(ops.Asin, (pd.Series, *numeric_types)) -def execute_series_asin(_, data, **kwargs): - return np.arcsin(data) - - -@execute_node.register(ops.Atan, (pd.Series, *numeric_types)) -def execute_series_atan(_, data, **kwargs): - return np.arctan(data) - - -@execute_node.register(ops.Cot, (pd.Series, *numeric_types)) -def execute_series_cot(_, data, **kwargs): - return 1.0 / np.tan(data) - - -@execute_node.register( - ops.Atan2, (pd.Series, *numeric_types), (pd.Series, *numeric_types) -) -def execute_series_atan2(_, y, x, **kwargs): - return np.arctan2(y, x) - - -@execute_node.register((ops.Cos, ops.Sin, ops.Tan), (pd.Series, *numeric_types)) -def execute_series_trig(op, data, **kwargs): - function = getattr(np, type(op).__name__.lower()) - return call_numpy_ufunc(function, op, data, **kwargs) - - -@execute_node.register(ops.Radians, (pd.Series, *numeric_types)) -def execute_series_radians(_, data, **kwargs): - return np.radians(data) - - -@execute_node.register(ops.Degrees, (pd.Series, *numeric_types)) -def execute_series_degrees(_, data, **kwargs): - return np.degrees(data) - - -@execute_node.register((ops.Ceil, ops.Floor), pd.Series) -def execute_series_ceil(op, data, **kwargs): - return_type = np.object_ if data.dtype == np.object_ else np.int64 - func = getattr(np, type(op).__name__.lower()) - return call_numpy_ufunc(func, op, data, **kwargs).astype(return_type) - - -@execute_node.register(ops.BitwiseNot, integer_types) -def execute_int_bitwise_not(op, data, **kwargs): - return np.invert(data) - - -def vectorize_object(op, arg, *args, **kwargs): - func = np.vectorize(functools.partial(execute_node, op, **kwargs)) - return pd.Series(func(arg, *args), index=arg.index, name=arg.name) - - -@execute_node.register( - ops.Log, pd.Series, (pd.Series, numbers.Real, decimal.Decimal, type(None)) -) -def execute_series_log_with_base(op, data, base, **kwargs): - if data.dtype == np.dtype(np.object_): - return vectorize_object(op, data, base, **kwargs) - - if base is None: - return np.log(data) - return np.log(data) / np.log(base) - - -@execute_node.register(ops.Ln, pd.Series) -def execute_series_natural_log(op, data, **kwargs): - if data.dtype == np.dtype(np.object_): - return data.apply(functools.partial(execute_node, op, **kwargs)) - return np.log(data) - - -@execute_node.register( - ops.Clip, - pd.Series, - (pd.Series, type(None)) + numeric_types, - (pd.Series, type(None)) + numeric_types, -) -def execute_series_clip(op, data, lower, upper, **kwargs): - return data.clip(lower=lower, upper=upper) - - -@execute_node.register( - ops.Quantile, - pd.Series, - (np.ndarray, *numeric_types), - (pd.Series, type(None)), -) -def execute_series_quantile(op, 
data, quantile, mask, aggcontext=None, **_): - return aggcontext.agg( - data if mask is None else data.loc[mask], - "quantile", - q=quantile, - ) - - -@execute_node.register(ops.Quantile, pd.Series, (np.ndarray, *numeric_types)) -def execute_series_quantile_default(op, data, quantile, aggcontext=None, **_): - return aggcontext.agg(data, "quantile", q=quantile) - - -@execute_node.register( - ops.Quantile, - SeriesGroupBy, - (np.ndarray, *numeric_types), - (SeriesGroupBy, type(None)), -) -def execute_series_group_by_quantile(op, data, quantile, mask, aggcontext=None, **_): - return aggcontext.agg( - data, - ( - "quantile" - if mask is None - else functools.partial(_filtered_reduction, mask.obj, pd.Series.quantile) - ), - q=quantile, - ) - - -@execute_node.register( - ops.MultiQuantile, - pd.Series, - (np.ndarray, *numeric_types), - (pd.Series, type(None)), -) -def execute_series_quantile_multi(op, data, quantile, mask, aggcontext=None, **_): - return np.array( - aggcontext.agg(data if mask is None else data.loc[mask], "quantile", q=quantile) - ) - - -@execute_node.register( - ops.MultiQuantile, - SeriesGroupBy, - np.ndarray, - (SeriesGroupBy, type(None)), -) -def execute_series_quantile_multi_groupby( - op, data, quantile, mask, aggcontext=None, **kwargs -): - def q(x, quantile): - result = x.quantile(quantile).tolist() - return [result for _ in range(len(x))] - - return aggcontext.agg( - data, - q if mask is None else functools.partial(_filtered_reduction, mask.obj, q), - quantile, - ) - - -@execute_node.register(ops.MultiQuantile, SeriesGroupBy, np.ndarray) -def execute_series_quantile_multi_groupby_default( - op, data, quantile, aggcontext=None, **_ -): - def q(x, quantile): - result = x.quantile(quantile).tolist() - return [result for _ in range(len(x))] - - return aggcontext.agg(data, q, quantile) - - -@execute_node.register(ops.Cast, type(None), dt.DataType) -def execute_cast_null_to_anything(op, data, type, **kwargs): - return None - - -@execute_node.register(ops.Cast, datetime.datetime, dt.String) -def execute_cast_datetime_or_timestamp_to_string(op, data, type, **kwargs): - """Cast timestamps to strings.""" - return str(data) - - -@execute_node.register(ops.Cast, datetime.datetime, dt.Int64) -def execute_cast_timestamp_to_integer(op, data, type, **kwargs): - """Cast timestamps to integers.""" - t = pd.Timestamp(data) - return pd.NA if pd.isna(t) else int(t.timestamp()) - - -@execute_node.register(ops.Cast, (np.bool_, bool), dt.Timestamp) -def execute_cast_bool_to_timestamp(op, data, type, **kwargs): - raise TypeError( - "Casting boolean values to timestamps does not make sense. If you " - "really want to cast boolean values to timestamps please cast to " - "int64 first then to timestamp: " - "value.cast('int64').cast('timestamp')" - ) - - -@execute_node.register(ops.Cast, (np.bool_, bool), dt.Interval) -def execute_cast_bool_to_interval(op, data, type, **kwargs): - raise TypeError( - "Casting boolean values to intervals does not make sense. 
If you " - "really want to cast boolean values to intervals please cast to " - "int64 first then to interval: " - "value.cast('int64').cast(ibis.expr.datatypes.Interval(...))" - ) - - -@execute_node.register(ops.Cast, integer_types, dt.Timestamp) -def execute_cast_integer_to_timestamp(op, data, type, **kwargs): - """Cast integer to timestamp.""" - return pd.Timestamp(data, unit="s", tz=type.timezone) - - -@execute_node.register(ops.Cast, str, dt.Timestamp) -def execute_cast_string_to_timestamp(op, data, type, **kwargs): - """Cast string to timestamp.""" - return pd.Timestamp(data, tz=type.timezone) - - -@execute_node.register(ops.Cast, datetime.datetime, dt.Timestamp) -def execute_cast_timestamp_to_timestamp(op, data, type, **kwargs): - """Cast timestamps to other timestamps including timezone if necessary.""" - input_timezone = data.tzinfo - target_timezone = type.timezone - - if input_timezone == target_timezone: - return data - - if input_timezone is None or target_timezone is None: - return data.astimezone( - tz=None if target_timezone is None else pytz.timezone(target_timezone) - ) - - return data.astimezone(tz=pytz.timezone(target_timezone)) - - -@execute_node.register(ops.Cast, fixed_width_types + (str,), dt.DataType) -def execute_cast_string_literal(op, data, type, **kwargs): - try: - cast_function = constants.IBIS_TO_PYTHON_LITERAL_TYPES[type] - except KeyError: - raise TypeError(f"Don't know how to cast {data!r} to type {type}") - else: - return cast_function(data) - - -@execute_node.register(ops.Cast, Mapping, dt.DataType) -def execute_cast_mapping_literal(op, data, type, **kwargs): - data = ( - (ops.Literal(k, type.key_type), ops.Literal(v, type.value_type)) - for k, v in data.items() - ) - return {execute(k, **kwargs): execute(v, **kwargs) for k, v in data} - - -@execute_node.register(ops.Round, scalar_types, (int, type(None))) -def execute_round_scalars(op, data, places, **kwargs): - return round(data, places) if places else round(data) - - -@execute_node.register(ops.Round, pd.Series, (pd.Series, np.integer, type(None), int)) -def execute_round_series(op, data, places, **kwargs): - if data.dtype == np.dtype(np.object_): - return vectorize_object(op, data, places, **kwargs) - result = data.round(places or 0) - return result if places else result.astype("int64") - - -@execute_node.register(ops.TableColumn, (pd.DataFrame, DataFrameGroupBy)) -def execute_table_column_df_or_df_groupby(op, data, **kwargs): - return data[op.name] - - -@execute_node.register(ops.Aggregation, pd.DataFrame) -def execute_aggregation_dataframe( - op, - data, - scope=None, - timecontext: TimeContext | None = None, - **kwargs, -): - assert op.metrics, "no metrics found during aggregation execution" - - if op.sort_keys: - raise NotImplementedError("sorting on aggregations not yet implemented") - - if op.predicates: - predicate = functools.reduce( - operator.and_, - ( - execute(p, scope=scope, timecontext=timecontext, **kwargs) - for p in op.predicates - ), - ) - data = data.loc[predicate] - - columns: dict[str, str] = {} - - if op.by: - grouping_keys = [ - key.name - if isinstance(key, ops.TableColumn) - else execute(key, scope=scope, timecontext=timecontext, **kwargs).rename( - key.name - ) - for key in op.by - ] - source = data.groupby( - grouping_keys[0] if len(grouping_keys) == 1 else grouping_keys, - group_keys=False, - ) - else: - source = data - - scope = scope.merge_scope(Scope({op.table: source}, timecontext)) - - pieces = [ - coerce_to_output( - execute(metric, scope=scope, 
timecontext=timecontext, **kwargs), - metric, - ) - for metric in op.metrics - ] - - result = pd.concat(pieces, axis=1) - - # If grouping, need a reset to get the grouping key back as a column - if op.by: - result = result.reset_index() - - result.columns = [columns.get(c, c) for c in result.columns] - - if op.having: - # .having(...) is only accessible on groupby, so this should never - # raise - if not op.by: - raise ValueError( - "Filtering out aggregation values is not allowed without at " - "least one grouping key" - ) - - # TODO(phillipc): Don't recompute identical subexpressions - predicate = functools.reduce( - operator.and_, - ( - execute(h, scope=scope, timecontext=timecontext, **kwargs) - for h in op.having - ), - ) - assert len(predicate) == len( - result - ), "length of predicate does not match length of DataFrame" - result = result.loc[predicate.values] - return result - - -@execute_node.register(ops.Reduction, SeriesGroupBy, type(None)) -def execute_reduction_series_groupby(op, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg(data, type(op).__name__.lower()) - - -@execute_node.register(ops.First, SeriesGroupBy, type(None)) -def execute_first_series_groupby(op, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg(data, lambda x: getattr(x, "iat", x)[0]) - - -@execute_node.register(ops.Last, SeriesGroupBy, type(None)) -def execute_last_series_groupby(op, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg(data, lambda x: getattr(x, "iat", x)[-1]) - - -variance_ddof = {"pop": 0, "sample": 1} - - -@execute_node.register(ops.Variance, SeriesGroupBy, type(None)) -def execute_reduction_series_groupby_var(op, data, _, aggcontext=None, **kwargs): - return aggcontext.agg(data, "var", ddof=variance_ddof[op.how]) - - -@execute_node.register(ops.StandardDev, SeriesGroupBy, type(None)) -def execute_reduction_series_groupby_std(op, data, _, aggcontext=None, **kwargs): - return aggcontext.agg(data, "std", ddof=variance_ddof[op.how]) - - -@execute_node.register( - (ops.CountDistinct, ops.ApproxCountDistinct), - SeriesGroupBy, - type(None), -) -def execute_count_distinct_series_groupby(op, data, _, aggcontext=None, **kwargs): - return aggcontext.agg(data, "nunique") - - -@execute_node.register(ops.Arbitrary, SeriesGroupBy, type(None)) -def execute_arbitrary_series_groupby(op, data, _, aggcontext=None, **kwargs): - how = op.how - if how is None: - how = "first" - - if how not in {"first", "last"}: - raise com.OperationNotDefinedError(f"Arbitrary {how!r} is not supported") - return aggcontext.agg(data, how) - - -@execute_node.register( - (ops.ArgMin, ops.ArgMax), - SeriesGroupBy, - SeriesGroupBy, - type(None), -) -def execute_reduction_series_groupby_argidx( - op, data, key, _, aggcontext=None, **kwargs -): - method = operator.methodcaller(op.__class__.__name__.lower()) - - def reduce(data, key=key.obj, method=method): - return data.iloc[method(key.loc[data.index])] - - return aggcontext.agg(data, reduce) - - -def _filtered_reduction(mask, method, data): - return method(data[mask[data.index]]) - - -@execute_node.register(ops.Reduction, SeriesGroupBy, SeriesGroupBy) -def execute_reduction_series_gb_mask(op, data, mask, aggcontext=None, **kwargs): - method = operator.methodcaller(type(op).__name__.lower()) - return aggcontext.agg( - data, functools.partial(_filtered_reduction, mask.obj, method) - ) - - -@execute_node.register(ops.First, SeriesGroupBy, SeriesGroupBy) -def execute_first_series_gb_mask(op, data, mask, aggcontext=None, **kwargs): - return 
aggcontext.agg( - data, functools.partial(_filtered_reduction, mask.obj, lambda x: x.iloc[0]) - ) - - -@execute_node.register(ops.Last, SeriesGroupBy, SeriesGroupBy) -def execute_last_series_gb_mask(op, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg( - data, functools.partial(_filtered_reduction, mask.obj, lambda x: x.iloc[-1]) - ) - - -@execute_node.register( - (ops.CountDistinct, ops.ApproxCountDistinct), - SeriesGroupBy, - SeriesGroupBy, -) -def execute_count_distinct_series_groupby_mask( - op, data, mask, aggcontext=None, **kwargs -): - return aggcontext.agg( - data, - functools.partial(_filtered_reduction, mask.obj, pd.Series.nunique), - ) - - -@execute_node.register(ops.Variance, SeriesGroupBy, SeriesGroupBy) -def execute_var_series_groupby_mask(op, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg( - data, - lambda x, mask=mask.obj, ddof=variance_ddof[op.how]: ( - x[mask[x.index]].var(ddof=ddof) - ), - ) - - -@execute_node.register(ops.StandardDev, SeriesGroupBy, SeriesGroupBy) -def execute_std_series_groupby_mask(op, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg( - data, - lambda x, mask=mask.obj, ddof=variance_ddof[op.how]: ( - x[mask[x.index]].std(ddof=ddof) - ), - ) - - -@execute_node.register(ops.CountStar, DataFrameGroupBy, type(None)) -def execute_count_star_frame_groupby(op, data, _, **kwargs): - return data.size() - - -@execute_node.register(ops.CountDistinctStar, DataFrameGroupBy, type(None)) -def execute_count_distinct_star_frame_groupby(op, data, _, **kwargs): - return data.nunique() - - -@execute_node.register(ops.Reduction, pd.Series, (pd.Series, type(None))) -def execute_reduction_series_mask(op, data, mask, aggcontext=None, **kwargs): - operand = data[mask] if mask is not None else data - return aggcontext.agg(operand, type(op).__name__.lower()) - - -@execute_node.register(ops.First, pd.Series, (pd.Series, type(None))) -def execute_first_series_mask(op, data, mask, aggcontext=None, **kwargs): - operand = data[mask] if mask is not None else data - - def _first(x): - return getattr(x, "iloc", x)[0] - - return aggcontext.agg(operand, _first) - - -@execute_node.register(ops.Last, pd.Series, (pd.Series, type(None))) -def execute_last_series_mask(op, data, mask, aggcontext=None, **kwargs): - operand = data[mask] if mask is not None else data - - def _last(x): - return getattr(x, "iloc", x)[-1] - - return aggcontext.agg(operand, _last) - - -@execute_node.register( - (ops.CountDistinct, ops.ApproxCountDistinct), - pd.Series, - (pd.Series, type(None)), -) -def execute_count_distinct_series_mask(op, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg(data[mask] if mask is not None else data, "nunique") - - -@execute_node.register(ops.Arbitrary, pd.Series, (pd.Series, type(None))) -def execute_arbitrary_series_mask(op, data, mask, aggcontext=None, **kwargs): - if op.how == "first": - index = 0 - elif op.how == "last": - index = -1 - else: - raise com.OperationNotDefinedError(f"Arbitrary {op.how!r} is not supported") - - data = data[mask] if mask is not None else data - return data.iloc[index] - - -@execute_node.register(ops.StandardDev, pd.Series, (pd.Series, type(None))) -def execute_standard_dev_series(op, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg( - data[mask] if mask is not None else data, - "std", - ddof=variance_ddof[op.how], - ) - - -@execute_node.register(ops.Variance, pd.Series, (pd.Series, type(None))) -def execute_variance_series(op, data, mask, aggcontext=None, **kwargs): - return 
aggcontext.agg( - data[mask] if mask is not None else data, - "var", - ddof=variance_ddof[op.how], - ) - - -@execute_node.register((ops.Any, ops.All), pd.Series, (pd.Series, type(None))) -def execute_any_all_series(op, data, mask, aggcontext=None, **kwargs): - if mask is not None: - data = data.loc[mask] - if isinstance(aggcontext, (agg_ctx.Summarize, agg_ctx.Transform)): - result = aggcontext.agg(data, type(op).__name__.lower()) - else: - result = aggcontext.agg( - data, lambda data: getattr(data, type(op).__name__.lower())() - ) - try: - return result.astype(bool) - except TypeError: - return result - - -@execute_node.register((ops.Any, ops.All), SeriesGroupBy, type(None)) -def execute_any_all_series_group_by(op, data, mask, aggcontext=None, **kwargs): - if mask is not None: - data = data.obj.loc[mask].groupby(get_grouping(data.grouper.groupings)) - if isinstance(aggcontext, (agg_ctx.Summarize, agg_ctx.Transform)): - result = aggcontext.agg(data, type(op).__name__.lower()) - else: - result = aggcontext.agg( - data, lambda data: getattr(data, type(op).__name__.lower())() - ) - try: - return result.astype(bool) - except TypeError: - return result - - -@execute_node.register(ops.CountStar, pd.DataFrame, type(None)) -def execute_count_star_frame(op, data, _, **kwargs): - return len(data) - - -@execute_node.register(ops.CountStar, pd.DataFrame, pd.Series) -def execute_count_star_frame_filter(op, data, where, **kwargs): - return len(data) - len(where) + where.sum() - - -@execute_node.register(ops.CountDistinctStar, pd.DataFrame, type(None)) -def execute_count_distinct_star_frame(op, data, _, **kwargs): - return len(data.drop_duplicates()) - - -@execute_node.register(ops.CountDistinctStar, pd.DataFrame, pd.Series) -def execute_count_distinct_star_frame_filter(op, data, filt, **kwargs): - return len(data.loc[filt].drop_duplicates()) - - -@execute_node.register(ops.BitAnd, pd.Series, (pd.Series, type(None))) -def execute_bit_and_series(_, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg( - data[mask] if mask is not None else data, - np.bitwise_and.reduce, - ) - - -@execute_node.register(ops.BitOr, pd.Series, (pd.Series, type(None))) -def execute_bit_or_series(_, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg( - data[mask] if mask is not None else data, - np.bitwise_or.reduce, - ) - - -@execute_node.register(ops.BitXor, pd.Series, (pd.Series, type(None))) -def execute_bit_xor_series(_, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg( - data[mask] if mask is not None else data, - np.bitwise_xor.reduce, - ) - - -@execute_node.register( - (ops.ArgMin, ops.ArgMax), - pd.Series, - pd.Series, - (pd.Series, type(None)), -) -def execute_argmin_series_mask(op, data, key, mask, aggcontext=None, **kwargs): - method_name = op.__class__.__name__.lower() - masked_key = key[mask] if mask is not None else key - idx = aggcontext.agg(masked_key, method_name) - masked = data[mask] if mask is not None else data - return masked.iloc[idx] - - -@execute_node.register(ops.Mode, pd.Series, (pd.Series, type(None))) -def execute_mode_series(_, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg( - data[mask] if mask is not None else data, lambda x: x.mode().iloc[0] - ) - - -@execute_node.register(ops.Mode, SeriesGroupBy, (SeriesGroupBy, type(None))) -def execute_mode_series_groupby(_, data, mask, aggcontext=None, **kwargs): - def mode(x): - return x.mode().iloc[0] - - if mask is not None: - mode = functools.partial(_filtered_reduction, mask.obj, mode) - - return 
aggcontext.agg(data, mode) - - -@execute_node.register(ops.ApproxMedian, pd.Series, (pd.Series, type(None))) -def execute_approx_median_series(_, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg( - data[mask] if mask is not None else data, lambda x: x.median() - ) - - -@execute_node.register(ops.ApproxMedian, SeriesGroupBy, (SeriesGroupBy, type(None))) -def execute_approx_median_series_groupby(_, data, mask, aggcontext=None, **kwargs): - median = pd.Series.median - - if mask is not None: - median = functools.partial(_filtered_reduction, mask.obj, median) - - return aggcontext.agg(data, median) - - -@execute_node.register((ops.Not, ops.Negate), (bool, np.bool_)) -def execute_not_bool(_, data, **kwargs): - return not data - - -def _execute_binary_op_impl(op, left, right, **_): - op_type = type(op) - try: - operation = constants.BINARY_OPERATIONS[op_type] - except KeyError: - raise com.OperationNotDefinedError( - f"Binary operation {op_type.__name__} not implemented" - ) - else: - return operation(left, right) - - -@execute_node.register(ops.Binary, pd.Series, pd.Series) -@execute_node.register( - (ops.NumericBinary, ops.LogicalBinary, ops.Comparison), - numeric_types, - pd.Series, -) -@execute_node.register( - (ops.NumericBinary, ops.LogicalBinary, ops.Comparison), - pd.Series, - numeric_types, -) -@execute_node.register( - (ops.NumericBinary, ops.LogicalBinary, ops.Comparison), - numeric_types, - numeric_types, -) -@execute_node.register((ops.Comparison, ops.Add, ops.Multiply), pd.Series, str) -@execute_node.register((ops.Comparison, ops.Add, ops.Multiply), str, pd.Series) -@execute_node.register((ops.Comparison, ops.Add), str, str) -@execute_node.register(ops.Multiply, integer_types, str) -@execute_node.register(ops.Multiply, str, integer_types) -@execute_node.register(ops.Comparison, pd.Series, timestamp_types) -@execute_node.register(ops.Comparison, timedelta_types, pd.Series) -@execute_node.register(ops.BitwiseBinary, integer_types, integer_types) -@execute_node.register(ops.BitwiseBinary, pd.Series, integer_types) -@execute_node.register(ops.BitwiseBinary, integer_types, pd.Series) -def execute_binary_op(op, left, right, **kwargs): - return _execute_binary_op_impl(op, left, right, **kwargs) - - -@execute_node.register(ops.Comparison, pd.Series, date_types) -def execute_binary_op_date(op, left, right, **kwargs): - return _execute_binary_op_impl( - op, pd.to_datetime(left), pd.to_datetime(right), **kwargs - ) - - -@execute_node.register(ops.Binary, SeriesGroupBy, SeriesGroupBy) -def execute_binary_op_series_group_by(op, left, right, **kwargs): - left_groupings = get_grouping(left.grouper.groupings) - right_groupings = get_grouping(right.grouper.groupings) - if left_groupings != right_groupings: - raise ValueError( - f"Cannot perform {type(op).__name__} operation on two series with " - "different groupings" - ) - result = execute_binary_op(op, left.obj, right.obj, **kwargs) - return result.groupby(left_groupings, group_keys=False) - - -@execute_node.register(ops.Binary, SeriesGroupBy, simple_types) -def execute_binary_op_series_gb_simple(op, left, right, **kwargs): - result = execute_binary_op(op, left.obj, right, **kwargs) - return result.groupby(get_grouping(left.grouper.groupings), group_keys=False) - - -@execute_node.register(ops.Binary, simple_types, SeriesGroupBy) -def execute_binary_op_simple_series_gb(op, left, right, **kwargs): - result = execute_binary_op(op, left, right.obj, **kwargs) - return result.groupby(get_grouping(right.grouper.groupings), 
group_keys=False) - - -@execute_node.register(ops.Unary, SeriesGroupBy) -def execute_unary_op_series_gb(op, operand, **kwargs): - result = execute_node(op, operand.obj, **kwargs) - return result.groupby(get_grouping(operand.grouper.groupings), group_keys=False) - - -@execute_node.register( - (ops.Log, ops.Round), - SeriesGroupBy, - (numbers.Real, decimal.Decimal, type(None)), -) -def execute_log_series_gb_others(op, left, right, **kwargs): - result = execute_node(op, left.obj, right, **kwargs) - return result.groupby(get_grouping(left.grouper.groupings), group_keys=False) - - -@execute_node.register((ops.Log, ops.Round), SeriesGroupBy, SeriesGroupBy) -def execute_log_series_gb_series_gb(op, left, right, **kwargs): - result = execute_node(op, left.obj, right.obj, **kwargs) - return result.groupby(get_grouping(left.grouper.groupings), group_keys=False) - - -@execute_node.register(ops.Not, pd.Series) -def execute_not_series(op, data, **kwargs): - return ~data - - -@execute_node.register(ops.StringSplit, pd.Series, (pd.Series, str)) -def execute_string_split(op, data, delimiter, **kwargs): - # Doing the iteration using `map` is much faster than doing the iteration - # using `Series.apply` due to Pandas-related overhead. - return pd.Series(np.array(s.split(delimiter)) for s in data) - - -@execute_node.register( - ops.Between, - pd.Series, - (pd.Series, numbers.Real, str, datetime.datetime), - (pd.Series, numbers.Real, str, datetime.datetime), -) -def execute_between(op, data, lower, upper, **kwargs): - return data.between(lower, upper) - - -@execute_node.register(ops.Union, pd.DataFrame, pd.DataFrame, bool) -def execute_union_dataframe_dataframe( - op, left: pd.DataFrame, right: pd.DataFrame, distinct, **kwargs -): - result = pd.concat([left, right], axis=0) - return result.drop_duplicates() if distinct else result - - -@execute_node.register(ops.Intersection, pd.DataFrame, pd.DataFrame, bool) -def execute_intersection_dataframe_dataframe( - op, - left: pd.DataFrame, - right: pd.DataFrame, - distinct: bool, - **kwargs, -): - if not distinct: - raise NotImplementedError( - "`distinct=False` is not supported by the pandas backend" - ) - result = left.merge(right, on=list(left.columns), how="inner") - return result - - -@execute_node.register(ops.Difference, pd.DataFrame, pd.DataFrame, bool) -def execute_difference_dataframe_dataframe( - op, - left: pd.DataFrame, - right: pd.DataFrame, - distinct: bool, - **kwargs, -): - if not distinct: - raise NotImplementedError( - "`distinct=False` is not supported by the pandas backend" - ) - merged = left.merge(right, on=list(left.columns), how="outer", indicator=True) - result = merged[merged["_merge"] == "left_only"].drop("_merge", axis=1) - return result - - -@execute_node.register(ops.IsNull, pd.Series) -def execute_series_isnull(op, data, **kwargs): - return data.isnull() - - -@execute_node.register(ops.NotNull, pd.Series) -def execute_series_notnnull(op, data, **kwargs): - return data.notnull() - - -@execute_node.register(ops.IsNan, (pd.Series, floating_types)) -def execute_isnan(op, data, **kwargs): - try: - return np.isnan(data) - except (TypeError, ValueError): - # if `data` contains `None` np.isnan will complain - # so we take advantage of NaN not equaling itself - # to do the correct thing - return data != data - - -@execute_node.register(ops.IsInf, (pd.Series, floating_types)) -def execute_isinf(op, data, **kwargs): - return np.isinf(data) - - -@execute_node.register(ops.SelfReference, pd.DataFrame) -def execute_node_self_reference_dataframe(op, 
data, **kwargs): - return data - - -@execute_node.register(ops.Alias, object) -def execute_alias(op, data, **kwargs): - # just return the underlying argument because the naming is handled - # by the translator for the top level expression - return data - - -@execute_node.register(ops.StringConcat, tuple) -def execute_node_string_concat(op, values, **kwargs): - values = [execute(arg, **kwargs) for arg in values] - return functools.reduce(operator.add, values) - - -@execute_node.register(ops.StringJoin, collections.abc.Sequence) -def execute_node_string_join(op, args, **kwargs): - return op.sep.join(args) - - -@execute_node.register(ops.InValues, object, tuple) -def execute_node_scalar_in_values(op, data, elements, **kwargs): - elements = [execute(arg, **kwargs) for arg in elements] - return data in elements - - -@execute_node.register(ops.InColumn, object, np.ndarray) -def execute_node_scalar_in_column(op, data, elements, **kwargs): - return data in elements - - -@execute_node.register(ops.InValues, pd.Series, tuple) -def execute_node_column_in_values(op, data, elements, **kwargs): - elements = [execute(arg, **kwargs) for arg in elements] - return data.isin(elements) - - -@execute_node.register(ops.InColumn, pd.Series, pd.Series) -def execute_node_column_in_column(op, data, elements, **kwargs): - return data.isin(elements) - - -@execute_node.register(ops.InValues, SeriesGroupBy, tuple) -def execute_node_group_in_values(op, data, elements, **kwargs): - elements = [execute(arg, **kwargs) for arg in elements] - return data.obj.isin(elements).groupby( - get_grouping(data.grouper.groupings), group_keys=False - ) - - -@execute_node.register(ops.InColumn, SeriesGroupBy, pd.Series) -def execute_node_group_in_column(op, data, elements, **kwargs): - return data.obj.isin(elements).groupby( - get_grouping(data.grouper.groupings), group_keys=False - ) - - -def pd_where(cond, true, false): - """Execute `where` following ibis's intended semantics.""" - if isinstance(cond, pd.Series): - if not isinstance(true, pd.Series): - true = pd.Series( - np.repeat(true, len(cond)), name=cond.name, index=cond.index - ) - return true.where(cond, other=false) - if cond: - if isinstance(false, pd.Series) and not isinstance(true, pd.Series): - return pd.Series(np.repeat(true, len(false))) - return true - else: - if isinstance(true, pd.Series) and not isinstance(false, pd.Series): - return pd.Series(np.repeat(false, len(true)), index=true.index) - return false - - -@execute_node.register(ops.IfElse, (pd.Series, *boolean_types), pd.Series, pd.Series) -@execute_node.register(ops.IfElse, (pd.Series, *boolean_types), pd.Series, simple_types) -@execute_node.register(ops.IfElse, (pd.Series, *boolean_types), simple_types, pd.Series) -@execute_node.register(ops.IfElse, (pd.Series, *boolean_types), type(None), type(None)) -def execute_node_where(op, cond, true, false, **kwargs): - return pd_where(cond, true, false) - - -# For true/false as scalars, we only support identical type pairs + None to -# limit the size of the dispatch table and not have to worry about type -# promotion. 
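# A minimal sketch (not part of the backend, never called) of the
# broadcasting behaviour that ``pd_where`` above implements; the helper name
# and the toy values are illustrative assumptions only.
def _pd_where_broadcasting_sketch():
    import pandas as pd  # local import keeps the sketch self-contained

    cond = pd.Series([True, False, True])
    false_branch = pd.Series([10, 20, 30])
    # a scalar "true" branch is repeated to the length of the condition
    assert pd_where(cond, 1, false_branch).tolist() == [1, 20, 1]
    # a scalar condition simply returns one branch unchanged
    assert pd_where(False, 1, false_branch) is false_branch


# The loop below registers ``execute_node_where`` for each identical scalar
# type pair (plus None), per the note above.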
-for typ in (str, *scalar_types): - for cond_typ in (pd.Series, *boolean_types): - execute_node.register(ops.IfElse, cond_typ, typ, typ)(execute_node_where) - execute_node.register(ops.IfElse, cond_typ, type(None), typ)(execute_node_where) - execute_node.register(ops.IfElse, cond_typ, typ, type(None))(execute_node_where) - - -@execute_node.register(ops.DatabaseTable, PandasBackend) -def execute_database_table_client( - op, client, timecontext: TimeContext | None, **kwargs -): - df = client.dictionary[op.name] - if timecontext: - begin, end = timecontext - time_col = get_time_col() - if time_col not in df: - raise com.IbisError( - f"Table {op.name} must have a time column named {time_col}" - " to execute with time context." - ) - # filter with time context - mask = df[time_col].between(begin, end) - return df.loc[mask].reset_index(drop=True) - return df - - -MATH_FUNCTIONS = { - ops.Floor: math.floor, - ops.Ln: math.log, - ops.Log2: lambda x: math.log(x, 2), - ops.Log10: math.log10, - ops.Exp: math.exp, - ops.Sqrt: math.sqrt, - ops.Abs: abs, - ops.Ceil: math.ceil, - ops.Sign: lambda x: 0 if not x else -1 if x < 0 else 1, -} - -MATH_FUNCTION_TYPES = tuple(MATH_FUNCTIONS.keys()) - - -@execute_node.register(MATH_FUNCTION_TYPES, numeric_types) -def execute_node_math_function_number(op, value, **kwargs): - return MATH_FUNCTIONS[type(op)](value) - - -@execute_node.register(ops.Log, numeric_types, numeric_types) -def execute_node_log_number_number(op, value, base, **kwargs): - return math.log(value, base) - - -@execute_node.register(ops.DropNa, pd.DataFrame) -def execute_node_dropna_dataframe(op, df, **kwargs): - if op.subset is not None: - subset = [col.name for col in op.subset] - else: - subset = None - return df.dropna(how=op.how, subset=subset) - - -@execute_node.register(ops.FillNa, pd.DataFrame, simple_types) -def execute_node_fillna_dataframe_scalar(op, df, replacements, **kwargs): - return df.fillna(replacements) - - -@execute_node.register(ops.FillNa, pd.DataFrame) -def execute_node_fillna_dataframe_dict(op, df, **kwargs): - return df.fillna(dict(op.replacements)) - - -@execute_node.register(ops.NullIf, simple_types, simple_types) -def execute_node_nullif_scalars(op, value1, value2, **kwargs): - return np.nan if value1 == value2 else value1 - - -@execute_node.register(ops.NullIf, pd.Series, (pd.Series, *simple_types)) -def execute_node_nullif_series(op, left, right, **kwargs): - return left.where(left != right) - - -@execute_node.register(ops.NullIf, simple_types, pd.Series) -def execute_node_nullif_scalar_series(op, value, series, **kwargs): - return series.where(series != value) - - -def coalesce(values): - return functools.reduce( - lambda a1, a2: np.where(pd.isnull(a1), a2, a1), - values, - ) - - -@toolz.curry -def promote_to_sequence(length, obj): - try: - return obj.values - except AttributeError: - return np.repeat(obj, length) - - -def compute_row_reduction(func, values, **kwargs): - final_sizes = {len(x) for x in values if isinstance(x, Sized)} - if not final_sizes: - return func(values) - (final_size,) = final_sizes - raw = func(list(map(promote_to_sequence(final_size), values)), **kwargs) - return pd.Series(raw).squeeze() - - -@execute_node.register(ops.Greatest, tuple) -def execute_node_greatest_list(op, values, **kwargs): - values = [execute(arg, **kwargs) for arg in values] - return compute_row_reduction(np.maximum.reduce, values, axis=0) - - -@execute_node.register(ops.Least, tuple) -def execute_node_least_list(op, values, **kwargs): - values = [execute(arg, **kwargs) for 
arg in values] - return compute_row_reduction(np.minimum.reduce, values, axis=0) - - -@execute_node.register(ops.Coalesce, tuple) -def execute_node_coalesce(op, values, **kwargs): - # TODO: this is slow - values = [execute(arg, **kwargs) for arg in values] - return compute_row_reduction(coalesce, values) - - -def wrap_case_result(raw, expr): - """Wrap a CASE statement result in a Series and handle returning scalars. - - Parameters - ---------- - raw : ndarray[T] - The raw results of executing the ``CASE`` expression - expr : Value - The expression from the which `raw` was computed - - Returns - ------- - Union[scalar, Series] - """ - raw_1d = np.atleast_1d(raw) - if np.any(pd.isnull(raw_1d)): - result = pd.Series(raw_1d) - else: - result = pd.Series( - raw_1d, dtype=constants.IBIS_TYPE_TO_PANDAS_TYPE[expr.type()] - ) - if result.size == 1 and isinstance(expr, ir.Scalar): - value = result.iloc[0] - try: - return value.item() - except AttributeError: - return value - return result - - -def _build_select(op, whens, thens, otherwise, func=None, **kwargs): - if func is None: - func = lambda x: x - - whens_ = [] - grouped = 0 - for when in whens: - res = execute(when, **kwargs) - obj = getattr(res, "obj", res) - grouped += obj is not res - whens_.append(obj) - - thens_ = [] - for then in thens: - res = execute(then, **kwargs) - obj = getattr(res, "obj", res) - grouped += obj is not res - thens_.append(obj) - - if otherwise is None: - otherwise = np.nan - - raw = np.select(func(whens_), thens_, otherwise) - - if grouped: - return pd.Series(raw).groupby(get_grouping(res.grouper.groupings)) - return wrap_case_result(raw, op.to_expr()) - - -@execute_node.register(ops.SearchedCase, tuple, tuple, object) -def execute_searched_case(op, whens, thens, otherwise, **kwargs): - return _build_select(op, whens, thens, otherwise, **kwargs) - - -@execute_node.register(ops.SimpleCase, object, tuple, tuple, object) -def execute_simple_case_scalar(op, value, whens, thens, otherwise, **kwargs): - value = getattr(value, "obj", value) - return _build_select( - op, - whens, - thens, - otherwise, - func=lambda whens: np.asarray(whens) == value, - **kwargs, - ) - - -@execute_node.register(ops.SimpleCase, (pd.Series, SeriesGroupBy), tuple, tuple, object) -def execute_simple_case_series(op, value, whens, thens, otherwise, **kwargs): - value = getattr(value, "obj", value) - return _build_select( - op, - whens, - thens, - otherwise, - func=lambda whens: [value == when for when in whens], - **kwargs, - ) - - -@execute_node.register(ops.Distinct, pd.DataFrame) -def execute_distinct_dataframe(op, df, **kwargs): - return df.drop_duplicates() - - -@execute_node.register(ops.TableArrayView, pd.DataFrame) -def execute_table_array_view(op, _, **kwargs): - return execute(op.table).squeeze() - - -@execute_node.register(ops.InMemoryTable) -def execute_in_memory_table(op, **kwargs): - return op.data.to_frame() - - -@execute_node.register(ops.Sample, pd.DataFrame, object, object) -def execute_sample(op, data, fraction, seed, **kwargs): - return data.sample(frac=fraction, random_state=seed) diff --git a/ibis/backends/pandas/execution/join.py b/ibis/backends/pandas/execution/join.py deleted file mode 100644 index adf39079f659..000000000000 --- a/ibis/backends/pandas/execution/join.py +++ /dev/null @@ -1,183 +0,0 @@ -from __future__ import annotations - -import itertools - -import pandas as pd - -import ibis.expr.analysis as an -import ibis.expr.operations as ops -from ibis.backends.pandas.core import execute -from 
ibis.backends.pandas.dispatch import execute_node -from ibis.backends.pandas.execution import constants -from ibis.common.exceptions import UnsupportedOperationError - - -def _compute_join_column(column, **kwargs): - if isinstance(column, ops.TableColumn): - new_column = column.name - else: - new_column = execute(column, **kwargs) - root_table, *_ = an.find_immediate_parent_tables(column) - return new_column, root_table - - -@execute_node.register(ops.CrossJoin, pd.DataFrame, pd.DataFrame, tuple) -def execute_cross_join(op, left, right, predicates, **kwargs): - """Execute a cross join in pandas. - - Notes - ----- - We create a dummy column of all :data:`True` instances and use that as the - join key. This results in the desired Cartesian product behavior guaranteed - by cross join. - """ - assert not predicates, "cross join predicates must be empty" - return pd.merge( - left, - right, - how="cross", - copy=False, - suffixes=constants.JOIN_SUFFIXES, - ) - - -def _get_semi_anti_join_filter(op, left, right, predicates, **kwargs): - left_on, right_on = _construct_join_predicate_columns( - op, - predicates, - **kwargs, - ) - inner = left.merge( - right[right_on].drop_duplicates(), - on=left_on, - how="left", - indicator=True, - ) - return (inner["_merge"] == "both").values - - -@execute_node.register(ops.LeftSemiJoin, pd.DataFrame, pd.DataFrame, tuple) -def execute_left_semi_join(op, left, right, predicates, **kwargs): - """Execute a left semi join in pandas.""" - inner_filt = _get_semi_anti_join_filter( - op, - left, - right, - predicates, - **kwargs, - ) - return left.loc[inner_filt, :] - - -@execute_node.register(ops.LeftAntiJoin, pd.DataFrame, pd.DataFrame, tuple) -def execute_left_anti_join(op, left, right, predicates, **kwargs): - """Execute a left anti join in pandas.""" - inner_filt = _get_semi_anti_join_filter( - op, - left, - right, - predicates, - **kwargs, - ) - return left.loc[~inner_filt, :] - - -def _construct_join_predicate_columns(op, predicates, **kwargs): - on = {op.left: [], op.right: []} - - for predicate in predicates: - if not isinstance(predicate, ops.Equals): - raise TypeError("Only equality join predicates supported with pandas") - new_left_column, left_pred_root = _compute_join_column(predicate.left, **kwargs) - on[left_pred_root].append(new_left_column) - - new_right_column, right_pred_root = _compute_join_column( - predicate.right, **kwargs - ) - on[right_pred_root].append(new_right_column) - return on[op.left], on[op.right] - - -@execute_node.register(ops.Join, pd.DataFrame, pd.DataFrame, tuple) -def execute_join(op, left, right, predicates, **kwargs): - op_type = type(op) - - try: - how = constants.JOIN_TYPES[op_type] - except KeyError: - raise UnsupportedOperationError(f"{op_type.__name__} not supported") - - left_on, right_on = _construct_join_predicate_columns(op, predicates, **kwargs) - - df = pd.merge( - left, - right, - how=how, - left_on=left_on, - right_on=right_on, - suffixes=constants.JOIN_SUFFIXES, - ) - return df - - -@execute_node.register( - ops.AsOfJoin, - pd.DataFrame, - pd.DataFrame, - tuple, - (pd.Timedelta, type(None)), - tuple, -) -def execute_asof_join(op, left, right, by, tolerance, predicates, **kwargs): - left_on, right_on = _extract_predicate_names(predicates) - left_by, right_by = _extract_predicate_names(by) - - # Add default join suffixes to predicates and groups and rename the - # corresponding columns before the `merge_asof`. 
If we don't do this and the - # predicates have the same column name, we lose the original RHS column - # values in the output. Instead, the RHS values are copies of the LHS values. - # xref https://github.com/ibis-project/ibis/issues/6080 - left_on_suffixed = [x + constants.JOIN_SUFFIXES[0] for x in left_on] - right_on_suffixed = [x + constants.JOIN_SUFFIXES[1] for x in right_on] - - left_by_suffixed = [x + constants.JOIN_SUFFIXES[0] for x in left_by] - right_by_suffixed = [x + constants.JOIN_SUFFIXES[1] for x in right_by] - - left = left.rename( - columns=dict( - itertools.chain( - zip(left_on, left_on_suffixed), zip(left_by, left_by_suffixed) - ) - ) - ) - right = right.rename( - columns=dict( - itertools.chain( - zip(right_on, right_on_suffixed), zip(right_by, right_by_suffixed) - ) - ) - ) - - return pd.merge_asof( - left=left, - right=right, - left_on=left_on_suffixed, - right_on=right_on_suffixed, - left_by=left_by_suffixed or None, - right_by=right_by_suffixed or None, - tolerance=tolerance, - suffixes=constants.JOIN_SUFFIXES, - ) - - -def _extract_predicate_names(predicates): - lefts = [] - rights = [] - for predicate in predicates: - if not isinstance(predicate, ops.Equals): - raise TypeError("Only equality join predicates supported with pandas") - left_name = predicate.left.name - right_name = predicate.right.name - lefts.append(left_name) - rights.append(right_name) - return lefts, rights diff --git a/ibis/backends/pandas/execution/maps.py b/ibis/backends/pandas/execution/maps.py deleted file mode 100644 index 2da84583362c..000000000000 --- a/ibis/backends/pandas/execution/maps.py +++ /dev/null @@ -1,208 +0,0 @@ -from __future__ import annotations - -import collections -import functools - -import numpy as np -import pandas as pd -import toolz - -import ibis.expr.operations as ops -from ibis.backends.pandas.dispatch import execute_node - - -@execute_node.register(ops.Map, np.ndarray, np.ndarray) -def map_ndarray_ndarray(op, keys, values, **kwargs): - return dict(zip(keys, values)) - - -@execute_node.register(ops.Map, pd.Series, pd.Series) -def map_series_series(op, keys, values, **kwargs): - return keys.combine(values, lambda a, b: dict(zip(a, b))) - - -@execute_node.register(ops.MapLength, pd.Series) -def map_length_series(op, data, **kwargs): - # TODO: investigate whether calling a lambda is faster - return data.dropna().map(len).reindex(data.index) - - -@execute_node.register(ops.MapLength, (collections.abc.Mapping, type(None))) -def map_length_dict(op, data, **kwargs): - return None if data is None else len(data) - - -@execute_node.register(ops.MapGet, pd.Series, object, object) -def map_get_series_scalar_scalar(op, data, key, default, **kwargs): - return data.map(functools.partial(safe_get, key=key, default=default)) - - -@execute_node.register(ops.MapGet, pd.Series, object, pd.Series) -def map_get_series_scalar_series(op, data, key, default, **kwargs): - defaultiter = iter(default.values) - return data.map( - lambda mapping, key=key, defaultiter=defaultiter: safe_get( - mapping, key, next(defaultiter) - ) - ) - - -@execute_node.register(ops.MapGet, pd.Series, pd.Series, object) -def map_get_series_series_scalar(op, data, key, default, **kwargs): - keyiter = iter(key.values) - return data.map( - lambda mapping, keyiter=keyiter, default=default: safe_get( - mapping, next(keyiter), default - ) - ) - - -@execute_node.register(ops.MapGet, pd.Series, pd.Series, pd.Series) -def map_get_series_series_series(op, data, key, default): - keyiter = iter(key.values) - defaultiter = 
iter(default.values) - - def get(mapping, keyiter=keyiter, defaultiter=defaultiter): - return safe_get(mapping, next(keyiter), next(defaultiter)) - - return data.map(get) - - -@execute_node.register(ops.MapGet, collections.abc.Mapping, object, object) -def map_get_dict_scalar_scalar(op, data, key, default, **kwargs): - return safe_get(data, key, default) - - -@execute_node.register(ops.MapGet, collections.abc.Mapping, object, pd.Series) -def map_get_dict_scalar_series(op, data, key, default, **kwargs): - return default.map(lambda d, data=data, key=key: safe_get(data, key, d)) - - -@execute_node.register(ops.MapGet, collections.abc.Mapping, pd.Series, object) -def map_get_dict_series_scalar(op, data, key, default, **kwargs): - return key.map(lambda k, data=data, default=default: safe_get(data, k, default)) - - -@execute_node.register(ops.MapGet, collections.abc.Mapping, pd.Series, pd.Series) -def map_get_dict_series_series(op, data, key, default, **kwargs): - defaultiter = iter(default.values) - return key.map( - lambda k, data=data, defaultiter=defaultiter: safe_get( - data, k, next(defaultiter) - ) - ) - - -@execute_node.register(ops.MapContains, collections.abc.Mapping, object) -def map_contains_dict_object(op, data, key, **kwargs): - return safe_contains(data, key) - - -@execute_node.register(ops.MapContains, collections.abc.Mapping, pd.Series) -def map_contains_dict_series(op, data, key, **kwargs): - return key.map(lambda k, data=data: safe_contains(data, k)) - - -@execute_node.register(ops.MapContains, pd.Series, object) -def map_contains_series_object(op, data, key, **kwargs): - return data.map(lambda d: safe_contains(d, key)) - - -@execute_node.register(ops.MapContains, pd.Series, pd.Series) -def map_contains_series_series(op, data, key, **kwargs): - return data.combine(key, lambda d, k: safe_contains(d, k)) - - -def safe_method(mapping, method, *args, **kwargs): - if mapping is None: - return None - try: - method = getattr(mapping, method) - except AttributeError: - return None - else: - return method(*args, **kwargs) - - -def safe_get(mapping, key, default=None): - return safe_method(mapping, "get", key, default) - - -def safe_contains(mapping, key): - return safe_method(mapping, "__contains__", key) - - -def safe_keys(mapping): - result = safe_method(mapping, "keys") - if result is None: - return None - # list(...) to unpack iterable - return np.array(list(result)) - - -def safe_values(mapping): - result = safe_method(mapping, "values") - if result is None: - return None - # list(...) to unpack iterable - return np.array(list(result), dtype="object") - - -@execute_node.register(ops.MapKeys, pd.Series) -def map_keys_series(op, data, **kwargs): - return data.map(safe_keys) - - -@execute_node.register(ops.MapKeys, (collections.abc.Mapping, type(None))) -def map_keys_dict(op, data, **kwargs): - if data is None: - return None - # list(...) to unpack iterable - return np.array(list(data.keys())) - - -@execute_node.register(ops.MapValues, pd.Series) -def map_values_series(op, data, **kwargs): - res = data.map(safe_values) - return res - - -@execute_node.register(ops.MapValues, (collections.abc.Mapping, type(None))) -def map_values_dict(op, data, **kwargs): - if data is None: - return None - # list(...) 
to unpack iterable - return np.array(list(data.values())) - - -def safe_merge(*maps): - return None if any(m is None for m in maps) else toolz.merge(*maps) - - -@execute_node.register( - ops.MapMerge, - (collections.abc.Mapping, type(None)), - (collections.abc.Mapping, type(None)), -) -def map_merge_dict_dict(op, lhs, rhs, **kwargs): - return safe_merge(lhs, rhs) - - -@execute_node.register(ops.MapMerge, (collections.abc.Mapping, type(None)), pd.Series) -def map_merge_dict_series(op, lhs, rhs, **kwargs): - if lhs is None: - return pd.Series([None] * len(rhs)) - return rhs.map(lambda m, lhs=lhs: safe_merge(lhs, m)) - - -@execute_node.register(ops.MapMerge, pd.Series, (collections.abc.Mapping, type(None))) -def map_merge_series_dict(op, lhs, rhs, **kwargs): - if rhs is None: - return pd.Series([None] * len(lhs)) - return lhs.map(lambda m, rhs=rhs: safe_merge(m, rhs)) - - -@execute_node.register(ops.MapMerge, pd.Series, pd.Series) -def map_merge_series_series(op, lhs, rhs, **kwargs): - rhsiter = iter(rhs.values) - return lhs.map(lambda m, rhsiter=rhsiter: safe_merge(m, next(rhsiter))) diff --git a/ibis/backends/pandas/execution/selection.py b/ibis/backends/pandas/execution/selection.py deleted file mode 100644 index b1f8a0ee6659..000000000000 --- a/ibis/backends/pandas/execution/selection.py +++ /dev/null @@ -1,337 +0,0 @@ -"""Dispatching code for Selection operations.""" - -from __future__ import annotations - -import functools -import operator -from collections import defaultdict -from typing import TYPE_CHECKING, Any - -import pandas as pd -from toolz import concatv, first - -import ibis.expr.analysis as an -import ibis.expr.operations as ops -import ibis.expr.types as ir -from ibis.backends.base.df.scope import Scope -from ibis.backends.pandas.core import execute -from ibis.backends.pandas.dispatch import execute_node -from ibis.backends.pandas.execution import constants, util -from ibis.backends.pandas.execution.util import coerce_to_output - -if TYPE_CHECKING: - from collections.abc import Iterable - - from ibis.backends.base.df.timecontext import TimeContext - - -def compute_projection( - node: ops.Node, - parent: ops.Selection, - data: pd.DataFrame, - scope: Scope | None = None, - timecontext: TimeContext | None = None, - **kwargs: Any, -): - """Compute a projection. - - `ibis.expr.types.Scalar` instances occur when a specific column projection - is a window operation. 
- """ - if isinstance(node, ops.TableNode): - if node == parent.table: - return data - - assert isinstance(parent.table, ops.Join) - assert node in (parent.table.left, parent.table.right) - - mapping = remap_overlapping_column_names( - parent.table, - root_table=node, - data_columns=frozenset(data.columns), - ) - return map_new_column_names_to_data(mapping, data) - elif isinstance(node, ops.Value): - name = node.name - assert name is not None, "Value selection name is None" - - if node.shape.is_scalar(): - data_columns = frozenset(data.columns) - - if scope is None: - scope = Scope() - - scope = scope.merge_scopes( - Scope( - { - t: map_new_column_names_to_data( - remap_overlapping_column_names( - parent.table, t, data_columns - ), - data, - ) - }, - timecontext, - ) - for t in an.find_immediate_parent_tables(node) - ) - scalar = execute(node, scope=scope, **kwargs) - result = pd.Series([scalar], name=name).repeat(len(data.index)) - result.index = data.index - return result - else: - if isinstance(node, ops.TableColumn): - if name in data: - return data[name].rename(name) - - if not isinstance(parent.table, ops.Join): - raise KeyError(name) - - suffix = util.get_join_suffix_for_op(node, parent.table) - return data.loc[:, name + suffix].rename(name) - - data_columns = frozenset(data.columns) - - scope = scope.merge_scopes( - Scope( - { - t: map_new_column_names_to_data( - remap_overlapping_column_names( - parent.table, t, data_columns - ), - data, - ) - }, - timecontext, - ) - for t in an.find_immediate_parent_tables(node) - ) - - result = execute(node, scope=scope, timecontext=timecontext, **kwargs) - return coerce_to_output(result, node, data.index) - else: - raise TypeError(node) - - -def remap_overlapping_column_names(table, root_table, data_columns): - """Return a mapping of suffixed column names to column names without suffixes. - - Parameters - ---------- - table : TableNode - The ``TableNode`` we're selecting from. - root_table : TableNode - The root table of the expression we're selecting from. - data_columns - The available columns to select from - - Returns - ------- - dict[str, str] - A mapping from possibly-suffixed column names to column names without - suffixes. - """ - if not isinstance(table, ops.Join): - return None - - left_root, right_root = an.find_immediate_parent_tables([table.left, table.right]) - suffixes = { - left_root: constants.LEFT_JOIN_SUFFIX, - right_root: constants.RIGHT_JOIN_SUFFIX, - } - - # if we're selecting from the root table and that's not the left or right - # child, don't add a suffix - # - # this can happen when selecting directly from a join as opposed to - # explicitly referencing the left or right tables - # - # we use setdefault here because the root_table can be the left/right table - # which we may have already put into `suffixes` - suffixes.setdefault(root_table, "") - - suffix = suffixes[root_table] - - column_names = [ - ({name, f"{name}{suffix}"} & data_columns, name) - for name in root_table.schema.names - ] - mapping = { - first(col_name): final_name for col_name, final_name in column_names if col_name - } - return mapping - - -def map_new_column_names_to_data(mapping, df): - if mapping: - return df.loc[:, mapping.keys()].rename(columns=mapping) - return df - - -def _compute_predicates( - table_op: ops.TableNode, - predicates: Iterable[ir.BooleanColumn], - data: pd.DataFrame, - scope: Scope, - timecontext: TimeContext | None, - **kwargs: Any, -) -> pd.Series: - """Compute the predicates for a table operation. 
- - This handles the cases where `predicates` are computed columns, in addition - to the simple case of named columns coming directly from the input table. - """ - for predicate in predicates: - # Map each root table of the predicate to the data so that we compute - # predicates on the result instead of any left or right tables if the - # Selection is on a Join. Project data to only include columns from - # the root table. - root_tables = an.find_immediate_parent_tables(predicate) - - # handle suffixes - data_columns = frozenset(data.columns) - - additional_scope = Scope() - for root_table in root_tables: - mapping = remap_overlapping_column_names(table_op, root_table, data_columns) - new_data = map_new_column_names_to_data(mapping, data) - additional_scope = additional_scope.merge_scope( - Scope({root_table: new_data}, timecontext) - ) - - scope = scope.merge_scope(additional_scope) - yield execute(predicate, scope=scope, **kwargs) - - -def build_df_from_selection( - selections: list[ops.Value], - data: pd.DataFrame, - table: ops.Node, -) -> pd.DataFrame: - """Build up a df by doing direct selections, renaming if necessary. - - Special logic for: - - Joins where suffixes have been added to column names - - Cases where new columns are created and selected. - """ - cols = defaultdict(list) - - for node in selections: - selection = node.name - if selection not in data: - if not isinstance(table, ops.Join): - raise KeyError(selection) - join_suffix = util.get_join_suffix_for_op(node, table) - if selection + join_suffix not in data: - raise KeyError(selection) - selection += join_suffix - cols[selection].append(node.name) - - result = data[list(cols.keys())] - - renamed_cols = {} - for from_col, to_cols in cols.items(): - if len(to_cols) == 1 and from_col != to_cols[0]: - renamed_cols[from_col] = to_cols[0] - else: - for new_col in to_cols: - if from_col != new_col: - result[new_col] = result[from_col] - - if renamed_cols: - result = result.rename(columns=renamed_cols) - - return result - - -def build_df_from_projection( - selection_exprs: list[ir.Expr], - op: ops.Selection, - data: pd.DataFrame, - **kwargs, -) -> pd.DataFrame: - data_pieces = [ - compute_projection(node, op, data, **kwargs) for node in selection_exprs - ] - - new_pieces = [ - piece.reset_index(level=list(range(1, piece.index.nlevels)), drop=True) - if piece.index.nlevels > 1 - else piece - for piece in data_pieces - ] - # Result series might be trimmed by time context, thus index may - # have changed. To concat rows properly, we first `sort_index` on - # each pieces then assign data index manually to series - # - # If cardinality changes (e.g. unnest/explode), trying to do this - # won't work so don't try? 
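    # A hedged, made-up illustration of why both steps below matter: given
    # pieces whose indexes were reordered by time-context trimming, e.g.
    #
    #     a = pd.Series([1, 2], index=[1, 0])
    #     b = pd.Series([3, 4], index=[0, 1])
    #
    # each piece is first sort_index()-ed and, when its length still matches,
    # reassigned ``data.index`` so that the final ``pd.concat(..., axis=1)``
    # pairs rows positionally rather than by the reordered labels.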
- for i, piece in enumerate(new_pieces): - new_pieces[i] = piece.sort_index() - if len(new_pieces[i].index) == len(data.index): - new_pieces[i].index = data.index - - return pd.concat(new_pieces, axis=1) - - -@execute_node.register(ops.Selection, pd.DataFrame) -def execute_selection_dataframe( - op, - data, - scope: Scope, - timecontext: TimeContext | None, - **kwargs, -): - result = data - - # Build up the individual pandas structures from column expressions - if op.selections: - if all(isinstance(s, ops.TableColumn) for s in op.selections): - result = build_df_from_selection(op.selections, data, op.table) - else: - result = build_df_from_projection( - op.selections, - op, - data, - scope=scope, - timecontext=timecontext, - **kwargs, - ) - - if op.predicates: - predicates = _compute_predicates( - op.table, op.predicates, data, scope, timecontext, **kwargs - ) - predicate = functools.reduce(operator.and_, predicates) - assert len(predicate) == len( - result - ), "Selection predicate length does not match underlying table" - result = result.loc[predicate] - - if op.sort_keys: - result, grouping_keys, ordering_keys = util.compute_sorted_frame( - result, - order_by=op.sort_keys, - scope=scope, - timecontext=timecontext, - **kwargs, - ) - else: - grouping_keys = ordering_keys = () - - # return early if we do not have any temporary grouping or ordering columns - assert not grouping_keys, "group by should never show up in Selection" - if not ordering_keys: - return result - - # create a sequence of columns that we need to drop - temporary_columns = pd.Index(concatv(grouping_keys, ordering_keys)).difference( - data.columns - ) - - # no reason to call drop if we don't need to - if temporary_columns.empty: - return result - - # drop every temporary column we created for ordering or grouping - return result.drop(temporary_columns, axis=1) diff --git a/ibis/backends/pandas/execution/strings.py b/ibis/backends/pandas/execution/strings.py deleted file mode 100644 index 66e325b6d367..000000000000 --- a/ibis/backends/pandas/execution/strings.py +++ /dev/null @@ -1,560 +0,0 @@ -from __future__ import annotations - -import itertools -import json -import operator -from functools import partial, reduce -from urllib.parse import parse_qs, urlsplit - -import numpy as np -import pandas as pd -import toolz -from pandas.core.groupby import SeriesGroupBy - -try: - import regex as re -except ImportError: - import re - -import ibis.expr.operations as ops -import ibis.util -from ibis.backends.pandas.core import execute, integer_types, scalar_types -from ibis.backends.pandas.dispatch import execute_node -from ibis.backends.pandas.execution.util import get_grouping - - -@execute_node.register(ops.StringLength, pd.Series) -def execute_string_length_series(op, data, **kwargs): - return data.str.len().astype("int32") - - -@execute_node.register( - ops.Substring, pd.Series, integer_types, (type(None), *integer_types) -) -def execute_substring_int_int(op, data, start, length, **kwargs): - if length is None: - return data.str[start:] - else: - return data.str[start : start + length] - - -@execute_node.register(ops.Substring, pd.Series, pd.Series, integer_types) -def execute_substring_series_int(op, data, start, length, **kwargs): - return execute_substring_series_series( - op, data, start, pd.Series(np.repeat(length, len(start))), **kwargs - ) - - -@execute_node.register(ops.Substring, pd.Series, integer_types, pd.Series) -def execute_string_substring_int_series(op, data, start, length, **kwargs): - return 
execute_substring_series_series( - op, data, pd.Series(np.repeat(start, len(length))), length, **kwargs - ) - - -@execute_node.register(ops.Substring, pd.Series, pd.Series, pd.Series) -def execute_substring_series_series(op, data, start, length, **kwargs): - end = start + length - - return pd.Series( - [ - None - if (begin is not None and pd.isnull(begin)) - or (stop is not None and pd.isnull(stop)) - else value[begin:stop] - for value, begin, stop in zip(data, start.values, end.values) - ], - dtype=data.dtype, - name=data.name, - ) - - -@execute_node.register(ops.Strip, pd.Series) -def execute_string_strip(op, data, **kwargs): - return data.str.strip() - - -@execute_node.register(ops.LStrip, pd.Series) -def execute_string_lstrip(op, data, **kwargs): - return data.str.lstrip() - - -@execute_node.register(ops.RStrip, pd.Series) -def execute_string_rstrip(op, data, **kwargs): - return data.str.rstrip() - - -@execute_node.register( - ops.LPad, pd.Series, (pd.Series,) + integer_types, (pd.Series, str) -) -def execute_string_lpad(op, data, length, pad, **kwargs): - return data.str.pad(length, side="left", fillchar=pad) - - -@execute_node.register( - ops.RPad, pd.Series, (pd.Series,) + integer_types, (pd.Series, str) -) -def execute_string_rpad(op, data, length, pad, **kwargs): - return data.str.pad(length, side="right", fillchar=pad) - - -@execute_node.register(ops.Reverse, pd.Series) -def execute_string_reverse(op, data, **kwargs): - return data.str[::-1] - - -@execute_node.register(ops.Lowercase, pd.Series) -def execute_string_lower(op, data, **kwargs): - return data.str.lower() - - -@execute_node.register(ops.Uppercase, pd.Series) -def execute_string_upper(op, data, **kwargs): - return data.str.upper() - - -@execute_node.register(ops.Capitalize, (pd.Series, str)) -def execute_string_capitalize(op, data, **kwargs): - return getattr(data, "str", data).capitalize() - - -@execute_node.register(ops.Repeat, pd.Series, (pd.Series,) + integer_types) -def execute_string_repeat(op, data, times, **kwargs): - return data.str.repeat(times) - - -@execute_node.register(ops.StringContains, pd.Series, (pd.Series, str)) -def execute_string_contains(_, data, needle, **kwargs): - return data.str.contains(needle) - - -@execute_node.register( - ops.StringFind, - pd.Series, - (pd.Series, str), - (pd.Series, type(None)) + integer_types, - (pd.Series, type(None)) + integer_types, -) -def execute_string_find(op, data, needle, start, end, **kwargs): - return data.str.find(needle, start, end) - - -def _sql_like_to_regex(pattern, escape): - cur_i = 0 - pattern_length = len(pattern) - - while cur_i < pattern_length: - nxt_i = cur_i + 1 - - cur = pattern[cur_i] - nxt = pattern[nxt_i] if nxt_i < pattern_length else None - - skip = 1 - - if nxt is not None and escape is not None and cur == escape: - yield nxt - skip = 2 - elif cur == "%": - yield ".*" - elif cur == "_": - yield "." - else: - yield cur - - cur_i += skip - - -def sql_like_to_regex(pattern: str, escape: str | None = None) -> str: - """Convert a SQL `LIKE` pattern to an equivalent Python regular expression. - - Parameters - ---------- - pattern - A LIKE pattern with the following semantics: - * `%` matches zero or more characters - * `_` matches exactly one character - * To escape `%` and `_` (or to match the `escape` parameter - itself), prefix the desired character with `escape`. - escape - Escape character - - Returns - ------- - str - A regular expression pattern equivalent to the input SQL `LIKE` pattern. 
- - Examples - -------- - >>> sql_like_to_regex("6%") # default is to not escape anything - '^6.*$' - >>> sql_like_to_regex("6^%", escape="^") - '^6%$' - >>> sql_like_to_regex("6_") - '^6.$' - >>> sql_like_to_regex("6/_", escape="/") - '^6_$' - >>> sql_like_to_regex("%abc") # any string ending with "abc" - '^.*abc$' - >>> sql_like_to_regex("abc%") # any string starting with "abc" - '^abc.*$' - """ - return f"^{''.join(_sql_like_to_regex(pattern, escape))}$" - - -@execute_node.register(ops.StringSQLLike, pd.Series, str, (str, type(None))) -def execute_string_like_series_string(op, data, pattern, escape, **kwargs): - new_pattern = sql_like_to_regex(pattern, escape=escape) - return data.str.contains(new_pattern, regex=True) - - -@execute_node.register(ops.StringSQLLike, SeriesGroupBy, str, str) -def execute_string_like_series_groupby_string(op, data, pattern, escape, **kwargs): - return execute_string_like_series_string( - op, data.obj, pattern, escape, **kwargs - ).groupby(get_grouping(data.grouper.groupings), group_keys=False) - - -@execute_node.register(ops.GroupConcat, pd.Series, str, (pd.Series, type(None))) -def execute_group_concat_series_mask(op, data, sep, mask, aggcontext=None, **kwargs): - return aggcontext.agg( - data[mask] if mask is not None else data, - lambda series, sep=sep: sep.join(series.values), - ) - - -@execute_node.register(ops.GroupConcat, SeriesGroupBy, str, type(None)) -def execute_group_concat_series_gb(op, data, sep, _, aggcontext=None, **kwargs): - return aggcontext.agg(data, lambda data, sep=sep: sep.join(data.values.astype(str))) - - -@execute_node.register(ops.GroupConcat, SeriesGroupBy, str, SeriesGroupBy) -def execute_group_concat_series_gb_mask(op, data, sep, mask, aggcontext=None, **kwargs): - def method(series, sep=sep): - if series.empty: - return pd.NA - return sep.join(series.values.astype(str)) - - return aggcontext.agg( - data, - lambda data, mask=mask.obj, method=method: method(data[mask[data.index]]), - ) - - -@execute_node.register(ops.StringAscii, pd.Series) -def execute_string_ascii(op, data, **kwargs): - return data.map(ord).astype("int32") - - -@execute_node.register(ops.StringAscii, SeriesGroupBy) -def execute_string_ascii_group_by(op, data, **kwargs): - return execute_string_ascii(op, data, **kwargs).groupby( - get_grouping(data.grouper.groupings), group_keys=False - ) - - -@execute_node.register(ops.RegexSearch, pd.Series, str) -def execute_series_regex_search(op, data, pattern, **kwargs): - pattern = re.compile(pattern) - return data.map(lambda x, pattern=pattern: pattern.search(x) is not None) - - -@execute_node.register(ops.RegexSearch, SeriesGroupBy, str) -def execute_series_regex_search_gb(op, data, pattern, **kwargs): - return execute_series_regex_search( - op, data, getattr(pattern, "obj", pattern), **kwargs - ).groupby(get_grouping(data.grouper.groupings), group_keys=False) - - -@execute_node.register(ops.StartsWith, pd.Series, str) -def execute_series_starts_with(op, data, pattern, **kwargs): - return data.str.startswith(pattern) - - -@execute_node.register(ops.EndsWith, pd.Series, str) -def execute_series_ends_with(op, data, pattern, **kwargs): - return data.str.endswith(pattern) - - -@execute_node.register(ops.RegexExtract, pd.Series, str, integer_types) -def execute_series_regex_extract(op, data, pattern, index, **kwargs): - pattern = re.compile(pattern) - return pd.Series( - [ - None if (match is None or index > match.lastindex) else match[index] - for match in map(pattern.search, data) - ], - dtype=data.dtype, - 
name=data.name, - ) - - -@execute_node.register(ops.RegexExtract, SeriesGroupBy, str, integer_types) -def execute_series_regex_extract_gb(op, data, pattern, index, **kwargs): - return execute_series_regex_extract(op, data.obj, pattern, index, **kwargs).groupby( - get_grouping(data.grouper.groupings), group_keys=False - ) - - -@execute_node.register(ops.RegexReplace, pd.Series, str, str) -def execute_series_regex_replace(op, data, pattern, replacement, **kwargs): - pattern = re.compile(pattern) - - def replacer(x, pattern=pattern): - return pattern.sub(replacement, x) - - return data.apply(replacer) - - -@execute_node.register(ops.RegexReplace, str, str, str) -def execute_str_regex_replace(_, arg, pattern, replacement, **kwargs): - return re.sub(pattern, replacement, arg) - - -@execute_node.register(ops.RegexReplace, SeriesGroupBy, str, str) -def execute_series_regex_replace_gb(op, data, pattern, replacement, **kwargs): - return execute_series_regex_replace( - data.obj, pattern, replacement, **kwargs - ).groupby(get_grouping(data.grouper.groupings), group_keys=False) - - -@execute_node.register(ops.Translate, pd.Series, pd.Series, pd.Series) -def execute_series_translate_series_series(op, data, from_string, to_string, **kwargs): - tables = [ - str.maketrans(source, target) for source, target in zip(from_string, to_string) - ] - return pd.Series( - [string.translate(table) for string, table in zip(data, tables)], - dtype=data.dtype, - name=data.name, - ) - - -@execute_node.register(ops.Translate, pd.Series, pd.Series, str) -def execute_series_translate_series_scalar(op, data, from_string, to_string, **kwargs): - tables = [str.maketrans(source, to_string) for source in from_string] - return pd.Series( - [string.translate(table) for string, table in zip(data, tables)], - dtype=data.dtype, - name=data.name, - ) - - -@execute_node.register(ops.Translate, pd.Series, str, pd.Series) -def execute_series_translate_scalar_series(op, data, from_string, to_string, **kwargs): - tables = [str.maketrans(from_string, target) for target in to_string] - return pd.Series( - [string.translate(table) for string, table in zip(data, tables)], - dtype=data.dtype, - name=data.name, - ) - - -@execute_node.register(ops.Translate, pd.Series, str, str) -def execute_series_translate_scalar_scalar(op, data, from_string, to_string, **kwargs): - return data.str.translate(str.maketrans(from_string, to_string)) - - -@execute_node.register(ops.StrRight, pd.Series, integer_types) -def execute_series_right(op, data, nchars, **kwargs): - return data.str[-nchars:] - - -@execute_node.register(ops.StrRight, SeriesGroupBy, integer_types) -def execute_series_right_gb(op, data, nchars, **kwargs): - return execute_series_right(op, data.obj, nchars).groupby( - get_grouping(data.grouper.groupings), group_keys=False - ) - - -@execute_node.register(ops.StringReplace, pd.Series, (pd.Series, str), (pd.Series, str)) -def execute_series_string_replace(_, data, needle, replacement, **kwargs): - return data.str.replace(needle, replacement) - - -@execute_node.register(ops.StringJoin, (pd.Series, str), tuple) -def execute_series_join_scalar_sep(op, sep, args, **kwargs): - data = [execute(arg, **kwargs) for arg in args] - return reduce(lambda x, y: x + sep + y, data) - - -def haystack_to_series_of_lists(haystack, index=None): - if index is None: - index = toolz.first( - piece.index for piece in haystack if hasattr(piece, "index") - ) - pieces = reduce( - operator.add, - ( - pd.Series(getattr(piece, "values", piece), index=index).map( - 
ibis.util.promote_list - ) - for piece in haystack - ), - ) - return pieces - - -@execute_node.register(ops.FindInSet, pd.Series, tuple) -def execute_series_find_in_set(op, needle, haystack, **kwargs): - haystack = [execute(arg, **kwargs) for arg in haystack] - pieces = haystack_to_series_of_lists(haystack, index=needle.index) - index = itertools.count() - return pieces.map( - lambda elements, needle=needle, index=index: ( - ibis.util.safe_index(elements, needle.iat[next(index)]) - ) - ) - - -@execute_node.register(ops.FindInSet, SeriesGroupBy, list) -def execute_series_group_by_find_in_set(op, needle, haystack, **kwargs): - pieces = [getattr(piece, "obj", piece) for piece in haystack] - return execute_series_find_in_set(op, needle.obj, pieces, **kwargs).groupby( - get_grouping(needle.grouper.groupings), group_keys=False - ) - - -@execute_node.register(ops.FindInSet, scalar_types, list) -def execute_string_group_by_find_in_set(op, needle, haystack, **kwargs): - # `list` could contain series, series groupbys, or scalars - # mixing series and series groupbys is not allowed - series_in_haystack = [ - type(piece) - for piece in haystack - if isinstance(piece, (pd.Series, SeriesGroupBy)) - ] - - if not series_in_haystack: - return ibis.util.safe_index(haystack, needle) - - try: - (collection_type,) = frozenset(map(type, series_in_haystack)) - except ValueError: - raise ValueError("Mixing Series and SeriesGroupBy is not allowed") - - pieces = haystack_to_series_of_lists( - [getattr(piece, "obj", piece) for piece in haystack] - ) - - result = pieces.map(toolz.flip(ibis.util.safe_index)(needle)) - if issubclass(collection_type, pd.Series): - return result - - assert issubclass(collection_type, SeriesGroupBy) - - return result.groupby( - get_grouping( - toolz.first( - piece.grouper.groupings - for piece in haystack - if hasattr(piece, "grouper") - ) - ), - group_keys=False, - ) - - -def try_getitem(value, key): - try: - # try to deserialize the value -> return None if it's None - if (js := json.loads(value)) is None: - return None - except (json.JSONDecodeError, TypeError): - # if there's an error related to decoding or a type error return None - return None - - try: - # try to extract the value as an array element or mapping key - return js[key] - except (KeyError, IndexError, TypeError): - # KeyError: missing mapping key - # IndexError: missing sequence key - # TypeError: `js` doesn't implement __getitem__, either at all or for - # the type of `key` - return None - - -@execute_node.register(ops.JSONGetItem, pd.Series, (str, int)) -def execute_json_getitem_series_str_int(_, data, key, **kwargs): - return pd.Series(map(partial(try_getitem, key=key), data), dtype="object") - - -@execute_node.register(ops.JSONGetItem, pd.Series, pd.Series) -def execute_json_getitem_series_series(_, data, key, **kwargs): - return pd.Series(map(try_getitem, data, key), dtype="object") - - -def _extract_url_field(data, field_name): - if isinstance(data, str): - return getattr(urlsplit(data), field_name, "") - - return pd.Series( - [getattr(urlsplit(string), field_name, "") for string in data], - dtype=data.dtype, - name=data.name, - ) - - -@execute_node.register(ops.ExtractProtocol, (pd.Series, str)) -def execute_extract_protocol(op, data, **kwargs): - return _extract_url_field(data, "scheme") - - -@execute_node.register(ops.ExtractAuthority, (pd.Series, str)) -def execute_extract_authority(op, data, **kwargs): - return _extract_url_field(data, "netloc") - - -@execute_node.register(ops.ExtractPath, (pd.Series, str)) 
-def execute_extract_path(op, data, **kwargs): - return _extract_url_field(data, "path") - - -@execute_node.register(ops.ExtractFragment, (pd.Series, str)) -def execute_extract_fragment(op, data, **kwargs): - return _extract_url_field(data, "fragment") - - -@execute_node.register(ops.ExtractHost, (pd.Series, str)) -def execute_extract_host(op, data, **kwargs): - return _extract_url_field(data, "hostname") - - -@execute_node.register(ops.ExtractQuery, (pd.Series, str), (str, type(None))) -def execute_extract_query(op, data, key, **kwargs): - def extract_query_param(url, param_name): - query = urlsplit(url).query - if param_name is not None: - value = parse_qs(query)[param_name] - return value if len(value) > 1 else value[0] - else: - return query - - if isinstance(data, str): - return extract_query_param(data, key) - - return pd.Series( - [extract_query_param(url, key) for url in data], - dtype=data.dtype, - name=data.name, - ) - - -@execute_node.register(ops.ExtractUserInfo, (pd.Series, str)) -def execute_extract_user_info(op, data, **kwargs): - def extract_user_info(url): - url_parts = urlsplit(url) - - username = url_parts.username or "" - password = url_parts.password or "" - - return f"{username}:{password}" - - if isinstance(data, str): - return extract_user_info(data) - - return pd.Series( - [extract_user_info(string) for string in data], - dtype=data.dtype, - name=data.name, - ) diff --git a/ibis/backends/pandas/execution/structs.py b/ibis/backends/pandas/execution/structs.py deleted file mode 100644 index a2bcf7a94e11..000000000000 --- a/ibis/backends/pandas/execution/structs.py +++ /dev/null @@ -1,44 +0,0 @@ -"""Pandas backend execution of struct fields and literals.""" - -from __future__ import annotations - -import collections -import functools - -import pandas as pd -from pandas.core.groupby import SeriesGroupBy - -import ibis.expr.operations as ops -from ibis.backends.pandas.dispatch import execute_node -from ibis.backends.pandas.execution.util import get_grouping - - -@execute_node.register(ops.StructField, (collections.abc.Mapping, pd.DataFrame)) -def execute_node_struct_field_dict(op, data, **kwargs): - return data[op.field] - - -@execute_node.register(ops.StructField, (type(None), type(pd.NA), float)) -def execute_node_struct_field_none(op, data, **_): - assert (isinstance(data, float) and pd.isna(data)) or not isinstance(data, float) - return pd.NA - - -def _safe_getter(value, field: str): - if pd.isna(value): - return pd.NA - else: - return value[field] - - -@execute_node.register(ops.StructField, pd.Series) -def execute_node_struct_field_series(op, data, **kwargs): - getter = functools.partial(_safe_getter, field=op.field) - return data.map(getter).rename(op.field) - - -@execute_node.register(ops.StructField, SeriesGroupBy) -def execute_node_struct_field_series_group_by(op, data, **kwargs): - getter = functools.partial(_safe_getter, field=op.field) - groupings = get_grouping(data.grouper.groupings) - return data.obj.map(getter).rename(op.field).groupby(groupings, group_keys=False) diff --git a/ibis/backends/pandas/execution/temporal.py b/ibis/backends/pandas/execution/temporal.py deleted file mode 100644 index a2f2b5d8b5ec..000000000000 --- a/ibis/backends/pandas/execution/temporal.py +++ /dev/null @@ -1,341 +0,0 @@ -from __future__ import annotations - -import datetime - -import numpy as np -import pandas as pd -from pandas.core.groupby import SeriesGroupBy - -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -from ibis.backends.base import 
BaseBackend -from ibis.backends.base.df.scope import Scope -from ibis.backends.pandas.core import ( - date_types, - integer_types, - numeric_types, - timedelta_types, - timestamp_types, -) -from ibis.backends.pandas.dispatch import execute_node, pre_execute -from ibis.backends.pandas.execution.util import get_grouping - - -@execute_node.register(ops.Strftime, pd.Timestamp, str) -def execute_strftime_timestamp_str(op, data, format_string, **kwargs): - return data.strftime(format_string) - - -@execute_node.register(ops.Strftime, pd.Series, str) -def execute_strftime_series_str(op, data, format_string, **kwargs): - return data.dt.strftime(format_string) - - -@execute_node.register(ops.ExtractTemporalField, datetime.datetime) -def execute_extract_timestamp_field_timestamp(op, data, **kwargs): - field_name = type(op).__name__.lower().replace("extract", "") - return getattr(data, field_name) - - -@execute_node.register(ops.ExtractTemporalField, pd.Series) -def execute_extract_timestamp_field_series(op, data, **kwargs): - field_name = type(op).__name__.lower().replace("extract", "") - if field_name == "weekofyear": - return data.dt.isocalendar().week.astype(np.int32) - return getattr(data.dt, field_name).astype(np.int32) - - -@execute_node.register(ops.ExtractMillisecond, datetime.datetime) -def execute_extract_millisecond_timestamp(op, data, **kwargs): - return int(data.microsecond // 1_000) - - -@execute_node.register(ops.ExtractMicrosecond, datetime.datetime) -def execute_extract_microsecond_timestamp(op, data, **kwargs): - return int(data.microsecond) - - -@execute_node.register(ops.ExtractMillisecond, pd.Series) -def execute_extract_millisecond_series(op, data, **kwargs): - return (data.dt.microsecond // 1_000).astype(np.int32) - - -@execute_node.register(ops.ExtractMicrosecond, pd.Series) -def execute_extract_microsecond_series(op, data, **kwargs): - return data.dt.microsecond.astype(np.int32) - - -@execute_node.register(ops.ExtractEpochSeconds, pd.Series) -def execute_epoch_seconds_series(op, data, **kwargs): - return ( - data.astype("datetime64[ns]") - .astype("int64") - .floordiv(1_000_000_000) - .astype("int32") - ) - - -@execute_node.register(ops.ExtractEpochSeconds, (pd.Timestamp, datetime.datetime)) -def execute_epoch_seconds_literal(op, data, **kwargs): - return pd.Timestamp(data).floor("s").value // 1_000_000_000 - - -@execute_node.register( - ops.BetweenTime, - pd.Series, - (pd.Series, str, datetime.time), - (pd.Series, str, datetime.time), -) -def execute_between_time(op, data, lower, upper, **kwargs): - idx = pd.DatetimeIndex(data) - if idx.tz is not None: - idx = idx.tz_convert(None) # make naive because times are naive - indexer = idx.indexer_between_time(lower, upper) - result = np.zeros(len(data), dtype=np.bool_) - result[indexer] = True - return pd.Series(result) - - -@execute_node.register(ops.Date, pd.Series) -def execute_timestamp_date(op, data, **kwargs): - return data.dt.floor("d") - - -PANDAS_UNITS = { - "m": "Min", - "ms": "L", -} - - -@execute_node.register((ops.TimestampTruncate, ops.DateTruncate), pd.Series) -def execute_timestamp_truncate(op, data, **kwargs): - dt = data.dt - unit = PANDAS_UNITS.get(op.unit.short, op.unit.short) - try: - return dt.floor(unit) - except ValueError: - return dt.to_period(unit).dt.to_timestamp() - - -OFFSET_CLASS = { - "Y": pd.offsets.DateOffset, - "Q": pd.offsets.DateOffset, - "M": pd.offsets.DateOffset, - "W": pd.offsets.DateOffset, - # all other units are timedelta64s -} - - -@execute_node.register(ops.IntervalFromInteger, 
pd.Series) -def execute_interval_from_integer_series(op, data, **kwargs): - unit = op.unit.short - resolution = op.unit.plural - cls = OFFSET_CLASS.get(unit, None) - - # fast path for timedelta conversion - if cls is None: - return data.astype(f"timedelta64[{unit}]") - return data.apply(lambda n, cls=cls, resolution=resolution: cls(**{resolution: n})) - - -@execute_node.register(ops.IntervalFromInteger, integer_types) -def execute_interval_from_integer_integer_types(op, data, **kwargs): - unit = op.unit.short - resolution = op.unit.plural - cls = OFFSET_CLASS.get(unit, None) - - if cls is None: - return pd.Timedelta(data, unit=unit) - return cls(**{resolution: data}) - - -@execute_node.register(ops.Cast, pd.Series, dt.Interval) -def execute_cast_integer_to_interval_series(op, data, type, **kwargs): - to = op.to - unit = to.unit.short - resolution = to.unit.plural - cls = OFFSET_CLASS.get(unit, None) - - if cls is None: - return data.astype(f"timedelta64[{unit}]") - return data.apply(lambda n, cls=cls, resolution=resolution: cls(**{resolution: n})) - - -@execute_node.register(ops.Cast, integer_types, dt.Interval) -def execute_cast_integer_to_interval_integer_types(op, data, type, **kwargs): - to = op.to - unit = to.unit.short - resolution = to.unit.plural - cls = OFFSET_CLASS.get(unit, None) - - if cls is None: - return pd.Timedelta(data, unit=unit) - return cls(**{resolution: data}) - - -@execute_node.register(ops.TimestampAdd, timestamp_types, timedelta_types) -def execute_timestamp_add_datetime_timedelta(op, left, right, **kwargs): - return pd.Timestamp(left) + pd.Timedelta(right) - - -@execute_node.register(ops.TimestampAdd, timestamp_types, pd.Series) -def execute_timestamp_add_datetime_series(op, left, right, **kwargs): - return pd.Timestamp(left) + right - - -@execute_node.register(ops.IntervalAdd, timedelta_types, timedelta_types) -def execute_interval_add_delta_delta(op, left, right, **kwargs): - return op.op(pd.Timedelta(left), pd.Timedelta(right)) - - -@execute_node.register(ops.IntervalAdd, timedelta_types, pd.Series) -@execute_node.register( - ops.IntervalMultiply, timedelta_types, numeric_types + (pd.Series,) -) -def execute_interval_add_multiply_delta_series(op, left, right, **kwargs): - return op.op(pd.Timedelta(left), right) - - -@execute_node.register((ops.TimestampAdd, ops.IntervalAdd), pd.Series, timedelta_types) -def execute_timestamp_interval_add_series_delta(op, left, right, **kwargs): - return left + pd.Timedelta(right) - - -@execute_node.register((ops.TimestampAdd, ops.IntervalAdd), pd.Series, pd.Series) -def execute_timestamp_interval_add_series_series(op, left, right, **kwargs): - return left + right - - -@execute_node.register(ops.TimestampSub, timestamp_types, timedelta_types) -def execute_timestamp_sub_datetime_timedelta(op, left, right, **kwargs): - return pd.Timestamp(left) - pd.Timedelta(right) - - -@execute_node.register( - (ops.TimestampDiff, ops.TimestampSub), timestamp_types, pd.Series -) -def execute_timestamp_diff_sub_datetime_series(op, left, right, **kwargs): - return pd.Timestamp(left) - right - - -@execute_node.register(ops.TimestampSub, pd.Series, timedelta_types) -def execute_timestamp_sub_series_timedelta(op, left, right, **kwargs): - return left - pd.Timedelta(right) - - -@execute_node.register( - (ops.TimestampDiff, ops.TimestampSub, ops.IntervalSubtract), - pd.Series, - pd.Series, -) -def execute_timestamp_diff_sub_series_series(op, left, right, **kwargs): - return left - right - - -@execute_node.register(ops.TimestampDiff, timestamp_types, 
timestamp_types) -def execute_timestamp_diff_datetime_datetime(op, left, right, **kwargs): - return pd.Timestamp(left) - pd.Timestamp(right) - - -@execute_node.register(ops.TimestampDiff, pd.Series, timestamp_types) -def execute_timestamp_diff_series_datetime(op, left, right, **kwargs): - return left - pd.Timestamp(right) - - -@execute_node.register(ops.IntervalMultiply, pd.Series, numeric_types + (pd.Series,)) -@execute_node.register( - ops.IntervalFloorDivide, - (pd.Timedelta, pd.Series), - numeric_types + (pd.Series,), -) -def execute_interval_multiply_fdiv_series_numeric(op, left, right, **kwargs): - return op.op(left, right) - - -@execute_node.register(ops.TimestampFromUNIX, (pd.Series,) + integer_types) -def execute_timestamp_from_unix(op, data, **kwargs): - return pd.to_datetime(data, unit=op.unit.short) - - -@pre_execute.register(ops.TimestampNow) -@pre_execute.register(ops.TimestampNow, BaseBackend) -def pre_execute_timestamp_now(op, *args, **kwargs): - timecontext = kwargs.get("timecontext", None) - now = pd.Timestamp("now", tz="UTC").tz_localize(None) - return Scope({op: now}, timecontext) - - -@execute_node.register(ops.DayOfWeekIndex, (str, datetime.date)) -def execute_day_of_week_index_any(op, value, **kwargs): - return pd.Timestamp(value).dayofweek - - -@execute_node.register(ops.DayOfWeekIndex, pd.Series) -def execute_day_of_week_index_series(op, data, **kwargs): - return data.dt.dayofweek.astype(np.int16) - - -@execute_node.register(ops.DayOfWeekIndex, SeriesGroupBy) -def execute_day_of_week_index_series_group_by(op, data, **kwargs): - groupings = get_grouping(data.grouper.groupings) - return data.obj.dt.dayofweek.astype(np.int16).groupby(groupings, group_keys=False) - - -def day_name(obj: pd.core.indexes.accessors.DatetimeProperties | pd.Timestamp) -> str: - """Backwards compatible name-of-day getting function. 
- - Returns - ------- - str - The name of the day corresponding to `obj` - """ - try: - return obj.day_name() - except AttributeError: - return obj.weekday_name - - -@execute_node.register(ops.DayOfWeekName, (str, datetime.date)) -def execute_day_of_week_name_any(op, value, **kwargs): - return day_name(pd.Timestamp(value)) - - -@execute_node.register(ops.DayOfWeekName, pd.Series) -def execute_day_of_week_name_series(op, data, **kwargs): - return day_name(data.dt) - - -@execute_node.register(ops.DayOfWeekName, SeriesGroupBy) -def execute_day_of_week_name_series_group_by(op, data, **kwargs): - return day_name(data.obj.dt).groupby( - get_grouping(data.grouper.groupings), group_keys=False - ) - - -@execute_node.register(ops.DateSub, date_types, timedelta_types) -@execute_node.register(ops.DateSub, pd.Series, timedelta_types) -@execute_node.register((ops.DateDiff, ops.DateSub), pd.Series, pd.Series) -@execute_node.register(ops.DateDiff, date_types, date_types) -def execute_date_sub_diff(op, left, right, **kwargs): - return left - right - - -@execute_node.register((ops.DateDiff, ops.DateSub), date_types, pd.Series) -def execute_date_sub_diff_date_series(op, left, right, **kwargs): - return pd.Timestamp(left, unit="D") - right - - -@execute_node.register(ops.DateDiff, pd.Series, date_types) -def execute_date_sub_diff_series_date(op, left, right, **kwargs): - return left - pd.Timestamp(right, unit="D") - - -@execute_node.register(ops.DateAdd, pd.Series, timedelta_types) -@execute_node.register(ops.DateAdd, timedelta_types, pd.Series) -@execute_node.register(ops.DateAdd, pd.Series, pd.Series) -@execute_node.register(ops.DateAdd, date_types, timedelta_types) -@execute_node.register(ops.DateAdd, timedelta_types, date_types) -@execute_node.register(ops.DateAdd, date_types, pd.Series) -@execute_node.register(ops.DateAdd, pd.Series, date_types) -def execute_date_add(op, left, right, **kwargs): - return left + right diff --git a/ibis/backends/pandas/execution/timecontext.py b/ibis/backends/pandas/execution/timecontext.py deleted file mode 100644 index c9be8f75757f..000000000000 --- a/ibis/backends/pandas/execution/timecontext.py +++ /dev/null @@ -1,93 +0,0 @@ -"""Implementation of compute_time_context for time context related operations. - -Time context of a node is computed at the beginning of execution phase. - -To use time context to load time series data: - -For operations like window, asof_join that adjust time context in execution, -implement ``compute_time_context`` to pass different time contexts to child -nodes. - -If ``pre_execute`` preloads any data, it should use timecontext to trim data -to be in the time range. - -``execute_node`` of a leaf node can use timecontext to trim data, or to pass -it as a filter in the database query. - -In some cases, data need to be trimmed in ``post_execute``. - -Note: In order to use the feature we implemented here, there must be a -column of Timestamp type, and named as 'time' in Table. And this 'time' -column should be preserved across the expression tree. If 'time' column is -dropped then execution will result in error. -See ``execute_database_table_client`` in ``generic.py``. -And we assume timecontext is passed in as a tuple (begin, end) where begin and -end are timestamp, or datetime string like "20100101". Time range is inclusive -(include both begin and end points). - -This is an optional feature. The result of executing an expression without time -context is conceptually the same as executing an expression with (-inf, inf) -time context. 
-""" -from __future__ import annotations - -from typing import TYPE_CHECKING - -import ibis.expr.operations as ops -from ibis.backends.base.df.timecontext import TimeContext, adjust_context -from ibis.backends.pandas.core import ( - compute_time_context, - get_node_arguments, - is_computable_input, -) - -if TYPE_CHECKING: - from ibis.backends.base import BaseBackend - from ibis.backends.base.df.scope import Scope - - -@compute_time_context.register(ops.AsOfJoin) -def compute_time_context_asof_join( - op: ops.AsOfJoin, - scope: Scope, - clients: list[BaseBackend], - timecontext: TimeContext | None = None, - **kwargs, -): - new_timecontexts = [ - timecontext for arg in get_node_arguments(op) if is_computable_input(arg) - ] - - if not timecontext: - return new_timecontexts - - # right table is the second node in children - new_timecontexts = [ - new_timecontexts[0], - adjust_context(op, scope, timecontext), - *new_timecontexts[2:], - ] - return new_timecontexts - - -@compute_time_context.register(ops.Window) -def compute_time_context_window( - op: ops.Window, - scope: Scope, - clients: list[BaseBackend], - timecontext: TimeContext | None = None, - **kwargs, -): - new_timecontexts = [ - timecontext for arg in get_node_arguments(op) if is_computable_input(arg) - ] - - if not timecontext: - return new_timecontexts - - result = adjust_context(op, scope, timecontext) - - new_timecontexts = [ - result for arg in get_node_arguments(op) if is_computable_input(arg) - ] - return new_timecontexts diff --git a/ibis/backends/pandas/execution/util.py b/ibis/backends/pandas/execution/util.py deleted file mode 100644 index 15b43c8832bd..000000000000 --- a/ibis/backends/pandas/execution/util.py +++ /dev/null @@ -1,144 +0,0 @@ -from __future__ import annotations - -from typing import Any - -import pandas as pd - -import ibis.expr.analysis as an -import ibis.expr.operations as ops -import ibis.util -from ibis.backends.base.df.scope import Scope -from ibis.backends.pandas.core import execute -from ibis.backends.pandas.execution import constants - - -def get_grouping(grouper): - # this is such an annoying hack - assert isinstance(grouper, list) - if len(grouper) == 1: - return grouper[0] - return grouper - - -def get_join_suffix_for_op(op: ops.TableColumn, join_op: ops.Join): - (root_table,) = an.find_immediate_parent_tables(op) - left_root, right_root = an.find_immediate_parent_tables( - [join_op.left, join_op.right] - ) - return { - left_root: constants.LEFT_JOIN_SUFFIX, - right_root: constants.RIGHT_JOIN_SUFFIX, - }[root_table] - - -def compute_sort_key(key, data, timecontext, scope=None, **kwargs): - if key.shape.is_columnar(): - if key.name in data: - return key.name, None - else: - if scope is None: - scope = Scope() - scope = scope.merge_scopes( - Scope({t: data}, timecontext) - for t in an.find_immediate_parent_tables(key) - ) - new_column = execute(key, scope=scope, **kwargs) - name = ibis.util.guid() - new_column.name = name - return name, new_column - else: - raise NotImplementedError( - "Scalar sort keys are not yet supported in the pandas backend" - ) - - -def compute_sorted_frame(df, order_by, group_by=(), timecontext=None, **kwargs): - sort_keys = [] - ascending = [] - - for value in group_by: - sort_keys.append(value) - ascending.append(True) - for key in order_by: - sort_keys.append(key) - ascending.append(key.ascending) - - new_columns = {} - computed_sort_keys = [] - for key in sort_keys: - computed_sort_key, temporary_column = compute_sort_key( - key, df, timecontext, **kwargs - ) - 
computed_sort_keys.append(computed_sort_key) - - if temporary_column is not None: - new_columns[computed_sort_key] = temporary_column - - result = df.assign(**new_columns) - try: - result = result.sort_values( - computed_sort_keys, ascending=ascending, kind="mergesort" - ) - except TypeError: - result = result.sort_values(computed_sort_keys, ascending=ascending) - # TODO: we'll eventually need to return this frame with the temporary - # columns and drop them in the caller (maybe using post_execute?) - ngrouping_keys = len(group_by) - return ( - result, - computed_sort_keys[:ngrouping_keys], - computed_sort_keys[ngrouping_keys:], - ) - - -def coerce_to_output( - result: Any, node: ops.Node, index: pd.Index | None = None -) -> pd.Series | pd.DataFrame: - """Cast the result to either a Series or DataFrame. - - This method casts result of an execution to a Series or DataFrame, - depending on the type of the expression and shape of the result. - - Parameters - ---------- - result: Any - The result to cast - node: ibis.expr.operations.Node - The operation node associated with the result - index: pd.Index - Optional. If passed, scalar results will be broadcasted according - to the index. - - Returns - ------- - result: A Series or DataFrame - - Examples - -------- - For dataframe outputs, see ``ibis.util.coerce_to_dataframe``. - - >>> coerce_to_output(pd.Series(1), node) # quartodoc: +SKIP # doctest: +SKIP - 0 1 - Name: result, dtype: int64 - >>> coerce_to_output(1, node) # quartodoc: +SKIP # doctest: +SKIP - 0 1 - Name: result, dtype: int64 - >>> coerce_to_output(1, node, [1, 2, 3]) # quartodoc: +SKIP # doctest: +SKIP - 1 1 - 2 1 - 3 1 - Name: result, dtype: int64 - >>> coerce_to_output([1, 2, 3], node) # quartodoc: +SKIP # doctest: +SKIP - 0 [1, 2, 3] - Name: result, dtype: object - """ - if isinstance(result, pd.DataFrame): - rows = result.to_dict(orient="records") - return pd.Series(rows, name=node.name) - - # columnar result - if isinstance(result, pd.Series): - return result.rename(node.name) - - # Wrap `result` into a single-element Series. 
- return pd.Series([result], name=node.name) diff --git a/ibis/backends/pandas/execution/window.py b/ibis/backends/pandas/execution/window.py deleted file mode 100644 index 39475ecc2bb6..000000000000 --- a/ibis/backends/pandas/execution/window.py +++ /dev/null @@ -1,526 +0,0 @@ -"""Code for computing window functions with ibis and pandas.""" - -from __future__ import annotations - -import operator -from typing import TYPE_CHECKING, Any, Callable, NoReturn - -import numpy as np -import pandas as pd -import toolz -from multipledispatch import Dispatcher -from pandas.core.groupby import SeriesGroupBy - -import ibis.expr.analysis as an -import ibis.expr.operations as ops -from ibis.backends.base.df.scope import Scope -from ibis.backends.base.df.timecontext import ( - TimeContext, - construct_time_context_aware_series, - get_time_col, -) -from ibis.backends.pandas import aggcontext as agg_ctx -from ibis.backends.pandas.core import ( - compute_time_context, - date_types, - execute, - integer_types, - simple_types, - timedelta_types, - timestamp_types, -) -from ibis.backends.pandas.dispatch import execute_node, pre_execute -from ibis.backends.pandas.execution import util - -if TYPE_CHECKING: - from ibis.backends.pandas.aggcontext import AggregationContext - - -def _post_process_empty( - result: Any, - parent: pd.DataFrame, - order_by: list[str], - group_by: list[str], - timecontext: TimeContext | None, -) -> pd.Series: - # This is the post process of the no groupby nor orderby window - # `result` could be a Series, DataFrame, or a scalar. generated - # by `agg` method of class `Window`. For window without grouby or - # orderby, `agg` calls pands method directly. So if timecontext is - # present, we need to insert 'time' column into index for trimming the - # result. For cases when grouby or orderby is present, `agg` calls - # Ibis method `window_agg_built_in` and `window_agg_udf`, time - # context is already inserted there. - assert not order_by and not group_by - if isinstance(result, (pd.Series, pd.DataFrame)): - if timecontext: - result = construct_time_context_aware_series(result, parent) - return result - else: - # `result` is a scalar when a reduction operation is being - # applied over the window, since reduction operations are N->1 - # in this case we do not need to trim result by timecontext, - # just expand reduction result to be a Series with `index`. 
- index = parent.index - result = pd.Series([result]).repeat(len(index)) - result.index = index - return result - - -def _post_process_group_by( - series: pd.Series, - parent: pd.DataFrame, - order_by: list[str], - group_by: list[str], - timecontext: TimeContext | None, -) -> pd.Series: - assert not order_by and group_by - return series - - -def _post_process_order_by( - series, - parent: pd.DataFrame, - order_by: list[str], - group_by: list[str], - timecontext: TimeContext | None, -) -> pd.Series: - assert order_by and not group_by - indexed_parent = parent.set_index(order_by) - index = indexed_parent.index - - # get the names of the levels that will be in the result - series_index_names = frozenset(series.index.names) - - # get the levels common to series.index, in the order that they occur in - # the parent's index - reordered_levels = [name for name in index.names if name in series_index_names] - - if len(reordered_levels) > 1: - series = series.reorder_levels(reordered_levels) - - series = series.iloc[index.argsort(kind="mergesort")] - return series - - -def _post_process_group_by_order_by( - series: pd.Series, - parent: pd.DataFrame, - order_by: list[str], - group_by: list[str], - timecontext: TimeContext | None, -) -> pd.Series: - indexed_parent = parent.set_index(group_by + order_by, append=True) - index = indexed_parent.index - - # get the names of the levels that will be in the result - series_index_names = frozenset(series.index.names) - - # get the levels common to series.index, in the order that they occur in - # the parent's index - reordered_levels = [name for name in index.names if name in series_index_names] - - if len(reordered_levels) > 1: - series = series.reorder_levels(reordered_levels) - return series - - -get_aggcontext = Dispatcher("get_aggcontext") - - -@get_aggcontext.register(object) -def get_aggcontext_default( - window, - *, - scope, - operand, - parent, - group_by, - order_by, - **kwargs, -) -> NoReturn: - raise NotImplementedError( - f"get_aggcontext is not implemented for {type(window).__name__}" - ) - - -@get_aggcontext.register(ops.WindowFrame) -def get_aggcontext_window( - frame, - *, - scope, - operand, - parent, - group_by, - order_by, - **kwargs, -) -> AggregationContext: - # no order by or group by: default summarization aggcontext - # - # if we're reducing and we have an order by expression then we need to - # expand or roll. 
- # - # otherwise we're transforming - output_type = operand.dtype - - if not group_by and not order_by: - aggcontext = agg_ctx.Summarize(parent=parent, output_type=output_type) - elif group_by and not order_by: - # groupby transform (window with a partition by clause in SQL parlance) - aggcontext = agg_ctx.Transform( - parent=parent, - group_by=group_by, - order_by=order_by, - output_type=output_type, - ) - elif frame.start is not None: - if isinstance(frame, ops.RowsWindowFrame): - max_lookback = frame.max_lookback - else: - max_lookback = None - - aggcontext = agg_ctx.Moving( - frame.start, - # FIXME(kszucs): I don't think that we have a proper max_lookback test - # case because passing None here is not braking anything - max_lookback=max_lookback, - parent=parent, - group_by=group_by, - order_by=order_by, - output_type=output_type, - ) - else: - # expanding window - aggcontext = agg_ctx.Cumulative( - parent=parent, - group_by=group_by, - order_by=order_by, - output_type=output_type, - ) - - return aggcontext - - -def trim_window_result(data: pd.Series | pd.DataFrame, timecontext: TimeContext | None): - """Trim data within time range defined by timecontext. - - This is a util function used in ``execute_window_op``, where time - context might be adjusted for calculation. Data must be trimmed - within the original time context before return. - `data` is a pd.Series with Multiindex for most cases, for multi - column udf result, `data` could be a pd.DataFrame - - Params - ------ - data: pd.Series or pd.DataFrame - timecontext: Optional[TimeContext] - - Returns - ------- - a trimmed pd.Series or or pd.DataFrame with the same Multiindex - as data's - """ - # noop if timecontext is None - if not timecontext: - return data - assert isinstance( - data, (pd.Series, pd.DataFrame) - ), "window computed columns is not a pd.Series nor a pd.DataFrame" - - # reset multiindex, convert Series into a DataFrame - df = data.reset_index() - - # Filter the data, here we preserve the time index so that when user is - # computing a single column, the computation and the relevant time - # indexes are returned. 
- time_col = get_time_col() - if time_col not in df: - return data - - subset = df.loc[df[time_col].between(*timecontext)] - - # Get columns to set for index - if isinstance(data, pd.Series): - # if Series doesn't contain a name, reset_index will assign - # '0' as the column name for the column of value - name = data.name if data.name else 0 - index_columns = list(subset.columns.difference([name])) - else: - name = data.columns - index_columns = list(subset.columns.difference(name)) - - # set the correct index for return Series / DataFrame - indexed_subset = subset.set_index(index_columns) - return indexed_subset[name] - - -@execute_node.register(ops.WindowFunction, [pd.Series]) -def execute_window_op( - op, - *data, - scope: Scope | None = None, - timecontext: TimeContext | None = None, - aggcontext=None, - clients=None, - **kwargs, -): - func, frame = op.func, op.frame - - if frame.how == "range" and any( - not col.dtype.is_temporal() for col in frame.order_by - ): - raise NotImplementedError( - "The pandas backend only implements range windows with temporal " - "ordering keys" - ) - - # pre execute "manually" here because otherwise we wouldn't pickup - # relevant scope changes from the child operand since we're managing - # execution of that by hand - - adjusted_timecontext = None - if timecontext: - arg_timecontexts = compute_time_context( - op, timecontext=timecontext, clients=clients, scope=scope - ) - # timecontext is the original time context required by parent node - # of this Window, while adjusted_timecontext is the adjusted context - # of this Window, since we are doing a manual execution here, use - # adjusted_timecontext in later execution phases - adjusted_timecontext = arg_timecontexts[0] - - pre_executed_scope = pre_execute( - func, - *clients, - scope=scope, - timecontext=adjusted_timecontext, - aggcontext=aggcontext, - **kwargs, - ) - if scope is None: - scope = pre_executed_scope - else: - scope = scope.merge_scope(pre_executed_scope) - - root_table = an.find_first_base_table(op) - data = execute( - root_table, - scope=scope, - timecontext=adjusted_timecontext, - clients=clients, - aggcontext=aggcontext, - **kwargs, - ) - - grouping_keys = [ - key.name - if isinstance(key, ops.TableColumn) - else execute( - key, - scope=scope, - clients=clients, - timecontext=adjusted_timecontext, - aggcontext=aggcontext, - **kwargs, - ) - for key in frame.group_by - ] - - if not frame.order_by: - ordering_keys = [] - - post_process: Callable[ - [Any, pd.DataFrame, list[str], list[str], TimeContext | None], - pd.Series, - ] - if frame.group_by: - if frame.order_by: - sorted_df, grouping_keys, ordering_keys = util.compute_sorted_frame( - data, - frame.order_by, - group_by=frame.group_by, - timecontext=adjusted_timecontext, - **kwargs, - ) - source = sorted_df.groupby(grouping_keys, sort=True, group_keys=False) - post_process = _post_process_group_by_order_by - else: - source = data.groupby(grouping_keys, sort=False, group_keys=False) - post_process = _post_process_group_by - elif frame.order_by: - source, grouping_keys, ordering_keys = util.compute_sorted_frame( - data, frame.order_by, timecontext=adjusted_timecontext, **kwargs - ) - post_process = _post_process_order_by - else: - source = data - post_process = _post_process_empty - - # Here groupby object should be add to the corresponding node in scope - # for execution, data will be overwrite to a groupby object, so we - # force an update regardless of time context - new_scope = scope.merge_scopes( - [ - Scope({t: source}, 
adjusted_timecontext) - for t in an.find_immediate_parent_tables(func) - ], - overwrite=True, - ) - - aggcontext = get_aggcontext( - frame, - scope=scope, - operand=func, - parent=source, - group_by=grouping_keys, - order_by=ordering_keys, - **kwargs, - ) - result = execute( - func, - scope=new_scope, - timecontext=adjusted_timecontext, - aggcontext=aggcontext, - clients=clients, - **kwargs, - ) - result = post_process( - result, - data, - ordering_keys, - grouping_keys, - adjusted_timecontext, - ) - assert len(data) == len( - result - ), "input data source and computed column do not have the same length" - - # trim data to original time context - result = trim_window_result(result, timecontext) - return result - - -def post_lead_lag(result, default): - if not pd.isnull(default): - return result.fillna(default) - return result - - -@execute_node.register( - (ops.Lead, ops.Lag), - (pd.Series, SeriesGroupBy), - integer_types + (type(None),), - simple_types + (type(None),), -) -def execute_series_lead_lag(op, data, offset, default, **kwargs): - func = toolz.identity if isinstance(op, ops.Lag) else operator.neg - result = data.shift(func(1 if offset is None else offset)) - return post_lead_lag(result, default) - - -@execute_node.register( - (ops.Lead, ops.Lag), - (pd.Series, SeriesGroupBy), - timedelta_types, - date_types + timestamp_types + (str, type(None)), -) -def execute_series_lead_lag_timedelta( - op, data, offset, default, aggcontext=None, **kwargs -): - """Shift a column relative to another one in units of time instead of rows.""" - # lagging adds time (delayed), leading subtracts time (moved up) - func = operator.add if isinstance(op, ops.Lag) else operator.sub - group_by = aggcontext.group_by - order_by = aggcontext.order_by - - # get the parent object from which `data` originated - parent = aggcontext.parent - - # get the DataFrame from the parent object, handling the DataFrameGroupBy - # case - parent_df = getattr(parent, "obj", parent) - - # index our parent df by grouping and ordering keys - indexed_original_df = parent_df.set_index(group_by + order_by) - - # perform the time shift - adjusted_parent_df = parent_df.assign( - **{k: func(parent_df[k], offset) for k in order_by} - ) - - # index the parent *after* adjustment - adjusted_indexed_parent = adjusted_parent_df.set_index(group_by + order_by) - - # get the column we care about - result = adjusted_indexed_parent[getattr(data, "obj", data).name] - - # reindex the shifted data by the original frame's index - result = result.reindex(indexed_original_df.index) - - # add a default if necessary - return post_lead_lag(result, default) - - -@execute_node.register(ops.FirstValue, pd.Series) -def execute_series_first_value(op, data, **kwargs): - return data.iloc[np.repeat(0, len(data))] - - -def _getter(x: pd.Series | np.ndarray, idx: int): - return getattr(x, "values", x)[idx] - - -@execute_node.register(ops.FirstValue, SeriesGroupBy) -def execute_series_group_by_first_value(op, data, aggcontext=None, **kwargs): - return aggcontext.agg(data, lambda x: _getter(x, 0)) - - -@execute_node.register(ops.LastValue, pd.Series) -def execute_series_last_value(op, data, **kwargs): - return data.iloc[np.repeat(-1, len(data))] - - -@execute_node.register(ops.LastValue, SeriesGroupBy) -def execute_series_group_by_last_value(op, data, aggcontext=None, **kwargs): - return aggcontext.agg(data, lambda x: _getter(x, -1)) - - -@execute_node.register(ops.MinRank) -def execute_series_min_rank(op, aggcontext=None, **kwargs): - (key,) = aggcontext.order_by - 
df = aggcontext.parent - data = df[key] - return data.rank(method="min", ascending=True).astype("int64") - 1 - - -@execute_node.register(ops.DenseRank) -def execute_series_dense_rank(op, aggcontext=None, **kwargs): - (key,) = aggcontext.order_by - df = aggcontext.parent - data = df[key] - return data.rank(method="dense", ascending=True).astype("int64") - 1 - - -@execute_node.register(ops.PercentRank) -def execute_series_group_by_percent_rank(op, aggcontext=None, **kwargs): - (key,) = aggcontext.order_by - df = aggcontext.parent - data = df[key] - - result = data.rank(method="min", ascending=True) - 1 - - if isinstance(data, SeriesGroupBy): - nrows = data.transform("count") - else: - nrows = len(data) - - result /= nrows - 1 - return result - - -@execute_node.register(ops.CumeDist) -def execute_series_group_by_cume_dist(op, aggcontext=None, **kwargs): - (key,) = aggcontext.order_by - df = aggcontext.parent - data = df[key] - return data.rank(method="min", ascending=True, pct=True) diff --git a/ibis/backends/pandas/executor.py b/ibis/backends/pandas/executor.py new file mode 100644 index 000000000000..f9dd69a3c027 --- /dev/null +++ b/ibis/backends/pandas/executor.py @@ -0,0 +1,761 @@ +from __future__ import annotations + +import operator +from functools import reduce + +import numpy as np +import pandas as pd + +import ibis.expr.operations as ops +from ibis.backends.pandas.convert import PandasConverter +from ibis.backends.pandas.helpers import ( + GroupedFrame, + RangeFrame, + RowsFrame, + UngroupedFrame, + agg, + asframe, + asseries, + columnwise, + elementwise, + rowwise, + serieswise, +) +from ibis.backends.pandas.kernels import pick_kernel +from ibis.backends.pandas.rewrites import ( + PandasAggregate, + PandasAsofJoin, + PandasJoin, + PandasLimit, + PandasRename, + PandasScalarSubquery, + plan, +) +from ibis.common.dispatch import Dispatched +from ibis.common.exceptions import OperationNotDefinedError, UnboundExpressionError +from ibis.formats.pandas import PandasData +from ibis.util import gen_name + +# ruff: noqa: F811 + + +_reduction_operations = { + ops.Min: lambda x: x.min(), + ops.Max: lambda x: x.max(), + ops.Sum: lambda x: x.sum(), + ops.Mean: lambda x: x.mean(), + ops.Count: lambda x: x.count(), + ops.Mode: lambda x: x.mode().iat[0], + ops.Any: lambda x: x.any(), + ops.All: lambda x: x.all(), + ops.Median: lambda x: x.median(), + ops.ApproxMedian: lambda x: x.median(), + ops.BitAnd: lambda x: np.bitwise_and.reduce(x.values), + ops.BitOr: lambda x: np.bitwise_or.reduce(x.values), + ops.BitXor: lambda x: np.bitwise_xor.reduce(x.values), + ops.Last: lambda x: x.iat[-1], + ops.First: lambda x: x.iat[0], + ops.CountDistinct: lambda x: x.nunique(), + ops.ApproxCountDistinct: lambda x: x.nunique(), + ops.ArrayCollect: lambda x: x.tolist(), +} + + +class Executor(Dispatched): + @classmethod + def visit(cls, op: ops.Node, **kwargs): + raise OperationNotDefinedError( + f"Operation {op!r} is not implemented for the pandas backend" + ) + + @classmethod + def visit(cls, op: ops.Literal, value, dtype): + if dtype.is_interval(): + value = pd.Timedelta(value, dtype.unit.short) + elif dtype.is_array(): + value = np.array(value) + elif dtype.is_date(): + value = pd.Timestamp(value, tz="UTC").tz_localize(None) + return value + + @classmethod + def visit(cls, op: ops.Field, rel, name): + return rel[name] + + @classmethod + def visit(cls, op: ops.Alias, arg, name): + try: + return arg.rename(name) + except AttributeError: + return arg + + @classmethod + def visit(cls, op: ops.SortKey, expr, 
ascending): + return expr + + @classmethod + def visit(cls, op: ops.Cast, arg, to): + if isinstance(arg, pd.Series): + return PandasConverter.convert_column(arg, to) + else: + return PandasConverter.convert_scalar(arg, to) + + @classmethod + def visit(cls, op: ops.TypeOf, arg): + raise OperationNotDefinedError("TypeOf is not implemented") + + @classmethod + def visit(cls, op: ops.RandomScalar): + raise OperationNotDefinedError("RandomScalar is not implemented") + + @classmethod + def visit(cls, op: ops.Greatest, arg): + return columnwise(lambda df: df.max(axis=1), arg) + + @classmethod + def visit(cls, op: ops.Least, arg): + return columnwise(lambda df: df.min(axis=1), arg) + + @classmethod + def visit(cls, op: ops.Coalesce, arg): + return columnwise(lambda df: df.bfill(axis=1).iloc[:, 0], arg) + + @classmethod + def visit(cls, op: ops.Value, **operands): + return pick_kernel(op, operands) + + @classmethod + def visit(cls, op: ops.IsNan, arg): + try: + return np.isnan(arg) + except (TypeError, ValueError): + # if `arg` contains `None` np.isnan will complain + # so we take advantage of NaN not equaling itself + # to do the correct thing + return arg != arg + + @classmethod + def visit(cls, op: ops.SearchedCase, cases, results, default): + cases, _ = asframe(cases, concat=False) + results, _ = asframe(results, concat=False) + out = np.select(cases, results, default) + return pd.Series(out) + + @classmethod + def visit(cls, op: ops.SimpleCase, base, cases, results, default): + if isinstance(default, pd.Series): + raise NotImplementedError( + "SimpleCase with a columnar shaped default value is not implemented" + ) + cases = tuple(base == case for case in cases) + cases, _ = asframe(cases, concat=False) + results, _ = asframe(results, concat=False) + out = np.select(cases, results, default) + return pd.Series(out) + + @classmethod + def visit(cls, op: ops.TimestampTruncate | ops.DateTruncate, arg, unit): + # TODO(kszucs): should use serieswise() + unit = {"m": "Min", "ms": "L"}.get(unit.short, unit.short) + try: + return arg.dt.floor(unit) + except ValueError: + return arg.dt.to_period(unit).dt.to_timestamp() + + @classmethod + def visit(cls, op: ops.IntervalFromInteger, unit, **kwargs): + if unit.short in {"Y", "Q", "M", "W"}: + return elementwise(lambda v: pd.DateOffset(**{unit.plural: v}), kwargs) + else: + return serieswise( + lambda arg: arg.astype(f"timedelta64[{unit.short}]"), kwargs + ) + + @classmethod + def visit(cls, op: ops.BetweenTime, arg, lower_bound, upper_bound): + idx = pd.DatetimeIndex(arg) + if idx.tz is not None: + idx = idx.tz_convert(None) # make naive because times are naive + indexer = idx.indexer_between_time(lower_bound, upper_bound) + result = np.zeros(len(arg), dtype=np.bool_) + result[indexer] = True + return pd.Series(result) + + @classmethod + def visit(cls, op: ops.FindInSet, needle, values): + (needle, *haystack), _ = asframe((needle, *values), concat=False) + condlist = [needle == col for col in haystack] + choicelist = [i for i, _ in enumerate(haystack)] + result = np.select(condlist, choicelist, default=-1) + return pd.Series(result, name=op.name) + + @classmethod + def visit(cls, op: ops.Array, exprs): + return rowwise(lambda row: np.array(row, dtype=object), exprs) + + @classmethod + def visit(cls, op: ops.ArrayConcat, arg): + return rowwise(lambda row: np.concatenate(row.values), arg) + + @classmethod + def visit(cls, op: ops.Unnest, arg): + arg = asseries(arg) + mask = arg.map(lambda v: bool(len(v)), na_action="ignore") + return arg[mask].explode() + + 
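As an illustration (made-up data, not taken from the patch itself): the Unnest handler above first masks out empty arrays and then relies on pandas' Series.explode to flatten the remaining rows, one output row per array element. A minimal sketch of that behavior on plain pandas objects:

    import pandas as pd

    s = pd.Series([[1, 2], [], [3]])
    # keep only non-empty arrays, mirroring the mask built above
    mask = s.map(lambda v: bool(len(v)), na_action="ignore")
    flattened = s[mask].explode()
    # flattened holds 1, 2, 3, with the source row index repeated per element (0, 0, 2)
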
@classmethod + def visit( + cls, op: ops.ElementWiseVectorizedUDF, func, func_args, input_type, return_type + ): + """Execute an elementwise UDF.""" + + res = func(*func_args) + if isinstance(res, pd.DataFrame): + # it is important otherwise it is going to fill up the memory + res = res.apply(lambda row: row.to_dict(), axis=1) + + return res + + ############################# Reductions ################################## + + @classmethod + def visit(cls, op: ops.Reduction, arg, where): + func = _reduction_operations[type(op)] + return agg(func, arg, where) + + @classmethod + def visit(cls, op: ops.CountStar, arg, where): + def agg(df): + if where is None: + return len(df) + else: + return df[where.name].sum() + + return agg + + @classmethod + def visit(cls, op: ops.CountDistinctStar, arg, where): + def agg(df): + if where is None: + return df.nunique() + else: + return df[where.name].nunique() + + return agg + + @classmethod + def visit(cls, op: ops.Arbitrary, arg, where, how): + if how == "first": + return agg(lambda x: x.iat[0], arg, where) + elif how == "last": + return agg(lambda x: x.iat[-1], arg, where) + else: + raise OperationNotDefinedError(f"Arbitrary {how!r} is not supported") + + @classmethod + def visit(cls, op: ops.ArgMin | ops.ArgMax, arg, key, where): + func = operator.methodcaller(op.__class__.__name__.lower()) + + if where is None: + + def agg(df): + indices = func(df[key.name]) + return df[arg.name].iloc[indices] + else: + + def agg(df): + mask = df[where.name] + filtered = df[mask] + indices = func(filtered[key.name]) + return filtered[arg.name].iloc[indices] + + return agg + + @classmethod + def visit(cls, op: ops.Variance, arg, where, how): + ddof = {"pop": 0, "sample": 1}[how] + return agg(lambda x: x.var(ddof=ddof), arg, where) + + @classmethod + def visit(cls, op: ops.StandardDev, arg, where, how): + ddof = {"pop": 0, "sample": 1}[how] + return agg(lambda x: x.std(ddof=ddof), arg, where) + + @classmethod + def visit(cls, op: ops.Correlation, left, right, where, how): + if where is None: + + def agg(df): + return df[left.name].corr(df[right.name]) + else: + + def agg(df): + mask = df[where.name] + lhs = df[left.name][mask] + rhs = df[right.name][mask] + return lhs.corr(rhs) + + return agg + + @classmethod + def visit(cls, op: ops.Covariance, left, right, where, how): + ddof = {"pop": 0, "sample": 1}[how] + if where is None: + + def agg(df): + return df[left.name].cov(df[right.name], ddof=ddof) + else: + + def agg(df): + mask = df[where.name] + lhs = df[left.name][mask] + rhs = df[right.name][mask] + return lhs.cov(rhs, ddof=ddof) + + return agg + + @classmethod + def visit(cls, op: ops.GroupConcat, arg, sep, where): + if where is None: + + def agg(df): + return sep.join(df[arg.name].astype(str)) + else: + + def agg(df): + mask = df[where.name] + group = df[arg.name][mask] + if group.empty: + return pd.NA + return sep.join(group) + + return agg + + @classmethod + def visit(cls, op: ops.Quantile, arg, quantile, where): + return agg(lambda x: x.quantile(quantile), arg, where) + + @classmethod + def visit(cls, op: ops.MultiQuantile, arg, quantile, where): + return agg(lambda x: list(x.quantile(quantile)), arg, where) + + @classmethod + def visit( + cls, op: ops.ReductionVectorizedUDF, func, func_args, input_type, return_type + ): + def agg(df): + args = [df[col.name] for col in func_args] + return func(*args) + + return agg + + ############################# Analytic #################################### + + @classmethod + def visit(cls, op: ops.RowNumber): + def agg(df, 
order_keys): + return pd.Series(np.arange(len(df)), index=df.index) + + return agg + + @classmethod + def visit(cls, op: ops.Lag | ops.Lead, arg, offset, default): + if isinstance(op, ops.Lag): + sign = lambda x: x + else: + sign = lambda x: -x + + if op.offset is not None and op.offset.dtype.is_interval(): + + def agg(df, order_keys): + df = df.set_index(order_keys) + col = df[arg.name].shift(freq=sign(offset)) + return col.reindex(df.index, fill_value=default) + else: + offset = 1 if offset is None else offset + + def agg(df, order_keys): + return df[arg.name].shift(sign(offset), fill_value=default) + + return agg + + @classmethod + def visit(cls, op: ops.MinRank | ops.DenseRank): + method = "dense" if isinstance(op, ops.DenseRank) else "min" + + def agg(df, order_keys): + if len(order_keys) == 0: + raise ValueError("order_by argument is required for rank functions") + elif len(order_keys) == 1: + s = df[order_keys[0]] + else: + s = df[order_keys].apply(tuple, axis=1) + + return s.rank(method=method).astype("int64") - 1 + + return agg + + @classmethod + def visit(cls, op: ops.PercentRank): + def agg(df, order_keys): + if len(order_keys) == 0: + raise ValueError("order_by argument is required for rank functions") + elif len(order_keys) == 1: + s = df[order_keys[0]] + else: + s = df[order_keys].apply(tuple, axis=1) + + return s.rank(method="min").sub(1).div(len(df) - 1) + + return agg + + @classmethod + def visit(cls, op: ops.CumeDist): + def agg(df, order_keys): + if len(order_keys) == 0: + raise ValueError("order_by argument is required for rank functions") + elif len(order_keys) == 1: + s = df[order_keys[0]] + else: + s = df[order_keys].apply(tuple, axis=1) + + return s.rank(method="average", pct=True) + + return agg + + @classmethod + def visit(cls, op: ops.FirstValue | ops.LastValue, arg): + i = 0 if isinstance(op, ops.FirstValue) else -1 + + def agg(df, order_keys): + return df[arg.name].iat[i] + + return agg + + @classmethod + def visit( + cls, op: ops.AnalyticVectorizedUDF, func, func_args, input_type, return_type + ): + def agg(df, order_keys): + args = [df[col.name] for col in func_args] + return func(*args) + + return agg + + ############################ Window functions ############################# + + @classmethod + def visit(cls, op: ops.WindowBoundary, value, preceding): + return value + + @classmethod + def visit( + cls, op: ops.WindowFrame, table, start, end, group_by, order_by, **kwargs + ): + if start is not None: + start = asseries(start, len(table)) + if op.start.preceding: + start = -start + if end is not None: + end = asseries(end, len(table)) + if op.end.preceding: + end = -end + + table = table.assign(__start__=start, __end__=end) + + # TODO(kszucs): order by ibis.random() is not supported because it is + # excluded from the group by keys due to its scalar shape + group_keys = [group.name for group in op.group_by] + order_keys = [key.name for key in op.order_by if key.shape.is_columnar()] + ascending = [key.ascending for key in op.order_by if key.shape.is_columnar()] + + if order_by: + table = table.sort_values(order_keys, ascending=ascending, kind="mergesort") + + if group_by: + frame = GroupedFrame(df=table, group_keys=group_keys) + else: + frame = UngroupedFrame(df=table) + + if start is None and end is None: + return frame + elif op.how == "rows": + return RowsFrame(parent=frame) + elif op.how == "range": + if len(order_keys) != 1: + raise NotImplementedError( + "Only single column order by is supported for range window frames" + ) + return 
RangeFrame(parent=frame, order_key=order_keys[0]) + else: + raise NotImplementedError(f"Unsupported window frame type: {op.how}") + + @classmethod + def visit(cls, op: ops.WindowFunction, func, frame): + if isinstance(op.func, ops.Analytic): + order_keys = [key.name for key in op.frame.order_by] + return frame.apply_analytic(func, order_keys=order_keys) + else: + return frame.apply_reduction(func) + + ############################ Relational ################################### + + @classmethod + def visit(cls, op: ops.DatabaseTable, name, schema, source, namespace): + try: + return source.dictionary[name] + except KeyError: + raise UnboundExpressionError( + f"{name} is not a table in the {source.name!r} backend, you " + "probably tried to execute an expression without a data source" + ) + + @classmethod + def visit(cls, op: ops.InMemoryTable, name, schema, data): + return data.to_frame() + + @classmethod + def visit(cls, op: ops.DummyTable, values): + df, _ = asframe(values) + return df + + @classmethod + def visit(cls, op: ops.SelfReference | ops.JoinTable, parent, **kwargs): + return parent + + @classmethod + def visit(cls, op: PandasRename, parent, mapping): + return parent.rename(columns=mapping) + + @classmethod + def visit(cls, op: PandasLimit, parent, n, offset): + n = n.iat[0, 0] + offset = offset.iat[0, 0] + if n is None: + return parent.iloc[offset:] + else: + return parent.iloc[offset : offset + n] + + @classmethod + def visit(cls, op: ops.Sample, parent, fraction, method, seed): + return parent.sample(frac=fraction, random_state=seed) + + @classmethod + def visit(cls, op: ops.Project, parent, values): + df, all_scalars = asframe(values) + if all_scalars and len(parent) != len(df): + df = pd.concat([df] * len(parent)) + return df + + @classmethod + def visit(cls, op: ops.Filter, parent, predicates): + if predicates: + pred = reduce(operator.and_, predicates) + if len(pred) != len(parent): + raise RuntimeError( + "Selection predicate length does not match underlying table" + ) + parent = parent.loc[pred].reset_index(drop=True) + return parent + + @classmethod + def visit(cls, op: ops.Sort, parent, keys): + # 1. add sort key columns to the dataframe if they are not already present + # 2. sort the dataframe using those columns + # 3. 
drop the sort key columns + ascending = [key.ascending for key in op.keys] + newcols = {gen_name("sort_key"): col for col in keys} + names = list(newcols.keys()) + df = parent.assign(**newcols) + df = df.sort_values(by=names, ascending=ascending, ignore_index=True) + return df.drop(names, axis=1) + + @classmethod + def visit(cls, op: PandasAggregate, parent, groups, metrics): + if groups: + parent = parent.groupby([col.name for col in groups.values()]) + metrics = {k: parent.apply(v) for k, v in metrics.items()} + result = pd.concat(metrics, axis=1).reset_index() + renames = {v.name: k for k, v in op.groups.items()} + return result.rename(columns=renames) + else: + results = {k: v(parent) for k, v in metrics.items()} + combined, _ = asframe(results) + return combined + + @classmethod + def visit(cls, op: PandasJoin, how, left, right, left_on, right_on): + # broadcast predicates if they are scalar values + left_size = len(left) + left_on = [asseries(v, left_size) for v in left_on] + right_size = len(right) + right_on = [asseries(v, right_size) for v in right_on] + + if how == "cross": + assert not left_on and not right_on + return pd.merge(left, right, how="cross") + elif how == "anti": + df = pd.merge( + left, + right, + how="outer", + left_on=left_on, + right_on=right_on, + indicator=True, + ) + df = df[df["_merge"] == "left_only"] + return df.drop(columns=["_merge"]) + elif how == "semi": + mask = asseries(True, left_size) + for left_pred, right_pred in zip(left_on, right_on): + mask = mask & left_pred.isin(right_pred) + return left[mask] + else: + df = left.merge(right, how=how, left_on=left_on, right_on=right_on) + return df.drop(columns=[f"key_{i}" for i in range(len(left_on))]) + + @classmethod + def visit( + cls, + op: PandasAsofJoin, + how, + left, + right, + left_on, + right_on, + left_by, + right_by, + operator, + ): + # broadcast predicates if they are scalar values + left_size = len(left) + right_size = len(right) + left_on = [asseries(v, left_size) for v in left_on] + left_by = [asseries(v, left_size) for v in left_by] + right_on = [asseries(v, right_size) for v in right_on] + right_by = [asseries(v, right_size) for v in right_by] + + # merge_asof only works with column names not with series + left_on = {gen_name("left"): s for s in left_on} + left_by = {gen_name("left"): s for s in left_by} + right_on = {gen_name("right"): s for s in right_on} + right_by = {gen_name("right"): s for s in right_by} + + left = left.assign(**left_on, **left_by) + right = right.assign(**right_on, **right_by) + + # construct the appropriate flags for merge_asof + if operator == ops.LessEqual: + direction = "forward" + allow_exact_matches = True + elif operator == ops.GreaterEqual: + direction = "backward" + allow_exact_matches = True + elif operator == ops.Less: + direction = "forward" + allow_exact_matches = False + elif operator == ops.Greater: + direction = "backward" + allow_exact_matches = False + elif operator == ops.Equals: + direction = "nearest" + allow_exact_matches = True + else: + raise NotImplementedError( + f"Operator {operator} not supported for asof join" + ) + + # merge_asof requires the left side to be sorted by the join keys + left = left.sort_values(by=list(left_on.keys())) + df = pd.merge_asof( + left, + right, + left_on=list(left_on.keys()), + right_on=list(right_on.keys()), + left_by=list(left_by.keys()) or None, + right_by=list(right_by.keys()) or None, + direction=direction, + allow_exact_matches=allow_exact_matches, + ) + return df + + @classmethod + def visit(cls, op: 
ops.Union, left, right, distinct): + result = pd.concat([left, right], axis=0) + return result.drop_duplicates() if distinct else result + + @classmethod + def visit(cls, op: ops.Intersection, left, right, distinct): + if not distinct: + raise NotImplementedError( + "`distinct=False` is not supported by the pandas backend" + ) + return left.merge(right, on=list(left.columns), how="inner") + + @classmethod + def visit(cls, op: ops.Difference, left, right, distinct): + if not distinct: + raise NotImplementedError( + "`distinct=False` is not supported by the pandas backend" + ) + merged = left.merge(right, on=list(left.columns), how="outer", indicator=True) + result = merged[merged["_merge"] == "left_only"].drop("_merge", axis=1) + return result + + @classmethod + def visit(cls, op: ops.Distinct, parent): + return parent.drop_duplicates() + + @classmethod + def visit(cls, op: ops.DropNa, parent, how, subset): + if op.subset is not None: + subset = [col.name for col in op.subset] + else: + subset = None + return parent.dropna(how=how, subset=subset) + + @classmethod + def visit(cls, op: ops.FillNa, parent, replacements): + return parent.fillna(replacements) + + @classmethod + def visit(cls, op: ops.InValues, value, options): + if isinstance(value, pd.Series): + return value.isin(options) + else: + return value in options + + @classmethod + def visit(cls, op: ops.InSubquery, rel, needle): + first_column = rel.iloc[:, 0] + if isinstance(needle, pd.Series): + return needle.isin(first_column) + else: + return needle in first_column + + @classmethod + def visit(cls, op: PandasScalarSubquery, rel): + return rel.iat[0, 0] + + @classmethod + def execute(cls, node, backend, params): + def fn(node, _, **kwargs): + return cls.visit(node, **kwargs) + + original = node + node = node.to_expr().as_table().op() + node = plan(node, backend=backend, params=params) + df = node.map_clear(fn) + + # TODO(kszucs): add a flag to disable this conversion because it can be + # expensive for columns with object dtype + df = PandasData.convert_table(df, node.schema) + if isinstance(original, ops.Value): + if original.shape.is_scalar(): + return df.iloc[0, 0] + elif original.shape.is_columnar(): + return df.iloc[:, 0] + else: + raise TypeError(f"Unexpected shape: {original.shape}") + else: + return df diff --git a/ibis/backends/pandas/helpers.py b/ibis/backends/pandas/helpers.py new file mode 100644 index 000000000000..d8bc9efd54eb --- /dev/null +++ b/ibis/backends/pandas/helpers.py @@ -0,0 +1,211 @@ +from __future__ import annotations + +import itertools +from typing import Callable + +import numpy as np +import pandas as pd + +from ibis.util import gen_name + + +def asseries(value, size=1): + """Ensure that value is a pandas Series object, broadcast if necessary.""" + if isinstance(value, pd.Series): + return value + elif isinstance(value, (list, np.ndarray)): + return pd.Series(itertools.repeat(np.array(value), size)) + else: + return pd.Series(np.repeat(value, size)) + + +def asframe(values: dict | tuple, concat=True): + """Construct a DataFrame from a dict or tuple of Series objects.""" + if isinstance(values, dict): + names, values = zip(*values.items()) + elif isinstance(values, tuple): + names = [f"_{i}" for i in range(len(values))] + else: + raise TypeError(f"values must be a dict, or tuple; got {type(values)}") + + size = 1 + all_scalars = True + for v in values: + if isinstance(v, pd.Series): + size = len(v) + all_scalars = False + break + + columns = [asseries(v, size) for v in values] + if concat: + df = 
pd.concat(columns, axis=1, keys=names).reset_index(drop=True) + return df, all_scalars + else: + return columns, all_scalars + + +def generic(func: Callable, operands): + return func(*operands.values()) + + +def rowwise(func: Callable, operands): + # dealing with a collection of series objects + df, all_scalars = asframe(operands) + result = df.apply(func, axis=1) # , **kwargs) + return result.iat[0] if all_scalars else result + + +def columnwise(func: Callable, operands): + df, all_scalars = asframe(operands) + result = func(df) + return result.iat[0] if all_scalars else result + + +def serieswise(func, operands): + (key, value), *rest = operands.items() + if isinstance(value, pd.Series): + # dealing with a single series object + return func(**operands) + else: + # dealing with a single scalar object + value = pd.Series([value]) + operands = {key: value, **dict(rest)} + return func(**operands).iat[0] + + +def elementwise(func, operands): + value = operands.pop(next(iter(operands))) + if isinstance(value, pd.Series): + # dealing with a single series object + if operands: + return value.apply(func, **operands) + else: + return value.map(func, na_action="ignore") + else: + # dealing with a single scalar object + return func(value, **operands) + + +def agg(func, arg_column, where_column): + if where_column is None: + + def applier(df): + return func(df[arg_column.name]) + else: + + def applier(df): + mask = df[where_column.name] + col = df[arg_column.name][mask] + return func(col) + + return applier + + +class UngroupedFrame: + def __init__(self, df): + self.df = df + + def groups(self): + yield self.df + + def apply_reduction(self, func, **kwargs): + result = func(self.df, **kwargs) + data = [result] * len(self.df) + return pd.Series(data, index=self.df.index) + + def apply_analytic(self, func, **kwargs): + return func(self.df, **kwargs) + + +class GroupedFrame: + def __init__(self, df, group_keys): + self.df = df + self.group_keys = group_keys + self.groupby = df.groupby(group_keys, as_index=True) + + def groups(self): + for _, df in self.groupby: + yield df + + def apply_analytic(self, func, **kwargs): + results = [func(df, **kwargs) for df in self.groups()] + return pd.concat(results) + + def apply_reduction(self, func, **kwargs): + name = gen_name("result") + result = self.groupby.apply(func, **kwargs).rename(name) + df = self.df.merge(result, left_on=self.group_keys, right_index=True) + return df[name] + + +class RowsFrame: + def __init__(self, parent): + self.parent = parent + + @staticmethod + def adjust(length, index, start_offset, end_offset): + if start_offset is None: + start_index = 0 + else: + start_index = index + start_offset + if start_index < 0: + start_index = 0 + elif start_index > length: + start_index = length + + if end_offset is None: + end_index = length + else: + end_index = index + end_offset + 1 + if end_index < 0: + end_index = 0 + elif end_index > length: + end_index = length + + return (start_index, end_index) + + def apply_analytic(self, func, **kwargs): + return self.parent.apply_analytic(func, **kwargs) + + def apply_reduction(self, func, **kwargs): + results = {} + for df in self.parent.groups(): + for i, (ix, row) in enumerate(df.iterrows()): + # TODO(kszucs): use unique column names for _start, _end + start, end = row["__start__"], row["__end__"] + start_index, end_index = self.adjust(len(df), i, start, end) + subdf = df.iloc[start_index:end_index] + results[ix] = func(subdf, **kwargs) + + return pd.Series(results) + + +class RangeFrame: + def 
__init__(self, parent, order_key): + self.parent = parent + self.order_key = order_key + + @staticmethod + def predicate(col, i, start, end): + value = col.iat[i] + if start is None: + return col <= value + end + elif end is None: + return col >= value + start + else: + return (col >= value + start) & (col <= value + end) + + def apply_analytic(self, func, **kwargs): + return self.parent.apply_analytic(func, **kwargs) + + def apply_reduction(self, func, **kwargs): + results = {} + for df in self.parent.groups(): + for i, (ix, row) in enumerate(df.iterrows()): + start, end = row["__start__"], row["__end__"] + column = df[self.order_key] + predicate = self.predicate(column, i, start, end) + subdf = df[predicate] + results[ix] = func(subdf, **kwargs) + + return pd.Series(results) diff --git a/ibis/backends/pandas/kernels.py b/ibis/backends/pandas/kernels.py new file mode 100644 index 000000000000..1e28095c1ee2 --- /dev/null +++ b/ibis/backends/pandas/kernels.py @@ -0,0 +1,513 @@ +from __future__ import annotations + +import decimal +import json +import math +import operator + +try: + import regex as re +except ImportError: + import re +from functools import reduce +from urllib.parse import parse_qs, urlsplit + +import numpy as np +import pandas as pd +import toolz + +import ibis.expr.operations as ops +from ibis.backends.pandas.helpers import ( + columnwise, + elementwise, + generic, + rowwise, + serieswise, +) +from ibis.common.exceptions import OperationNotDefinedError +from ibis.util import any_of + + +def substring_rowwise(row): + arg, start, length = row["arg"], row["start"], row["length"] + if length is None: + return arg[start:] + else: + return arg[start : start + length] + + +def substring_serieswise(arg, start, length): + if length is None: + return arg.str[start:] + else: + return arg.str[start : start + length] + + +def _sql_like_to_regex(pattern, escape): + """Convert a SQL `LIKE` pattern to an equivalent Python regular expression. + + Parameters + ---------- + pattern + A LIKE pattern with the following semantics: + * `%` matches zero or more characters + * `_` matches exactly one character + * To escape `%` and `_` (or to match the `escape` parameter + itself), prefix the desired character with `escape`. + escape + Escape character + + Returns + ------- + str + A regular expression pattern equivalent to the input SQL `LIKE` pattern. + + Examples + -------- + >>> sql_like_to_regex("6%") # default is to not escape anything + '^6.*$' + >>> sql_like_to_regex("6^%", escape="^") + '^6%$' + >>> sql_like_to_regex("6_") + '^6.$' + >>> sql_like_to_regex("6/_", escape="/") + '^6_$' + >>> sql_like_to_regex("%abc") # any string ending with "abc" + '^.*abc$' + >>> sql_like_to_regex("abc%") # any string starting with "abc" + '^abc.*$' + """ + cur_i = 0 + pattern_length = len(pattern) + + while cur_i < pattern_length: + nxt_i = cur_i + 1 + + cur = pattern[cur_i] + nxt = pattern[nxt_i] if nxt_i < pattern_length else None + + skip = 1 + + if nxt is not None and escape is not None and cur == escape: + yield nxt + skip = 2 + elif cur == "%": + yield ".*" + elif cur == "_": + yield "." 
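+        # Any other character is emitted verbatim by the else branch below;
+        # note that regex metacharacters occurring literally in the LIKE
+        # pattern (for example ".") are not escaped at this point.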
+ else: + yield cur + + cur_i += skip + + +def sql_like_to_regex(pattern, escape=None): + return f"^{''.join(_sql_like_to_regex(pattern, escape))}$" + + +def string_sqllike_serieswise(arg, pattern, escape): + pat = sql_like_to_regex(pattern, escape) + return arg.str.contains(pat, regex=True) + + +def string_sqlilike_serieswise(arg, pattern, escape): + pat = sql_like_to_regex(pattern, escape) + return arg.str.contains(pat, regex=True, flags=re.IGNORECASE) + + +def extract_userinfo_elementwise(x): + url_parts = urlsplit(x) + username = url_parts.username or "" + password = url_parts.password or "" + return f"{username}:{password}" + + +def extract_queryparam_rowwise(row): + query = urlsplit(row["arg"]).query + param_name = row["key"] + if param_name is not None: + value = parse_qs(query)[param_name] + return value if len(value) > 1 else value[0] + else: + return query + + +def array_index_rowwise(row): + try: + return row["arg"][row["index"]] + except IndexError: + return None + + +def array_position_rowwise(row): + try: + return row["arg"].index(row["other"]) + except ValueError: + return -1 + + +def integer_range_rowwise(row): + if not row["step"]: + return [] + return list(np.arange(row["start"], row["stop"], row["step"])) + + +def timestamp_range_rowwise(row): + if not row["step"]: + return [] + return list( + pd.date_range(row["start"], row["stop"], freq=row["step"], inclusive="left") + ) + + +def _safe_method(mapping, method, *args, **kwargs): + if mapping is None or mapping is pd.NA: + return None + try: + method = getattr(mapping, method) + except AttributeError: + return None + else: + result = method(*args, **kwargs) + return None if result is pd.NA else result + + +def safe_len(mapping): + return _safe_method(mapping, "__len__") + + +def safe_get(mapping, key, default=None): + return _safe_method(mapping, "get", key, default) + + +def safe_contains(mapping, key): + return _safe_method(mapping, "__contains__", key) + + +def safe_keys(mapping): + result = _safe_method(mapping, "keys") + if result is None: + return None + # list(...) to unpack iterable + return np.array(list(result)) + + +def safe_values(mapping): + result = _safe_method(mapping, "values") + if result is None or result is pd.NA: + return None + # list(...) 
to unpack iterable + return np.array(list(result), dtype="object") + + +def safe_merge(left, right): + if left is None or left is pd.NA: + return None + elif right is None or right is pd.NA: + return None + else: + return {**left, **right} + + +def safe_json_getitem(value, key): + try: + # try to deserialize the value -> return None if it's None + if (js := json.loads(value)) is None: + return None + except (json.JSONDecodeError, TypeError): + # if there's an error related to decoding or a type error return None + return None + + try: + # try to extract the value as an array element or mapping key + return js[key] + except (KeyError, IndexError, TypeError): + # KeyError: missing mapping key + # IndexError: missing sequence key + # TypeError: `js` doesn't implement __getitem__, either at all or for + # the type of `key` + return None + + +def safe_decimal(func): + def wrapper(x, **kwargs): + try: + return func(x, **kwargs) + except decimal.InvalidOperation: + return decimal.Decimal("NaN") + + return wrapper + + +def round_serieswise(arg, digits): + if digits is None: + return np.round(arg).astype("int64") + else: + return np.round(arg, digits).astype("float64") + + +_generic_impls = { + ops.Abs: abs, + ops.Acos: np.arccos, + ops.Add: operator.add, + ops.And: operator.and_, + ops.Asin: np.arcsin, + ops.Atan: np.arctan, + ops.Atan2: np.arctan2, + ops.BitwiseAnd: lambda x, y: np.bitwise_and(x, y), + ops.BitwiseLeftShift: lambda x, y: np.left_shift(x, y).astype("int64"), + ops.BitwiseNot: np.invert, + ops.BitwiseOr: lambda x, y: np.bitwise_or(x, y), + ops.BitwiseRightShift: lambda x, y: np.right_shift(x, y).astype("int64"), + ops.BitwiseXor: lambda x, y: np.bitwise_xor(x, y), + ops.Ceil: lambda x: np.ceil(x).astype("int64"), + ops.Cos: np.cos, + ops.Cot: lambda x: 1 / np.tan(x), + ops.DateAdd: operator.add, + ops.DateDiff: operator.sub, + ops.DateSub: operator.sub, + ops.Degrees: np.degrees, + ops.Divide: operator.truediv, + ops.Equals: operator.eq, + ops.Exp: np.exp, + ops.Floor: lambda x: np.floor(x).astype("int64"), + ops.FloorDivide: operator.floordiv, + ops.Greater: operator.gt, + ops.GreaterEqual: operator.ge, + ops.IdenticalTo: lambda x, y: (x == y) | (pd.isnull(x) & pd.isnull(y)), + ops.IntervalAdd: operator.add, + ops.IntervalFloorDivide: operator.floordiv, + ops.IntervalMultiply: operator.mul, + ops.IntervalSubtract: operator.sub, + ops.IsInf: np.isinf, + ops.IsNull: pd.isnull, + ops.Less: operator.lt, + ops.LessEqual: operator.le, + ops.Ln: np.log, + ops.Log10: np.log10, + ops.Log2: np.log2, + ops.Modulus: operator.mod, + ops.Multiply: operator.mul, + ops.Negate: lambda x: not x if isinstance(x, (bool, np.bool_)) else -x, + ops.Not: lambda x: not x if isinstance(x, (bool, np.bool_)) else ~x, + ops.NotEquals: operator.ne, + ops.NotNull: pd.notnull, + ops.Or: operator.or_, + ops.Power: operator.pow, + ops.Radians: np.radians, + ops.Sign: np.sign, + ops.Sin: np.sin, + ops.Sqrt: np.sqrt, + ops.Subtract: operator.sub, + ops.Tan: np.tan, + ops.TimestampAdd: operator.add, + ops.TimestampDiff: operator.sub, + ops.TimestampSub: operator.sub, + ops.Xor: operator.xor, + ops.E: lambda: np.e, + ops.Pi: lambda: np.pi, + ops.TimestampNow: lambda: pd.Timestamp("now", tz="UTC").tz_localize(None), + ops.StringConcat: lambda xs: reduce(operator.add, xs), + ops.StringJoin: lambda sep, xs: reduce(lambda x, y: x + sep + y, xs), + ops.Log: lambda x, base: np.log(x) if base is None else np.log(x) / np.log(base), +} + +_columnwise_impls = { + ops.Clip: lambda df: df["arg"].clip(lower=df["lower"], 
upper=df["upper"]), + ops.IfElse: lambda df: df["true_expr"].where( + df["bool_expr"], other=df["false_null_expr"] + ), + ops.NullIf: lambda df: df["arg"].where(df["arg"] != df["null_if_expr"]), + ops.Repeat: lambda df: df["arg"] * df["times"], +} + +_rowwise_impls = { + ops.ArrayContains: lambda row: row["other"] in row["arg"], + ops.ArrayIndex: array_index_rowwise, + ops.ArrayPosition: array_position_rowwise, + ops.ArrayRemove: lambda row: [x for x in row["arg"] if x != row["other"]], + ops.ArrayRepeat: lambda row: np.tile(row["arg"], max(0, row["times"])), + ops.ArraySlice: lambda row: row["arg"][row["start"] : row["stop"]], + ops.ArrayUnion: lambda row: toolz.unique(row["left"] + row["right"]), + ops.EndsWith: lambda row: row["arg"].endswith(row["end"]), + ops.IntegerRange: integer_range_rowwise, + ops.JSONGetItem: lambda row: safe_json_getitem(row["arg"], row["index"]), + ops.Map: lambda row: dict(zip(row["keys"], row["values"])), + ops.MapGet: lambda row: safe_get(row["arg"], row["key"], row["default"]), + ops.MapContains: lambda row: safe_contains(row["arg"], row["key"]), + ops.MapMerge: lambda row: safe_merge(row["left"], row["right"]), + ops.TimestampRange: timestamp_range_rowwise, + ops.LPad: lambda row: row["arg"].rjust(row["length"], row["pad"]), + ops.RegexExtract: lambda row: re.search(row["pattern"], row["arg"]).group( + row["index"] + ), + ops.RegexReplace: lambda row: re.sub( + row["pattern"], row["replacement"], row["arg"] + ), + ops.RegexSearch: lambda row: re.search(row["pattern"], row["arg"]) is not None, + ops.RPad: lambda row: row["arg"].ljust(row["length"], row["pad"]), + ops.StartsWith: lambda row: row["arg"].startswith(row["start"]), + ops.StringContains: lambda row: row["haystack"].contains(row["needle"]), + ops.StringFind: lambda row: row["arg"].find( + row["substr"], row["start"], row["end"] + ), + ops.StringReplace: lambda row: row["arg"].replace( + row["pattern"], row["replacement"] + ), + ops.StringSplit: lambda row: row["arg"].split(row["delimiter"]), + ops.StrRight: lambda row: row["arg"][-row["nchars"] :], + ops.Translate: lambda row: row["arg"].translate( + str.maketrans(row["from_str"], row["to_str"]) + ), + ops.Substring: substring_rowwise, + ops.ExtractQuery: extract_queryparam_rowwise, + ops.Strftime: lambda row: row["arg"].strftime(row["format_str"]), +} + +_serieswise_impls = { + ops.Between: lambda arg, lower_bound, upper_bound: arg.between( + lower_bound, upper_bound + ), + ops.Capitalize: lambda arg: arg.str.capitalize(), + ops.Date: lambda arg: arg.dt.floor("d"), + ops.DayOfWeekIndex: lambda arg: pd.to_datetime(arg).dt.dayofweek, + ops.DayOfWeekName: lambda arg: pd.to_datetime(arg).dt.day_name(), + ops.EndsWith: lambda arg, end: arg.str.endswith(end), + ops.ExtractDay: lambda arg: arg.dt.day, + ops.ExtractDayOfYear: lambda arg: arg.dt.dayofyear, + ops.ExtractEpochSeconds: lambda arg: arg.astype("datetime64[s]") + .astype("int64") + .astype("int32"), + ops.ExtractHour: lambda arg: arg.dt.hour, + ops.ExtractMicrosecond: lambda arg: arg.dt.microsecond, + ops.ExtractMillisecond: lambda arg: arg.dt.microsecond // 1000, + ops.ExtractMinute: lambda arg: arg.dt.minute, + ops.ExtractMonth: lambda arg: arg.dt.month, + ops.ExtractQuarter: lambda arg: arg.dt.quarter, + ops.ExtractSecond: lambda arg: arg.dt.second, + ops.ExtractWeekOfYear: lambda arg: arg.dt.isocalendar().week.astype("int32"), + ops.ExtractYear: lambda arg: arg.dt.year, + ops.Lowercase: lambda arg: arg.str.lower(), + ops.LPad: lambda arg, length, pad: arg.str.rjust(length, fillchar=pad), + 
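+    # Note that SQL LPAD pads on the left, which maps to str.rjust in pandas
+    # (and RPAD to str.ljust), so the rjust/ljust pairing here is not a typo.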
ops.LStrip: lambda arg: arg.str.lstrip(), + ops.Repeat: lambda arg, times: arg.str.repeat(times), + ops.Reverse: lambda arg: arg.str[::-1], + ops.Round: round_serieswise, + ops.RPad: lambda arg, length, pad: arg.str.ljust(length, fillchar=pad), + ops.RStrip: lambda arg: arg.str.rstrip(), + ops.StartsWith: lambda arg, start: arg.str.startswith(start), + ops.StringAscii: lambda arg: arg.map(ord, na_action="ignore").astype("int32"), + ops.StringContains: lambda haystack, needle: haystack.str.contains( + needle, regex=False + ), + ops.StringFind: lambda arg, substr, start, end: arg.str.find(substr, start, end), + ops.StringLength: lambda arg: arg.str.len().astype("int32"), + ops.StringReplace: lambda arg, pattern, replacement: arg.str.replace( + pattern, replacement + ), + ops.StringSplit: lambda arg, delimiter: arg.str.split(delimiter), + ops.StringSQLLike: string_sqllike_serieswise, + ops.StringSQLILike: string_sqlilike_serieswise, + ops.Strip: lambda arg: arg.str.strip(), + ops.Strftime: lambda arg, format_str: arg.dt.strftime(format_str), + ops.StrRight: lambda arg, nchars: arg.str[-nchars:], + ops.Substring: substring_serieswise, + ops.Time: lambda arg: arg.dt.time, + ops.TimestampFromUNIX: lambda arg, unit: pd.to_datetime(arg, unit=unit.short), + ops.Translate: lambda arg, from_str, to_str: arg.str.translate( + str.maketrans(from_str, to_str) + ), + ops.Uppercase: lambda arg: arg.str.upper(), +} + +_elementwise_impls = { + ops.ExtractProtocol: lambda x: getattr(urlsplit(x), "scheme", ""), + ops.ExtractAuthority: lambda x: getattr(urlsplit(x), "netloc", ""), + ops.ExtractPath: lambda x: getattr(urlsplit(x), "path", ""), + ops.ExtractFragment: lambda x: getattr(urlsplit(x), "fragment", ""), + ops.ExtractHost: lambda x: getattr(urlsplit(x), "hostname", ""), + ops.ExtractUserInfo: extract_userinfo_elementwise, + ops.StructField: lambda x, field: safe_get(x, field), + ops.ArrayLength: len, + ops.ArrayFlatten: toolz.concat, + ops.ArraySort: sorted, + ops.ArrayDistinct: toolz.unique, + ops.MapLength: safe_len, + ops.MapKeys: safe_keys, + ops.MapValues: safe_values, +} + + +_elementwise_decimal_impls = { + ops.Round: lambda x, digits=0: round(x, digits), + ops.Log10: safe_decimal(lambda x: x.log10()), + ops.Ln: safe_decimal(lambda x: x.ln()), + ops.Exp: safe_decimal(lambda x: x.exp()), + ops.Floor: safe_decimal(math.floor), + ops.Ceil: safe_decimal(math.ceil), + ops.Sqrt: safe_decimal(lambda x: x.sqrt()), + ops.Log2: safe_decimal(lambda x: x.ln() / decimal.Decimal(2).ln()), + ops.Sign: safe_decimal(lambda x: math.copysign(1, x)), + ops.Log: safe_decimal(lambda x, base: x.ln() / decimal.Decimal(base).ln()), +} + + +def pick_kernel(op, operands): + typ = type(op) + + # decimal operations have special implementations + if op.dtype.is_decimal(): + func = _elementwise_decimal_impls[typ] + return elementwise(func, operands) + + # prefer generic implementations if available + if func := _generic_impls.get(typ): + return generic(func, operands) + + first, *rest = operands.values() + is_multi_arg = bool(rest) + is_multi_column = any_of(rest, pd.Series) + + if is_multi_column: + if func := _columnwise_impls.get(typ): + return columnwise(func, operands) + elif func := _rowwise_impls.get(typ): + return rowwise(func, operands) + else: + raise OperationNotDefinedError( + "No columnwise or rowwise implementation found for " + f"multi-column operation {typ}" + ) + elif is_multi_arg: + if func := _columnwise_impls.get(typ): + return columnwise(func, operands) + elif func := _serieswise_impls.get(typ): + return 
serieswise(func, operands) + elif func := _rowwise_impls.get(typ): + return rowwise(func, operands) + elif func := _elementwise_impls.get(typ): + return elementwise(func, operands) + else: + raise OperationNotDefinedError( + "No columnwise, serieswise, rowwise or elementwise " + f"implementation found for multi-argument operation {typ}" + ) + else: # noqa: PLR5501 + if func := _serieswise_impls.get(typ): + return serieswise(func, operands) + elif func := _elementwise_impls.get(typ): + return elementwise(func, operands) + else: + raise OperationNotDefinedError( + "No serieswise or elementwise implementation found for " + f"single-argument operation {typ}" + ) + + +supported_operations = ( + _generic_impls.keys() + | _columnwise_impls.keys() + | _rowwise_impls.keys() + | _serieswise_impls.keys() + | _elementwise_impls.keys() +) diff --git a/ibis/backends/pandas/rewrites.py b/ibis/backends/pandas/rewrites.py new file mode 100644 index 000000000000..7419f92d498d --- /dev/null +++ b/ibis/backends/pandas/rewrites.py @@ -0,0 +1,322 @@ +from __future__ import annotations + +from public import public + +import ibis +import ibis.expr.datashape as ds +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis.common.annotations import attribute +from ibis.common.collections import FrozenDict +from ibis.common.patterns import replace +from ibis.common.typing import VarTuple # noqa: TCH001 +from ibis.expr.schema import Schema +from ibis.util import gen_name + + +class PandasRelation(ops.Relation): + pass + + +class PandasValue(ops.Value): + pass + + +@public +class PandasRename(PandasRelation): + parent: ops.Relation + mapping: FrozenDict[str, str] + + @classmethod + def from_prefix(cls, parent, prefix): + mapping = {k: f"{prefix}_{k}" for k in parent.schema} + return cls(parent, mapping) + + @attribute + def values(self): + return FrozenDict( + {to: ops.Field(self.parent, from_) for from_, to in self.mapping.items()} + ) + + @attribute + def schema(self): + return Schema( + {self.mapping[name]: dtype for name, dtype in self.parent.schema.items()} + ) + + +@public +class PandasJoin(PandasRelation): + left: ops.Relation + right: ops.Relation + left_on: VarTuple[ops.Value] + right_on: VarTuple[ops.Value] + how: str + + @attribute + def values(self): + return FrozenDict({**self.left.values, **self.right.values}) + + @attribute + def schema(self): + return self.left.schema | self.right.schema + + +@public +class PandasAsofJoin(PandasJoin): + left_by: VarTuple[ops.Value] + right_by: VarTuple[ops.Value] + operator: type + + +@public +class PandasAggregate(PandasRelation): + parent: ops.Relation + groups: FrozenDict[str, ops.Field] + metrics: FrozenDict[str, ops.Reduction] + + @attribute + def values(self): + return FrozenDict({**self.groups, **self.metrics}) + + @attribute + def schema(self): + return Schema({k: v.dtype for k, v in self.values.items()}) + + +@public +class PandasLimit(PandasRelation): + parent: ops.Relation + n: ops.Relation + offset: ops.Relation + + @attribute + def values(self): + return self.parent.values + + @attribute + def schema(self): + return self.parent.schema + + +@public +class PandasScalarSubquery(PandasValue): + # variant with no integrity checks + rel: ops.Relation + + shape = ds.scalar + + @attribute + def dtype(self): + return self.rel.schema.types[0] + + +def is_columnar(node): + return isinstance(node, ops.Value) and node.shape.is_columnar() + + +@replace(ops.Project) +def rewrite_project(_, **kwargs): + winfuncs = [] + for v in _.values.values(): + 
winfuncs.extend(v.find(ops.WindowFunction, ops.Value)) + + if not winfuncs: + return _ + + selects = {ops.Field(_.parent, k): k for k in _.parent.schema} + for node in winfuncs: + # add computed values from the window function + values = list(node.func.__args__) + # add computed values from the window frame + values += node.frame.group_by + values += [key.expr for key in node.frame.order_by] + if node.frame.start is not None: + values.append(node.frame.start.value) + if node.frame.end is not None: + values.append(node.frame.end.value) + + for v in values: + if is_columnar(v) and v not in selects: + selects[v] = gen_name("value") + + # STEP 1: construct the pre-projection + proj = ops.Project(_.parent, {v: k for k, v in selects.items()}) + subs = {node: ops.Field(proj, name) for name, node in proj.values.items()} + + # STEP 2: construct new window function nodes + metrics = {} + for node in winfuncs: + frame = node.frame + start = None if frame.start is None else frame.start.replace(subs) + end = None if frame.end is None else frame.end.replace(subs) + order_by = [key.replace(subs) for key in frame.order_by] + group_by = [key.replace(subs) for key in frame.group_by] + frame = frame.__class__( + proj, start=start, end=end, group_by=group_by, order_by=order_by + ) + metrics[node] = ops.WindowFunction(node.func.replace(subs), frame) + + # STEP 3: reconstruct the current projection with the window functions + subs.update(metrics) + values = {k: v.replace(subs, filter=ops.Value) for k, v in _.values.items()} + return ops.Project(proj, values) + + +@replace(ops.Aggregate) +def rewrite_aggregate(_, **kwargs): + selects = {ops.Field(_.parent, k): k for k in _.parent.schema} + for v in _.groups.values(): + if v not in selects: + selects[v] = gen_name("group") + + reductions = {} + for v in _.metrics.values(): + for reduction in v.find_topmost(ops.Reduction): + for arg in reduction.__args__: + if is_columnar(arg) and arg not in selects: + selects[arg] = gen_name("value") + if reduction not in reductions: + reductions[reduction] = gen_name("reduction") + + # STEP 1: construct the pre-projection + proj = ops.Project(_.parent, {v: k for k, v in selects.items()}) + + # STEP 2: construct the pandas aggregation + subs = {node: ops.Field(proj, name) for name, node in proj.values.items()} + groups = {name: ops.Field(proj, selects[node]) for name, node in _.groups.items()} + metrics = {name: node.replace(subs) for node, name in reductions.items()} + agg = PandasAggregate(proj, groups, metrics) + + # STEP 3: construct the post-projection + subs = {node: ops.Field(agg, name) for node, name in reductions.items()} + values = {name: ops.Field(agg, name) for name, node in _.groups.items()} + values.update({name: node.replace(subs) for name, node in _.metrics.items()}) + return ops.Project(agg, values) + + +def split_join_predicates(left, right, predicates, only_equality=True): + left_on = [] + right_on = [] + for pred in predicates: + if left not in pred.relations or right not in pred.relations: + # not a usual join predicate, so apply a trick by placing the + # predicate to the left side and adding a literal True to the right + # which the left side must be equal to + left_on.append(pred) + right_on.append(ops.Literal(True, dtype=dt.boolean)) + elif isinstance(pred, ops.Binary): + if only_equality and not isinstance(pred, ops.Equals): + raise TypeError("Only equality join predicates supported with pandas") + if left in pred.left.relations and right in pred.right.relations: + left_on.append(pred.left) + 
right_on.append(pred.right) + elif left in pred.right.relations and right in pred.left.relations: + left_on.append(pred.right) + right_on.append(pred.left) + else: + raise ValueError("Join predicate does not reference both tables") + else: + raise TypeError(f"Unsupported join predicate {pred}") + + return left_on, right_on + + +@replace(ops.JoinChain) +def rewrite_join(_, **kwargs): + prefixes = {} + prefixes[_.first] = prefix = str(len(prefixes)) + left = PandasRename.from_prefix(_.first, prefix) + + for link in _.rest: + prefixes[link.table] = prefix = str(len(prefixes)) + right = PandasRename.from_prefix(link.table, prefix) + + subs = {v: ops.Field(left, k) for k, v in left.values.items()} + subs.update({v: ops.Field(right, k) for k, v in right.values.items()}) + preds = [pred.replace(subs, filter=ops.Value) for pred in link.predicates] + + # separate ASOF from the rest of the joins + if link.how == "asof": + on, *by = preds + left_on, right_on = split_join_predicates( + left, right, [on], only_equality=False + ) + left_by, right_by = split_join_predicates(left, right, by) + left = PandasAsofJoin( + how="asof", + left=left, + right=right, + left_on=left_on, + right_on=right_on, + left_by=left_by, + right_by=right_by, + operator=type(on), + ) + else: + # need to replace the fields in the predicates + left_on, right_on = split_join_predicates(left, right, preds) + left = PandasJoin( + how=link.how, + left=left, + right=right, + left_on=left_on, + right_on=right_on, + ) + + subs = {v: ops.Field(left, k) for k, v in left.values.items()} + fields = {k: v.replace(subs, filter=ops.Value) for k, v in _.values.items()} + return ops.Project(left, fields) + + +@replace(ops.Limit) +def rewrite_limit(_, **kwargs): + if isinstance(_.n, ops.Value): + n = _.n.to_expr() + else: + n = ibis.literal(_.n) + + if isinstance(_.offset, ops.Value): + offset = _.offset.to_expr() + else: + offset = ibis.literal(_.offset) + + n = n.as_table().op() + if isinstance(n, ops.Aggregate): + n = rewrite_aggregate.match(n, context={}) + + offset = offset.as_table().op() + if isinstance(offset, ops.Aggregate): + offset = rewrite_aggregate.match(offset, context={}) + + return PandasLimit(_.parent, n, offset) + + +@replace(ops.ScalarSubquery) +def rewrite_scalar_subquery(_, **kwargs): + return PandasScalarSubquery(_.rel) + + +@replace(ops.ScalarParameter) +def replace_parameter(_, params, **kwargs): + return ops.Literal(value=params[_], dtype=_.dtype) + + +@replace(ops.UnboundTable) +def bind_unbound_table(_, backend, **kwargs): + return ops.DatabaseTable(name=_.name, schema=_.schema, source=backend) + + +def plan(node, backend, params): + ctx = {"params": params, "backend": backend} + node = node.replace(rewrite_scalar_subquery) + node = node.replace( + rewrite_project + | rewrite_aggregate + | rewrite_join + | rewrite_limit + | replace_parameter + | bind_unbound_table, + context=ctx, + ) + return node diff --git a/ibis/backends/pandas/tests/conftest.py b/ibis/backends/pandas/tests/conftest.py index 8aa998871d2a..41fcc924ed2c 100644 --- a/ibis/backends/pandas/tests/conftest.py +++ b/ibis/backends/pandas/tests/conftest.py @@ -1,9 +1,16 @@ from __future__ import annotations +import decimal from typing import Any +import numpy as np +import pandas as pd +import pytest + import ibis +import ibis.expr.datatypes as dt from ibis.backends.conftest import TEST_TABLES +from ibis.backends.pandas import Backend from ibis.backends.tests.base import BackendTest from ibis.backends.tests.data import array_types, json_types, struct_types, 
win @@ -32,3 +39,282 @@ def _load_data(self, **_: Any) -> None: @staticmethod def connect(*, tmpdir, worker_id, **kw): return ibis.pandas.connect(**kw) + + +@pytest.fixture(scope="module") +def df(): + return pd.DataFrame( + { + "plain_int64": list(range(1, 4)), + "plain_strings": list("abc"), + "plain_float64": [4.0, 5.0, 6.0], + "plain_datetimes_naive": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values + ), + "plain_datetimes_ny": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values + ).dt.tz_localize("America/New_York"), + "plain_datetimes_utc": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values + ).dt.tz_localize("UTC"), + "plain_uint64": pd.Series(range(1, 4), dtype=np.dtype("uint64")), + "dup_strings": list("dad"), + "dup_ints": [1, 2, 1], + "float64_as_strings": ["100.01", "234.23", "-999.34"], + "int64_as_strings": list(map(str, range(1, 4))), + "strings_with_space": [" ", "abab", "ddeeffgg"], + "translate_from_strings": ["rmz", "abc", "ghj"], + "translate_to_strings": ["lns", "ovk", "jfr"], + "int64_with_zeros": [0, 1, 0], + "float64_with_zeros": [1.0, 0.0, 1.0], + "float64_positive": [1.0, 2.0, 1.0], + "strings_with_nulls": ["a", None, "b"], + "datetime_strings_naive": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values + ).astype(str), + "datetime_strings_ny": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values + ) + .dt.tz_localize("America/New_York") + .astype(str), + "datetime_strings_utc": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values + ) + .dt.tz_localize("UTC") + .astype(str), + "decimal": list(map(decimal.Decimal, ["1.0", "2", "3.234"])), + "array_of_float64": [ + np.array([1.0, 2.0], dtype="float64"), + np.array([3.0], dtype="float64"), + np.array([], dtype="float64"), + ], + "array_of_int64": [ + np.array([1, 2], dtype="int64"), + np.array([], dtype="int64"), + np.array([3], dtype="int64"), + ], + "array_of_strings": [ + np.array(["a", "b"], dtype="object"), + np.array([], dtype="object"), + np.array(["c"], dtype="object"), + ], + "map_of_strings_integers": [{"a": 1, "b": 2}, None, {}], + "map_of_integers_strings": [{}, None, {1: "a", 2: "b"}], + "map_of_complex_values": [None, {"a": [1, 2, 3], "b": []}, {}], + } + ) + + +@pytest.fixture(scope="module") +def batting_df(data_dir): + num_rows = 1000 + start_index = 30 + df = pd.read_parquet(data_dir / "parquet" / "batting.parquet").iloc[ + start_index : start_index + num_rows + ] + return df.reset_index(drop=True) + + +@pytest.fixture(scope="module") +def awards_players_df(data_dir): + return pd.read_parquet(data_dir / "parquet" / "awards_players.parquet") + + +@pytest.fixture(scope="module") +def df1(): + return pd.DataFrame( + {"key": list("abcd"), "value": [3, 4, 5, 6], "key2": list("eeff")} + ) + + +@pytest.fixture(scope="module") +def df2(): + return pd.DataFrame( + {"key": list("ac"), "other_value": [4.0, 6.0], "key3": list("fe")} + ) + + +@pytest.fixture(scope="module") +def intersect_df2(): + return pd.DataFrame({"key": list("cd"), "value": [5, 6], "key2": list("ff")}) + + +@pytest.fixture(scope="module") +def time_df1(): + return pd.DataFrame( + {"time": pd.to_datetime([1, 2, 3, 4]), "value": [1.1, 2.2, 3.3, 4.4]} + ) + + +@pytest.fixture(scope="module") +def time_df2(): + return pd.DataFrame({"time": pd.to_datetime([2, 4]), "other_value": [1.2, 2.0]}) + + +@pytest.fixture(scope="module") +def time_df3(): + return pd.DataFrame( + { + "time": pd.Series( 
+ pd.date_range(start="2017-01-02 01:02:03.234", periods=8).values + ), + "id": list(range(1, 5)) * 2, + "value": [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8], + } + ) + + +@pytest.fixture(scope="module") +def time_keyed_df1(): + return pd.DataFrame( + { + "time": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=6).values + ), + "key": [1, 2, 3, 1, 2, 3], + "value": [1.2, 1.4, 2.0, 4.0, 8.0, 16.0], + } + ) + + +@pytest.fixture(scope="module") +def time_keyed_df2(): + return pd.DataFrame( + { + "time": pd.Series( + pd.date_range( + start="2017-01-02 01:02:03.234", freq="3D", periods=3 + ).values + ), + "key": [1, 2, 3], + "other_value": [1.1, 1.2, 2.2], + } + ) + + +@pytest.fixture(scope="module") +def client( + df, + df1, + df2, + df3, + time_df1, + time_df2, + time_df3, + time_keyed_df1, + time_keyed_df2, + intersect_df2, +): + return Backend().connect( + { + "df": df, + "df1": df1, + "df2": df2, + "df3": df3, + "left": df1, + "right": df2, + "time_df1": time_df1, + "time_df2": time_df2, + "time_df3": time_df3, + "time_keyed_df1": time_keyed_df1, + "time_keyed_df2": time_keyed_df2, + "intersect_df2": intersect_df2, + } + ) + + +@pytest.fixture(scope="module") +def df3(): + return pd.DataFrame( + { + "key": list("ac"), + "other_value": [4.0, 6.0], + "key2": list("ae"), + "key3": list("fe"), + } + ) + + +t_schema = { + "decimal": dt.Decimal(4, 3), + "array_of_float64": dt.Array(dt.double), + "array_of_int64": dt.Array(dt.int64), + "array_of_strings": dt.Array(dt.string), + "map_of_strings_integers": dt.Map(dt.string, dt.int64), + "map_of_integers_strings": dt.Map(dt.int64, dt.string), + "map_of_complex_values": dt.Map(dt.string, dt.Array(dt.int64)), +} + + +@pytest.fixture(scope="module") +def t(client): + return client.table("df", schema=t_schema) + + +@pytest.fixture(scope="module") +def lahman(batting_df, awards_players_df): + return Backend().connect( + {"batting": batting_df, "awards_players": awards_players_df} + ) + + +@pytest.fixture(scope="module") +def left(client): + return client.table("left") + + +@pytest.fixture(scope="module") +def right(client): + return client.table("right") + + +@pytest.fixture(scope="module") +def time_left(client): + return client.table("time_df1") + + +@pytest.fixture(scope="module") +def time_right(client): + return client.table("time_df2") + + +@pytest.fixture(scope="module") +def time_table(client): + return client.table("time_df3") + + +@pytest.fixture(scope="module") +def time_keyed_left(client): + return client.table("time_keyed_df1") + + +@pytest.fixture(scope="module") +def time_keyed_right(client): + return client.table("time_keyed_df2") + + +@pytest.fixture(scope="module") +def batting(lahman): + return lahman.table("batting") + + +@pytest.fixture(scope="module") +def sel_cols(batting): + cols = batting.columns + start, end = cols.index("AB"), cols.index("H") + 1 + return ["playerID", "yearID", "teamID", "G"] + cols[start:end] + + +@pytest.fixture(scope="module") +def players_base(batting, sel_cols): + return batting[sel_cols].order_by(sel_cols[:3]) + + +@pytest.fixture(scope="module") +def players(players_base): + return players_base.group_by("playerID") + + +@pytest.fixture(scope="module") +def players_df(players_base): + return players_base.execute().reset_index(drop=True) diff --git a/ibis/backends/pandas/tests/execution/__init__.py b/ibis/backends/pandas/tests/execution/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/ibis/backends/pandas/tests/execution/conftest.py 
b/ibis/backends/pandas/tests/execution/conftest.py deleted file mode 100644 index 32d5efad67d2..000000000000 --- a/ibis/backends/pandas/tests/execution/conftest.py +++ /dev/null @@ -1,289 +0,0 @@ -from __future__ import annotations - -import decimal - -import numpy as np -import pandas as pd -import pytest - -import ibis.expr.datatypes as dt -from ibis.backends.pandas import Backend - - -@pytest.fixture(scope="module") -def df(): - return pd.DataFrame( - { - "plain_int64": list(range(1, 4)), - "plain_strings": list("abc"), - "plain_float64": [4.0, 5.0, 6.0], - "plain_datetimes_naive": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values - ), - "plain_datetimes_ny": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values - ).dt.tz_localize("America/New_York"), - "plain_datetimes_utc": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values - ).dt.tz_localize("UTC"), - "plain_uint64": pd.Series(range(1, 4), dtype=np.dtype("uint64")), - "dup_strings": list("dad"), - "dup_ints": [1, 2, 1], - "float64_as_strings": ["100.01", "234.23", "-999.34"], - "int64_as_strings": list(map(str, range(1, 4))), - "strings_with_space": [" ", "abab", "ddeeffgg"], - "translate_from_strings": ["rmz", "abc", "ghj"], - "translate_to_strings": ["lns", "ovk", "jfr"], - "int64_with_zeros": [0, 1, 0], - "float64_with_zeros": [1.0, 0.0, 1.0], - "float64_positive": [1.0, 2.0, 1.0], - "strings_with_nulls": ["a", None, "b"], - "datetime_strings_naive": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values - ).astype(str), - "datetime_strings_ny": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values - ) - .dt.tz_localize("America/New_York") - .astype(str), - "datetime_strings_utc": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values - ) - .dt.tz_localize("UTC") - .astype(str), - "decimal": list(map(decimal.Decimal, ["1.0", "2", "3.234"])), - "array_of_float64": [ - np.array([1.0, 2.0], dtype="float64"), - np.array([3.0], dtype="float64"), - np.array([], dtype="float64"), - ], - "array_of_int64": [ - np.array([1, 2], dtype="int64"), - np.array([], dtype="int64"), - np.array([3], dtype="int64"), - ], - "array_of_strings": [ - np.array(["a", "b"], dtype="object"), - np.array([], dtype="object"), - np.array(["c"], dtype="object"), - ], - "map_of_strings_integers": [{"a": 1, "b": 2}, None, {}], - "map_of_integers_strings": [{}, None, {1: "a", 2: "b"}], - "map_of_complex_values": [None, {"a": [1, 2, 3], "b": []}, {}], - } - ) - - -@pytest.fixture(scope="module") -def batting_df(data_dir): - num_rows = 1000 - start_index = 30 - df = pd.read_parquet(data_dir / "parquet" / "batting.parquet").iloc[ - start_index : start_index + num_rows - ] - return df.reset_index(drop=True) - - -@pytest.fixture(scope="module") -def awards_players_df(data_dir): - return pd.read_parquet(data_dir / "parquet" / "awards_players.parquet") - - -@pytest.fixture(scope="module") -def df1(): - return pd.DataFrame( - {"key": list("abcd"), "value": [3, 4, 5, 6], "key2": list("eeff")} - ) - - -@pytest.fixture(scope="module") -def df2(): - return pd.DataFrame( - {"key": list("ac"), "other_value": [4.0, 6.0], "key3": list("fe")} - ) - - -@pytest.fixture(scope="module") -def intersect_df2(): - return pd.DataFrame({"key": list("cd"), "value": [5, 6], "key2": list("ff")}) - - -@pytest.fixture(scope="module") -def time_df1(): - return pd.DataFrame( - {"time": pd.to_datetime([1, 2, 3, 4]), "value": [1.1, 2.2, 3.3, 4.4]} - ) - - 
-@pytest.fixture(scope="module") -def time_df2(): - return pd.DataFrame({"time": pd.to_datetime([2, 4]), "other_value": [1.2, 2.0]}) - - -@pytest.fixture(scope="module") -def time_df3(): - return pd.DataFrame( - { - "time": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=8).values - ), - "id": list(range(1, 5)) * 2, - "value": [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8], - } - ) - - -@pytest.fixture(scope="module") -def time_keyed_df1(): - return pd.DataFrame( - { - "time": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=6).values - ), - "key": [1, 2, 3, 1, 2, 3], - "value": [1.2, 1.4, 2.0, 4.0, 8.0, 16.0], - } - ) - - -@pytest.fixture(scope="module") -def time_keyed_df2(): - return pd.DataFrame( - { - "time": pd.Series( - pd.date_range( - start="2017-01-02 01:02:03.234", freq="3D", periods=3 - ).values - ), - "key": [1, 2, 3], - "other_value": [1.1, 1.2, 2.2], - } - ) - - -@pytest.fixture(scope="module") -def client( - df, - df1, - df2, - df3, - time_df1, - time_df2, - time_df3, - time_keyed_df1, - time_keyed_df2, - intersect_df2, -): - return Backend().connect( - { - "df": df, - "df1": df1, - "df2": df2, - "df3": df3, - "left": df1, - "right": df2, - "time_df1": time_df1, - "time_df2": time_df2, - "time_df3": time_df3, - "time_keyed_df1": time_keyed_df1, - "time_keyed_df2": time_keyed_df2, - "intersect_df2": intersect_df2, - } - ) - - -@pytest.fixture(scope="module") -def df3(): - return pd.DataFrame( - { - "key": list("ac"), - "other_value": [4.0, 6.0], - "key2": list("ae"), - "key3": list("fe"), - } - ) - - -t_schema = { - "decimal": dt.Decimal(4, 3), - "array_of_float64": dt.Array(dt.double), - "array_of_int64": dt.Array(dt.int64), - "array_of_strings": dt.Array(dt.string), - "map_of_strings_integers": dt.Map(dt.string, dt.int64), - "map_of_integers_strings": dt.Map(dt.int64, dt.string), - "map_of_complex_values": dt.Map(dt.string, dt.Array(dt.int64)), -} - - -@pytest.fixture(scope="module") -def t(client): - return client.table("df", schema=t_schema) - - -@pytest.fixture(scope="module") -def lahman(batting_df, awards_players_df): - return Backend().connect( - {"batting": batting_df, "awards_players": awards_players_df} - ) - - -@pytest.fixture(scope="module") -def left(client): - return client.table("left") - - -@pytest.fixture(scope="module") -def right(client): - return client.table("right") - - -@pytest.fixture(scope="module") -def time_left(client): - return client.table("time_df1") - - -@pytest.fixture(scope="module") -def time_right(client): - return client.table("time_df2") - - -@pytest.fixture(scope="module") -def time_table(client): - return client.table("time_df3") - - -@pytest.fixture(scope="module") -def time_keyed_left(client): - return client.table("time_keyed_df1") - - -@pytest.fixture(scope="module") -def time_keyed_right(client): - return client.table("time_keyed_df2") - - -@pytest.fixture(scope="module") -def batting(lahman): - return lahman.table("batting") - - -@pytest.fixture(scope="module") -def sel_cols(batting): - cols = batting.columns - start, end = cols.index("AB"), cols.index("H") + 1 - return ["playerID", "yearID", "teamID", "G"] + cols[start:end] - - -@pytest.fixture(scope="module") -def players_base(batting, sel_cols): - return batting[sel_cols].order_by(sel_cols[:3]) - - -@pytest.fixture(scope="module") -def players(players_base): - return players_base.group_by("playerID") - - -@pytest.fixture(scope="module") -def players_df(players_base): - return players_base.execute().reset_index(drop=True) diff --git 
a/ibis/backends/pandas/tests/execution/test_timecontext.py b/ibis/backends/pandas/tests/execution/test_timecontext.py deleted file mode 100644 index 5a96cf33888f..000000000000 --- a/ibis/backends/pandas/tests/execution/test_timecontext.py +++ /dev/null @@ -1,399 +0,0 @@ -from __future__ import annotations - -import pandas as pd -import pytest -from packaging.version import parse as vparse - -import ibis -import ibis.common.exceptions as com -import ibis.expr.operations as ops -from ibis.backends.base.df.scope import Scope -from ibis.backends.base.df.timecontext import ( - TimeContext, - TimeContextRelation, - adjust_context, - compare_timecontext, - construct_time_context_aware_series, -) -from ibis.backends.pandas.execution import execute -from ibis.backends.pandas.execution.window import trim_window_result -from ibis.backends.pandas.tests.conftest import TestConf as tm - - -class CustomAsOfJoin(ops.AsOfJoin): - pass - - -def test_execute_with_timecontext(time_table): - expr = time_table - # define a time context for time-series data - context = (pd.Timestamp("20170101"), pd.Timestamp("20170103")) - - # without time context, execute produces every row - df_all = expr.execute() - assert len(df_all["time"]) == 8 - - # with context set, execute produces only rows within context - df_within_context = expr.execute(timecontext=context) - assert len(df_within_context["time"]) == 1 - - -def test_bad_timecontext(time_table, t): - expr = time_table - - # define context with illegal string - with pytest.raises(com.IbisError, match=r".*type pd.Timestamp.*"): - context = ("bad", "context") - expr.execute(timecontext=context) - - # define context with unsupported type int - with pytest.raises(com.IbisError, match=r".*type pd.Timestamp.*"): - context = (20091010, 20100101) - expr.execute(timecontext=context) - - # define context with too few values - with pytest.raises(com.IbisError, match=r".*should specify.*"): - context = pd.Timestamp("20101010") - expr.execute(timecontext=context) - - # define context with begin value later than end - with pytest.raises(com.IbisError, match=r".*before or equal.*"): - context = (pd.Timestamp("20101010"), pd.Timestamp("20090101")) - expr.execute(timecontext=context) - - # execute context with a table without TIME_COL - with pytest.raises(com.IbisError, match=r".*must have a time column.*"): - context = (pd.Timestamp("20090101"), pd.Timestamp("20100101")) - t.execute(timecontext=context) - - -def test_bad_call_to_adjust_context(): - op = "not_a_node" - context = (pd.Timestamp("20170101"), pd.Timestamp("20170103")) - scope = Scope() - with pytest.raises( - com.IbisError, match=r".*Unsupported input type for adjust context.*" - ): - adjust_context(op, scope, context) - - -def test_compare_timecontext(): - c1 = (pd.Timestamp("20170101"), pd.Timestamp("20170103")) - c2 = (pd.Timestamp("20170101"), pd.Timestamp("20170111")) - c3 = (pd.Timestamp("20160101"), pd.Timestamp("20160103")) - c4 = (pd.Timestamp("20161215"), pd.Timestamp("20170102")) - assert compare_timecontext(c1, c2) == TimeContextRelation.SUBSET - assert compare_timecontext(c2, c1) == TimeContextRelation.SUPERSET - assert compare_timecontext(c1, c4) == TimeContextRelation.OVERLAP - assert compare_timecontext(c1, c3) == TimeContextRelation.NONOVERLAP - - -def test_context_adjustment_asof_join( - time_keyed_left, time_keyed_right, time_keyed_df1, time_keyed_df2 -): - expr = time_keyed_left.asof_join( - time_keyed_right, "time", by="key", tolerance=4 * ibis.interval(days=1) - )[time_keyed_left, 
time_keyed_right.other_value] - context = (pd.Timestamp("20170105"), pd.Timestamp("20170111")) - result = expr.execute(timecontext=context) - - # compare with asof_join of manually trimmed tables - trimmed_df1 = time_keyed_df1[time_keyed_df1["time"] >= context[0]][ - time_keyed_df1["time"] < context[1] - ] - trimmed_df2 = time_keyed_df2[ - time_keyed_df2["time"] >= context[0] - pd.Timedelta(days=4) - ][time_keyed_df2["time"] < context[1]] - expected = pd.merge_asof( - trimmed_df1, - trimmed_df2, - on="time", - by="key", - tolerance=pd.Timedelta("4D"), - ) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - ["interval_ibis", "interval_pd"], - [ - (ibis.interval(days=1), "1d"), - (3 * ibis.interval(days=1), "3d"), - (5 * ibis.interval(days=1), "5d"), - ], -) -def test_context_adjustment_window(time_table, time_df3, interval_ibis, interval_pd): - # trim data manually - expected = ( - time_df3.set_index("time").value.rolling(interval_pd, closed="both").mean() - ) - expected = expected[expected.index >= pd.Timestamp("20170105")].reset_index( - drop=True - ) - - context = pd.Timestamp("20170105"), pd.Timestamp("20170111") - - window = ibis.trailing_window(interval_ibis, order_by=time_table.time) - expr = time_table["value"].mean().over(window) - # result should adjust time context accordingly - result = expr.execute(timecontext=context) - tm.assert_series_equal(result, expected) - - -def test_trim_window_result(time_df3): - """Unit test `trim_window_result` in Window execution.""" - df = time_df3.copy() - context = pd.Timestamp("20170105"), pd.Timestamp("20170111") - - # trim_window_result takes a MultiIndex Series as input - series = df["value"] - time_index = df.set_index("time").index - series.index = pd.MultiIndex.from_arrays( - [series.index, time_index], - names=series.index.names + ["time"], - ) - result = trim_window_result(series, context) - expected = df["time"][df["time"] >= pd.Timestamp("20170105")].reset_index(drop=True) - - # result should adjust time context accordingly - tm.assert_series_equal(result.reset_index()["time"], expected) - - # trim with a non-datetime type of 'time' throws Exception - wrong_series = df["id"] - df["time"] = df["time"].astype(str) - time_index = df.set_index("time").index - wrong_series.index = pd.MultiIndex.from_arrays( - [wrong_series.index, time_index], - names=wrong_series.index.names + ["time"], - ) - with pytest.raises(TypeError, match=r".*not supported between instances.*"): - trim_window_result(wrong_series, context) - - # column is ignored and series is not trimmed - no_context_result = trim_window_result(series, None) - tm.assert_series_equal(no_context_result, series) - - -def test_setting_timecontext_in_scope(time_table, time_df3): - expected_win_1 = ( - time_df3.set_index("time").value.rolling("3d", closed="both").mean() - ) - expected_win_1 = expected_win_1[ - expected_win_1.index >= pd.Timestamp("20170105") - ].reset_index(drop=True) - - context = pd.Timestamp("20170105"), pd.Timestamp("20170111") - window1 = ibis.trailing_window(3 * ibis.interval(days=1), order_by=time_table.time) - """In the following expression, Selection node will be executed first and - get table in context ('20170105', '20170101'). - - Then in window execution table will be executed again with a larger - context adjusted by window preceding days ('20170102', '20170111'). - To get the correct result, the cached table result with a smaller - context must be discard and updated to a larger time range. 
- """ - expr = time_table.mutate(value=time_table["value"].mean().over(window1)) - result = expr.execute(timecontext=context) - tm.assert_series_equal(result["value"], expected_win_1) - - -def test_context_adjustment_multi_window(time_table, time_df3): - expected_win_1 = ( - time_df3.set_index("time") - .rename(columns={"value": "v1"})["v1"] - .rolling("3d", closed="both") - .mean() - ) - expected_win_1 = expected_win_1[ - expected_win_1.index >= pd.Timestamp("20170105") - ].reset_index(drop=True) - - expected_win_2 = ( - time_df3.set_index("time") - .rename(columns={"value": "v2"})["v2"] - .rolling("2d", closed="both") - .mean() - ) - expected_win_2 = expected_win_2[ - expected_win_2.index >= pd.Timestamp("20170105") - ].reset_index(drop=True) - - context = pd.Timestamp("20170105"), pd.Timestamp("20170111") - window1 = ibis.trailing_window(3 * ibis.interval(days=1), order_by=time_table.time) - window2 = ibis.trailing_window(2 * ibis.interval(days=1), order_by=time_table.time) - expr = time_table.mutate( - v1=time_table["value"].mean().over(window1), - v2=time_table["value"].mean().over(window2), - ) - result = expr.execute(timecontext=context) - - tm.assert_series_equal(result["v1"], expected_win_1) - tm.assert_series_equal(result["v2"], expected_win_2) - - -@pytest.mark.xfail( - condition=vparse("1.4") <= vparse(pd.__version__) < vparse("1.4.2"), - raises=ValueError, - reason="https://github.com/pandas-dev/pandas/pull/44068", -) -def test_context_adjustment_window_groupby_id(time_table, time_df3): - """This test case is meant to test trim_window_result method in - pandas/execution/window.py to see if it could trim Series correctly with - groupby params.""" - expected = ( - time_df3.set_index("time") - .groupby("id") - .value.rolling("3d", closed="both") - .mean() - ) - # This is a MultiIndexed Series - expected = expected.reset_index() - expected = expected[expected.time >= pd.Timestamp("20170105")].reset_index( - drop=True - )["value"] - - context = pd.Timestamp("20170105"), pd.Timestamp("20170111") - - # expected.index.name = None - window = ibis.trailing_window( - 3 * ibis.interval(days=1), group_by="id", order_by=time_table.time - ) - expr = time_table["value"].mean().over(window) - # result should adjust time context accordingly - result = expr.execute(timecontext=context) - tm.assert_series_equal(result, expected) - - -def test_adjust_context_scope(time_keyed_left, time_keyed_right): - """Test that `adjust_context` has access to `scope` by default.""" - - @adjust_context.register(CustomAsOfJoin) - def adjust_context_custom_asof_join( - op: ops.AsOfJoin, - scope: Scope, - timecontext: TimeContext, - ) -> TimeContext: - """Confirms that `scope` is passed in.""" - assert scope is not None - return timecontext - - expr = CustomAsOfJoin( - left=time_keyed_left, - right=time_keyed_right, - predicates="time", - by="key", - tolerance=ibis.interval(days=4), - ).to_expr() - expr = expr[time_keyed_left, time_keyed_right.other_value] - context = (pd.Timestamp("20170105"), pd.Timestamp("20170111")) - expr.execute(timecontext=context) - - -def test_adjust_context_complete_shift( - time_keyed_left, - time_keyed_right, - time_keyed_df1, - time_keyed_df2, -): - """Test `adjust_context` function that completely shifts the context. - - This results in an adjusted context that is NOT a subset of the - original context. This is unlike an `adjust_context` function - that only expands the context. - - See #3104 - """ - - # Create a contrived `adjust_context` function for - # CustomAsOfJoin to mock this. 
- - @adjust_context.register(CustomAsOfJoin) - def adjust_context_custom_asof_join( - op: ops.AsOfJoin, - scope: Scope, - timecontext: TimeContext, - ) -> TimeContext: - """Shifts both the begin and end in the same direction.""" - - begin, end = timecontext - timedelta = execute(op.tolerance) - return (begin - timedelta, end - timedelta) - - expr = CustomAsOfJoin( - left=time_keyed_left, - right=time_keyed_right, - predicates="time", - by="key", - tolerance=ibis.interval(days=4), - ).to_expr() - expr = expr[time_keyed_left, time_keyed_right.other_value] - context = (pd.Timestamp("20170101"), pd.Timestamp("20170111")) - result = expr.execute(timecontext=context) - - # Compare with asof_join of manually trimmed tables - # Left table: No shift for context - # Right table: Shift both begin and end of context by 4 days - trimmed_df1 = time_keyed_df1[time_keyed_df1["time"] >= context[0]][ - time_keyed_df1["time"] < context[1] - ] - trimmed_df2 = time_keyed_df2[ - time_keyed_df2["time"] >= context[0] - pd.Timedelta(days=4) - ][time_keyed_df2["time"] < context[1] - pd.Timedelta(days=4)] - expected = pd.merge_asof( - trimmed_df1, - trimmed_df2, - on="time", - by="key", - tolerance=pd.Timedelta("4D"), - ) - - tm.assert_frame_equal(result, expected) - - -def test_construct_time_context_aware_series(time_df3): - """Unit test for `construct_time_context_aware_series`""" - # Series without 'time' index will result in a MultiIndex with 'time' - df = time_df3 - expected = df["value"] - time_index = pd.Index(df["time"]) - expected.index = pd.MultiIndex.from_arrays( - [expected.index, time_index], - names=expected.index.names + ["time"], - ) - result = construct_time_context_aware_series(df["value"], df) - tm.assert_series_equal(result, expected) - - # Series with 'time' as index will not change - time_indexed_df = time_df3.set_index("time") - expected_time_aware = time_indexed_df["value"] - result_time_aware = construct_time_context_aware_series( - time_indexed_df["value"], time_indexed_df - ) - tm.assert_series_equal(result_time_aware, expected_time_aware) - - # Series with a MultiIndex, where 'time' is in the MultiIndex, - # will not change - multi_index_time_aware_series = result_time_aware - expected_multi_index_time_aware = result_time_aware - result_multi_index_time_aware = construct_time_context_aware_series( - multi_index_time_aware_series, time_indexed_df - ) - tm.assert_series_equal( - result_multi_index_time_aware, expected_multi_index_time_aware - ) - - # Series with a MultiIndex, where 'time' is NOT in the MultiIndex, - # 'time' will be added into the MultiIndex - multi_index_series = df["id"] - expected_multi_index = df["id"].copy() - other_index = pd.Index(df["value"]) - expected_multi_index.index = pd.MultiIndex.from_arrays( - [expected_multi_index.index, other_index, time_index], - names=expected_multi_index.index.names + ["value", "time"], - ) - multi_index_series.index = pd.MultiIndex.from_arrays( - [multi_index_series.index, other_index], - names=multi_index_series.index.names + ["value"], - ) - result_multi_index = construct_time_context_aware_series(multi_index_series, df) - tm.assert_series_equal(result_multi_index, expected_multi_index) diff --git a/ibis/backends/pandas/tests/test_aggcontext.py b/ibis/backends/pandas/tests/test_aggcontext.py deleted file mode 100644 index 8fc7281a6fa7..000000000000 --- a/ibis/backends/pandas/tests/test_aggcontext.py +++ /dev/null @@ -1,167 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd -import pytest -from 
pandas import testing as tm -from pytest import param - -from ibis.backends.pandas.aggcontext import Summarize, window_agg_udf - -df = pd.DataFrame( - { - "id": [1, 2, 1, 2], - "v1": [1.0, 2.0, 3.0, 4.0], - "v2": [10.0, 20.0, 30.0, 40.0], - } -) - - -@pytest.mark.parametrize( - ("agg_fn", "expected_fn"), - [ - param( - lambda v1: v1.mean(), - lambda df: df["v1"].mean(), - id="udf", - ), - param( - "mean", - lambda df: df["v1"].mean(), - id="string", - ), - ], -) -def test_summarize_single_series(agg_fn, expected_fn): - """Test Summarize.agg operating on a single Series.""" - - aggcontext = Summarize() - - result = aggcontext.agg(df["v1"], agg_fn) - expected = expected_fn(df) - - assert result == expected - - -@pytest.mark.parametrize( - ("agg_fn", "expected_fn"), - [ - param( - lambda v1: v1.mean(), - lambda df: df["v1"].mean(), - id="udf", - ), - param( - "mean", - lambda df: df["v1"].mean(), - id="string", - ), - ], -) -def test_summarize_single_seriesgroupby(agg_fn, expected_fn): - """Test Summarize.agg operating on a single SeriesGroupBy.""" - - aggcontext = Summarize() - - df_grouped = df.sort_values("id").groupby("id") - result = aggcontext.agg(df_grouped["v1"], agg_fn) - - expected = expected_fn(df_grouped) - - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize( - ("agg_fn", "expected_fn"), - [ - param( - lambda v1, v2: v1.mean() - v2.mean(), - lambda df: df["v1"].mean() - df["v2"].mean(), - id="two-column", - ), - # Two columns, but only the second one is actually used in UDF - param( - lambda v1, v2: v2.mean(), - lambda df: df["v2"].mean(), - id="redundant-column", - ), - ], -) -def test_summarize_multiple_series(agg_fn, expected_fn): - """Test Summarize.agg operating on many Series.""" - - aggcontext = Summarize() - - args = [df["v1"], df["v2"]] - result = aggcontext.agg(args[0], agg_fn, *args[1:]) - - expected = expected_fn(df) - - assert result == expected - - -@pytest.mark.parametrize( - "param", - [ - ( - pd.Series([True, True, True, True]), - pd.Series([1.0, 2.0, 2.0, 3.0]), - ), - ( - pd.Series([False, True, True, False]), - pd.Series([np.NaN, 2.0, 2.0, np.NaN]), - ), - ], -) -def test_window_agg_udf(param): - """Test passing custom window indices for window aggregation.""" - - mask, expected = param - - grouped_data = df.sort_values("id").groupby("id")["v1"] - result_index = grouped_data.obj.index - - window_lower_indices = pd.Series([0, 0, 2, 2]) - window_upper_indices = pd.Series([1, 2, 3, 4]) - - result = window_agg_udf( - grouped_data, - lambda s: s.mean(), - window_lower_indices, - window_upper_indices, - mask, - result_index, - dtype="float", - max_lookback=None, - ) - - expected.index = grouped_data.obj.index - - tm.assert_series_equal(result, expected) - - -def test_window_agg_udf_different_freq(): - """Test that window_agg_udf works when the window series and data series - have different frequencies.""" - - time = pd.Series([pd.Timestamp("20200101"), pd.Timestamp("20200201")]) - data = pd.Series([1, 2, 3, 4, 5, 6]) - window_lower_indices = pd.Series([0, 4]) - window_upper_indices = pd.Series([5, 7]) - mask = pd.Series([True, True]) - result_index = time.index - - result = window_agg_udf( - data, - lambda s: s.mean(), - window_lower_indices, - window_upper_indices, - mask, - result_index, - "float", - None, - ) - - expected = pd.Series([data.iloc[0:5].mean(), data.iloc[4:7].mean()]) - - tm.assert_series_equal(result, expected) diff --git a/ibis/backends/pandas/tests/execution/test_arrays.py b/ibis/backends/pandas/tests/test_arrays.py similarity 
index 96% rename from ibis/backends/pandas/tests/execution/test_arrays.py rename to ibis/backends/pandas/tests/test_arrays.py index 00e873715224..98d1bb6fcd8d 100644 --- a/ibis/backends/pandas/tests/execution/test_arrays.py +++ b/ibis/backends/pandas/tests/test_arrays.py @@ -36,6 +36,13 @@ def test_array_length(t): tm.assert_frame_equal(result, expected) +def test_array_slice_using_column(t): + expr = t.array_of_int64[t.plain_int64 :] + result = expr.execute() + expected = pd.Series([[2], [], []]) + tm.assert_series_equal(result, expected) + + def test_array_length_scalar(client): raw_value = np.array([1, 2, 4]) value = ibis.array(raw_value) diff --git a/ibis/backends/pandas/tests/execution/test_cast.py b/ibis/backends/pandas/tests/test_cast.py similarity index 80% rename from ibis/backends/pandas/tests/execution/test_cast.py rename to ibis/backends/pandas/tests/test_cast.py index bc2d8a60f974..7ca38a675261 100644 --- a/ibis/backends/pandas/tests/execution/test_cast.py +++ b/ibis/backends/pandas/tests/test_cast.py @@ -5,14 +5,10 @@ import numpy as np import pandas as pd import pytest -import pytz -from pytest import param import ibis import ibis.expr.datatypes as dt -from ibis.backends.pandas.execution import execute from ibis.backends.pandas.tests.conftest import TestConf as tm -from ibis.common.exceptions import OperationNotDefinedError TIMESTAMP = "2022-03-13 06:59:10.467417" @@ -63,7 +59,9 @@ def test_cast_array(t, from_, to, expected): # One of the arrays in the Series res = result[0] assert isinstance(res, list) - assert [ibis.literal(v).type() for v in res] == [expected] * len(res) + + for v in result: + assert v == [dt.normalize(expected, x) for x in v] @pytest.mark.parametrize( @@ -71,7 +69,7 @@ def test_cast_array(t, from_, to, expected): [ ("string", "object"), ("int64", "int64"), - param("double", "float64", marks=pytest.mark.xfail(raises=TypeError)), + ("double", "float64"), ( dt.Timestamp("America/Los_Angeles"), "datetime64[ns, America/Los_Angeles]", @@ -97,22 +95,18 @@ def test_cast_timestamp_column(t, df, column, to, expected): [ ("string", str), ("int64", lambda x: pd.Timestamp(x).value // int(1e9)), - param( - "double", - float, - marks=pytest.mark.xfail(raises=OperationNotDefinedError), - ), + ("double", lambda x: float(pd.Timestamp(x).value // int(1e9))), ( dt.Timestamp("America/Los_Angeles"), - lambda x: x.astimezone(tz=pytz.timezone("America/Los_Angeles")), + lambda x: x.tz_localize(tz="America/Los_Angeles"), ), ], ) -def test_cast_timestamp_scalar_naive(to, expected): +def test_cast_timestamp_scalar_naive(client, to, expected): literal_expr = ibis.literal(pd.Timestamp(TIMESTAMP)) value = literal_expr.cast(to) - result = execute(value.op()) - raw = execute(literal_expr.op()) + result = client.execute(value) + raw = client.execute(literal_expr) assert result == expected(raw) @@ -121,23 +115,19 @@ def test_cast_timestamp_scalar_naive(to, expected): [ ("string", str), ("int64", lambda x: pd.Timestamp(x).value // int(1e9)), - param( - "double", - float, - marks=pytest.mark.xfail(raises=OperationNotDefinedError), - ), + ("double", lambda x: float(pd.Timestamp(x).value // int(1e9))), ( dt.Timestamp("America/Los_Angeles"), - lambda x: x.astimezone(tz=pytz.timezone("America/Los_Angeles")), + lambda x: x.astimezone(tz="America/Los_Angeles"), ), ], ) @pytest.mark.parametrize("tz", ["UTC", "America/New_York"]) -def test_cast_timestamp_scalar(to, expected, tz): +def test_cast_timestamp_scalar(client, to, expected, tz): literal_expr = 
ibis.literal(pd.Timestamp(TIMESTAMP).tz_localize(tz)) value = literal_expr.cast(to) - result = execute(value.op()) - raw = execute(literal_expr.op()) + result = client.execute(value) + raw = client.execute(literal_expr) assert result == expected(raw) @@ -158,7 +148,7 @@ def test_cast_date(t, df, column): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("type", [dt.Decimal(9, 0), dt.Decimal(12, 3)]) +@pytest.mark.parametrize("type", [dt.Decimal(9, 2), dt.Decimal(12, 3)]) def test_cast_to_decimal(t, df, type): expr = t.float64_as_strings.cast(type) result = expr.execute() diff --git a/ibis/backends/pandas/tests/test_core.py b/ibis/backends/pandas/tests/test_core.py index eb980c6cf7e9..45e3a3a02b94 100644 --- a/ibis/backends/pandas/tests/test_core.py +++ b/ibis/backends/pandas/tests/test_core.py @@ -6,11 +6,7 @@ import ibis import ibis.common.exceptions as com -import ibis.expr.operations as ops -from ibis.backends.base.df.scope import Scope from ibis.backends.pandas import Backend -from ibis.backends.pandas.dispatch import post_execute, pre_execute -from ibis.backends.pandas.execution import execute @pytest.fixture @@ -50,59 +46,24 @@ def test_from_dataframe(dataframe, ibis_table, core_client): tm.assert_frame_equal(result, expected) -def test_pre_execute_basic(): - """Test that pre_execute has intercepted execution and provided its own - scope dict.""" - - @pre_execute.register(ops.Add) - def pre_execute_test(op, *clients, scope=None, **kwargs): - return Scope({op: 4}, None) - - one = ibis.literal(1) - expr = one + one - result = execute(expr.op()) - assert result == 4 - - del pre_execute.funcs[(ops.Add,)] - pre_execute.reorder() - pre_execute._cache.clear() - - def test_execute_parameter_only(): param = ibis.param("int64") - result = execute(param.op(), params={param.op(): 42}) + con = ibis.pandas.connect() + result = con.execute(param, params={param.op(): 42}) assert result == 42 def test_missing_data_sources(): - t = ibis.table([("a", "string")]) + t = ibis.table([("a", "string")], name="t") expr = t.a.length() + con = ibis.pandas.connect() with pytest.raises(com.UnboundExpressionError): - execute(expr.op()) - - -def test_post_execute_called_on_joins(dataframe, core_client, ibis_table): - count = [0] - - @post_execute.register(ops.InnerJoin, pd.DataFrame) - def tmp_left_join_exe(op, lhs, **kwargs): - count[0] += 1 - return lhs - - left = ibis_table - right = left.view() - join = left.join(right, "plain_strings")[left.plain_int64] - result = join.execute() - assert result is not None - assert not result.empty - assert count[0] == 1 - - -def test_scope_look_up(): - # test if scope could lookup items properly - scope = Scope() - one_day = ibis.interval(days=1).op() - one_hour = ibis.interval(hours=1).op() - scope = scope.merge_scope(Scope({one_day: 1}, None)) - assert scope.get_value(one_hour) is None - assert scope.get_value(one_day) is not None + con.execute(expr) + + +def test_unbound_table_execution(): + t = ibis.table([("a", "string")], name="t") + expr = t.a.length() + con = ibis.pandas.connect({"t": pd.DataFrame({"a": ["a", "ab", "abc"]})}) + result = con.execute(expr) + assert result.tolist() == [1, 2, 3] diff --git a/ibis/backends/pandas/tests/test_dispatcher.py b/ibis/backends/pandas/tests/test_dispatcher.py deleted file mode 100644 index 27916fd112e0..000000000000 --- a/ibis/backends/pandas/tests/test_dispatcher.py +++ /dev/null @@ -1,143 +0,0 @@ -from __future__ import annotations - -import pytest -from multipledispatch import Dispatcher - -from 
ibis.backends.pandas.dispatcher import TwoLevelDispatcher - - -class A1: - pass - - -class A2(A1): - pass - - -class A3(A2): - pass - - -class B1: - pass - - -class B2(B1): - pass - - -class B3(B2): - pass - - -@pytest.fixture -def foo_dispatchers(): - foo = TwoLevelDispatcher("foo", doc="Test dispatcher foo") - foo_m = Dispatcher("foo_m", doc="Control dispatcher foo_m") - - @foo.register(A1, B1) - @foo_m.register(A1, B1) - def foo0(x, y): - return 0 - - @foo.register(A1, B2) - @foo_m.register(A1, B2) - def foo1(x, y): - return 1 - - @foo.register(A2, B1) - @foo_m.register(A2, B1) - def foo2(x, y): - return 2 - - @foo.register(A2, B2) - @foo_m.register(A2, B2) - def foo3(x, y): - return 3 - - @foo.register( - (A1, A2), - ) - @foo_m.register( - (A1, A2), - ) - def foo4(x): - return 4 - - return foo, foo_m - - -@pytest.fixture -def foo(foo_dispatchers): - return foo_dispatchers[0] - - -@pytest.fixture -def foo_m(foo_dispatchers): - return foo_dispatchers[1] - - -def test_cache(foo, mocker): - """Test that cache is properly set after calling with args.""" - - spy = mocker.spy(foo, "dispatch") - a1, b1 = A1(), B1() - - assert (A1, B1) not in foo._cache - foo(a1, b1) - assert (A1, B1) in foo._cache - foo(a1, b1) - spy.assert_called_once_with(A1, B1) - - -def test_dispatch(foo, mocker): - """Test that calling dispatcher with a signature that is registered does - not trigger a linear search through dispatch_iter.""" - - spy = mocker.spy(foo, "dispatch_iter") - - # This should not trigger a linear search - foo(A1(), B1()) - assert not spy.called, ( - "Calling dispatcher with registered signature should " - "not trigger linear search" - ) - - foo(A3(), B3()) - spy.assert_called_once_with(A3, B3) - - -@pytest.mark.parametrize( - "args", - [ - (A1(), B1()), - (A1(), B2()), - (A1(), B3()), - (A2(), B1()), - (A2(), B2()), - (A2(), B3()), - (A3(), B1()), - (A3(), B2()), - (A3(), B3()), - (A1(),), - (A2(),), - (A3(),), - ], -) -def test_registered(foo_dispatchers, args): - foo, foo_m = foo_dispatchers - assert foo(*args) == foo_m(*args) - - -def test_ordering(foo, foo_m): - assert foo.ordering == foo_m.ordering - - -def test_funcs(foo, foo_m): - assert foo.funcs == foo_m.funcs - - -@pytest.mark.parametrize("args", [(B1(),), (B2(),), (A1(), A1()), (A1(), A2(), A3())]) -def test_unregistered(foo, args): - with pytest.raises(NotImplementedError, match="Could not find signature for foo.*"): - foo(*args) diff --git a/ibis/backends/pandas/tests/execution/test_functions.py b/ibis/backends/pandas/tests/test_functions.py similarity index 92% rename from ibis/backends/pandas/tests/execution/test_functions.py rename to ibis/backends/pandas/tests/test_functions.py index 2b3851675858..9ef36b23ffb6 100644 --- a/ibis/backends/pandas/tests/execution/test_functions.py +++ b/ibis/backends/pandas/tests/test_functions.py @@ -13,7 +13,6 @@ import ibis import ibis.expr.datatypes as dt -from ibis.backends.pandas.execution import execute from ibis.backends.pandas.tests.conftest import TestConf as tm from ibis.backends.pandas.udf import udf @@ -74,7 +73,6 @@ def wrapper(*args, **kwargs): param( methodcaller("floor"), lambda x: decimal.Decimal(math.floor(x)), id="floor" ), - param(methodcaller("exp"), methodcaller("exp"), id="exp"), param( methodcaller("sign"), lambda x: x if not x else decimal.Decimal(1).copy_sign(x), @@ -97,19 +95,21 @@ def wrapper(*args, **kwargs): ) def test_math_functions_decimal(t, df, ibis_func, pandas_func): dtype = dt.Decimal(12, 3) - expr = ibis_func(t.float64_as_strings.cast(dtype)) - result = 
expr.execute() context = decimal.Context(prec=dtype.precision) - expected = df.float64_as_strings.apply( - lambda x: context.create_decimal(x).quantize( - decimal.Decimal( - f"{'0' * (dtype.precision - dtype.scale)}.{'0' * dtype.scale}" - ) + + def normalize(x): + x = context.create_decimal(x) + p = decimal.Decimal( + f"{'0' * (dtype.precision - dtype.scale)}.{'0' * dtype.scale}" ) - ).apply(pandas_func) + return x.quantize(p) + + expr = ibis_func(t.float64_as_strings.cast(dtype)) + result = expr.execute() - result[result.apply(math.isnan)] = -99999 - expected[expected.apply(math.isnan)] = -99999 + expected = ( + df.float64_as_strings.apply(normalize).apply(pandas_func).apply(normalize) + ) tm.assert_series_equal(result, expected.astype(expr.type().to_pandas())) @@ -221,10 +221,11 @@ def my_func(x, _): return x df = pd.DataFrame({"left": [left], "right": [right]}) - table = ibis.pandas.connect().from_dataframe(df) + con = ibis.pandas.connect() + table = con.from_dataframe(df) expr = my_func(table.left, table.right) - result = execute(expr.op()) + result = con.execute(expr) assert isinstance(result, pd.Series) result = result.tolist() @@ -238,8 +239,8 @@ def test_ifelse_returning_bool(): true = ibis.literal(True) false = ibis.literal(False) expr = ibis.ifelse(one + one == two, true, false) - result = execute(expr.op()) - assert result is True + result = ibis.pandas.connect().execute(expr) + assert result is True or result is np.True_ @pytest.mark.parametrize( @@ -261,7 +262,7 @@ def func(x): df = pd.DataFrame({"col": [value]}) table = ibis.pandas.connect().from_dataframe(df) - result = execute(table.col.op()) + result = table.col.execute() assert isinstance(result, pd.Series) result = result.tolist() diff --git a/ibis/backends/pandas/tests/test_helpers.py b/ibis/backends/pandas/tests/test_helpers.py new file mode 100644 index 000000000000..4814a0d85376 --- /dev/null +++ b/ibis/backends/pandas/tests/test_helpers.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +import pytest + +from ibis.backends.pandas.helpers import RowsFrame + +lst = list(range(10)) + + +@pytest.mark.parametrize( + ("ix", "start", "end", "expected"), + [ + (0, None, None, lst), + (0, 0, None, lst), + (0, None, 0, [0]), + (0, 0, 0, [0]), + (0, 0, 1, [0, 1]), + (0, 1, 1, [1]), + (0, 1, 2, [1, 2]), + (0, 1, None, lst[1:]), + (0, None, 1, [0, 1]), + (0, -1, None, lst), + (0, None, -1, []), + (0, -1, -1, []), + (0, -2, -1, []), + (0, -2, None, lst), + (0, None, -2, []), + (0, -1, 1, [0, 1]), + (0, 1, -1, []), + (0, -1, 2, [0, 1, 2]), + (1, None, None, lst), + (1, 0, None, lst[1:]), + (1, None, 0, [0, 1]), + (1, 0, 0, [1]), + (1, 0, 1, [1, 2]), + (1, 1, 1, [2]), + (1, 1, 2, [2, 3]), + (1, 1, None, lst[2:]), + (1, None, 1, [0, 1, 2]), + (1, -1, None, lst), + (1, None, -1, [0]), + (1, -1, -1, [0]), + (1, -2, -1, [0]), + (1, -2, None, lst), + (1, None, -2, []), + (1, -1, 1, [0, 1, 2]), + (1, 1, -1, []), + (1, -1, 2, [0, 1, 2, 3]), + (2, None, None, lst), + (2, 0, None, lst[2:]), + (2, None, 0, [0, 1, 2]), + (2, 0, 0, [2]), + (2, 0, 1, [2, 3]), + (2, 1, 1, [3]), + (2, 1, 2, [3, 4]), + (2, 1, None, lst[3:]), + (2, None, 1, [0, 1, 2, 3]), + (2, -1, None, lst[1:]), + (2, None, -1, [0, 1]), + (2, -1, -1, [1]), + (2, -2, -1, [0, 1]), + (2, -2, None, lst), + (2, None, -2, [0]), + (2, -1, 1, [1, 2, 3]), + (2, 1, -1, []), + (2, -1, 2, [1, 2, 3, 4]), + (3, None, None, lst), + ], +) +def test_rows_frame_adjustment(ix, start, end, expected): + start_index, end_index = RowsFrame.adjust(len(lst), ix, start, end) + assert 
lst[start_index:end_index] == expected diff --git a/ibis/backends/pandas/tests/execution/test_join.py b/ibis/backends/pandas/tests/test_join.py similarity index 89% rename from ibis/backends/pandas/tests/execution/test_join.py rename to ibis/backends/pandas/tests/test_join.py index 8fd990ea86e1..a9acaad3ed6e 100644 --- a/ibis/backends/pandas/tests/execution/test_join.py +++ b/ibis/backends/pandas/tests/test_join.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd import pandas.testing as tm import pytest @@ -57,9 +58,26 @@ def test_join_with_multiple_predicates(how, left, right, df1, df2): ] result = expr.execute() expected = pd.merge( - df1, df2, how=how, left_on=["key", "key2"], right_on=["key", "key3"] + df1, + df2, + how=how, + left_on=["key", "key2"], + right_on=["key", "key3"], + suffixes=("_left", "_right"), ).reset_index(drop=True) - tm.assert_frame_equal(result[expected.columns], expected) + + expected_columns = ["key", "value", "key2", "key3", "other_value"] + expected = expected[expected_columns] + if how == "right": + # the ibis expression references the `key` column from the left table + # which is not present in the result of the right join, but pandas + # includes the column from the right table + expected["key"] = pd.Series([np.nan, np.nan, np.nan], dtype=object) + elif how == "outer": + expected["key"] = pd.Series(["a", "b", "c", "d", np.nan, np.nan], dtype=object) + + assert list(result.columns) == expected_columns + tm.assert_frame_equal(result, expected) @mutating_join_type @@ -70,6 +88,12 @@ def test_join_with_multiple_predicates_written_as_one(how, left, right, df1, df2 expected = pd.merge( df1, df2, how=how, left_on=["key", "key2"], right_on=["key", "key3"] ).reset_index(drop=True) + + if how == "right": + expected["key"] = pd.Series([np.nan, np.nan], dtype=object) + elif how == "outer": + expected["key"] = pd.Series(["a", "b", "c", "d", np.nan, np.nan], dtype=object) + tm.assert_frame_equal(result[expected.columns], expected) @@ -270,7 +294,9 @@ def test_asof_join(time_left, time_right, time_df1, time_df2): def test_asof_join_predicate(time_left, time_right, time_df1, time_df2): expr = time_left.asof_join(time_right, time_left.time == time_right.time) result = expr.execute() - expected = pd.merge_asof(time_df1, time_df2, on="time") + expected = pd.merge_asof( + time_df1, time_df2, on="time", direction="nearest", allow_exact_matches=True + ) tm.assert_frame_equal(result[expected.columns], expected) with pytest.raises(AssertionError): tm.assert_series_equal(result["time"], result["time_right"]) @@ -281,13 +307,10 @@ def test_keyed_asof_join( time_keyed_left, time_keyed_right, time_keyed_df1, time_keyed_df2 ): expr = time_keyed_left.asof_join(time_keyed_right, "time", by="key") + expr = expr.select(time_keyed_left, time_keyed_right.other_value) result = expr.execute() expected = pd.merge_asof(time_keyed_df1, time_keyed_df2, on="time", by="key") tm.assert_frame_equal(result[expected.columns], expected) - with pytest.raises(AssertionError): - tm.assert_series_equal(result["time"], result["time_right"]) - with pytest.raises(AssertionError): - tm.assert_series_equal(result["key"], result["key_right"]) @merge_asof_minversion @@ -327,7 +350,7 @@ def test_asof_join_overlapping_non_predicate( time_keyed_df2.assign(collide=time_keyed_df2["key"] + time_keyed_df2["other_value"]) expr = time_keyed_left.asof_join( - time_keyed_right, predicates=[("time", "time")], by=[("key", "key")] + time_keyed_right, on=("time", "time"), 
predicates=[("key", "key")] ) result = expr.execute() expected = pd.merge_asof( @@ -595,3 +618,33 @@ def test_multijoin(tracts_df, fields_df, harvest_df): ) tm.assert_frame_equal(result, expected) + + +def test_chain_join(): + test_df1 = pd.DataFrame({"id": ["1", "1"], "value": ["a", "a"]}) + test_df2 = pd.DataFrame({"id": ["1", "1"], "value": ["z", "z"]}) + test_df3 = pd.DataFrame({"id": ["1", "1"], "value": ["z1", "z1"]}) + + conn = ibis.pandas.connect({"df1": test_df1, "df2": test_df2, "df3": test_df3}) + + t1 = conn.table("df1") + t2 = conn.table("df2") + t3 = conn.table("df3") + + expr = ( + t1.join(t2, t1.id == t2.id) + .join(t3, t1.id == t3.id) + .select(t1.id, t1.value, t2.value.name("value2"), t3.value.name("value3")) + ) + result = expr.execute() + + n = len(test_df1) * len(test_df2) * len(test_df3) + expected = pd.DataFrame( + { + "id": ["1"] * n, + "value": ["a"] * n, + "value2": ["z"] * n, + "value3": ["z1"] * n, + } + ) + tm.assert_frame_equal(result, expected) diff --git a/ibis/backends/pandas/tests/execution/test_maps.py b/ibis/backends/pandas/tests/test_maps.py similarity index 100% rename from ibis/backends/pandas/tests/execution/test_maps.py rename to ibis/backends/pandas/tests/test_maps.py diff --git a/ibis/backends/pandas/tests/execution/test_operations.py b/ibis/backends/pandas/tests/test_operations.py similarity index 99% rename from ibis/backends/pandas/tests/execution/test_operations.py rename to ibis/backends/pandas/tests/test_operations.py index 54877d1ce4d0..3d6e78d9d2c6 100644 --- a/ibis/backends/pandas/tests/execution/test_operations.py +++ b/ibis/backends/pandas/tests/test_operations.py @@ -13,7 +13,6 @@ import ibis.expr.datatypes as dt from ibis import _ from ibis.backends.pandas import Backend -from ibis.backends.pandas.execution import execute from ibis.backends.pandas.tests.conftest import TestConf as tm @@ -183,7 +182,6 @@ def test_group_by_rename_key(t, df): expr = t.group_by(t.dup_strings.name("foo")).aggregate( dup_string_count=t.dup_strings.count() ) - assert "foo" in expr.schema() result = expr.execute() assert "foo" in result.columns @@ -281,7 +279,7 @@ def test_nullif_zero(t, df, column): param( lambda t: ibis.literal("a"), lambda t: t.dup_strings, - lambda _: pd.Series(["d", np.nan, "d"], name="dup_strings"), + lambda _: pd.Series(["a", np.nan, "a"], name="dup_strings"), tm.assert_series_equal, id="literal_series", ), @@ -289,7 +287,7 @@ def test_nullif_zero(t, df, column): ) def test_nullif(t, df, left, right, expected, compare): expr = left(t).nullif(right(t)) - result = execute(expr.op()) + result = Backend().execute(expr) compare(result, expected(df)) diff --git a/ibis/backends/pandas/tests/execution/test_strings.py b/ibis/backends/pandas/tests/test_strings.py similarity index 89% rename from ibis/backends/pandas/tests/execution/test_strings.py rename to ibis/backends/pandas/tests/test_strings.py index 27f603903cd6..e583cb53437e 100644 --- a/ibis/backends/pandas/tests/execution/test_strings.py +++ b/ibis/backends/pandas/tests/test_strings.py @@ -7,7 +7,9 @@ import pytest from pytest import param -from ibis.backends.pandas.execution.strings import sql_like_to_regex +import ibis +from ibis.backends.pandas import Backend +from ibis.backends.pandas.kernels import sql_like_to_regex @pytest.mark.parametrize( @@ -165,3 +167,23 @@ def test_translate( table = str.maketrans(from_str, to_str) series = df.strings_with_space.str.translate(table) tm.assert_series_equal(result, series, check_names=False) + + +def test_string_repeat(t): + int_col = 
t.plain_int64 + int_lit = ibis.literal(3) + string_col = t.strings_with_space + string_lit = ibis.literal("abc") + + expr1 = string_col.repeat(int_col) + expr2 = string_col.repeat(int_lit) + expr3 = string_lit.repeat(int_col) + expr4 = string_lit.repeat(int_lit) + + con = Backend() + con.execute(expr1) + con.execute(expr2) + con.execute(expr3) + con.execute(expr4) + + # TODO(kszucs): add assertions or rather parametrize the tests above diff --git a/ibis/backends/pandas/tests/execution/test_structs.py b/ibis/backends/pandas/tests/test_structs.py similarity index 95% rename from ibis/backends/pandas/tests/execution/test_structs.py rename to ibis/backends/pandas/tests/test_structs.py index 203d3e961b19..bf9647f73a62 100644 --- a/ibis/backends/pandas/tests/execution/test_structs.py +++ b/ibis/backends/pandas/tests/test_structs.py @@ -8,7 +8,6 @@ import ibis import ibis.expr.datatypes as dt from ibis.backends.pandas import Backend -from ibis.backends.pandas.execution import execute from ibis.backends.pandas.tests.conftest import TestConf as tm @@ -48,13 +47,14 @@ def test_struct_field_literal(value): assert struct.type() == dt.Struct.from_tuples( [("fruit", dt.string), ("weight", dt.int8)] ) + con = ibis.pandas.connect() expr = struct["fruit"] - result = execute(expr.op()) + result = con.execute(expr) assert result == "pear" expr = struct["weight"] - result = execute(expr.op()) + result = con.execute(expr) assert result == 0 diff --git a/ibis/backends/pandas/tests/execution/test_temporal.py b/ibis/backends/pandas/tests/test_temporal.py similarity index 98% rename from ibis/backends/pandas/tests/execution/test_temporal.py rename to ibis/backends/pandas/tests/test_temporal.py index cd9a1e98384b..f8cf670e99f1 100644 --- a/ibis/backends/pandas/tests/execution/test_temporal.py +++ b/ibis/backends/pandas/tests/test_temporal.py @@ -9,9 +9,9 @@ from packaging.version import parse as parse_version from pytest import param +import ibis from ibis import literal as L from ibis.backends.pandas import Backend -from ibis.backends.pandas.execution import execute from ibis.backends.pandas.tests.conftest import TestConf as tm from ibis.expr import datatypes as dt @@ -44,6 +44,7 @@ ], ) def test_timestamp_functions(case_func, expected_func): + con = ibis.pandas.connect() v = L("2015-09-01 14:48:05.359").cast("timestamp") vt = datetime.datetime( year=2015, @@ -56,7 +57,7 @@ def test_timestamp_functions(case_func, expected_func): ) result = case_func(v) expected = expected_func(vt) - assert execute(result.op()) == expected + assert con.execute(result) == expected @pytest.mark.parametrize( diff --git a/ibis/backends/pandas/tests/test_udf.py b/ibis/backends/pandas/tests/test_udf.py index f310db217413..df6917aa2b25 100644 --- a/ibis/backends/pandas/tests/test_udf.py +++ b/ibis/backends/pandas/tests/test_udf.py @@ -364,26 +364,28 @@ def my_wm(v, w): tm.assert_frame_equal(result, expected) -def test_udaf_window_nan(): - df = pd.DataFrame( - { - "a": np.arange(10, dtype=float), - "b": [3.0, np.NaN] * 5, - "key": list("ddeefffggh"), - } - ) - con = Backend().connect({"df": df}) - t = con.table("df") - window = ibis.trailing_window(2, order_by="a", group_by="key") - expr = t.mutate(rolled=my_mean(t.b).over(window)) - result = expr.execute().sort_values(["key", "a"]) - expected = df.sort_values(["key", "a"]).assign( - rolled=lambda d: d.groupby("key") - .b.rolling(3, min_periods=1) - .apply(lambda x: x.mean(), raw=True) - .reset_index(level=0, drop=True) - ) - tm.assert_frame_equal(result, expected) +# TODO(kszucs): revisit 
this, duckdb produces the same result as the pandas +# backend, but the expected result is different +# def test_udaf_window_nan(): +# df = pd.DataFrame( +# { +# "a": np.arange(10, dtype=float), +# "b": [3.0, np.NaN] * 5, +# "key": list("ddeefffggh"), +# } +# ) +# con = Backend().connect({"df": df}) +# t = con.table("df") +# window = ibis.trailing_window(2, order_by="a", group_by="key") +# expr = t.mutate(rolled=my_mean(t.b).over(window)) +# result = expr.execute().sort_values(["key", "a"]) +# expected = df.sort_values(["key", "a"]).assign( +# rolled=lambda d: d.groupby("key") +# .b.rolling(3, min_periods=1) +# .apply(lambda x: x.mean(), raw=True) +# .reset_index(level=0, drop=True) +# ) +# tm.assert_frame_equal(result, expected) @pytest.fixture(params=[[0.25, 0.75], [0.01, 0.99]]) diff --git a/ibis/backends/pandas/tests/execution/test_window.py b/ibis/backends/pandas/tests/test_window.py similarity index 93% rename from ibis/backends/pandas/tests/execution/test_window.py rename to ibis/backends/pandas/tests/test_window.py index 905dd833c775..0f46a4a987b4 100644 --- a/ibis/backends/pandas/tests/execution/test_window.py +++ b/ibis/backends/pandas/tests/test_window.py @@ -11,11 +11,7 @@ import ibis import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -from ibis.backends.base.df.scope import Scope from ibis.backends.pandas import Backend -from ibis.backends.pandas.dispatch import pre_execute -from ibis.backends.pandas.execution import execute from ibis.backends.pandas.tests.conftest import TestConf as tm from ibis.common.annotations import ValidationError from ibis.legacy.udf.vectorized import reduction @@ -51,58 +47,63 @@ def range_window(): @default @row_offset def test_lead(t, df, row_offset, default, row_window): + con = ibis.pandas.connect() expr = t.dup_strings.lead(row_offset, default=default).over(row_window) result = expr.execute() - expected = df.dup_strings.shift(execute((-row_offset).op())) + expected = df.dup_strings.shift(con.execute(-row_offset)) if default is not ibis.NA: - expected = expected.fillna(execute(default.op())) + expected = expected.fillna(con.execute(default)) tm.assert_series_equal(result, expected.rename("tmp")) @default @row_offset def test_lag(t, df, row_offset, default, row_window): + con = ibis.pandas.connect() expr = t.dup_strings.lag(row_offset, default=default).over(row_window) result = expr.execute() - expected = df.dup_strings.shift(execute(row_offset.op())) + expected = df.dup_strings.shift(con.execute(row_offset)) if default is not ibis.NA: - expected = expected.fillna(execute(default.op())) + expected = expected.fillna(con.execute(default)) tm.assert_series_equal(result, expected.rename("tmp")) @default @range_offset def test_lead_delta(t, df, range_offset, default, range_window): + con = ibis.pandas.connect() expr = t.dup_strings.lead(range_offset, default=default).over(range_window) result = expr.execute() expected = ( df[["plain_datetimes_naive", "dup_strings"]] .set_index("plain_datetimes_naive") .squeeze() - .shift(freq=execute((-range_offset).op())) + .shift(freq=con.execute(-range_offset)) .reindex(df.plain_datetimes_naive) .reset_index(drop=True) ) if default is not ibis.NA: - expected = expected.fillna(execute(default.op())) + expected = expected.fillna(con.execute(default)) tm.assert_series_equal(result, expected.rename("tmp")) @default @range_offset def test_lag_delta(t, df, range_offset, default, range_window): + con = ibis.pandas.connect() expr = t.dup_strings.lag(range_offset, default=default).over(range_window) result = 
expr.execute() + expected = ( df[["plain_datetimes_naive", "dup_strings"]] .set_index("plain_datetimes_naive") .squeeze() - .shift(freq=execute(range_offset.op())) + .shift(freq=con.execute(range_offset)) .reindex(df.plain_datetimes_naive) .reset_index(drop=True) ) if default is not ibis.NA: - expected = expected.fillna(execute(default.op())) + expected = expected.fillna(con.execute(default)) tm.assert_series_equal(result, expected.rename("tmp")) @@ -510,29 +511,6 @@ def test_window_with_mlb(): ) -def test_window_has_pre_execute_scope(): - called = [0] - - @pre_execute.register(ops.Lag, Backend) - def test_pre_execute(op, client, **kwargs): - called[0] += 1 - return Scope() - - data = {"key": list("abc"), "value": [1, 2, 3], "dup": list("ggh")} - df = pd.DataFrame(data, columns=["key", "value", "dup"]) - client = ibis.pandas.connect({"df": df}) - t = client.table("df") - window = ibis.window(order_by="value") - expr = t.key.lag(1).over(window).name("foo") - result = expr.execute() - assert result is not None - - # once in window op at the top to pickup any scope changes before computing - # twice in window op when calling execute on the ops.Lag node at the - # beginning of execute and once before the actual computation - assert called[0] == 3 - - def test_window_grouping_key_has_scope(t, df): param = ibis.param(dt.string) window = ibis.window(group_by=t.dup_strings + param) diff --git a/ibis/backends/pandas/trace.py b/ibis/backends/pandas/trace.py deleted file mode 100644 index 2350e8957930..000000000000 --- a/ibis/backends/pandas/trace.py +++ /dev/null @@ -1,170 +0,0 @@ -"""Module that adds tracing to pandas execution. - -With tracing enabled, this module will log time and call stack information of -the executed expression. Call stack information is presented with indentation -level. 
- -For example: - -import pandas as pd -import logging - -import ibis.expr.datatypes as dt -import ibis.backends.pandas -from ibis.legacy.udf.vectorized import elementwise -from ibis.backends.pandas import trace - -logging.basicConfig() -trace.enable() - -df = pd.DataFrame( - { - 'a': [1, 2, 3] - } -) - -con = ibis.pandas.connect({"table1": df}) - -@elementwise( - input_type=[dt.double], - output_type=dt.double -) -def add_one(v): - import time - time.sleep(5) - return v + 1 - -table = con.table("table1") -table = table.mutate(b=add_one(table['a'])) -table.execute() - -Output: - -DEBUG:ibis.backends.pandas.trace: main_execute Selection -DEBUG:ibis.backends.pandas.trace: execute_until_in_scope Selection -DEBUG:ibis.backends.pandas.trace: execute_until_in_scope PandasTable -DEBUG:ibis.backends.pandas.trace: execute_database_table_client PandasTable -DEBUG:ibis.backends.pandas.trace: execute_database_table_client PandasTable 0:00:00.000085 -DEBUG:ibis.backends.pandas.trace: execute_until_in_scope PandasTable 0:00:00.000362 -DEBUG:ibis.backends.pandas.trace: execute_selection_dataframe Selection -DEBUG:ibis.backends.pandas.trace: main_execute ElementWiseVectorizedUDF -DEBUG:ibis.backends.pandas.trace: execute_until_in_scope ElementWiseVectorizedUDF -DEBUG:ibis.backends.pandas.trace: execute_until_in_scope TableColumn -DEBUG:ibis.backends.pandas.trace: execute_until_in_scope PandasTable -DEBUG:ibis.backends.pandas.trace: execute_until_in_scope PandasTable 0:00:00.000061 -DEBUG:ibis.backends.pandas.trace: execute_table_column_df_or_df_groupby TableColumn -DEBUG:ibis.backends.pandas.trace: execute_table_column_df_or_df_groupby TableColumn 0:00:00.000304 # noqa: E501 -DEBUG:ibis.backends.pandas.trace: execute_until_in_scope TableColumn 0:00:00.000584 -DEBUG:ibis.backends.pandas.trace: execute_udf_node ElementWiseVectorizedUDF -DEBUG:ibis.backends.pandas.trace: execute_udf_node ElementWiseVectorizedUDF 0:00:05.019173 -DEBUG:ibis.backends.pandas.trace: execute_until_in_scope ElementWiseVectorizedUDF 0:00:05.052604 # noqa: E501 -DEBUG:ibis.backends.pandas.trace: main_execute ElementWiseVectorizedUDF 0:00:05.052819 -DEBUG:ibis.backends.pandas.trace: execute_selection_dataframe Selection 0:00:05.054894 -DEBUG:ibis.backends.pandas.trace: execute_until_in_scope Selection 0:00:05.055662 -DEBUG:ibis.backends.pandas.trace: main_execute Selection 0:00:05.056556 -""" - -from __future__ import annotations - -import functools -import logging -import traceback -from datetime import datetime - -import ibis -from ibis.backends.pandas.dispatcher import TwoLevelDispatcher -from ibis.config import options -from ibis.expr import types as ir - -_logger = logging.getLogger("ibis.backends.pandas.trace") - -# A list of funcs that is traced -_trace_funcs = set() - - -def enable(): - """Enable tracing.""" - if options.pandas is None: - # pandas options haven't been registered yet - force module __getattr__ - ibis.pandas # noqa: B018 - options.pandas.enable_trace = True - logging.getLogger("ibis.backends.pandas.trace").setLevel(logging.DEBUG) - - -def _log_trace(func, start=None): - level = 0 - current_frame = None - - # Increase the current level for each traced function in the stackframe - # This way we can visualize the call stack. 
- for frame, _ in traceback.walk_stack(None): - current_frame = current_frame if current_frame is not None else frame - func_name = frame.f_code.co_name - if func_name in _trace_funcs: - level += 1 - - # We can assume we have 'args' because we only call _log_trace inside - # trace or TraceDispatcher.register - current_op = current_frame.f_locals["args"][0] - - # If the first argument is a Expr, we print its op because it's more - # informative. - if isinstance(current_op, ir.Expr): - current_op = current_op.op() - - _logger.debug( - "%s %s %s %s", - " " * level, - func.__name__, - type(current_op).__qualname__, - f"{datetime.now() - start}" if start else "", - ) - - -def trace(func): - """Return a function decorator that wraps `func` with tracing.""" - _trace_funcs.add(func.__name__) - - @functools.wraps(func) - def traced_func(*args, **kwargs): - # Unfortunately, this function can be called before the `ibis.pandas` - # attribute has ever been accessed, which means the trace configuration - # option might never get registered and will raise an error. Accessing - # the pandas attribute here forces the option initialization - import ibis - - ibis.pandas # noqa: B018 - - if not options.pandas.enable_trace: - return func(*args, **kwargs) - else: - start = datetime.now() - _log_trace(func) - res = func(*args, **kwargs) - _log_trace(func, start) - return res - - return traced_func - - -class TraceTwoLevelDispatcher(TwoLevelDispatcher): - """A Dispatcher that also wraps the registered function with tracing.""" - - def __init__(self, name, doc=None): - super().__init__(name, doc) - - def register(self, *types, **kwargs): - """Register a function with this Dispatcher. - - The function will also be wrapped with tracing information. - """ - - def _(func): - trace_func = trace(func) - TwoLevelDispatcher.register(self, *types, **kwargs)(trace_func) - # return func instead trace_func here so that - # chained register didn't get wrapped multiple - # times - return func - - return _ diff --git a/ibis/backends/pandas/udf.py b/ibis/backends/pandas/udf.py index 561aca6987d6..3168d348f67d 100644 --- a/ibis/backends/pandas/udf.py +++ b/ibis/backends/pandas/udf.py @@ -2,35 +2,7 @@ from __future__ import annotations -import itertools - -import pandas as pd -from pandas.core.groupby import SeriesGroupBy - -import ibis.expr.operations as ops import ibis.legacy.udf.vectorized -from ibis.backends.base import BaseBackend -from ibis.backends.pandas.aggcontext import Transform -from ibis.backends.pandas.dispatch import execute_node, pre_execute -from ibis.backends.pandas.execution.util import get_grouping - - -def create_gens_from_args_groupby(*args: tuple[SeriesGroupBy, ...]): - """Create generators for each of `args` for groupby UDAF. - - Returns a generator that outputs each group. 
- - Parameters - ---------- - *args - A tuple of group by objects - - Returns - ------- - Tuple[Generator] - Generators of group by data - """ - return ((data for _, data in arg) for arg in args) class udf: @@ -49,120 +21,3 @@ def reduction(input_type, output_type): def analytic(input_type, output_type): """Alias for ibis.legacy.udf.vectorized.analytic.""" return ibis.legacy.udf.vectorized.analytic(input_type, output_type) - - -@pre_execute.register(ops.ElementWiseVectorizedUDF) -@pre_execute.register(ops.ElementWiseVectorizedUDF, BaseBackend) -def pre_execute_elementwise_udf(op, *clients, scope=None, **kwargs): - """Register execution rules for elementwise UDFs.""" - input_type = op.input_type - - # definitions - - # Define an execution rule for elementwise operations on a - # grouped Series - nargs = len(input_type) - - @execute_node.register( - ops.ElementWiseVectorizedUDF, *(itertools.repeat(SeriesGroupBy, nargs)) - ) - def execute_udf_node_groupby(op, *args, **kwargs): - func = op.func - - groupers = [ - grouper - for grouper in (getattr(arg, "grouper", None) for arg in args) - if grouper is not None - ] - - # all grouping keys must be identical - assert all(groupers[0] == grouper for grouper in groupers[1:]) - - # we're performing a scalar operation on grouped column, so - # perform the operation directly on the underlying Series - # and regroup after it's finished - args = [getattr(arg, "obj", arg) for arg in args] - groupings = get_grouping(groupers[0].groupings) - return func(*args).groupby(groupings, group_keys=False) - - # Define an execution rule for a simple elementwise Series - # function - @execute_node.register( - ops.ElementWiseVectorizedUDF, *(itertools.repeat(pd.Series, nargs)) - ) - @execute_node.register( - ops.ElementWiseVectorizedUDF, *(itertools.repeat(object, nargs)) - ) - def execute_udf_node(op, *args, cache=None, timecontext=None, **kwargs): - # We have rewritten op.func to be a closure enclosing - # the kwargs, and therefore, we do not need to pass - # kwargs here. This is true for all udf execution in this - # file. 
- # See ibis.legacy.udf.vectorized.UserDefinedFunction - - # prevent executing UDFs multiple times on different execution branches - try: - result = cache[(op, timecontext)] - except KeyError: - result = cache[(op, timecontext)] = op.func(*args) - - return result - - return scope - - -@pre_execute.register(ops.AnalyticVectorizedUDF) -@pre_execute.register(ops.AnalyticVectorizedUDF, BaseBackend) -@pre_execute.register(ops.ReductionVectorizedUDF) -@pre_execute.register(ops.ReductionVectorizedUDF, BaseBackend) -def pre_execute_analytic_and_reduction_udf(op, *clients, scope=None, **kwargs): - input_type = op.input_type - nargs = len(input_type) - - # An execution rule to handle analytic and reduction UDFs over - # 1) an ungrouped window, - # 2) an ungrouped Aggregate node, or - # 3) an ungrouped custom aggregation context - @execute_node.register(type(op), *(itertools.repeat(pd.Series, nargs))) - def execute_udaf_node_no_groupby(op, *args, aggcontext, **kwargs): - func = op.func - return aggcontext.agg(args[0], func, *args[1:]) - - # An execution rule to handle analytic and reduction UDFs over - # 1) a grouped window, - # 2) a grouped Aggregate node, or - # 3) a grouped custom aggregation context - @execute_node.register(type(op), *(itertools.repeat(SeriesGroupBy, nargs))) - def execute_udaf_node_groupby(op, *args, aggcontext, **kwargs): - func = op.func - if isinstance(aggcontext, Transform): - # We are aggregating over an unbounded (and GROUPED) window, - # which uses a Transform aggregation context. - # We need to do some pre-processing to func and args so that - # Transform can pull data out of the SeriesGroupBys in args. - - # Construct a generator that yields the next group of data - # for every argument excluding the first (pandas performs - # the iteration for the first argument) for each argument - # that is a SeriesGroupBy. - iters = create_gens_from_args_groupby(*args[1:]) - - # TODO: Unify calling convention here to be more like - # window - def aggregator(first, *rest): - # map(next, *rest) gets the inputs for the next group - # TODO: might be inefficient to do this on every call - return func(first, *map(next, rest)) - - return aggcontext.agg(args[0], aggregator, *iters) - else: - # We are either: - # 1) Aggregating over a bounded window, which uses a Window - # aggregation context - # 2) Aggregating over a custom aggregation context - # 3) Aggregating using an Aggregate node (with GROUPING), which - # uses a Summarize aggregation context - # No pre-processing to be done for any case. 
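-            # In each of these cases the aggregation context's agg() can
-            # consume the grouped data directly, so func and its arguments
-            # are passed through unchanged.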
- return aggcontext.agg(args[0], func, *args[1:]) - - return scope diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 4b0a4f7cc056..be97ad419d92 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -1022,7 +1022,7 @@ def test_quantile( id="covar_pop", marks=[ pytest.mark.notimpl( - ["dask", "pandas", "polars", "druid"], + ["dask", "polars", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1042,7 +1042,7 @@ def test_quantile( id="covar_samp", marks=[ pytest.mark.notimpl( - ["dask", "pandas", "polars", "druid"], + ["dask", "polars", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1062,7 +1062,7 @@ def test_quantile( id="corr_pop", marks=[ pytest.mark.notimpl( - ["dask", "pandas", "druid"], + ["dask", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1092,7 +1092,7 @@ def test_quantile( id="corr_samp", marks=[ pytest.mark.notimpl( - ["dask", "pandas", "druid"], + ["dask", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1132,7 +1132,7 @@ def test_quantile( id="covar_pop_bool", marks=[ pytest.mark.notimpl( - ["dask", "pandas", "polars", "druid"], + ["dask", "polars", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1156,7 +1156,7 @@ def test_quantile( id="corr_pop_bool", marks=[ pytest.mark.notimpl( - ["dask", "pandas", "druid"], + ["dask", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1325,9 +1325,6 @@ def test_string_quantile(alltypes, func): @pytest.mark.notimpl(["dask"], raises=(AssertionError, NotImplementedError, TypeError)) @pytest.mark.notyet(["polars"], raises=PolarsInvalidOperationError) @pytest.mark.notyet(["datafusion"], raises=Exception, reason="not supported upstream") -@pytest.mark.broken( - ["pandas"], raises=AssertionError, reason="possibly incorrect results" -) @pytest.mark.parametrize( "func", [ @@ -1686,8 +1683,8 @@ def test_grouped_case(backend, con): ["datafusion", "mssql", "polars", "exasol"], raises=com.OperationNotDefinedError ) @pytest.mark.broken( - ["dask", "pandas"], - reason="Dask and Pandas do not windowize this operation correctly", + ["dask"], + reason="Dask does not windowize this operation correctly", raises=AssertionError, ) @pytest.mark.notyet(["impala", "flink"], raises=com.UnsupportedOperationError) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index ea41cbb89956..e2063b94354d 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -342,6 +342,11 @@ def test_unnest_no_nulls(backend): raises=ValueError, reason="ValueError: Do not nest ARRAY types; ARRAY(basetype) handles multi-dimensional arrays of basetype", ) +@pytest.mark.broken( + ["pandas"], + raises=ValueError, + reason="all the input arrays must have same number of dimensions", +) def test_unnest_default_name(backend): array_types = backend.array_types df = array_types.execute() @@ -531,7 +536,7 @@ def test_array_filter(con, input, output): @builtin_array @pytest.mark.notimpl( - ["mssql", "pandas", "polars", "postgres"], + ["mssql", "polars", "postgres"], raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) @@ -588,7 +593,7 @@ def test_array_contains(backend, con): ) @builtin_array @pytest.mark.notimpl( - ["dask", "impala", "mssql", "pandas", "polars"], + ["dask", "impala", "mssql", "polars"], raises=com.OperationNotDefinedError, 
) def test_array_position(backend, con, a, expected_array): @@ -602,7 +607,7 @@ def test_array_position(backend, con, a, expected_array): @builtin_array @pytest.mark.notimpl( - ["dask", "impala", "mssql", "pandas", "polars"], + ["dask", "impala", "mssql", "polars"], raises=com.OperationNotDefinedError, ) @pytest.mark.broken( @@ -639,7 +644,7 @@ def test_array_remove(con, a): @builtin_array @pytest.mark.notimpl( - ["dask", "datafusion", "impala", "mssql", "pandas", "polars", "mysql"], + ["dask", "datafusion", "impala", "mssql", "polars", "mysql"], raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( @@ -693,7 +698,7 @@ def test_array_unique(con, input, expected): @builtin_array @pytest.mark.notimpl( - ["dask", "datafusion", "flink", "impala", "mssql", "pandas", "polars"], + ["dask", "datafusion", "flink", "impala", "mssql", "polars"], raises=com.OperationNotDefinedError, ) @pytest.mark.broken( @@ -714,7 +719,7 @@ def test_array_sort(con): @builtin_array @pytest.mark.notimpl( - ["dask", "datafusion", "impala", "mssql", "pandas", "polars"], + ["dask", "datafusion", "impala", "mssql", "polars"], raises=com.OperationNotDefinedError, ) @pytest.mark.parametrize( @@ -978,7 +983,7 @@ def test_array_flatten(backend, flatten_data, column, expected): reason="range isn't implemented upstream", raises=com.OperationNotDefinedError, ) -@pytest.mark.notimpl(["flink", "pandas", "dask"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["flink", "dask"], raises=com.OperationNotDefinedError) @pytest.mark.parametrize("n", [-2, 0, 2]) def test_range_single_argument(con, n): expr = ibis.range(n) @@ -992,9 +997,7 @@ def test_range_single_argument(con, n): raises=com.OperationNotDefinedError, ) @pytest.mark.parametrize("n", [-2, 0, 2]) -@pytest.mark.notimpl( - ["polars", "flink", "pandas", "dask"], raises=com.OperationNotDefinedError -) +@pytest.mark.notimpl(["polars", "flink", "dask"], raises=com.OperationNotDefinedError) @pytest.mark.skip("risingwave") def test_range_single_argument_unnest(backend, con, n): expr = ibis.range(n).unnest() @@ -1026,7 +1029,7 @@ def test_range_single_argument_unnest(backend, con, n): reason="range and unnest aren't implemented upstream", raises=com.OperationNotDefinedError, ) -@pytest.mark.notimpl(["flink", "pandas", "dask"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["flink", "dask"], raises=com.OperationNotDefinedError) def test_range_start_stop_step(con, start, stop, step): expr = ibis.range(start, stop, step) result = con.execute(expr) @@ -1041,7 +1044,7 @@ def test_range_start_stop_step(con, start, stop, step): @pytest.mark.notyet( ["datafusion"], raises=com.OperationNotDefinedError, reason="not supported upstream" ) -@pytest.mark.notimpl(["flink", "pandas", "dask"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["flink", "dask"], raises=com.OperationNotDefinedError) @pytest.mark.never( ["risingwave"], raises=sa.exc.InternalError, @@ -1222,7 +1225,7 @@ def swap(token): ) @timestamp_range_tzinfos @pytest.mark.notimpl( - ["pandas", "dask", "flink", "datafusion"], raises=com.OperationNotDefinedError + ["dask", "flink", "datafusion"], raises=com.OperationNotDefinedError ) def test_timestamp_range(con, start, stop, step, freq, tzinfo): start = start.replace(tzinfo=tzinfo) @@ -1273,7 +1276,7 @@ def test_timestamp_range(con, start, stop, step, freq, tzinfo): ) @timestamp_range_tzinfos @pytest.mark.notimpl( - ["pandas", "dask", "flink", "datafusion"], raises=com.OperationNotDefinedError + ["dask", "flink", "datafusion"], 
raises=com.OperationNotDefinedError ) def test_timestamp_range_zero_step(con, start, stop, step, tzinfo): start = start.replace(tzinfo=tzinfo) @@ -1293,7 +1296,7 @@ def test_repr_timestamp_array(con, monkeypatch): @pytest.mark.notyet( - ["dask", "datafusion", "flink", "pandas", "polars"], + ["dask", "datafusion", "flink", "polars"], raises=com.OperationNotDefinedError, ) @pytest.mark.broken( diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index e84a5eb97f02..0e2d41fabdf1 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -313,12 +313,14 @@ def test_filter(backend, alltypes, sorted_df, predicate_fn, expected_fn): "druid", "oracle", "exasol", + "pandas", ] ) @pytest.mark.never( ["flink"], reason="Flink engine does not support generic window clause with no order by", ) +# TODO(kszucs): this is not supported at the expression level def test_filter_with_window_op(backend, alltypes, sorted_df): sorted_alltypes = alltypes.order_by("id") table = sorted_alltypes @@ -1154,7 +1156,7 @@ def test_pivot_wider(backend): reason="backend doesn't implement window functions", ) @pytest.mark.notimpl( - ["pandas", "polars"], + ["polars"], raises=com.OperationNotDefinedError, reason="backend doesn't implement ops.WindowFunction", ) @@ -1232,7 +1234,7 @@ def test_distinct_on_keep(backend, on, keep): reason="backend doesn't implement window functions", ) @pytest.mark.notimpl( - ["pandas", "polars"], + ["polars"], raises=com.OperationNotDefinedError, reason="backend doesn't implement ops.WindowFunction", ) diff --git a/ibis/backends/tests/test_interactive.py b/ibis/backends/tests/test_interactive.py index bfa3f6adffe1..704e17019c6e 100644 --- a/ibis/backends/tests/test_interactive.py +++ b/ibis/backends/tests/test_interactive.py @@ -33,6 +33,7 @@ def table(backend): return backend.functional_alltypes +@pytest.mark.notimpl(["pandas"]) def test_interactive_execute_on_repr(table, queries, snapshot): repr(table.bigint_col.sum()) snapshot.assert_match(queries[0], "out.sql") @@ -52,18 +53,21 @@ def test_repr_png_is_not_none_in_not_interactive(table): assert table._repr_png_() is not None +@pytest.mark.notimpl(["pandas"]) def test_default_limit(table, snapshot, queries): repr(table.select("id", "bool_col")) snapshot.assert_match(queries[0], "out.sql") +@pytest.mark.notimpl(["pandas"]) def test_respect_set_limit(table, snapshot, queries): repr(table.select("id", "bool_col").limit(10)) snapshot.assert_match(queries[0], "out.sql") +@pytest.mark.notimpl(["pandas"]) def test_disable_query_limit(table, snapshot, queries): assert ibis.options.sql.default_limit is None diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index 8266186481b2..b7aa81c43dd1 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -65,9 +65,7 @@ def test_timestamp_accepts_date_literals(alltypes): assert expr.compile(params=params) is not None -@pytest.mark.notimpl( - ["dask", "impala", "pandas", "pyspark", "druid", "oracle", "exasol"] -) +@pytest.mark.notimpl(["dask", "impala", "pyspark", "druid", "oracle", "exasol"]) @pytest.mark.never( ["mysql", "sqlite", "mssql"], reason="backend will never implement array types" ) diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index d441b39896f2..cde2dc86d1bc 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -1098,7 +1098,7 @@ def test_no_conditional_percent_escape(con, expr): 
@pytest.mark.notimpl( - ["dask", "pandas", "mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError + ["dask", "mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError ) def test_non_match_regex_search_is_false(con): expr = ibis.literal("foo").re_search("bar") diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index baff2a018e18..4878ad46a287 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -1028,7 +1028,6 @@ def convert_to_offset(x): "dask", "impala", "mysql", - "pandas", "postgres", "risingwave", "snowflake", @@ -1644,13 +1643,6 @@ def test_interval_add_cast_column(backend, alltypes, df): ), "%Y%m%d", marks=[ - pytest.mark.notimpl(["pandas"], raises=com.OperationNotDefinedError), - pytest.mark.notimpl( - [ - "pandas", - ], - raises=com.OperationNotDefinedError, - ), pytest.mark.notimpl( [ "pyspark", @@ -2254,7 +2246,7 @@ def test_time_literal(con, backend): @pytest.mark.broken( ["sqlite"], raises=AssertionError, reason="SQLite returns Timedelta from execution" ) -@pytest.mark.notimpl(["dask", "pandas"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) @pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.parametrize( "microsecond", diff --git a/ibis/backends/tests/test_timecontext.py b/ibis/backends/tests/test_timecontext.py index 72e78065640e..50b181728d7e 100644 --- a/ibis/backends/tests/test_timecontext.py +++ b/ibis/backends/tests/test_timecontext.py @@ -54,7 +54,7 @@ def filter_by_time_context(df, context): ) -@pytest.mark.notimpl(["dask", "duckdb"]) +@pytest.mark.notimpl(["dask", "duckdb", "pandas"]) @pytest.mark.notimpl( ["flink"], raises=com.OperationNotDefinedError, @@ -91,7 +91,7 @@ def test_context_adjustment_window_udf(backend, alltypes, context, window, monke backend.assert_frame_equal(result, expected) -@pytest.mark.notimpl(["dask", "duckdb"]) +@pytest.mark.notimpl(["dask", "duckdb", "pandas"]) @pytest.mark.broken( # TODO (mehmet): Check with the team. 
["flink"], diff --git a/ibis/backends/tests/test_vectorized_udf.py b/ibis/backends/tests/test_vectorized_udf.py index f130b5b60154..c1c85326f52e 100644 --- a/ibis/backends/tests/test_vectorized_udf.py +++ b/ibis/backends/tests/test_vectorized_udf.py @@ -570,7 +570,8 @@ def test_elementwise_udf_named_destruct(udf_alltypes): add_one_struct_udf = create_add_one_struct_udf( result_formatter=lambda v1, v2: (v1, v2) ) - with pytest.raises(com.IbisTypeError, match=r"Unable to infer"): + msg = "Duplicate column name 'new_struct' in result set" + with pytest.raises(com.IntegrityError, match=msg): udf_alltypes.mutate( new_struct=add_one_struct_udf(udf_alltypes["double_col"]).destructure() ) diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 28ae24cfd19c..e7968831330f 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -247,7 +247,6 @@ def calc_zscore(s): id="row_number", marks=[ pytest.mark.notimpl(["dask"], raises=NotImplementedError), - pytest.mark.notimpl(["pandas"], raises=com.OperationNotDefinedError), ], ), param( @@ -469,7 +468,6 @@ def test_ungrouped_bounded_expanding_window( ) @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["dask"], raises=NotImplementedError) -@pytest.mark.notimpl(["pandas"], raises=AssertionError) @pytest.mark.notimpl( ["flink"], raises=com.UnsupportedOperationError, @@ -652,7 +650,7 @@ def test_grouped_unbounded_window( ], ) @pytest.mark.broken(["snowflake"], raises=AssertionError) -@pytest.mark.broken(["dask", "pandas", "mssql"], raises=AssertionError) +@pytest.mark.broken(["dask", "mssql"], raises=AssertionError) @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["flink"], @@ -683,7 +681,7 @@ def test_simple_ungrouped_unbound_following_window( reason="OVER RANGE FOLLOWING windows are not supported in Flink yet", ) @pytest.mark.notimpl( - ["pandas", "dask"], + ["dask"], raises=NotImplementedError, reason="support scalar sorting keys are not yet implemented", ) @@ -719,7 +717,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): True, id="ordered-mean", marks=[ - pytest.mark.broken(["pandas"], raises=AssertionError), pytest.mark.notimpl( ["dask"], raises=NotImplementedError, @@ -796,7 +793,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): ], raises=com.OperationNotDefinedError, ), - pytest.mark.broken(["pandas"], raises=AssertionError), pytest.mark.broken( ["dask"], raises=ValueError, @@ -963,11 +959,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): ], raises=com.OperationNotDefinedError, ), - pytest.mark.notimpl( - ["pandas"], - raises=RuntimeWarning, - reason="invalid value encountered in divide", - ), pytest.mark.broken( ["dask"], raises=ValueError, @@ -1042,11 +1033,6 @@ def test_ungrouped_unbounded_window( ["impala"], raises=ImpalaHiveServer2Error, reason="limited RANGE support" ) @pytest.mark.notimpl(["dask"], raises=NotImplementedError) -@pytest.mark.notimpl( - ["pandas"], - raises=NotImplementedError, - reason="The pandas backend only implements range windows with temporal ordering keys", -) @pytest.mark.notimpl( ["flink"], raises=com.UnsupportedOperationError, @@ -1295,9 +1281,6 @@ def test_range_expression_bounds(backend): reason="clickhouse doesn't implement percent_rank", raises=com.OperationNotDefinedError, ) -@pytest.mark.broken( - ["pandas"], reason="missing column during execution", raises=KeyError -) @pytest.mark.broken( ["mssql"], 
reason="lack of support for booleans", raises=sa.exc.ProgrammingError ) @@ -1328,7 +1311,7 @@ def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df): @pytest.mark.notyet( - ["pandas", "dask"], + ["dask"], reason="multiple ordering keys in a window function not supported for ranking", raises=ValueError, ) @@ -1342,6 +1325,11 @@ def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df): @pytest.mark.broken( ["pyspark"], reason="pyspark requires CURRENT ROW", raises=PySparkAnalysisException ) +@pytest.mark.broken( + ["pandas"], + raises=TypeError, + reason="'<' not supported between instances of 'bool' and 'NoneType'", +) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, diff --git a/ibis/expr/operations/reductions.py b/ibis/expr/operations/reductions.py index 2a85dbfcbab5..597f42107f35 100644 --- a/ibis/expr/operations/reductions.py +++ b/ibis/expr/operations/reductions.py @@ -17,6 +17,7 @@ class Reduction(Value): shape = ds.scalar + # TODO(kszucs): remove this @property def __window_op__(self): return self diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py index 4f83af05d320..b18fc2bf106e 100644 --- a/ibis/formats/pandas.py +++ b/ibis/formats/pandas.py @@ -2,6 +2,7 @@ import contextlib import datetime +import decimal import warnings from importlib.util import find_spec as _find_spec @@ -117,8 +118,10 @@ def convert_table(cls, df, schema): "schema column count does not match input data column count" ) - for (name, series), dtype in zip(df.items(), schema.types): - df[name] = cls.convert_column(series, dtype) + columns = [] + for (_, series), dtype in zip(df.items(), schema.types): + columns.append(cls.convert_column(series, dtype)) + df = pd.concat(columns, axis=1) # return data with the schema's columns which may be different than the # input columns @@ -250,6 +253,23 @@ def convert_Interval(cls, s, dtype, pandas_type): def convert_String(cls, s, dtype, pandas_type): return s.astype(pandas_type, errors="ignore") + @classmethod + def convert_Decimal(cls, s, dtype, pandas_type): + context = decimal.Context(prec=dtype.precision) + + if dtype.scale is None: + normalize = context.create_decimal + else: + exponent = decimal.Decimal(10) ** -dtype.scale + + def normalize(x, exponent=exponent): + try: + return context.create_decimal(x).quantize(exponent) + except decimal.InvalidOperation: + return x + + return s.map(normalize, na_action="ignore").astype(pandas_type) + @classmethod def convert_UUID(cls, s, dtype, pandas_type): return s.map(cls.get_element_converter(dtype), na_action="ignore") diff --git a/ibis/formats/tests/test_dask.py b/ibis/formats/tests/test_dask.py index 89ce6c59198a..2dbe9b61ad7d 100644 --- a/ibis/formats/tests/test_dask.py +++ b/ibis/formats/tests/test_dask.py @@ -199,12 +199,3 @@ def test_schema_infer_exhaustive_dataframe(): ] assert DaskData.infer_table(df) == ibis.schema(expected) - - -def test_convert_dataframe_with_timezone(): - data = {"time": pd.date_range("2018-01-01", "2018-01-02", freq="H")} - df = dd.from_pandas(pd.DataFrame(data), npartitions=2) - expected = df.assign(time=df.time.dt.tz_localize("EST")) - desired_schema = ibis.schema([("time", 'timestamp("EST")')]) - result = DaskData.convert_table(df.copy(), desired_schema) - tm.assert_frame_equal(result.compute(), expected.compute()) From a4666847974cdc10a39567176767da702b15a1b4 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 4 Jan 2024 07:01:26 -0500 Subject: [PATCH 037/161] chore(deps): relock --- 
poetry.lock | 1509 ++++++++++++++++++++---------------------- requirements-dev.txt | 86 ++- 2 files changed, 774 insertions(+), 821 deletions(-) diff --git a/poetry.lock b/poetry.lock index ad9cf8f1f1b0..9a15d9383518 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2,87 +2,87 @@ [[package]] name = "aiohttp" -version = "3.9.2" +version = "3.9.3" description = "Async http client/server framework (asyncio)" optional = true python-versions = ">=3.8" files = [ - {file = "aiohttp-3.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:772fbe371788e61c58d6d3d904268e48a594ba866804d08c995ad71b144f94cb"}, - {file = "aiohttp-3.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:edd4f1af2253f227ae311ab3d403d0c506c9b4410c7fc8d9573dec6d9740369f"}, - {file = "aiohttp-3.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cfee9287778399fdef6f8a11c9e425e1cb13cc9920fd3a3df8f122500978292b"}, - {file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cc158466f6a980a6095ee55174d1de5730ad7dec251be655d9a6a9dd7ea1ff9"}, - {file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54ec82f45d57c9a65a1ead3953b51c704f9587440e6682f689da97f3e8defa35"}, - {file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abeb813a18eb387f0d835ef51f88568540ad0325807a77a6e501fed4610f864e"}, - {file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc91d07280d7d169f3a0f9179d8babd0ee05c79d4d891447629ff0d7d8089ec2"}, - {file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b65e861f4bebfb660f7f0f40fa3eb9f2ab9af10647d05dac824390e7af8f75b7"}, - {file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:04fd8ffd2be73d42bcf55fd78cde7958eeee6d4d8f73c3846b7cba491ecdb570"}, - {file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3d8d962b439a859b3ded9a1e111a4615357b01620a546bc601f25b0211f2da81"}, - {file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:8ceb658afd12b27552597cf9a65d9807d58aef45adbb58616cdd5ad4c258c39e"}, - {file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0e4ee4df741670560b1bc393672035418bf9063718fee05e1796bf867e995fad"}, - {file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2dec87a556f300d3211decf018bfd263424f0690fcca00de94a837949fbcea02"}, - {file = "aiohttp-3.9.2-cp310-cp310-win32.whl", hash = "sha256:3e1a800f988ce7c4917f34096f81585a73dbf65b5c39618b37926b1238cf9bc4"}, - {file = "aiohttp-3.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:ea510718a41b95c236c992b89fdfc3d04cc7ca60281f93aaada497c2b4e05c46"}, - {file = "aiohttp-3.9.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6aaa6f99256dd1b5756a50891a20f0d252bd7bdb0854c5d440edab4495c9f973"}, - {file = "aiohttp-3.9.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a27d8c70ad87bcfce2e97488652075a9bdd5b70093f50b10ae051dfe5e6baf37"}, - {file = "aiohttp-3.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:54287bcb74d21715ac8382e9de146d9442b5f133d9babb7e5d9e453faadd005e"}, - {file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb3d05569aa83011fcb346b5266e00b04180105fcacc63743fc2e4a1862a891"}, - {file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:c8534e7d69bb8e8d134fe2be9890d1b863518582f30c9874ed7ed12e48abe3c4"}, - {file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4bd9d5b989d57b41e4ff56ab250c5ddf259f32db17159cce630fd543376bd96b"}, - {file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa6904088e6642609981f919ba775838ebf7df7fe64998b1a954fb411ffb4663"}, - {file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bda42eb410be91b349fb4ee3a23a30ee301c391e503996a638d05659d76ea4c2"}, - {file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:193cc1ccd69d819562cc7f345c815a6fc51d223b2ef22f23c1a0f67a88de9a72"}, - {file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b9f1cb839b621f84a5b006848e336cf1496688059d2408e617af33e3470ba204"}, - {file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:d22a0931848b8c7a023c695fa2057c6aaac19085f257d48baa24455e67df97ec"}, - {file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4112d8ba61fbd0abd5d43a9cb312214565b446d926e282a6d7da3f5a5aa71d36"}, - {file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c4ad4241b52bb2eb7a4d2bde060d31c2b255b8c6597dd8deac2f039168d14fd7"}, - {file = "aiohttp-3.9.2-cp311-cp311-win32.whl", hash = "sha256:ee2661a3f5b529f4fc8a8ffee9f736ae054adfb353a0d2f78218be90617194b3"}, - {file = "aiohttp-3.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:4deae2c165a5db1ed97df2868ef31ca3cc999988812e82386d22937d9d6fed52"}, - {file = "aiohttp-3.9.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:6f4cdba12539215aaecf3c310ce9d067b0081a0795dd8a8805fdb67a65c0572a"}, - {file = "aiohttp-3.9.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:84e843b33d5460a5c501c05539809ff3aee07436296ff9fbc4d327e32aa3a326"}, - {file = "aiohttp-3.9.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8008d0f451d66140a5aa1c17e3eedc9d56e14207568cd42072c9d6b92bf19b52"}, - {file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61c47ab8ef629793c086378b1df93d18438612d3ed60dca76c3422f4fbafa792"}, - {file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc71f748e12284312f140eaa6599a520389273174b42c345d13c7e07792f4f57"}, - {file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a1c3a4d0ab2f75f22ec80bca62385db2e8810ee12efa8c9e92efea45c1849133"}, - {file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a87aa0b13bbee025faa59fa58861303c2b064b9855d4c0e45ec70182bbeba1b"}, - {file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2cc0d04688b9f4a7854c56c18aa7af9e5b0a87a28f934e2e596ba7e14783192"}, - {file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1956e3ac376b1711c1533266dec4efd485f821d84c13ce1217d53e42c9e65f08"}, - {file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:114da29f39eccd71b93a0fcacff178749a5c3559009b4a4498c2c173a6d74dff"}, - {file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:3f17999ae3927d8a9a823a1283b201344a0627272f92d4f3e3a4efe276972fe8"}, - {file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:f31df6a32217a34ae2f813b152a6f348154f948c83213b690e59d9e84020925c"}, - {file = 
"aiohttp-3.9.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:7a75307ffe31329928a8d47eae0692192327c599113d41b278d4c12b54e1bd11"}, - {file = "aiohttp-3.9.2-cp312-cp312-win32.whl", hash = "sha256:972b63d589ff8f305463593050a31b5ce91638918da38139b9d8deaba9e0fed7"}, - {file = "aiohttp-3.9.2-cp312-cp312-win_amd64.whl", hash = "sha256:200dc0246f0cb5405c80d18ac905c8350179c063ea1587580e3335bfc243ba6a"}, - {file = "aiohttp-3.9.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:158564d0d1020e0d3fe919a81d97aadad35171e13e7b425b244ad4337fc6793a"}, - {file = "aiohttp-3.9.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:da1346cd0ccb395f0ed16b113ebb626fa43b7b07fd7344fce33e7a4f04a8897a"}, - {file = "aiohttp-3.9.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:eaa9256de26ea0334ffa25f1913ae15a51e35c529a1ed9af8e6286dd44312554"}, - {file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1543e7fb00214fb4ccead42e6a7d86f3bb7c34751ec7c605cca7388e525fd0b4"}, - {file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:186e94570433a004e05f31f632726ae0f2c9dee4762a9ce915769ce9c0a23d89"}, - {file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d52d20832ac1560f4510d68e7ba8befbc801a2b77df12bd0cd2bcf3b049e52a4"}, - {file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c45e4e815ac6af3b72ca2bde9b608d2571737bb1e2d42299fc1ffdf60f6f9a1"}, - {file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa906b9bdfd4a7972dd0628dbbd6413d2062df5b431194486a78f0d2ae87bd55"}, - {file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:68bbee9e17d66f17bb0010aa15a22c6eb28583edcc8b3212e2b8e3f77f3ebe2a"}, - {file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4c189b64bd6d9a403a1a3f86a3ab3acbc3dc41a68f73a268a4f683f89a4dec1f"}, - {file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:8a7876f794523123bca6d44bfecd89c9fec9ec897a25f3dd202ee7fc5c6525b7"}, - {file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:d23fba734e3dd7b1d679b9473129cd52e4ec0e65a4512b488981a56420e708db"}, - {file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b141753be581fab842a25cb319f79536d19c2a51995d7d8b29ee290169868eab"}, - {file = "aiohttp-3.9.2-cp38-cp38-win32.whl", hash = "sha256:103daf41ff3b53ba6fa09ad410793e2e76c9d0269151812e5aba4b9dd674a7e8"}, - {file = "aiohttp-3.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:328918a6c2835861ff7afa8c6d2c70c35fdaf996205d5932351bdd952f33fa2f"}, - {file = "aiohttp-3.9.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5264d7327c9464786f74e4ec9342afbbb6ee70dfbb2ec9e3dfce7a54c8043aa3"}, - {file = "aiohttp-3.9.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:07205ae0015e05c78b3288c1517afa000823a678a41594b3fdc870878d645305"}, - {file = "aiohttp-3.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae0a1e638cffc3ec4d4784b8b4fd1cf28968febc4bd2718ffa25b99b96a741bd"}, - {file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d43302a30ba1166325974858e6ef31727a23bdd12db40e725bec0f759abce505"}, - {file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:16a967685907003765855999af11a79b24e70b34dc710f77a38d21cd9fc4f5fe"}, - {file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:6fa3ee92cd441d5c2d07ca88d7a9cef50f7ec975f0117cd0c62018022a184308"}, - {file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b500c5ad9c07639d48615a770f49618130e61be36608fc9bc2d9bae31732b8f"}, - {file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c07327b368745b1ce2393ae9e1aafed7073d9199e1dcba14e035cc646c7941bf"}, - {file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:cc7d6502c23a0ec109687bf31909b3fb7b196faf198f8cff68c81b49eb316ea9"}, - {file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:07be2be7071723c3509ab5c08108d3a74f2181d4964e869f2504aaab68f8d3e8"}, - {file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:122468f6fee5fcbe67cb07014a08c195b3d4c41ff71e7b5160a7bcc41d585a5f"}, - {file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:00a9abcea793c81e7f8778ca195a1714a64f6d7436c4c0bb168ad2a212627000"}, - {file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7a9825fdd64ecac5c670234d80bb52bdcaa4139d1f839165f548208b3779c6c6"}, - {file = "aiohttp-3.9.2-cp39-cp39-win32.whl", hash = "sha256:5422cd9a4a00f24c7244e1b15aa9b87935c85fb6a00c8ac9b2527b38627a9211"}, - {file = "aiohttp-3.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:7d579dcd5d82a86a46f725458418458fa43686f6a7b252f2966d359033ffc8ab"}, - {file = "aiohttp-3.9.2.tar.gz", hash = "sha256:b0ad0a5e86ce73f5368a164c10ada10504bf91869c05ab75d982c6048217fbf7"}, + {file = "aiohttp-3.9.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:939677b61f9d72a4fa2a042a5eee2a99a24001a67c13da113b2e30396567db54"}, + {file = "aiohttp-3.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f5cd333fcf7590a18334c90f8c9147c837a6ec8a178e88d90a9b96ea03194cc"}, + {file = "aiohttp-3.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82e6aa28dd46374f72093eda8bcd142f7771ee1eb9d1e223ff0fa7177a96b4a5"}, + {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f56455b0c2c7cc3b0c584815264461d07b177f903a04481dfc33e08a89f0c26b"}, + {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bca77a198bb6e69795ef2f09a5f4c12758487f83f33d63acde5f0d4919815768"}, + {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e083c285857b78ee21a96ba1eb1b5339733c3563f72980728ca2b08b53826ca5"}, + {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab40e6251c3873d86ea9b30a1ac6d7478c09277b32e14745d0d3c6e76e3c7e29"}, + {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df822ee7feaaeffb99c1a9e5e608800bd8eda6e5f18f5cfb0dc7eeb2eaa6bbec"}, + {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:acef0899fea7492145d2bbaaaec7b345c87753168589cc7faf0afec9afe9b747"}, + {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:cd73265a9e5ea618014802ab01babf1940cecb90c9762d8b9e7d2cc1e1969ec6"}, + {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a78ed8a53a1221393d9637c01870248a6f4ea5b214a59a92a36f18151739452c"}, + {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:6b0e029353361f1746bac2e4cc19b32f972ec03f0f943b390c4ab3371840aabf"}, + {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:7cf5c9458e1e90e3c390c2639f1017a0379a99a94fdfad3a1fd966a2874bba52"}, + {file = "aiohttp-3.9.3-cp310-cp310-win32.whl", hash = "sha256:3e59c23c52765951b69ec45ddbbc9403a8761ee6f57253250c6e1536cacc758b"}, + {file = "aiohttp-3.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:055ce4f74b82551678291473f66dc9fb9048a50d8324278751926ff0ae7715e5"}, + {file = "aiohttp-3.9.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6b88f9386ff1ad91ace19d2a1c0225896e28815ee09fc6a8932fded8cda97c3d"}, + {file = "aiohttp-3.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c46956ed82961e31557b6857a5ca153c67e5476972e5f7190015018760938da2"}, + {file = "aiohttp-3.9.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:07b837ef0d2f252f96009e9b8435ec1fef68ef8b1461933253d318748ec1acdc"}, + {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad46e6f620574b3b4801c68255492e0159d1712271cc99d8bdf35f2043ec266"}, + {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ed3e046ea7b14938112ccd53d91c1539af3e6679b222f9469981e3dac7ba1ce"}, + {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:039df344b45ae0b34ac885ab5b53940b174530d4dd8a14ed8b0e2155b9dddccb"}, + {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7943c414d3a8d9235f5f15c22ace69787c140c80b718dcd57caaade95f7cd93b"}, + {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84871a243359bb42c12728f04d181a389718710129b36b6aad0fc4655a7647d4"}, + {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5eafe2c065df5401ba06821b9a054d9cb2848867f3c59801b5d07a0be3a380ae"}, + {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9d3c9b50f19704552f23b4eaea1fc082fdd82c63429a6506446cbd8737823da3"}, + {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:f033d80bc6283092613882dfe40419c6a6a1527e04fc69350e87a9df02bbc283"}, + {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:2c895a656dd7e061b2fd6bb77d971cc38f2afc277229ce7dd3552de8313a483e"}, + {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1f5a71d25cd8106eab05f8704cd9167b6e5187bcdf8f090a66c6d88b634802b4"}, + {file = "aiohttp-3.9.3-cp311-cp311-win32.whl", hash = "sha256:50fca156d718f8ced687a373f9e140c1bb765ca16e3d6f4fe116e3df7c05b2c5"}, + {file = "aiohttp-3.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:5fe9ce6c09668063b8447f85d43b8d1c4e5d3d7e92c63173e6180b2ac5d46dd8"}, + {file = "aiohttp-3.9.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:38a19bc3b686ad55804ae931012f78f7a534cce165d089a2059f658f6c91fa60"}, + {file = "aiohttp-3.9.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:770d015888c2a598b377bd2f663adfd947d78c0124cfe7b959e1ef39f5b13869"}, + {file = "aiohttp-3.9.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ee43080e75fc92bf36219926c8e6de497f9b247301bbf88c5c7593d931426679"}, + {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52df73f14ed99cee84865b95a3d9e044f226320a87af208f068ecc33e0c35b96"}, + {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc9b311743a78043b26ffaeeb9715dc360335e5517832f5a8e339f8a43581e4d"}, + {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:b955ed993491f1a5da7f92e98d5dad3c1e14dc175f74517c4e610b1f2456fb11"}, + {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:504b6981675ace64c28bf4a05a508af5cde526e36492c98916127f5a02354d53"}, + {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6fe5571784af92b6bc2fda8d1925cccdf24642d49546d3144948a6a1ed58ca5"}, + {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ba39e9c8627edc56544c8628cc180d88605df3892beeb2b94c9bc857774848ca"}, + {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e5e46b578c0e9db71d04c4b506a2121c0cb371dd89af17a0586ff6769d4c58c1"}, + {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:938a9653e1e0c592053f815f7028e41a3062e902095e5a7dc84617c87267ebd5"}, + {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:c3452ea726c76e92f3b9fae4b34a151981a9ec0a4847a627c43d71a15ac32aa6"}, + {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ff30218887e62209942f91ac1be902cc80cddb86bf00fbc6783b7a43b2bea26f"}, + {file = "aiohttp-3.9.3-cp312-cp312-win32.whl", hash = "sha256:38f307b41e0bea3294a9a2a87833191e4bcf89bb0365e83a8be3a58b31fb7f38"}, + {file = "aiohttp-3.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:b791a3143681a520c0a17e26ae7465f1b6f99461a28019d1a2f425236e6eedb5"}, + {file = "aiohttp-3.9.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0ed621426d961df79aa3b963ac7af0d40392956ffa9be022024cd16297b30c8c"}, + {file = "aiohttp-3.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7f46acd6a194287b7e41e87957bfe2ad1ad88318d447caf5b090012f2c5bb528"}, + {file = "aiohttp-3.9.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:feeb18a801aacb098220e2c3eea59a512362eb408d4afd0c242044c33ad6d542"}, + {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f734e38fd8666f53da904c52a23ce517f1b07722118d750405af7e4123933511"}, + {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b40670ec7e2156d8e57f70aec34a7216407848dfe6c693ef131ddf6e76feb672"}, + {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fdd215b7b7fd4a53994f238d0f46b7ba4ac4c0adb12452beee724ddd0743ae5d"}, + {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:017a21b0df49039c8f46ca0971b3a7fdc1f56741ab1240cb90ca408049766168"}, + {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e99abf0bba688259a496f966211c49a514e65afa9b3073a1fcee08856e04425b"}, + {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:648056db9a9fa565d3fa851880f99f45e3f9a771dd3ff3bb0c048ea83fb28194"}, + {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8aacb477dc26797ee089721536a292a664846489c49d3ef9725f992449eda5a8"}, + {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:522a11c934ea660ff8953eda090dcd2154d367dec1ae3c540aff9f8a5c109ab4"}, + {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:5bce0dc147ca85caa5d33debc4f4d65e8e8b5c97c7f9f660f215fa74fc49a321"}, + {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b4af9f25b49a7be47c0972139e59ec0e8285c371049df1a63b6ca81fdd216a2"}, + {file = "aiohttp-3.9.3-cp38-cp38-win32.whl", hash = 
"sha256:298abd678033b8571995650ccee753d9458dfa0377be4dba91e4491da3f2be63"}, + {file = "aiohttp-3.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:69361bfdca5468c0488d7017b9b1e5ce769d40b46a9f4a2eed26b78619e9396c"}, + {file = "aiohttp-3.9.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0fa43c32d1643f518491d9d3a730f85f5bbaedcbd7fbcae27435bb8b7a061b29"}, + {file = "aiohttp-3.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:835a55b7ca49468aaaac0b217092dfdff370e6c215c9224c52f30daaa735c1c1"}, + {file = "aiohttp-3.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:06a9b2c8837d9a94fae16c6223acc14b4dfdff216ab9b7202e07a9a09541168f"}, + {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abf151955990d23f84205286938796c55ff11bbfb4ccfada8c9c83ae6b3c89a3"}, + {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59c26c95975f26e662ca78fdf543d4eeaef70e533a672b4113dd888bd2423caa"}, + {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f95511dd5d0e05fd9728bac4096319f80615aaef4acbecb35a990afebe953b0e"}, + {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:595f105710293e76b9dc09f52e0dd896bd064a79346234b521f6b968ffdd8e58"}, + {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7c8b816c2b5af5c8a436df44ca08258fc1a13b449393a91484225fcb7545533"}, + {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f1088fa100bf46e7b398ffd9904f4808a0612e1d966b4aa43baa535d1b6341eb"}, + {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f59dfe57bb1ec82ac0698ebfcdb7bcd0e99c255bd637ff613760d5f33e7c81b3"}, + {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:361a1026c9dd4aba0109e4040e2aecf9884f5cfe1b1b1bd3d09419c205e2e53d"}, + {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:363afe77cfcbe3a36353d8ea133e904b108feea505aa4792dad6585a8192c55a"}, + {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e2c45c208c62e955e8256949eb225bd8b66a4c9b6865729a786f2aa79b72e9d"}, + {file = "aiohttp-3.9.3-cp39-cp39-win32.whl", hash = "sha256:f7217af2e14da0856e082e96ff637f14ae45c10a5714b63c77f26d8884cf1051"}, + {file = "aiohttp-3.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:27468897f628c627230dba07ec65dc8d0db566923c48f29e084ce382119802bc"}, + {file = "aiohttp-3.9.3.tar.gz", hash = "sha256:90842933e5d1ff760fae6caca4b2b3edba53ba8f4b71e95dacf2818a2aca06f7"}, ] [package.dependencies] @@ -259,37 +259,40 @@ tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "p [[package]] name = "beartype" -version = "0.16.4" +version = "0.17.0" description = "Unbearably fast runtime type checking in pure Python." 
optional = false python-versions = ">=3.8.0" files = [ - {file = "beartype-0.16.4-py3-none-any.whl", hash = "sha256:64865952f9dff1e17f22684b3c7286fc79754553b47eaefeb1286224ae8c1bd9"}, - {file = "beartype-0.16.4.tar.gz", hash = "sha256:1ada89cf2d6eb30eb6e156eed2eb5493357782937910d74380918e53c2eae0bf"}, + {file = "beartype-0.17.0-py3-none-any.whl", hash = "sha256:fa84b77a8d037f2a39c4aa2f3dc71854afc7d79312e55a66b338da68fdd48c60"}, + {file = "beartype-0.17.0.tar.gz", hash = "sha256:3226fbba8c53b4e698acdb47dcaf3c0640151c4d405618c281e6631f4112947d"}, ] [package.extras] all = ["typing-extensions (>=3.10.0.0)"] -dev = ["autoapi (>=0.9.0)", "coverage (>=5.5)", "mypy (>=0.800)", "numpy", "pandera", "pydata-sphinx-theme (<=0.7.2)", "pytest (>=4.0.0)", "sphinx", "sphinx (>=4.2.0,<6.0.0)", "sphinxext-opengraph (>=0.7.5)", "tox (>=3.20.1)", "typing-extensions (>=3.10.0.0)"] +dev = ["autoapi (>=0.9.0)", "coverage (>=5.5)", "equinox", "mypy (>=0.800)", "numpy", "pandera", "pydata-sphinx-theme (<=0.7.2)", "pytest (>=4.0.0)", "sphinx", "sphinx (>=4.2.0,<6.0.0)", "sphinxext-opengraph (>=0.7.5)", "torch", "tox (>=3.20.1)", "typing-extensions (>=3.10.0.0)"] doc-rtd = ["autoapi (>=0.9.0)", "pydata-sphinx-theme (<=0.7.2)", "sphinx (>=4.2.0,<6.0.0)", "sphinxext-opengraph (>=0.7.5)"] -test-tox = ["mypy (>=0.800)", "numpy", "pandera", "pytest (>=4.0.0)", "sphinx", "typing-extensions (>=3.10.0.0)"] +test-tox = ["equinox", "mypy (>=0.800)", "numpy", "pandera", "pytest (>=4.0.0)", "sphinx", "torch", "typing-extensions (>=3.10.0.0)"] test-tox-coverage = ["coverage (>=5.5)"] [[package]] name = "beautifulsoup4" -version = "4.12.2" +version = "4.12.3" description = "Screen-scraping library" optional = false python-versions = ">=3.6.0" files = [ - {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, - {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, + {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, + {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, ] [package.dependencies] soupsieve = ">1.2" [package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] html5lib = ["html5lib"] lxml = ["lxml"] @@ -442,33 +445,33 @@ files = [ [[package]] name = "black" -version = "24.1.0" +version = "24.1.1" description = "The uncompromising code formatter." 
optional = true python-versions = ">=3.8" files = [ - {file = "black-24.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:94d5280d020dadfafc75d7cae899609ed38653d3f5e82e7ce58f75e76387ed3d"}, - {file = "black-24.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aaf9aa85aaaa466bf969e7dd259547f4481b712fe7ee14befeecc152c403ee05"}, - {file = "black-24.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec489cae76eac3f7573629955573c3a0e913641cafb9e3bfc87d8ce155ebdb29"}, - {file = "black-24.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:a5a0100b4bdb3744dd68412c3789f472d822dc058bb3857743342f8d7f93a5a7"}, - {file = "black-24.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6cc5a6ba3e671cfea95a40030b16a98ee7dc2e22b6427a6f3389567ecf1b5262"}, - {file = "black-24.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0e367759062dcabcd9a426d12450c6d61faf1704a352a49055a04c9f9ce8f5a"}, - {file = "black-24.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be305563ff4a2dea813f699daaffac60b977935f3264f66922b1936a5e492ee4"}, - {file = "black-24.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:6a8977774929b5db90442729f131221e58cc5d8208023c6af9110f26f75b6b20"}, - {file = "black-24.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d74d4d0da276fbe3b95aa1f404182562c28a04402e4ece60cf373d0b902f33a0"}, - {file = "black-24.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:39addf23f7070dbc0b5518cdb2018468ac249d7412a669b50ccca18427dba1f3"}, - {file = "black-24.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:827a7c0da520dd2f8e6d7d3595f4591aa62ccccce95b16c0e94bb4066374c4c2"}, - {file = "black-24.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:0cd59d01bf3306ff7e3076dd7f4435fcd2fafe5506a6111cae1138fc7de52382"}, - {file = "black-24.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bf8dd261ee82df1abfb591f97e174345ab7375a55019cc93ad38993b9ff5c6ad"}, - {file = "black-24.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:82d9452aeabd51d1c8f0d52d4d18e82b9f010ecb30fd55867b5ff95904f427ff"}, - {file = "black-24.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9aede09f72b2a466e673ee9fca96e4bccc36f463cac28a35ce741f0fd13aea8b"}, - {file = "black-24.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:780f13d03066a7daf1707ec723fdb36bd698ffa29d95a2e7ef33a8dd8fe43b5c"}, - {file = "black-24.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a15670c650668399c4b5eae32e222728185961d6ef6b568f62c1681d57b381ba"}, - {file = "black-24.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1e0fa70b8464055069864a4733901b31cbdbe1273f63a24d2fa9d726723d45ac"}, - {file = "black-24.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fa8d9aaa22d846f8c0f7f07391148e5e346562e9b215794f9101a8339d8b6d8"}, - {file = "black-24.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:f0dfbfbacfbf9cd1fac7a5ddd3e72510ffa93e841a69fcf4a6358feab1685382"}, - {file = "black-24.1.0-py3-none-any.whl", hash = "sha256:5134a6f6b683aa0a5592e3fd61dd3519d8acd953d93e2b8b76f9981245b65594"}, - {file = "black-24.1.0.tar.gz", hash = "sha256:30fbf768cd4f4576598b1db0202413fafea9a227ef808d1a12230c643cefe9fc"}, + {file = "black-24.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2588021038bd5ada078de606f2a804cadd0a3cc6a79cb3e9bb3a8bf581325a4c"}, + {file = "black-24.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a95915c98d6e32ca43809d46d932e2abc5f1f7d582ffbe65a5b4d1588af7445"}, + {file = 
"black-24.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fa6a0e965779c8f2afb286f9ef798df770ba2b6cee063c650b96adec22c056a"}, + {file = "black-24.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:5242ecd9e990aeb995b6d03dc3b2d112d4a78f2083e5a8e86d566340ae80fec4"}, + {file = "black-24.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fc1ec9aa6f4d98d022101e015261c056ddebe3da6a8ccfc2c792cbe0349d48b7"}, + {file = "black-24.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0269dfdea12442022e88043d2910429bed717b2d04523867a85dacce535916b8"}, + {file = "black-24.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3d64db762eae4a5ce04b6e3dd745dcca0fb9560eb931a5be97472e38652a161"}, + {file = "black-24.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:5d7b06ea8816cbd4becfe5f70accae953c53c0e53aa98730ceccb0395520ee5d"}, + {file = "black-24.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e2c8dfa14677f90d976f68e0c923947ae68fa3961d61ee30976c388adc0b02c8"}, + {file = "black-24.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a21725862d0e855ae05da1dd25e3825ed712eaaccef6b03017fe0853a01aa45e"}, + {file = "black-24.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07204d078e25327aad9ed2c64790d681238686bce254c910de640c7cc4fc3aa6"}, + {file = "black-24.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:a83fe522d9698d8f9a101b860b1ee154c1d25f8a82ceb807d319f085b2627c5b"}, + {file = "black-24.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08b34e85170d368c37ca7bf81cf67ac863c9d1963b2c1780c39102187ec8dd62"}, + {file = "black-24.1.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7258c27115c1e3b5de9ac6c4f9957e3ee2c02c0b39222a24dc7aa03ba0e986f5"}, + {file = "black-24.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40657e1b78212d582a0edecafef133cf1dd02e6677f539b669db4746150d38f6"}, + {file = "black-24.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:e298d588744efda02379521a19639ebcd314fba7a49be22136204d7ed1782717"}, + {file = "black-24.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:34afe9da5056aa123b8bfda1664bfe6fb4e9c6f311d8e4a6eb089da9a9173bf9"}, + {file = "black-24.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:854c06fb86fd854140f37fb24dbf10621f5dab9e3b0c29a690ba595e3d543024"}, + {file = "black-24.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3897ae5a21ca132efa219c029cce5e6bfc9c3d34ed7e892113d199c0b1b444a2"}, + {file = "black-24.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:ecba2a15dfb2d97105be74bbfe5128bc5e9fa8477d8c46766505c1dda5883aac"}, + {file = "black-24.1.1-py3-none-any.whl", hash = "sha256:5cdc2e2195212208fbcae579b931407c1fa9997584f0a415421748aeafff1168"}, + {file = "black-24.1.1.tar.gz", hash = "sha256:48b5760dcbfe5cf97fd4fba23946681f3a81514c6ab8a45b50da67ac8fbc6c7b"}, ] [package.dependencies] @@ -506,17 +509,17 @@ traittypes = ">=0.0.6" [[package]] name = "branca" -version = "0.7.0" +version = "0.7.1" description = "Generate complex HTML+JS pages with Python" optional = false python-versions = ">=3.7" files = [ - {file = "branca-0.7.0-py3-none-any.whl", hash = "sha256:c653d9a3fef1e6cd203757c77d3eb44810f11998506451f9a27d52b983500c16"}, - {file = "branca-0.7.0.tar.gz", hash = "sha256:503ccb589a9ee9464cb7b5b17e5ffd8d5082c5c28624197f58f20d4d377a68bb"}, + {file = "branca-0.7.1-py3-none-any.whl", hash = "sha256:70515944ed2d1ed2784c552508df58037ca19402a8a1069d57f9113e3e012f51"}, + {file = "branca-0.7.1.tar.gz", hash = 
"sha256:e6b6f37a37bc0abffd960c68c045a7fe025d628eff87fedf6ab6ca814812110c"}, ] [package.dependencies] -jinja2 = "*" +jinja2 = ">=3" [[package]] name = "build" @@ -818,91 +821,77 @@ dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] [[package]] name = "clickhouse-connect" -version = "0.6.23" +version = "0.7.0" description = "ClickHouse Database Core Driver for Python, Pandas, and Superset" optional = true -python-versions = "~=3.7" -files = [ - {file = "clickhouse-connect-0.6.23.tar.gz", hash = "sha256:a74d01349390c0e2713603539927f105c1b9772cd81f850deb1d1ec57e4d9cfc"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cfc8e1cd68f7824e90a3492bf64f66934ad95529fac282cf96bc5a50255a5932"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f0726b5f19343dde5b337e8495713a28e0449f42504ea47a691a5a39768ccd79"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e35c767497b22f4c9069f99c24f965ae266b2448e7d2c69407d82f632616bbf7"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c677748c2fb6087fce8fb185980cb539887db2253d0f81900c4a21ef38d7cb89"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:106c680114649d5bfacf76e26bcc87df8d07141b1a3c944099ba0ce297694c7e"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4cd72d85d246fe000377035b1858720b12045f3df1042cc03a5fad4dba6b7f78"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:4f7c68297ac933603768f5dd4f49c94f88dacd9344e099b0221ead6b9914643e"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cff31e4fd52517ad79c6d50037c1da7fcaf1270d4c1c70a52852701ff415d86b"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-win32.whl", hash = "sha256:931d9bf3ecd212107e43dd8ed735a79b840b8577d4508b2cf866b1503350b415"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-win_amd64.whl", hash = "sha256:a9e55a50fb165a7be30d335da84e04ec025b2783999312917df86815dc8be3af"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4d3a7734e48f0494764ef481c694e02bc78415df60a49d5119c032e75b5e1f8b"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5a66bee81dcbdea969f39a7f75b11225e985cfa752dccd148f54bacac800f72"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d62335114e5792fa95548465d831bb33a1b226c85b87b075c7e6c692a5edc77"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24b9fa3eb7d8cbc87f635f7942cb6817a38c6491c8b40cfb6a7c0a6a8e0d59e4"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ac7c459641dd99fa7e48921d2c4148d8c0cb171697aa487b55364b0b9081bf07"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:746be95964105fa2eca05ab2dab02b1319e9c94f4a9599b4d3c2894f9090c9bc"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f2593096fc0af049dcf55e03aaf3858bbc94bedddd1fd504087127ec48b68c7b"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:38f480f264b648333f90a9f715f2357bf6ec284a9152d3a4a249dea87c797a60"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-win32.whl", hash = "sha256:f9793b8ae15ca93f7ae5d2c96c7de79ab7f6cf311b0c115d9e7948f0887086a0"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-win_amd64.whl", hash = "sha256:e6301202780893d5e3f2f62874670790a450bcbf8009d48ca360d04197205efa"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:66883e21a1598688c2a32f46a3ab9a858eca609bcd6fa6e4e0e758993027f356"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:257482716a9563cd2c964543e46af01848779fcb3665063c30b49e13f82ad27a"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7489202b7eec462e40b52066393f5ec62c82f1e514013a4e9e5f5eab962ad61"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e297da4ab46a405ce3555b89573cd256c80efc505130e08bac673095d731c6d"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f5d0097ae7ef1ff13afb2d56e5b93dfca515480d491f280315b0c16ce58c93c"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1dbd63e6bd2189259a5a9506e8efe5d8117f3d8b114c8d76bb4397eed4751927"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1d861a3b7b877227fe136e6648f3aca070a69ed407fd77c49722ad6b8f3a5aa2"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e5912503717237e0d4cafee19fbe579442484bfe6923063053e21e42c952a043"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-win32.whl", hash = "sha256:d288cf60ef846720fa8b2d2758b72dc488072121d331d7c4b27547e935129472"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-win_amd64.whl", hash = "sha256:4948ca8f292984d1d0cd7ea3bd9aa909101bf62e7d0d7902546d19b3f965f3be"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ad213ef12b0c5a474e38a13b95113aa3aafe1a35d7e81035b4c1bcc2d8d50e93"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed9ea8f2be2dcc4cfda1d33ce07d61467c93f1dbb7a98f09d69464b991dcaba0"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3fd4dcdefcfa2a7175c3c97c53bf66c38544ef84a276932fae4ffcb4c273998"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:102a04bf1eb7612e0924031c751e31dd4bad58e79543c13e8805fddcecbbfe45"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ac3943d4d522bcb1a4becb8850cdb3bfba07713178e84e4cadcd955b9002e28c"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:7fe4d55c6f8b72eeedce439ed1287ea1971e30b54879df6059dc87f73bf472d2"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e2dc8127159d5415859af6ac8b42bc70b71709d834477a1dd82e5b147de66e82"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-win32.whl", hash = "sha256:854fcd6cbf7919aa2b9e9f92c52cb5b2d1c976c4e2145193910662237a8d879c"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-win_amd64.whl", hash = "sha256:24c17054e395908eb483fad3dd899748a4d919e5b4e0db2a31c56df503f0921d"}, - {file = 
"clickhouse_connect-0.6.23-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3af001ef95f8dcb572f5cb4518d367b449fa6baf2b8fccc0e6112e04f1932b2b"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9ad33e0949dd8842beb34e6c5f01fac293bfa7701a2697f64d400d30b2858fe0"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b73130d09bb6eddf57d94d9c3bca4953af03231cc14a6757fddd9d3839720c3"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f27d725f054c54d6d8340b5545c8713ca6af640c75ade9eb0eef8b441ec37d66"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ac6bcbf730538106c376d029a0e9aa3e155490cae326e7256a51804d9576345"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9a7b35ccc8526456ad2794ab6af014506cb4472eed7f864d4d7d58bc4acf3b83"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3ac5fe6ac94ca77eed1ba440df81b5f4ff99fa85120afe46676e185f3f6f840d"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6eb8576ab497f9425e1dc3595e0fbe6c97615ae5dc7d184f2f65df874bb31539"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-win32.whl", hash = "sha256:c936e1adf8257acfc459282477ad65e2ef38eba26f72d58187154e8cbeaa1641"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-win_amd64.whl", hash = "sha256:07756dd0c610765000e9d7e202557cb6a06d1e0fd007234458d752998cd8c6da"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bcc1e103b1af2ae8b0485d1c09a55f8e9cb80e02fdaf8a039b813d07950a9586"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:10e6d20b61e5342fed752fb5221c10517af92182072fc792c5252541850d7340"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ce7caa2ceff666aaa86da11b9f42ddd09ae7ffe727db1617f352991f7a67667"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7cafc6ed2214321f3363fe5f23cf9880544ba05dc1820a994f0975bdd7e31d9"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f7e5ba4c78ef49354fac3d91eb951611430c8be2298f1dc2a1516be3149a41c"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a3691ed25e971bbf62c8bed843e80eecc0e4cb9b990e662e0916e8f2b05cd4ec"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8b43948da665bbcb5c60e3871e878e068384cd9d2f845bc02fc4c22c934831cd"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9d1b7fb932e45482199f4abce61c95ead8e8628cf1fb882c2b28dc11318742da"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-win32.whl", hash = "sha256:4315c7718a9cc8eedc4b40b53a954e2d5a5dfb705c4a659e3a167537889268da"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-win_amd64.whl", hash = "sha256:040307cad9c11f503290e75d14c0b402454502fa7ab3c742ad8dac1a419219eb"}, - {file = "clickhouse_connect-0.6.23-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:086599316e5a532c34dadcf9fa2ea19a923d0acdcc891a829b3cc5cc061fd26a"}, - {file = "clickhouse_connect-0.6.23-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:6bb73a0ee0a0161fce7c38b7f8f66e3c5f0598b8d1f3c30e24ccd17ba1c117b3"}, - {file = "clickhouse_connect-0.6.23-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e4b0111ed72058836a44313369dd05e7c550da8e8ca486834c599ae81c7cd6b"}, - {file = "clickhouse_connect-0.6.23-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e70e97eb15e89995a220fdec19b448b48f8ea65a014e71bc1cc17763a7d8fd0e"}, - {file = "clickhouse_connect-0.6.23-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:8e541c2f8921cd48dc57551391441b35af5991ae49f1d221ba77950ad195b807"}, - {file = "clickhouse_connect-0.6.23-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c7301862b6eb87aeb48f257932f60b3141777cae317217185279b7a036840e07"}, - {file = "clickhouse_connect-0.6.23-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f06348ecd72036d22d65d528221010c86559bdfc048f1633c5cd009993219a0c"}, - {file = "clickhouse_connect-0.6.23-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b40d1ec596f7c3ecf1e0d07916ab8c4f7ee52eb867758977335b285c4916e585"}, - {file = "clickhouse_connect-0.6.23-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09753a2ce1dfbe0a54fe8a7a464f67b2e0f01c1731f06d68a3ec821a00985d88"}, - {file = "clickhouse_connect-0.6.23-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:a8ff9600c9721a574c7716a2ad6b436fd043eb05a544ed08d895504d18fb6d5d"}, - {file = "clickhouse_connect-0.6.23-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7d5ec217ae361c8c18c605030f9d601d861e23dc23af502d9608e769f3e467b4"}, - {file = "clickhouse_connect-0.6.23-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de6bc3e4ac94545a6f80c49f49ad9a9945efe1084ecd89804ebbb048b022699"}, - {file = "clickhouse_connect-0.6.23-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d6277c7ec66585e1f03da95617de172aeb38232d8da8a3e69f67b4f7149017"}, - {file = "clickhouse_connect-0.6.23-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee1cb7130f0d5e03066e9d4b94ae2052dd96329466c20a3f8688eeebc6f16033"}, - {file = "clickhouse_connect-0.6.23-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:aa7009a68df2f464861c694a15f06221f308ee9758a6848c4541936d0b5a9448"}, - {file = "clickhouse_connect-0.6.23-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:019bf068b38cb0b94fda3cb51d776f556911777197d1a3d0e73eb41259449879"}, - {file = "clickhouse_connect-0.6.23-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a378b6c6b0c092b18e0169b0b94a1f709b80d192e8e6275cfe6eff9c3cb26df0"}, - {file = "clickhouse_connect-0.6.23-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd2c4356a7a496d8920c756b0ddac916d7a9a902e6183fe4da67c86a6bf19b34"}, - {file = "clickhouse_connect-0.6.23-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:075acc6085c8ece277ce91688a739cbfd54c48de2c839d554045457defdbb81c"}, - {file = "clickhouse_connect-0.6.23-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7fca7189195785f5ff2a0805e52dd68f684bd5e4f475ba5ade06c5c6769d562e"}, +python-versions = "~=3.8" +files = [ + {file = "clickhouse-connect-0.7.0.tar.gz", hash = "sha256:4fc0c7c58632237d91a26691507ab37dc28233461f6bbe42699a4f36bba86181"}, + {file = 
"clickhouse_connect-0.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0ca2e06e033afe8bbf5bad97b5766501f11886414b2f5b504a15cf703ad2d774"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:96480e2c36b265ec1b619e610e3d691be33327a0accb8ba4b4e9b3e6e0437e6f"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8f990b247caa6560f7b5d266d86364c68dbace9d44515c77b62bbd9bdeb8011"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6de3043b32f40d3300a0931ef91d783eba2d67e12437747a59dfda72b796987d"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80545c875038830bb57f28c37c0d0924f99d149cea8c603528abb37022b66ac5"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:01f8a33949d42085207638ed21d7e5442794680f276f9b3903511f6abe08bdce"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:860e117edfca7b4bdc89aa5f8dd89fc1625f90ec0ced0eccf5572bec205fb2c0"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:32a048eab8d415318c5983db7dfeb73dc431f1a53e2e253fffe795906bed1cc6"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-win32.whl", hash = "sha256:f26d9bc7a25193e4e27e636a8b3162ffd67c29c49945e0087ff27a0fbc87a980"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:ac6a5bc0fb93e003291a22c74802560dc7b47ac8e17c400014728072f3296ce4"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d5a037afd82a3ea21106f0de0b556e2ec619b2d836af5268381f939f8a78c2d"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8f4560a6eed525ce02aaa42891876e6566a59427a5c8709533fca3fabd49b308"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f03e79bce8535936a938eb5c6bb5d83ae51d70f2f8ecc09c9b6b56c55141b36"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7aac5cc6f769ba2b5fc9da6e331cdfe6d1586e3a2af59b28ff9b0408ddd4bf61"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dedf464abe72e1807b5fc86761760e5e736730c2ca2185ef2931b6d2fac860c"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3c3af22a296caec7680a1e6a94eccb78e2aabccd33dd5d5f37187566f6072cb2"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9c0a1e26584bce35642632c28aef99e9a19502ce1148ca78974f0e84fdfe2676"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a37ba5fe6d9dfde5299b6a04e2c4086ebe6c3b6652e5841de2b12fea51694d99"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-win32.whl", hash = "sha256:739779d942f2103991d85f0d0297a05e17f0ee46dbc370d1420590eb836a44b1"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:b9f2a19a2e53463694046e835dea78dfb1ab1891115148020568dc3d18f40e64"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6b9e1a818dcb2287c327f2ae5e103094bbeb50656b21f7e1536551f668a6348a"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:5cba0f75c07d8ee8b8acc974134b04184a9d971511a0cd0cc794d4de0edd4786"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab38f3cd100c1f97f24d12b41a97f18117f4e77e2b00d35e92898a115a328bef"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73a95a3a32f036aff1ce4616082bcb1b2246de36aad13dc60641fa592f7bbcee"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:71e54b434cf7905957338b8db8e2a9981a6d9bb0a8fa8ee32d6ce30a8f2e7996"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:dd3b4aec4cb695eacbcbbe6a3d88aef7b72e4829d5b1003edf87a4b0bebb17a1"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:ca0eb351d1b9e913887a340878bc444cfd1c4ded337446bf014c281a7254c089"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0625fe98c746f3d66baf30630863f61c1decd2e86dba8d024c7bc3175728794c"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-win32.whl", hash = "sha256:9b9b83e8e630564c4045ebf9ff38d6d5cef5fd9fb669ab7357dd981cd58959b4"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:ca0e3b7ece52cb24bee459b42eb2bc9d2460b53c5de47e99f89454f197509f3e"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:46558c4f54149fb82d06977e536ca19ee5d343ea77cdffbdd1398f534cb5b9a9"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6267326bf1a4e4f6803bead7a28fc148c499e5e4aec5aff9a142bde7a4b269b6"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31019259fdc38759884efaf7e5b5ea6b3612767ac52934f1f4e79913e66ddc09"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be688d9a885035c1604f846ea44d400af7d7e14c49b72ec04ee932216860755d"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b99319b8b08e4696e4011f8c8e3e5a5041a9f98920e8e2abf8c444e9e2d1aae2"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1ffc7465c509bb10c6d8f8d66b31298a203b6a85c137d2cd21195e86243eaf70"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0d3a2e600b50360ac36bb6b97ac44d4851ef2144a3c055df19fff2f48e84ab3e"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:85fedc194b35b7676660bbe510b6eb0fd8ae6c78dca4038dec09a93a227168e8"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-win32.whl", hash = "sha256:61b140694779843f6c2110f1068fb4acbcb3601599d9a721c897605e5939e3ac"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:29afdd5edc77dec17db140df4f1fde66ccffd384011627ce96cd73f0c67ed75f"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d8f14faeafcf9add0eeeb4781d4a5aa8e3b83648db401c5e76237b7a2c631141"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:85a12697c0c3ebcc24bc2c4f5636f986a36f040b28e079b90c7974e12db3424f"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db497029d455e07278b4f714d63936d4462e63554d68c3285f3e0a01e5f7aaa1"}, + {file = 
"clickhouse_connect-0.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b5462bbd9d776c899a16d17ec49ca4c43793565f5a6956fd64272eecb6bfc55"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d641717b0f675c6cd7c93dc452863a1eac6cf91d637b483a9c42d23b5617ec23"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a9531260d1ef35119ed9d909fda51578279270e38ecf54ff5f1d9d6b6a2b39f8"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:fa88a35cb47e38f4ce3d1c3dbc61656537de22c84a5d751f775b974a4efd2e32"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3eb7e0dc1c87e4e9126b2bc614e312022fa741f53f003d98b2f240e6ce8c1e1c"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-win32.whl", hash = "sha256:f479e6422578bc4fb7828f22b882e5294fe9ac9f9af8c660d216c24746bb2be0"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:b1c81338664b2457fae97c1334182e81b77ec057ea9ec3a47d682d14a03b6573"}, + {file = "clickhouse_connect-0.7.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7f167de0f3639aaa0807d011e175ff33be86e2727a4644da65a019306ff3f021"}, + {file = "clickhouse_connect-0.7.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:185ee65eab42bdd545e00c8244a72c797d1961173b78e37b0ea7b130ef0d9c73"}, + {file = "clickhouse_connect-0.7.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48dde9b8cd9daf7ec0e4baa72e94e40cdd749ae7aef1dfbe7c7d22af53dae8f4"}, + {file = "clickhouse_connect-0.7.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3d75b1a01770c04650a7dd3ebbee21369939b00125fbb70c02067ac779f523c8"}, + {file = "clickhouse_connect-0.7.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9f895290de6fa8347114a361cc944ade1ddeba895f936752533b85984d4d183e"}, + {file = "clickhouse_connect-0.7.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:359eab438b3d6f20090b592084493e04dac369e65d0cf4e1da3ecc84750b52c4"}, + {file = "clickhouse_connect-0.7.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efc6b0af171681844fbb39d7b756235aea5b416ce7fca163834e068afbd3f833"}, + {file = "clickhouse_connect-0.7.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cc1a64bc8fb91e594efbd5b4560e6c661ebf75a11d37e08d48c45f3f4e439f7"}, + {file = "clickhouse_connect-0.7.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99c2d25ceb1fbf187c7b9815373dbcdfc04d1b233dafb3547b56dfeca6904584"}, + {file = "clickhouse_connect-0.7.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:95a1e292b4cf230c2944bdc31c19c4e8fcbcd5609e24322ff5211af357beb26a"}, + {file = "clickhouse_connect-0.7.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e94de62c3dacfb7b6874f6593ad4268d38c17a1117847acdc1ad0c7b34c4e373"}, + {file = "clickhouse_connect-0.7.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a34a3f713f0148e30ddaf431af671ed16baf732eff7437ff2c7519adeda2f9c9"}, + {file = "clickhouse_connect-0.7.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9d08af8c5383d377a12e576106d7c3e0de0d03a3cbc6b9de89932e4b40f550d"}, + {file = 
"clickhouse_connect-0.7.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e06b70e4a4a54810ee293875febf71562c346688e2bc517c141958ef1c2af710"}, + {file = "clickhouse_connect-0.7.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:5c1bc46af3e0eca5a580aaecffd7dc47d541173d3189f250c59ffdd9d1cb0dd1"}, ] [package.dependencies] @@ -1078,63 +1067,63 @@ test-no-images = ["pytest", "pytest-cov", "pytest-xdist", "wurlitzer"] [[package]] name = "coverage" -version = "7.4.0" +version = "7.4.1" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:36b0ea8ab20d6a7564e89cb6135920bc9188fb5f1f7152e94e8300b7b189441a"}, - {file = "coverage-7.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0676cd0ba581e514b7f726495ea75aba3eb20899d824636c6f59b0ed2f88c471"}, - {file = "coverage-7.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0ca5c71a5a1765a0f8f88022c52b6b8be740e512980362f7fdbb03725a0d6b9"}, - {file = "coverage-7.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7c97726520f784239f6c62506bc70e48d01ae71e9da128259d61ca5e9788516"}, - {file = "coverage-7.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:815ac2d0f3398a14286dc2cea223a6f338109f9ecf39a71160cd1628786bc6f5"}, - {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:80b5ee39b7f0131ebec7968baa9b2309eddb35b8403d1869e08f024efd883566"}, - {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5b2ccb7548a0b65974860a78c9ffe1173cfb5877460e5a229238d985565574ae"}, - {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:995ea5c48c4ebfd898eacb098164b3cc826ba273b3049e4a889658548e321b43"}, - {file = "coverage-7.4.0-cp310-cp310-win32.whl", hash = "sha256:79287fd95585ed36e83182794a57a46aeae0b64ca53929d1176db56aacc83451"}, - {file = "coverage-7.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:5b14b4f8760006bfdb6e08667af7bc2d8d9bfdb648351915315ea17645347137"}, - {file = "coverage-7.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:04387a4a6ecb330c1878907ce0dc04078ea72a869263e53c72a1ba5bbdf380ca"}, - {file = "coverage-7.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea81d8f9691bb53f4fb4db603203029643caffc82bf998ab5b59ca05560f4c06"}, - {file = "coverage-7.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74775198b702868ec2d058cb92720a3c5a9177296f75bd97317c787daf711505"}, - {file = "coverage-7.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76f03940f9973bfaee8cfba70ac991825611b9aac047e5c80d499a44079ec0bc"}, - {file = "coverage-7.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:485e9f897cf4856a65a57c7f6ea3dc0d4e6c076c87311d4bc003f82cfe199d25"}, - {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6ae8c9d301207e6856865867d762a4b6fd379c714fcc0607a84b92ee63feff70"}, - {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bf477c355274a72435ceb140dc42de0dc1e1e0bf6e97195be30487d8eaaf1a09"}, - {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:83c2dda2666fe32332f8e87481eed056c8b4d163fe18ecc690b02802d36a4d26"}, 
- {file = "coverage-7.4.0-cp311-cp311-win32.whl", hash = "sha256:697d1317e5290a313ef0d369650cfee1a114abb6021fa239ca12b4849ebbd614"}, - {file = "coverage-7.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:26776ff6c711d9d835557ee453082025d871e30b3fd6c27fcef14733f67f0590"}, - {file = "coverage-7.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:13eaf476ec3e883fe3e5fe3707caeb88268a06284484a3daf8250259ef1ba143"}, - {file = "coverage-7.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846f52f46e212affb5bcf131c952fb4075b55aae6b61adc9856222df89cbe3e2"}, - {file = "coverage-7.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26f66da8695719ccf90e794ed567a1549bb2644a706b41e9f6eae6816b398c4a"}, - {file = "coverage-7.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:164fdcc3246c69a6526a59b744b62e303039a81e42cfbbdc171c91a8cc2f9446"}, - {file = "coverage-7.4.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:316543f71025a6565677d84bc4df2114e9b6a615aa39fb165d697dba06a54af9"}, - {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bb1de682da0b824411e00a0d4da5a784ec6496b6850fdf8c865c1d68c0e318dd"}, - {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:0e8d06778e8fbffccfe96331a3946237f87b1e1d359d7fbe8b06b96c95a5407a"}, - {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a56de34db7b7ff77056a37aedded01b2b98b508227d2d0979d373a9b5d353daa"}, - {file = "coverage-7.4.0-cp312-cp312-win32.whl", hash = "sha256:51456e6fa099a8d9d91497202d9563a320513fcf59f33991b0661a4a6f2ad450"}, - {file = "coverage-7.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:cd3c1e4cb2ff0083758f09be0f77402e1bdf704adb7f89108007300a6da587d0"}, - {file = "coverage-7.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e9d1bf53c4c8de58d22e0e956a79a5b37f754ed1ffdbf1a260d9dcfa2d8a325e"}, - {file = "coverage-7.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:109f5985182b6b81fe33323ab4707011875198c41964f014579cf82cebf2bb85"}, - {file = "coverage-7.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cc9d4bc55de8003663ec94c2f215d12d42ceea128da8f0f4036235a119c88ac"}, - {file = "coverage-7.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc6d65b21c219ec2072c1293c505cf36e4e913a3f936d80028993dd73c7906b1"}, - {file = "coverage-7.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a10a4920def78bbfff4eff8a05c51be03e42f1c3735be42d851f199144897ba"}, - {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b8e99f06160602bc64da35158bb76c73522a4010f0649be44a4e167ff8555952"}, - {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7d360587e64d006402b7116623cebf9d48893329ef035278969fa3bbf75b697e"}, - {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:29f3abe810930311c0b5d1a7140f6395369c3db1be68345638c33eec07535105"}, - {file = "coverage-7.4.0-cp38-cp38-win32.whl", hash = "sha256:5040148f4ec43644702e7b16ca864c5314ccb8ee0751ef617d49aa0e2d6bf4f2"}, - {file = "coverage-7.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:9864463c1c2f9cb3b5db2cf1ff475eed2f0b4285c2aaf4d357b69959941aa555"}, - {file = "coverage-7.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:936d38794044b26c99d3dd004d8af0035ac535b92090f7f2bb5aa9c8e2f5cd42"}, - {file = "coverage-7.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:799c8f873794a08cdf216aa5d0531c6a3747793b70c53f70e98259720a6fe2d7"}, - {file = "coverage-7.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7defbb9737274023e2d7af02cac77043c86ce88a907c58f42b580a97d5bcca9"}, - {file = "coverage-7.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a1526d265743fb49363974b7aa8d5899ff64ee07df47dd8d3e37dcc0818f09ed"}, - {file = "coverage-7.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf635a52fc1ea401baf88843ae8708591aa4adff875e5c23220de43b1ccf575c"}, - {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:756ded44f47f330666843b5781be126ab57bb57c22adbb07d83f6b519783b870"}, - {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0eb3c2f32dabe3a4aaf6441dde94f35687224dfd7eb2a7f47f3fd9428e421058"}, - {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bfd5db349d15c08311702611f3dccbef4b4e2ec148fcc636cf8739519b4a5c0f"}, - {file = "coverage-7.4.0-cp39-cp39-win32.whl", hash = "sha256:53d7d9158ee03956e0eadac38dfa1ec8068431ef8058fe6447043db1fb40d932"}, - {file = "coverage-7.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:cfd2a8b6b0d8e66e944d47cdec2f47c48fef2ba2f2dff5a9a75757f64172857e"}, - {file = "coverage-7.4.0-pp38.pp39.pp310-none-any.whl", hash = "sha256:c530833afc4707fe48524a44844493f36d8727f04dcce91fb978c414a8556cc6"}, - {file = "coverage-7.4.0.tar.gz", hash = "sha256:707c0f58cb1712b8809ece32b68996ee1e609f71bd14615bd8f87a1293cb610e"}, + {file = "coverage-7.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:077d366e724f24fc02dbfe9d946534357fda71af9764ff99d73c3c596001bbd7"}, + {file = "coverage-7.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0193657651f5399d433c92f8ae264aff31fc1d066deee4b831549526433f3f61"}, + {file = "coverage-7.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d17bbc946f52ca67adf72a5ee783cd7cd3477f8f8796f59b4974a9b59cacc9ee"}, + {file = "coverage-7.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3277f5fa7483c927fe3a7b017b39351610265308f5267ac6d4c2b64cc1d8d25"}, + {file = "coverage-7.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dceb61d40cbfcf45f51e59933c784a50846dc03211054bd76b421a713dcdf19"}, + {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6008adeca04a445ea6ef31b2cbaf1d01d02986047606f7da266629afee982630"}, + {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c61f66d93d712f6e03369b6a7769233bfda880b12f417eefdd4f16d1deb2fc4c"}, + {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b9bb62fac84d5f2ff523304e59e5c439955fb3b7f44e3d7b2085184db74d733b"}, + {file = "coverage-7.4.1-cp310-cp310-win32.whl", hash = "sha256:f86f368e1c7ce897bf2457b9eb61169a44e2ef797099fb5728482b8d69f3f016"}, + {file = "coverage-7.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:869b5046d41abfea3e381dd143407b0d29b8282a904a19cb908fa24d090cc018"}, + {file = "coverage-7.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8ffb498a83d7e0305968289441914154fb0ef5d8b3157df02a90c6695978295"}, + {file = "coverage-7.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash 
= "sha256:3cacfaefe6089d477264001f90f55b7881ba615953414999c46cc9713ff93c8c"}, + {file = "coverage-7.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d6850e6e36e332d5511a48a251790ddc545e16e8beaf046c03985c69ccb2676"}, + {file = "coverage-7.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18e961aa13b6d47f758cc5879383d27b5b3f3dcd9ce8cdbfdc2571fe86feb4dd"}, + {file = "coverage-7.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfd1e1b9f0898817babf840b77ce9fe655ecbe8b1b327983df485b30df8cc011"}, + {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6b00e21f86598b6330f0019b40fb397e705135040dbedc2ca9a93c7441178e74"}, + {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:536d609c6963c50055bab766d9951b6c394759190d03311f3e9fcf194ca909e1"}, + {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7ac8f8eb153724f84885a1374999b7e45734bf93a87d8df1e7ce2146860edef6"}, + {file = "coverage-7.4.1-cp311-cp311-win32.whl", hash = "sha256:f3771b23bb3675a06f5d885c3630b1d01ea6cac9e84a01aaf5508706dba546c5"}, + {file = "coverage-7.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:9d2f9d4cc2a53b38cabc2d6d80f7f9b7e3da26b2f53d48f05876fef7956b6968"}, + {file = "coverage-7.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f68ef3660677e6624c8cace943e4765545f8191313a07288a53d3da188bd8581"}, + {file = "coverage-7.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23b27b8a698e749b61809fb637eb98ebf0e505710ec46a8aa6f1be7dc0dc43a6"}, + {file = "coverage-7.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e3424c554391dc9ef4a92ad28665756566a28fecf47308f91841f6c49288e66"}, + {file = "coverage-7.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0860a348bf7004c812c8368d1fc7f77fe8e4c095d661a579196a9533778e156"}, + {file = "coverage-7.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe558371c1bdf3b8fa03e097c523fb9645b8730399c14fe7721ee9c9e2a545d3"}, + {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3468cc8720402af37b6c6e7e2a9cdb9f6c16c728638a2ebc768ba1ef6f26c3a1"}, + {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:02f2edb575d62172aa28fe00efe821ae31f25dc3d589055b3fb64d51e52e4ab1"}, + {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ca6e61dc52f601d1d224526360cdeab0d0712ec104a2ce6cc5ccef6ed9a233bc"}, + {file = "coverage-7.4.1-cp312-cp312-win32.whl", hash = "sha256:ca7b26a5e456a843b9b6683eada193fc1f65c761b3a473941efe5a291f604c74"}, + {file = "coverage-7.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:85ccc5fa54c2ed64bd91ed3b4a627b9cce04646a659512a051fa82a92c04a448"}, + {file = "coverage-7.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8bdb0285a0202888d19ec6b6d23d5990410decb932b709f2b0dfe216d031d218"}, + {file = "coverage-7.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:918440dea04521f499721c039863ef95433314b1db00ff826a02580c1f503e45"}, + {file = "coverage-7.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:379d4c7abad5afbe9d88cc31ea8ca262296480a86af945b08214eb1a556a3e4d"}, + {file = "coverage-7.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:b094116f0b6155e36a304ff912f89bbb5067157aff5f94060ff20bbabdc8da06"}, + {file = "coverage-7.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2f5968608b1fe2a1d00d01ad1017ee27efd99b3437e08b83ded9b7af3f6f766"}, + {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:10e88e7f41e6197ea0429ae18f21ff521d4f4490aa33048f6c6f94c6045a6a75"}, + {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a4a3907011d39dbc3e37bdc5df0a8c93853c369039b59efa33a7b6669de04c60"}, + {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6d224f0c4c9c98290a6990259073f496fcec1b5cc613eecbd22786d398ded3ad"}, + {file = "coverage-7.4.1-cp38-cp38-win32.whl", hash = "sha256:23f5881362dcb0e1a92b84b3c2809bdc90db892332daab81ad8f642d8ed55042"}, + {file = "coverage-7.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:a07f61fc452c43cd5328b392e52555f7d1952400a1ad09086c4a8addccbd138d"}, + {file = "coverage-7.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8e738a492b6221f8dcf281b67129510835461132b03024830ac0e554311a5c54"}, + {file = "coverage-7.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:46342fed0fff72efcda77040b14728049200cbba1279e0bf1188f1f2078c1d70"}, + {file = "coverage-7.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9641e21670c68c7e57d2053ddf6c443e4f0a6e18e547e86af3fad0795414a628"}, + {file = "coverage-7.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aeb2c2688ed93b027eb0d26aa188ada34acb22dceea256d76390eea135083950"}, + {file = "coverage-7.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d12c923757de24e4e2110cf8832d83a886a4cf215c6e61ed506006872b43a6d1"}, + {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0491275c3b9971cdbd28a4595c2cb5838f08036bca31765bad5e17edf900b2c7"}, + {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8dfc5e195bbef80aabd81596ef52a1277ee7143fe419efc3c4d8ba2754671756"}, + {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1a78b656a4d12b0490ca72651fe4d9f5e07e3c6461063a9b6265ee45eb2bdd35"}, + {file = "coverage-7.4.1-cp39-cp39-win32.whl", hash = "sha256:f90515974b39f4dea2f27c0959688621b46d96d5a626cf9c53dbc653a895c05c"}, + {file = "coverage-7.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:64e723ca82a84053dd7bfcc986bdb34af8d9da83c521c19d6b472bc6880e191a"}, + {file = "coverage-7.4.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:32a8d985462e37cfdab611a6f95b09d7c091d07668fdc26e47a725ee575fe166"}, + {file = "coverage-7.4.1.tar.gz", hash = "sha256:1ed4b95480952b1a26d863e546fa5094564aa0065e1e5f0d4d0041f293251d04"}, ] [package.dependencies] @@ -1216,13 +1205,13 @@ tests = ["pytest", "pytest-cov", "pytest-xdist"] [[package]] name = "dask" -version = "2024.1.0" +version = "2024.1.1" description = "Parallel PyData with Task Scheduling" optional = false python-versions = ">=3.9" files = [ - {file = "dask-2024.1.0-py3-none-any.whl", hash = "sha256:717102ef7c309297291095a0061d374f3b72e11ce4e1115ab9faff940e274b4b"}, - {file = "dask-2024.1.0.tar.gz", hash = "sha256:f24fdc7a07e59a1403bf6903e6d8dc15ed6f8607d3311b4f00f88d8a2ac63e49"}, + {file = "dask-2024.1.1-py3-none-any.whl", hash = "sha256:860ce2797905095beff0187c214840b80c77d752dcb9098a8283e3655a762bf5"}, + {file = "dask-2024.1.1.tar.gz", hash = 
"sha256:d0dc92e81ce68594a0a0ce23ba33f4d648f2c2f4217ab9b79068b7ecfb0416c7"}, ] [package.dependencies] @@ -1242,7 +1231,7 @@ array = ["numpy (>=1.21)"] complete = ["dask[array,dataframe,diagnostics,distributed]", "lz4 (>=4.3.2)", "pyarrow (>=7.0)", "pyarrow-hotfix"] dataframe = ["dask[array]", "pandas (>=1.3)"] diagnostics = ["bokeh (>=2.4.2)", "jinja2 (>=2.10.3)"] -distributed = ["distributed (==2024.1.0)"] +distributed = ["distributed (==2024.1.1)"] test = ["pandas[test]", "pre-commit", "pytest", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytest-xdist"] [[package]] @@ -1355,19 +1344,19 @@ files = [ [[package]] name = "distributed" -version = "2024.1.0" +version = "2024.1.1" description = "Distributed scheduler for Dask" optional = false python-versions = ">=3.9" files = [ - {file = "distributed-2024.1.0-py3-none-any.whl", hash = "sha256:b552c9331350ba0e7cb8eccb1da8942b44997ccb680338f61c43fe9843c69988"}, - {file = "distributed-2024.1.0.tar.gz", hash = "sha256:f1d0e2dd5249085e32c6ff5c0ce0521c7e844dd52337683a69363a6bb1799a30"}, + {file = "distributed-2024.1.1-py3-none-any.whl", hash = "sha256:cf05d3b38e1700339b3e36395729ab62110e723efefaecc21a8260fdc7555cf9"}, + {file = "distributed-2024.1.1.tar.gz", hash = "sha256:28cf5e9f4f07197b03ea8e5272e374ce2b9e9dc6742f6c9b525fd81645213c67"}, ] [package.dependencies] click = ">=8.0" cloudpickle = ">=1.5.0" -dask = "2024.1.0" +dask = "2024.1.1" jinja2 = ">=2.10.3" locket = ">=1.0.0" msgpack = ">=1.0.0" @@ -1429,21 +1418,6 @@ files = [ {file = "duckdb-0.9.2.tar.gz", hash = "sha256:3843afeab7c3fc4a4c0b53686a4cc1d9cdbdadcbb468d60fef910355ecafd447"}, ] -[[package]] -name = "duckdb-engine" -version = "0.10.0" -description = "SQLAlchemy driver for duckdb" -optional = true -python-versions = ">=3.7" -files = [ - {file = "duckdb_engine-0.10.0-py3-none-any.whl", hash = "sha256:c408d002e83630b6bbb05fc3b26a43406085b1c22dd43e8cab00bf0b9c011ea8"}, - {file = "duckdb_engine-0.10.0.tar.gz", hash = "sha256:5e3dad3b3513f055a4f5ec5430842249cfe03015743a7597ed1dcc0447dca565"}, -] - -[package.dependencies] -duckdb = ">=0.4.0" -sqlalchemy = ">=1.3.22" - [[package]] name = "dulwich" version = "0.21.7" @@ -1899,22 +1873,24 @@ gcsfuse = ["fusepy"] [[package]] name = "gdown" -version = "4.7.1" -description = "Google Drive direct download of big files." 
+version = "5.0.1" +description = "Google Drive Public File/Folder Downloader" optional = false -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "gdown-4.7.1-py3-none-any.whl", hash = "sha256:65d495699e7c2c61af0d0e9c32748fb4f79abaf80d747a87456c7be14aac2560"}, - {file = "gdown-4.7.1.tar.gz", hash = "sha256:347f23769679aaf7efa73e5655270fcda8ca56be65eb84a4a21d143989541045"}, + {file = "gdown-5.0.1-py3-none-any.whl", hash = "sha256:3f595fcfd4b1bccd5cf73453f60984c5fa1c18eed499277a52b23337238c2670"}, + {file = "gdown-5.0.1.tar.gz", hash = "sha256:173557b4d33aad9f7dc75ce2ff963d8b313f36371e15da4b5ebb35ac6c7d5af6"}, ] [package.dependencies] beautifulsoup4 = "*" filelock = "*" requests = {version = "*", extras = ["socks"]} -six = "*" tqdm = "*" +[package.extras] +test = ["build", "mypy", "pytest", "ruff", "twine", "types-requests"] + [[package]] name = "geoalchemy2" version = "0.14.3" @@ -1946,13 +1922,13 @@ files = [ [[package]] name = "geopandas" -version = "0.14.2" +version = "0.14.3" description = "Geographic pandas extensions" optional = false python-versions = ">=3.9" files = [ - {file = "geopandas-0.14.2-py3-none-any.whl", hash = "sha256:0efa61235a68862c1c6be89fc3707cdeba67667d5676bb19e24f3c57a8c2f723"}, - {file = "geopandas-0.14.2.tar.gz", hash = "sha256:6e71d57b8376f9fdc9f1c3aa3170e7e420e91778de854f51013ae66fd371ccdb"}, + {file = "geopandas-0.14.3-py3-none-any.whl", hash = "sha256:41b31ad39e21bc9e8c4254f78f8dc4ce3d33d144e22e630a00bb336c83160204"}, + {file = "geopandas-0.14.3.tar.gz", hash = "sha256:748af035d4a068a4ae00cab384acb61d387685c833b0022e0729aa45216b23ac"}, ] [package.dependencies] @@ -1964,13 +1940,13 @@ shapely = ">=1.8.0" [[package]] name = "google-api-core" -version = "2.15.0" +version = "2.16.1" description = "Google API client core library" optional = false python-versions = ">=3.7" files = [ - {file = "google-api-core-2.15.0.tar.gz", hash = "sha256:abc978a72658f14a2df1e5e12532effe40f94f868f6e23d95133bd6abcca35ca"}, - {file = "google_api_core-2.15.0-py3-none-any.whl", hash = "sha256:2aa56d2be495551e66bbff7f729b790546f87d5c90e74781aa77233bcb395a8a"}, + {file = "google-api-core-2.16.1.tar.gz", hash = "sha256:7f668ffa3d5b9f3c6930407e5f5d691c05a376050a5a5fd772b9dc32e70a0c30"}, + {file = "google_api_core-2.16.1-py3-none-any.whl", hash = "sha256:257e9e152cd18da0c6701113c122ade04dca04731e179fc5c7dca48e1396ec4c"}, ] [package.dependencies] @@ -1994,13 +1970,13 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] name = "google-auth" -version = "2.26.2" +version = "2.27.0" description = "Google Authentication Library" optional = false python-versions = ">=3.7" files = [ - {file = "google-auth-2.26.2.tar.gz", hash = "sha256:97327dbbf58cccb58fc5a1712bba403ae76668e64814eb30f7316f7e27126b81"}, - {file = "google_auth-2.26.2-py2.py3-none-any.whl", hash = "sha256:3f445c8ce9b61ed6459aad86d8ccdba4a9afed841b2d1451a11ef4db08957424"}, + {file = "google-auth-2.27.0.tar.gz", hash = "sha256:e863a56ccc2d8efa83df7a80272601e43487fa9a728a376205c86c26aaefa821"}, + {file = "google_auth-2.27.0-py2.py3-none-any.whl", hash = "sha256:8e4bad367015430ff253fe49d500fdc3396c1a434db5740828c728e45bcce245"}, ] [package.dependencies] @@ -2035,13 +2011,13 @@ tool = ["click (>=6.0.0)"] [[package]] name = "google-cloud-bigquery" -version = "3.16.0" +version = "3.17.1" description = "Google BigQuery API client library" optional = true python-versions = ">=3.7" files = [ - {file = "google-cloud-bigquery-3.16.0.tar.gz", hash = 
"sha256:1d6abf4b1d740df17cb43a078789872af8059a0b1dd999f32ea69ebc6f7ba7ef"}, - {file = "google_cloud_bigquery-3.16.0-py2.py3-none-any.whl", hash = "sha256:8bac7754f92bf87ee81f38deabb7554d82bb9591fbe06a5c82f33e46e5a482f9"}, + {file = "google-cloud-bigquery-3.17.1.tar.gz", hash = "sha256:0ae07b90d5052ba3a296a2210a2144c28469300d71f6f455881f94c2df543057"}, + {file = "google_cloud_bigquery-3.17.1-py2.py3-none-any.whl", hash = "sha256:7a9a92c7b1f6a6bf8b4c05c150e49f4ad1a03dd591dbd4522381b3f23bf07c73"}, ] [package.dependencies] @@ -2331,13 +2307,13 @@ test = ["objgraph", "psutil"] [[package]] name = "griffe" -version = "0.38.1" +version = "0.40.0" description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." optional = false python-versions = ">=3.8" files = [ - {file = "griffe-0.38.1-py3-none-any.whl", hash = "sha256:334c79d3b5964ade65c05dfcaf53518c576dedd387aaba5c9fd71212f34f1483"}, - {file = "griffe-0.38.1.tar.gz", hash = "sha256:bd68d7da7f3d87bc57eb9962b250db123efd9bbcc06c11c1a91b6e583b2a9361"}, + {file = "griffe-0.40.0-py3-none-any.whl", hash = "sha256:db1da6d1d8e08cbb20f1a7dee8c09da940540c2d4c1bfa26a9091cf6fc36a9ec"}, + {file = "griffe-0.40.0.tar.gz", hash = "sha256:76c4439eaa2737af46ae003c331ab6ca79c5365b552f7b5aed263a3b4125735b"}, ] [package.dependencies] @@ -2441,13 +2417,13 @@ tests = ["freezegun", "pytest", "pytest-cov"] [[package]] name = "hypothesis" -version = "6.93.0" +version = "6.97.4" description = "A library for property-based testing" optional = false python-versions = ">=3.8" files = [ - {file = "hypothesis-6.93.0-py3-none-any.whl", hash = "sha256:bfe6173e36c8cf0779a79de757a8a7151568b2703cb14dcbc186517c7a79144b"}, - {file = "hypothesis-6.93.0.tar.gz", hash = "sha256:e9ceaa5bbd244471fa1c28272fb2b0c68bb6ee014473394d63519ed02bd2d4de"}, + {file = "hypothesis-6.97.4-py3-none-any.whl", hash = "sha256:9069fe3fb18d9b7dd218bd69ab50bbc66426819dfac7cc7168ba85034d98a4df"}, + {file = "hypothesis-6.97.4.tar.gz", hash = "sha256:28ff724fa81ccc55f64f0f1eb06e4a75db6a195fe0857e9b3184cf4ff613a103"}, ] [package.dependencies] @@ -2609,13 +2585,13 @@ ipywidgets = "*" [[package]] name = "ipykernel" -version = "6.28.0" +version = "6.29.0" description = "IPython Kernel for Jupyter" optional = false python-versions = ">=3.8" files = [ - {file = "ipykernel-6.28.0-py3-none-any.whl", hash = "sha256:c6e9a9c63a7f4095c0a22a79f765f079f9ec7be4f2430a898ddea889e8665661"}, - {file = "ipykernel-6.28.0.tar.gz", hash = "sha256:69c11403d26de69df02225916f916b37ea4b9af417da0a8c827f84328d88e5f3"}, + {file = "ipykernel-6.29.0-py3-none-any.whl", hash = "sha256:076663ca68492576f051e4af7720d33f34383e655f2be0d544c8b1c9de915b2f"}, + {file = "ipykernel-6.29.0.tar.gz", hash = "sha256:b5dd3013cab7b330df712891c96cd1ab868c27a7159e606f762015e9bf8ceb3f"}, ] [package.dependencies] @@ -2638,17 +2614,17 @@ cov = ["coverage[toml]", "curio", "matplotlib", "pytest-cov", "trio"] docs = ["myst-parser", "pydata-sphinx-theme", "sphinx", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "trio"] pyqt5 = ["pyqt5"] pyside6 = ["pyside6"] -test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio", "pytest-cov", "pytest-timeout"] +test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio (==0.23.2)", "pytest-cov", "pytest-timeout"] [[package]] name = "ipyleaflet" -version = "0.18.1" +version = "0.18.2" description = "A Jupyter widget for dynamic Leaflet 
maps" optional = false python-versions = ">=3.7" files = [ - {file = "ipyleaflet-0.18.1-py3-none-any.whl", hash = "sha256:c941429945248fb0fb8a7b30cc4f248d3194e4a409066a068495a633c97eb6c6"}, - {file = "ipyleaflet-0.18.1.tar.gz", hash = "sha256:f35d70ad0e0bb2c0c160b499ab8c788333fc54576596e33b974f0dfeee941d12"}, + {file = "ipyleaflet-0.18.2-py3-none-any.whl", hash = "sha256:dc5bed1bad3ba3244fe97aac9d4ed8f8096ae3d5e6ac0c5fdfbe7f1d2a01d3f8"}, + {file = "ipyleaflet-0.18.2.tar.gz", hash = "sha256:8f166529ec7784de08822b253b8cc593fa81af8a8f967d70cbc53e45a6d3755f"}, ] [package.dependencies] @@ -2811,13 +2787,13 @@ files = [ [[package]] name = "jsonschema" -version = "4.20.0" +version = "4.21.1" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.8" files = [ - {file = "jsonschema-4.20.0-py3-none-any.whl", hash = "sha256:ed6231f0429ecf966f5bc8dfef245998220549cbbcf140f913b7464c52c3b6b3"}, - {file = "jsonschema-4.20.0.tar.gz", hash = "sha256:4f614fd46d8d61258610998997743ec5492a648b33cf478c1ddc23ed4598a5fa"}, + {file = "jsonschema-4.21.1-py3-none-any.whl", hash = "sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f"}, + {file = "jsonschema-4.21.1.tar.gz", hash = "sha256:85727c00279f5fa6bedbe6238d2aa6403bedd8b4864ab11207d07df3cc1b2ee5"}, ] [package.dependencies] @@ -3184,71 +3160,71 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] [[package]] name = "markupsafe" -version = "2.1.3" +version = "2.1.4" description = "Safely add untrusted strings to HTML/XML markup." optional = false python-versions = ">=3.7" files = [ - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, - {file = 
"MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, - {file = 
"MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, - {file = 
"MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, - {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:de8153a7aae3835484ac168a9a9bdaa0c5eee4e0bc595503c95d53b942879c84"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e888ff76ceb39601c59e219f281466c6d7e66bd375b4ec1ce83bcdc68306796b"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0b838c37ba596fcbfca71651a104a611543077156cb0a26fe0c475e1f152ee8"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac1ebf6983148b45b5fa48593950f90ed6d1d26300604f321c74a9ca1609f8e"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0fbad3d346df8f9d72622ac71b69565e621ada2ce6572f37c2eae8dacd60385d"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d5291d98cd3ad9a562883468c690a2a238c4a6388ab3bd155b0c75dd55ece858"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a7cc49ef48a3c7a0005a949f3c04f8baa5409d3f663a1b36f0eba9bfe2a0396e"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b83041cda633871572f0d3c41dddd5582ad7d22f65a72eacd8d3d6d00291df26"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-win32.whl", hash = "sha256:0c26f67b3fe27302d3a412b85ef696792c4a2386293c53ba683a89562f9399b0"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-win_amd64.whl", hash = "sha256:a76055d5cb1c23485d7ddae533229039b850db711c554a12ea64a0fd8a0129e2"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9e9e3c4020aa2dc62d5dd6743a69e399ce3de58320522948af6140ac959ab863"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0042d6a9880b38e1dd9ff83146cc3c9c18a059b9360ceae207805567aacccc69"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d03fea4c4e9fd0ad75dc2e7e2b6757b80c152c032ea1d1de487461d8140efc"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ab3a886a237f6e9c9f4f7d272067e712cdb4efa774bef494dccad08f39d8ae6"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abf5ebbec056817057bfafc0445916bb688a255a5146f900445d081db08cbabb"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e1a0d1924a5013d4f294087e00024ad25668234569289650929ab871231668e7"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e7902211afd0af05fbadcc9a312e4cf10f27b779cf1323e78d52377ae4b72bea"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c669391319973e49a7c6230c218a1e3044710bc1ce4c8e6eb71f7e6d43a2c131"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-win32.whl", hash = "sha256:31f57d64c336b8ccb1966d156932f3daa4fee74176b0fdc48ef580be774aae74"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-win_amd64.whl", hash = "sha256:54a7e1380dfece8847c71bf7e33da5d084e9b889c75eca19100ef98027bd9f56"}, + {file = 
"MarkupSafe-2.1.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a76cd37d229fc385738bd1ce4cba2a121cf26b53864c1772694ad0ad348e509e"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:987d13fe1d23e12a66ca2073b8d2e2a75cec2ecb8eab43ff5624ba0ad42764bc"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5244324676254697fe5c181fc762284e2c5fceeb1c4e3e7f6aca2b6f107e60dc"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78bc995e004681246e85e28e068111a4c3f35f34e6c62da1471e844ee1446250"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4d176cfdfde84f732c4a53109b293d05883e952bbba68b857ae446fa3119b4f"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f9917691f410a2e0897d1ef99619fd3f7dd503647c8ff2475bf90c3cf222ad74"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f06e5a9e99b7df44640767842f414ed5d7bedaaa78cd817ce04bbd6fd86e2dd6"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:396549cea79e8ca4ba65525470d534e8a41070e6b3500ce2414921099cb73e8d"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-win32.whl", hash = "sha256:f6be2d708a9d0e9b0054856f07ac7070fbe1754be40ca8525d5adccdbda8f475"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:5045e892cfdaecc5b4c01822f353cf2c8feb88a6ec1c0adef2a2e705eef0f656"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7a07f40ef8f0fbc5ef1000d0c78771f4d5ca03b4953fc162749772916b298fc4"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d18b66fe626ac412d96c2ab536306c736c66cf2a31c243a45025156cc190dc8a"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:698e84142f3f884114ea8cf83e7a67ca8f4ace8454e78fe960646c6c91c63bfa"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49a3b78a5af63ec10d8604180380c13dcd870aba7928c1fe04e881d5c792dc4e"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:15866d7f2dc60cfdde12ebb4e75e41be862348b4728300c36cdf405e258415ec"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:6aa5e2e7fc9bc042ae82d8b79d795b9a62bd8f15ba1e7594e3db243f158b5565"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:54635102ba3cf5da26eb6f96c4b8c53af8a9c0d97b64bdcb592596a6255d8518"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-win32.whl", hash = "sha256:3583a3a3ab7958e354dc1d25be74aee6228938312ee875a22330c4dc2e41beb0"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-win_amd64.whl", hash = "sha256:d6e427c7378c7f1b2bef6a344c925b8b63623d3321c09a237b7cc0e77dd98ceb"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:bf1196dcc239e608605b716e7b166eb5faf4bc192f8a44b81e85251e62584bd2"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4df98d4a9cd6a88d6a585852f56f2155c9cdb6aec78361a19f938810aa020954"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b835aba863195269ea358cecc21b400276747cc977492319fd7682b8cd2c253d"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:23984d1bdae01bee794267424af55eef4dfc038dc5d1272860669b2aa025c9e3"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c98c33ffe20e9a489145d97070a435ea0679fddaabcafe19982fe9c971987d5"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9896fca4a8eb246defc8b2a7ac77ef7553b638e04fbf170bff78a40fa8a91474"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b0fe73bac2fed83839dbdbe6da84ae2a31c11cfc1c777a40dbd8ac8a6ed1560f"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c7556bafeaa0a50e2fe7dc86e0382dea349ebcad8f010d5a7dc6ba568eaaa789"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-win32.whl", hash = "sha256:fc1a75aa8f11b87910ffd98de62b29d6520b6d6e8a3de69a70ca34dea85d2a8a"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-win_amd64.whl", hash = "sha256:3a66c36a3864df95e4f62f9167c734b3b1192cb0851b43d7cc08040c074c6279"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:765f036a3d00395a326df2835d8f86b637dbaf9832f90f5d196c3b8a7a5080cb"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:21e7af8091007bf4bebf4521184f4880a6acab8df0df52ef9e513d8e5db23411"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5c31fe855c77cad679b302aabc42d724ed87c043b1432d457f4976add1c2c3e"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7653fa39578957bc42e5ebc15cf4361d9e0ee4b702d7d5ec96cdac860953c5b4"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47bb5f0142b8b64ed1399b6b60f700a580335c8e1c57f2f15587bd072012decc"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:fe8512ed897d5daf089e5bd010c3dc03bb1bdae00b35588c49b98268d4a01e00"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:36d7626a8cca4d34216875aee5a1d3d654bb3dac201c1c003d182283e3205949"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b6f14a9cd50c3cb100eb94b3273131c80d102e19bb20253ac7bd7336118a673a"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-win32.whl", hash = "sha256:c8f253a84dbd2c63c19590fa86a032ef3d8cc18923b8049d91bcdeeb2581fbf6"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-win_amd64.whl", hash = "sha256:8b570a1537367b52396e53325769608f2a687ec9a4363647af1cded8928af959"}, + {file = "MarkupSafe-2.1.4.tar.gz", hash = "sha256:3aae9af4cac263007fd6309c64c6ab4506dd2b79382d9d19a1994f9240b8db4f"}, ] [[package]] @@ -3576,13 +3552,13 @@ test = ["pep440", "pre-commit", "pytest", "testpath"] [[package]] name = "nest-asyncio" -version = "1.5.8" +version = "1.6.0" description = "Patch asyncio to allow nested event loops" optional = false python-versions = ">=3.5" files = [ - {file = "nest_asyncio-1.5.8-py3-none-any.whl", hash = "sha256:accda7a339a70599cb08f9dd09a67e0c2ef8d8d6f4c07f96ab203f2ae254e48d"}, - {file = "nest_asyncio-1.5.8.tar.gz", hash = "sha256:25aa2ca0d2a5b5531956b9e273b45cf664cae2b145101d73b86b199978d48fdb"}, + {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, + {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, ] [[package]] @@ -3727,36 +3703,40 @@ files = [ [[package]] name = "pandas" -version = "2.1.4" +version = 
"2.2.0" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" files = [ - {file = "pandas-2.1.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bdec823dc6ec53f7a6339a0e34c68b144a7a1fd28d80c260534c39c62c5bf8c9"}, - {file = "pandas-2.1.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:294d96cfaf28d688f30c918a765ea2ae2e0e71d3536754f4b6de0ea4a496d034"}, - {file = "pandas-2.1.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b728fb8deba8905b319f96447a27033969f3ea1fea09d07d296c9030ab2ed1d"}, - {file = "pandas-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00028e6737c594feac3c2df15636d73ace46b8314d236100b57ed7e4b9ebe8d9"}, - {file = "pandas-2.1.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:426dc0f1b187523c4db06f96fb5c8d1a845e259c99bda74f7de97bd8a3bb3139"}, - {file = "pandas-2.1.4-cp310-cp310-win_amd64.whl", hash = "sha256:f237e6ca6421265643608813ce9793610ad09b40154a3344a088159590469e46"}, - {file = "pandas-2.1.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b7d852d16c270e4331f6f59b3e9aa23f935f5c4b0ed2d0bc77637a8890a5d092"}, - {file = "pandas-2.1.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7d5f2f54f78164b3d7a40f33bf79a74cdee72c31affec86bfcabe7e0789821"}, - {file = "pandas-2.1.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0aa6e92e639da0d6e2017d9ccff563222f4eb31e4b2c3cf32a2a392fc3103c0d"}, - {file = "pandas-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d797591b6846b9db79e65dc2d0d48e61f7db8d10b2a9480b4e3faaddc421a171"}, - {file = "pandas-2.1.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d2d3e7b00f703aea3945995ee63375c61b2e6aa5aa7871c5d622870e5e137623"}, - {file = "pandas-2.1.4-cp311-cp311-win_amd64.whl", hash = "sha256:dc9bf7ade01143cddc0074aa6995edd05323974e6e40d9dbde081021ded8510e"}, - {file = "pandas-2.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:482d5076e1791777e1571f2e2d789e940dedd927325cc3cb6d0800c6304082f6"}, - {file = "pandas-2.1.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8a706cfe7955c4ca59af8c7a0517370eafbd98593155b48f10f9811da440248b"}, - {file = "pandas-2.1.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0513a132a15977b4a5b89aabd304647919bc2169eac4c8536afb29c07c23540"}, - {file = "pandas-2.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9f17f2b6fc076b2a0078862547595d66244db0f41bf79fc5f64a5c4d635bead"}, - {file = "pandas-2.1.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:45d63d2a9b1b37fa6c84a68ba2422dc9ed018bdaa668c7f47566a01188ceeec1"}, - {file = "pandas-2.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:f69b0c9bb174a2342818d3e2778584e18c740d56857fc5cdb944ec8bbe4082cf"}, - {file = "pandas-2.1.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3f06bda01a143020bad20f7a85dd5f4a1600112145f126bc9e3e42077c24ef34"}, - {file = "pandas-2.1.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ab5796839eb1fd62a39eec2916d3e979ec3130509930fea17fe6f81e18108f6a"}, - {file = "pandas-2.1.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edbaf9e8d3a63a9276d707b4d25930a262341bca9874fcb22eff5e3da5394732"}, - {file = "pandas-2.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ebfd771110b50055712b3b711b51bee5d50135429364d0498e1213a7adc2be8"}, - {file = "pandas-2.1.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:8ea107e0be2aba1da619cc6ba3f999b2bfc9669a83554b1904ce3dd9507f0860"}, - {file = "pandas-2.1.4-cp39-cp39-win_amd64.whl", hash = "sha256:d65148b14788b3758daf57bf42725caa536575da2b64df9964c563b015230984"}, - {file = "pandas-2.1.4.tar.gz", hash = "sha256:fcb68203c833cc735321512e13861358079a96c174a61f5116a1de89c58c0ef7"}, + {file = "pandas-2.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8108ee1712bb4fa2c16981fba7e68b3f6ea330277f5ca34fa8d557e986a11670"}, + {file = "pandas-2.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:736da9ad4033aeab51d067fc3bd69a0ba36f5a60f66a527b3d72e2030e63280a"}, + {file = "pandas-2.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38e0b4fc3ddceb56ec8a287313bc22abe17ab0eb184069f08fc6a9352a769b18"}, + {file = "pandas-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20404d2adefe92aed3b38da41d0847a143a09be982a31b85bc7dd565bdba0f4e"}, + {file = "pandas-2.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7ea3ee3f125032bfcade3a4cf85131ed064b4f8dd23e5ce6fa16473e48ebcaf5"}, + {file = "pandas-2.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f9670b3ac00a387620489dfc1bca66db47a787f4e55911f1293063a78b108df1"}, + {file = "pandas-2.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:5a946f210383c7e6d16312d30b238fd508d80d927014f3b33fb5b15c2f895430"}, + {file = "pandas-2.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a1b438fa26b208005c997e78672f1aa8138f67002e833312e6230f3e57fa87d5"}, + {file = "pandas-2.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8ce2fbc8d9bf303ce54a476116165220a1fedf15985b09656b4b4275300e920b"}, + {file = "pandas-2.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2707514a7bec41a4ab81f2ccce8b382961a29fbe9492eab1305bb075b2b1ff4f"}, + {file = "pandas-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85793cbdc2d5bc32620dc8ffa715423f0c680dacacf55056ba13454a5be5de88"}, + {file = "pandas-2.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:cfd6c2491dc821b10c716ad6776e7ab311f7df5d16038d0b7458bc0b67dc10f3"}, + {file = "pandas-2.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a146b9dcacc3123aa2b399df1a284de5f46287a4ab4fbfc237eac98a92ebcb71"}, + {file = "pandas-2.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbc1b53c0e1fdf16388c33c3cca160f798d38aea2978004dd3f4d3dec56454c9"}, + {file = "pandas-2.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a41d06f308a024981dcaa6c41f2f2be46a6b186b902c94c2674e8cb5c42985bc"}, + {file = "pandas-2.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:159205c99d7a5ce89ecfc37cb08ed179de7783737cea403b295b5eda8e9c56d1"}, + {file = "pandas-2.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb1e1f3861ea9132b32f2133788f3b14911b68102d562715d71bd0013bc45440"}, + {file = "pandas-2.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:761cb99b42a69005dec2b08854fb1d4888fdf7b05db23a8c5a099e4b886a2106"}, + {file = "pandas-2.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a20628faaf444da122b2a64b1e5360cde100ee6283ae8effa0d8745153809a2e"}, + {file = "pandas-2.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f5be5d03ea2073627e7111f61b9f1f0d9625dc3c4d8dda72cc827b0c58a1d042"}, + {file = "pandas-2.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:a626795722d893ed6aacb64d2401d017ddc8a2341b49e0384ab9bf7112bdec30"}, + {file = "pandas-2.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:9f66419d4a41132eb7e9a73dcec9486cf5019f52d90dd35547af11bc58f8637d"}, + {file = "pandas-2.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:57abcaeda83fb80d447f28ab0cc7b32b13978f6f733875ebd1ed14f8fbc0f4ab"}, + {file = "pandas-2.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e60f1f7dba3c2d5ca159e18c46a34e7ca7247a73b5dd1a22b6d59707ed6b899a"}, + {file = "pandas-2.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb61dc8567b798b969bcc1fc964788f5a68214d333cade8319c7ab33e2b5d88a"}, + {file = "pandas-2.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:52826b5f4ed658fa2b729264d63f6732b8b29949c7fd234510d57c61dbeadfcd"}, + {file = "pandas-2.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bde2bc699dbd80d7bc7f9cab1e23a95c4375de615860ca089f34e7c64f4a8de7"}, + {file = "pandas-2.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:3de918a754bbf2da2381e8a3dcc45eede8cd7775b047b923f9006d5f876802ae"}, + {file = "pandas-2.2.0.tar.gz", hash = "sha256:30b83f7c3eb217fb4d1b494a57a2fda5444f17834f5df2de6b2ffff68dc3c8e2"}, ] [package.dependencies] @@ -3767,31 +3747,31 @@ numpy = [ ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" -tzdata = ">=2022.1" +tzdata = ">=2022.7" [package.extras] -all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"] -aws = ["s3fs (>=2022.05.0)"] -clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"] -compression = ["zstandard (>=0.17.0)"] -computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] consortium-standard = ["dataframe-api-compat (>=0.1.7)"] -excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"] -feather = ["pyarrow (>=7.0.0)"] -fss = ["fsspec (>=2022.05.0)"] -gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq 
(>=0.17.5)"] -hdf5 = ["tables (>=3.7.0)"] -html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"] -mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"] -output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"] -parquet = ["pyarrow (>=7.0.0)"] -performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"] -plot = ["matplotlib (>=3.6.1)"] -postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"] -spss = ["pyreadstat (>=1.1.5)"] -sql-other = ["SQLAlchemy (>=1.4.36)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] -xml = ["lxml (>=4.8.0)"] +xml = ["lxml (>=4.9.2)"] [[package]] name = "parso" @@ -4073,13 +4053,13 @@ typing = ["ipython", "pandas-stubs", "pyright"] [[package]] name = "pluggy" -version = "1.3.0" +version = "1.4.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" files = [ - {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, - {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, + {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"}, + {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, ] [package.extras] @@ -4088,21 +4068,22 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "plum-dispatch" -version = "2.2.2" +version = "2.3.2" description = "Multiple dispatch in Python" optional = false python-versions = ">=3.8" files = [ - {file = "plum_dispatch-2.2.2-py3-none-any.whl", hash = "sha256:d7ee415bd166ffa90eaa4b24d7c9dc7ca6f8875750586001e7c9baff706223bd"}, - {file = "plum_dispatch-2.2.2.tar.gz", hash = "sha256:d5d180225c9fbf0277375bb558b649d97d0b651a91037bb7155cedbe9f52764b"}, + {file = "plum_dispatch-2.3.2-py3-none-any.whl", hash = "sha256:96f519d416accf9a009117682f689114eb23e867bb6f977eed74ef85ef7fef9d"}, + {file = "plum_dispatch-2.3.2.tar.gz", hash = "sha256:f49f00dfdf7ab0f16c9b85cc27cc5241ffb59aee02218bac671ec7c1ac65e139"}, ] [package.dependencies] beartype = ">=0.16.2" +rich = ">=10.0" typing-extensions = {version = "*", markers = "python_version <= \"3.10\""} [package.extras] -dev = ["black (==23.9.0)", "build", "coveralls", "ghp-import", "ipython", "jupyter-book", "mypy", "numpy", "pre-commit", "pyright", "pytest (>=6)", "pytest-cov", "tox", "wheel"] +dev = ["black (==23.9.0)", "build", "coveralls", "ghp-import", "ipython", "jupyter-book", "mypy", "numpy", "pre-commit", "pyright (>=1.1.331)", "pytest (>=6)", 
"pytest-cov", "ruff (==0.1.0)", "tox", "wheel"] [[package]] name = "poetry" @@ -4188,17 +4169,17 @@ poetry-core = ">=1.7.0,<2.0.0" [[package]] name = "polars" -version = "0.20.4" +version = "0.20.6" description = "Blazingly fast DataFrame library" optional = true python-versions = ">=3.8" files = [ - {file = "polars-0.20.4-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:5fa84f74fc2274e3926d083ccd084c81b3e04debdc66fd917cafe7026d1df19c"}, - {file = "polars-0.20.4-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:4c5a9f981708f3d090dd5513d83806bcb8a1725653d80bcf63bb738a097b1162"}, - {file = "polars-0.20.4-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfdc2672423c9c73e34161b7b4833c40d042b9d36e899866dc858e8a221b0849"}, - {file = "polars-0.20.4-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:76f270fb17955c97958c2e301c5c2fa10015ccf3048697964ad9c2198e4c6fe6"}, - {file = "polars-0.20.4-cp38-abi3-win_amd64.whl", hash = "sha256:96a067be35745942d3fe6cd3ad1513f9ab7f4249d2b2502484ee64b30d221f96"}, - {file = "polars-0.20.4.tar.gz", hash = "sha256:21a90aa0c7401c80fc814b4db371dced780df6bd5ac81a329307e796b5821190"}, + {file = "polars-0.20.6-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:59845bae0b614b3291baa889cfc2a251e1024129696bb655596f2b5556e9f9a1"}, + {file = "polars-0.20.6-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:9e86736f68440bf97a9100fa0a79ae7ce616d1af6fd4669fff1345f03aab14c0"}, + {file = "polars-0.20.6-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4f4e3335fdcc863f6aac0616510b1baa5e13d5e818ebbfcb980ad534bd6edc2"}, + {file = "polars-0.20.6-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:7c7b494beea914a54bcae8868dee3988a88ecb48525df948e07aacf2fb83e711"}, + {file = "polars-0.20.6-cp38-abi3-win_amd64.whl", hash = "sha256:a96b157d68697c8d6ef2f7c2cc1734d498c3c6cc0c9c18d4fff7283ccfabdd1d"}, + {file = "polars-0.20.6.tar.gz", hash = "sha256:b53553308bc7e2b4f841b18f1949b61ed7f2cf155c5c64712298efa5af67a997"}, ] [package.extras] @@ -4305,27 +4286,27 @@ files = [ [[package]] name = "psutil" -version = "5.9.7" +version = "5.9.8" description = "Cross-platform lib for process and system monitoring in Python." 
optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ - {file = "psutil-5.9.7-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:0bd41bf2d1463dfa535942b2a8f0e958acf6607ac0be52265ab31f7923bcd5e6"}, - {file = "psutil-5.9.7-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:5794944462509e49d4d458f4dbfb92c47539e7d8d15c796f141f474010084056"}, - {file = "psutil-5.9.7-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:fe361f743cb3389b8efda21980d93eb55c1f1e3898269bc9a2a1d0bb7b1f6508"}, - {file = "psutil-5.9.7-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:e469990e28f1ad738f65a42dcfc17adaed9d0f325d55047593cb9033a0ab63df"}, - {file = "psutil-5.9.7-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:3c4747a3e2ead1589e647e64aad601981f01b68f9398ddf94d01e3dc0d1e57c7"}, - {file = "psutil-5.9.7-cp27-none-win32.whl", hash = "sha256:1d4bc4a0148fdd7fd8f38e0498639ae128e64538faa507df25a20f8f7fb2341c"}, - {file = "psutil-5.9.7-cp27-none-win_amd64.whl", hash = "sha256:4c03362e280d06bbbfcd52f29acd79c733e0af33d707c54255d21029b8b32ba6"}, - {file = "psutil-5.9.7-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ea36cc62e69a13ec52b2f625c27527f6e4479bca2b340b7a452af55b34fcbe2e"}, - {file = "psutil-5.9.7-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1132704b876e58d277168cd729d64750633d5ff0183acf5b3c986b8466cd0284"}, - {file = "psutil-5.9.7-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe8b7f07948f1304497ce4f4684881250cd859b16d06a1dc4d7941eeb6233bfe"}, - {file = "psutil-5.9.7-cp36-cp36m-win32.whl", hash = "sha256:b27f8fdb190c8c03914f908a4555159327d7481dac2f01008d483137ef3311a9"}, - {file = "psutil-5.9.7-cp36-cp36m-win_amd64.whl", hash = "sha256:44969859757f4d8f2a9bd5b76eba8c3099a2c8cf3992ff62144061e39ba8568e"}, - {file = "psutil-5.9.7-cp37-abi3-win32.whl", hash = "sha256:c727ca5a9b2dd5193b8644b9f0c883d54f1248310023b5ad3e92036c5e2ada68"}, - {file = "psutil-5.9.7-cp37-abi3-win_amd64.whl", hash = "sha256:f37f87e4d73b79e6c5e749440c3113b81d1ee7d26f21c19c47371ddea834f414"}, - {file = "psutil-5.9.7-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:032f4f2c909818c86cea4fe2cc407f1c0f0cde8e6c6d702b28b8ce0c0d143340"}, - {file = "psutil-5.9.7.tar.gz", hash = "sha256:3f02134e82cfb5d089fddf20bb2e03fd5cd52395321d1c8458a9e58500ff417c"}, + {file = "psutil-5.9.8-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:26bd09967ae00920df88e0352a91cff1a78f8d69b3ecabbfe733610c0af486c8"}, + {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:05806de88103b25903dff19bb6692bd2e714ccf9e668d050d144012055cbca73"}, + {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:611052c4bc70432ec770d5d54f64206aa7203a101ec273a0cd82418c86503bb7"}, + {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:50187900d73c1381ba1454cf40308c2bf6f34268518b3f36a9b663ca87e65e36"}, + {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:02615ed8c5ea222323408ceba16c60e99c3f91639b07da6373fb7e6539abc56d"}, + {file = "psutil-5.9.8-cp27-none-win32.whl", hash = "sha256:36f435891adb138ed3c9e58c6af3e2e6ca9ac2f365efe1f9cfef2794e6c93b4e"}, + {file = "psutil-5.9.8-cp27-none-win_amd64.whl", hash = "sha256:bd1184ceb3f87651a67b2708d4c3338e9b10c5df903f2e3776b62303b26cb631"}, + {file = "psutil-5.9.8-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81"}, + 
{file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421"}, + {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4"}, + {file = "psutil-5.9.8-cp36-cp36m-win32.whl", hash = "sha256:7d79560ad97af658a0f6adfef8b834b53f64746d45b403f225b85c5c2c140eee"}, + {file = "psutil-5.9.8-cp36-cp36m-win_amd64.whl", hash = "sha256:27cc40c3493bb10de1be4b3f07cae4c010ce715290a5be22b98493509c6299e2"}, + {file = "psutil-5.9.8-cp37-abi3-win32.whl", hash = "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0"}, + {file = "psutil-5.9.8-cp37-abi3-win_amd64.whl", hash = "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf"}, + {file = "psutil-5.9.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8"}, + {file = "psutil-5.9.8.tar.gz", hash = "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c"}, ] [package.extras] @@ -4558,18 +4539,18 @@ files = [ [[package]] name = "pydantic" -version = "2.5.3" +version = "2.6.0" description = "Data validation using Python type hints" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pydantic-2.5.3-py3-none-any.whl", hash = "sha256:d0caf5954bee831b6bfe7e338c32b9e30c85dfe080c843680783ac2b631673b4"}, - {file = "pydantic-2.5.3.tar.gz", hash = "sha256:b3ef57c62535b0941697cce638c08900d87fcb67e29cfa99e8a68f747f393f7a"}, + {file = "pydantic-2.6.0-py3-none-any.whl", hash = "sha256:1440966574e1b5b99cf75a13bec7b20e3512e8a61b894ae252f56275e2c465ae"}, + {file = "pydantic-2.6.0.tar.gz", hash = "sha256:ae887bd94eb404b09d86e4d12f93893bdca79d766e738528c6fa1c849f3c6bcf"}, ] [package.dependencies] annotated-types = ">=0.4.0" -pydantic-core = "2.14.6" +pydantic-core = "2.16.1" typing-extensions = ">=4.6.1" [package.extras] @@ -4577,116 +4558,90 @@ email = ["email-validator (>=2.0.0)"] [[package]] name = "pydantic-core" -version = "2.14.6" +version = "2.16.1" description = "" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.14.6-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:72f9a942d739f09cd42fffe5dc759928217649f070056f03c70df14f5770acf9"}, - {file = "pydantic_core-2.14.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6a31d98c0d69776c2576dda4b77b8e0c69ad08e8b539c25c7d0ca0dc19a50d6c"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5aa90562bc079c6c290f0512b21768967f9968e4cfea84ea4ff5af5d917016e4"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:370ffecb5316ed23b667d99ce4debe53ea664b99cc37bfa2af47bc769056d534"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f85f3843bdb1fe80e8c206fe6eed7a1caeae897e496542cee499c374a85c6e08"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9862bf828112e19685b76ca499b379338fd4c5c269d897e218b2ae8fcb80139d"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036137b5ad0cb0004c75b579445a1efccd072387a36c7f217bb8efd1afbe5245"}, - {file = 
"pydantic_core-2.14.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92879bce89f91f4b2416eba4429c7b5ca22c45ef4a499c39f0c5c69257522c7c"}, - {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0c08de15d50fa190d577e8591f0329a643eeaed696d7771760295998aca6bc66"}, - {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:36099c69f6b14fc2c49d7996cbf4f87ec4f0e66d1c74aa05228583225a07b590"}, - {file = "pydantic_core-2.14.6-cp310-none-win32.whl", hash = "sha256:7be719e4d2ae6c314f72844ba9d69e38dff342bc360379f7c8537c48e23034b7"}, - {file = "pydantic_core-2.14.6-cp310-none-win_amd64.whl", hash = "sha256:36fa402dcdc8ea7f1b0ddcf0df4254cc6b2e08f8cd80e7010d4c4ae6e86b2a87"}, - {file = "pydantic_core-2.14.6-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:dea7fcd62915fb150cdc373212141a30037e11b761fbced340e9db3379b892d4"}, - {file = "pydantic_core-2.14.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ffff855100bc066ff2cd3aa4a60bc9534661816b110f0243e59503ec2df38421"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b027c86c66b8627eb90e57aee1f526df77dc6d8b354ec498be9a757d513b92b"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00b1087dabcee0b0ffd104f9f53d7d3eaddfaa314cdd6726143af6bc713aa27e"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75ec284328b60a4e91010c1acade0c30584f28a1f345bc8f72fe8b9e46ec6a96"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e1f4744eea1501404b20b0ac059ff7e3f96a97d3e3f48ce27a139e053bb370b"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2602177668f89b38b9f84b7b3435d0a72511ddef45dc14446811759b82235a1"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6c8edaea3089bf908dd27da8f5d9e395c5b4dc092dbcce9b65e7156099b4b937"}, - {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:478e9e7b360dfec451daafe286998d4a1eeaecf6d69c427b834ae771cad4b622"}, - {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b6ca36c12a5120bad343eef193cc0122928c5c7466121da7c20f41160ba00ba2"}, - {file = "pydantic_core-2.14.6-cp311-none-win32.whl", hash = "sha256:2b8719037e570639e6b665a4050add43134d80b687288ba3ade18b22bbb29dd2"}, - {file = "pydantic_core-2.14.6-cp311-none-win_amd64.whl", hash = "sha256:78ee52ecc088c61cce32b2d30a826f929e1708f7b9247dc3b921aec367dc1b23"}, - {file = "pydantic_core-2.14.6-cp311-none-win_arm64.whl", hash = "sha256:a19b794f8fe6569472ff77602437ec4430f9b2b9ec7a1105cfd2232f9ba355e6"}, - {file = "pydantic_core-2.14.6-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:667aa2eac9cd0700af1ddb38b7b1ef246d8cf94c85637cbb03d7757ca4c3fdec"}, - {file = "pydantic_core-2.14.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cdee837710ef6b56ebd20245b83799fce40b265b3b406e51e8ccc5b85b9099b7"}, - {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c5bcf3414367e29f83fd66f7de64509a8fd2368b1edf4351e862910727d3e51"}, - {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:26a92ae76f75d1915806b77cf459811e772d8f71fd1e4339c99750f0e7f6324f"}, - {file = 
"pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a983cca5ed1dd9a35e9e42ebf9f278d344603bfcb174ff99a5815f953925140a"}, - {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cb92f9061657287eded380d7dc455bbf115430b3aa4741bdc662d02977e7d0af"}, - {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ace1e220b078c8e48e82c081e35002038657e4b37d403ce940fa679e57113b"}, - {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ef633add81832f4b56d3b4c9408b43d530dfca29e68fb1b797dcb861a2c734cd"}, - {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7e90d6cc4aad2cc1f5e16ed56e46cebf4877c62403a311af20459c15da76fd91"}, - {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e8a5ac97ea521d7bde7621d86c30e86b798cdecd985723c4ed737a2aa9e77d0c"}, - {file = "pydantic_core-2.14.6-cp312-none-win32.whl", hash = "sha256:f27207e8ca3e5e021e2402ba942e5b4c629718e665c81b8b306f3c8b1ddbb786"}, - {file = "pydantic_core-2.14.6-cp312-none-win_amd64.whl", hash = "sha256:b3e5fe4538001bb82e2295b8d2a39356a84694c97cb73a566dc36328b9f83b40"}, - {file = "pydantic_core-2.14.6-cp312-none-win_arm64.whl", hash = "sha256:64634ccf9d671c6be242a664a33c4acf12882670b09b3f163cd00a24cffbd74e"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:24368e31be2c88bd69340fbfe741b405302993242ccb476c5c3ff48aeee1afe0"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:e33b0834f1cf779aa839975f9d8755a7c2420510c0fa1e9fa0497de77cd35d2c"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6af4b3f52cc65f8a0bc8b1cd9676f8c21ef3e9132f21fed250f6958bd7223bed"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d15687d7d7f40333bd8266f3814c591c2e2cd263fa2116e314f60d82086e353a"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:095b707bb287bfd534044166ab767bec70a9bba3175dcdc3371782175c14e43c"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94fc0e6621e07d1e91c44e016cc0b189b48db053061cc22d6298a611de8071bb"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ce830e480f6774608dedfd4a90c42aac4a7af0a711f1b52f807130c2e434c06"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a306cdd2ad3a7d795d8e617a58c3a2ed0f76c8496fb7621b6cd514eb1532cae8"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2f5fa187bde8524b1e37ba894db13aadd64faa884657473b03a019f625cee9a8"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:438027a975cc213a47c5d70672e0d29776082155cfae540c4e225716586be75e"}, - {file = "pydantic_core-2.14.6-cp37-none-win32.whl", hash = "sha256:f96ae96a060a8072ceff4cfde89d261837b4294a4f28b84a28765470d502ccc6"}, - {file = "pydantic_core-2.14.6-cp37-none-win_amd64.whl", hash = "sha256:e646c0e282e960345314f42f2cea5e0b5f56938c093541ea6dbf11aec2862391"}, - {file = "pydantic_core-2.14.6-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:db453f2da3f59a348f514cfbfeb042393b68720787bbef2b4c6068ea362c8149"}, - {file = 
"pydantic_core-2.14.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3860c62057acd95cc84044e758e47b18dcd8871a328ebc8ccdefd18b0d26a21b"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36026d8f99c58d7044413e1b819a67ca0e0b8ebe0f25e775e6c3d1fabb3c38fb"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8ed1af8692bd8d2a29d702f1a2e6065416d76897d726e45a1775b1444f5928a7"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:314ccc4264ce7d854941231cf71b592e30d8d368a71e50197c905874feacc8a8"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:982487f8931067a32e72d40ab6b47b1628a9c5d344be7f1a4e668fb462d2da42"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dbe357bc4ddda078f79d2a36fc1dd0494a7f2fad83a0a684465b6f24b46fe80"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2f6ffc6701a0eb28648c845f4945a194dc7ab3c651f535b81793251e1185ac3d"}, - {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7f5025db12fc6de7bc1104d826d5aee1d172f9ba6ca936bf6474c2148ac336c1"}, - {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dab03ed811ed1c71d700ed08bde8431cf429bbe59e423394f0f4055f1ca0ea60"}, - {file = "pydantic_core-2.14.6-cp38-none-win32.whl", hash = "sha256:dfcbebdb3c4b6f739a91769aea5ed615023f3c88cb70df812849aef634c25fbe"}, - {file = "pydantic_core-2.14.6-cp38-none-win_amd64.whl", hash = "sha256:99b14dbea2fdb563d8b5a57c9badfcd72083f6006caf8e126b491519c7d64ca8"}, - {file = "pydantic_core-2.14.6-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:4ce8299b481bcb68e5c82002b96e411796b844d72b3e92a3fbedfe8e19813eab"}, - {file = "pydantic_core-2.14.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b9a9d92f10772d2a181b5ca339dee066ab7d1c9a34ae2421b2a52556e719756f"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd9e98b408384989ea4ab60206b8e100d8687da18b5c813c11e92fd8212a98e0"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4f86f1f318e56f5cbb282fe61eb84767aee743ebe32c7c0834690ebea50c0a6b"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86ce5fcfc3accf3a07a729779d0b86c5d0309a4764c897d86c11089be61da160"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dcf1978be02153c6a31692d4fbcc2a3f1db9da36039ead23173bc256ee3b91b"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eedf97be7bc3dbc8addcef4142f4b4164066df0c6f36397ae4aaed3eb187d8ab"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5f916acf8afbcab6bacbb376ba7dc61f845367901ecd5e328fc4d4aef2fcab0"}, - {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8a14c192c1d724c3acbfb3f10a958c55a2638391319ce8078cb36c02283959b9"}, - {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0348b1dc6b76041516e8a854ff95b21c55f5a411c3297d2ca52f5528e49d8411"}, - {file = "pydantic_core-2.14.6-cp39-none-win32.whl", hash = "sha256:de2a0645a923ba57c5527497daf8ec5df69c6eadf869e9cd46e86349146e5975"}, - {file = 
"pydantic_core-2.14.6-cp39-none-win_amd64.whl", hash = "sha256:aca48506a9c20f68ee61c87f2008f81f8ee99f8d7f0104bff3c47e2d148f89d9"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d5c28525c19f5bb1e09511669bb57353d22b94cf8b65f3a8d141c389a55dec95"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:78d0768ee59baa3de0f4adac9e3748b4b1fffc52143caebddfd5ea2961595277"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b93785eadaef932e4fe9c6e12ba67beb1b3f1e5495631419c784ab87e975670"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a874f21f87c485310944b2b2734cd6d318765bcbb7515eead33af9641816506e"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89f4477d915ea43b4ceea6756f63f0288941b6443a2b28c69004fe07fde0d0d"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:172de779e2a153d36ee690dbc49c6db568d7b33b18dc56b69a7514aecbcf380d"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:dfcebb950aa7e667ec226a442722134539e77c575f6cfaa423f24371bb8d2e94"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:55a23dcd98c858c0db44fc5c04fc7ed81c4b4d33c653a7c45ddaebf6563a2f66"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4241204e4b36ab5ae466ecec5c4c16527a054c69f99bba20f6f75232a6a534e2"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e574de99d735b3fc8364cba9912c2bec2da78775eba95cbb225ef7dda6acea24"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1302a54f87b5cd8528e4d6d1bf2133b6aa7c6122ff8e9dc5220fbc1e07bffebd"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8e81e4b55930e5ffab4a68db1af431629cf2e4066dbdbfef65348b8ab804ea8"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c99462ffc538717b3e60151dfaf91125f637e801f5ab008f81c402f1dff0cd0f"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e4cf2d5829f6963a5483ec01578ee76d329eb5caf330ecd05b3edd697e7d768a"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:cf10b7d58ae4a1f07fccbf4a0a956d705356fea05fb4c70608bb6fa81d103cda"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:399ac0891c284fa8eb998bcfa323f2234858f5d2efca3950ae58c8f88830f145"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c6a5c79b28003543db3ba67d1df336f253a87d3112dac3a51b94f7d48e4c0e1"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:599c87d79cab2a6a2a9df4aefe0455e61e7d2aeede2f8577c1b7c0aec643ee8e"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43e166ad47ba900f2542a80d83f9fc65fe99eb63ceec4debec160ae729824052"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3a0b5db001b98e1c649dd55afa928e75aa4087e587b9524a4992316fa23c9fba"}, - {file = 
"pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:747265448cb57a9f37572a488a57d873fd96bf51e5bb7edb52cfb37124516da4"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:7ebe3416785f65c28f4f9441e916bfc8a54179c8dea73c23023f7086fa601c5d"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:86c963186ca5e50d5c8287b1d1c9d3f8f024cbe343d048c5bd282aec2d8641f2"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e0641b506486f0b4cd1500a2a65740243e8670a2549bb02bc4556a83af84ae03"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71d72ca5eaaa8d38c8df16b7deb1a2da4f650c41b58bb142f3fb75d5ad4a611f"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27e524624eace5c59af499cd97dc18bb201dc6a7a2da24bfc66ef151c69a5f2a"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a3dde6cac75e0b0902778978d3b1646ca9f438654395a362cb21d9ad34b24acf"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:00646784f6cd993b1e1c0e7b0fdcbccc375d539db95555477771c27555e3c556"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:23598acb8ccaa3d1d875ef3b35cb6376535095e9405d91a3d57a8c7db5d29341"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7f41533d7e3cf9520065f610b41ac1c76bc2161415955fbcead4981b22c7611e"}, - {file = "pydantic_core-2.14.6.tar.gz", hash = "sha256:1fd0c1d395372843fba13a51c28e3bb9d59bd7aebfeb17358ffaaa1e4dbbe948"}, + {file = "pydantic_core-2.16.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:300616102fb71241ff477a2cbbc847321dbec49428434a2f17f37528721c4948"}, + {file = "pydantic_core-2.16.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5511f962dd1b9b553e9534c3b9c6a4b0c9ded3d8c2be96e61d56f933feef9e1f"}, + {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98f0edee7ee9cc7f9221af2e1b95bd02810e1c7a6d115cfd82698803d385b28f"}, + {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9795f56aa6b2296f05ac79d8a424e94056730c0b860a62b0fdcfe6340b658cc8"}, + {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c45f62e4107ebd05166717ac58f6feb44471ed450d07fecd90e5f69d9bf03c48"}, + {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:462d599299c5971f03c676e2b63aa80fec5ebc572d89ce766cd11ca8bcb56f3f"}, + {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21ebaa4bf6386a3b22eec518da7d679c8363fb7fb70cf6972161e5542f470798"}, + {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:99f9a50b56713a598d33bc23a9912224fc5d7f9f292444e6664236ae471ddf17"}, + {file = "pydantic_core-2.16.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:8ec364e280db4235389b5e1e6ee924723c693cbc98e9d28dc1767041ff9bc388"}, + {file = "pydantic_core-2.16.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:653a5dfd00f601a0ed6654a8b877b18d65ac32c9d9997456e0ab240807be6cf7"}, + {file = "pydantic_core-2.16.1-cp310-none-win32.whl", hash = "sha256:1661c668c1bb67b7cec96914329d9ab66755911d093bb9063c4c8914188af6d4"}, + {file = 
"pydantic_core-2.16.1-cp310-none-win_amd64.whl", hash = "sha256:561be4e3e952c2f9056fba5267b99be4ec2afadc27261505d4992c50b33c513c"}, + {file = "pydantic_core-2.16.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:102569d371fadc40d8f8598a59379c37ec60164315884467052830b28cc4e9da"}, + {file = "pydantic_core-2.16.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:735dceec50fa907a3c314b84ed609dec54b76a814aa14eb90da31d1d36873a5e"}, + {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e83ebbf020be727d6e0991c1b192a5c2e7113eb66e3def0cd0c62f9f266247e4"}, + {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:30a8259569fbeec49cfac7fda3ec8123486ef1b729225222f0d41d5f840b476f"}, + {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:920c4897e55e2881db6a6da151198e5001552c3777cd42b8a4c2f72eedc2ee91"}, + {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f5247a3d74355f8b1d780d0f3b32a23dd9f6d3ff43ef2037c6dcd249f35ecf4c"}, + {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d5bea8012df5bb6dda1e67d0563ac50b7f64a5d5858348b5c8cb5043811c19d"}, + {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ed3025a8a7e5a59817b7494686d449ebfbe301f3e757b852c8d0d1961d6be864"}, + {file = "pydantic_core-2.16.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:06f0d5a1d9e1b7932477c172cc720b3b23c18762ed7a8efa8398298a59d177c7"}, + {file = "pydantic_core-2.16.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:150ba5c86f502c040b822777e2e519b5625b47813bd05f9273a8ed169c97d9ae"}, + {file = "pydantic_core-2.16.1-cp311-none-win32.whl", hash = "sha256:d6cbdf12ef967a6aa401cf5cdf47850559e59eedad10e781471c960583f25aa1"}, + {file = "pydantic_core-2.16.1-cp311-none-win_amd64.whl", hash = "sha256:afa01d25769af33a8dac0d905d5c7bb2d73c7c3d5161b2dd6f8b5b5eea6a3c4c"}, + {file = "pydantic_core-2.16.1-cp311-none-win_arm64.whl", hash = "sha256:1a2fe7b00a49b51047334d84aafd7e39f80b7675cad0083678c58983662da89b"}, + {file = "pydantic_core-2.16.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0f478ec204772a5c8218e30eb813ca43e34005dff2eafa03931b3d8caef87d51"}, + {file = "pydantic_core-2.16.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f1936ef138bed2165dd8573aa65e3095ef7c2b6247faccd0e15186aabdda7f66"}, + {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99d3a433ef5dc3021c9534a58a3686c88363c591974c16c54a01af7efd741f13"}, + {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd88f40f2294440d3f3c6308e50d96a0d3d0973d6f1a5732875d10f569acef49"}, + {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fac641bbfa43d5a1bed99d28aa1fded1984d31c670a95aac1bf1d36ac6ce137"}, + {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:72bf9308a82b75039b8c8edd2be2924c352eda5da14a920551a8b65d5ee89253"}, + {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb4363e6c9fc87365c2bc777a1f585a22f2f56642501885ffc7942138499bf54"}, + {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:20f724a023042588d0f4396bbbcf4cffd0ddd0ad3ed4f0d8e6d4ac4264bae81e"}, + {file = "pydantic_core-2.16.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:fb4370b15111905bf8b5ba2129b926af9470f014cb0493a67d23e9d7a48348e8"}, + {file = "pydantic_core-2.16.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:23632132f1fd608034f1a56cc3e484be00854db845b3a4a508834be5a6435a6f"}, + {file = "pydantic_core-2.16.1-cp312-none-win32.whl", hash = "sha256:b9f3e0bffad6e238f7acc20c393c1ed8fab4371e3b3bc311020dfa6020d99212"}, + {file = "pydantic_core-2.16.1-cp312-none-win_amd64.whl", hash = "sha256:a0b4cfe408cd84c53bab7d83e4209458de676a6ec5e9c623ae914ce1cb79b96f"}, + {file = "pydantic_core-2.16.1-cp312-none-win_arm64.whl", hash = "sha256:d195add190abccefc70ad0f9a0141ad7da53e16183048380e688b466702195dd"}, + {file = "pydantic_core-2.16.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:502c062a18d84452858f8aea1e520e12a4d5228fc3621ea5061409d666ea1706"}, + {file = "pydantic_core-2.16.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d8c032ccee90b37b44e05948b449a2d6baed7e614df3d3f47fe432c952c21b60"}, + {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:920f4633bee43d7a2818e1a1a788906df5a17b7ab6fe411220ed92b42940f818"}, + {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9f5d37ff01edcbace53a402e80793640c25798fb7208f105d87a25e6fcc9ea06"}, + {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:399166f24c33a0c5759ecc4801f040dbc87d412c1a6d6292b2349b4c505effc9"}, + {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ac89ccc39cd1d556cc72d6752f252dc869dde41c7c936e86beac5eb555041b66"}, + {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73802194f10c394c2bedce7a135ba1d8ba6cff23adf4217612bfc5cf060de34c"}, + {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8fa00fa24ffd8c31fac081bf7be7eb495be6d248db127f8776575a746fa55c95"}, + {file = "pydantic_core-2.16.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:601d3e42452cd4f2891c13fa8c70366d71851c1593ed42f57bf37f40f7dca3c8"}, + {file = "pydantic_core-2.16.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:07982b82d121ed3fc1c51faf6e8f57ff09b1325d2efccaa257dd8c0dd937acca"}, + {file = "pydantic_core-2.16.1-cp38-none-win32.whl", hash = "sha256:d0bf6f93a55d3fa7a079d811b29100b019784e2ee6bc06b0bb839538272a5610"}, + {file = "pydantic_core-2.16.1-cp38-none-win_amd64.whl", hash = "sha256:fbec2af0ebafa57eb82c18c304b37c86a8abddf7022955d1742b3d5471a6339e"}, + {file = "pydantic_core-2.16.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a497be217818c318d93f07e14502ef93d44e6a20c72b04c530611e45e54c2196"}, + {file = "pydantic_core-2.16.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:694a5e9f1f2c124a17ff2d0be613fd53ba0c26de588eb4bdab8bca855e550d95"}, + {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d4dfc66abea3ec6d9f83e837a8f8a7d9d3a76d25c9911735c76d6745950e62c"}, + {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8655f55fe68c4685673265a650ef71beb2d31871c049c8b80262026f23605ee3"}, + {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:21e3298486c4ea4e4d5cc6fb69e06fb02a4e22089304308817035ac006a7f506"}, 
+ {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:71b4a48a7427f14679f0015b13c712863d28bb1ab700bd11776a5368135c7d60"}, + {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10dca874e35bb60ce4f9f6665bfbfad050dd7573596608aeb9e098621ac331dc"}, + {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fa496cd45cda0165d597e9d6f01e36c33c9508f75cf03c0a650018c5048f578e"}, + {file = "pydantic_core-2.16.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5317c04349472e683803da262c781c42c5628a9be73f4750ac7d13040efb5d2d"}, + {file = "pydantic_core-2.16.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:42c29d54ed4501a30cd71015bf982fa95e4a60117b44e1a200290ce687d3e640"}, + {file = "pydantic_core-2.16.1-cp39-none-win32.whl", hash = "sha256:ba07646f35e4e49376c9831130039d1b478fbfa1215ae62ad62d2ee63cf9c18f"}, + {file = "pydantic_core-2.16.1-cp39-none-win_amd64.whl", hash = "sha256:2133b0e412a47868a358713287ff9f9a328879da547dc88be67481cdac529118"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:d25ef0c33f22649b7a088035fd65ac1ce6464fa2876578df1adad9472f918a76"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:99c095457eea8550c9fa9a7a992e842aeae1429dab6b6b378710f62bfb70b394"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b49c604ace7a7aa8af31196abbf8f2193be605db6739ed905ecaf62af31ccae0"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c56da23034fe66221f2208c813d8aa509eea34d97328ce2add56e219c3a9f41c"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cebf8d56fee3b08ad40d332a807ecccd4153d3f1ba8231e111d9759f02edfd05"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:1ae8048cba95f382dba56766525abca438328455e35c283bb202964f41a780b0"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:780daad9e35b18d10d7219d24bfb30148ca2afc309928e1d4d53de86822593dc"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c94b5537bf6ce66e4d7830c6993152940a188600f6ae044435287753044a8fe2"}, + {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:adf28099d061a25fbcc6531febb7a091e027605385de9fe14dd6a97319d614cf"}, + {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:644904600c15816a1f9a1bafa6aab0d21db2788abcdf4e2a77951280473f33e1"}, + {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87bce04f09f0552b66fca0c4e10da78d17cb0e71c205864bab4e9595122cb9d9"}, + {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:877045a7969ace04d59516d5d6a7dee13106822f99a5d8df5e6822941f7bedc8"}, + {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9c46e556ee266ed3fb7b7a882b53df3c76b45e872fdab8d9cf49ae5e91147fd7"}, + {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:4eebbd049008eb800f519578e944b8dc8e0f7d59a5abb5924cc2d4ed3a1834ff"}, + {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = 
"sha256:c0be58529d43d38ae849a91932391eb93275a06b93b79a8ab828b012e916a206"}, + {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:b1fc07896fc1851558f532dffc8987e526b682ec73140886c831d773cef44b76"}, + {file = "pydantic_core-2.16.1.tar.gz", hash = "sha256:daff04257b49ab7f4b3f73f98283d3dbb1a65bf3500d55c7beac3c66c310fe34"}, ] [package.dependencies] @@ -4710,13 +4665,13 @@ setuptools = "*" [[package]] name = "pydeps" -version = "1.12.17" +version = "1.12.18" description = "Display module dependencies" optional = false python-versions = "*" files = [ - {file = "pydeps-1.12.17-py3-none-any.whl", hash = "sha256:4fb2e86071c78c1b85a1c63745a267d100e91daf6bab2f14331b3c77433b58b4"}, - {file = "pydeps-1.12.17.tar.gz", hash = "sha256:c308e8355a1e77ff0af899d6f9f1665d4eb07019692dba9fb1dc1cab05df36a4"}, + {file = "pydeps-1.12.18-py3-none-any.whl", hash = "sha256:fc57f56a6eaf92ea6b9b503dc43d55f098661e253a868bbb52fccfbbcc8e79de"}, + {file = "pydeps-1.12.18.tar.gz", hash = "sha256:15c5d023b5053308e19a69591da06d9f3ff038e7a47111c40c9986b6a2929a4b"}, ] [package.dependencies] @@ -4783,71 +4738,71 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pyinstrument" -version = "4.6.1" +version = "4.6.2" description = "Call stack profiler for Python. Shows you why your code is slow!" optional = false python-versions = ">=3.7" files = [ - {file = "pyinstrument-4.6.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:73476e4bc6e467ac1b2c3c0dd1f0b71c9061d4de14626676adfdfbb14aa342b4"}, - {file = "pyinstrument-4.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4d1da8efd974cf9df52ee03edaee2d3875105ddd00de35aa542760f7c612bdf7"}, - {file = "pyinstrument-4.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:507be1ee2f2b0c9fba74d622a272640dd6d1b0c9ec3388b2cdeb97ad1e77125f"}, - {file = "pyinstrument-4.6.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:95cee6de08eb45754ef4f602ce52b640d1c535d934a6a8733a974daa095def37"}, - {file = "pyinstrument-4.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7873e8cec92321251fdf894a72b3c78f4c5c20afdd1fef0baf9042ec843bb04"}, - {file = "pyinstrument-4.6.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a242f6cac40bc83e1f3002b6b53681846dfba007f366971db0bf21e02dbb1903"}, - {file = "pyinstrument-4.6.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:97c9660cdb4bd2a43cf4f3ab52cffd22f3ac9a748d913b750178fb34e5e39e64"}, - {file = "pyinstrument-4.6.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e304cd0723e2b18ada5e63c187abf6d777949454c734f5974d64a0865859f0f4"}, - {file = "pyinstrument-4.6.1-cp310-cp310-win32.whl", hash = "sha256:cee21a2d78187dd8a80f72f5d0f1ddb767b2d9800f8bb4d94b6d11f217c22cdb"}, - {file = "pyinstrument-4.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:2000712f71d693fed2f8a1c1638d37b7919124f367b37976d07128d49f1445eb"}, - {file = "pyinstrument-4.6.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a366c6f3dfb11f1739bdc1dee75a01c1563ad0bf4047071e5e77598087df457f"}, - {file = "pyinstrument-4.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c6be327be65d934796558aa9cb0f75ce62ebd207d49ad1854610c97b0579ad47"}, - {file = "pyinstrument-4.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9e160d9c5d20d3e4ef82269e4e8b246ff09bdf37af5fb8cb8ccca97936d95ad6"}, - {file = 
"pyinstrument-4.6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ffbf56605ef21c2fcb60de2fa74ff81f417d8be0c5002a407e414d6ef6dee43"}, - {file = "pyinstrument-4.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c92cc4924596d6e8f30a16182bbe90893b1572d847ae12652f72b34a9a17c24a"}, - {file = "pyinstrument-4.6.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f4b48a94d938cae981f6948d9ec603bab2087b178d2095d042d5a48aabaecaab"}, - {file = "pyinstrument-4.6.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e7a386392275bdef4a1849712dc5b74f0023483fca14ef93d0ca27d453548982"}, - {file = "pyinstrument-4.6.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:871b131b83e9b1122f2325061c68ed1e861eebcb568c934d2fb193652f077f77"}, - {file = "pyinstrument-4.6.1-cp311-cp311-win32.whl", hash = "sha256:8d8515156dd91f5652d13b5fcc87e634f8fe1c07b68d1d0840348cdd50bf5ace"}, - {file = "pyinstrument-4.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:fb868fbe089036e9f32525a249f4c78b8dc46967612393f204b8234f439c9cc4"}, - {file = "pyinstrument-4.6.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a18cd234cce4f230f1733807f17a134e64a1f1acabf74a14d27f583cf2b183df"}, - {file = "pyinstrument-4.6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:574cfca69150be4ce4461fb224712fbc0722a49b0dc02fa204d02807adf6b5a0"}, - {file = "pyinstrument-4.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e02cf505e932eb8ccf561b7527550a67ec14fcae1fe0e25319b09c9c166e914"}, - {file = "pyinstrument-4.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:832fb2acef9d53701c1ab546564c45fb70a8770c816374f8dd11420d399103c9"}, - {file = "pyinstrument-4.6.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13cb57e9607545623ebe462345b3d0c4caee0125d2d02267043ece8aca8f4ea0"}, - {file = "pyinstrument-4.6.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9be89e7419bcfe8dd6abb0d959d6d9c439c613a4a873514c43d16b48dae697c9"}, - {file = "pyinstrument-4.6.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:476785cfbc44e8e1b1ad447398aa3deae81a8df4d37eb2d8bbb0c404eff979cd"}, - {file = "pyinstrument-4.6.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e9cebd90128a3d2fee36d3ccb665c1b9dce75261061b2046203e45c4a8012d54"}, - {file = "pyinstrument-4.6.1-cp312-cp312-win32.whl", hash = "sha256:1d0b76683df2ad5c40eff73607dc5c13828c92fbca36aff1ddf869a3c5a55fa6"}, - {file = "pyinstrument-4.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:c4b7af1d9d6a523cfbfedebcb69202242d5bd0cb89c4e094cc73d5d6e38279bd"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:79ae152f8c6a680a188fb3be5e0f360ac05db5bbf410169a6c40851dfaebcce9"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07cad2745964c174c65aa75f1bf68a4394d1b4d28f33894837cfd315d1e836f0"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb81f66f7f94045d723069cf317453d42375de9ff3c69089cf6466b078ac1db4"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab30ae75969da99e9a529e21ff497c18fdf958e822753db4ae7ed1e67094040"}, - {file = 
"pyinstrument-4.6.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f36cb5b644762fb3c86289324bbef17e95f91cd710603ac19444a47f638e8e96"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:8b45075d9dbbc977dbc7007fb22bb0054c6990fbe91bf48dd80c0b96c6307ba7"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:475ac31477f6302e092463896d6a2055f3e6abcd293bad16ff94fc9185308a88"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-win32.whl", hash = "sha256:29172ab3d8609fdf821c3f2562dc61e14f1a8ff5306607c32ca743582d3a760e"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-win_amd64.whl", hash = "sha256:bd176f297c99035127b264369d2bb97a65255f65f8d4e843836baf55ebb3cee4"}, - {file = "pyinstrument-4.6.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:23e9b4526978432e9999021da9a545992cf2ac3df5ee82db7beb6908fc4c978c"}, - {file = "pyinstrument-4.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2dbcaccc9f456ef95557ec501caeb292119c24446d768cb4fb43578b0f3d572c"}, - {file = "pyinstrument-4.6.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2097f63c66c2bc9678c826b9ff0c25acde3ed455590d9dcac21220673fe74fbf"}, - {file = "pyinstrument-4.6.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:205ac2e76bd65d61b9611a9ce03d5f6393e34ec5b41dd38808f25d54e6b3e067"}, - {file = "pyinstrument-4.6.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f414ddf1161976a40fc0a333000e6a4ad612719eac0b8c9bb73f47153187148"}, - {file = "pyinstrument-4.6.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:65e62ebfa2cd8fb57eda90006f4505ac4c70da00fc2f05b6d8337d776ea76d41"}, - {file = "pyinstrument-4.6.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d96309df4df10be7b4885797c5f69bb3a89414680ebaec0722d8156fde5268c3"}, - {file = "pyinstrument-4.6.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f3d1ad3bc8ebb4db925afa706aa865c4bfb40d52509f143491ac0df2440ee5d2"}, - {file = "pyinstrument-4.6.1-cp38-cp38-win32.whl", hash = "sha256:dc37cb988c8854eb42bda2e438aaf553536566657d157c4473cc8aad5692a779"}, - {file = "pyinstrument-4.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:2cd4ce750c34a0318fc2d6c727cc255e9658d12a5cf3f2d0473f1c27157bdaeb"}, - {file = "pyinstrument-4.6.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6ca95b21f022e995e062b371d1f42d901452bcbedd2c02f036de677119503355"}, - {file = "pyinstrument-4.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ac1e1d7e1f1b64054c4eb04eb4869a7a5eef2261440e73943cc1b1bc3c828c18"}, - {file = "pyinstrument-4.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0711845e953fce6ab781221aacffa2a66dbc3289f8343e5babd7b2ea34da6c90"}, - {file = "pyinstrument-4.6.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b7d28582017de35cb64eb4e4fa603e753095108ca03745f5d17295970ee631f"}, - {file = "pyinstrument-4.6.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7be57db08bd366a37db3aa3a6187941ee21196e8b14975db337ddc7d1490649d"}, - {file = "pyinstrument-4.6.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9a0ac0f56860398d2628ce389826ce83fb3a557d0c9a2351e8a2eac6eb869983"}, - {file = "pyinstrument-4.6.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a9045186ff13bc826fef16be53736a85029aae3c6adfe52e666cad00d7ca623b"}, - {file = 
"pyinstrument-4.6.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6c4c56b6eab9004e92ad8a48bb54913fdd71fc8a748ae42a27b9e26041646f8b"}, - {file = "pyinstrument-4.6.1-cp39-cp39-win32.whl", hash = "sha256:37e989c44b51839d0c97466fa2b623638b9470d56d79e329f359f0e8fa6d83db"}, - {file = "pyinstrument-4.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:5494c5a84fee4309d7d973366ca6b8b9f8ba1d6b254e93b7c506264ef74f2cef"}, - {file = "pyinstrument-4.6.1.tar.gz", hash = "sha256:f4731b27121350f5a983d358d2272fe3df2f538aed058f57217eef7801a89288"}, + {file = "pyinstrument-4.6.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7a1b1cd768ea7ea9ab6f5490f7e74431321bcc463e9441dbc2f769617252d9e2"}, + {file = "pyinstrument-4.6.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8a386b9d09d167451fb2111eaf86aabf6e094fed42c15f62ec51d6980bce7d96"}, + {file = "pyinstrument-4.6.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23c3e3ca8553b9aac09bd978c73d21b9032c707ac6d803bae6a20ecc048df4a8"}, + {file = "pyinstrument-4.6.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5f329f5534ca069420246f5ce57270d975229bcb92a3a3fd6b2ca086527d9764"}, + {file = "pyinstrument-4.6.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4dcdcc7ba224a0c5edfbd00b0f530f5aed2b26da5aaa2f9af5519d4aa8c7e41"}, + {file = "pyinstrument-4.6.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73db0c2c99119c65b075feee76e903b4ed82e59440fe8b5724acf5c7cb24721f"}, + {file = "pyinstrument-4.6.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:da58f265326f3cf3975366ccb8b39014f1e69ff8327958a089858d71c633d654"}, + {file = "pyinstrument-4.6.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:feebcf860f955401df30d029ec8de7a0c5515d24ea809736430fd1219686fe14"}, + {file = "pyinstrument-4.6.2-cp310-cp310-win32.whl", hash = "sha256:b2b66ff0b16c8ecf1ec22de001cfff46872b2c163c62429055105564eef50b2e"}, + {file = "pyinstrument-4.6.2-cp310-cp310-win_amd64.whl", hash = "sha256:8d104b7a7899d5fa4c5bf1ceb0c1a070615a72c5dc17bc321b612467ad5c5d88"}, + {file = "pyinstrument-4.6.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:62f6014d2b928b181a52483e7c7b82f2c27e22c577417d1681153e5518f03317"}, + {file = "pyinstrument-4.6.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dcb5c8d763c5df55131670ba2a01a8aebd0d490a789904a55eb6a8b8d497f110"}, + {file = "pyinstrument-4.6.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ed4e8c6c84e0e6429ba7008a66e435ede2d8cb027794c20923c55669d9c5633"}, + {file = "pyinstrument-4.6.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c0f0e1d8f8c70faa90ff57f78ac0dda774b52ea0bfb2d9f0f41ce6f3e7c869e"}, + {file = "pyinstrument-4.6.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b3c44cb037ad0d6e9d9a48c14d856254ada641fbd0ae9de40da045fc2226a2a"}, + {file = "pyinstrument-4.6.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:be9901f17ac2f527c352f2fdca3d717c1d7f2ce8a70bad5a490fc8cc5d2a6007"}, + {file = "pyinstrument-4.6.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:8a9791bf8916c1cf439c202fded32de93354b0f57328f303d71950b0027c7811"}, + {file = "pyinstrument-4.6.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d6162615e783c59e36f2d7caf903a7e3ecb6b32d4a4ae8907f2760b2ef395bf6"}, + {file = 
"pyinstrument-4.6.2-cp311-cp311-win32.whl", hash = "sha256:28af084aa84bbfd3620ebe71d5f9a0deca4451267f363738ca824f733de55056"}, + {file = "pyinstrument-4.6.2-cp311-cp311-win_amd64.whl", hash = "sha256:dd6007d3c2e318e09e582435dd8d111cccf30d342af66886b783208813caf3d7"}, + {file = "pyinstrument-4.6.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e3813c8ecfab9d7d855c5f0f71f11793cf1507f40401aa33575c7fd613577c23"}, + {file = "pyinstrument-4.6.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6c761372945e60fc1396b7a49f30592e8474e70a558f1a87346d27c8c4ce50f7"}, + {file = "pyinstrument-4.6.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fba3244e94c117bf4d9b30b8852bbdcd510e7329fdd5c7c8b3799e00a9215a8"}, + {file = "pyinstrument-4.6.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:803ac64e526473d64283f504df3b0d5c2c203ea9603cab428641538ffdc753a7"}, + {file = "pyinstrument-4.6.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2e554b1bb0df78f5ce8a92df75b664912ca93aa94208386102af454ec31b647"}, + {file = "pyinstrument-4.6.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7c671057fad22ee3ded897a6a361204ea2538e44c1233cad0e8e30f6d27f33db"}, + {file = "pyinstrument-4.6.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:d02f31fa13a9e8dc702a113878419deba859563a32474c9f68e04619d43d6f01"}, + {file = "pyinstrument-4.6.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b55983a884f083f93f0fc6d12ff8df0acd1e2fb0580d2f4c7bfe6def33a84b58"}, + {file = "pyinstrument-4.6.2-cp312-cp312-win32.whl", hash = "sha256:fdc0a53b27e5d8e47147489c7dab596ddd1756b1e053217ef5bc6718567099ff"}, + {file = "pyinstrument-4.6.2-cp312-cp312-win_amd64.whl", hash = "sha256:dd5c53a0159126b5ce7cbc4994433c9c671e057c85297ff32645166a06ad2c50"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b082df0bbf71251a7f4880a12ed28421dba84ea7110bb376e0533067a4eaff40"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90350533396071cb2543affe01e40bf534c35cb0d4b8fa9fdb0f052f9ca2cfe3"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67268bb0d579330cff40fd1c90b8510363ca1a0e7204225840614068658dab77"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20e15b4e1d29ba0b7fc81aac50351e0dc0d7e911e93771ebc3f408e864a2c93b"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2e625fc6ffcd4fd420493edd8276179c3f784df207bef4c2192725c1b310534c"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:113d2fc534c9ca7b6b5661d6ada05515bf318f6eb34e8d05860fe49eb7cfe17e"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3098cd72b71a322a72dafeb4ba5c566465e193d2030adad4c09566bd2f89bf4f"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-win32.whl", hash = "sha256:08fdc7f88c989316fa47805234c37a40fafe7b614afd8ae863f0afa9d1707b37"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-win_amd64.whl", hash = "sha256:5ebeba952c0056dcc9b9355328c78c4b5c2a33b4b4276a9157a3ab589f3d1bac"}, + {file = "pyinstrument-4.6.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:34e59e91c88ec9ad5630c0964eca823949005e97736bfa838beb4789e94912a2"}, + {file = 
"pyinstrument-4.6.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cd0320c39e99e3c0a3129d1ed010ac41e5a7eb96fb79900d270080a97962e995"}, + {file = "pyinstrument-4.6.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46992e855d630575ec635eeca0068a8ddf423d4fd32ea0875a94e9f8688f0b95"}, + {file = "pyinstrument-4.6.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e474c56da636253dfdca7cd1998b240d6b39f7ed34777362db69224fcf053b1"}, + {file = "pyinstrument-4.6.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4b559322f30509ad8f082561792352d0805b3edfa508e492a36041fdc009259"}, + {file = "pyinstrument-4.6.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:06a8578b2943eb1dbbf281e1e59e44246acfefd79e1b06d4950f01b693de12af"}, + {file = "pyinstrument-4.6.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7bd3da31c46f1c1cb7ae89031725f6a1d1015c2041d9c753fe23980f5f9fd86c"}, + {file = "pyinstrument-4.6.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e63f4916001aa9c625976a50779282e0a5b5e9b17c52a50ef4c651e468ed5b88"}, + {file = "pyinstrument-4.6.2-cp38-cp38-win32.whl", hash = "sha256:32ec8db6896b94af790a530e1e0edad4d0f941a0ab8dd9073e5993e7ea46af7d"}, + {file = "pyinstrument-4.6.2-cp38-cp38-win_amd64.whl", hash = "sha256:a59fc4f7db738a094823afe6422509fa5816a7bf74e768ce5a7a2ddd91af40ac"}, + {file = "pyinstrument-4.6.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3a165e0d2deb212d4cf439383982a831682009e1b08733c568cac88c89784e62"}, + {file = "pyinstrument-4.6.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7ba858b3d6f6e5597c641edcc0e7e464f85aba86d71bc3b3592cb89897bf43f6"}, + {file = "pyinstrument-4.6.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fd8e547cf3df5f0ec6e4dffbe2e857f6b28eda51b71c3c0b5a2fc0646527835"}, + {file = "pyinstrument-4.6.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0de2c1714a37a820033b19cf134ead43299a02662f1379140974a9ab733c5f3a"}, + {file = "pyinstrument-4.6.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01fc45dedceec3df81668d702bca6d400d956c8b8494abc206638c167c78dfd9"}, + {file = "pyinstrument-4.6.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5b6e161ef268d43ee6bbfae7fd2cdd0a52c099ddd21001c126ca1805dc906539"}, + {file = "pyinstrument-4.6.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6ba8e368d0421f15ba6366dfd60ec131c1b46505d021477e0f865d26cf35a605"}, + {file = "pyinstrument-4.6.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edca46f04a573ac2fb11a84b937844e6a109f38f80f4b422222fb5be8ecad8cb"}, + {file = "pyinstrument-4.6.2-cp39-cp39-win32.whl", hash = "sha256:baf375953b02fe94d00e716f060e60211ede73f49512b96687335f7071adb153"}, + {file = "pyinstrument-4.6.2-cp39-cp39-win_amd64.whl", hash = "sha256:af1a953bce9fd530040895d01ff3de485e25e1576dccb014f76ba9131376fcad"}, + {file = "pyinstrument-4.6.2.tar.gz", hash = "sha256:0002ee517ed8502bbda6eb2bb1ba8f95a55492fcdf03811ba13d4806e50dd7f6"}, ] [package.extras] @@ -5317,13 +5272,13 @@ six = ">=1.5" [[package]] name = "pytz" -version = "2023.3.post1" +version = "2023.4" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" files = [ - {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = 
"sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, - {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, + {file = "pytz-2023.4-py2.py3-none-any.whl", hash = "sha256:f90ef520d95e7c46951105338d918664ebfd6f1d995bd7d153127ce90efafa6a"}, + {file = "pytz-2023.4.tar.gz", hash = "sha256:31d4583c4ed539cd037956140d695e42c033a19e984bfce9964a3f7d59bc2b40"}, ] [[package]] @@ -5385,7 +5340,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -5656,13 +5610,13 @@ full = ["numpy"] [[package]] name = "referencing" -version = "0.32.1" +version = "0.33.0" description = "JSON Referencing + Python" optional = false python-versions = ">=3.8" files = [ - {file = "referencing-0.32.1-py3-none-any.whl", hash = "sha256:7e4dc12271d8e15612bfe35792f5ea1c40970dadf8624602e33db2758f7ee554"}, - {file = "referencing-0.32.1.tar.gz", hash = "sha256:3c57da0513e9563eb7e203ebe9bb3a1b509b042016433bd1e45a2853466c3dd3"}, + {file = "referencing-0.33.0-py3-none-any.whl", hash = "sha256:39240f2ecc770258f28b642dd47fd74bc8b02484de54e1882b74b35ebd779bd5"}, + {file = "referencing-0.33.0.tar.gz", hash = "sha256:c775fedf74bc0f9189c2a3be1c12fd03e8c23f4d371dce795df44e06c5b412f7"}, ] [package.dependencies] @@ -5967,118 +5921,133 @@ pyasn1 = ">=0.1.3" [[package]] name = "ruff" -version = "0.1.13" +version = "0.1.15" description = "An extremely fast Python linter and code formatter, written in Rust." 
optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.1.13-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:e3fd36e0d48aeac672aa850045e784673449ce619afc12823ea7868fcc41d8ba"}, - {file = "ruff-0.1.13-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:9fb6b3b86450d4ec6a6732f9f60c4406061b6851c4b29f944f8c9d91c3611c7a"}, - {file = "ruff-0.1.13-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b13ba5d7156daaf3fd08b6b993360a96060500aca7e307d95ecbc5bb47a69296"}, - {file = "ruff-0.1.13-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9ebb40442f7b531e136d334ef0851412410061e65d61ca8ce90d894a094feb22"}, - {file = "ruff-0.1.13-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:226b517f42d59a543d6383cfe03cccf0091e3e0ed1b856c6824be03d2a75d3b6"}, - {file = "ruff-0.1.13-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5f0312ba1061e9b8c724e9a702d3c8621e3c6e6c2c9bd862550ab2951ac75c16"}, - {file = "ruff-0.1.13-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2f59bcf5217c661254bd6bc42d65a6fd1a8b80c48763cb5c2293295babd945dd"}, - {file = "ruff-0.1.13-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6894b00495e00c27b6ba61af1fc666f17de6140345e5ef27dd6e08fb987259d"}, - {file = "ruff-0.1.13-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a1600942485c6e66119da294c6294856b5c86fd6df591ce293e4a4cc8e72989"}, - {file = "ruff-0.1.13-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ee3febce7863e231a467f90e681d3d89210b900d49ce88723ce052c8761be8c7"}, - {file = "ruff-0.1.13-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dcaab50e278ff497ee4d1fe69b29ca0a9a47cd954bb17963628fa417933c6eb1"}, - {file = "ruff-0.1.13-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f57de973de4edef3ad3044d6a50c02ad9fc2dff0d88587f25f1a48e3f72edf5e"}, - {file = "ruff-0.1.13-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:7a36fa90eb12208272a858475ec43ac811ac37e91ef868759770b71bdabe27b6"}, - {file = "ruff-0.1.13-py3-none-win32.whl", hash = "sha256:a623349a505ff768dad6bd57087e2461be8db58305ebd5577bd0e98631f9ae69"}, - {file = "ruff-0.1.13-py3-none-win_amd64.whl", hash = "sha256:f988746e3c3982bea7f824c8fa318ce7f538c4dfefec99cd09c8770bd33e6539"}, - {file = "ruff-0.1.13-py3-none-win_arm64.whl", hash = "sha256:6bbbc3042075871ec17f28864808540a26f0f79a4478c357d3e3d2284e832998"}, - {file = "ruff-0.1.13.tar.gz", hash = "sha256:e261f1baed6291f434ffb1d5c6bd8051d1c2a26958072d38dfbec39b3dda7352"}, + {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:5fe8d54df166ecc24106db7dd6a68d44852d14eb0729ea4672bb4d96c320b7df"}, + {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6f0bfbb53c4b4de117ac4d6ddfd33aa5fc31beeaa21d23c45c6dd249faf9126f"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0d432aec35bfc0d800d4f70eba26e23a352386be3a6cf157083d18f6f5881c8"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9405fa9ac0e97f35aaddf185a1be194a589424b8713e3b97b762336ec79ff807"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c66ec24fe36841636e814b8f90f572a8c0cb0e54d8b5c2d0e300d28a0d7bffec"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = 
"sha256:6f8ad828f01e8dd32cc58bc28375150171d198491fc901f6f98d2a39ba8e3ff5"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86811954eec63e9ea162af0ffa9f8d09088bab51b7438e8b6488b9401863c25e"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd4025ac5e87d9b80e1f300207eb2fd099ff8200fa2320d7dc066a3f4622dc6b"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b17b93c02cdb6aeb696effecea1095ac93f3884a49a554a9afa76bb125c114c1"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ddb87643be40f034e97e97f5bc2ef7ce39de20e34608f3f829db727a93fb82c5"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:abf4822129ed3a5ce54383d5f0e964e7fef74a41e48eb1dfad404151efc130a2"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_i686.whl", hash = "sha256:6c629cf64bacfd136c07c78ac10a54578ec9d1bd2a9d395efbee0935868bf852"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1bab866aafb53da39c2cadfb8e1c4550ac5340bb40300083eb8967ba25481447"}, + {file = "ruff-0.1.15-py3-none-win32.whl", hash = "sha256:2417e1cb6e2068389b07e6fa74c306b2810fe3ee3476d5b8a96616633f40d14f"}, + {file = "ruff-0.1.15-py3-none-win_amd64.whl", hash = "sha256:3837ac73d869efc4182d9036b1405ef4c73d9b1f88da2413875e34e0d6919587"}, + {file = "ruff-0.1.15-py3-none-win_arm64.whl", hash = "sha256:9a933dfb1c14ec7a33cceb1e49ec4a16b51ce3c20fd42663198746efc0427360"}, + {file = "ruff-0.1.15.tar.gz", hash = "sha256:f6dfa8c1b21c913c326919056c390966648b680966febcb796cc9d1aaab8564e"}, ] [[package]] name = "scikit-learn" -version = "1.3.2" +version = "1.4.0" description = "A set of python modules for machine learning and data mining" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "scikit-learn-1.3.2.tar.gz", hash = "sha256:a2f54c76accc15a34bfb9066e6c7a56c1e7235dda5762b990792330b52ccfb05"}, - {file = "scikit_learn-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e326c0eb5cf4d6ba40f93776a20e9a7a69524c4db0757e7ce24ba222471ee8a1"}, - {file = "scikit_learn-1.3.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:535805c2a01ccb40ca4ab7d081d771aea67e535153e35a1fd99418fcedd1648a"}, - {file = "scikit_learn-1.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1215e5e58e9880b554b01187b8c9390bf4dc4692eedeaf542d3273f4785e342c"}, - {file = "scikit_learn-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ee107923a623b9f517754ea2f69ea3b62fc898a3641766cb7deb2f2ce450161"}, - {file = "scikit_learn-1.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:35a22e8015048c628ad099da9df5ab3004cdbf81edc75b396fd0cff8699ac58c"}, - {file = "scikit_learn-1.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6fb6bc98f234fda43163ddbe36df8bcde1d13ee176c6dc9b92bb7d3fc842eb66"}, - {file = "scikit_learn-1.3.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:18424efee518a1cde7b0b53a422cde2f6625197de6af36da0b57ec502f126157"}, - {file = "scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3271552a5eb16f208a6f7f617b8cc6d1f137b52c8a1ef8edf547db0259b2c9fb"}, - {file = "scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc4144a5004a676d5022b798d9e573b05139e77f271253a4703eed295bde0433"}, - {file = "scikit_learn-1.3.2-cp311-cp311-win_amd64.whl", hash = 
"sha256:67f37d708f042a9b8d59551cf94d30431e01374e00dc2645fa186059c6c5d78b"}, - {file = "scikit_learn-1.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8db94cd8a2e038b37a80a04df8783e09caac77cbe052146432e67800e430c028"}, - {file = "scikit_learn-1.3.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:61a6efd384258789aa89415a410dcdb39a50e19d3d8410bd29be365bcdd512d5"}, - {file = "scikit_learn-1.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb06f8dce3f5ddc5dee1715a9b9f19f20d295bed8e3cd4fa51e1d050347de525"}, - {file = "scikit_learn-1.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b2de18d86f630d68fe1f87af690d451388bb186480afc719e5f770590c2ef6c"}, - {file = "scikit_learn-1.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:0402638c9a7c219ee52c94cbebc8fcb5eb9fe9c773717965c1f4185588ad3107"}, - {file = "scikit_learn-1.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a19f90f95ba93c1a7f7924906d0576a84da7f3b2282ac3bfb7a08a32801add93"}, - {file = "scikit_learn-1.3.2-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:b8692e395a03a60cd927125eef3a8e3424d86dde9b2370d544f0ea35f78a8073"}, - {file = "scikit_learn-1.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15e1e94cc23d04d39da797ee34236ce2375ddea158b10bee3c343647d615581d"}, - {file = "scikit_learn-1.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:785a2213086b7b1abf037aeadbbd6d67159feb3e30263434139c98425e3dcfcf"}, - {file = "scikit_learn-1.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:64381066f8aa63c2710e6b56edc9f0894cc7bf59bd71b8ce5613a4559b6145e0"}, - {file = "scikit_learn-1.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6c43290337f7a4b969d207e620658372ba3c1ffb611f8bc2b6f031dc5c6d1d03"}, - {file = "scikit_learn-1.3.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:dc9002fc200bed597d5d34e90c752b74df516d592db162f756cc52836b38fe0e"}, - {file = "scikit_learn-1.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d08ada33e955c54355d909b9c06a4789a729977f165b8bae6f225ff0a60ec4a"}, - {file = "scikit_learn-1.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:763f0ae4b79b0ff9cca0bf3716bcc9915bdacff3cebea15ec79652d1cc4fa5c9"}, - {file = "scikit_learn-1.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:ed932ea780517b00dae7431e031faae6b49b20eb6950918eb83bd043237950e0"}, -] - -[package.dependencies] -joblib = ">=1.1.1" -numpy = ">=1.17.3,<2.0" -scipy = ">=1.5.0" + {file = "scikit-learn-1.4.0.tar.gz", hash = "sha256:d4373c984eba20e393216edd51a3e3eede56cbe93d4247516d205643c3b93121"}, + {file = "scikit_learn-1.4.0-1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fce93a7473e2f4ee4cc280210968288d6a7d7ad8dc6fa7bb7892145e407085f9"}, + {file = "scikit_learn-1.4.0-1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d77df3d1e15fc37a9329999979fa7868ba8655dbab21fe97fc7ddabac9e08cc7"}, + {file = "scikit_learn-1.4.0-1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2404659fedec40eeafa310cd14d613e564d13dbf8f3c752d31c095195ec05de6"}, + {file = "scikit_learn-1.4.0-1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e98632da8f6410e6fb6bf66937712c949b4010600ccd3f22a5388a83e610cc3c"}, + {file = "scikit_learn-1.4.0-1-cp310-cp310-win_amd64.whl", hash = "sha256:11b3b140f70fbc9f6a08884631ae8dd60a4bb2d7d6d1de92738ea42b740d8992"}, + {file = "scikit_learn-1.4.0-1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:a8341eabdc754d5ab91641a7763243845e96b6d68e03e472531e88a4f1b09f21"}, + {file = "scikit_learn-1.4.0-1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d1f6bce875ac2bb6b52514f67c185c564ccd299a05b65b7bab091a4c13dde12d"}, + {file = "scikit_learn-1.4.0-1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c408b46b2fd61952d519ea1af2f8f0a7a703e1433923ab1704c4131520b2083b"}, + {file = "scikit_learn-1.4.0-1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b465dd1dcd237b7b1dcd1a9048ccbf70a98c659474324fa708464c3a2533fad"}, + {file = "scikit_learn-1.4.0-1-cp311-cp311-win_amd64.whl", hash = "sha256:0db8e22c42f7980fe5eb22069b1f84c48966f3e0d23a01afde5999e3987a2501"}, + {file = "scikit_learn-1.4.0-1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e7eef6ea2ed289af40e88c0be9f7704ca8b5de18508a06897c3fe21e0905efdf"}, + {file = "scikit_learn-1.4.0-1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:349669b01435bc4dbf25c6410b0892073befdaec52637d1a1d1ff53865dc8db3"}, + {file = "scikit_learn-1.4.0-1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d439c584e58434d0350701bd33f6c10b309e851fccaf41c121aed55f6851d8cf"}, + {file = "scikit_learn-1.4.0-1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0e2427d9ef46477625ab9b55c1882844fe6fc500f418c3f8e650200182457bc"}, + {file = "scikit_learn-1.4.0-1-cp312-cp312-win_amd64.whl", hash = "sha256:d3d75343940e7bf9b85c830c93d34039fa015eeb341c5c0b4cd7a90dadfe00d4"}, + {file = "scikit_learn-1.4.0-1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:76986d22e884ab062b1beecdd92379656e9d3789ecc1f9870923c178de55f9fe"}, + {file = "scikit_learn-1.4.0-1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:e22446ad89f1cb7657f0d849dcdc345b48e2d10afa3daf2925fdb740f85b714c"}, + {file = "scikit_learn-1.4.0-1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74812c9eabb265be69d738a8ea8d4884917a59637fcbf88a5f0e9020498bc6b3"}, + {file = "scikit_learn-1.4.0-1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad2a63e0dd386b92da3270887a29b308af4d7c750d8c4995dfd9a4798691bcc"}, + {file = "scikit_learn-1.4.0-1-cp39-cp39-win_amd64.whl", hash = "sha256:53b9e29177897c37e2ff9d4ba6ca12fdb156e22523e463db05def303f5c72b5c"}, + {file = "scikit_learn-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cb8f044a8f5962613ce1feb4351d66f8d784bd072d36393582f351859b065f7d"}, + {file = "scikit_learn-1.4.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:a6372c90bbf302387792108379f1ec77719c1618d88496d0df30cb8e370b4661"}, + {file = "scikit_learn-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:785ce3c352bf697adfda357c3922c94517a9376002971bc5ea50896144bc8916"}, + {file = "scikit_learn-1.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0aba2a20d89936d6e72d95d05e3bf1db55bca5c5920926ad7b92c34f5e7d3bbe"}, + {file = "scikit_learn-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:2bac5d56b992f8f06816f2cd321eb86071c6f6d44bb4b1cb3d626525820d754b"}, + {file = "scikit_learn-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:27ae4b0f1b2c77107c096a7e05b33458354107b47775428d1f11b23e30a73e8a"}, + {file = "scikit_learn-1.4.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5c5c62ffb52c3ffb755eb21fa74cc2cbf2c521bd53f5c04eaa10011dbecf5f80"}, + {file = "scikit_learn-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:7f0d2018ac6fa055dab65fe8a485967990d33c672d55bc254c56c35287b02fab"}, + {file = "scikit_learn-1.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91a8918c415c4b4bf1d60c38d32958849a9191c2428ab35d30b78354085c7c7a"}, + {file = "scikit_learn-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:80a21de63275f8bcd7877b3e781679d2ff1eddfed515a599f95b2502a3283d42"}, + {file = "scikit_learn-1.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0f33bbafb310c26b81c4d41ecaebdbc1f63498a3f13461d50ed9a2e8f24d28e4"}, + {file = "scikit_learn-1.4.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:8b6ac1442ec714b4911e5aef8afd82c691b5c88b525ea58299d455acc4e8dcec"}, + {file = "scikit_learn-1.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05fc5915b716c6cc60a438c250108e9a9445b522975ed37e416d5ea4f9a63381"}, + {file = "scikit_learn-1.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:842b7d6989f3c574685e18da6f91223eb32301d0f93903dd399894250835a6f7"}, + {file = "scikit_learn-1.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:88bcb586fdff865372df1bc6be88bb7e6f9e0aa080dab9f54f5cac7eca8e2b6b"}, + {file = "scikit_learn-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f77674647dd31f56cb12ed13ed25b6ed43a056fffef051715022d2ebffd7a7d1"}, + {file = "scikit_learn-1.4.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:833999872e2920ce00f3a50839946bdac7539454e200eb6db54898a41f4bfd43"}, + {file = "scikit_learn-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:970ec697accaef10fb4f51763f3a7b1250f9f0553cf05514d0e94905322a0172"}, + {file = "scikit_learn-1.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:923d778f378ebacca2c672ab1740e5a413e437fb45ab45ab02578f8b689e5d43"}, + {file = "scikit_learn-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:1d041bc95006b545b59e458399e3175ab11ca7a03dc9a74a573ac891f5df1489"}, +] + +[package.dependencies] +joblib = ">=1.2.0" +numpy = ">=1.19.5" +scipy = ">=1.6.0" threadpoolctl = ">=2.0.0" [package.extras] -benchmark = ["matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "pandas (>=1.0.5)"] -docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.10.1)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] -examples = ["matplotlib (>=3.1.3)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)"] -tests = ["black (>=23.3.0)", "matplotlib (>=3.1.3)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.16.2)"] +benchmark = ["matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)", "pandas (>=1.1.5)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.15.0)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] +examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] +tests = ["black (>=23.3.0)", "matplotlib 
(>=3.3.4)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.19.12)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.17.2)"] [[package]] name = "scipy" -version = "1.11.4" +version = "1.12.0" description = "Fundamental algorithms for scientific computing in Python" optional = false python-versions = ">=3.9" files = [ - {file = "scipy-1.11.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc9a714581f561af0848e6b69947fda0614915f072dfd14142ed1bfe1b806710"}, - {file = "scipy-1.11.4-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:cf00bd2b1b0211888d4dc75656c0412213a8b25e80d73898083f402b50f47e41"}, - {file = "scipy-1.11.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9999c008ccf00e8fbcce1236f85ade5c569d13144f77a1946bef8863e8f6eb4"}, - {file = "scipy-1.11.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:933baf588daa8dc9a92c20a0be32f56d43faf3d1a60ab11b3f08c356430f6e56"}, - {file = "scipy-1.11.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8fce70f39076a5aa62e92e69a7f62349f9574d8405c0a5de6ed3ef72de07f446"}, - {file = "scipy-1.11.4-cp310-cp310-win_amd64.whl", hash = "sha256:6550466fbeec7453d7465e74d4f4b19f905642c89a7525571ee91dd7adabb5a3"}, - {file = "scipy-1.11.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f313b39a7e94f296025e3cffc2c567618174c0b1dde173960cf23808f9fae4be"}, - {file = "scipy-1.11.4-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1b7c3dca977f30a739e0409fb001056484661cb2541a01aba0bb0029f7b68db8"}, - {file = "scipy-1.11.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00150c5eae7b610c32589dda259eacc7c4f1665aedf25d921907f4d08a951b1c"}, - {file = "scipy-1.11.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:530f9ad26440e85766509dbf78edcfe13ffd0ab7fec2560ee5c36ff74d6269ff"}, - {file = "scipy-1.11.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5e347b14fe01003d3b78e196e84bd3f48ffe4c8a7b8a1afbcb8f5505cb710993"}, - {file = "scipy-1.11.4-cp311-cp311-win_amd64.whl", hash = "sha256:acf8ed278cc03f5aff035e69cb511741e0418681d25fbbb86ca65429c4f4d9cd"}, - {file = "scipy-1.11.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:028eccd22e654b3ea01ee63705681ee79933652b2d8f873e7949898dda6d11b6"}, - {file = "scipy-1.11.4-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2c6ff6ef9cc27f9b3db93a6f8b38f97387e6e0591600369a297a50a8e96e835d"}, - {file = "scipy-1.11.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b030c6674b9230d37c5c60ab456e2cf12f6784596d15ce8da9365e70896effc4"}, - {file = "scipy-1.11.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad669df80528aeca5f557712102538f4f37e503f0c5b9541655016dd0932ca79"}, - {file = "scipy-1.11.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ce7fff2e23ab2cc81ff452a9444c215c28e6305f396b2ba88343a567feec9660"}, - {file = "scipy-1.11.4-cp312-cp312-win_amd64.whl", hash = "sha256:36750b7733d960d7994888f0d148d31ea3017ac15eef664194b4ef68d36a4a97"}, - {file = "scipy-1.11.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6e619aba2df228a9b34718efb023966da781e89dd3d21637b27f2e54db0410d7"}, - {file = "scipy-1.11.4-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:f3cd9e7b3c2c1ec26364856f9fbe78695fe631150f94cd1c22228456404cf1ec"}, - {file = "scipy-1.11.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:d10e45a6c50211fe256da61a11c34927c68f277e03138777bdebedd933712fea"}, - {file = "scipy-1.11.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91af76a68eeae0064887a48e25c4e616fa519fa0d38602eda7e0f97d65d57937"}, - {file = "scipy-1.11.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6df1468153a31cf55ed5ed39647279beb9cfb5d3f84369453b49e4b8502394fd"}, - {file = "scipy-1.11.4-cp39-cp39-win_amd64.whl", hash = "sha256:ee410e6de8f88fd5cf6eadd73c135020bfbbbdfcd0f6162c36a7638a1ea8cc65"}, - {file = "scipy-1.11.4.tar.gz", hash = "sha256:90a2b78e7f5733b9de748f589f09225013685f9b218275257f8a8168ededaeaa"}, -] - -[package.dependencies] -numpy = ">=1.21.6,<1.28.0" + {file = "scipy-1.12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:78e4402e140879387187f7f25d91cc592b3501a2e51dfb320f48dfb73565f10b"}, + {file = "scipy-1.12.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f5f00ebaf8de24d14b8449981a2842d404152774c1a1d880c901bf454cb8e2a1"}, + {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e53958531a7c695ff66c2e7bb7b79560ffdc562e2051644c5576c39ff8efb563"}, + {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e32847e08da8d895ce09d108a494d9eb78974cf6de23063f93306a3e419960c"}, + {file = "scipy-1.12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4c1020cad92772bf44b8e4cdabc1df5d87376cb219742549ef69fc9fd86282dd"}, + {file = "scipy-1.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:75ea2a144096b5e39402e2ff53a36fecfd3b960d786b7efd3c180e29c39e53f2"}, + {file = "scipy-1.12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:408c68423f9de16cb9e602528be4ce0d6312b05001f3de61fe9ec8b1263cad08"}, + {file = "scipy-1.12.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5adfad5dbf0163397beb4aca679187d24aec085343755fcdbdeb32b3679f254c"}, + {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3003652496f6e7c387b1cf63f4bb720951cfa18907e998ea551e6de51a04467"}, + {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b8066bce124ee5531d12a74b617d9ac0ea59245246410e19bca549656d9a40a"}, + {file = "scipy-1.12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8bee4993817e204d761dba10dbab0774ba5a8612e57e81319ea04d84945375ba"}, + {file = "scipy-1.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:a24024d45ce9a675c1fb8494e8e5244efea1c7a09c60beb1eeb80373d0fecc70"}, + {file = "scipy-1.12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e7e76cc48638228212c747ada851ef355c2bb5e7f939e10952bc504c11f4e372"}, + {file = "scipy-1.12.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f7ce148dffcd64ade37b2df9315541f9adad6efcaa86866ee7dd5db0c8f041c3"}, + {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c39f92041f490422924dfdb782527a4abddf4707616e07b021de33467f917bc"}, + {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7ebda398f86e56178c2fa94cad15bf457a218a54a35c2a7b4490b9f9cb2676c"}, + {file = "scipy-1.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:95e5c750d55cf518c398a8240571b0e0782c2d5a703250872f36eaf737751338"}, + {file = "scipy-1.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e646d8571804a304e1da01040d21577685ce8e2db08ac58e543eaca063453e1c"}, + {file = "scipy-1.12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:913d6e7956c3a671de3b05ccb66b11bc293f56bfdef040583a7221d9e22a2e35"}, + 
{file = "scipy-1.12.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba1b0c7256ad75401c73e4b3cf09d1f176e9bd4248f0d3112170fb2ec4db067"}, + {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:730badef9b827b368f351eacae2e82da414e13cf8bd5051b4bdfd720271a5371"}, + {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6546dc2c11a9df6926afcbdd8a3edec28566e4e785b915e849348c6dd9f3f490"}, + {file = "scipy-1.12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:196ebad3a4882081f62a5bf4aeb7326aa34b110e533aab23e4374fcccb0890dc"}, + {file = "scipy-1.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:b360f1b6b2f742781299514e99ff560d1fe9bd1bff2712894b52abe528d1fd1e"}, + {file = "scipy-1.12.0.tar.gz", hash = "sha256:4bf5abab8a36d20193c698b0f1fc282c1d083c94723902c447e5d2f1780936a3"}, +] + +[package.dependencies] +numpy = ">=1.22.4,<1.29.0" [package.extras] dev = ["click", "cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] doc = ["jupytext", "matplotlib (>2)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"] -test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] +test = ["asv", "gmpy2", "hypothesis", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] [[package]] name = "scooby" @@ -6096,13 +6065,13 @@ cpu = ["mkl", "psutil"] [[package]] name = "seaborn" -version = "0.13.1" +version = "0.13.2" description = "Statistical data visualization" optional = false python-versions = ">=3.8" files = [ - {file = "seaborn-0.13.1-py3-none-any.whl", hash = "sha256:6baa69b6d1169ae59037971491c450c0b73332b42bd4b23570b62a546bc61cb8"}, - {file = "seaborn-0.13.1.tar.gz", hash = "sha256:bfad65e9c5989e5e1897e61bdbd2f22e62455940ca76fd49eca3ed69345b9179"}, + {file = "seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987"}, + {file = "seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7"}, ] [package.dependencies] @@ -6227,32 +6196,37 @@ files = [ [[package]] name = "snowflake-connector-python" -version = "3.6.0" +version = "3.7.0" description = "Snowflake Connector for Python" optional = true python-versions = ">=3.8" files = [ - {file = "snowflake-connector-python-3.6.0.tar.gz", hash = "sha256:15667a918780d79da755e6a60bbf6918051854951e8f56ccdf5692283e9a8479"}, - {file = "snowflake_connector_python-3.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4093b38cf9abf95c38119f0b23b07e23dc7a8689b956cd5d34975e1875741f20"}, - {file = "snowflake_connector_python-3.6.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:cf5a964fe01b177063f8c44d14df3a72715580bcd195788ec2822090f37330a5"}, - {file = "snowflake_connector_python-3.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55a6418cec585b050e6f05404f25e62b075a3bbea587dc1f903de15640565c58"}, - {file = "snowflake_connector_python-3.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7c76aea92b87f6ecd604e9c934aac8a779f2e20f3be1d990d53bb5b6d87b009"}, - {file = "snowflake_connector_python-3.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:9dfcf178271e892e64e4092b9e011239a066ce5de848afd2efe3f13197a9f8b3"}, - {file = 
"snowflake_connector_python-3.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4916f9b4a0efd7c96d1fa50a157e05907b6935f91492cca7f200b43cc178a25e"}, - {file = "snowflake_connector_python-3.6.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:f15024c66db5e87d359216ec733a2974d7562aa38f3f18c8b6e65489839e00d7"}, - {file = "snowflake_connector_python-3.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bcbd3102f807ebbbae52b1b5683d45cd7b3dcb0eaec131233ba6b156e8d70fa4"}, - {file = "snowflake_connector_python-3.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7662e2de25b885abe08ab866cf7c7b026ad1af9faa39c25e2c25015ef807abe3"}, - {file = "snowflake_connector_python-3.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:d1fa102f55ee166cc766aeee3f9333b17b4bede6fb088eee1e1f022df15b6d81"}, - {file = "snowflake_connector_python-3.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:fde1e0727e2f23c2a07b49b30e1bc0f49977f965d08ddfda10015b24a2beeb76"}, - {file = "snowflake_connector_python-3.6.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:1b51fe000c8cf6372d30b73c7136275e52788e6af47010cd1984c9fb03378e86"}, - {file = "snowflake_connector_python-3.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7a11699689a19916e65794ce58dca72b8a40fe6a7eea06764931ede10b47bcc"}, - {file = "snowflake_connector_python-3.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d810be5b180c6f47ce9b6f989fe64b9984383e4b77e30b284a83e33f229a3a82"}, - {file = "snowflake_connector_python-3.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:b5db47d4164d6b7a07c413a46f9edc4a1d687e3df44fd9d5fa89a89aecb94a8e"}, - {file = "snowflake_connector_python-3.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bf8c1ad5aab5304fefa2a4178061a24c96da45e3e3db9d901621e9953e005402"}, - {file = "snowflake_connector_python-3.6.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:1058ab5c98cc62fde8b3f021f0a5076cb7865b5cdab8a9bccde0df88b9e91334"}, - {file = "snowflake_connector_python-3.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b93f55989f80d69278e0f40a7a1c0e737806b7c0ddb0351513a752b837243e8"}, - {file = "snowflake_connector_python-3.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50dd954ea5918d3242ded69225b72f701963cd9c043ee7d9ab35dc22211611c8"}, - {file = "snowflake_connector_python-3.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:4ad42613b87f31441d07a8ea242f4c28ed5eb7b6e05986f9e94a7e44b96d3d1e"}, + {file = "snowflake-connector-python-3.7.0.tar.gz", hash = "sha256:b2bfaec64059307b08caadad40214d488fefb4a23fcd7553ac75f5ea758a9169"}, + {file = "snowflake_connector_python-3.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f38070af24d15cd103d565b63b08c5eac3bdf72ad06ad27cd98c46359cb4bee2"}, + {file = "snowflake_connector_python-3.7.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:f8f3912699030291fd82d75321cda44205c9f8fb27841ffbaaf6d3dc4065b798"}, + {file = "snowflake_connector_python-3.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7ac1190c6ca48297429f0fb6515b54e3fd3bceb1b72fce7b59097044a9e98e0"}, + {file = "snowflake_connector_python-3.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57deaa28baa601b64c1ae5a5c75260ab1c6a22bd07a8d8c7ac785c8deb1c556e"}, + {file = "snowflake_connector_python-3.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:715635ed5b6e5ef8de659fc336c1b89296fe72fdec180c40915c10df885c8082"}, + {file 
= "snowflake_connector_python-3.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d51f3a8912fcc5169731d2b42262087e8a6da20f7344dd001ed97fbdf6ff972c"}, + {file = "snowflake_connector_python-3.7.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:46bfa243875eff9c6dfe1afc26f2034b00ac6eb9f77010b2949a174c38a59722"}, + {file = "snowflake_connector_python-3.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7004ccfe3c16075d33b0440b4d5241a50156bbc5dcbf11dec61674d0ac830f46"}, + {file = "snowflake_connector_python-3.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ee9e6a0a94e0ac1f15fa93c0f61f6e930240280bd043f61216d942e837beb7f"}, + {file = "snowflake_connector_python-3.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:b545fd11c9bd200681e182cf46bb4cbc8250ca6acc41fbea749799a2b23f574f"}, + {file = "snowflake_connector_python-3.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:258541df8ba90201ce6f7c4ae9f59e3a9f585ed30fbbaafd207e0774104cf6dc"}, + {file = "snowflake_connector_python-3.7.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:e548642913f7d0ef9d5a35c69c7a8308cbab8fe255fdc3c9f7e18c71e52a0c2e"}, + {file = "snowflake_connector_python-3.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:639d09de40c014c7ab0308f622bd1d29a9b9dd05c0ced2d858da31323fa16bda"}, + {file = "snowflake_connector_python-3.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da9cf62665ee47c7ec8c18ae554a31c72cacf1cef4b42d55cfbdbae4b5ddb3f2"}, + {file = "snowflake_connector_python-3.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:ad1d0e339cadb5ba79d24783c39ba21a63e2159f0d3d9540da0168f97043904c"}, + {file = "snowflake_connector_python-3.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3d8e4d0fad8b00b55bc99035ad2c54d9aa3ca8495f7dfcce736a961b5dbd1d9f"}, + {file = "snowflake_connector_python-3.7.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:fc3e95d4c99472444ffda35b9bbfe4cd4c775279c7eca579f1eee9d8d2ec1e2a"}, + {file = "snowflake_connector_python-3.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f93a5861333c2f87ecd1fea34a0fae35c12c196e86fa75c2dd89741e83f2d82"}, + {file = "snowflake_connector_python-3.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdf0fe7d77e02949a8a2a7d365217b822bcaf2fc9541095a241116576458568"}, + {file = "snowflake_connector_python-3.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:1ec29946b224d8089070477f60ffe58923433d8c2308b6403684500e85c37699"}, + {file = "snowflake_connector_python-3.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4f945c512383a8b5f1d2404c40d20e0c915ba3f0ac01983f2e43987d6eecda02"}, + {file = "snowflake_connector_python-3.7.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:574cf5be3c61a6ea421ac9710ac791a80f6dfcc53986ab81e68d1085dad79dab"}, + {file = "snowflake_connector_python-3.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb8168458e5d23a0ba4d4e0a276bbd477ddd26d35c554f2c3c64cfe29622499a"}, + {file = "snowflake_connector_python-3.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecf8f520692653775f51307140d326b53a51e338d67dc522b1d376b51b12d14e"}, + {file = "snowflake_connector_python-3.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:5ed928055ed40da22b2d6bdde62eee5068c352f66415e1c9aee7c45eb67d39cb"}, ] [package.dependencies] @@ -6279,25 +6253,6 @@ development = ["Cython", "coverage", "more-itertools", "numpy (<1.27.0)", "pendu pandas = 
["pandas (>=1.0.0,<2.2.0)", "pyarrow"] secure-local-storage = ["keyring (!=16.1.0,<25.0.0)"] -[[package]] -name = "snowflake-sqlalchemy" -version = "1.5.1" -description = "Snowflake SQLAlchemy Dialect" -optional = true -python-versions = ">=3.7" -files = [ - {file = "snowflake-sqlalchemy-1.5.1.tar.gz", hash = "sha256:4f1383402ffc89311974bd810dee22003aef4af0f312a0fdb55778333ad1abf7"}, - {file = "snowflake_sqlalchemy-1.5.1-py2.py3-none-any.whl", hash = "sha256:df022fb73bc04d68dfb3216ebf7a1bfbd14d22def9c38bbe05275beb258adcd0"}, -] - -[package.dependencies] -snowflake-connector-python = "<4.0.0" -sqlalchemy = ">=1.4.0,<2.0.0" - -[package.extras] -development = ["mock", "numpy", "pytest", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytz"] -pandas = ["snowflake-connector-python[pandas] (<4.0.0)"] - [[package]] name = "sortedcontainers" version = "2.4.0" @@ -6465,17 +6420,17 @@ sqlalchemy = ">=1.0.0" [[package]] name = "sqlglot" -version = "20.8.0" +version = "20.11.0" description = "An easily customizable SQL parser and transpiler" optional = false python-versions = ">=3.7" files = [ - {file = "sqlglot-20.8.0-py3-none-any.whl", hash = "sha256:cb73b81a26da462c34b12b98cf193d679d4b5693703d309db236d9784cef60bb"}, - {file = "sqlglot-20.8.0.tar.gz", hash = "sha256:5636e97fab9efdb4a8690c0e32bbd2d657fe91eb650f10e913a56b4bd979faef"}, + {file = "sqlglot-20.11.0-py3-none-any.whl", hash = "sha256:658509272da15e90dd1c59d9ca5281d7bff2e87121f87e6f9e6541067a057c9c"}, + {file = "sqlglot-20.11.0.tar.gz", hash = "sha256:79a1510ffad1f1e4c5915751f0ed978c099e7e83cd4010ecbd471c00331b6902"}, ] [package.extras] -dev = ["autoflake", "black", "duckdb (>=0.6)", "isort", "maturin (>=1.4,<2.0)", "mypy (>=0.990)", "pandas", "pdoc", "pre-commit", "pyspark", "python-dateutil", "types-python-dateutil"] +dev = ["autoflake", "black", "duckdb (>=0.6)", "isort", "maturin (>=1.4,<2.0)", "mypy (>=0.990)", "pandas", "pdoc", "pre-commit", "pyspark", "python-dateutil", "types-python-dateutil", "typing-extensions"] rs = ["sqlglotrs (==0.1.0)"] [[package]] @@ -6676,13 +6631,13 @@ files = [ [[package]] name = "toolz" -version = "0.12.0" +version = "0.12.1" description = "List processing tools and functional utilities" optional = false -python-versions = ">=3.5" +python-versions = ">=3.7" files = [ - {file = "toolz-0.12.0-py3-none-any.whl", hash = "sha256:2059bd4148deb1884bb0eb770a3cde70e7f954cfbbdc2285f1f2de01fd21eb6f"}, - {file = "toolz-0.12.0.tar.gz", hash = "sha256:88c570861c440ee3f2f6037c4654613228ff40c93a6c25e0eba70d17282c6194"}, + {file = "toolz-0.12.1-py3-none-any.whl", hash = "sha256:d22731364c07d72eea0a0ad45bafb2c2937ab6fd38a3507bf55eae8744aa7d85"}, + {file = "toolz-0.12.1.tar.gz", hash = "sha256:ecca342664893f177a13dac0e6b41cbd8ac25a358e5f215316d43e2100224f4d"}, ] [[package]] @@ -6784,13 +6739,13 @@ tests = ["black", "httpretty (<1.1)", "isort", "pre-commit", "pytest", "pytest-r [[package]] name = "trove-classifiers" -version = "2024.1.8" +version = "2024.1.31" description = "Canonical source for classifiers on PyPI (pypi.org)." 
optional = false python-versions = "*" files = [ - {file = "trove-classifiers-2024.1.8.tar.gz", hash = "sha256:6e36caf430ff6485c4b57a4c6b364a13f6a898d16b9417c6c37467e59c14b05a"}, - {file = "trove_classifiers-2024.1.8-py3-none-any.whl", hash = "sha256:3c1ff4deb10149c7e39ede6e5bbc107def64362ef1ee7590ec98d71fb92f1b6a"}, + {file = "trove-classifiers-2024.1.31.tar.gz", hash = "sha256:bfdfe60bbf64985c524416afb637ecc79c558e0beb4b7f52b0039e01044b0229"}, + {file = "trove_classifiers-2024.1.31-py3-none-any.whl", hash = "sha256:854aba3358f3cf10e5c0916aa533f5a39e27aadd8ade26a54cdc2a93257e39c4"}, ] [[package]] @@ -7396,15 +7351,15 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\ cffi = ["cffi (>=1.11)"] [extras] -all = ["black", "clickhouse-connect", "dask", "datafusion", "db-dtypes", "deltalake", "duckdb", "duckdb-engine", "geoalchemy2", "geopandas", "google-cloud-bigquery", "google-cloud-bigquery-storage", "graphviz", "impyla", "oracledb", "packaging", "pins", "polars", "psycopg2", "pydata-google-auth", "pydruid", "pymysql", "pyodbc", "pyspark", "regex", "shapely", "snowflake-connector-python", "snowflake-sqlalchemy", "sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-risingwave", "sqlalchemy-views", "trino"] +all = ["black", "clickhouse-connect", "dask", "datafusion", "db-dtypes", "deltalake", "duckdb", "geoalchemy2", "geopandas", "google-cloud-bigquery", "google-cloud-bigquery-storage", "graphviz", "impyla", "oracledb", "packaging", "pins", "polars", "psycopg2", "pydata-google-auth", "pydruid", "pymysql", "pyodbc", "pyspark", "regex", "shapely", "snowflake-connector-python", "sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-risingwave", "sqlalchemy-views", "trino"] bigquery = ["db-dtypes", "google-cloud-bigquery", "google-cloud-bigquery-storage", "pydata-google-auth"] -clickhouse = ["clickhouse-connect", "sqlalchemy"] +clickhouse = ["clickhouse-connect"] dask = ["dask", "regex"] datafusion = ["datafusion"] decompiler = ["black"] deltalake = ["deltalake"] druid = ["pydruid", "sqlalchemy"] -duckdb = ["duckdb", "duckdb-engine", "sqlalchemy", "sqlalchemy-views"] +duckdb = ["duckdb"] examples = ["pins"] exasol = ["sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views"] flink = [] @@ -7418,7 +7373,7 @@ polars = ["packaging", "polars"] postgres = ["psycopg2", "sqlalchemy", "sqlalchemy-views"] pyspark = ["packaging", "pyspark", "sqlalchemy"] risingwave = ["psycopg2", "sqlalchemy", "sqlalchemy-risingwave", "sqlalchemy-views"] -snowflake = ["packaging", "snowflake-connector-python", "snowflake-sqlalchemy", "sqlalchemy-views"] +snowflake = ["packaging", "snowflake-connector-python"] sqlite = ["regex", "sqlalchemy", "sqlalchemy-views"] trino = ["sqlalchemy", "sqlalchemy-views", "trino"] visualization = ["graphviz"] @@ -7426,4 +7381,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "3c1dfc652d2d025e6ea434033966154b44ba3a4452cbe3c7439ea4754c6ec420" +content-hash = "2191160a6ebf9c3e237ada9bba709eb3a912f31f60a25138c9c6b5aace96ee9f" diff --git a/requirements-dev.txt b/requirements-dev.txt index 7855a13915d5..ae9b4c7d1d43 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ -aiohttp==3.9.2 ; python_version >= "3.9" and python_version < "4.0" +aiohttp==3.9.3 ; python_version >= "3.9" and python_version < "4.0" aiosignal==1.3.1 ; python_version >= "3.9" and python_version < "4.0" altair==5.2.0 ; python_version >= "3.10" and python_version < "3.13" annotated-types==0.6.0 ; python_version >= "3.10" and 
python_version < "3.13" @@ -10,13 +10,13 @@ asttokens==2.4.1 ; python_version >= "3.9" and python_version < "4.0" async-timeout==4.0.3 ; python_version >= "3.9" and python_version < "3.11" atpublic==4.0 ; python_version >= "3.9" and python_version < "4.0" attrs==23.2.0 ; python_version >= "3.9" and python_version < "4.0" -beartype==0.16.4 ; python_version >= "3.10" and python_version < "3.13" -beautifulsoup4==4.12.2 ; python_version >= "3.10" and python_version < "3.13" +beartype==0.17.0 ; python_version >= "3.10" and python_version < "3.13" +beautifulsoup4==4.12.3 ; python_version >= "3.10" and python_version < "3.13" bidict==0.22.1 ; python_version >= "3.9" and python_version < "4.0" bitarray==2.9.2 ; python_version >= "3.9" and python_version < "4.0" -black==24.1.0 ; python_version >= "3.9" and python_version < "4.0" +black==24.1.1 ; python_version >= "3.9" and python_version < "4.0" bqplot==0.12.42 ; python_version >= "3.10" and python_version < "3.13" -branca==0.7.0 ; python_version >= "3.10" and python_version < "3.13" +branca==0.7.1 ; python_version >= "3.10" and python_version < "3.13" build==1.0.3 ; python_version >= "3.9" and python_version < "4.0" cachecontrol[filecache]==0.13.1 ; python_version >= "3.9" and python_version < "4.0" cachetools==5.3.2 ; python_version >= "3.9" and python_version < "4.0" @@ -28,7 +28,7 @@ charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "4.0" cleo==2.1.0 ; python_version >= "3.9" and python_version < "4.0" click-plugins==1.1.1 ; python_version >= "3.9" and python_version < "4.0" click==8.1.7 ; python_version >= "3.9" and python_version < "4.0" -clickhouse-connect[arrow,numpy,pandas]==0.6.23 ; python_version >= "3.9" and python_version < "4.0" +clickhouse-connect[arrow,numpy,pandas]==0.7.0 ; python_version >= "3.9" and python_version < "4.0" cligj==0.7.2 ; python_version >= "3.9" and python_version < "4" cloudpickle==3.0.0 ; python_version >= "3.9" and python_version < "4.0" codespell[hard-encoding-detection,toml]==2.2.6 ; python_version >= "3.9" and python_version < "4.0" @@ -36,20 +36,19 @@ colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0" and (sys_pl colour==0.1.5 ; python_version >= "3.10" and python_version < "3.13" comm==0.2.1 ; python_version >= "3.10" and python_version < "3.13" contourpy==1.2.0 ; python_version >= "3.10" and python_version < "3.13" -coverage[toml]==7.4.0 ; python_version >= "3.9" and python_version < "4.0" +coverage[toml]==7.4.1 ; python_version >= "3.9" and python_version < "4.0" crashtest==0.4.1 ; python_version >= "3.9" and python_version < "4.0" cryptography==41.0.7 ; python_version >= "3.9" and python_version < "4.0" cycler==0.12.1 ; python_version >= "3.10" and python_version < "3.13" -dask==2024.1.0 ; python_version >= "3.10" and python_version < "3.13" -dask[array,dataframe]==2024.1.0 ; python_version >= "3.9" and python_version < "4.0" +dask==2024.1.1 ; python_version >= "3.10" and python_version < "3.13" +dask[array,dataframe]==2024.1.1 ; python_version >= "3.9" and python_version < "4.0" datafusion==34.0.0 ; python_version >= "3.9" and python_version < "4.0" db-dtypes==1.2.0 ; python_version >= "3.9" and python_version < "4.0" debugpy==1.8.0 ; python_version >= "3.10" and python_version < "3.13" decorator==5.1.1 ; python_version >= "3.9" and python_version < "4.0" deltalake==0.15.1 ; python_version >= "3.9" and python_version < "4.0" distlib==0.3.8 ; python_version >= "3.9" and python_version < "4.0" -distributed==2024.1.0 ; python_version >= "3.10" and python_version < 
"3.13" -duckdb-engine==0.10.0 ; python_version >= "3.9" and python_version < "4.0" +distributed==2024.1.1 ; python_version >= "3.10" and python_version < "3.13" duckdb==0.9.2 ; python_version >= "3.9" and python_version < "4.0" dulwich==0.21.7 ; python_version >= "3.9" and python_version < "4.0" dunamai==1.19.0 ; python_version >= "3.9" and python_version < "4.0" @@ -64,16 +63,16 @@ fonttools==4.47.2 ; python_version >= "3.10" and python_version < "3.13" frozenlist==1.4.1 ; python_version >= "3.9" and python_version < "4.0" fsspec==2023.12.2 ; python_version >= "3.9" and python_version < "4.0" gcsfs==2023.12.2.post1 ; python_version >= "3.9" and python_version < "4.0" -gdown==4.7.1 ; python_version >= "3.10" and python_version < "3.13" +gdown==5.0.1 ; python_version >= "3.10" and python_version < "3.13" geoalchemy2==0.14.3 ; python_version >= "3.9" and python_version < "4.0" geojson==3.1.0 ; python_version >= "3.10" and python_version < "3.13" -geopandas==0.14.2 ; python_version >= "3.9" and python_version < "4.0" -google-api-core==2.15.0 ; python_version >= "3.9" and python_version < "4.0" -google-api-core[grpc]==2.15.0 ; python_version >= "3.9" and python_version < "4.0" +geopandas==0.14.3 ; python_version >= "3.9" and python_version < "4.0" +google-api-core==2.16.1 ; python_version >= "3.9" and python_version < "4.0" +google-api-core[grpc]==2.16.1 ; python_version >= "3.9" and python_version < "4.0" google-auth-oauthlib==1.2.0 ; python_version >= "3.9" and python_version < "4.0" -google-auth==2.26.2 ; python_version >= "3.9" and python_version < "4.0" +google-auth==2.27.0 ; python_version >= "3.9" and python_version < "4.0" google-cloud-bigquery-storage==2.24.0 ; python_version >= "3.9" and python_version < "4.0" -google-cloud-bigquery==3.16.0 ; python_version >= "3.9" and python_version < "4.0" +google-cloud-bigquery==3.17.1 ; python_version >= "3.9" and python_version < "4.0" google-cloud-core==2.4.1 ; python_version >= "3.9" and python_version < "4.0" google-cloud-storage==2.14.0 ; python_version >= "3.9" and python_version < "4.0" google-crc32c==1.5.0 ; python_version >= "3.9" and python_version < "4.0" @@ -81,11 +80,11 @@ google-resumable-media==2.7.0 ; python_version >= "3.9" and python_version < "4. 
googleapis-common-protos==1.62.0 ; python_version >= "3.9" and python_version < "4.0" graphviz==0.20.1 ; python_version >= "3.9" and python_version < "4.0" greenlet==3.0.3 ; python_version >= "3.9" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") and python_version < "4.0" -griffe==0.38.1 ; python_version >= "3.10" and python_version < "3.13" +griffe==0.40.0 ; python_version >= "3.10" and python_version < "3.13" grpcio-status==1.60.0 ; python_version >= "3.9" and python_version < "4.0" grpcio==1.60.0 ; python_version >= "3.9" and python_version < "4.0" humanize==4.9.0 ; python_version >= "3.9" and python_version < "4.0" -hypothesis==6.93.0 ; python_version >= "3.9" and python_version < "4.0" +hypothesis==6.97.4 ; python_version >= "3.9" and python_version < "4.0" identify==2.5.33 ; python_version >= "3.9" and python_version < "4.0" idna==3.6 ; python_version >= "3.9" and python_version < "4.0" importlib-metadata==7.0.1 ; python_version >= "3.9" and python_version < "4.0" @@ -95,8 +94,8 @@ iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "4.0" installer==0.7.0 ; python_version >= "3.9" and python_version < "4.0" ipyevents==2.0.2 ; python_version >= "3.10" and python_version < "3.13" ipyfilechooser==0.6.0 ; python_version >= "3.10" and python_version < "3.13" -ipykernel==6.28.0 ; python_version >= "3.10" and python_version < "3.13" -ipyleaflet==0.18.1 ; python_version >= "3.10" and python_version < "3.13" +ipykernel==6.29.0 ; python_version >= "3.10" and python_version < "3.13" +ipyleaflet==0.18.2 ; python_version >= "3.10" and python_version < "3.13" ipython==8.18.1 ; python_version >= "3.9" and python_version < "4.0" ipytree==0.2.2 ; python_version >= "3.10" and python_version < "3.13" ipywidgets==8.1.1 ; python_version >= "3.10" and python_version < "3.13" @@ -106,7 +105,7 @@ jeepney==0.8.0 ; python_version >= "3.9" and python_version < "4.0" and sys_plat jinja2==3.1.3 ; python_version >= "3.9" and python_version < "4.0" joblib==1.3.2 ; python_version >= "3.9" and python_version < "4.0" jsonschema-specifications==2023.12.1 ; python_version >= "3.10" and python_version < "3.13" -jsonschema==4.20.0 ; python_version >= "3.10" and python_version < "3.13" +jsonschema==4.21.1 ; python_version >= "3.10" and python_version < "3.13" jupyter-client==8.6.0 ; python_version >= "3.10" and python_version < "3.13" jupyter-core==5.7.1 ; python_version >= "3.10" and python_version < "3.13" jupyterlab-widgets==3.0.9 ; python_version >= "3.10" and python_version < "3.13" @@ -117,7 +116,7 @@ locket==1.0.0 ; python_version >= "3.9" and python_version < "4.0" lonboard==0.5.0 ; python_version >= "3.10" and python_version < "3.13" lz4==4.3.3 ; python_version >= "3.9" and python_version < "4.0" markdown-it-py==3.0.0 ; python_version >= "3.9" and python_version < "4.0" -markupsafe==2.1.3 ; python_version >= "3.9" and python_version < "4.0" +markupsafe==2.1.4 ; python_version >= "3.9" and python_version < "4.0" matplotlib-inline==0.1.6 ; python_version >= "3.9" and python_version < "4.0" matplotlib==3.8.2 ; python_version >= "3.10" and python_version < "3.13" mdurl==0.1.2 ; python_version >= "3.9" and python_version < "4.0" @@ -129,14 +128,14 @@ multipledispatch==1.0.0 ; python_version >= "3.9" and python_version < "4.0" mypy-extensions==1.0.0 ; python_version >= "3.9" and python_version < "4.0" nbclient==0.9.0 ; python_version 
>= "3.10" and python_version < "3.13" nbformat==5.9.2 ; python_version >= "3.10" and python_version < "3.13" -nest-asyncio==1.5.8 ; python_version >= "3.10" and python_version < "3.13" +nest-asyncio==1.6.0 ; python_version >= "3.10" and python_version < "3.13" nodeenv==1.8.0 ; python_version >= "3.9" and python_version < "4.0" numpy==1.26.3 ; python_version >= "3.9" and python_version < "4.0" oauthlib==3.2.2 ; python_version >= "3.9" and python_version < "4.0" oracledb==2.0.1 ; python_version >= "3.9" and python_version < "4.0" packaging==23.2 ; python_version >= "3.9" and python_version < "4.0" palettable==3.3.3 ; python_version >= "3.10" and python_version < "3.13" -pandas==2.1.4 ; python_version >= "3.9" and python_version < "4.0" +pandas==2.2.0 ; python_version >= "3.9" and python_version < "4.0" parso==0.8.3 ; python_version >= "3.9" and python_version < "4.0" parsy==2.1 ; python_version >= "3.9" and python_version < "4.0" partd==1.4.1 ; python_version >= "3.9" and python_version < "4.0" @@ -149,19 +148,19 @@ pkginfo==1.9.6 ; python_version >= "3.9" and python_version < "4.0" platformdirs==3.11.0 ; python_version >= "3.9" and python_version < "4.0" plotly==5.18.0 ; python_version >= "3.10" and python_version < "3.13" plotnine==0.12.4 ; python_version >= "3.10" and python_version < "3.13" -pluggy==1.3.0 ; python_version >= "3.9" and python_version < "4.0" -plum-dispatch==2.2.2 ; python_version >= "3.10" and python_version < "3.13" +pluggy==1.4.0 ; python_version >= "3.9" and python_version < "4.0" +plum-dispatch==2.3.2 ; python_version >= "3.10" and python_version < "3.13" poetry-core==1.8.1 ; python_version >= "3.9" and python_version < "4.0" poetry-dynamic-versioning==1.2.0 ; python_version >= "3.9" and python_version < "4.0" poetry-plugin-export==1.6.0 ; python_version >= "3.9" and python_version < "4.0" poetry==1.7.1 ; python_version >= "3.9" and python_version < "4.0" -polars==0.20.4 ; python_version >= "3.9" and python_version < "4.0" +polars==0.20.6 ; python_version >= "3.9" and python_version < "4.0" pprintpp==0.4.0 ; python_version >= "3.9" and python_version < "4.0" pre-commit==3.6.0 ; python_version >= "3.9" and python_version < "4.0" prompt-toolkit==3.0.43 ; python_version >= "3.9" and python_version < "4.0" proto-plus==1.23.0 ; python_version >= "3.9" and python_version < "4.0" protobuf==4.25.2 ; python_version >= "3.9" and python_version < "4.0" -psutil==5.9.7 ; python_version >= "3.10" and python_version < "3.13" +psutil==5.9.8 ; python_version >= "3.10" and python_version < "3.13" psycopg2==2.9.9 ; python_version >= "3.9" and python_version < "4.0" psygnal==0.9.5 ; python_version >= "3.10" and python_version < "3.13" ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "4.0" @@ -174,14 +173,14 @@ pyarrow==15.0.0 ; python_version >= "3.9" and python_version < "4.0" pyasn1-modules==0.3.0 ; python_version >= "3.9" and python_version < "4.0" pyasn1==0.5.1 ; python_version >= "3.9" and python_version < "4.0" pycparser==2.21 ; python_version >= "3.9" and python_version < "4.0" -pydantic-core==2.14.6 ; python_version >= "3.10" and python_version < "3.13" -pydantic==2.5.3 ; python_version >= "3.10" and python_version < "3.13" +pydantic-core==2.16.1 ; python_version >= "3.10" and python_version < "3.13" +pydantic==2.6.0 ; python_version >= "3.10" and python_version < "3.13" pydata-google-auth==1.8.2 ; python_version >= "3.9" and python_version < "4.0" -pydeps==1.12.17 ; python_version >= "3.9" and python_version < "4.0" +pydeps==1.12.18 ; python_version >= "3.9" and 
python_version < "4.0" pydruid[sqlalchemy]==0.6.6 ; python_version >= "3.9" and python_version < "4.0" pyexasol==0.25.2 ; python_version >= "3.9" and python_version < "4.0" pygments==2.17.2 ; python_version >= "3.9" and python_version < "4.0" -pyinstrument==4.6.1 ; python_version >= "3.9" and python_version < "4.0" +pyinstrument==4.6.2 ; python_version >= "3.9" and python_version < "4.0" pyjwt==2.8.0 ; python_version >= "3.9" and python_version < "4.0" pymysql==1.1.0 ; python_version >= "3.9" and python_version < "4.0" pyodbc==5.0.1 ; python_version >= "3.9" and python_version < "4.0" @@ -206,14 +205,14 @@ pytest-xdist==3.5.0 ; python_version >= "3.9" and python_version < "4.0" pytest==8.0.0 ; python_version >= "3.9" and python_version < "4.0" python-box==7.1.1 ; python_version >= "3.10" and python_version < "3.13" python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0" -pytz==2023.3.post1 ; python_version >= "3.9" and python_version < "4.0" +pytz==2023.4 ; python_version >= "3.9" and python_version < "4.0" pywin32-ctypes==0.2.2 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "win32" pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.10" and python_version < "3.13" pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "4.0" pyzmq==25.1.2 ; python_version >= "3.10" and python_version < "3.13" quartodoc==0.7.2 ; python_version >= "3.10" and python_version < "3.13" rapidfuzz==3.6.1 ; python_version >= "3.9" and python_version < "4.0" -referencing==0.32.1 ; python_version >= "3.10" and python_version < "3.13" +referencing==0.33.0 ; python_version >= "3.10" and python_version < "3.13" regex==2023.12.25 ; python_version >= "3.9" and python_version < "4.0" requests-oauthlib==1.3.1 ; python_version >= "3.9" and python_version < "4.0" requests-toolbelt==1.0.0 ; python_version >= "3.9" and python_version < "4.0" @@ -222,18 +221,17 @@ requests[socks]==2.31.0 ; python_version >= "3.10" and python_version < "3.13" rich==13.7.0 ; python_version >= "3.9" and python_version < "4.0" rpds-py==0.17.1 ; python_version >= "3.10" and python_version < "3.13" rsa==4.9 ; python_version >= "3.9" and python_version < "4" -ruff==0.1.13 ; python_version >= "3.9" and python_version < "4.0" -scikit-learn==1.3.2 ; python_version >= "3.10" and python_version < "3.13" -scipy==1.11.4 ; python_version >= "3.10" and python_version < "3.13" +ruff==0.1.15 ; python_version >= "3.9" and python_version < "4.0" +scikit-learn==1.4.0 ; python_version >= "3.10" and python_version < "3.13" +scipy==1.12.0 ; python_version >= "3.10" and python_version < "3.13" scooby==0.9.2 ; python_version >= "3.10" and python_version < "3.13" -seaborn==0.13.1 ; python_version >= "3.10" and python_version < "3.13" +seaborn==0.13.2 ; python_version >= "3.10" and python_version < "3.13" secretstorage==3.3.3 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "linux" setuptools==69.0.3 ; python_version >= "3.9" and python_version < "4.0" shapely==2.0.2 ; python_version >= "3.9" and python_version < "4.0" shellingham==1.5.4 ; python_version >= "3.9" and python_version < "4.0" six==1.16.0 ; python_version >= "3.9" and python_version < "4.0" -snowflake-connector-python==3.6.0 ; python_version >= "3.9" and python_version < "4.0" -snowflake-sqlalchemy==1.5.1 ; python_version >= "3.9" and python_version < "4.0" +snowflake-connector-python==3.7.0 ; python_version >= "3.9" and python_version < "4.0" sortedcontainers==2.4.0 ; 
python_version >= "3.9" and python_version < "4.0" soupsieve==2.5 ; python_version >= "3.10" and python_version < "3.13" sphobjinv==2.3.1 ; python_version >= "3.10" and python_version < "3.13" @@ -241,7 +239,7 @@ sqlalchemy-exasol==4.6.3 ; python_version >= "3.9" and python_version < "4.0" sqlalchemy-risingwave==1.0.0 ; python_version >= "3.9" and python_version < "4.0" sqlalchemy-views==0.3.2 ; python_version >= "3.9" and python_version < "4.0" sqlalchemy==1.4.51 ; python_version >= "3.9" and python_version < "4.0" -sqlglot==20.8.0 ; python_version >= "3.9" and python_version < "4.0" +sqlglot==20.11.0 ; python_version >= "3.9" and python_version < "4.0" stack-data==0.6.3 ; python_version >= "3.9" and python_version < "4.0" statsmodels==0.14.1 ; python_version >= "3.10" and python_version < "3.13" stdlib-list==0.10.0 ; python_version >= "3.9" and python_version < "4.0" @@ -253,13 +251,13 @@ thrift-sasl==0.4.3 ; python_version >= "3.9" and python_version < "4.0" thrift==0.16.0 ; python_version >= "3.9" and python_version < "4.0" tomli==2.0.1 ; python_version >= "3.9" and python_full_version <= "3.11.0a6" tomlkit==0.12.3 ; python_version >= "3.9" and python_version < "4.0" -toolz==0.12.0 ; python_version >= "3.9" and python_version < "4.0" +toolz==0.12.1 ; python_version >= "3.9" and python_version < "4.0" tornado==6.4 ; python_version >= "3.10" and python_version < "3.13" tqdm==4.66.1 ; python_version >= "3.9" and python_version < "4.0" traitlets==5.14.1 ; python_version >= "3.9" and python_version < "4.0" traittypes==0.2.1 ; python_version >= "3.10" and python_version < "3.13" trino[sqlalchemy]==0.327.0 ; python_version >= "3.9" and python_version < "4.0" -trove-classifiers==2024.1.8 ; python_version >= "3.9" and python_version < "4.0" +trove-classifiers==2024.1.31 ; python_version >= "3.9" and python_version < "4.0" typing-extensions==4.9.0 ; python_version >= "3.9" and python_version < "4.0" tzdata==2023.4 ; python_version >= "3.9" and python_version < "4.0" tzlocal==5.2 ; python_version >= "3.9" and python_version < "4.0" From 3fd91f0194d2652638918d2c4bcef71cc295677b Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 4 Jan 2024 07:04:14 -0500 Subject: [PATCH 038/161] ci: comment out sys-deps step --- .github/workflows/ibis-backends.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index baffb490b3d0..aec5a76623d3 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -317,13 +317,13 @@ jobs: # extras: # - risingwave steps: - - name: update and install system dependencies - if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null - run: | - set -euo pipefail - - sudo apt-get update -qq -y - sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }} + # - name: update and install system dependencies + # if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null + # run: | + # set -euo pipefail + # + # sudo apt-get update -qq -y + # sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }} - name: install sqlite if: matrix.os == 'windows-latest' && matrix.backend.name == 'sqlite' From c1f4a9a0bd71a214a2692b419023a3c8624a2b24 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 4 Jan 2024 07:04:45 -0500 Subject: [PATCH 039/161] chore: remove duplicate `distinct` decompile rule --- 
ibis/expr/decompile.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ibis/expr/decompile.py b/ibis/expr/decompile.py index 6c96648b0ff9..0567849f0055 100644 --- a/ibis/expr/decompile.py +++ b/ibis/expr/decompile.py @@ -192,11 +192,6 @@ def self_reference(op, parent, identifier): return f"{parent}.view()" -@translate.register(ops.Distinct) -def distinct(op, parent): - return f"{parent}.distinct()" - - @translate.register(ops.JoinTable) def join_table(op, parent, index): return parent From ace6e20192b3d9c9fd6331daa669582518110b32 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 4 Jan 2024 07:39:05 -0500 Subject: [PATCH 040/161] ci: install decompiler as extra not as its own dependency (#7901) Fixes installation of the `decompiler` extra. --- .github/workflows/ibis-backends.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index aec5a76623d3..c5aea341638e 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -63,9 +63,9 @@ jobs: - deltalake - geospatial - examples + - decompiler additional_deps: - torch - - decompiler - name: clickhouse title: ClickHouse services: From 3474dc9dc484fce008644f7ed71b767359c28bb8 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 4 Jan 2024 07:36:16 -0500 Subject: [PATCH 041/161] fix(conversion): convert decimals to the exact precision and scale requested by the input type --- ibis/common/numeric.py | 9 +++++++-- ibis/formats/pandas.py | 24 +++++++++--------------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/ibis/common/numeric.py b/ibis/common/numeric.py index 8d3f10ffd8b3..bd7eca1bc4b9 100644 --- a/ibis/common/numeric.py +++ b/ibis/common/numeric.py @@ -3,7 +3,12 @@ from decimal import Context, Decimal, InvalidOperation -def normalize_decimal(value, precision: int | None = None, scale: int | None = None): +def normalize_decimal( + value, + precision: int | None = None, + scale: int | None = None, + strict: bool = True, +): context = Context(prec=38 if precision is None else precision) try: @@ -25,7 +30,7 @@ def normalize_decimal(value, precision: int | None = None, scale: int | None = N ) if scale is not None: - if exponent < -scale: + if strict and exponent < -scale: raise TypeError( f"Normalizing {value} with scale {exponent} to scale -{scale} " "would loose precision" diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py index b18fc2bf106e..d1983b1b58a8 100644 --- a/ibis/formats/pandas.py +++ b/ibis/formats/pandas.py @@ -2,8 +2,8 @@ import contextlib import datetime -import decimal import warnings +from functools import partial from importlib.util import find_spec as _find_spec import numpy as np @@ -13,6 +13,7 @@ import ibis.expr.datatypes as dt import ibis.expr.schema as sch +from ibis.common.numeric import normalize_decimal from ibis.common.temporal import normalize_timezone from ibis.formats import DataMapper, SchemaMapper, TableProxy from ibis.formats.numpy import NumpyType @@ -255,20 +256,13 @@ def convert_String(cls, s, dtype, pandas_type): @classmethod def convert_Decimal(cls, s, dtype, pandas_type): - context = decimal.Context(prec=dtype.precision) - - if dtype.scale is None: - normalize = context.create_decimal - else: - exponent = decimal.Decimal(10) ** -dtype.scale - - def normalize(x, exponent=exponent): - try: - return context.create_decimal(x).quantize(exponent) - except 
decimal.InvalidOperation: - return x - - return s.map(normalize, na_action="ignore").astype(pandas_type) + func = partial( + normalize_decimal, + precision=dtype.precision, + scale=dtype.scale, + strict=False, + ) + return s.map(func, na_action="ignore") @classmethod def convert_UUID(cls, s, dtype, pandas_type): From 0a629e6a0b850c2b967d4b8926415662374b223a Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 4 Jan 2024 07:52:15 -0500 Subject: [PATCH 042/161] test(snowflake): fix expected decimal results --- ibis/backends/tests/test_numeric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 2a15dd5f8f3d..f8e374aea6ed 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -255,7 +255,7 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? { "bigquery": decimal.Decimal("1.1"), - "snowflake": 1.1, + "snowflake": decimal.Decimal("1.1"), "sqlite": 1.1, "trino": 1.1, "dask": decimal.Decimal("1.1"), @@ -309,7 +309,7 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? { "bigquery": decimal.Decimal("1.1"), - "snowflake": 1.1, + "snowflake": decimal.Decimal("1.1"), "sqlite": 1.1, "trino": 1.1, "duckdb": decimal.Decimal("1.100000000"), From 544925e8ca7519c568374ba2061ecab59bdb382b Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 4 Jan 2024 07:52:41 -0500 Subject: [PATCH 043/161] test(duckdb): relax exact type check in decimal literal assertion DuckDB returns `numpy.float64` nans, which are valid `nan`s and valid Python `float`s so there's no real reason to assert that the `type` must be exactly equal if the values are `nan`. 
--- ibis/backends/tests/test_numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index f8e374aea6ed..68a487d6a2f2 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -676,7 +676,7 @@ def test_decimal_literal(con, backend, expr, expected_types, expected_result): if type(current_expected_result) in (float, decimal.Decimal) and math.isnan( current_expected_result ): - assert math.isnan(result) and type(result) == type(current_expected_result) + assert math.isnan(result) else: assert result == current_expected_result From 4a3952c45c157f319c1f67f4f0a5c54e2b3eef97 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 4 Jan 2024 10:22:03 -0500 Subject: [PATCH 044/161] refactor(sqlglot): various sqlglot compiler and backend clean ups (#7904) This PR pulls out some changes from #7871, including some removal of redundant translation rules and fixing some xpassing tests --- ibis/backends/base/sqlglot/__init__.py | 74 ++++++- ibis/backends/base/sqlglot/compiler.py | 185 ++++++++++-------- ibis/backends/base/sqlglot/datatypes.py | 2 +- ibis/backends/clickhouse/compiler.py | 28 ++- ibis/backends/datafusion/compiler.py | 9 +- ibis/backends/duckdb/__init__.py | 12 +- ibis/backends/duckdb/compiler.py | 13 +- ibis/backends/pandas/kernels.py | 2 +- .../test_union_aliasing/duckdb/out.sql | 8 +- .../test_filter_predicates/out.sql | 2 +- .../test_limit_with_self_join/out.sql | 2 +- .../test_tpch_self_join_failure/out.sql | 4 +- ibis/backends/tests/test_aggregation.py | 63 +++--- ibis/backends/tests/test_param.py | 2 - ibis/backends/tests/test_uuid.py | 4 +- ibis/backends/tests/test_window.py | 5 - ibis/backends/tests/tpch/conftest.py | 9 +- .../test_h07/test_tpc_h07/duckdb/h07.sql | 2 +- .../test_h07/test_tpc_h07/snowflake/h07.sql | 2 +- .../test_h08/test_tpc_h08/duckdb/h08.sql | 2 +- .../test_h08/test_tpc_h08/snowflake/h08.sql | 2 +- .../test_h09/test_tpc_h09/duckdb/h09.sql | 2 +- .../test_h09/test_tpc_h09/snowflake/h09.sql | 2 +- ibis/expr/operations/strings.py | 4 +- ibis/expr/types/arrays.py | 2 +- ibis/expr/types/strings.py | 2 +- ibis/formats/pandas.py | 10 +- 27 files changed, 271 insertions(+), 183 deletions(-) diff --git a/ibis/backends/base/sqlglot/__init__.py b/ibis/backends/base/sqlglot/__init__.py index f8cc80e5c76f..b9055e3aa292 100644 --- a/ibis/backends/base/sqlglot/__init__.py +++ b/ibis/backends/base/sqlglot/__init__.py @@ -13,7 +13,9 @@ from ibis.backends.base.sqlglot.compiler import STAR if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Iterable, Iterator, Mapping + + import pyarrow as pa import ibis.expr.datatypes as dt import ibis.expr.types as ir @@ -60,7 +62,7 @@ def table( ).to_expr() def _to_sqlglot( - self, expr: ir.Expr, limit: str | None = None, params=None, **_: Any + self, expr: ir.Expr, *, limit: str | None = None, params=None, **_: Any ): """Compile an Ibis expression to a sqlglot object.""" table_expr = expr.as_table() @@ -206,13 +208,17 @@ def _clean_up_cached_table(self, op): self.drop_table(op.name) def execute( - self, expr: ir.Expr, limit: str | None = "default", **kwargs: Any + self, + expr: ir.Expr, + params: Mapping | None = None, + limit: str | None = "default", + **kwargs: Any, ) -> Any: """Execute an expression.""" self._run_pre_execute_hooks(expr) table = expr.as_table() - sql = self.compile(table, limit=limit, **kwargs) + sql = self.compile(table, 
params=params, limit=limit, **kwargs) schema = table.schema() @@ -236,3 +242,63 @@ def drop_table( ) with self._safe_raw_sql(drop_stmt): pass + + def _cursor_batches( + self, + expr: ir.Expr, + params: Mapping[ir.Scalar, Any] | None = None, + limit: int | str | None = None, + chunk_size: int = 1 << 20, + ) -> Iterable[list]: + self._run_pre_execute_hooks(expr) + + with self._safe_raw_sql( + self.compile(expr, limit=limit, params=params) + ) as cursor: + while batch := cursor.fetchmany(chunk_size): + yield batch + + def to_pyarrow_batches( + self, + expr: ir.Expr, + *, + params: Mapping[ir.Scalar, Any] | None = None, + limit: int | str | None = None, + chunk_size: int = 1_000_000, + **_: Any, + ) -> pa.ipc.RecordBatchReader: + """Execute expression and return an iterator of pyarrow record batches. + + This method is eager and will execute the associated expression + immediately. + + Parameters + ---------- + expr + Ibis expression to export to pyarrow + limit + An integer to effect a specific row limit. A value of `None` means + "no limit". The default is in `ibis/config.py`. + params + Mapping of scalar parameter expressions to value. + chunk_size + Maximum number of rows in each returned record batch. + + Returns + ------- + RecordBatchReader + Collection of pyarrow `RecordBatch`s. + """ + pa = self._import_pyarrow() + + schema = expr.as_table().schema() + array_type = schema.as_struct().to_pyarrow() + arrays = ( + pa.array(map(tuple, batch), type=array_type) + for batch in self._cursor_batches( + expr, params=params, limit=limit, chunk_size=chunk_size + ) + ) + batches = map(pa.RecordBatch.from_struct_array, arrays) + + return pa.ipc.RecordBatchReader.from_batches(schema.to_pyarrow(), batches) diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index 687d21f83ed3..b7e8dbc9a83c 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -49,6 +49,16 @@ def __getitem__(self, key: str) -> partial: return getattr(self, key) +class VarGen: + __slots__ = () + + def __getattr__(self, name: str) -> sge.Var: + return sge.Var(this=name) + + def __getitem__(self, key: str) -> sge.Var: + return sge.Var(this=key) + + class FuncGen: __slots__ = ("namespace",) @@ -110,7 +120,7 @@ def parenthesize(op, arg): @public class SQLGlotCompiler(abc.ABC): - __slots__ = "agg", "f" + __slots__ = "agg", "f", "v" rewrites: tuple = ( empty_in_values_right_side, @@ -138,6 +148,7 @@ class SQLGlotCompiler(abc.ABC): def __init__(self) -> None: self.agg = AggGen(aggfunc=self._aggregate) self.f = FuncGen() + self.v = VarGen() @property @abc.abstractmethod @@ -258,14 +269,56 @@ def visit_Alias(self, op, *, arg, name): return arg @visit_node.register(ops.Literal) - def visit_Literal(self, op, *, value, dtype, **kw): + def visit_Literal(self, op, *, value, dtype): + """Compile a literal value. + + This is the default implementation for compiling literal values. + + Most backends should not need to override this method unless they want + to handle NULL literals as well as every other type of non-null literal + including integers, floating point numbers, decimals, strings, etc. + + The logic here is: + + 1. If the value is None and the type is nullable, return NULL + 1. If the value is None and the type is not nullable, raise an error + 1. Call `visit_NonNullLiteral` method. + 1. If the previous returns `None`, call `visit_DefaultLiteral` method + else return the result of the previous step. 
+ """ if value is None: if dtype.nullable: return NULL if dtype.is_null() else self.cast(NULL, dtype) raise com.UnsupportedOperationError( f"Unsupported NULL for non-nullable type: {dtype!r}" ) - elif dtype.is_integer(): + else: + result = self.visit_NonNullLiteral(op, value=value, dtype=dtype) + if result is None: + return self.visit_DefaultLiteral(op, value=value, dtype=dtype) + return result + + def visit_NonNullLiteral(self, op, *, value, dtype): + """Compile a non-null literal differently than the default implementation. + + Most backends should implement this, but only when they need to handle + some non-null literal differently than the default implementation + (`visit_DefaultLiteral`). + + Return `None` from an override of this method to fall back to + `visit_DefaultLiteral`. + """ + return self.visit_DefaultLiteral(op, value=value, dtype=dtype) + + def visit_DefaultLiteral(self, op, *, value, dtype): + """Compile a literal with a non-null value. + + This is the default implementation for compiling non-null literals. + + Most backends should not need to override this method unless they want + to handle compiling every kind of non-null literal value. + """ + if dtype.is_integer(): return sge.convert(value) elif dtype.is_floating(): if math.isnan(value): @@ -274,7 +327,7 @@ def visit_Literal(self, op, *, value, dtype, **kw): return self.POS_INF if value < 0 else self.NEG_INF return sge.convert(value) elif dtype.is_decimal(): - return self.cast(sge.convert(str(value)), dtype) + return self.cast(str(value), dtype) elif dtype.is_interval(): return sge.Interval( this=sge.convert(str(value)), unit=dtype.resolution.upper() @@ -304,7 +357,7 @@ def visit_Literal(self, op, *, value, dtype, **kw): keys = self.f.array( *( self.visit_Literal( - ops.Literal(k, key_type), value=k, dtype=key_type, **kw + ops.Literal(k, key_type), value=k, dtype=key_type ) for k in value.keys() ) @@ -314,7 +367,7 @@ def visit_Literal(self, op, *, value, dtype, **kw): values = self.f.array( *( self.visit_Literal( - ops.Literal(v, value_type), value=v, dtype=value_type, **kw + ops.Literal(v, value_type), value=v, dtype=value_type ) for v in value.values() ) @@ -323,15 +376,14 @@ def visit_Literal(self, op, *, value, dtype, **kw): return self.f.map(keys, values) elif dtype.is_struct(): items = [ - sge.Slice( - this=sge.convert(k), - expression=self.visit_Literal( - ops.Literal(v, field_dtype), value=v, dtype=field_dtype, **kw - ), - ) + self.visit_Literal( + ops.Literal(v, field_dtype), value=v, dtype=field_dtype + ).as_(k, quoted=self.quoted) for field_dtype, (k, v) in zip(dtype.types, value.items()) ] return sge.Struct.from_arg_list(items) + elif dtype.is_uuid(): + return self.cast(str(value), dtype) else: raise NotImplementedError(f"Unsupported type: {dtype!r}") @@ -403,14 +455,6 @@ def visit_Not(self, op, *, arg): ### Timey McTimeFace - @visit_node.register(ops.Date) - def visit_Date(self, op, *, arg): - return sge.Date(this=arg) - - @visit_node.register(ops.DateFromYMD) - def visit_DateFromYMD(self, op, *, year, month, day): - return sge.DateFromParts(year=year, month=month, day=day) - @visit_node.register(ops.Time) def visit_Time(self, op, *, arg): return self.cast(arg, to=dt.time) @@ -429,39 +473,39 @@ def visit_ExtractEpochSeconds(self, op, *, arg): @visit_node.register(ops.ExtractYear) def visit_ExtractYear(self, op, *, arg): - return self.f.extract("year", arg) + return self.f.extract(self.v.year, arg) @visit_node.register(ops.ExtractMonth) def visit_ExtractMonth(self, op, *, arg): - return 
self.f.extract("month", arg) + return self.f.extract(self.v.month, arg) @visit_node.register(ops.ExtractDay) def visit_ExtractDay(self, op, *, arg): - return self.f.extract("day", arg) + return self.f.extract(self.v.day, arg) @visit_node.register(ops.ExtractDayOfYear) def visit_ExtractDayOfYear(self, op, *, arg): - return self.f.extract("dayofyear", arg) + return self.f.extract(self.v.dayofyear, arg) @visit_node.register(ops.ExtractQuarter) def visit_ExtractQuarter(self, op, *, arg): - return self.f.extract("quarter", arg) + return self.f.extract(self.v.quarter, arg) @visit_node.register(ops.ExtractWeekOfYear) def visit_ExtractWeekOfYear(self, op, *, arg): - return self.f.extract("week", arg) + return self.f.extract(self.v.week, arg) @visit_node.register(ops.ExtractHour) def visit_ExtractHour(self, op, *, arg): - return self.f.extract("hour", arg) + return self.f.extract(self.v.hour, arg) @visit_node.register(ops.ExtractMinute) def visit_ExtractMinute(self, op, *, arg): - return self.f.extract("minute", arg) + return self.f.extract(self.v.minute, arg) @visit_node.register(ops.ExtractSecond) def visit_ExtractSecond(self, op, *, arg): - return self.f.extract("second", arg) + return self.f.extract(self.v.second, arg) @visit_node.register(ops.TimestampTruncate) @visit_node.register(ops.DateTruncate) @@ -479,11 +523,10 @@ def visit_TimestampTruncate(self, op, *, arg, unit): "us": "us", } - unit = unit.short - if (duckunit := unit_mapping.get(unit)) is None: + if (unit := unit_mapping.get(unit.short)) is None: raise com.UnsupportedOperationError(f"Unsupported truncate unit {unit}") - return self.f.date_trunc(duckunit, arg) + return self.f.date_trunc(unit, arg) @visit_node.register(ops.DayOfWeekIndex) def visit_DayOfWeekIndex(self, op, *, arg): @@ -521,7 +564,6 @@ def visit_LStrip(self, op, *, arg): def visit_Substring(self, op, *, arg, start, length): if_pos = sge.Substring(this=arg, start=start + 1, length=length) if_neg = sge.Substring(this=arg, start=start, length=length) - return self.if_(start >= 0, if_pos, if_neg) @visit_node.register(ops.StringFind) @@ -538,18 +580,10 @@ def visit_StringFind(self, op, *, arg, substr, start, end): return self.f.strpos(arg, substr) - @visit_node.register(ops.RegexSearch) - def visit_RegexSearch(self, op, *, arg, pattern): - return sge.RegexpLike(this=arg, expression=pattern, flag=sge.convert("s")) - @visit_node.register(ops.RegexReplace) def visit_RegexReplace(self, op, *, arg, pattern, replacement): return self.f.regexp_replace(arg, pattern, replacement, "g") - @visit_node.register(ops.RegexExtract) - def visit_RegexExtract(self, op, *, arg, pattern, index): - return self.f.regexp_extract(arg, pattern, index, dialect=self.dialect) - @visit_node.register(ops.StringConcat) def visit_StringConcat(self, op, *, arg): return self.f.concat(*arg) @@ -566,10 +600,6 @@ def visit_StringSQLLike(self, op, *, arg, pattern, escape): def visit_StringSQLILike(self, op, *, arg, pattern, escape): return arg.ilike(pattern) - @visit_node.register(ops.StringToTimestamp) - def visit_StringToTimestamp(self, op, *, arg, format_str): - return sge.StrToTime(this=arg, format=format_str) - ### NULL PLAYER CHARACTER @visit_node.register(ops.IsNull) def visit_IsNull(self, op, *, arg): @@ -583,12 +613,6 @@ def visit_NotNull(self, op, *, arg): def visit_InValues(self, op, *, value, options): return value.isin(*options) - ### Definitely Not Tensors - - @visit_node.register(ops.ArrayStringJoin) - def visit_ArrayStringJoin(self, op, *, sep, arg): - return self.f.array_to_string(arg, sep) - 
### Counting @visit_node.register(ops.CountDistinct) @@ -667,15 +691,12 @@ def visit_Array(self, op, *, exprs): @visit_node.register(ops.StructColumn) def visit_StructColumn(self, op, *, names, values): return sge.Struct.from_arg_list( - [ - sge.Slice(this=sge.convert(name), expression=value) - for name, value in zip(names, values) - ] + [value.as_(name, quoted=self.quoted) for name, value in zip(names, values)] ) @visit_node.register(ops.StructField) def visit_StructField(self, op, *, arg, field): - return arg[sge.convert(field)] + return sge.Dot(this=arg, expression=sg.to_identifier(field, quoted=self.quoted)) @visit_node.register(ops.IdenticalTo) def visit_IdenticalTo(self, op, *, left, right): @@ -695,10 +716,6 @@ def visit_Coalesce(self, op, *, arg): ### Ordering and window functions - @visit_node.register(ops.RowNumber) - def visit_RowNumber(self, op): - return sge.RowNumber() - @visit_node.register(ops.SortKey) def visit_SortKey(self, op, *, expr, ascending: bool): return sge.Ordered(this=expr, desc=not ascending) @@ -726,7 +743,7 @@ def visit_Window(self, op, *, how, func, start, end, group_by, order_by): end_side = end.get("side", "FOLLOWING") spec = sge.WindowSpec( - kind=op.how.upper(), + kind=how.upper(), start=start_value, start_side=start_side, end=end_value, @@ -735,8 +752,14 @@ def visit_Window(self, op, *, how, func, start, end, group_by, order_by): ) order = sge.Order(expressions=order_by) if order_by else None + spec = self._minimize_spec(op.start, op.end, spec) + return sge.Window(this=func, partition_by=group_by, order=order, spec=spec) + @staticmethod + def _minimize_spec(start, end, spec): + return spec + @visit_node.register(ops.Lag) @visit_node.register(ops.Lead) def visit_LagLead(self, op, *, arg, offset, default): @@ -790,10 +813,6 @@ def visit_TimestampBucket(self, op, *, arg, interval, offset): def visit_ArrayConcat(self, op, *, arg): return sge.ArrayConcat(this=arg[0], expressions=list(arg[1:])) - @visit_node.register(ops.ArrayContains) - def visit_ArrayContains(self, op, *, arg, other): - return sge.ArrayContains(this=arg, expression=other) - ## relations def _dedup_name( @@ -1094,22 +1113,18 @@ def visit_SQLStringView(self, op, *, query: str, name: str, child): def visit_SQLQueryResult(self, op, *, query, schema, source): return sg.parse_one(query, read=self.dialect).subquery() - @visit_node.register(ops.Unnest) - def visit_Unnest(self, op, *, arg): - return sge.Explode(this=arg) - - @visit_node.register(ops.RegexSplit) - def visit_RegexSplit(self, op, *, arg, pattern): - return sge.RegexpSplit(this=arg, expression=pattern) - - @visit_node.register(ops.Levenshtein) - def visit_Levenshtein(self, op, *, left, right): - return sge.Levenshtein(this=left, expression=right) - @visit_node.register(ops.JoinTable) def visit_JoinTable(self, op, *, parent, index): return parent + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + return self.cast(arg, to) + + @visit_node.register(ops.Value) + def visit_Undefined(self, op, **_): + raise com.OperationNotDefinedError(type(op).__name__) + _SIMPLE_OPS = { ops.All: "bool_and", @@ -1117,7 +1132,6 @@ def visit_JoinTable(self, op, *, parent, index): ops.ArgMax: "max_by", ops.ArgMin: "min_by", ops.Power: "pow", - # Unary operations ops.IsNan: "isnan", ops.IsInf: "isinf", ops.Abs: "abs", @@ -1137,7 +1151,6 @@ def visit_JoinTable(self, op, *, parent, index): ops.Pi: "pi", ops.RandomScalar: "random", ops.Sign: "sign", - # Unary aggregates ops.ApproxCountDistinct: "approx_distinct", ops.Median: "median", 
ops.Mean: "avg", @@ -1152,14 +1165,12 @@ def visit_JoinTable(self, op, *, parent, index): ops.Any: "bool_or", ops.ArrayCollect: "array_agg", ops.GroupConcat: "group_concat", - # string operations ops.StringContains: "contains", ops.StringLength: "length", ops.Lowercase: "lower", ops.Uppercase: "upper", ops.StartsWith: "starts_with", ops.StrRight: "right", - # Other operations ops.IfElse: "if", ops.ArrayLength: "length", ops.NullIf: "nullif", @@ -1167,7 +1178,6 @@ def visit_JoinTable(self, op, *, parent, index): ops.Map: "map", ops.JSONGetItem: "json_extract", ops.ArrayFlatten: "flatten", - # common enough to be in the base, but not modeled in sqlglot ops.NTile: "ntile", ops.Degrees: "degrees", ops.Radians: "radians", @@ -1185,6 +1195,17 @@ def visit_JoinTable(self, op, *, parent, index): ops.StringReplace: "replace", ops.Reverse: "reverse", ops.StringSplit: "split", + ops.RegexSearch: "regexp_like", + ops.DateFromYMD: "datefromparts", + ops.Date: "date", + ops.RowNumber: "row_number", + ops.StringToTimestamp: "str_to_time", + ops.ArrayStringJoin: "array_to_string", + ops.Levenshtein: "levenshtein", + ops.Unnest: "explode", + ops.RegexSplit: "regexp_split", + ops.ArrayContains: "array_contains", + ops.RegexExtract: "regexp_extract", } _BINARY_INFIX_OPS = { diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index 46a7c996c996..9c4a7fe531df 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -274,7 +274,7 @@ def _from_ibis_Interval(cls, dtype: dt.Interval) -> sge.DataType: assert dtype.unit is not None, "interval unit cannot be None" return sge.DataType( this=typecode.INTERVAL, - expressions=[sge.IntervalSpan(this=sge.Var(this=dtype.unit.name))], + expressions=[sge.Var(this=dtype.unit.name)], ) @classmethod diff --git a/ibis/backends/clickhouse/compiler.py b/ibis/backends/clickhouse/compiler.py index 44760bf8ed8e..7fdb5428f1d8 100644 --- a/ibis/backends/clickhouse/compiler.py +++ b/ibis/backends/clickhouse/compiler.py @@ -41,7 +41,19 @@ def _aggregate(self, funcname: str, *args, where): has_filter = where is not None func = self.f[funcname + "If" * has_filter] args += (where,) * has_filter - return func(*args) + + return func(*args, dialect=self.dialect) + + @staticmethod + def _minimize_spec(start, end, spec): + if ( + start is None + and isinstance(getattr(end, "value", None), ops.Literal) + and end.value.value == 0 + and end.following + ): + return None + return spec @singledispatchmethod def visit_node(self, op, **kw): @@ -223,11 +235,8 @@ def visit_IntervalFromInteger(self, op, *, arg, unit): ) return super().visit_node(op, arg=arg, unit=unit) - @visit_node.register(ops.Literal) - def visit_Literal(self, op, *, value, dtype, **kw): - if value is None: - return super().visit_node(op, value=value, dtype=dtype, **kw) - elif dtype.is_inet(): + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_inet(): v = str(value) return self.f.toIPv6(v) if ":" in v else self.f.toIPv4(v) elif dtype.is_string(): @@ -286,7 +295,7 @@ def visit_Literal(self, op, *, value, dtype, **kw): value_type = dtype.value_type values = [ self.visit_Literal( - ops.Literal(v, dtype=value_type), value=v, dtype=value_type, **kw + ops.Literal(v, dtype=value_type), value=v, dtype=value_type ) for v in value ] @@ -303,7 +312,6 @@ def visit_Literal(self, op, *, value, dtype, **kw): ops.Literal(v, dtype=value_type), value=v, dtype=value_type, - **kw, ) ) @@ -311,13 +319,13 @@ def visit_Literal(self, op, *, value, dtype, 
**kw): elif dtype.is_struct(): fields = [ self.visit_Literal( - ops.Literal(v, dtype=field_type), value=v, dtype=field_type, **kw + ops.Literal(v, dtype=field_type), value=v, dtype=field_type ) for field_type, v in zip(dtype.types, value.values()) ] return self.f.tuple(*fields) else: - return super().visit_node(op, value=value, dtype=dtype, **kw) + return None @visit_node.register(ops.TimestampFromUNIX) def visit_TimestampFromUNIX(self, op, *, arg, unit): diff --git a/ibis/backends/datafusion/compiler.py b/ibis/backends/datafusion/compiler.py index 3166c1c3b4d3..d82ad23e5597 100644 --- a/ibis/backends/datafusion/compiler.py +++ b/ibis/backends/datafusion/compiler.py @@ -73,11 +73,8 @@ def _to_timestamp(self, value, target_dtype, literal=False): def visit_node(self, op, **kw): return super().visit_node(op, **kw) - @visit_node.register(ops.Literal) - def visit_Literal(self, op, *, value, dtype, **kw): - if value is None: - return super().visit_node(op, value=value, dtype=dtype, **kw) - elif dtype.is_decimal(): + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_decimal(): return self.cast( sg.exp.convert(str(value)), dt.Decimal(precision=dtype.precision or 38, scale=dtype.scale or 9), @@ -106,7 +103,7 @@ def visit_Literal(self, op, *, value, dtype, **kw): elif dtype.is_binary(): return sg.exp.HexString(this=value.hex()) else: - return super().visit_node(op, value=value, dtype=dtype, **kw) + return None @visit_node.register(ops.Cast) def visit_Cast(self, op, *, arg, to): diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 44e4fe78c7f4..b25257e3979d 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -28,7 +28,6 @@ from ibis.backends.base import CanCreateSchema from ibis.backends.base.sqlglot import SQLGlotBackend from ibis.backends.base.sqlglot.compiler import STAR, C, F -from ibis.backends.base.sqlglot.datatypes import DuckDBType from ibis.backends.duckdb.compiler import DuckDBCompiler from ibis.backends.duckdb.datatypes import DuckDBPandasData from ibis.expr.operations.udf import InputType @@ -311,7 +310,7 @@ def get_schema( return sch.Schema( { - name: DuckDBType.from_string(typ, nullable=nullable) + name: self.compiler.type_mapper.from_string(typ, nullable=nullable) for name, typ, nullable in zip(names, types, nullables) } ) @@ -1394,7 +1393,10 @@ def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: for name, typ, null in zip( rows["column_name"], rows["column_type"], rows["null"] ): - yield name, DuckDBType.from_string(typ, nullable=null == "YES") + yield ( + name, + self.compiler.type_mapper.from_string(typ, nullable=null == "YES"), + ) def _register_in_memory_tables(self, expr: ir.Expr) -> None: for memtable in expr.op().find(ops.InMemoryTable): @@ -1434,10 +1436,10 @@ def _compile_udf(self, udf_node: ops.ScalarUDF) -> None: func = udf_node.__func__ name = func.__name__ input_types = [ - DuckDBType.to_string(param.annotation.pattern.dtype) + self.compiler.type_mapper.to_string(param.annotation.pattern.dtype) for param in udf_node.__signature__.parameters.values() ] - output_type = DuckDBType.to_string(udf_node.dtype) + output_type = self.compiler.type_mapper.to_string(udf_node.dtype) def register_udf(con): return con.create_function( diff --git a/ibis/backends/duckdb/compiler.py b/ibis/backends/duckdb/compiler.py index 6afe634d4280..86d60785895f 100644 --- a/ibis/backends/duckdb/compiler.py +++ b/ibis/backends/duckdb/compiler.py @@ -239,11 +239,8 @@ def visit_Cast(self, op, *, arg, 
to): return self.cast(arg, to) - @visit_node.register(ops.Literal) - def visit_Literal(self, op, *, value, dtype, **kw): - if value is None: - return super().visit_node(op, value=value, dtype=dtype, **kw) - elif dtype.is_interval(): + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_interval(): if dtype.unit.short == "ns": raise com.UnsupportedOperationError( f"{self.dialect} doesn't support nanosecond interval resolutions" @@ -288,7 +285,7 @@ def visit_Literal(self, op, *, value, dtype, **kw): return self.f[funcname](*args) else: - return super().visit_node(op, value=value, dtype=dtype, **kw) + return None @visit_node.register(ops.Capitalize) def visit_Capitalize(self, op, *, arg): @@ -340,6 +337,10 @@ def visit_TimestampNow(self, op): """DuckDB current timestamp defaults to timestamp + tz.""" return self.cast(super().visit_TimestampNow(op), dt.timestamp) + @visit_node.register(ops.RegexExtract) + def visit_RegexExtract(self, op, *, arg, pattern, index): + return self.f.regexp_extract(arg, pattern, index, dialect=self.dialect) + _SIMPLE_OPS = { ops.ArrayPosition: "list_indexof", diff --git a/ibis/backends/pandas/kernels.py b/ibis/backends/pandas/kernels.py index 1e28095c1ee2..7bfea9883fdd 100644 --- a/ibis/backends/pandas/kernels.py +++ b/ibis/backends/pandas/kernels.py @@ -308,7 +308,7 @@ def round_serieswise(arg, digits): ops.Pi: lambda: np.pi, ops.TimestampNow: lambda: pd.Timestamp("now", tz="UTC").tz_localize(None), ops.StringConcat: lambda xs: reduce(operator.add, xs), - ops.StringJoin: lambda sep, xs: reduce(lambda x, y: x + sep + y, xs), + ops.StringJoin: lambda xs, sep: reduce(lambda x, y: x + sep + y, xs), ops.Log: lambda x, base: np.log(x) if base is None else np.log(x) / np.log(base), } diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql index 6eaa105c4a49..ffa8c03c59cf 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql @@ -27,8 +27,8 @@ FROM ( FROM ( SELECT t1.field_of_study, - t1.__pivoted__['years'] AS years, - t1.__pivoted__['degrees'] AS degrees + t1.__pivoted__.years AS years, + t1.__pivoted__.degrees AS degrees FROM ( SELECT t0.field_of_study, @@ -72,8 +72,8 @@ FROM ( FROM ( SELECT t1.field_of_study, - t1.__pivoted__['years'] AS years, - t1.__pivoted__['degrees'] AS degrees + t1.__pivoted__.years AS years, + t1.__pivoted__.degrees AS degrees FROM ( SELECT t0.field_of_study, diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql index fb4bf6a1c3ff..918ff235ee2a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql @@ -4,4 +4,4 @@ FROM t AS t0 WHERE LOWER(t0.color) LIKE '%de%' AND CONTAINS(LOWER(t0.color), 'de') - AND REGEXP_MATCHES(LOWER(t0.color), '.*ge.*', 's') \ No newline at end of file + AND REGEXP_MATCHES(LOWER(t0.color), '.*ge.*') \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql index 26824f377a3e..b8cfc5063ba5 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql +++ 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql @@ -30,5 +30,5 @@ FROM ( t3.month AS month_right FROM functional_alltypes AS t1 INNER JOIN functional_alltypes AS t3 - ON t1.tinyint_col < EXTRACT('minute' FROM t3.timestamp_col) + ON t1.tinyint_col < EXTRACT(minute FROM t3.timestamp_col) ) AS t5 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql index aa75e2be0ae1..feacfd23da7e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql @@ -5,7 +5,7 @@ SELECT FROM ( SELECT t11.region, - EXTRACT('year' FROM t11.odate) AS year, + EXTRACT(year FROM t11.odate) AS year, CAST(SUM(t11.amount) AS DOUBLE) AS total FROM ( SELECT @@ -28,7 +28,7 @@ FROM ( INNER JOIN ( SELECT t11.region, - EXTRACT('year' FROM t11.odate) AS year, + EXTRACT(year FROM t11.odate) AS year, CAST(SUM(t11.amount) AS DOUBLE) AS total FROM ( SELECT diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index be97ad419d92..d403f613146e 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -1350,33 +1350,6 @@ def test_date_quantile(alltypes, func): assert result == date(2009, 12, 31) -@pytest.mark.parametrize( - ("result_fn", "expected_fn"), - [ - param( - lambda t, where, sep: ( - t.group_by("bigint_col") - .aggregate(tmp=lambda t: t.string_col.group_concat(sep, where=where)) - .order_by("bigint_col") - ), - lambda t, where, sep: ( - ( - t - if isinstance(where, slice) - else t.assign(string_col=t.string_col.where(where)) - ) - .groupby("bigint_col") - .string_col.agg( - lambda s: (np.nan if pd.isna(s).all() else sep.join(s.values)) - ) - .rename("tmp") - .sort_index() - .reset_index() - ), - id="group_concat", - ) - ], -) @pytest.mark.parametrize( ("ibis_sep", "pandas_sep"), [ @@ -1422,8 +1395,7 @@ def test_date_quantile(alltypes, func): ], ) @pytest.mark.notimpl( - ["datafusion", "polars", "mssql"], - raises=com.OperationNotDefinedError, + ["datafusion", "polars", "mssql"], raises=com.OperationNotDefinedError ) @pytest.mark.notimpl( ["druid"], @@ -1442,19 +1414,30 @@ def test_date_quantile(alltypes, func): reason='SQL parse failed. 
Encountered "group_concat ("', ) def test_group_concat( - backend, - alltypes, - df, - result_fn, - expected_fn, - ibis_cond, - pandas_cond, - ibis_sep, - pandas_sep, + backend, alltypes, df, ibis_cond, pandas_cond, ibis_sep, pandas_sep ): - expr = result_fn(alltypes, ibis_cond(alltypes), ibis_sep) + expr = ( + alltypes.group_by("bigint_col") + .aggregate( + tmp=lambda t: t.string_col.group_concat(ibis_sep, where=ibis_cond(t)) + ) + .order_by("bigint_col") + ) result = expr.execute() - expected = expected_fn(df, pandas_cond(df), pandas_sep) + expected = ( + ( + df + if isinstance(pandas_cond(df), slice) + else df.assign(string_col=df.string_col.where(pandas_cond(df))) + ) + .groupby("bigint_col") + .string_col.agg( + lambda s: (np.nan if pd.isna(s).all() else pandas_sep.join(s.values)) + ) + .rename("tmp") + .sort_index() + .reset_index() + ) backend.assert_frame_equal(result.fillna(pd.NA), expected.fillna(pd.NA)) diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index b7aa81c43dd1..7fc9696aa9ab 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -247,9 +247,7 @@ def test_scalar_param_date(backend, alltypes, value): "datafusion", "clickhouse", "polars", - "duckdb", "sqlite", - "snowflake", "impala", "oracle", "pyspark", diff --git a/ibis/backends/tests/test_uuid.py b/ibis/backends/tests/test_uuid.py index ea9064dd0d74..9a4dce517afa 100644 --- a/ibis/backends/tests/test_uuid.py +++ b/ibis/backends/tests/test_uuid.py @@ -25,6 +25,7 @@ "snowflake": "VARCHAR", "trino": "varchar(32)" if SQLALCHEMY2 else "uuid", "postgres": "uuid", + "clickhouse": "Nullable(UUID)", } UUID_EXPECTED_VALUES = { @@ -41,6 +42,7 @@ "oracle": TEST_UUID, "flink": TEST_UUID, "exasol": TEST_UUID, + "clickhouse": TEST_UUID, } pytestmark = pytest.mark.notimpl( @@ -64,7 +66,7 @@ raises=sqlalchemy.exc.NotSupportedError, ) @pytest.mark.notimpl( - ["impala", "datafusion", "polars", "clickhouse"], raises=NotImplementedError + ["impala", "datafusion", "polars"], raises=NotImplementedError ) @pytest.mark.notimpl( ["risingwave"], diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index e7968831330f..b164f158d5ac 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -177,11 +177,6 @@ def calc_zscore(s): ["dask", "pandas", "polars"], raises=com.OperationNotDefinedError, ), - pytest.mark.notyet( - ["clickhouse"], - raises=ClickHouseDatabaseError, - reason="ClickHouse requires a specific window frame: unbounded preceding and unbounded following ONLY", - ), pytest.mark.broken( ["impala"], raises=AssertionError, diff --git a/ibis/backends/tests/tpch/conftest.py b/ibis/backends/tests/tpch/conftest.py index cfb85452841b..b2a88f9370a1 100644 --- a/ibis/backends/tests/tpch/conftest.py +++ b/ibis/backends/tests/tpch/conftest.py @@ -85,7 +85,14 @@ def wrapper(*args, backend, snapshot, **kwargs): assert not expected.empty assert len(expected) == len(result) - backend.assert_frame_equal(result, expected, check_dtype=False) + assert result.columns.tolist() == expected.columns.tolist() + for column in result.columns: + left = result.loc[:, column] + right = expected.loc[:, column] + assert ( + pytest.approx(left.values.tolist(), nan_ok=True) + == right.values.tolist() + ) # only write sql if the execution passes snapshot.assert_match(ibis_sql, sql_path_name) diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql 
b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql index 35411472de9a..ea7f9f6eb7fe 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql @@ -25,7 +25,7 @@ FROM ( t6.l_shipdate, t6.l_extendedprice, t6.l_discount, - EXTRACT('year' FROM t6.l_shipdate) AS l_year, + EXTRACT(year FROM t6.l_shipdate) AS l_year, t6.l_extendedprice * ( CAST(1 AS TINYINT) - t6.l_discount ) AS volume diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql index 48269d09259c..ce954992953d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql @@ -25,7 +25,7 @@ FROM ( "t11"."l_shipdate", "t11"."l_extendedprice", "t11"."l_discount", - DATE_PART('year', "t11"."l_shipdate") AS "l_year", + DATE_PART(year, "t11"."l_shipdate") AS "l_year", "t11"."l_extendedprice" * ( 1 - "t11"."l_discount" ) AS "volume" diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql index 97b1be133851..99ba095e07ae 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql @@ -16,7 +16,7 @@ FROM ( CASE WHEN t23.nation = 'BRAZIL' THEN t23.volume ELSE CAST(0 AS TINYINT) END AS nation_volume FROM ( SELECT - EXTRACT('year' FROM t10.o_orderdate) AS o_year, + EXTRACT(year FROM t10.o_orderdate) AS o_year, t8.l_extendedprice * ( CAST(1 AS TINYINT) - t8.l_discount ) AS volume, diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql index 8d25f3b2df17..e6b90d1f7a6e 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql @@ -16,7 +16,7 @@ FROM ( CASE WHEN "t30"."nation" = 'BRAZIL' THEN "t30"."volume" ELSE 0 END AS "nation_volume" FROM ( SELECT - DATE_PART('year', "t17"."o_orderdate") AS "o_year", + DATE_PART(year, "t17"."o_orderdate") AS "o_year", "t15"."l_extendedprice" * ( 1 - "t15"."l_discount" ) AS "volume", diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql index 21489f03313d..b305db73e0ae 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql @@ -22,7 +22,7 @@ FROM ( ) - ( t8.ps_supplycost * t6.l_quantity ) AS amount, - EXTRACT('year' FROM t10.o_orderdate) AS o_year, + EXTRACT(year FROM t10.o_orderdate) AS o_year, t11.n_name AS nation, t9.p_name FROM lineitem AS t6 diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql index a57563a10289..b828b08644bc 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql @@ -22,7 +22,7 @@ FROM ( ) - ( "t14"."ps_supplycost" * "t12"."l_quantity" ) AS "amount", - DATE_PART('year', "t16"."o_orderdate") AS "o_year", + 
DATE_PART(year, "t16"."o_orderdate") AS "o_year", "t17"."n_name" AS "nation", "t15"."p_name" FROM ( diff --git a/ibis/expr/operations/strings.py b/ibis/expr/operations/strings.py index 9b40261b9d2a..bbaef67021c4 100644 --- a/ibis/expr/operations/strings.py +++ b/ibis/expr/operations/strings.py @@ -133,8 +133,8 @@ class FindInSet(Value): @public class StringJoin(Value): - sep: Value[dt.String] arg: VarTuple[Value[dt.String]] + sep: Value[dt.String] dtype = dt.string @@ -145,8 +145,8 @@ def shape(self): @public class ArrayStringJoin(Value): - sep: Value[dt.String] arg: Value[dt.Array[dt.String]] + sep: Value[dt.String] dtype = dt.string shape = rlz.shape_like("args") diff --git a/ibis/expr/types/arrays.py b/ibis/expr/types/arrays.py index 7cc58afe2ccf..923e01f44dcc 100644 --- a/ibis/expr/types/arrays.py +++ b/ibis/expr/types/arrays.py @@ -354,7 +354,7 @@ def join(self, sep: str | ir.StringValue) -> ir.StringValue: -------- [`StringValue.join`](./expression-strings.qmd#ibis.expr.types.strings.StringValue.join) """ - return ops.ArrayStringJoin(sep, self).to_expr() + return ops.ArrayStringJoin(self, sep=sep).to_expr() def map(self, func: Callable[[ir.Value], ir.Value]) -> ir.ArrayValue: """Apply a callable `func` to each element of this array expression. diff --git a/ibis/expr/types/strings.py b/ibis/expr/types/strings.py index 1b1b88d9b6ed..58aaf56b7f80 100644 --- a/ibis/expr/types/strings.py +++ b/ibis/expr/types/strings.py @@ -862,7 +862,7 @@ def join(self, strings: Sequence[str | StringValue] | ir.ArrayValue) -> StringVa cls = ops.ArrayStringJoin else: cls = ops.StringJoin - return cls(self, strings).to_expr() + return cls(strings, sep=self).to_expr() def startswith(self, start: str | StringValue) -> ir.BooleanValue: """Determine whether `self` starts with `end`. diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py index d1983b1b58a8..e202b48f0621 100644 --- a/ibis/formats/pandas.py +++ b/ibis/formats/pandas.py @@ -112,6 +112,8 @@ def infer_table(cls, df, schema=None): return sch.Schema.from_tuples(pairs) + concat = staticmethod(pd.concat) + @classmethod def convert_table(cls, df, schema): if len(schema) != len(df.columns): @@ -122,7 +124,7 @@ def convert_table(cls, df, schema): columns = [] for (_, series), dtype in zip(df.items(), schema.types): columns.append(cls.convert_column(series, dtype)) - df = pd.concat(columns, axis=1) + df = cls.concat(columns, axis=1) # return data with the schema's columns which may be different than the # input columns @@ -395,6 +397,12 @@ def convert(value): class DaskData(PandasData): + @staticmethod + def concat(*args, **kwargs): + import dask.dataframe as dd + + return dd.concat(*args, **kwargs) + @classmethod def infer_column(cls, s): return PyArrowData.infer_column(s.compute()) From 45a42b947693ada091b43310b7d9e92ac9a6fdf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 4 Jan 2024 17:18:02 +0100 Subject: [PATCH 045/161] refactor(polars): update the polars backend to use the new relational abstractions (#7868) Bare minimum changes required for the polars backend to work with the new relational operations. Since polars' join API follows the same semantics as pandas, I'm using the pandas specific rewrites here. In the future we may want to rewrite the compiler similarly to the pandas one: using `node.map()` and `Dispatched`. 
--- .github/workflows/ibis-backends.yml | 10 +- ibis/backends/pandas/rewrites.py | 1 + ibis/backends/polars/__init__.py | 26 ++-- ibis/backends/polars/compiler.py | 195 +++++++++++++++--------- ibis/backends/polars/tests/test_join.py | 28 ++++ ibis/backends/tests/test_interactive.py | 8 +- ibis/backends/tests/test_join.py | 25 --- 7 files changed, 178 insertions(+), 115 deletions(-) create mode 100644 ibis/backends/polars/tests/test_join.py diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index c5aea341638e..7bf6c9fbca61 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -89,11 +89,11 @@ jobs: title: Datafusion extras: - datafusion - # - name: polars - # title: Polars - # extras: - # - polars - # - deltalake + - name: polars + title: Polars + extras: + - polars + - deltalake # - name: mysql # title: MySQL # services: diff --git a/ibis/backends/pandas/rewrites.py b/ibis/backends/pandas/rewrites.py index 7419f92d498d..18cf53813af5 100644 --- a/ibis/backends/pandas/rewrites.py +++ b/ibis/backends/pandas/rewrites.py @@ -224,6 +224,7 @@ def split_join_predicates(left, right, predicates, only_equality=True): @replace(ops.JoinChain) def rewrite_join(_, **kwargs): + # TODO(kszucs): JoinTable.index can be used as a prefix prefixes = {} prefixes[_.first] = prefix = str(len(prefixes)) left = PandasRename.from_prefix(_.first, prefix) diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 1619ac927371..328fbd4b1631 100644 --- a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -12,9 +12,13 @@ import ibis.expr.schema as sch import ibis.expr.types as ir from ibis.backends.base import BaseBackend, Database +from ibis.backends.pandas.rewrites import ( + bind_unbound_table, + replace_parameter, + rewrite_join, +) from ibis.backends.polars.compiler import translate from ibis.backends.polars.datatypes import dtype_to_polars, schema_from_polars -from ibis.common.patterns import Replace from ibis.util import gen_name, normalize_filename if TYPE_CHECKING: @@ -379,20 +383,18 @@ def has_operation(cls, operation: type[ops.Value]) -> bool: def compile( self, expr: ir.Expr, params: Mapping[ir.Expr, object] | None = None, **_: Any ): - node = expr.op() - ctx = self._context - - if params: + if params is None: + params = dict() + else: params = {param.op(): value for param, value in params.items()} - rule = Replace( - ops.ScalarParameter, - lambda _: ops.Literal(value=params[_], dtype=_.dtype), - ) - node = node.replace(rule) - expr = node.to_expr() node = expr.as_table().op() - return translate(node, ctx=ctx) + node = node.replace( + rewrite_join | replace_parameter | bind_unbound_table, + context={"params": params, "backend": self}, + ) + + return translate(node, ctx=self._context) def _get_schema_using_query(self, query: str) -> sch.Schema: return schema_from_polars(self._context.execute(query).schema) diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py index a9927cd6ad1a..97a45438609c 100644 --- a/ibis/backends/polars/compiler.py +++ b/ibis/backends/polars/compiler.py @@ -15,8 +15,10 @@ import ibis.common.exceptions as com import ibis.expr.datatypes as dt import ibis.expr.operations as ops +from ibis.backends.pandas.rewrites import PandasAsofJoin, PandasJoin, PandasRename from ibis.backends.polars.datatypes import dtype_to_polars, schema_from_polars from ibis.expr.operations.udf import InputType +from ibis.util import gen_name def _expr_method(expr, 
op, methods): @@ -59,7 +61,7 @@ def table(op, **_): @translate.register(ops.DummyTable) def dummy_table(op, **kw): - selections = [translate(arg, **kw) for arg in op.values] + selections = [translate(arg, **kw) for name, arg in op.values.items()] return pl.DataFrame().lazy().select(selections) @@ -181,7 +183,7 @@ def _cast(op, strict=True, **kw): return arg.cast(typ, strict=strict) -@translate.register(ops.TableColumn) +@translate.register(ops.Field) def column(op, **_): return pl.col(op.name) @@ -196,35 +198,24 @@ def sort_key(op, **kw): return arg.sort(reverse=descending) # pragma: no cover -@translate.register(ops.Selection) -def selection(op, **kw): - lf = translate(op.table, **kw) - - if op.predicates: - predicates = map(partial(translate, **kw), op.predicates) - predicate = reduce(operator.and_, predicates) - lf = lf.filter(predicate) +@translate.register(ops.Project) +def project(op, **kw): + lf = translate(op.parent, **kw) selections = [] unnests = [] - for arg in op.selections: - if isinstance(arg, ops.TableNode): - for name in arg.schema.names: - column = ops.TableColumn(table=arg, name=name) - selections.append(translate(column, **kw)) - elif ( - isinstance(arg, ops.Alias) and isinstance(unnest := arg.arg, ops.Unnest) - ) or isinstance(unnest := arg, ops.Unnest): - name = arg.name + for name, arg in op.values.items(): + if isinstance(arg, ops.Unnest): unnests.append(name) - selections.append(translate(unnest.arg, **kw).alias(name)) + translated = translate(arg.arg, **kw) elif isinstance(arg, ops.Value): - selections.append(translate(arg, **kw)) + translated = translate(arg, **kw) else: raise com.TranslationError( "Polars backend is unable to compile selection with " f"operation type of {type(arg)}" ) + selections.append(translated.alias(name)) if selections: lf = lf.select(selections) @@ -232,9 +223,16 @@ def selection(op, **kw): if unnests: lf = lf.explode(*unnests) - if op.sort_keys: - by = [key.name for key in op.sort_keys] - descending = [key.descending for key in op.sort_keys] + return lf + + +@translate.register(ops.Sort) +def sort(op, **kw): + lf = translate(op.parent, **kw) + + if op.keys: + by = [key.name for key in op.keys] + descending = [key.descending for key in op.keys] try: lf = lf.sort(by, descending=descending) except TypeError: # pragma: no cover @@ -243,6 +241,18 @@ def selection(op, **kw): return lf +@translate.register(ops.Filter) +def filter_(op, **kw): + lf = translate(op.parent, **kw) + + if op.predicates: + predicates = map(partial(translate, **kw), op.predicates) + predicate = reduce(operator.and_, predicates) + lf = lf.filter(predicate) + + return lf + + @translate.register(ops.Limit) def limit(op, **kw): if (n := op.n) is not None and not isinstance(n, int): @@ -251,75 +261,99 @@ def limit(op, **kw): if not isinstance(offset := op.offset, int): raise NotImplementedError("Dynamic offset not supported") - lf = translate(op.table, **kw) + lf = translate(op.parent, **kw) return lf.slice(offset, n) -@translate.register(ops.Aggregation) +@translate.register(ops.Aggregate) def aggregation(op, **kw): - lf = translate(op.table, **kw) + lf = translate(op.parent, **kw) - if op.predicates: - lf = lf.filter( - reduce( - operator.and_, - map(partial(translate, **kw), op.predicates), + if op.groups: + # project first to handle computed group by columns + lf = ( + lf.with_columns( + [translate(arg, **kw).alias(name) for name, arg in op.groups.items()] ) + .group_by(list(op.groups.keys())) + .agg ) - - # project first to handle computed group by columns - lf = 
lf.with_columns([translate(arg, **kw) for arg in op.by]) - - if op.by: - lf = lf.group_by([pl.col(by.name) for by in op.by]).agg else: lf = lf.select if op.metrics: - metrics = [translate(arg, **kw).alias(arg.name) for arg in op.metrics] + metrics = [translate(arg, **kw).alias(name) for name, arg in op.metrics.items()] lf = lf(metrics) return lf -_join_types = { - ops.InnerJoin: "inner", - ops.LeftJoin: "left", - ops.RightJoin: "right", - ops.OuterJoin: "outer", - ops.LeftAntiJoin: "anti", - ops.LeftSemiJoin: "semi", -} +@translate.register(PandasRename) +def rename(op, **kw): + parent = translate(op.parent, **kw) + return parent.rename(op.mapping) -@translate.register(ops.Join) +@translate.register(PandasJoin) def join(op, **kw): + how = op.how left = translate(op.left, **kw) right = translate(op.right, **kw) - if isinstance(op, ops.RightJoin): + # workaround required for https://github.com/pola-rs/polars/issues/13130 + prefix = gen_name("on") + left_on = {f"{prefix}_{i}": translate(v, **kw) for i, v in enumerate(op.left_on)} + right_on = {f"{prefix}_{i}": translate(v, **kw) for i, v in enumerate(op.right_on)} + left = left.with_columns(**left_on) + right = right.with_columns(**right_on) + on = list(left_on.keys()) + + if how == "right": how = "left" left, right = right, left - else: - how = _join_types[type(op)] - left_on, right_on = [], [] - for pred in op.predicates: - if isinstance(pred, ops.Equals): - left_on.append(translate(pred.left, **kw)) - right_on.append(translate(pred.right, **kw)) - else: - raise com.TranslationError( - "Polars backend is unable to compile join predicate " - f"with operation type of {type(pred)}" - ) + joined = left.join(right, on=on, how=how) + joined = joined.drop(columns=on) + + return joined - return left.join(right, left_on=left_on, right_on=right_on, how=how) + +@translate.register(PandasAsofJoin) +def asof_join(op, **kw): + left = translate(op.left, **kw) + right = translate(op.right, **kw) + + # workaround required for https://github.com/pola-rs/polars/issues/13130 + on, by = gen_name("on"), gen_name("by") + left_on = {f"{on}_{i}": translate(v, **kw) for i, v in enumerate(op.left_on)} + right_on = {f"{on}_{i}": translate(v, **kw) for i, v in enumerate(op.right_on)} + left_by = {f"{by}_{i}": translate(v, **kw) for i, v in enumerate(op.left_by)} + right_by = {f"{by}_{i}": translate(v, **kw) for i, v in enumerate(op.right_by)} + + left = left.with_columns(**left_on, **left_by) + right = right.with_columns(**right_on, **right_by) + + on = list(left_on.keys()) + by = list(left_by.keys()) + + if op.operator in {ops.Less, ops.LessEqual}: + direction = "forward" + elif op.operator in {ops.Greater, ops.GreaterEqual}: + direction = "backward" + elif op.operator == ops.Equals: + direction = "nearest" + else: + raise NotImplementedError(f"Operator {operator} not supported for asof join") + + assert len(on) == 1 + joined = left.join_asof(right, on=on[0], by=by, strategy=direction) + joined = joined.drop(columns=on + by) + return joined @translate.register(ops.DropNa) def dropna(op, **kw): - lf = translate(op.table, **kw) + lf = translate(op.parent, **kw) if op.subset is None: subset = None @@ -337,10 +371,28 @@ def dropna(op, **kw): @translate.register(ops.FillNa) def fillna(op, **kw): - table = translate(op.table, **kw) + table = translate(op.parent, **kw) columns = [] - for name, dtype in op.table.schema.items(): + + repls = op.replacements + + if isinstance(repls, Mapping): + + def get_replacement(name): + repl = repls.get(name) + if repl is not None: + return 
repl.value + else: + return None + + else: + value = repls.value + + def get_replacement(_): + return value + + for name, dtype in op.parent.schema.items(): column = pl.col(name) if isinstance(op.replacements, Mapping): value = op.replacements.get(name) @@ -422,11 +474,11 @@ def greatest(op, **kw): return pl.max_horizontal(arg) -@translate.register(ops.InColumn) +@translate.register(ops.InSubquery) def in_column(op, **kw): value = translate(op.value, **kw) - options = translate(op.options, **kw) - return value.is_in(options) + needle = translate(op.needle, **kw) + return needle.is_in(value) @translate.register(ops.InValues) @@ -734,7 +786,7 @@ def correlation(op, **kw): @translate.register(ops.Distinct) def distinct(op, **kw): - table = translate(op.table, **kw) + table = translate(op.parent, **kw) return table.unique() @@ -1163,6 +1215,11 @@ def execute_self_reference(op, **kw): return translate(op.table, **kw) +@translate.register(ops.JoinTable) +def execute_join_table(op, **kw): + return translate(op.parent, **kw) + + @translate.register(ops.CountDistinctStar) def execute_count_distinct_star(op, **kw): arg = pl.struct(*op.arg.schema.names) diff --git a/ibis/backends/polars/tests/test_join.py b/ibis/backends/polars/tests/test_join.py new file mode 100644 index 000000000000..51a7295dd924 --- /dev/null +++ b/ibis/backends/polars/tests/test_join.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +import pandas as pd +import pandas.testing as tm + +import ibis + + +def test_memtable_join(con): + t1 = ibis.memtable({"x": [1, 2, 3], "y": [4, 5, 6], "z": ["a", "b", "c"]}) + t2 = ibis.memtable({"x": [3, 2, 1], "y": [7, 8, 9], "z": ["d", "e", "f"]}) + expr = t1.join(t2, "x") + + result = con.execute(expr) + expected = pd.DataFrame( + { + "x": [1, 2, 3], + "y": [4, 5, 6], + "z": ["a", "b", "c"], + "x_right": [1, 2, 3], + "y_right": [9, 8, 7], + "z_right": ["f", "e", "d"], + } + ) + + left = result.sort_values("x").reset_index(drop=True) + right = expected.sort_values("x").reset_index(drop=True) + tm.assert_frame_equal(left, right) diff --git a/ibis/backends/tests/test_interactive.py b/ibis/backends/tests/test_interactive.py index 704e17019c6e..b25311b3650b 100644 --- a/ibis/backends/tests/test_interactive.py +++ b/ibis/backends/tests/test_interactive.py @@ -33,7 +33,7 @@ def table(backend): return backend.functional_alltypes -@pytest.mark.notimpl(["pandas"]) +@pytest.mark.notimpl(["pandas", "polars"]) def test_interactive_execute_on_repr(table, queries, snapshot): repr(table.bigint_col.sum()) snapshot.assert_match(queries[0], "out.sql") @@ -53,21 +53,21 @@ def test_repr_png_is_not_none_in_not_interactive(table): assert table._repr_png_() is not None -@pytest.mark.notimpl(["pandas"]) +@pytest.mark.notimpl(["pandas", "polars"]) def test_default_limit(table, snapshot, queries): repr(table.select("id", "bool_col")) snapshot.assert_match(queries[0], "out.sql") -@pytest.mark.notimpl(["pandas"]) +@pytest.mark.notimpl(["pandas", "polars"]) def test_respect_set_limit(table, snapshot, queries): repr(table.select("id", "bool_col").limit(10)) snapshot.assert_match(queries[0], "out.sql") -@pytest.mark.notimpl(["pandas"]) +@pytest.mark.notimpl(["pandas", "polars"]) def test_disable_query_limit(table, snapshot, queries): assert ibis.options.sql.default_limit is None diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index cfe287eca026..b1c3939bf515 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -54,9 +54,6 @@ def check_eq(left, right, 
how, **kwargs): + ["sqlite"] * (vparse(sqlite3.sqlite_version) < vparse("3.39")) ), pytest.mark.xfail_version(datafusion=["datafusion<31"]), - pytest.mark.broken( - ["polars"], reason="upstream outer joins are broken" - ), ], ), ], @@ -141,11 +138,6 @@ def test_filtering_join(backend, batting, awards_players, how): backend.assert_frame_equal(result, expected, check_like=True) -@pytest.mark.broken( - ["polars"], - raises=ValueError, - reason="https://github.com/pola-rs/polars/issues/9335", -) @pytest.mark.notimpl(["exasol"], raises=com.IbisTypeError) def test_join_then_filter_no_column_overlap(awards_players, batting): left = batting[batting.yearID == 2015] @@ -159,11 +151,6 @@ def test_join_then_filter_no_column_overlap(awards_players, batting): assert not q.execute().empty -@pytest.mark.broken( - ["polars"], - raises=ValueError, - reason="https://github.com/pola-rs/polars/issues/9335", -) @pytest.mark.notimpl(["exasol"], raises=com.IbisTypeError) def test_mutate_then_join_no_column_overlap(batting, awards_players): left = batting.mutate(year=batting.yearID).filter(lambda t: t.year == 2015) @@ -176,11 +163,6 @@ def test_mutate_then_join_no_column_overlap(batting, awards_players): @pytest.mark.notimpl(["druid"]) @pytest.mark.notyet(["dask"], reason="dask doesn't support descending order by") @pytest.mark.notyet(["flink"], reason="Flink doesn't support semi joins") -@pytest.mark.broken( - ["polars"], - raises=ValueError, - reason="https://github.com/pola-rs/polars/issues/9335", -) @pytest.mark.parametrize( "func", [ @@ -283,11 +265,6 @@ def test_join_with_pandas_non_null_typed_columns(batting, awards_players): ), ], ) -@pytest.mark.notimpl( - ["polars"], - raises=com.TranslationError, - reason="polars doesn't support join predicates", -) @pytest.mark.notimpl( ["dask"], raises=TypeError, @@ -339,7 +316,6 @@ def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_valu lambda left: left.filter(lambda t: t.x == 1).select(y=lambda t: t.x), [("x", "y")], id="left-xy", - marks=pytest.mark.notyet(["polars"], reason="renaming fails"), ), param( "left", @@ -355,7 +331,6 @@ def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_valu lambda left: left.filter(lambda t: t.x == 1).select(y=lambda t: t.x), [("x", "y")], id="right-xy", - marks=pytest.mark.notyet(["polars"], reason="renaming fails"), ), param( "right", From aefc3f79987b611d9e4a5c63dc4a0e440f90eafd Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 29 Dec 2023 12:17:11 -0500 Subject: [PATCH 046/161] feat(trino): port to sqlglot --- .github/workflows/ibis-backends.yml | 37 +- ibis/backends/base/sqlglot/datatypes.py | 64 ++ ibis/backends/base/sqlglot/rewrites.py | 47 ++ ibis/backends/conftest.py | 1 - ibis/backends/tests/errors.py | 5 + .../test_default_limit/trino/out.sql | 5 + .../test_disable_query_limit/trino/out.sql | 5 + .../trino/out.sql | 3 + .../test_respect_set_limit/trino/out.sql | 10 + .../test_group_by_has_index/trino/out.sql | 8 +- .../test_sql/test_isin_bug/trino/out.sql | 18 +- .../test_union_aliasing/trino/out.sql | 191 ++++-- ibis/backends/tests/test_aggregation.py | 17 +- ibis/backends/tests/test_array.py | 37 +- ibis/backends/tests/test_asof_join.py | 4 +- ibis/backends/tests/test_binary.py | 9 +- ibis/backends/tests/test_client.py | 4 +- ibis/backends/tests/test_export.py | 6 +- ibis/backends/tests/test_generic.py | 45 +- ibis/backends/tests/test_numeric.py | 40 +- ibis/backends/tests/test_struct.py | 11 +- 
ibis/backends/tests/test_temporal.py | 3 +- ibis/backends/tests/test_window.py | 2 + .../test_h01/test_tpc_h01/trino/h01.sql | 87 ++- .../test_h02/test_tpc_h02/trino/h02.sql | 264 +++++--- .../test_h03/test_tpc_h03/trino/h03.sql | 170 ++++- .../test_h04/test_tpc_h04/trino/h04.sql | 66 +- .../test_h05/test_tpc_h05/trino/h05.sql | 212 ++++++- .../test_h06/test_tpc_h06/trino/h06.sql | 33 +- .../test_h07/test_tpc_h07/trino/h07.sql | 172 +++-- .../test_h08/test_tpc_h08/trino/h08.sql | 188 ++++-- .../test_h09/test_tpc_h09/trino/h09.sql | 142 ++++- .../test_h10/test_tpc_h10/trino/h10.sql | 207 ++++-- .../test_h11/test_tpc_h11/trino/h11.sql | 184 +++++- .../test_h12/test_tpc_h12/trino/h12.sql | 125 +++- .../test_h13/test_tpc_h13/trino/h13.sql | 81 ++- .../test_h14/test_tpc_h14/trino/h14.sql | 115 +++- .../test_h15/test_tpc_h15/trino/h15.sql | 173 +++-- .../test_h16/test_tpc_h16/trino/h16.sql | 116 +++- .../test_h17/test_tpc_h17/trino/h17.sql | 134 +++- .../test_h18/test_tpc_h18/trino/h18.sql | 201 ++++-- .../test_h19/test_tpc_h19/trino/h19.sql | 205 +++++- .../test_h20/test_tpc_h20/trino/h20.sql | 152 +++-- .../test_h21/test_tpc_h21/trino/h21.sql | 196 ++++-- .../test_h22/test_tpc_h22/trino/h22.sql | 109 ++-- ibis/backends/tests/tpch/test_h15.py | 11 +- ibis/backends/trino/__init__.py | 484 +++++++++----- ibis/backends/trino/compiler.py | 565 +++++++++++++++-- ibis/backends/trino/converter.py | 24 + ibis/backends/trino/datatypes.py | 149 ----- ibis/backends/trino/registry.py | 600 ------------------ ibis/backends/trino/tests/conftest.py | 47 +- ibis/backends/trino/tests/test_client.py | 44 +- ibis/backends/trino/tests/test_datatypes.py | 2 +- ibis/expr/rewrites.py | 1 + poetry.lock | 5 +- pyproject.toml | 4 +- 57 files changed, 3852 insertions(+), 1988 deletions(-) create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/trino/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/trino/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/trino/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/trino/out.sql create mode 100644 ibis/backends/trino/converter.py delete mode 100644 ibis/backends/trino/datatypes.py delete mode 100644 ibis/backends/trino/registry.py diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index 7bf6c9fbca61..bc06cd0ab9df 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -143,13 +143,12 @@ jobs: # - freetds-dev # - unixodbc-dev # - tdsodbc - # - name: trino - # title: Trino - # extras: - # - trino - # - postgres - # services: - # - trino + - name: trino + title: Trino + extras: + - trino + services: + - trino # - name: druid # title: Druid # extras: @@ -255,15 +254,14 @@ jobs: # - freetds-dev # - unixodbc-dev # - tdsodbc - # - os: windows-latest - # backend: - # name: trino - # title: Trino - # services: - # - trino - # extras: - # - trino - # - postgres + - os: windows-latest + backend: + name: trino + title: Trino + services: + - trino + extras: + - trino # - os: windows-latest # backend: # name: druid @@ -691,13 +689,6 @@ jobs: # title: SQLite # extras: # - sqlite - # - name: trino - # title: Trino - # services: - # - trino - # extras: - # - trino - # - postgres # - name: oracle # title: Oracle # serial: true diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index 
9c4a7fe531df..b3a9b643c1ed 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -447,6 +447,70 @@ class TrinoType(SqlglotType): } ) + @classmethod + def _from_ibis_Interval(cls, dtype: dt.Interval) -> sge.DataType: + assert dtype.unit is not None, "interval unit cannot be None" + if (short := dtype.unit.short) in ("Y", "Q", "M"): + return sge.DataType( + this=typecode.INTERVAL, + expressions=[ + sge.IntervalSpan( + this=sge.Var(this="YEAR"), expression=sge.Var(this="MONTH") + ) + ], + ) + elif short in ("D", "h", "m", "s", "ms", "us", "ns"): + return sge.DataType( + this=typecode.INTERVAL, + expressions=[ + sge.IntervalSpan( + this=sge.Var(this="DAY"), expression=sge.Var(this="SECOND") + ) + ], + ) + else: + raise NotImplementedError( + f"Trino does not support {dtype.unit.name} intervals" + ) + + @classmethod + def _from_sqlglot_UBIGINT(cls): + return dt.Decimal(precision=19, scale=0, nullable=cls.default_nullable) + + @classmethod + def _from_ibis_UInt64(cls, dtype): + return sge.DataType( + this=typecode.DECIMAL, + expressions=[ + sge.DataTypeParam(this=sge.convert(19)), + sge.DataTypeParam(this=sge.convert(0)), + ], + ) + + @classmethod + def _from_sqlglot_UINT(cls): + return dt.Int64(nullable=cls.default_nullable) + + @classmethod + def _from_ibis_UInt32(cls, dtype): + return sge.DataType(this=typecode.BIGINT) + + @classmethod + def _from_sqlglot_USMALLINT(cls): + return dt.Int32(nullable=cls.default_nullable) + + @classmethod + def _from_ibis_UInt16(cls, dtype): + return sge.DataType(this=typecode.INT) + + @classmethod + def _from_sqlglot_UTINYINT(cls): + return dt.Int16(nullable=cls.default_nullable) + + @classmethod + def _from_ibis_UInt8(cls, dtype): + return sge.DataType(this=typecode.SMALLINT) + class DruidType(SqlglotType): # druid doesn't have a sophisticated type system and hive is close enough diff --git a/ibis/backends/base/sqlglot/rewrites.py b/ibis/backends/base/sqlglot/rewrites.py index 4b8341329980..522380d9111e 100644 --- a/ibis/backends/base/sqlglot/rewrites.py +++ b/ibis/backends/base/sqlglot/rewrites.py @@ -8,16 +8,21 @@ import toolz from public import public +import ibis.common.exceptions as com import ibis.expr.datashape as ds import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis.common.annotations import attribute from ibis.common.collections import FrozenDict # noqa: TCH001 +from ibis.common.deferred import var from ibis.common.patterns import Object, replace from ibis.common.typing import VarTuple # noqa: TCH001 from ibis.expr.rewrites import p from ibis.expr.schema import Schema +x = var("x") +y = var("y") + @public class Select(ops.Relation): @@ -140,3 +145,45 @@ def sqlize(node): ) step2 = step1.replace(merge_select_select) return step2 + + +@replace(p.WindowFunction(p.First(x, y))) +def rewrite_first_to_first_value(_, x, y): + """Rewrite Ibis's first to first_value when used in a window function.""" + if y is not None: + raise com.UnsupportedOperationError( + "`first` with `where` is unsupported in a window function" + ) + return _.copy(func=ops.FirstValue(x)) + + +@replace(p.WindowFunction(p.Last(x, y))) +def rewrite_last_to_last_value(_, x, y): + """Rewrite Ibis's last to last_value when used in a window function.""" + if y is not None: + raise com.UnsupportedOperationError( + "`last` with `where` is unsupported in a window function" + ) + return _.copy(func=ops.LastValue(x)) + + +@replace(p.WindowFunction(frame=y @ p.WindowFrame(order_by=()))) +def rewrite_empty_order_by_window(_, 
y): + import ibis + + return _.copy(frame=y.copy(order_by=(ibis.NA,))) + + +@replace(p.WindowFunction(p.RowNumber | p.NTile, y)) +def exclude_unsupported_window_frame_from_row_number(_, y): + return ops.Subtract(_.copy(frame=y.copy(start=None, end=None)), 1) + + +@replace( + p.WindowFunction( + p.Lag | p.Lead | p.PercentRank | p.CumeDist | p.Any | p.All, + y @ p.WindowFrame(start=None), + ) +) +def exclude_unsupported_window_frame_from_ops(_, y): + return _.copy(frame=y.copy(start=None, end=0, order_by=y.order_by or (ops.NULL,))) diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index 2dfabcc8e06d..51f8153a8f1f 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -542,7 +542,6 @@ def ddl_con(ddl_backend): "postgres", "risingwave", "sqlite", - "trino", ) ), scope="session", diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py index 3384d7c2bd9e..769f746654e0 100644 --- a/ibis/backends/tests/errors.py +++ b/ibis/backends/tests/errors.py @@ -74,3 +74,8 @@ from snowflake.connector.errors import ProgrammingError as SnowflakeProgrammingError except ImportError: SnowflakeProgrammingError = None + +try: + from trino.exceptions import TrinoUserError +except ImportError: + TrinoUserError = None diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/trino/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/trino/out.sql new file mode 100644 index 000000000000..b309cd65374d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/trino/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/trino/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/trino/out.sql new file mode 100644 index 000000000000..b309cd65374d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/trino/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/trino/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/trino/out.sql new file mode 100644 index 000000000000..6bd0ba8c995d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/trino/out.sql @@ -0,0 +1,3 @@ +SELECT + SUM("t0"."bigint_col") AS "Sum(bigint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/trino/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/trino/out.sql new file mode 100644 index 000000000000..97338646649f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/trino/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + "t0"."id", + "t0"."bool_col" + FROM "functional_alltypes" AS "t0" + LIMIT 10 +) AS "t2" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/trino/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/trino/out.sql index fc16f2428d16..d3969647c9ea 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/trino/out.sql +++ 
b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/trino/out.sql @@ -1,5 +1,5 @@ SELECT - CASE t0.continent + CASE "t0"."continent" WHEN 'NA' THEN 'North America' WHEN 'SA' @@ -15,8 +15,8 @@ SELECT WHEN 'AN' THEN 'Antarctica' ELSE 'Unknown continent' - END AS cont, - SUM(t0.population) AS total_pop -FROM countries AS t0 + END AS "cont", + SUM("t0"."population") AS "total_pop" +FROM "countries" AS "t0" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/trino/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/trino/out.sql index 3f66295a7f5a..c1611d8cecc3 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/trino/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/trino/out.sql @@ -1,13 +1,9 @@ SELECT - t0.x IN ( + "t0"."x" IN ( SELECT - t1.x - FROM ( - SELECT - t0.x AS x - FROM t AS t0 - WHERE - t0.x > 2 - ) AS t1 - ) AS "InColumn(x, x)" -FROM t AS t0 \ No newline at end of file + "t0"."x" + FROM "t" AS "t0" + WHERE + "t0"."x" > 2 + ) AS "InSubquery(x)" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql index beed99d33761..f20ebad9894f 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql @@ -1,75 +1,130 @@ -WITH t0 AS ( - SELECT - t7.field_of_study AS field_of_study, - CAST(ROW(anon_2.years, anon_2.degrees) AS ROW(years VARCHAR, degrees BIGINT)) AS __pivoted__ - FROM humanities AS t7 - JOIN UNNEST(ARRAY[CAST(ROW('1970-71', t7."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', t7."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', t7."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', t7."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', t7."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', t7."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', t7."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', t7."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', t7."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', t7."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', t7."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', t7."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', t7."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', t7."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', t7."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', t7."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', t7."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', t7."2019-20") AS ROW(years VARCHAR, degrees BIGINT))]) AS anon_2(years, degrees) - ON TRUE -), t1 AS ( - SELECT - t0.field_of_study AS field_of_study, - t0.__pivoted__.years AS years, - t0.__pivoted__.degrees AS degrees - FROM t0 -), t2 AS ( - SELECT - t1.field_of_study AS field_of_study, - t1.years AS years, - t1.degrees AS degrees, - FIRST_VALUE(t1.degrees) OVER (PARTITION BY t1.field_of_study ORDER BY t1.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, - LAST_VALUE(t1.degrees) OVER (PARTITION BY t1.field_of_study ORDER BY 
t1.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees - FROM t1 -), t3 AS ( - SELECT - t2.field_of_study AS field_of_study, - t2.years AS years, - t2.degrees AS degrees, - t2.earliest_degrees AS earliest_degrees, - t2.latest_degrees AS latest_degrees, - t2.latest_degrees - t2.earliest_degrees AS diff - FROM t2 -), t4 AS ( - SELECT - t3.field_of_study AS field_of_study, - ARBITRARY(t3.diff) AS diff - FROM t3 - GROUP BY - 1 -), anon_1 AS ( +SELECT + "t10"."field_of_study", + "t10"."diff" +FROM ( SELECT - t4.field_of_study AS field_of_study, - t4.diff AS diff - FROM t4 + "t5"."field_of_study", + "t5"."diff" + FROM ( + SELECT + "t4"."field_of_study", + ARBITRARY("t4"."diff") AS "diff" + FROM ( + SELECT + "t3"."field_of_study", + "t3"."years", + "t3"."degrees", + "t3"."earliest_degrees", + "t3"."latest_degrees", + "t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff" + FROM ( + SELECT + "t2"."field_of_study", + "t2"."years", + "t2"."degrees", + FIRST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees", + LAST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees" + FROM ( + SELECT + "t1"."field_of_study", + "t1"."__pivoted__"."years" AS "years", + "t1"."__pivoted__"."degrees" AS "degrees" + FROM ( + SELECT + "t0"."field_of_study", + IF(_u.pos = _u_2.pos_2, _u_2."__pivoted__") AS "__pivoted__" + FROM "humanities" AS "t0" + CROSS JOIN UNNEST(SEQUENCE( + 1, + GREATEST( + CARDINALITY( + ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] + ) + ) + )) AS _u(pos) + CROSS JOIN UNNEST(ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), 
CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))]) WITH ORDINALITY AS _u_2("__pivoted__", pos_2) + WHERE + _u.pos = _u_2.pos_2 + OR ( + _u.pos > CARDINALITY( + ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] + ) + AND _u_2.pos_2 = CARDINALITY( + ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years 
VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] + ) + ) + ) AS "t1" + ) AS "t2" + ) AS "t3" + ) AS "t4" + GROUP BY + 1 + ) AS "t5" ORDER BY - t4.diff DESC + "t5"."diff" DESC LIMIT 10 -), t5 AS ( + UNION ALL SELECT - t4.field_of_study AS field_of_study, - t4.diff AS diff - FROM t4 + "t5"."field_of_study", + "t5"."diff" + FROM ( + SELECT + "t4"."field_of_study", + ARBITRARY("t4"."diff") AS "diff" + FROM ( + SELECT + "t3"."field_of_study", + "t3"."years", + "t3"."degrees", + "t3"."earliest_degrees", + "t3"."latest_degrees", + "t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff" + FROM ( + SELECT + "t2"."field_of_study", + "t2"."years", + "t2"."degrees", + FIRST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees", + LAST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees" + FROM ( + SELECT + "t1"."field_of_study", + "t1"."__pivoted__"."years" AS "years", + "t1"."__pivoted__"."degrees" AS "degrees" + FROM ( + SELECT + "t0"."field_of_study", + IF(_u.pos = _u_2.pos_2, _u_2."__pivoted__") AS "__pivoted__" + FROM "humanities" AS "t0" + CROSS JOIN UNNEST(SEQUENCE( + 1, + GREATEST( + CARDINALITY( + ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] + ) + ) + )) AS _u(pos) + CROSS JOIN UNNEST(ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years 
VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))]) WITH ORDINALITY AS _u_2("__pivoted__", pos_2) + WHERE + _u.pos = _u_2.pos_2 + OR ( + _u.pos > CARDINALITY( + ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] + ) + AND _u_2.pos_2 = CARDINALITY( + ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', 
"t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] + ) + ) + ) AS "t1" + ) AS "t2" + ) AS "t3" + ) AS "t4" + GROUP BY + 1 + ) AS "t5" WHERE - t4.diff < 0 -), anon_3 AS ( - SELECT - t5.field_of_study AS field_of_study, - t5.diff AS diff - FROM t5 + "t5"."diff" < 0 ORDER BY - t5.diff ASC + "t5"."diff" ASC LIMIT 10 -) -SELECT - t6.field_of_study, - t6.diff -FROM ( - SELECT - anon_1.field_of_study AS field_of_study, - anon_1.diff AS diff - FROM anon_1 - UNION ALL - SELECT - anon_3.field_of_study AS field_of_study, - anon_3.diff AS diff - FROM anon_3 -) AS t6 \ No newline at end of file +) AS "t10" \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index d403f613146e..e008089e632d 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -22,6 +22,7 @@ Py4JError, PySparkAnalysisException, SnowflakeProgrammingError, + TrinoUserError, ) from ibis.legacy.udf.vectorized import reduction @@ -1111,7 +1112,6 @@ def test_quantile( ), pytest.mark.notyet( [ - "trino", "postgres", "risingwave", "snowflake", @@ -1120,6 +1120,7 @@ def test_quantile( raises=ValueError, reason="XXXXSQLExprTranslator only implements population correlation coefficient", ), + pytest.mark.notyet(["trino"], raises=com.UnsupportedOperationError), ], ), param( @@ -1359,7 +1360,6 @@ def test_date_quantile(alltypes, func): "::", id="expr", marks=[ - pytest.mark.notyet(["trino"], raises=com.UnsupportedOperationError), pytest.mark.notyet( ["bigquery"], raises=GoogleBadRequest, @@ -1383,13 +1383,19 @@ def test_date_quantile(alltypes, func): param( lambda t: t.string_col.isin(["1", "7"]), lambda t: t.string_col.isin(["1", "7"]), - marks=pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError), + marks=[ + pytest.mark.notyet(["trino"], raises=TrinoUserError), + pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError), + ], id="is_in", ), param( lambda t: t.string_col.notin(["1", "7"]), lambda t: ~t.string_col.isin(["1", "7"]), - marks=pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError), + marks=[ + pytest.mark.notyet(["trino"], raises=TrinoUserError), + pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError), + ], id="not_in", ), ], @@ -1672,8 +1678,9 @@ def test_grouped_case(backend, con): ) @pytest.mark.notyet(["impala", "flink"], raises=com.UnsupportedOperationError) @pytest.mark.notyet(["clickhouse"], raises=ClickHouseDatabaseError) -@pytest.mark.notyet(["druid", "trino"], raises=sa.exc.ProgrammingError) +@pytest.mark.notyet(["druid"], raises=sa.exc.ProgrammingError) @pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) +@pytest.mark.notyet(["trino"], raises=TrinoUserError) @pytest.mark.notyet(["mysql"], raises=sa.exc.NotSupportedError) @pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notyet(["pyspark"], raises=PySparkAnalysisException) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index e2063b94354d..72272e4a3411 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -23,6 +23,7 @@ PolarsComputeError, Py4JJavaError, PySparkAnalysisException, + TrinoUserError, ) pytestmark = [ @@ -81,8 +82,7 @@ def test_array_concat(con): right = ibis.literal([2, 1]) expr = left + right result = con.execute(expr.name("tmp")) - expected = np.array([1, 2, 3, 2, 1]) - assert np.array_equal(result, expected) 
+ assert sorted(result) == sorted([1, 2, 3, 2, 1]) # Issues #2370 @@ -99,7 +99,7 @@ def test_array_concat_variadic(con): # Issues #2370 @pytest.mark.notimpl(["flink"], raises=com.OperationNotDefinedError) @pytest.mark.notyet( - ["postgres", "trino"], + ["postgres"], raises=sa.exc.ProgrammingError, reason="backend can't infer the type of an empty array", ) @@ -108,6 +108,7 @@ def test_array_concat_variadic(con): raises=sa.exc.InternalError, reason="Bind error: cannot determine type of empty array", ) +@pytest.mark.notyet(["trino"], raises=TrinoUserError) def test_array_concat_some_empty(con): left = ibis.literal([]) right = ibis.literal([2, 1]) @@ -536,8 +537,7 @@ def test_array_filter(con, input, output): @builtin_array @pytest.mark.notimpl( - ["mssql", "polars", "postgres"], - raises=com.OperationNotDefinedError, + ["mssql", "polars", "postgres"], raises=com.OperationNotDefinedError ) @pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) @pytest.mark.never(["impala"], reason="array_types table isn't defined") @@ -593,8 +593,7 @@ def test_array_contains(backend, con): ) @builtin_array @pytest.mark.notimpl( - ["dask", "impala", "mssql", "polars"], - raises=com.OperationNotDefinedError, + ["dask", "impala", "mssql", "polars"], raises=com.OperationNotDefinedError ) def test_array_position(backend, con, a, expected_array): t = ibis.memtable({"a": a}) @@ -607,8 +606,7 @@ def test_array_position(backend, con, a, expected_array): @builtin_array @pytest.mark.notimpl( - ["dask", "impala", "mssql", "polars"], - raises=com.OperationNotDefinedError, + ["dask", "impala", "mssql", "polars"], raises=com.OperationNotDefinedError ) @pytest.mark.broken( ["risingwave"], @@ -813,9 +811,9 @@ def test_array_intersect(con, data): reason="ClickHouse won't accept dicts for struct type values", ) @pytest.mark.notimpl(["postgres", "risingwave"], raises=sa.exc.ProgrammingError) -@pytest.mark.notimpl( - ["datafusion", "flink"], - raises=com.OperationNotDefinedError, +@pytest.mark.notimpl(["datafusion", "flink"], raises=com.OperationNotDefinedError) +@pytest.mark.broken( + ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError ) def test_unnest_struct(con): data = {"value": [[{"a": 1}, {"a": 2}], [{"a": 3}, {"a": 4}]]} @@ -887,6 +885,9 @@ def test_zip(backend): reason="pyspark doesn't seem to support field selection on explode", raises=PySparkAnalysisException, ) +@pytest.mark.broken( + ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError +) def test_array_of_struct_unnest(con): jobs = ibis.memtable( { @@ -970,12 +971,14 @@ def flatten_data(): @pytest.mark.notimpl(["datafusion", "flink"], raises=com.OperationNotDefinedError) def test_array_flatten(backend, flatten_data, column, expected): data = flatten_data[column] - t = ibis.memtable( - {column: data["data"]}, schema=ibis.schema({column: data["type"]}) - ) + t = ibis.memtable({column: data["data"]}, schema={column: data["type"]}) expr = t[column].flatten() result = backend.connection.execute(expr) - backend.assert_series_equal(result, expected, check_names=False) + backend.assert_series_equal( + result.sort_values().reset_index(drop=True), + expected.sort_values().reset_index(drop=True), + check_names=False, + ) @pytest.mark.notyet( @@ -1155,7 +1158,7 @@ def swap(token): marks=[ pytest.mark.notyet( ["trino"], - raises=sa.exc.ProgrammingError, + raises=TrinoUserError, reason="trino doesn't support timestamp with time zone arguments to its sequence function", ), pytest.mark.notyet( diff --git 
a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index ad7678665678..904948e03051 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -84,7 +84,7 @@ def time_keyed_right(time_keyed_df2): ("forward", operator.le), ], ) -@pytest.mark.notimpl(["datafusion", "snowflake"]) +@pytest.mark.notimpl(["datafusion", "snowflake", "trino"]) def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): on = op(time_left["time"], time_right["time"]) expr = time_left.asof_join(time_right, on=on, predicates="group") @@ -112,7 +112,7 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op @pytest.mark.broken( ["clickhouse"], raises=AssertionError, reason="`time` is truncated to seconds" ) -@pytest.mark.notimpl(["datafusion", "snowflake"]) +@pytest.mark.notimpl(["datafusion", "snowflake", "trino"]) def test_keyed_asof_join_with_tolerance( con, time_keyed_left, diff --git a/ibis/backends/tests/test_binary.py b/ibis/backends/tests/test_binary.py index 02445e9c9ff7..9dc8c8e3ffff 100644 --- a/ibis/backends/tests/test_binary.py +++ b/ibis/backends/tests/test_binary.py @@ -14,19 +14,14 @@ "duckdb": "BLOB", "snowflake": "BINARY", "sqlite": "blob", - "trino": "STRING", + "trino": "varbinary", "postgres": "bytea", "risingwave": "bytea", "flink": "BINARY(1) NOT NULL", } -@pytest.mark.broken( - ["trino"], - "(builtins.AttributeError) 'bytes' object has no attribute 'encode'", - raises=sqlalchemy.exc.StatementError, -) -@pytest.mark.broken( +@pytest.mark.notimpl( ["clickhouse", "impala"], "Unsupported type: Binary(nullable=True)", raises=NotImplementedError, diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 974c0e9f56a9..3904ff82fbba 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -884,7 +884,7 @@ def test_in_memory_table(backend, con, arg, lambda_, expected, monkeypatch): monkeypatch.setattr(ibis.options, "default_backend", con) expr = lambda_(arg) - result = con.execute(expr) + result = con.execute(expr.order_by(expr.columns[0])) backend.assert_frame_equal(result, expected) @@ -892,7 +892,7 @@ def test_filter_memory_table(backend, con, monkeypatch): monkeypatch.setattr(ibis.options, "default_backend", con) t = ibis.memtable([(1, 2), (3, 4), (5, 6)], columns=["x", "y"]) - expr = t.filter(t.x > 1) + expr = t.filter(t.x > 1).order_by("x") expected = pd.DataFrame({"x": [3, 5], "y": [4, 6]}) result = con.execute(expr) backend.assert_frame_equal(result, expected) diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index 6e417032603d..4b678b7fb3c0 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -16,6 +16,7 @@ PyDeltaTableError, PySparkAnalysisException, SnowflakeProgrammingError, + TrinoUserError, ) from ibis.formats.pyarrow import PyArrowType @@ -351,10 +352,9 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): marks=[ pytest.mark.notyet(["impala"], reason="precision not supported"), pytest.mark.notyet(["duckdb"], reason="precision is out of range"), - pytest.mark.notyet( - ["druid", "mssql", "trino"], raises=sa.exc.ProgrammingError - ), + pytest.mark.notyet(["druid", "mssql"], raises=sa.exc.ProgrammingError), pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError), + pytest.mark.notyet(["trino"], raises=TrinoUserError), pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError), 
pytest.mark.notyet(["mysql"], raises=sa.exc.OperationalError), pytest.mark.notyet( diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 0e2d41fabdf1..0a6c98d2fd27 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -3,6 +3,7 @@ import contextlib import datetime import decimal +from collections import Counter from operator import invert, methodcaller, neg import numpy as np @@ -24,6 +25,7 @@ ImpalaHiveServer2Error, Py4JJavaError, SnowflakeProgrammingError, + TrinoUserError, ) from ibis.common.annotations import ValidationError @@ -949,10 +951,9 @@ def test_literal_na(con, dtype): @pytest.mark.notimpl(["exasol"]) def test_memtable_bool_column(backend, con): - t = ibis.memtable({"a": [True, False, True]}) - backend.assert_series_equal( - con.execute(t.a), pd.Series([True, False, True], name="a") - ) + data = [True, False, True] + t = ibis.memtable({"a": data}) + assert Counter(con.execute(t.a)) == Counter(data) @pytest.mark.broken( @@ -977,7 +978,7 @@ def test_memtable_construct(backend, con, monkeypatch): ) t = ibis.memtable(pa_t) backend.assert_frame_equal( - t.execute().fillna(pd.NA), pa_t.to_pandas().fillna(pd.NA) + t.order_by("a").execute().fillna(pd.NA), pa_t.to_pandas().fillna(pd.NA) ) @@ -1059,6 +1060,11 @@ def query(t, group_cols): raises=sa.exc.InternalError, reason='sql parser error: Expected ), found: TEXT at line:3, column:219 Near "))]) AS anon_1(f1"', ) +@pytest.mark.broken( + ["trino"], + reason="invalid code generated for unnesting a struct", + raises=TrinoUserError, +) def test_pivot_longer(backend): diamonds = backend.diamonds df = diamonds.execute() @@ -1396,11 +1402,7 @@ def hash_256(col): 1672531200, marks=[ pytest.mark.notyet(["duckdb"], reason="casts to None"), - pytest.mark.notyet( - ["trino"], - raises=sa.exc.ProgrammingError, - reason="raises TrinoUserError", - ), + pytest.mark.notyet(["trino"], raises=TrinoUserError), pytest.mark.broken(["polars"], reason="casts to 1672531200000000000"), pytest.mark.broken(["datafusion"], reason="casts to 1672531200000000"), ], @@ -1480,7 +1482,7 @@ def test_try_cast_table(backend, con): t = ibis.memtable(df) backend.assert_frame_equal( - con.execute(t.try_cast({"a": "int", "b": "float"})), expected + con.execute(t.try_cast({"a": "int", "b": "float"}).order_by("a")), expected ) @@ -1516,11 +1518,7 @@ def test_try_cast_table(backend, con): ["clickhouse", "polars", "flink"], reason="casts this to to a number", ), - pytest.mark.notyet( - ["trino"], - raises=sa.exc.ProgrammingError, - reason="raises TrinoUserError", - ), + pytest.mark.notyet(["trino"], raises=TrinoUserError), ], ), ], @@ -1685,7 +1683,7 @@ def test_static_table_slice(backend, slc, expected_count_fn): ids=str, ) @pytest.mark.notyet( - ["mysql", "trino"], + ["mysql"], raises=sa.exc.ProgrammingError, reason="backend doesn't support dynamic limit/offset", ) @@ -1694,6 +1692,11 @@ def test_static_table_slice(backend, slc, expected_count_fn): raises=SnowflakeProgrammingError, reason="backend doesn't support dynamic limit/offset", ) +@pytest.mark.notyet( + ["trino"], + raises=TrinoUserError, + reason="backend doesn't support dynamic limit/offset", +) @pytest.mark.notimpl( ["mssql"], raises=sa.exc.CompileError, @@ -1743,7 +1746,7 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): @pytest.mark.notyet( - ["mysql", "trino"], + ["mysql"], raises=sa.exc.ProgrammingError, reason="backend doesn't support dynamic limit/offset", ) @@ -1753,9 +1756,11 @@ def test_dynamic_table_slice(backend, 
slc, expected_count_fn): reason="backend doesn't support dynamic limit/offset", ) @pytest.mark.notimpl( - ["exasol"], - raises=sa.exc.CompileError, + ["trino"], + raises=TrinoUserError, + reason="backend doesn't support dynamic limit/offset", ) +@pytest.mark.notimpl(["exasol"], raises=sa.exc.CompileError) @pytest.mark.notyet( ["clickhouse"], raises=ClickHouseDatabaseError, diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 68a487d6a2f2..4dfc06dcd4f4 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -24,6 +24,7 @@ ImpalaHiveServer2Error, Py4JError, SnowflakeProgrammingError, + TrinoUserError, ) from ibis.expr import datatypes as dt from ibis.tests.util import assert_equal @@ -168,7 +169,7 @@ "impala": "DECIMAL(2,1)", "snowflake": "INTEGER", "sqlite": "real", - "trino": "double", + "trino": "real", "duckdb": "FLOAT", "postgres": "numeric", "risingwave": "numeric", @@ -200,7 +201,7 @@ "impala": "DECIMAL(2,1)", "snowflake": "INTEGER", "sqlite": "real", - "trino": "double", + "trino": "real", "duckdb": "FLOAT", "postgres": "numeric", "risingwave": "numeric", @@ -257,7 +258,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "bigquery": decimal.Decimal("1.1"), "snowflake": decimal.Decimal("1.1"), "sqlite": 1.1, - "trino": 1.1, + "trino": decimal.Decimal("1.1"), "dask": decimal.Decimal("1.1"), "duckdb": decimal.Decimal("1.1"), "postgres": 1.1, @@ -275,7 +276,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "bigquery": "NUMERIC", "snowflake": "DECIMAL", "sqlite": "real", - "trino": "decimal(2,1)", + "trino": "decimal(18,3)", "duckdb": "DECIMAL(18,3)", "postgres": "numeric", "risingwave": "numeric", @@ -311,7 +312,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "bigquery": decimal.Decimal("1.1"), "snowflake": decimal.Decimal("1.1"), "sqlite": 1.1, - "trino": 1.1, + "trino": decimal.Decimal("1.1"), "duckdb": decimal.Decimal("1.100000000"), "postgres": 1.1, "risingwave": 1.1, @@ -331,7 +332,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "clickhouse": "Decimal(38, 9)", "snowflake": "DECIMAL", "sqlite": "real", - "trino": "decimal(2,1)", + "trino": "decimal(38,9)", "duckdb": "DECIMAL(38,9)", "postgres": "numeric", "risingwave": "numeric", @@ -361,7 +362,6 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": decimal.Decimal("1.1"), "sqlite": 1.1, - "trino": 1.1, "dask": decimal.Decimal("1.1"), "postgres": 1.1, "risingwave": 1.1, @@ -398,7 +398,12 @@ def test_numeric_literal(con, backend, expr, expected_types): raises=ImpalaHiveServer2Error, ), pytest.mark.broken( - ["duckdb"], "Unsupported precision.", raises=DuckDBParserException + ["duckdb"], + reason="Unsupported precision.", + raises=DuckDBParserException, + ), + pytest.mark.broken( + ["trino"], reason="Unsupported precision.", raises=TrinoUserError ), pytest.mark.notyet(["datafusion"], raises=Exception), pytest.mark.notyet( @@ -426,7 +431,6 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": "FLOAT64", "sqlite": "real", - "trino": "decimal(2,1)", "postgres": "numeric", "risingwave": "numeric", "impala": "DOUBLE", @@ -478,6 +482,11 @@ def test_numeric_literal(con, backend, expr, expected_types): "(oracledb.exceptions.DatabaseError) DPY-4004: invalid number", raises=sa.exc.DatabaseError, ), + pytest.mark.notyet( + ["trino"], + raises=TrinoUserError, + reason="can't cast infinity to decimal", + ), pytest.mark.notyet( ["flink"], "Infinity is not 
supported in Flink SQL", @@ -508,7 +517,6 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": "FLOAT64", "sqlite": "real", - "trino": "decimal(2,1)", "postgres": "numeric", "risingwave": "numeric", "impala": "DOUBLE", @@ -570,6 +578,11 @@ def test_numeric_literal(con, backend, expr, expected_types): "infinity is not allowed as a decimal value", raises=SnowflakeProgrammingError, ), + pytest.mark.notyet( + ["trino"], + raises=TrinoUserError, + reason="can't cast infinity to decimal", + ), ], id="decimal-infinity-", ), @@ -592,7 +605,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "bigquery": "FLOAT64", "snowflake": "DOUBLE", "sqlite": "null", - "trino": "decimal(2,1)", "postgres": "numeric", "risingwave": "numeric", "impala": "DOUBLE", @@ -662,6 +674,11 @@ def test_numeric_literal(con, backend, expr, expected_types): "NaN is not allowed as a decimal value", raises=SnowflakeProgrammingError, ), + pytest.mark.notyet( + ["trino"], + raises=TrinoUserError, + reason="can't cast nan to decimal", + ), ], id="decimal-NaN", ), @@ -1495,6 +1512,7 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "polars", "flink", "snowflake", + "trino", ], reason="Not SQLAlchemy backends", ) diff --git a/ibis/backends/tests/test_struct.py b/ibis/backends/tests/test_struct.py index 4c4897ab6cce..95f7df9f4ea5 100644 --- a/ibis/backends/tests/test_struct.py +++ b/ibis/backends/tests/test_struct.py @@ -24,26 +24,25 @@ [ param( "a", - [1.0, 2.0, 3.0, np.nan, 2.0, np.nan, 3.0], + [1.0, 2.0, 2.0, 3.0, 3.0, np.nan, np.nan], id="a", marks=pytest.mark.notimpl(["snowflake"]), ), param( - "b", ["banana", "apple", "orange", "banana", None, None, "orange"], id="b" + "b", ["apple", "banana", "banana", "orange", "orange", None, None], id="b" ), param( "c", - [2, 3, 4, 2, 3, np.nan, np.nan], + [2, 2, 3, 3, 4, np.nan, np.nan], id="c", marks=pytest.mark.notimpl(["snowflake"]), ), ], ) def test_single_field(struct, field, expected): - expr = struct.abc[field] + expr = struct.select(field=lambda t: t.abc[field]).order_by("field") result = expr.execute() - equal_nan = expr.type().is_numeric() - assert np.array_equal(result, expected, equal_nan=equal_nan) + tm.assert_series_equal(result.field, pd.Series(expected, name="field")) @pytest.mark.notimpl(["dask"]) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 4878ad46a287..c48ea004f6b8 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -29,6 +29,7 @@ Py4JJavaError, PySparkIllegalArgumentException, SnowflakeProgrammingError, + TrinoUserError, ) from ibis.common.annotations import ValidationError @@ -1836,7 +1837,7 @@ def test_integer_to_timestamp(backend, con, unit): pytest.mark.never( ["trino"], reason="datetime formatting style not supported", - raises=sa.exc.ProgrammingError, + raises=TrinoUserError, ), pytest.mark.never( ["polars"], diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index b164f158d5ac..432b18594d61 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -850,6 +850,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): ["trino"], reason="this isn't actually broken: the backend result is equal up to ordering", raises=AssertionError, + strict=False, # sometimes it passes ), pytest.mark.broken(["oracle"], raises=AssertionError), pytest.mark.notimpl( @@ -902,6 +903,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): 
"result is equal up to ordering" ), raises=AssertionError, + strict=False, # sometimes it passes ), pytest.mark.broken(["oracle"], raises=AssertionError), pytest.mark.notimpl( diff --git a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/trino/h01.sql b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/trino/h01.sql index 398fc8d0a84c..5f0048dce3fb 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/trino/h01.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/trino/h01.sql @@ -1,39 +1,62 @@ SELECT - t0.l_returnflag, - t0.l_linestatus, - t0.sum_qty, - t0.sum_base_price, - t0.sum_disc_price, - t0.sum_charge, - t0.avg_qty, - t0.avg_price, - t0.avg_disc, - t0.count_order + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."sum_qty", + "t2"."sum_base_price", + "t2"."sum_disc_price", + "t2"."sum_charge", + "t2"."avg_qty", + "t2"."avg_price", + "t2"."avg_disc", + "t2"."count_order" FROM ( SELECT - t1.l_returnflag AS l_returnflag, - t1.l_linestatus AS l_linestatus, - SUM(t1.l_quantity) AS sum_qty, - SUM(t1.l_extendedprice) AS sum_base_price, - SUM(t1.l_extendedprice * ( - 1 - t1.l_discount - )) AS sum_disc_price, - SUM(t1.l_extendedprice * ( - 1 - t1.l_discount - ) * ( - t1.l_tax + 1 - )) AS sum_charge, - AVG(t1.l_quantity) AS avg_qty, - AVG(t1.l_extendedprice) AS avg_price, - AVG(t1.l_discount) AS avg_disc, - COUNT(*) AS count_order - FROM hive.ibis_sf1.lineitem AS t1 - WHERE - t1.l_shipdate <= FROM_ISO8601_DATE('1998-09-02') + "t1"."l_returnflag", + "t1"."l_linestatus", + SUM("t1"."l_quantity") AS "sum_qty", + SUM("t1"."l_extendedprice") AS "sum_base_price", + SUM("t1"."l_extendedprice" * ( + 1 - "t1"."l_discount" + )) AS "sum_disc_price", + SUM( + ( + "t1"."l_extendedprice" * ( + 1 - "t1"."l_discount" + ) + ) * ( + "t1"."l_tax" + 1 + ) + ) AS "sum_charge", + AVG("t1"."l_quantity") AS "avg_qty", + AVG("t1"."l_extendedprice") AS "avg_price", + AVG("t1"."l_discount") AS "avg_disc", + COUNT(*) AS "count_order" + FROM ( + SELECT + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + CAST("t0"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t0"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t0"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t0"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" + WHERE + "t0"."l_shipdate" <= FROM_ISO8601_DATE('1998-09-02') + ) AS "t1" GROUP BY 1, 2 -) AS t0 +) AS "t2" ORDER BY - t0.l_returnflag ASC, - t0.l_linestatus ASC \ No newline at end of file + "t2"."l_returnflag" ASC, + "t2"."l_linestatus" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql index af4e746c080e..d76a0c18cfed 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql @@ -1,84 +1,190 @@ -WITH t0 AS ( - SELECT - t2.p_partkey AS p_partkey, - t2.p_name AS p_name, - t2.p_mfgr AS p_mfgr, - t2.p_brand AS p_brand, - t2.p_type AS p_type, - t2.p_size AS p_size, - t2.p_container AS p_container, - t2.p_retailprice AS p_retailprice, - t2.p_comment AS p_comment, - t3.ps_partkey AS ps_partkey, - t3.ps_suppkey AS ps_suppkey, - t3.ps_availqty AS ps_availqty, - 
t3.ps_supplycost AS ps_supplycost, - t3.ps_comment AS ps_comment, - t4.s_suppkey AS s_suppkey, - t4.s_name AS s_name, - t4.s_address AS s_address, - t4.s_nationkey AS s_nationkey, - t4.s_phone AS s_phone, - t4.s_acctbal AS s_acctbal, - t4.s_comment AS s_comment, - t5.n_nationkey AS n_nationkey, - t5.n_name AS n_name, - t5.n_regionkey AS n_regionkey, - t5.n_comment AS n_comment, - t6.r_regionkey AS r_regionkey, - t6.r_name AS r_name, - t6.r_comment AS r_comment - FROM hive.ibis_sf1.part AS t2 - JOIN hive.ibis_sf1.partsupp AS t3 - ON t2.p_partkey = t3.ps_partkey - JOIN hive.ibis_sf1.supplier AS t4 - ON t4.s_suppkey = t3.ps_suppkey - JOIN hive.ibis_sf1.nation AS t5 - ON t4.s_nationkey = t5.n_nationkey - JOIN hive.ibis_sf1.region AS t6 - ON t5.n_regionkey = t6.r_regionkey - WHERE - t2.p_size = 15 - AND t2.p_type LIKE '%BRASS' - AND t6.r_name = 'EUROPE' - AND t3.ps_supplycost = ( - SELECT - MIN(t3.ps_supplycost) AS "Min(ps_supplycost)" - FROM hive.ibis_sf1.partsupp AS t3 - JOIN hive.ibis_sf1.supplier AS t4 - ON t4.s_suppkey = t3.ps_suppkey - JOIN hive.ibis_sf1.nation AS t5 - ON t4.s_nationkey = t5.n_nationkey - JOIN hive.ibis_sf1.region AS t6 - ON t5.n_regionkey = t6.r_regionkey - WHERE - t6.r_name = 'EUROPE' AND t2.p_partkey = t3.ps_partkey - ) -) SELECT - t1.s_acctbal, - t1.s_name, - t1.n_name, - t1.p_partkey, - t1.p_mfgr, - t1.s_address, - t1.s_phone, - t1.s_comment + "t26"."s_acctbal", + "t26"."s_name", + "t26"."n_name", + "t26"."p_partkey", + "t26"."p_mfgr", + "t26"."s_address", + "t26"."s_phone", + "t26"."s_comment" FROM ( SELECT - t0.s_acctbal AS s_acctbal, - t0.s_name AS s_name, - t0.n_name AS n_name, - t0.p_partkey AS p_partkey, - t0.p_mfgr AS p_mfgr, - t0.s_address AS s_address, - t0.s_phone AS s_phone, - t0.s_comment AS s_comment - FROM t0 -) AS t1 + "t14"."p_partkey", + "t14"."p_name", + "t14"."p_mfgr", + "t14"."p_brand", + "t14"."p_type", + "t14"."p_size", + "t14"."p_container", + "t14"."p_retailprice", + "t14"."p_comment", + "t15"."ps_partkey", + "t15"."ps_suppkey", + "t15"."ps_availqty", + "t15"."ps_supplycost", + "t15"."ps_comment", + "t17"."s_suppkey", + "t17"."s_name", + "t17"."s_address", + "t17"."s_nationkey", + "t17"."s_phone", + "t17"."s_acctbal", + "t17"."s_comment", + "t10"."n_nationkey", + "t10"."n_name", + "t10"."n_regionkey", + "t10"."n_comment", + "t12"."r_regionkey", + "t12"."r_name", + "t12"."r_comment" + FROM ( + SELECT + "t0"."p_partkey", + "t0"."p_name", + "t0"."p_mfgr", + "t0"."p_brand", + "t0"."p_type", + "t0"."p_size", + "t0"."p_container", + CAST("t0"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", + "t0"."p_comment" + FROM "part" AS "t0" + ) AS "t14" + INNER JOIN ( + SELECT + "t1"."ps_partkey", + "t1"."ps_suppkey", + "t1"."ps_availqty", + CAST("t1"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", + "t1"."ps_comment" + FROM "partsupp" AS "t1" + ) AS "t15" + ON "t14"."p_partkey" = "t15"."ps_partkey" + INNER JOIN ( + SELECT + "t2"."s_suppkey", + "t2"."s_name", + "t2"."s_address", + "t2"."s_nationkey", + "t2"."s_phone", + CAST("t2"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t2"."s_comment" + FROM "supplier" AS "t2" + ) AS "t17" + ON "t17"."s_suppkey" = "t15"."ps_suppkey" + INNER JOIN ( + SELECT + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment" + FROM "nation" AS "t3" + ) AS "t10" + ON "t17"."s_nationkey" = "t10"."n_nationkey" + INNER JOIN ( + SELECT + "t4"."r_regionkey", + "t4"."r_name", + "t4"."r_comment" + FROM "region" AS "t4" + ) AS "t12" + ON "t10"."n_regionkey" = "t12"."r_regionkey" +) AS "t26" +WHERE + 
"t26"."p_size" = 15 + AND "t26"."p_type" LIKE '%BRASS' + AND "t26"."r_name" = 'EUROPE' + AND "t26"."ps_supplycost" = ( + SELECT + MIN("t28"."ps_supplycost") AS "Min(ps_supplycost)" + FROM ( + SELECT + "t27"."ps_partkey", + "t27"."ps_suppkey", + "t27"."ps_availqty", + "t27"."ps_supplycost", + "t27"."ps_comment", + "t27"."s_suppkey", + "t27"."s_name", + "t27"."s_address", + "t27"."s_nationkey", + "t27"."s_phone", + "t27"."s_acctbal", + "t27"."s_comment", + "t27"."n_nationkey", + "t27"."n_name", + "t27"."n_regionkey", + "t27"."n_comment", + "t27"."r_regionkey", + "t27"."r_name", + "t27"."r_comment" + FROM ( + SELECT + "t16"."ps_partkey", + "t16"."ps_suppkey", + "t16"."ps_availqty", + "t16"."ps_supplycost", + "t16"."ps_comment", + "t18"."s_suppkey", + "t18"."s_name", + "t18"."s_address", + "t18"."s_nationkey", + "t18"."s_phone", + "t18"."s_acctbal", + "t18"."s_comment", + "t11"."n_nationkey", + "t11"."n_name", + "t11"."n_regionkey", + "t11"."n_comment", + "t13"."r_regionkey", + "t13"."r_name", + "t13"."r_comment" + FROM ( + SELECT + "t1"."ps_partkey", + "t1"."ps_suppkey", + "t1"."ps_availqty", + CAST("t1"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", + "t1"."ps_comment" + FROM "partsupp" AS "t1" + ) AS "t16" + INNER JOIN ( + SELECT + "t2"."s_suppkey", + "t2"."s_name", + "t2"."s_address", + "t2"."s_nationkey", + "t2"."s_phone", + CAST("t2"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t2"."s_comment" + FROM "supplier" AS "t2" + ) AS "t18" + ON "t18"."s_suppkey" = "t16"."ps_suppkey" + INNER JOIN ( + SELECT + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment" + FROM "nation" AS "t3" + ) AS "t11" + ON "t18"."s_nationkey" = "t11"."n_nationkey" + INNER JOIN ( + SELECT + "t4"."r_regionkey", + "t4"."r_name", + "t4"."r_comment" + FROM "region" AS "t4" + ) AS "t13" + ON "t11"."n_regionkey" = "t13"."r_regionkey" + ) AS "t27" + WHERE + "t27"."r_name" = 'EUROPE' AND "t26"."p_partkey" = "t27"."ps_partkey" + ) AS "t28" + ) ORDER BY - t1.s_acctbal DESC, - t1.n_name ASC, - t1.s_name ASC, - t1.p_partkey ASC + "t26"."s_acctbal" DESC, + "t26"."n_name" ASC, + "t26"."s_name" ASC, + "t26"."p_partkey" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql index c6d2d28f8db8..ca724d740022 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql @@ -1,39 +1,145 @@ -WITH t0 AS ( +SELECT + "t13"."l_orderkey", + "t13"."revenue", + "t13"."o_orderdate", + "t13"."o_shippriority" +FROM ( SELECT - t4.l_orderkey AS l_orderkey, - t3.o_orderdate AS o_orderdate, - t3.o_shippriority AS o_shippriority, - SUM(t4.l_extendedprice * ( - 1 - t4.l_discount - )) AS revenue - FROM hive.ibis_sf1.customer AS t2 - JOIN hive.ibis_sf1.orders AS t3 - ON t2.c_custkey = t3.o_custkey - JOIN hive.ibis_sf1.lineitem AS t4 - ON t4.l_orderkey = t3.o_orderkey - WHERE - t2.c_mktsegment = 'BUILDING' - AND t3.o_orderdate < FROM_ISO8601_DATE('1995-03-15') - AND t4.l_shipdate > FROM_ISO8601_DATE('1995-03-15') + "t12"."l_orderkey", + "t12"."o_orderdate", + "t12"."o_shippriority", + SUM("t12"."l_extendedprice" * ( + 1 - "t12"."l_discount" + )) AS "revenue" + FROM ( + SELECT + "t11"."c_custkey", + "t11"."c_name", + "t11"."c_address", + "t11"."c_nationkey", + "t11"."c_phone", + "t11"."c_acctbal", + "t11"."c_mktsegment", + "t11"."c_comment", + "t11"."o_orderkey", + 
"t11"."o_custkey", + "t11"."o_orderstatus", + "t11"."o_totalprice", + "t11"."o_orderdate", + "t11"."o_orderpriority", + "t11"."o_clerk", + "t11"."o_shippriority", + "t11"."o_comment", + "t11"."l_orderkey", + "t11"."l_partkey", + "t11"."l_suppkey", + "t11"."l_linenumber", + "t11"."l_quantity", + "t11"."l_extendedprice", + "t11"."l_discount", + "t11"."l_tax", + "t11"."l_returnflag", + "t11"."l_linestatus", + "t11"."l_shipdate", + "t11"."l_commitdate", + "t11"."l_receiptdate", + "t11"."l_shipinstruct", + "t11"."l_shipmode", + "t11"."l_comment" + FROM ( + SELECT + "t6"."c_custkey", + "t6"."c_name", + "t6"."c_address", + "t6"."c_nationkey", + "t6"."c_phone", + "t6"."c_acctbal", + "t6"."c_mktsegment", + "t6"."c_comment", + "t7"."o_orderkey", + "t7"."o_custkey", + "t7"."o_orderstatus", + "t7"."o_totalprice", + "t7"."o_orderdate", + "t7"."o_orderpriority", + "t7"."o_clerk", + "t7"."o_shippriority", + "t7"."o_comment", + "t8"."l_orderkey", + "t8"."l_partkey", + "t8"."l_suppkey", + "t8"."l_linenumber", + "t8"."l_quantity", + "t8"."l_extendedprice", + "t8"."l_discount", + "t8"."l_tax", + "t8"."l_returnflag", + "t8"."l_linestatus", + "t8"."l_shipdate", + "t8"."l_commitdate", + "t8"."l_receiptdate", + "t8"."l_shipinstruct", + "t8"."l_shipmode", + "t8"."l_comment" + FROM ( + SELECT + "t0"."c_custkey", + "t0"."c_name", + "t0"."c_address", + "t0"."c_nationkey", + "t0"."c_phone", + CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t0"."c_mktsegment", + "t0"."c_comment" + FROM "customer" AS "t0" + ) AS "t6" + INNER JOIN ( + SELECT + "t1"."o_orderkey", + "t1"."o_custkey", + "t1"."o_orderstatus", + CAST("t1"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t1"."o_orderdate", + "t1"."o_orderpriority", + "t1"."o_clerk", + "t1"."o_shippriority", + "t1"."o_comment" + FROM "orders" AS "t1" + ) AS "t7" + ON "t6"."c_custkey" = "t7"."o_custkey" + INNER JOIN ( + SELECT + "t2"."l_orderkey", + "t2"."l_partkey", + "t2"."l_suppkey", + "t2"."l_linenumber", + CAST("t2"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t2"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t2"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t2"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."l_shipdate", + "t2"."l_commitdate", + "t2"."l_receiptdate", + "t2"."l_shipinstruct", + "t2"."l_shipmode", + "t2"."l_comment" + FROM "lineitem" AS "t2" + ) AS "t8" + ON "t8"."l_orderkey" = "t7"."o_orderkey" + ) AS "t11" + WHERE + "t11"."c_mktsegment" = 'BUILDING' + AND "t11"."o_orderdate" < FROM_ISO8601_DATE('1995-03-15') + AND "t11"."l_shipdate" > FROM_ISO8601_DATE('1995-03-15') + ) AS "t12" GROUP BY 1, 2, 3 -) -SELECT - t1.l_orderkey, - t1.revenue, - t1.o_orderdate, - t1.o_shippriority -FROM ( - SELECT - t0.l_orderkey AS l_orderkey, - t0.revenue AS revenue, - t0.o_orderdate AS o_orderdate, - t0.o_shippriority AS o_shippriority - FROM t0 -) AS t1 +) AS "t13" ORDER BY - t1.revenue DESC, - t1.o_orderdate ASC + "t13"."revenue" DESC, + "t13"."o_orderdate" ASC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/trino/h04.sql b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/trino/h04.sql index 745fe02cef56..10bf14955d70 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/trino/h04.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/trino/h04.sql @@ -1,20 +1,52 @@ SELECT - t0.o_orderpriority, - COUNT(*) AS order_count -FROM hive.ibis_sf1.orders AS t0 -WHERE - ( - EXISTS( + 
"t5"."o_orderpriority", + "t5"."order_count" +FROM ( + SELECT + "t4"."o_orderpriority", + COUNT(*) AS "order_count" + FROM ( + SELECT + "t2"."o_orderkey", + "t2"."o_custkey", + "t2"."o_orderstatus", + "t2"."o_totalprice", + "t2"."o_orderdate", + "t2"."o_orderpriority", + "t2"."o_clerk", + "t2"."o_shippriority", + "t2"."o_comment" + FROM ( SELECT - 1 AS anon_1 - FROM hive.ibis_sf1.lineitem AS t1 - WHERE - t1.l_orderkey = t0.o_orderkey AND t1.l_commitdate < t1.l_receiptdate - ) - ) - AND t0.o_orderdate >= FROM_ISO8601_DATE('1993-07-01') - AND t0.o_orderdate < FROM_ISO8601_DATE('1993-10-01') -GROUP BY - 1 + "t0"."o_orderkey", + "t0"."o_custkey", + "t0"."o_orderstatus", + CAST("t0"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t0"."o_orderdate", + "t0"."o_orderpriority", + "t0"."o_clerk", + "t0"."o_shippriority", + "t0"."o_comment" + FROM "orders" AS "t0" + ) AS "t2" + WHERE + EXISTS( + SELECT + 1 AS "1" + FROM "lineitem" AS "t1" + WHERE + ( + "t1"."l_orderkey" = "t2"."o_orderkey" + ) + AND ( + "t1"."l_commitdate" < "t1"."l_receiptdate" + ) + ) + AND "t2"."o_orderdate" >= FROM_ISO8601_DATE('1993-07-01') + AND "t2"."o_orderdate" < FROM_ISO8601_DATE('1993-10-01') + ) AS "t4" + GROUP BY + 1 +) AS "t5" ORDER BY - t0.o_orderpriority ASC \ No newline at end of file + "t5"."o_orderpriority" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql index 7f8b31a4a45c..c1bbad1c8935 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql @@ -1,29 +1,195 @@ SELECT - t0.n_name, - t0.revenue + "t25"."n_name", + "t25"."revenue" FROM ( SELECT - t5.n_name AS n_name, - SUM(t3.l_extendedprice * ( - 1 - t3.l_discount - )) AS revenue - FROM hive.ibis_sf1.customer AS t1 - JOIN hive.ibis_sf1.orders AS t2 - ON t1.c_custkey = t2.o_custkey - JOIN hive.ibis_sf1.lineitem AS t3 - ON t3.l_orderkey = t2.o_orderkey - JOIN hive.ibis_sf1.supplier AS t4 - ON t3.l_suppkey = t4.s_suppkey - JOIN hive.ibis_sf1.nation AS t5 - ON t1.c_nationkey = t4.s_nationkey AND t4.s_nationkey = t5.n_nationkey - JOIN hive.ibis_sf1.region AS t6 - ON t5.n_regionkey = t6.r_regionkey - WHERE - t6.r_name = 'ASIA' - AND t2.o_orderdate >= FROM_ISO8601_DATE('1994-01-01') - AND t2.o_orderdate < FROM_ISO8601_DATE('1995-01-01') + "t24"."n_name", + SUM("t24"."l_extendedprice" * ( + 1 - "t24"."l_discount" + )) AS "revenue" + FROM ( + SELECT + "t23"."c_custkey", + "t23"."c_name", + "t23"."c_address", + "t23"."c_nationkey", + "t23"."c_phone", + "t23"."c_acctbal", + "t23"."c_mktsegment", + "t23"."c_comment", + "t23"."o_orderkey", + "t23"."o_custkey", + "t23"."o_orderstatus", + "t23"."o_totalprice", + "t23"."o_orderdate", + "t23"."o_orderpriority", + "t23"."o_clerk", + "t23"."o_shippriority", + "t23"."o_comment", + "t23"."l_orderkey", + "t23"."l_partkey", + "t23"."l_suppkey", + "t23"."l_linenumber", + "t23"."l_quantity", + "t23"."l_extendedprice", + "t23"."l_discount", + "t23"."l_tax", + "t23"."l_returnflag", + "t23"."l_linestatus", + "t23"."l_shipdate", + "t23"."l_commitdate", + "t23"."l_receiptdate", + "t23"."l_shipinstruct", + "t23"."l_shipmode", + "t23"."l_comment", + "t23"."s_suppkey", + "t23"."s_name", + "t23"."s_address", + "t23"."s_nationkey", + "t23"."s_phone", + "t23"."s_acctbal", + "t23"."s_comment", + "t23"."n_nationkey", + "t23"."n_name", + "t23"."n_regionkey", + "t23"."n_comment", + "t23"."r_regionkey", + 
"t23"."r_name", + "t23"."r_comment" + FROM ( + SELECT + "t14"."c_custkey", + "t14"."c_name", + "t14"."c_address", + "t14"."c_nationkey", + "t14"."c_phone", + "t14"."c_acctbal", + "t14"."c_mktsegment", + "t14"."c_comment", + "t15"."o_orderkey", + "t15"."o_custkey", + "t15"."o_orderstatus", + "t15"."o_totalprice", + "t15"."o_orderdate", + "t15"."o_orderpriority", + "t15"."o_clerk", + "t15"."o_shippriority", + "t15"."o_comment", + "t16"."l_orderkey", + "t16"."l_partkey", + "t16"."l_suppkey", + "t16"."l_linenumber", + "t16"."l_quantity", + "t16"."l_extendedprice", + "t16"."l_discount", + "t16"."l_tax", + "t16"."l_returnflag", + "t16"."l_linestatus", + "t16"."l_shipdate", + "t16"."l_commitdate", + "t16"."l_receiptdate", + "t16"."l_shipinstruct", + "t16"."l_shipmode", + "t16"."l_comment", + "t17"."s_suppkey", + "t17"."s_name", + "t17"."s_address", + "t17"."s_nationkey", + "t17"."s_phone", + "t17"."s_acctbal", + "t17"."s_comment", + "t12"."n_nationkey", + "t12"."n_name", + "t12"."n_regionkey", + "t12"."n_comment", + "t13"."r_regionkey", + "t13"."r_name", + "t13"."r_comment" + FROM ( + SELECT + "t0"."c_custkey", + "t0"."c_name", + "t0"."c_address", + "t0"."c_nationkey", + "t0"."c_phone", + CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t0"."c_mktsegment", + "t0"."c_comment" + FROM "customer" AS "t0" + ) AS "t14" + INNER JOIN ( + SELECT + "t1"."o_orderkey", + "t1"."o_custkey", + "t1"."o_orderstatus", + CAST("t1"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t1"."o_orderdate", + "t1"."o_orderpriority", + "t1"."o_clerk", + "t1"."o_shippriority", + "t1"."o_comment" + FROM "orders" AS "t1" + ) AS "t15" + ON "t14"."c_custkey" = "t15"."o_custkey" + INNER JOIN ( + SELECT + "t2"."l_orderkey", + "t2"."l_partkey", + "t2"."l_suppkey", + "t2"."l_linenumber", + CAST("t2"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t2"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t2"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t2"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."l_shipdate", + "t2"."l_commitdate", + "t2"."l_receiptdate", + "t2"."l_shipinstruct", + "t2"."l_shipmode", + "t2"."l_comment" + FROM "lineitem" AS "t2" + ) AS "t16" + ON "t16"."l_orderkey" = "t15"."o_orderkey" + INNER JOIN ( + SELECT + "t3"."s_suppkey", + "t3"."s_name", + "t3"."s_address", + "t3"."s_nationkey", + "t3"."s_phone", + CAST("t3"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t3"."s_comment" + FROM "supplier" AS "t3" + ) AS "t17" + ON "t16"."l_suppkey" = "t17"."s_suppkey" + INNER JOIN ( + SELECT + "t4"."n_nationkey", + "t4"."n_name", + "t4"."n_regionkey", + "t4"."n_comment" + FROM "nation" AS "t4" + ) AS "t12" + ON "t14"."c_nationkey" = "t17"."s_nationkey" + AND "t17"."s_nationkey" = "t12"."n_nationkey" + INNER JOIN ( + SELECT + "t5"."r_regionkey", + "t5"."r_name", + "t5"."r_comment" + FROM "region" AS "t5" + ) AS "t13" + ON "t12"."n_regionkey" = "t13"."r_regionkey" + ) AS "t23" + WHERE + "t23"."r_name" = 'ASIA' + AND "t23"."o_orderdate" >= FROM_ISO8601_DATE('1994-01-01') + AND "t23"."o_orderdate" < FROM_ISO8601_DATE('1995-01-01') + ) AS "t24" GROUP BY 1 -) AS t0 +) AS "t25" ORDER BY - t0.revenue DESC \ No newline at end of file + "t25"."revenue" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/trino/h06.sql b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/trino/h06.sql index 9e96d3028389..1984dc9737ef 100644 --- 
a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/trino/h06.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/trino/h06.sql @@ -1,8 +1,27 @@ SELECT - SUM(t0.l_extendedprice * t0.l_discount) AS revenue -FROM hive.ibis_sf1.lineitem AS t0 -WHERE - t0.l_shipdate >= FROM_ISO8601_DATE('1994-01-01') - AND t0.l_shipdate < FROM_ISO8601_DATE('1995-01-01') - AND t0.l_discount BETWEEN 0.05 AND 0.07 - AND t0.l_quantity < 24 \ No newline at end of file + SUM("t1"."l_extendedprice" * "t1"."l_discount") AS "revenue" +FROM ( + SELECT + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + CAST("t0"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t0"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t0"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t0"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" + WHERE + "t0"."l_shipdate" >= FROM_ISO8601_DATE('1994-01-01') + AND "t0"."l_shipdate" < FROM_ISO8601_DATE('1995-01-01') + AND CAST("t0"."l_discount" AS DECIMAL(15, 2)) BETWEEN CAST(0.05 AS DOUBLE) AND CAST(0.07 AS DOUBLE) + AND CAST("t0"."l_quantity" AS DECIMAL(15, 2)) < 24 +) AS "t1" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql index 01cd03444260..74c153ef4979 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql @@ -1,51 +1,137 @@ -WITH t0 AS ( - SELECT - t6.n_name AS supp_nation, - t7.n_name AS cust_nation, - t3.l_shipdate AS l_shipdate, - t3.l_extendedprice AS l_extendedprice, - t3.l_discount AS l_discount, - CAST(EXTRACT(year FROM t3.l_shipdate) AS SMALLINT) AS l_year, - t3.l_extendedprice * ( - 1 - t3.l_discount - ) AS volume - FROM hive.ibis_sf1.supplier AS t2 - JOIN hive.ibis_sf1.lineitem AS t3 - ON t2.s_suppkey = t3.l_suppkey - JOIN hive.ibis_sf1.orders AS t4 - ON t4.o_orderkey = t3.l_orderkey - JOIN hive.ibis_sf1.customer AS t5 - ON t5.c_custkey = t4.o_custkey - JOIN hive.ibis_sf1.nation AS t6 - ON t2.s_nationkey = t6.n_nationkey - JOIN hive.ibis_sf1.nation AS t7 - ON t5.c_nationkey = t7.n_nationkey -) SELECT - t1.supp_nation, - t1.cust_nation, - t1.l_year, - t1.revenue + "t24"."supp_nation", + "t24"."cust_nation", + "t24"."l_year", + "t24"."revenue" FROM ( SELECT - t0.supp_nation AS supp_nation, - t0.cust_nation AS cust_nation, - t0.l_year AS l_year, - SUM(t0.volume) AS revenue - FROM t0 - WHERE - ( - t0.cust_nation = 'FRANCE' AND t0.supp_nation = 'GERMANY' - OR t0.cust_nation = 'GERMANY' - AND t0.supp_nation = 'FRANCE' - ) - AND t0.l_shipdate BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') + "t23"."supp_nation", + "t23"."cust_nation", + "t23"."l_year", + SUM("t23"."volume") AS "revenue" + FROM ( + SELECT + "t22"."supp_nation", + "t22"."cust_nation", + "t22"."l_shipdate", + "t22"."l_extendedprice", + "t22"."l_discount", + "t22"."l_year", + "t22"."volume" + FROM ( + SELECT + "t10"."n_name" AS "supp_nation", + "t16"."n_name" AS "cust_nation", + "t13"."l_shipdate", + "t13"."l_extendedprice", + "t13"."l_discount", + EXTRACT(year FROM "t13"."l_shipdate") AS "l_year", + "t13"."l_extendedprice" * ( + 1 - "t13"."l_discount" + ) AS "volume" + FROM ( + 
SELECT + "t0"."s_suppkey", + "t0"."s_name", + "t0"."s_address", + "t0"."s_nationkey", + "t0"."s_phone", + CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t0"."s_comment" + FROM "supplier" AS "t0" + ) AS "t12" + INNER JOIN ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + ) AS "t13" + ON "t12"."s_suppkey" = "t13"."l_suppkey" + INNER JOIN ( + SELECT + "t2"."o_orderkey", + "t2"."o_custkey", + "t2"."o_orderstatus", + CAST("t2"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t2"."o_orderdate", + "t2"."o_orderpriority", + "t2"."o_clerk", + "t2"."o_shippriority", + "t2"."o_comment" + FROM "orders" AS "t2" + ) AS "t14" + ON "t14"."o_orderkey" = "t13"."l_orderkey" + INNER JOIN ( + SELECT + "t3"."c_custkey", + "t3"."c_name", + "t3"."c_address", + "t3"."c_nationkey", + "t3"."c_phone", + CAST("t3"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t3"."c_mktsegment", + "t3"."c_comment" + FROM "customer" AS "t3" + ) AS "t15" + ON "t15"."c_custkey" = "t14"."o_custkey" + INNER JOIN ( + SELECT + "t4"."n_nationkey", + "t4"."n_name", + "t4"."n_regionkey", + "t4"."n_comment" + FROM "nation" AS "t4" + ) AS "t10" + ON "t12"."s_nationkey" = "t10"."n_nationkey" + INNER JOIN ( + SELECT + "t4"."n_nationkey", + "t4"."n_name", + "t4"."n_regionkey", + "t4"."n_comment" + FROM "nation" AS "t4" + ) AS "t16" + ON "t15"."c_nationkey" = "t16"."n_nationkey" + ) AS "t22" + WHERE + ( + ( + ( + "t22"."cust_nation" = 'FRANCE' + ) AND ( + "t22"."supp_nation" = 'GERMANY' + ) + ) + OR ( + ( + "t22"."cust_nation" = 'GERMANY' + ) AND ( + "t22"."supp_nation" = 'FRANCE' + ) + ) + ) + AND "t22"."l_shipdate" BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') + ) AS "t23" GROUP BY 1, 2, 3 -) AS t1 +) AS "t24" ORDER BY - t1.supp_nation ASC, - t1.cust_nation ASC, - t1.l_year ASC \ No newline at end of file + "t24"."supp_nation" ASC, + "t24"."cust_nation" ASC, + "t24"."l_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql index 7b8fa445e977..80eaa5b0522c 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql @@ -1,64 +1,136 @@ -WITH t0 AS ( - SELECT - CAST(EXTRACT(year FROM t7.o_orderdate) AS SMALLINT) AS o_year, - t5.l_extendedprice * ( - 1 - t5.l_discount - ) AS volume, - t11.n_name AS nation, - t10.r_name AS r_name, - t7.o_orderdate AS o_orderdate, - t4.p_type AS p_type - FROM hive.ibis_sf1.part AS t4 - JOIN hive.ibis_sf1.lineitem AS t5 - ON t4.p_partkey = t5.l_partkey - JOIN hive.ibis_sf1.supplier AS t6 - ON t6.s_suppkey = t5.l_suppkey - JOIN hive.ibis_sf1.orders AS t7 - ON t5.l_orderkey = t7.o_orderkey - JOIN hive.ibis_sf1.customer AS t8 - ON t7.o_custkey = t8.c_custkey - JOIN hive.ibis_sf1.nation AS t9 - ON t8.c_nationkey = t9.n_nationkey - JOIN hive.ibis_sf1.region AS t10 - ON t9.n_regionkey = t10.r_regionkey - JOIN hive.ibis_sf1.nation AS t11 - ON 
t6.s_nationkey = t11.n_nationkey -), t1 AS ( - SELECT - t0.o_year AS o_year, - t0.volume AS volume, - t0.nation AS nation, - t0.r_name AS r_name, - t0.o_orderdate AS o_orderdate, - t0.p_type AS p_type - FROM t0 - WHERE - t0.r_name = 'AMERICA' - AND t0.o_orderdate BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') - AND t0.p_type = 'ECONOMY ANODIZED STEEL' -), t2 AS ( - SELECT - t1.o_year AS o_year, - t1.volume AS volume, - t1.nation AS nation, - t1.r_name AS r_name, - t1.o_orderdate AS o_orderdate, - t1.p_type AS p_type, - CASE WHEN ( - t1.nation = 'BRAZIL' - ) THEN t1.volume ELSE 0 END AS nation_volume - FROM t1 -) SELECT - t3.o_year, - t3.mkt_share + "t32"."o_year", + "t32"."mkt_share" FROM ( SELECT - t2.o_year AS o_year, - SUM(t2.nation_volume) / SUM(t2.volume) AS mkt_share - FROM t2 + "t31"."o_year", + CAST(SUM("t31"."nation_volume") AS DOUBLE) / SUM("t31"."volume") AS "mkt_share" + FROM ( + SELECT + "t30"."o_year", + "t30"."volume", + "t30"."nation", + "t30"."r_name", + "t30"."o_orderdate", + "t30"."p_type", + CASE WHEN "t30"."nation" = 'BRAZIL' THEN "t30"."volume" ELSE 0 END AS "nation_volume" + FROM ( + SELECT + EXTRACT(year FROM "t20"."o_orderdate") AS "o_year", + "t18"."l_extendedprice" * ( + 1 - "t18"."l_discount" + ) AS "volume", + "t22"."n_name" AS "nation", + "t16"."r_name", + "t20"."o_orderdate", + "t17"."p_type" + FROM ( + SELECT + "t0"."p_partkey", + "t0"."p_name", + "t0"."p_mfgr", + "t0"."p_brand", + "t0"."p_type", + "t0"."p_size", + "t0"."p_container", + CAST("t0"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", + "t0"."p_comment" + FROM "part" AS "t0" + ) AS "t17" + INNER JOIN ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + ) AS "t18" + ON "t17"."p_partkey" = "t18"."l_partkey" + INNER JOIN ( + SELECT + "t2"."s_suppkey", + "t2"."s_name", + "t2"."s_address", + "t2"."s_nationkey", + "t2"."s_phone", + CAST("t2"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t2"."s_comment" + FROM "supplier" AS "t2" + ) AS "t19" + ON "t19"."s_suppkey" = "t18"."l_suppkey" + INNER JOIN ( + SELECT + "t3"."o_orderkey", + "t3"."o_custkey", + "t3"."o_orderstatus", + CAST("t3"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t3"."o_orderdate", + "t3"."o_orderpriority", + "t3"."o_clerk", + "t3"."o_shippriority", + "t3"."o_comment" + FROM "orders" AS "t3" + ) AS "t20" + ON "t18"."l_orderkey" = "t20"."o_orderkey" + INNER JOIN ( + SELECT + "t4"."c_custkey", + "t4"."c_name", + "t4"."c_address", + "t4"."c_nationkey", + "t4"."c_phone", + CAST("t4"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t4"."c_mktsegment", + "t4"."c_comment" + FROM "customer" AS "t4" + ) AS "t21" + ON "t20"."o_custkey" = "t21"."c_custkey" + INNER JOIN ( + SELECT + "t5"."n_nationkey", + "t5"."n_name", + "t5"."n_regionkey", + "t5"."n_comment" + FROM "nation" AS "t5" + ) AS "t14" + ON "t21"."c_nationkey" = "t14"."n_nationkey" + INNER JOIN ( + SELECT + "t6"."r_regionkey", + "t6"."r_name", + "t6"."r_comment" + FROM "region" AS "t6" + ) AS "t16" + ON "t14"."n_regionkey" = "t16"."r_regionkey" + INNER JOIN ( 
+ SELECT + "t5"."n_nationkey", + "t5"."n_name", + "t5"."n_regionkey", + "t5"."n_comment" + FROM "nation" AS "t5" + ) AS "t22" + ON "t19"."s_nationkey" = "t22"."n_nationkey" + ) AS "t30" + WHERE + "t30"."r_name" = 'AMERICA' + AND "t30"."o_orderdate" BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') + AND "t30"."p_type" = 'ECONOMY ANODIZED STEEL' + ) AS "t31" GROUP BY 1 -) AS t3 +) AS "t32" ORDER BY - t3.o_year ASC \ No newline at end of file + "t32"."o_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql index 2656c917ba20..3e652f95bc61 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql @@ -1,39 +1,117 @@ -WITH t0 AS ( - SELECT - t2.l_extendedprice * ( - 1 - t2.l_discount - ) - t4.ps_supplycost * t2.l_quantity AS amount, - CAST(EXTRACT(year FROM t6.o_orderdate) AS SMALLINT) AS o_year, - t7.n_name AS nation, - t5.p_name AS p_name - FROM hive.ibis_sf1.lineitem AS t2 - JOIN hive.ibis_sf1.supplier AS t3 - ON t3.s_suppkey = t2.l_suppkey - JOIN hive.ibis_sf1.partsupp AS t4 - ON t4.ps_suppkey = t2.l_suppkey AND t4.ps_partkey = t2.l_partkey - JOIN hive.ibis_sf1.part AS t5 - ON t5.p_partkey = t2.l_partkey - JOIN hive.ibis_sf1.orders AS t6 - ON t6.o_orderkey = t2.l_orderkey - JOIN hive.ibis_sf1.nation AS t7 - ON t3.s_nationkey = t7.n_nationkey - WHERE - t5.p_name LIKE '%green%' -) SELECT - t1.nation, - t1.o_year, - t1.sum_profit + "t25"."nation", + "t25"."o_year", + "t25"."sum_profit" FROM ( SELECT - t0.nation AS nation, - t0.o_year AS o_year, - SUM(t0.amount) AS sum_profit - FROM t0 + "t24"."nation", + "t24"."o_year", + SUM("t24"."amount") AS "sum_profit" + FROM ( + SELECT + "t23"."amount", + "t23"."o_year", + "t23"."nation", + "t23"."p_name" + FROM ( + SELECT + ( + "t13"."l_extendedprice" * ( + 1 - "t13"."l_discount" + ) + ) - ( + "t15"."ps_supplycost" * "t13"."l_quantity" + ) AS "amount", + EXTRACT(year FROM "t17"."o_orderdate") AS "o_year", + "t12"."n_name" AS "nation", + "t16"."p_name" + FROM ( + SELECT + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + CAST("t0"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t0"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t0"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t0"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" + ) AS "t13" + INNER JOIN ( + SELECT + "t1"."s_suppkey", + "t1"."s_name", + "t1"."s_address", + "t1"."s_nationkey", + "t1"."s_phone", + CAST("t1"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t1"."s_comment" + FROM "supplier" AS "t1" + ) AS "t14" + ON "t14"."s_suppkey" = "t13"."l_suppkey" + INNER JOIN ( + SELECT + "t2"."ps_partkey", + "t2"."ps_suppkey", + "t2"."ps_availqty", + CAST("t2"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", + "t2"."ps_comment" + FROM "partsupp" AS "t2" + ) AS "t15" + ON "t15"."ps_suppkey" = "t13"."l_suppkey" AND "t15"."ps_partkey" = "t13"."l_partkey" + INNER JOIN ( + SELECT + "t3"."p_partkey", + "t3"."p_name", + "t3"."p_mfgr", + "t3"."p_brand", + "t3"."p_type", + "t3"."p_size", + "t3"."p_container", + CAST("t3"."p_retailprice" AS DECIMAL(15, 2)) AS 
"p_retailprice", + "t3"."p_comment" + FROM "part" AS "t3" + ) AS "t16" + ON "t16"."p_partkey" = "t13"."l_partkey" + INNER JOIN ( + SELECT + "t4"."o_orderkey", + "t4"."o_custkey", + "t4"."o_orderstatus", + CAST("t4"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t4"."o_orderdate", + "t4"."o_orderpriority", + "t4"."o_clerk", + "t4"."o_shippriority", + "t4"."o_comment" + FROM "orders" AS "t4" + ) AS "t17" + ON "t17"."o_orderkey" = "t13"."l_orderkey" + INNER JOIN ( + SELECT + "t5"."n_nationkey", + "t5"."n_name", + "t5"."n_regionkey", + "t5"."n_comment" + FROM "nation" AS "t5" + ) AS "t12" + ON "t14"."s_nationkey" = "t12"."n_nationkey" + ) AS "t23" + WHERE + "t23"."p_name" LIKE '%green%' + ) AS "t24" GROUP BY 1, 2 -) AS t1 +) AS "t25" ORDER BY - t1.nation ASC, - t1.o_year DESC \ No newline at end of file + "t25"."nation" ASC, + "t25"."o_year" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql index a754dbf57432..d30ac72d2fd8 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql @@ -1,26 +1,164 @@ -WITH t0 AS ( +SELECT + "t17"."c_custkey", + "t17"."c_name", + "t17"."revenue", + "t17"."c_acctbal", + "t17"."n_name", + "t17"."c_address", + "t17"."c_phone", + "t17"."c_comment" +FROM ( SELECT - t2.c_custkey AS c_custkey, - t2.c_name AS c_name, - t2.c_acctbal AS c_acctbal, - t5.n_name AS n_name, - t2.c_address AS c_address, - t2.c_phone AS c_phone, - t2.c_comment AS c_comment, - SUM(t4.l_extendedprice * ( - 1 - t4.l_discount - )) AS revenue - FROM hive.ibis_sf1.customer AS t2 - JOIN hive.ibis_sf1.orders AS t3 - ON t2.c_custkey = t3.o_custkey - JOIN hive.ibis_sf1.lineitem AS t4 - ON t4.l_orderkey = t3.o_orderkey - JOIN hive.ibis_sf1.nation AS t5 - ON t2.c_nationkey = t5.n_nationkey - WHERE - t3.o_orderdate >= FROM_ISO8601_DATE('1993-10-01') - AND t3.o_orderdate < FROM_ISO8601_DATE('1994-01-01') - AND t4.l_returnflag = 'R' + "t16"."c_custkey", + "t16"."c_name", + "t16"."c_acctbal", + "t16"."n_name", + "t16"."c_address", + "t16"."c_phone", + "t16"."c_comment", + SUM("t16"."l_extendedprice" * ( + 1 - "t16"."l_discount" + )) AS "revenue" + FROM ( + SELECT + "t15"."c_custkey", + "t15"."c_name", + "t15"."c_address", + "t15"."c_nationkey", + "t15"."c_phone", + "t15"."c_acctbal", + "t15"."c_mktsegment", + "t15"."c_comment", + "t15"."o_orderkey", + "t15"."o_custkey", + "t15"."o_orderstatus", + "t15"."o_totalprice", + "t15"."o_orderdate", + "t15"."o_orderpriority", + "t15"."o_clerk", + "t15"."o_shippriority", + "t15"."o_comment", + "t15"."l_orderkey", + "t15"."l_partkey", + "t15"."l_suppkey", + "t15"."l_linenumber", + "t15"."l_quantity", + "t15"."l_extendedprice", + "t15"."l_discount", + "t15"."l_tax", + "t15"."l_returnflag", + "t15"."l_linestatus", + "t15"."l_shipdate", + "t15"."l_commitdate", + "t15"."l_receiptdate", + "t15"."l_shipinstruct", + "t15"."l_shipmode", + "t15"."l_comment", + "t15"."n_nationkey", + "t15"."n_name", + "t15"."n_regionkey", + "t15"."n_comment" + FROM ( + SELECT + "t9"."c_custkey", + "t9"."c_name", + "t9"."c_address", + "t9"."c_nationkey", + "t9"."c_phone", + "t9"."c_acctbal", + "t9"."c_mktsegment", + "t9"."c_comment", + "t10"."o_orderkey", + "t10"."o_custkey", + "t10"."o_orderstatus", + "t10"."o_totalprice", + "t10"."o_orderdate", + "t10"."o_orderpriority", + "t10"."o_clerk", + "t10"."o_shippriority", + "t10"."o_comment", + 
"t11"."l_orderkey", + "t11"."l_partkey", + "t11"."l_suppkey", + "t11"."l_linenumber", + "t11"."l_quantity", + "t11"."l_extendedprice", + "t11"."l_discount", + "t11"."l_tax", + "t11"."l_returnflag", + "t11"."l_linestatus", + "t11"."l_shipdate", + "t11"."l_commitdate", + "t11"."l_receiptdate", + "t11"."l_shipinstruct", + "t11"."l_shipmode", + "t11"."l_comment", + "t8"."n_nationkey", + "t8"."n_name", + "t8"."n_regionkey", + "t8"."n_comment" + FROM ( + SELECT + "t0"."c_custkey", + "t0"."c_name", + "t0"."c_address", + "t0"."c_nationkey", + "t0"."c_phone", + CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t0"."c_mktsegment", + "t0"."c_comment" + FROM "customer" AS "t0" + ) AS "t9" + INNER JOIN ( + SELECT + "t1"."o_orderkey", + "t1"."o_custkey", + "t1"."o_orderstatus", + CAST("t1"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t1"."o_orderdate", + "t1"."o_orderpriority", + "t1"."o_clerk", + "t1"."o_shippriority", + "t1"."o_comment" + FROM "orders" AS "t1" + ) AS "t10" + ON "t9"."c_custkey" = "t10"."o_custkey" + INNER JOIN ( + SELECT + "t2"."l_orderkey", + "t2"."l_partkey", + "t2"."l_suppkey", + "t2"."l_linenumber", + CAST("t2"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t2"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t2"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t2"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."l_shipdate", + "t2"."l_commitdate", + "t2"."l_receiptdate", + "t2"."l_shipinstruct", + "t2"."l_shipmode", + "t2"."l_comment" + FROM "lineitem" AS "t2" + ) AS "t11" + ON "t11"."l_orderkey" = "t10"."o_orderkey" + INNER JOIN ( + SELECT + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment" + FROM "nation" AS "t3" + ) AS "t8" + ON "t9"."c_nationkey" = "t8"."n_nationkey" + ) AS "t15" + WHERE + "t15"."o_orderdate" >= FROM_ISO8601_DATE('1993-10-01') + AND "t15"."o_orderdate" < FROM_ISO8601_DATE('1994-01-01') + AND "t15"."l_returnflag" = 'R' + ) AS "t16" GROUP BY 1, 2, @@ -29,28 +167,7 @@ WITH t0 AS ( 5, 6, 7 -) -SELECT - t1.c_custkey, - t1.c_name, - t1.revenue, - t1.c_acctbal, - t1.n_name, - t1.c_address, - t1.c_phone, - t1.c_comment -FROM ( - SELECT - t0.c_custkey AS c_custkey, - t0.c_name AS c_name, - t0.revenue AS revenue, - t0.c_acctbal AS c_acctbal, - t0.n_name AS n_name, - t0.c_address AS c_address, - t0.c_phone AS c_phone, - t0.c_comment AS c_comment - FROM t0 -) AS t1 +) AS "t17" ORDER BY - t1.revenue DESC + "t17"."revenue" DESC LIMIT 20 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql index eee1dda0fb53..4d85a7eb8a1b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql @@ -1,41 +1,159 @@ -WITH t0 AS ( - SELECT - t2.ps_partkey AS ps_partkey, - SUM(t2.ps_supplycost * t2.ps_availqty) AS value - FROM hive.ibis_sf1.partsupp AS t2 - JOIN hive.ibis_sf1.supplier AS t3 - ON t2.ps_suppkey = t3.s_suppkey - JOIN hive.ibis_sf1.nation AS t4 - ON t4.n_nationkey = t3.s_nationkey - WHERE - t4.n_name = 'GERMANY' - GROUP BY - 1 -) SELECT - t1.ps_partkey, - t1.value + "t13"."ps_partkey", + "t13"."value" FROM ( SELECT - t0.ps_partkey AS ps_partkey, - t0.value AS value - FROM t0 - WHERE - t0.value > ( + "t12"."ps_partkey", + SUM("t12"."ps_supplycost" * "t12"."ps_availqty") AS "value" + FROM ( + SELECT + "t11"."ps_partkey", + 
"t11"."ps_suppkey", + "t11"."ps_availqty", + "t11"."ps_supplycost", + "t11"."ps_comment", + "t11"."s_suppkey", + "t11"."s_name", + "t11"."s_address", + "t11"."s_nationkey", + "t11"."s_phone", + "t11"."s_acctbal", + "t11"."s_comment", + "t11"."n_nationkey", + "t11"."n_name", + "t11"."n_regionkey", + "t11"."n_comment" + FROM ( + SELECT + "t7"."ps_partkey", + "t7"."ps_suppkey", + "t7"."ps_availqty", + "t7"."ps_supplycost", + "t7"."ps_comment", + "t8"."s_suppkey", + "t8"."s_name", + "t8"."s_address", + "t8"."s_nationkey", + "t8"."s_phone", + "t8"."s_acctbal", + "t8"."s_comment", + "t6"."n_nationkey", + "t6"."n_name", + "t6"."n_regionkey", + "t6"."n_comment" + FROM ( + SELECT + "t0"."ps_partkey", + "t0"."ps_suppkey", + "t0"."ps_availqty", + CAST("t0"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", + "t0"."ps_comment" + FROM "partsupp" AS "t0" + ) AS "t7" + INNER JOIN ( + SELECT + "t1"."s_suppkey", + "t1"."s_name", + "t1"."s_address", + "t1"."s_nationkey", + "t1"."s_phone", + CAST("t1"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t1"."s_comment" + FROM "supplier" AS "t1" + ) AS "t8" + ON "t7"."ps_suppkey" = "t8"."s_suppkey" + INNER JOIN ( + SELECT + "t2"."n_nationkey", + "t2"."n_name", + "t2"."n_regionkey", + "t2"."n_comment" + FROM "nation" AS "t2" + ) AS "t6" + ON "t6"."n_nationkey" = "t8"."s_nationkey" + ) AS "t11" + WHERE + "t11"."n_name" = 'GERMANY' + ) AS "t12" + GROUP BY + 1 +) AS "t13" +WHERE + "t13"."value" > ( + ( SELECT - anon_1.total + SUM("t12"."ps_supplycost" * "t12"."ps_availqty") AS "Sum(Multiply(ps_supplycost, ps_availqty))" FROM ( SELECT - SUM(t2.ps_supplycost * t2.ps_availqty) AS total - FROM hive.ibis_sf1.partsupp AS t2 - JOIN hive.ibis_sf1.supplier AS t3 - ON t2.ps_suppkey = t3.s_suppkey - JOIN hive.ibis_sf1.nation AS t4 - ON t4.n_nationkey = t3.s_nationkey + "t11"."ps_partkey", + "t11"."ps_suppkey", + "t11"."ps_availqty", + "t11"."ps_supplycost", + "t11"."ps_comment", + "t11"."s_suppkey", + "t11"."s_name", + "t11"."s_address", + "t11"."s_nationkey", + "t11"."s_phone", + "t11"."s_acctbal", + "t11"."s_comment", + "t11"."n_nationkey", + "t11"."n_name", + "t11"."n_regionkey", + "t11"."n_comment" + FROM ( + SELECT + "t7"."ps_partkey", + "t7"."ps_suppkey", + "t7"."ps_availqty", + "t7"."ps_supplycost", + "t7"."ps_comment", + "t8"."s_suppkey", + "t8"."s_name", + "t8"."s_address", + "t8"."s_nationkey", + "t8"."s_phone", + "t8"."s_acctbal", + "t8"."s_comment", + "t6"."n_nationkey", + "t6"."n_name", + "t6"."n_regionkey", + "t6"."n_comment" + FROM ( + SELECT + "t0"."ps_partkey", + "t0"."ps_suppkey", + "t0"."ps_availqty", + CAST("t0"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", + "t0"."ps_comment" + FROM "partsupp" AS "t0" + ) AS "t7" + INNER JOIN ( + SELECT + "t1"."s_suppkey", + "t1"."s_name", + "t1"."s_address", + "t1"."s_nationkey", + "t1"."s_phone", + CAST("t1"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t1"."s_comment" + FROM "supplier" AS "t1" + ) AS "t8" + ON "t7"."ps_suppkey" = "t8"."s_suppkey" + INNER JOIN ( + SELECT + "t2"."n_nationkey", + "t2"."n_name", + "t2"."n_regionkey", + "t2"."n_comment" + FROM "nation" AS "t2" + ) AS "t6" + ON "t6"."n_nationkey" = "t8"."s_nationkey" + ) AS "t11" WHERE - t4.n_name = 'GERMANY' - ) AS anon_1 - ) * 0.0001 -) AS t1 + "t11"."n_name" = 'GERMANY' + ) AS "t12" + ) * CAST(0.0001 AS DOUBLE) + ) ORDER BY - t1.value DESC \ No newline at end of file + "t13"."value" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql 
b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql index f504a7a81a8f..444f5d44b978 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql @@ -1,23 +1,114 @@ SELECT - t0.l_shipmode, - t0.high_line_count, - t0.low_line_count + "t9"."l_shipmode", + "t9"."high_line_count", + "t9"."low_line_count" FROM ( SELECT - t2.l_shipmode AS l_shipmode, - SUM(CASE t1.o_orderpriority WHEN '1-URGENT' THEN 1 WHEN '2-HIGH' THEN 1 ELSE 0 END) AS high_line_count, - SUM(CASE t1.o_orderpriority WHEN '1-URGENT' THEN 0 WHEN '2-HIGH' THEN 0 ELSE 1 END) AS low_line_count - FROM hive.ibis_sf1.orders AS t1 - JOIN hive.ibis_sf1.lineitem AS t2 - ON t1.o_orderkey = t2.l_orderkey - WHERE - t2.l_shipmode IN ('MAIL', 'SHIP') - AND t2.l_commitdate < t2.l_receiptdate - AND t2.l_shipdate < t2.l_commitdate - AND t2.l_receiptdate >= FROM_ISO8601_DATE('1994-01-01') - AND t2.l_receiptdate < FROM_ISO8601_DATE('1995-01-01') + "t8"."l_shipmode", + SUM( + CASE "t8"."o_orderpriority" WHEN '1-URGENT' THEN 1 WHEN '2-HIGH' THEN 1 ELSE 0 END + ) AS "high_line_count", + SUM( + CASE "t8"."o_orderpriority" WHEN '1-URGENT' THEN 0 WHEN '2-HIGH' THEN 0 ELSE 1 END + ) AS "low_line_count" + FROM ( + SELECT + "t7"."o_orderkey", + "t7"."o_custkey", + "t7"."o_orderstatus", + "t7"."o_totalprice", + "t7"."o_orderdate", + "t7"."o_orderpriority", + "t7"."o_clerk", + "t7"."o_shippriority", + "t7"."o_comment", + "t7"."l_orderkey", + "t7"."l_partkey", + "t7"."l_suppkey", + "t7"."l_linenumber", + "t7"."l_quantity", + "t7"."l_extendedprice", + "t7"."l_discount", + "t7"."l_tax", + "t7"."l_returnflag", + "t7"."l_linestatus", + "t7"."l_shipdate", + "t7"."l_commitdate", + "t7"."l_receiptdate", + "t7"."l_shipinstruct", + "t7"."l_shipmode", + "t7"."l_comment" + FROM ( + SELECT + "t4"."o_orderkey", + "t4"."o_custkey", + "t4"."o_orderstatus", + "t4"."o_totalprice", + "t4"."o_orderdate", + "t4"."o_orderpriority", + "t4"."o_clerk", + "t4"."o_shippriority", + "t4"."o_comment", + "t5"."l_orderkey", + "t5"."l_partkey", + "t5"."l_suppkey", + "t5"."l_linenumber", + "t5"."l_quantity", + "t5"."l_extendedprice", + "t5"."l_discount", + "t5"."l_tax", + "t5"."l_returnflag", + "t5"."l_linestatus", + "t5"."l_shipdate", + "t5"."l_commitdate", + "t5"."l_receiptdate", + "t5"."l_shipinstruct", + "t5"."l_shipmode", + "t5"."l_comment" + FROM ( + SELECT + "t0"."o_orderkey", + "t0"."o_custkey", + "t0"."o_orderstatus", + CAST("t0"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t0"."o_orderdate", + "t0"."o_orderpriority", + "t0"."o_clerk", + "t0"."o_shippriority", + "t0"."o_comment" + FROM "orders" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + ) AS "t5" + ON "t4"."o_orderkey" = "t5"."l_orderkey" + ) AS "t7" + WHERE + "t7"."l_shipmode" IN ('MAIL', 'SHIP') + AND "t7"."l_commitdate" < "t7"."l_receiptdate" + AND "t7"."l_shipdate" < "t7"."l_commitdate" + AND "t7"."l_receiptdate" >= FROM_ISO8601_DATE('1994-01-01') + AND "t7"."l_receiptdate" 
< FROM_ISO8601_DATE('1995-01-01') + ) AS "t8" GROUP BY 1 -) AS t0 +) AS "t9" ORDER BY - t0.l_shipmode ASC \ No newline at end of file + "t9"."l_shipmode" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql index 5e9327de74bb..dff39fad956b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql @@ -1,24 +1,69 @@ -WITH t0 AS ( - SELECT - t2.c_custkey AS c_custkey, - COUNT(t3.o_orderkey) AS c_count - FROM hive.ibis_sf1.customer AS t2 - LEFT OUTER JOIN hive.ibis_sf1.orders AS t3 - ON t2.c_custkey = t3.o_custkey AND NOT t3.o_comment LIKE '%special%requests%' - GROUP BY - 1 -) SELECT - t1.c_count, - t1.custdist + "t9"."c_count", + "t9"."custdist" FROM ( SELECT - t0.c_count AS c_count, - COUNT(*) AS custdist - FROM t0 + "t8"."c_count", + COUNT(*) AS "custdist" + FROM ( + SELECT + "t7"."c_custkey", + COUNT("t7"."o_orderkey") AS "c_count" + FROM ( + SELECT + "t4"."c_custkey", + "t4"."c_name", + "t4"."c_address", + "t4"."c_nationkey", + "t4"."c_phone", + "t4"."c_acctbal", + "t4"."c_mktsegment", + "t4"."c_comment", + "t5"."o_orderkey", + "t5"."o_custkey", + "t5"."o_orderstatus", + "t5"."o_totalprice", + "t5"."o_orderdate", + "t5"."o_orderpriority", + "t5"."o_clerk", + "t5"."o_shippriority", + "t5"."o_comment" + FROM ( + SELECT + "t0"."c_custkey", + "t0"."c_name", + "t0"."c_address", + "t0"."c_nationkey", + "t0"."c_phone", + CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t0"."c_mktsegment", + "t0"."c_comment" + FROM "customer" AS "t0" + ) AS "t4" + LEFT OUTER JOIN ( + SELECT + "t1"."o_orderkey", + "t1"."o_custkey", + "t1"."o_orderstatus", + CAST("t1"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t1"."o_orderdate", + "t1"."o_orderpriority", + "t1"."o_clerk", + "t1"."o_shippriority", + "t1"."o_comment" + FROM "orders" AS "t1" + ) AS "t5" + ON "t4"."c_custkey" = "t5"."o_custkey" + AND NOT ( + "t5"."o_comment" LIKE '%special%requests%' + ) + ) AS "t7" + GROUP BY + 1 + ) AS "t8" GROUP BY 1 -) AS t1 +) AS "t9" ORDER BY - t1.custdist DESC, - t1.c_count DESC \ No newline at end of file + "t9"."custdist" DESC, + "t9"."c_count" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql index aa6ce1815a1f..be7f2a998089 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql @@ -1,14 +1,103 @@ SELECT - ( - SUM(IF(t1.p_type LIKE 'PROMO%', t0.l_extendedprice * ( - 1 - t0.l_discount - ), 0)) * 100 - ) / SUM(t0.l_extendedprice * ( - 1 - t0.l_discount - )) AS promo_revenue -FROM hive.ibis_sf1.lineitem AS t0 -JOIN hive.ibis_sf1.part AS t1 - ON t0.l_partkey = t1.p_partkey -WHERE - t0.l_shipdate >= FROM_ISO8601_DATE('1995-09-01') - AND t0.l_shipdate < FROM_ISO8601_DATE('1995-10-01') \ No newline at end of file + CAST(( + SUM( + IF("t8"."p_type" LIKE 'PROMO%', "t8"."l_extendedprice" * ( + 1 - "t8"."l_discount" + ), 0) + ) * 100 + ) AS DOUBLE) / SUM("t8"."l_extendedprice" * ( + 1 - "t8"."l_discount" + )) AS "promo_revenue" +FROM ( + SELECT + "t7"."l_orderkey", + "t7"."l_partkey", + "t7"."l_suppkey", + "t7"."l_linenumber", + "t7"."l_quantity", + "t7"."l_extendedprice", + "t7"."l_discount", + "t7"."l_tax", + "t7"."l_returnflag", + 
"t7"."l_linestatus", + "t7"."l_shipdate", + "t7"."l_commitdate", + "t7"."l_receiptdate", + "t7"."l_shipinstruct", + "t7"."l_shipmode", + "t7"."l_comment", + "t7"."p_partkey", + "t7"."p_name", + "t7"."p_mfgr", + "t7"."p_brand", + "t7"."p_type", + "t7"."p_size", + "t7"."p_container", + "t7"."p_retailprice", + "t7"."p_comment" + FROM ( + SELECT + "t4"."l_orderkey", + "t4"."l_partkey", + "t4"."l_suppkey", + "t4"."l_linenumber", + "t4"."l_quantity", + "t4"."l_extendedprice", + "t4"."l_discount", + "t4"."l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment", + "t5"."p_partkey", + "t5"."p_name", + "t5"."p_mfgr", + "t5"."p_brand", + "t5"."p_type", + "t5"."p_size", + "t5"."p_container", + "t5"."p_retailprice", + "t5"."p_comment" + FROM ( + SELECT + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + CAST("t0"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t0"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t0"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t0"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t1"."p_partkey", + "t1"."p_name", + "t1"."p_mfgr", + "t1"."p_brand", + "t1"."p_type", + "t1"."p_size", + "t1"."p_container", + CAST("t1"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", + "t1"."p_comment" + FROM "part" AS "t1" + ) AS "t5" + ON "t4"."l_partkey" = "t5"."p_partkey" + ) AS "t7" + WHERE + "t7"."l_shipdate" >= FROM_ISO8601_DATE('1995-09-01') + AND "t7"."l_shipdate" < FROM_ISO8601_DATE('1995-10-01') +) AS "t8" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql index 1f1b58bc18e6..423ef0b3245b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql @@ -1,54 +1,125 @@ -WITH t0 AS ( - SELECT - t3.l_suppkey AS l_suppkey, - SUM(t3.l_extendedprice * ( - 1 - t3.l_discount - )) AS total_revenue - FROM hive.ibis_sf1.lineitem AS t3 - WHERE - t3.l_shipdate >= FROM_ISO8601_DATE('1996-01-01') - AND t3.l_shipdate < FROM_ISO8601_DATE('1996-04-01') - GROUP BY - 1 -), t1 AS ( - SELECT - t3.s_suppkey AS s_suppkey, - t3.s_name AS s_name, - t3.s_address AS s_address, - t3.s_nationkey AS s_nationkey, - t3.s_phone AS s_phone, - t3.s_acctbal AS s_acctbal, - t3.s_comment AS s_comment, - t0.l_suppkey AS l_suppkey, - t0.total_revenue AS total_revenue - FROM hive.ibis_sf1.supplier AS t3 - JOIN t0 - ON t3.s_suppkey = t0.l_suppkey - WHERE - t0.total_revenue = ( - SELECT - MAX(t0.total_revenue) AS "Max(total_revenue)" - FROM t0 - ) -) SELECT - t2.s_suppkey, - t2.s_name, - t2.s_address, - t2.s_phone, - t2.total_revenue + "t8"."s_suppkey", + "t8"."s_name", + "t8"."s_address", + "t8"."s_phone", + "t8"."total_revenue" FROM ( SELECT - t1.s_suppkey AS s_suppkey, - t1.s_name AS s_name, - t1.s_address AS s_address, - t1.s_nationkey AS s_nationkey, - t1.s_phone AS s_phone, - t1.s_acctbal AS s_acctbal, - t1.s_comment AS s_comment, - t1.l_suppkey AS l_suppkey, - t1.total_revenue AS total_revenue - FROM t1 - ORDER BY - t1.s_suppkey ASC -) AS t2 \ No 
newline at end of file + "t4"."s_suppkey", + "t4"."s_name", + "t4"."s_address", + "t4"."s_nationkey", + "t4"."s_phone", + "t4"."s_acctbal", + "t4"."s_comment", + "t6"."l_suppkey", + "t6"."total_revenue" + FROM ( + SELECT + "t0"."s_suppkey", + "t0"."s_name", + "t0"."s_address", + "t0"."s_nationkey", + "t0"."s_phone", + CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t0"."s_comment" + FROM "supplier" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t3"."l_suppkey", + SUM("t3"."l_extendedprice" * ( + 1 - "t3"."l_discount" + )) AS "total_revenue" + FROM ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + WHERE + "t1"."l_shipdate" >= FROM_ISO8601_DATE('1996-01-01') + AND "t1"."l_shipdate" < FROM_ISO8601_DATE('1996-04-01') + ) AS "t3" + GROUP BY + 1 + ) AS "t6" + ON "t4"."s_suppkey" = "t6"."l_suppkey" +) AS "t8" +WHERE + "t8"."total_revenue" = ( + SELECT + MAX("t8"."total_revenue") AS "Max(total_revenue)" + FROM ( + SELECT + "t4"."s_suppkey", + "t4"."s_name", + "t4"."s_address", + "t4"."s_nationkey", + "t4"."s_phone", + "t4"."s_acctbal", + "t4"."s_comment", + "t6"."l_suppkey", + "t6"."total_revenue" + FROM ( + SELECT + "t0"."s_suppkey", + "t0"."s_name", + "t0"."s_address", + "t0"."s_nationkey", + "t0"."s_phone", + CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t0"."s_comment" + FROM "supplier" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t3"."l_suppkey", + SUM("t3"."l_extendedprice" * ( + 1 - "t3"."l_discount" + )) AS "total_revenue" + FROM ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + WHERE + "t1"."l_shipdate" >= FROM_ISO8601_DATE('1996-01-01') + AND "t1"."l_shipdate" < FROM_ISO8601_DATE('1996-04-01') + ) AS "t3" + GROUP BY + 1 + ) AS "t6" + ON "t4"."s_suppkey" = "t6"."l_suppkey" + ) AS "t8" + ) +ORDER BY + "t8"."s_suppkey" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql index 3eab28115bdc..f1681099f881 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql @@ -1,47 +1,93 @@ SELECT - t0.p_brand, - t0.p_type, - t0.p_size, - t0.supplier_cnt + "t11"."p_brand", + "t11"."p_type", + "t11"."p_size", + "t11"."supplier_cnt" FROM ( SELECT - t2.p_brand AS p_brand, - t2.p_type AS p_type, - t2.p_size AS p_size, - COUNT(DISTINCT t1.ps_suppkey) AS supplier_cnt - FROM hive.ibis_sf1.partsupp AS t1 - JOIN hive.ibis_sf1.part AS t2 - ON t2.p_partkey = t1.ps_partkey - WHERE 
- t2.p_brand <> 'Brand#45' - AND NOT t2.p_type LIKE 'MEDIUM POLISHED%' - AND t2.p_size IN (49, 14, 23, 45, 19, 3, 36, 9) - AND ( - NOT t1.ps_suppkey IN ( + "t10"."p_brand", + "t10"."p_type", + "t10"."p_size", + COUNT(DISTINCT "t10"."ps_suppkey") AS "supplier_cnt" + FROM ( + SELECT + "t9"."ps_partkey", + "t9"."ps_suppkey", + "t9"."ps_availqty", + "t9"."ps_supplycost", + "t9"."ps_comment", + "t9"."p_partkey", + "t9"."p_name", + "t9"."p_mfgr", + "t9"."p_brand", + "t9"."p_type", + "t9"."p_size", + "t9"."p_container", + "t9"."p_retailprice", + "t9"."p_comment" + FROM ( + SELECT + "t6"."ps_partkey", + "t6"."ps_suppkey", + "t6"."ps_availqty", + "t6"."ps_supplycost", + "t6"."ps_comment", + "t7"."p_partkey", + "t7"."p_name", + "t7"."p_mfgr", + "t7"."p_brand", + "t7"."p_type", + "t7"."p_size", + "t7"."p_container", + "t7"."p_retailprice", + "t7"."p_comment" + FROM ( SELECT - t3.s_suppkey - FROM ( + "t0"."ps_partkey", + "t0"."ps_suppkey", + "t0"."ps_availqty", + CAST("t0"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", + "t0"."ps_comment" + FROM "partsupp" AS "t0" + ) AS "t6" + INNER JOIN ( + SELECT + "t2"."p_partkey", + "t2"."p_name", + "t2"."p_mfgr", + "t2"."p_brand", + "t2"."p_type", + "t2"."p_size", + "t2"."p_container", + CAST("t2"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", + "t2"."p_comment" + FROM "part" AS "t2" + ) AS "t7" + ON "t7"."p_partkey" = "t6"."ps_partkey" + ) AS "t9" + WHERE + "t9"."p_brand" <> 'Brand#45' + AND NOT ( + "t9"."p_type" LIKE 'MEDIUM POLISHED%' + ) + AND "t9"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) + AND NOT ( + "t9"."ps_suppkey" IN ( SELECT - t4.s_suppkey AS s_suppkey, - t4.s_name AS s_name, - t4.s_address AS s_address, - t4.s_nationkey AS s_nationkey, - t4.s_phone AS s_phone, - t4.s_acctbal AS s_acctbal, - t4.s_comment AS s_comment - FROM hive.ibis_sf1.supplier AS t4 + "t1"."s_suppkey" + FROM "supplier" AS "t1" WHERE - t4.s_comment LIKE '%Customer%Complaints%' - ) AS t3 + "t1"."s_comment" LIKE '%Customer%Complaints%' + ) ) - ) + ) AS "t10" GROUP BY 1, 2, 3 -) AS t0 +) AS "t11" ORDER BY - t0.supplier_cnt DESC, - t0.p_brand ASC, - t0.p_type ASC, - t0.p_size ASC \ No newline at end of file + "t11"."supplier_cnt" DESC, + "t11"."p_brand" ASC, + "t11"."p_type" ASC, + "t11"."p_size" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql index a964aea72736..3c5f39dcc1d8 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql @@ -1,15 +1,123 @@ SELECT - SUM(t0.l_extendedprice) / 7.0 AS avg_yearly -FROM hive.ibis_sf1.lineitem AS t0 -JOIN hive.ibis_sf1.part AS t1 - ON t1.p_partkey = t0.l_partkey -WHERE - t1.p_brand = 'Brand#23' - AND t1.p_container = 'MED BOX' - AND t0.l_quantity < ( + SUM("t10"."l_extendedprice") / CAST(7.0 AS DOUBLE) AS "avg_yearly" +FROM ( + SELECT + "t7"."l_orderkey", + "t7"."l_partkey", + "t7"."l_suppkey", + "t7"."l_linenumber", + "t7"."l_quantity", + "t7"."l_extendedprice", + "t7"."l_discount", + "t7"."l_tax", + "t7"."l_returnflag", + "t7"."l_linestatus", + "t7"."l_shipdate", + "t7"."l_commitdate", + "t7"."l_receiptdate", + "t7"."l_shipinstruct", + "t7"."l_shipmode", + "t7"."l_comment", + "t7"."p_partkey", + "t7"."p_name", + "t7"."p_mfgr", + "t7"."p_brand", + "t7"."p_type", + "t7"."p_size", + "t7"."p_container", + "t7"."p_retailprice", + "t7"."p_comment" + FROM ( SELECT - AVG(t0.l_quantity) AS 
"Mean(l_quantity)" - FROM hive.ibis_sf1.lineitem AS t0 - WHERE - t0.l_partkey = t1.p_partkey - ) * 0.2 \ No newline at end of file + "t4"."l_orderkey", + "t4"."l_partkey", + "t4"."l_suppkey", + "t4"."l_linenumber", + "t4"."l_quantity", + "t4"."l_extendedprice", + "t4"."l_discount", + "t4"."l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment", + "t5"."p_partkey", + "t5"."p_name", + "t5"."p_mfgr", + "t5"."p_brand", + "t5"."p_type", + "t5"."p_size", + "t5"."p_container", + "t5"."p_retailprice", + "t5"."p_comment" + FROM ( + SELECT + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + CAST("t0"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t0"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t0"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t0"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t1"."p_partkey", + "t1"."p_name", + "t1"."p_mfgr", + "t1"."p_brand", + "t1"."p_type", + "t1"."p_size", + "t1"."p_container", + CAST("t1"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", + "t1"."p_comment" + FROM "part" AS "t1" + ) AS "t5" + ON "t5"."p_partkey" = "t4"."l_partkey" + ) AS "t7" + WHERE + "t7"."p_brand" = 'Brand#23' + AND "t7"."p_container" = 'MED BOX' + AND "t7"."l_quantity" < ( + ( + SELECT + AVG("t8"."l_quantity") AS "Mean(l_quantity)" + FROM ( + SELECT + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + CAST("t0"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t0"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t0"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t0"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" + WHERE + "t0"."l_partkey" = "t7"."p_partkey" + ) AS "t8" + ) * CAST(0.2 AS DOUBLE) + ) +) AS "t10" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql index be697b0061d3..f84e31100199 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql @@ -1,52 +1,179 @@ -WITH t0 AS ( - SELECT - t2.l_orderkey AS l_orderkey, - SUM(t2.l_quantity) AS qty_sum - FROM hive.ibis_sf1.lineitem AS t2 - GROUP BY - 1 -) SELECT - t1.c_name, - t1.c_custkey, - t1.o_orderkey, - t1.o_orderdate, - t1.o_totalprice, - t1.sum_qty + "t15"."c_name", + "t15"."c_custkey", + "t15"."o_orderkey", + "t15"."o_orderdate", + "t15"."o_totalprice", + "t15"."sum_qty" FROM ( SELECT - t2.c_name AS c_name, - t2.c_custkey AS c_custkey, - t3.o_orderkey AS o_orderkey, - t3.o_orderdate AS o_orderdate, - t3.o_totalprice AS o_totalprice, - SUM(t4.l_quantity) AS sum_qty - FROM hive.ibis_sf1.customer AS t2 - JOIN hive.ibis_sf1.orders AS t3 - ON t2.c_custkey = t3.o_custkey - JOIN hive.ibis_sf1.lineitem AS t4 - ON t3.o_orderkey = t4.l_orderkey - WHERE - t3.o_orderkey IN ( + "t14"."c_name", + 
"t14"."c_custkey", + "t14"."o_orderkey", + "t14"."o_orderdate", + "t14"."o_totalprice", + SUM("t14"."l_quantity") AS "sum_qty" + FROM ( + SELECT + "t12"."c_custkey", + "t12"."c_name", + "t12"."c_address", + "t12"."c_nationkey", + "t12"."c_phone", + "t12"."c_acctbal", + "t12"."c_mktsegment", + "t12"."c_comment", + "t12"."o_orderkey", + "t12"."o_custkey", + "t12"."o_orderstatus", + "t12"."o_totalprice", + "t12"."o_orderdate", + "t12"."o_orderpriority", + "t12"."o_clerk", + "t12"."o_shippriority", + "t12"."o_comment", + "t12"."l_orderkey", + "t12"."l_partkey", + "t12"."l_suppkey", + "t12"."l_linenumber", + "t12"."l_quantity", + "t12"."l_extendedprice", + "t12"."l_discount", + "t12"."l_tax", + "t12"."l_returnflag", + "t12"."l_linestatus", + "t12"."l_shipdate", + "t12"."l_commitdate", + "t12"."l_receiptdate", + "t12"."l_shipinstruct", + "t12"."l_shipmode", + "t12"."l_comment" + FROM ( SELECT - t5.l_orderkey + "t6"."c_custkey", + "t6"."c_name", + "t6"."c_address", + "t6"."c_nationkey", + "t6"."c_phone", + "t6"."c_acctbal", + "t6"."c_mktsegment", + "t6"."c_comment", + "t7"."o_orderkey", + "t7"."o_custkey", + "t7"."o_orderstatus", + "t7"."o_totalprice", + "t7"."o_orderdate", + "t7"."o_orderpriority", + "t7"."o_clerk", + "t7"."o_shippriority", + "t7"."o_comment", + "t8"."l_orderkey", + "t8"."l_partkey", + "t8"."l_suppkey", + "t8"."l_linenumber", + "t8"."l_quantity", + "t8"."l_extendedprice", + "t8"."l_discount", + "t8"."l_tax", + "t8"."l_returnflag", + "t8"."l_linestatus", + "t8"."l_shipdate", + "t8"."l_commitdate", + "t8"."l_receiptdate", + "t8"."l_shipinstruct", + "t8"."l_shipmode", + "t8"."l_comment" FROM ( SELECT - t0.l_orderkey AS l_orderkey, - t0.qty_sum AS qty_sum - FROM t0 + "t0"."c_custkey", + "t0"."c_name", + "t0"."c_address", + "t0"."c_nationkey", + "t0"."c_phone", + CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t0"."c_mktsegment", + "t0"."c_comment" + FROM "customer" AS "t0" + ) AS "t6" + INNER JOIN ( + SELECT + "t1"."o_orderkey", + "t1"."o_custkey", + "t1"."o_orderstatus", + CAST("t1"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t1"."o_orderdate", + "t1"."o_orderpriority", + "t1"."o_clerk", + "t1"."o_shippriority", + "t1"."o_comment" + FROM "orders" AS "t1" + ) AS "t7" + ON "t6"."c_custkey" = "t7"."o_custkey" + INNER JOIN ( + SELECT + "t2"."l_orderkey", + "t2"."l_partkey", + "t2"."l_suppkey", + "t2"."l_linenumber", + CAST("t2"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t2"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t2"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t2"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."l_shipdate", + "t2"."l_commitdate", + "t2"."l_receiptdate", + "t2"."l_shipinstruct", + "t2"."l_shipmode", + "t2"."l_comment" + FROM "lineitem" AS "t2" + ) AS "t8" + ON "t7"."o_orderkey" = "t8"."l_orderkey" + ) AS "t12" + WHERE + "t12"."o_orderkey" IN ( + SELECT + "t9"."l_orderkey" + FROM ( + SELECT + "t5"."l_orderkey", + SUM("t5"."l_quantity") AS "qty_sum" + FROM ( + SELECT + "t2"."l_orderkey", + "t2"."l_partkey", + "t2"."l_suppkey", + "t2"."l_linenumber", + CAST("t2"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t2"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t2"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t2"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."l_shipdate", + "t2"."l_commitdate", + "t2"."l_receiptdate", + "t2"."l_shipinstruct", + "t2"."l_shipmode", + 
"t2"."l_comment" + FROM "lineitem" AS "t2" + ) AS "t5" + GROUP BY + 1 + ) AS "t9" WHERE - t0.qty_sum > 300 - ) AS t5 - ) + "t9"."qty_sum" > 300 + ) + ) AS "t14" GROUP BY 1, 2, 3, 4, 5 -) AS t1 +) AS "t15" ORDER BY - t1.o_totalprice DESC, - t1.o_orderdate ASC + "t15"."o_totalprice" DESC, + "t15"."o_orderdate" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql index 5c9774fb10b8..033059993529 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql @@ -1,29 +1,178 @@ SELECT - SUM(t0.l_extendedprice * ( - 1 - t0.l_discount - )) AS revenue -FROM hive.ibis_sf1.lineitem AS t0 -JOIN hive.ibis_sf1.part AS t1 - ON t1.p_partkey = t0.l_partkey -WHERE - t1.p_brand = 'Brand#12' - AND t1.p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') - AND t0.l_quantity >= 1 - AND t0.l_quantity <= 11 - AND t1.p_size BETWEEN 1 AND 5 - AND t0.l_shipmode IN ('AIR', 'AIR REG') - AND t0.l_shipinstruct = 'DELIVER IN PERSON' - OR t1.p_brand = 'Brand#23' - AND t1.p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') - AND t0.l_quantity >= 10 - AND t0.l_quantity <= 20 - AND t1.p_size BETWEEN 1 AND 10 - AND t0.l_shipmode IN ('AIR', 'AIR REG') - AND t0.l_shipinstruct = 'DELIVER IN PERSON' - OR t1.p_brand = 'Brand#34' - AND t1.p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') - AND t0.l_quantity >= 20 - AND t0.l_quantity <= 30 - AND t1.p_size BETWEEN 1 AND 15 - AND t0.l_shipmode IN ('AIR', 'AIR REG') - AND t0.l_shipinstruct = 'DELIVER IN PERSON' \ No newline at end of file + SUM("t8"."l_extendedprice" * ( + 1 - "t8"."l_discount" + )) AS "revenue" +FROM ( + SELECT + "t7"."l_orderkey", + "t7"."l_partkey", + "t7"."l_suppkey", + "t7"."l_linenumber", + "t7"."l_quantity", + "t7"."l_extendedprice", + "t7"."l_discount", + "t7"."l_tax", + "t7"."l_returnflag", + "t7"."l_linestatus", + "t7"."l_shipdate", + "t7"."l_commitdate", + "t7"."l_receiptdate", + "t7"."l_shipinstruct", + "t7"."l_shipmode", + "t7"."l_comment", + "t7"."p_partkey", + "t7"."p_name", + "t7"."p_mfgr", + "t7"."p_brand", + "t7"."p_type", + "t7"."p_size", + "t7"."p_container", + "t7"."p_retailprice", + "t7"."p_comment" + FROM ( + SELECT + "t4"."l_orderkey", + "t4"."l_partkey", + "t4"."l_suppkey", + "t4"."l_linenumber", + "t4"."l_quantity", + "t4"."l_extendedprice", + "t4"."l_discount", + "t4"."l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment", + "t5"."p_partkey", + "t5"."p_name", + "t5"."p_mfgr", + "t5"."p_brand", + "t5"."p_type", + "t5"."p_size", + "t5"."p_container", + "t5"."p_retailprice", + "t5"."p_comment" + FROM ( + SELECT + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + CAST("t0"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t0"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t0"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t0"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t1"."p_partkey", + "t1"."p_name", + "t1"."p_mfgr", + "t1"."p_brand", + 
"t1"."p_type", + "t1"."p_size", + "t1"."p_container", + CAST("t1"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", + "t1"."p_comment" + FROM "part" AS "t1" + ) AS "t5" + ON "t5"."p_partkey" = "t4"."l_partkey" + ) AS "t7" + WHERE + ( + ( + ( + ( + ( + ( + ( + ( + "t7"."p_brand" = 'Brand#12' + ) + AND "t7"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + ) + AND ( + "t7"."l_quantity" >= 1 + ) + ) + AND ( + "t7"."l_quantity" <= 11 + ) + ) + AND "t7"."p_size" BETWEEN 1 AND 5 + ) + AND "t7"."l_shipmode" IN ('AIR', 'AIR REG') + ) + AND ( + "t7"."l_shipinstruct" = 'DELIVER IN PERSON' + ) + ) + OR ( + ( + ( + ( + ( + ( + ( + "t7"."p_brand" = 'Brand#23' + ) + AND "t7"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + ) + AND ( + "t7"."l_quantity" >= 10 + ) + ) + AND ( + "t7"."l_quantity" <= 20 + ) + ) + AND "t7"."p_size" BETWEEN 1 AND 10 + ) + AND "t7"."l_shipmode" IN ('AIR', 'AIR REG') + ) + AND ( + "t7"."l_shipinstruct" = 'DELIVER IN PERSON' + ) + ) + ) + OR ( + ( + ( + ( + ( + ( + ( + "t7"."p_brand" = 'Brand#34' + ) + AND "t7"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + ) + AND ( + "t7"."l_quantity" >= 20 + ) + ) + AND ( + "t7"."l_quantity" <= 30 + ) + ) + AND "t7"."p_size" BETWEEN 1 AND 15 + ) + AND "t7"."l_shipmode" IN ('AIR', 'AIR REG') + ) + AND ( + "t7"."l_shipinstruct" = 'DELIVER IN PERSON' + ) + ) +) AS "t8" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql index ae1f1a8c519e..22520f016c64 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql @@ -1,73 +1,93 @@ -WITH t0 AS ( +SELECT + "t13"."s_name", + "t13"."s_address" +FROM ( SELECT - t2.s_suppkey AS s_suppkey, - t2.s_name AS s_name, - t2.s_address AS s_address, - t2.s_nationkey AS s_nationkey, - t2.s_phone AS s_phone, - t2.s_acctbal AS s_acctbal, - t2.s_comment AS s_comment, - t3.n_nationkey AS n_nationkey, - t3.n_name AS n_name, - t3.n_regionkey AS n_regionkey, - t3.n_comment AS n_comment - FROM hive.ibis_sf1.supplier AS t2 - JOIN hive.ibis_sf1.nation AS t3 - ON t2.s_nationkey = t3.n_nationkey - WHERE - t3.n_name = 'CANADA' - AND t2.s_suppkey IN ( + "t10"."s_suppkey", + "t10"."s_name", + "t10"."s_address", + "t10"."s_nationkey", + "t10"."s_phone", + "t10"."s_acctbal", + "t10"."s_comment", + "t8"."n_nationkey", + "t8"."n_name", + "t8"."n_regionkey", + "t8"."n_comment" + FROM ( + SELECT + "t0"."s_suppkey", + "t0"."s_name", + "t0"."s_address", + "t0"."s_nationkey", + "t0"."s_phone", + CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t0"."s_comment" + FROM "supplier" AS "t0" + ) AS "t10" + INNER JOIN ( + SELECT + "t2"."n_nationkey", + "t2"."n_name", + "t2"."n_regionkey", + "t2"."n_comment" + FROM "nation" AS "t2" + ) AS "t8" + ON "t10"."s_nationkey" = "t8"."n_nationkey" +) AS "t13" +WHERE + "t13"."n_name" = 'CANADA' + AND "t13"."s_suppkey" IN ( + SELECT + "t7"."ps_suppkey" + FROM ( SELECT - t4.ps_suppkey - FROM ( + "t1"."ps_partkey", + "t1"."ps_suppkey", + "t1"."ps_availqty", + CAST("t1"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", + "t1"."ps_comment" + FROM "partsupp" AS "t1" + ) AS "t7" + WHERE + "t7"."ps_partkey" IN ( SELECT - t5.ps_partkey AS ps_partkey, - t5.ps_suppkey AS ps_suppkey, - t5.ps_availqty AS ps_availqty, - t5.ps_supplycost AS ps_supplycost, - t5.ps_comment AS ps_comment - FROM hive.ibis_sf1.partsupp AS t5 + 
"t3"."p_partkey" + FROM "part" AS "t3" WHERE - t5.ps_partkey IN ( - SELECT - t6.p_partkey - FROM ( - SELECT - t7.p_partkey AS p_partkey, - t7.p_name AS p_name, - t7.p_mfgr AS p_mfgr, - t7.p_brand AS p_brand, - t7.p_type AS p_type, - t7.p_size AS p_size, - t7.p_container AS p_container, - t7.p_retailprice AS p_retailprice, - t7.p_comment AS p_comment - FROM hive.ibis_sf1.part AS t7 - WHERE - t7.p_name LIKE 'forest%' - ) AS t6 - ) - AND t5.ps_availqty > ( + "t3"."p_name" LIKE 'forest%' + ) + AND "t7"."ps_availqty" > ( + ( + SELECT + SUM("t11"."l_quantity") AS "Sum(l_quantity)" + FROM ( SELECT - SUM(t6.l_quantity) AS "Sum(l_quantity)" - FROM hive.ibis_sf1.lineitem AS t6 + "t4"."l_orderkey", + "t4"."l_partkey", + "t4"."l_suppkey", + "t4"."l_linenumber", + CAST("t4"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t4"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t4"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t4"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment" + FROM "lineitem" AS "t4" WHERE - t6.l_partkey = t5.ps_partkey - AND t6.l_suppkey = t5.ps_suppkey - AND t6.l_shipdate >= FROM_ISO8601_DATE('1994-01-01') - AND t6.l_shipdate < FROM_ISO8601_DATE('1995-01-01') - ) * 0.5 - ) AS t4 - ) -) -SELECT - t1.s_name, - t1.s_address -FROM ( - SELECT - t0.s_name AS s_name, - t0.s_address AS s_address - FROM t0 -) AS t1 + "t4"."l_partkey" = "t7"."ps_partkey" + AND "t4"."l_suppkey" = "t7"."ps_suppkey" + AND "t4"."l_shipdate" >= FROM_ISO8601_DATE('1994-01-01') + AND "t4"."l_shipdate" < FROM_ISO8601_DATE('1995-01-01') + ) AS "t11" + ) * CAST(0.5 AS DOUBLE) + ) + ) ORDER BY - t1.s_name ASC \ No newline at end of file + "t13"."s_name" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql index 7e8f9a143f61..c7c0686a5465 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql @@ -1,56 +1,162 @@ -WITH t0 AS ( - SELECT - t3.l_orderkey AS l1_orderkey, - t4.o_orderstatus AS o_orderstatus, - t3.l_receiptdate AS l_receiptdate, - t3.l_commitdate AS l_commitdate, - t3.l_suppkey AS l1_suppkey, - t2.s_name AS s_name, - t5.n_name AS n_name - FROM hive.ibis_sf1.supplier AS t2 - JOIN hive.ibis_sf1.lineitem AS t3 - ON t2.s_suppkey = t3.l_suppkey - JOIN hive.ibis_sf1.orders AS t4 - ON t4.o_orderkey = t3.l_orderkey - JOIN hive.ibis_sf1.nation AS t5 - ON t2.s_nationkey = t5.n_nationkey -) SELECT - t1.s_name, - t1.numwait + "t21"."s_name", + "t21"."numwait" FROM ( SELECT - t0.s_name AS s_name, - COUNT(*) AS numwait - FROM t0 - WHERE - t0.o_orderstatus = 'F' - AND t0.l_receiptdate > t0.l_commitdate - AND t0.n_name = 'SAUDI ARABIA' - AND ( - EXISTS( + "t20"."s_name", + COUNT(*) AS "numwait" + FROM ( + SELECT + "t17"."l1_orderkey", + "t17"."o_orderstatus", + "t17"."l_receiptdate", + "t17"."l_commitdate", + "t17"."l1_suppkey", + "t17"."s_name", + "t17"."n_name" + FROM ( + SELECT + "t10"."l_orderkey" AS "l1_orderkey", + "t13"."o_orderstatus", + "t10"."l_receiptdate", + "t10"."l_commitdate", + "t10"."l_suppkey" AS "l1_suppkey", + "t9"."s_name", + "t8"."n_name" + FROM ( SELECT - 1 AS anon_1 - FROM hive.ibis_sf1.lineitem AS t2 - WHERE - t2.l_orderkey = t0.l1_orderkey AND t2.l_suppkey <> 
t0.l1_suppkey - ) - ) - AND NOT ( - EXISTS( + "t0"."s_suppkey", + "t0"."s_name", + "t0"."s_address", + "t0"."s_nationkey", + "t0"."s_phone", + CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t0"."s_comment" + FROM "supplier" AS "t0" + ) AS "t9" + INNER JOIN ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + ) AS "t10" + ON "t9"."s_suppkey" = "t10"."l_suppkey" + INNER JOIN ( SELECT - 1 AS anon_2 - FROM hive.ibis_sf1.lineitem AS t2 + "t2"."o_orderkey", + "t2"."o_custkey", + "t2"."o_orderstatus", + CAST("t2"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t2"."o_orderdate", + "t2"."o_orderpriority", + "t2"."o_clerk", + "t2"."o_shippriority", + "t2"."o_comment" + FROM "orders" AS "t2" + ) AS "t13" + ON "t13"."o_orderkey" = "t10"."l_orderkey" + INNER JOIN ( + SELECT + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment" + FROM "nation" AS "t3" + ) AS "t8" + ON "t9"."s_nationkey" = "t8"."n_nationkey" + ) AS "t17" + WHERE + "t17"."o_orderstatus" = 'F' + AND "t17"."l_receiptdate" > "t17"."l_commitdate" + AND "t17"."n_name" = 'SAUDI ARABIA' + AND EXISTS( + SELECT + 1 AS "1" + FROM ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + ) AS "t11" WHERE - t2.l_orderkey = t0.l1_orderkey - AND t2.l_suppkey <> t0.l1_suppkey - AND t2.l_receiptdate > t2.l_commitdate + ( + "t11"."l_orderkey" = "t17"."l1_orderkey" + ) + AND ( + "t11"."l_suppkey" <> "t17"."l1_suppkey" + ) + ) + AND NOT ( + EXISTS( + SELECT + 1 AS "1" + FROM ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + ) AS "t12" + WHERE + ( + ( + "t12"."l_orderkey" = "t17"."l1_orderkey" + ) + AND ( + "t12"."l_suppkey" <> "t17"."l1_suppkey" + ) + ) + AND ( + "t12"."l_receiptdate" > "t12"."l_commitdate" + ) + ) ) - ) + ) AS "t20" GROUP BY 1 -) AS t1 +) AS "t21" ORDER BY - t1.numwait DESC, - t1.s_name ASC + "t21"."numwait" DESC, + "t21"."s_name" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql index 
c76bc9fc5af0..10f4cf53dd9c 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql @@ -1,62 +1,61 @@ -WITH t0 AS ( +SELECT + "t7"."cntrycode", + "t7"."numcust", + "t7"."totacctbal" +FROM ( SELECT - CASE - WHEN ( - 0 + 1 >= 1 - ) - THEN SUBSTR(t2.c_phone, 0 + 1, 2) - ELSE SUBSTR(t2.c_phone, 0 + 1 + LENGTH(t2.c_phone), 2) - END AS cntrycode, - t2.c_acctbal AS c_acctbal - FROM hive.ibis_sf1.customer AS t2 - WHERE - CASE - WHEN ( - 0 + 1 >= 1 - ) - THEN SUBSTR(t2.c_phone, 0 + 1, 2) - ELSE SUBSTR(t2.c_phone, 0 + 1 + LENGTH(t2.c_phone), 2) - END IN ('13', '31', '23', '29', '30', '18', '17') - AND t2.c_acctbal > ( + "t6"."cntrycode", + COUNT(*) AS "numcust", + SUM("t6"."c_acctbal") AS "totacctbal" + FROM ( + SELECT + IF(0 >= 0, SUBSTRING("t2"."c_phone", 0 + 1, 2), SUBSTRING("t2"."c_phone", 0, 2)) AS "cntrycode", + "t2"."c_acctbal" + FROM ( SELECT - anon_1.avg_bal - FROM ( - SELECT - AVG(t2.c_acctbal) AS avg_bal - FROM hive.ibis_sf1.customer AS t2 - WHERE - t2.c_acctbal > 0.0 - AND CASE - WHEN ( - 0 + 1 >= 1 - ) - THEN SUBSTR(t2.c_phone, 0 + 1, 2) - ELSE SUBSTR(t2.c_phone, 0 + 1 + LENGTH(t2.c_phone), 2) - END IN ('13', '31', '23', '29', '30', '18', '17') - ) AS anon_1 - ) - AND NOT ( - EXISTS( + "t0"."c_custkey", + "t0"."c_name", + "t0"."c_address", + "t0"."c_nationkey", + "t0"."c_phone", + CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t0"."c_mktsegment", + "t0"."c_comment" + FROM "customer" AS "t0" + ) AS "t2" + WHERE + IF(0 >= 0, SUBSTRING("t2"."c_phone", 0 + 1, 2), SUBSTRING("t2"."c_phone", 0, 2)) IN ('13', '31', '23', '29', '30', '18', '17') + AND "t2"."c_acctbal" > ( SELECT - 1 AS anon_2 - FROM hive.ibis_sf1.orders AS t3 - WHERE - t3.o_custkey = t2.c_custkey + AVG("t3"."c_acctbal") AS "Mean(c_acctbal)" + FROM ( + SELECT + "t0"."c_custkey", + "t0"."c_name", + "t0"."c_address", + "t0"."c_nationkey", + "t0"."c_phone", + CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t0"."c_mktsegment", + "t0"."c_comment" + FROM "customer" AS "t0" + WHERE + CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) > CAST(0.0 AS DOUBLE) + AND IF(0 >= 0, SUBSTRING("t0"."c_phone", 0 + 1, 2), SUBSTRING("t0"."c_phone", 0, 2)) IN ('13', '31', '23', '29', '30', '18', '17') + ) AS "t3" ) - ) -) -SELECT - t1.cntrycode, - t1.numcust, - t1.totacctbal -FROM ( - SELECT - t0.cntrycode AS cntrycode, - COUNT(*) AS numcust, - SUM(t0.c_acctbal) AS totacctbal - FROM t0 + AND NOT ( + EXISTS( + SELECT + 1 AS "1" + FROM "orders" AS "t1" + WHERE + "t1"."o_custkey" = "t2"."c_custkey" + ) + ) + ) AS "t6" GROUP BY 1 -) AS t1 +) AS "t7" ORDER BY - t1.cntrycode ASC \ No newline at end of file + "t7"."cntrycode" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/test_h15.py b/ibis/backends/tests/tpch/test_h15.py index 0d3fedd9a1b0..5d288e389d2a 100644 --- a/ibis/backends/tests/tpch/test_h15.py +++ b/ibis/backends/tests/tpch/test_h15.py @@ -1,11 +1,19 @@ from __future__ import annotations +import pytest + import ibis from .conftest import add_date, tpch_test @tpch_test +@pytest.mark.notyet( + ["trino"], + reason="unreliable due to floating point differences in repeated evaluations of identical subqueries", + raises=AssertionError, + strict=False, +) def test_tpc_h15(lineitem, supplier): """Top Supplier Query (Q15)""" @@ -26,6 +34,5 @@ def test_tpc_h15(lineitem, supplier): q = supplier.join(qrev, supplier.s_suppkey == qrev.l_suppkey) q = q.filter([q.total_revenue == qrev.total_revenue.max()]) - q = 
q.order_by([q.s_suppkey]) q = q[q.s_suppkey, q.s_name, q.s_address, q.s_phone, q.total_revenue] - return q + return q.order_by([q.s_suppkey]) diff --git a/ibis/backends/trino/__init__.py b/ibis/backends/trino/__init__.py index 833f6a8ecb9c..7122241c56c1 100644 --- a/ibis/backends/trino/__init__.py +++ b/ibis/backends/trino/__init__.py @@ -2,60 +2,175 @@ from __future__ import annotations -import collections +import atexit import contextlib -import warnings from functools import cached_property +from operator import itemgetter from typing import TYPE_CHECKING, Any -import pandas as pd -import sqlalchemy as sa import sqlglot as sg -import toolz -from trino.sqlalchemy.datatype import ROW as _ROW -from trino.sqlalchemy.dialect import TrinoDialect +import sqlglot.expressions as sge +import trino import ibis import ibis.common.exceptions as com import ibis.expr.datatypes as dt +import ibis.expr.schema as sch import ibis.expr.types as ir from ibis import util from ibis.backends.base import CanListDatabases -from ibis.backends.base.sql.alchemy import ( - AlchemyCanCreateSchema, - AlchemyCrossSchemaBackend, -) -from ibis.backends.base.sql.alchemy.datatypes import ArrayType -from ibis.backends.trino.compiler import TrinoSQLCompiler -from ibis.backends.trino.datatypes import INTERVAL, ROW, TrinoType +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.base.sqlglot.compiler import C +from ibis.backends.trino.compiler import TrinoCompiler if TYPE_CHECKING: from collections.abc import Iterator, Mapping + import pandas as pd import pyarrow as pa - import ibis.expr.schema as sch + import ibis.expr.operations as ops -class Backend(AlchemyCrossSchemaBackend, AlchemyCanCreateSchema, CanListDatabases): +class Backend(SQLGlotBackend, CanListDatabases): name = "trino" - compiler = TrinoSQLCompiler + compiler = TrinoCompiler() supports_create_or_replace = False supports_temporary_tables = False + def raw_sql(self, query: str | sg.Expression) -> Any: + """Execute a raw SQL query.""" + with contextlib.suppress(AttributeError): + query = query.sql(dialect=self.name, pretty=True) + + con = self.con + cur = con.cursor() + try: + cur.execute(query) + except Exception: + if con.transaction is not None: + con.rollback() + if cur._query: + cur.close() + raise + else: + if con.transaction is not None: + con.commit() + return cur + + @contextlib.contextmanager + def begin(self): + con = self.con + cur = con.cursor() + try: + yield cur + except Exception: + if con.transaction is not None: + con.rollback() + raise + else: + if con.transaction is not None: + con.commit() + finally: + if cur._query: + cur.close() + + @contextlib.contextmanager + def _safe_raw_sql( + self, query: str | sge.Expression + ) -> Iterator[trino.dbapi.Cursor]: + """Execute a raw SQL query, yielding the cursor. + + Parameters + ---------- + query + The query to execute. + + Yields + ------ + trino.dbapi.Cursor + The cursor of the executed query. + """ + cur = self.raw_sql(query) + try: + yield cur + finally: + if cur._query: + cur.close() + + def get_schema( + self, table_name: str, schema: str | None = None, database: str | None = None + ) -> sch.Schema: + """Compute the schema of a `table`. + + Parameters + ---------- + table_name + May **not** be fully qualified. Use `database` if you want to + qualify the identifier. 
+ schema + Schema name + database + Database name + + Returns + ------- + sch.Schema + Ibis schema + """ + conditions = [sg.column("table_name").eq(sge.convert(table_name))] + + if schema is not None: + conditions.append(sg.column("table_schema").eq(sge.convert(schema))) + + query = ( + sg.select( + "column_name", + "data_type", + sg.column("is_nullable").eq(sge.convert("YES")).as_("nullable"), + ) + .from_(sg.table("columns", db="information_schema", catalog=database)) + .where(sg.and_(*conditions)) + .order_by("ordinal_position") + ) + + with self._safe_raw_sql(query) as cur: + meta = cur.fetchall() + + if not meta: + fqn = sg.table(table_name, db=schema, catalog=database).sql(self.name) + raise com.IbisError(f"Table not found: {fqn}") + + return sch.Schema( + { + name: self.compiler.type_mapper.from_string(typ, nullable=nullable) + for name, typ, nullable in meta + } + ) + @cached_property def version(self) -> str: - return self._scalar_query(sa.select(sa.func.version())) + with self._safe_raw_sql(sg.select(self.compiler.f.version())) as cur: + [(version,)] = cur.fetchall() + return version @property def current_database(self) -> str: - return self._scalar_query(sa.select(sa.literal_column("current_catalog"))) + with self._safe_raw_sql(sg.select(C.current_catalog)) as cur: + [(database,)] = cur.fetchall() + return database + + @property + def current_schema(self) -> str: + with self._safe_raw_sql(sg.select(C.current_schema)) as cur: + [(schema,)] = cur.fetchall() + return schema def list_databases(self, like: str | None = None) -> list[str]: query = "SHOW CATALOGS" - with self.begin() as con: - catalogs = list(con.exec_driver_sql(query).scalars()) - return self._filter_with_like(catalogs, like=like) + with self._safe_raw_sql(query) as cur: + catalogs = cur.fetchall() + return self._filter_with_like(list(map(itemgetter(0), catalogs)), like=like) def list_schemas( self, like: str | None = None, database: str | None = None @@ -63,15 +178,14 @@ def list_schemas( query = "SHOW SCHEMAS" if database is not None: - query += f" IN {self._quote(database)}" - - with self.begin() as con: - schemata = list(con.exec_driver_sql(query).scalars()) - return self._filter_with_like(schemata, like) + database = sg.to_identifier(database, quoted=self.compiler.quoted).sql( + self.name + ) + query += f" IN {database}" - @property - def current_schema(self) -> str: - return self._scalar_query(sa.select(sa.literal_column("current_schema"))) + with self._safe_raw_sql(query) as cur: + schemata = cur.fetchall() + return self._filter_with_like(list(map(itemgetter(0), schemata)), like) def list_tables( self, @@ -111,10 +225,10 @@ def list_tables( if database is not None: query += f" IN {database}" - with self.begin() as con: - tables = list(con.exec_driver_sql(query).scalars()) + with self._safe_raw_sql(query) as cur: + tables = cur.fetchall() - return self._filter_with_like(tables, like=like) + return self._filter_with_like(list(map(itemgetter(0), tables)), like=like) def do_connect( self, @@ -125,6 +239,7 @@ def do_connect( database: str | None = None, schema: str | None = None, source: str | None = None, + timezone: str = "UTC", **connect_args, ) -> None: """Connect to Trino. 
@@ -145,6 +260,8 @@ def do_connect( Schema to use on the Trino server source Application name passed to Trino + timezone + Timezone to use for the connection connect_args Additional keyword arguments passed directly to SQLAlchemy's `create_engine` @@ -167,97 +284,83 @@ def do_connect( >>> con = ibis.trino.connect(database=catalog, schema=schema) >>> con = ibis.trino.connect(database=catalog, schema=schema, source="my-app") """ - database = "/".join(filter(None, (database, schema))) - url = sa.engine.URL.create( - drivername="trino", - username=user, - password=password, + self.con = trino.dbapi.connect( + user=user, + auth=password, host=host, port=port, - database=database, - query=dict(source="ibis" if source is None else source), + catalog=database, + schema=schema, + source=source or "ibis", + timezone=timezone, + **connect_args, ) - connect_args.setdefault("timezone", "UTC") - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - message=r"The dbapi\(\) classmethod on dialect classes has been renamed", - category=sa.exc.SADeprecationWarning, - ) - super().do_connect( - sa.create_engine( - url, connect_args=connect_args, poolclass=sa.pool.StaticPool - ) - ) - - @staticmethod - def _new_sa_metadata(): - meta = sa.MetaData() - - @sa.event.listens_for(meta, "column_reflect") - def column_reflect(inspector, table, column_info): - if isinstance(typ := column_info["type"], _ROW): - column_info["type"] = ROW(typ.attr_types) - elif isinstance(typ, sa.ARRAY): - column_info["type"] = toolz.nth( - typ.dimensions or 1, toolz.iterate(ArrayType, typ.item_type) - ) - elif isinstance(typ, sa.Interval): - column_info["type"] = INTERVAL( - native=typ.native, - day_precision=typ.day_precision, - second_precision=typ.second_precision, - ) - - return meta + self._temp_views = set() @contextlib.contextmanager def _prepare_metadata(self, query: str) -> Iterator[dict[str, str]]: - name = util.gen_name("trino_metadata") - with self.begin() as con: - con.exec_driver_sql(f"PREPARE {name} FROM {query}") + name = util.gen_name(f"{self.name}_metadata") + with self.begin() as cur: + cur.execute(f"PREPARE {name} FROM {query}") try: - yield con.exec_driver_sql(f"DESCRIBE OUTPUT {name}").mappings() + cur.execute(f"DESCRIBE OUTPUT {name}") + yield cur.fetchall() finally: - con.exec_driver_sql(f"DEALLOCATE PREPARE {name}") + cur.execute(f"DEALLOCATE PREPARE {name}") def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: - with self._prepare_metadata(query) as mappings: + with self._prepare_metadata(query) as info: yield from ( # trino types appear to be always nullable - (name, TrinoType.from_string(trino_type).copy(nullable=True)) - for name, trino_type in toolz.pluck(["Column Name", "Type"], mappings) + ( + name, + self.compiler.type_mapper.from_string(trino_type).copy( + nullable=True + ), + ) + for name, _, _, _, trino_type, *_ in info ) def _execute_view_creation(self, name, definition): - from sqlalchemy_views import CreateView - # NB: trino doesn't support temporary views so we use the less # desirable method of cleaning up when the Python process exits using # an atexit hook # # the method that defines the atexit hook is defined in the parent # class - view = CreateView(sa.table(name), definition, or_replace=True) + view = sg.Create( + kind="VIEW", + this=sg.table(name, quoted=self.compiler.quoted), + expression=definition, + replace=True, + ) - with self.begin() as con: - con.execute(view) + with self._safe_raw_sql(view): + pass def create_schema( self, name: str, database: str 
| None = None, force: bool = False ) -> None: - name = ".".join(map(self._quote, filter(None, [database, name]))) - if_not_exists = "IF NOT EXISTS " * force - with self.begin() as con: - con.exec_driver_sql(f"CREATE SCHEMA {if_not_exists}{name}") + with self._safe_raw_sql( + sge.Create( + this=sg.table(name, catalog=database, quoted=self.compiler.quoted), + kind="SCHEMA", + exists=force, + ) + ): + pass def drop_schema( self, name: str, database: str | None = None, force: bool = False ) -> None: - name = ".".join(map(self._quote, filter(None, [database, name]))) - if_exists = "IF EXISTS " * force - with self.begin() as con: - con.exec_driver_sql(f"DROP SCHEMA {if_exists}{name}") + with self._safe_raw_sql( + sge.Drop( + this=sg.table(name, catalog=database, quoted=self.compiler.quoted), + kind="SCHEMA", + exists=force, + ) + ): + pass def create_table( self, @@ -301,49 +404,46 @@ def create_table( if temp: raise NotImplementedError( - "Temporary tables in the Trino backend are not yet supported" + "Temporary tables are not supported in the Trino backend" ) - orig_table_ref = name + quoted = self.compiler.quoted + orig_table_ref = sg.to_identifier(name, quoted=quoted) if overwrite: - name = util.gen_name("trino_overwrite") - - create_stmt = "CREATE TABLE" + name = util.gen_name(f"{self.name}_overwrite") - table_ref = self._quote(name) - - create_stmt += f" {table_ref}" + table_ref = sg.table(name, catalog=database, quoted=quoted) if schema is not None and obj is None: - schema_str = ", ".join( - ( - f"{self._quote(name)} {TrinoType.to_string(typ)}" - + " NOT NULL" * (not typ.nullable) + column_defs = [ + sg.exp.ColumnDef( + this=sg.to_identifier(name, quoted=self.compiler.quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + # TODO(cpcloud): not null constraints are unreliable in + # trino, so we ignore them + # https://github.com/trinodb/trino/issues/2923 + constraints=None, ) for name, typ in schema.items() - ) - create_stmt += f" ({schema_str})" - - if comment is not None: - create_stmt += f" COMMENT {comment!r}" - - if properties: - - def literal_compile(v): - if isinstance(v, collections.abc.Mapping): - return f"MAP(ARRAY{list(v.keys())!r}, ARRAY{list(v.values())!r})" - elif util.is_iterable(v): - return f"ARRAY{list(v)!r}" - else: - return repr(v) + ] + target = sge.Schema(this=table_ref, expressions=column_defs) + else: + target = table_ref - pairs = ", ".join( - f"{k} = {literal_compile(v)}" for k, v in properties.items() + property_list = [ + sge.Property( + this=sg.to_identifier(k), + value=self.compiler.translate(ibis.literal(v).op(), params={}), ) - create_stmt += f" WITH ({pairs})" + for k, v in (properties or {}).items() + ] + + if comment: + property_list.append(sge.SchemaCommentProperty(this=sge.convert(comment))) if obj is not None: + import pandas as pd import pyarrow as pa import pyarrow_hotfix # noqa: F401 @@ -354,53 +454,119 @@ def literal_compile(v): self._run_pre_execute_hooks(table) - compiled_table = self.compile(table) - # cast here because trino doesn't allow specifying a schema in # CTAS, e.g., `CREATE TABLE (schema) AS SELECT` - subquery = compiled_table.subquery() - columns = subquery.columns - select = sa.select( + select = sg.select( *( - sa.cast(columns[name], TrinoType.from_ibis(typ)) + self.compiler.cast(sg.column(name, quoted=quoted), typ).as_( + name, quoted=quoted + ) for name, typ in (schema or table.schema()).items() ) - ) - - compiled = select.compile( - dialect=TrinoDialect(), compile_kwargs=dict(literal_binds=True) - ) - - create_stmt += f" AS 
{compiled}" - - with self.begin() as con: - con.exec_driver_sql(create_stmt) + ).from_(self._to_sqlglot(table).subquery()) + else: + select = None + + create_stmt = sge.Create( + kind="TABLE", + this=target, + expression=select, + properties=( + sge.Properties(expressions=property_list) if property_list else None + ), + ) + with self._safe_raw_sql(create_stmt) as cur: if overwrite: # drop the original table - con.exec_driver_sql( - f"DROP TABLE IF EXISTS {self._quote(orig_table_ref)}" + cur.execute( + sge.Drop(kind="TABLE", this=orig_table_ref, exists=True).sql( + self.name + ) ) # rename the new table to the original table name - con.exec_driver_sql( - f"ALTER TABLE IF EXISTS {table_ref} RENAME TO {self._quote(orig_table_ref)}" + cur.execute( + sge.AlterTable( + this=table_ref, + exists=True, + actions=[sge.RenameTable(this=orig_table_ref, exists=True)], + ).sql(self.name) ) - return self.table(orig_table_ref) + return self.table(orig_table_ref.name) - def _table_from_schema( - self, - name: str, - schema: sch.Schema, - temp: bool = False, - database: str | None = None, - **kwargs: Any, - ) -> sa.Table: - return super()._table_from_schema( - name, - schema, - temp=temp, - trino_catalog=database or self.current_database, - **kwargs, + def _get_temp_view_definition(self, name: str, definition: str) -> str: + return sge.Create( + this=sg.to_identifier(name, quoted=self.compiler.quoted), + kind="VIEW", + expression=definition, + replace=True, ) + + def _register_temp_view_cleanup(self, name: str) -> None: + def drop(self, name: str, query: str): + self.raw_sql(query) + self._temp_views.discard(name) + + query = sge.Drop(this=sg.table(name), kind="VIEW", exists=True) + atexit.register(drop, self, name=name, query=query) + + def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: + import pandas as pd + + from ibis.backends.trino.converter import TrinoPandasData + + try: + df = pd.DataFrame.from_records( + cursor.fetchall(), columns=schema.names, coerce_float=True + ) + except Exception: + # clean up the cursor if we fail to create the DataFrame + # + # in the sqlite case failing to close the cursor results in + # artificially locked tables + cursor.close() + raise + df = TrinoPandasData.convert_table(df, schema) + return df + + def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: + schema = op.schema + if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]: + raise com.IbisTypeError( + "Trino cannot yet reliably handle `null` typed columns; " + f"got null typed columns: {null_columns}" + ) + + # only register if we haven't already done so + if (name := op.name) not in self.list_tables(): + quoted = self.compiler.quoted + column_defs = [ + sg.exp.ColumnDef( + this=sg.to_identifier(colname, quoted=quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + # we don't support `NOT NULL` constraints in trino because + # because each trino connector differs in whether it + # supports nullability constraints, and whether the + # connector supports it isn't visible to ibis via a + # metadata query + ) + for colname, typ in schema.items() + ] + + create_stmt = sg.exp.Create( + kind="TABLE", + this=sg.exp.Schema( + this=sg.to_identifier(name, quoted=quoted), expressions=column_defs + ), + ).sql(self.name, pretty=True) + + data = op.data.to_frame().itertuples(index=False) + specs = ", ".join("?" 
* len(schema)) + table = sg.table(name, quoted=quoted).sql(self.name) + insert_stmt = f"INSERT INTO {table} VALUES ({specs})" + with self.begin() as cur: + cur.execute(create_stmt) + for row in data: + cur.execute(insert_stmt, row) diff --git a/ibis/backends/trino/compiler.py b/ibis/backends/trino/compiler.py index e8d199daead5..3ea36c3b81f3 100644 --- a/ibis/backends/trino/compiler.py +++ b/ibis/backends/trino/compiler.py @@ -1,86 +1,521 @@ from __future__ import annotations -import sqlalchemy as sa +import math +from functools import partial, reduce, singledispatchmethod +import sqlglot as sg +import sqlglot.expressions as sge +import toolz +from sqlglot.dialects import Trino +from sqlglot.dialects.dialect import rename_func + +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt import ibis.expr.operations as ops -from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator -from ibis.backends.base.sql.alchemy.query_builder import _AlchemyTableSetFormatter -from ibis.backends.trino.datatypes import TrinoType -from ibis.backends.trino.registry import operation_registry -from ibis.common.exceptions import UnsupportedOperationError - - -class TrinoSQLExprTranslator(AlchemyExprTranslator): - _registry = operation_registry.copy() - _rewrites = AlchemyExprTranslator._rewrites.copy() - _has_reduction_filter_syntax = True - _supports_tuple_syntax = True - _integer_to_timestamp = staticmethod(sa.func.from_unixtime) - - _forbids_frame_clause = ( - *AlchemyExprTranslator._forbids_frame_clause, - ops.Lead, - ops.Lag, - ) - _require_order_by = ( - *AlchemyExprTranslator._require_order_by, - ops.Lag, - ops.Lead, - ) - _dialect_name = "trino" - supports_unnest_in_select = False +from ibis.backends.base.sqlglot.compiler import FALSE, NULL, SQLGlotCompiler, paren +from ibis.backends.base.sqlglot.datatypes import TrinoType +from ibis.backends.base.sqlglot.rewrites import ( + exclude_unsupported_window_frame_from_ops, + rewrite_first_to_first_value, + rewrite_last_to_last_value, +) +from ibis.expr.rewrites import rewrite_sample + + +# TODO(cpcloud): remove this hack once +# https://github.com/tobymao/sqlglot/issues/2735 is resolved +def make_cross_joins_explicit(node): + if not (node.kind or node.side): + node.args["kind"] = "CROSS" + return node + + +Trino.Generator.TRANSFORMS |= { + sge.BitwiseLeftShift: rename_func("bitwise_left_shift"), + sge.BitwiseRightShift: rename_func("bitwise_right_shift"), + sge.Join: sg.transforms.preprocess([make_cross_joins_explicit]), +} + + +class TrinoCompiler(SQLGlotCompiler): + __slots__ = () + + dialect = "trino" type_mapper = TrinoType + rewrites = ( + rewrite_sample, + rewrite_first_to_first_value, + rewrite_last_to_last_value, + exclude_unsupported_window_frame_from_ops, + *SQLGlotCompiler.rewrites, + ) + quoted = True + NAN = sg.func("nan") + POS_INF = sg.func("infinity") + NEG_INF = -POS_INF -rewrites = TrinoSQLExprTranslator.rewrites + def _aggregate(self, funcname: str, *args, where): + expr = self.f[funcname](*args) + if where is not None: + return sge.Filter(this=expr, expression=sge.Where(this=where)) + return expr + @staticmethod + def _minimize_spec(start, end, spec): + if ( + start is None + and isinstance(getattr(end, "value", None), ops.Literal) + and end.value.value == 0 + and end.following + ): + return None + return spec -@rewrites(ops.Any) -@rewrites(ops.All) -@rewrites(ops.StringContains) -def _no_op(expr): - return expr + @singledispatchmethod + def visit_node(self, op, **kw): + return super().visit_node(op, 
**kw) + @visit_node.register(ops.Correlation) + def visit_Correlation(self, op, *, left, right, how, where): + if how == "sample": + raise com.UnsupportedOperationError( + "Trino does not support `sample` correlation" + ) + if (left_type := op.left.dtype).is_boolean(): + left = self.cast(left, dt.Int32(nullable=left_type.nullable)) -@rewrites(ops.StringContains) -def _rewrite_string_contains(op): - return ops.GreaterEqual(ops.StringFind(op.haystack, op.needle), 0) + if (right_type := op.right.dtype).is_boolean(): + right = self.cast(right, dt.Int32(nullable=right_type.nullable)) + return self.agg.corr(left, right, where=where) -class TrinoTableSetFormatter(_AlchemyTableSetFormatter): - def _format_sample(self, op, table): - if op.seed is not None: - raise UnsupportedOperationError( - "`Table.sample` with a random seed is unsupported" + @visit_node.register(ops.Arbitrary) + def visit_Arbitrary(self, op, *, arg, how, where): + if how != "first": + raise com.UnsupportedOperationError( + 'Trino only supports how="first" for `arbitrary` reduction' ) - method = sa.func.bernoulli if op.method == "row" else sa.func.system - return table.tablesample( - sampling=method(sa.literal_column(f"{op.fraction * 100}")) + return self.agg.arbitrary(arg, where=where) + + @visit_node.register(ops.BitXor) + def visit_BitXor(self, op, *, arg, where): + a, b = map(sg.to_identifier, "ab") + input_fn = combine_fn = sge.Lambda( + this=sge.BitwiseXor(this=a, expression=b), expressions=[a, b] ) + return self.agg.reduce_agg(arg, 0, input_fn, combine_fn, where=where) + + @visit_node.register(ops.ArrayRepeat) + def visit_ArrayRepeat(self, op, *, arg, times): + return self.f.flatten(self.f.repeat(arg, times)) + + @visit_node.register(ops.ArraySlice) + def visit_ArraySlice(self, op, *, arg, start, stop): + def _neg_idx_to_pos(n, idx): + return self.if_(idx < 0, n + self.f.greatest(idx, -n), idx) + + arg_length = self.f.cardinality(arg) + + if start is None: + start = 0 + else: + start = self.f.least(arg_length, _neg_idx_to_pos(arg_length, start)) - def _format_in_memory_table(self, op, translator): - if not op.data: - return sa.select( - *( - translator.translate(ops.Literal(None, dtype=type_)).label(name) - for name, type_ in op.schema.items() + if stop is None: + stop = arg_length + else: + stop = _neg_idx_to_pos(arg_length, stop) + + return self.f.slice(arg, start + 1, stop - start) + + @visit_node.register(ops.ArrayMap) + def visit_ArrayMap(self, op, *, arg, param, body): + return self.f.transform(arg, sge.Lambda(this=body, expressions=[param])) + + @visit_node.register(ops.ArrayFilter) + def visit_ArrayFilter(self, op, *, arg, param, body): + return self.f.filter(arg, sge.Lambda(this=body, expressions=[param])) + + @visit_node.register(ops.ArrayContains) + def visit_ArrayContains(self, op, *, arg, other): + return self.if_( + arg.is_(sg.not_(NULL)), + self.f.coalesce(self.f.contains(arg, other), FALSE), + NULL, + ) + + @visit_node.register(ops.JSONGetItem) + def visit_JSONGetItem(self, op, *, arg, index): + fmt = "%d" if op.index.dtype.is_integer() else '"%s"' + return self.f.json_extract(arg, self.f.format(f"$[{fmt}]", index)) + + @visit_node.register(ops.DayOfWeekIndex) + def visit_DayOfWeekIndex(self, op, *, arg): + return self.cast(paren(self.f.day_of_week(arg) + 6) % 7, op.dtype) + + @visit_node.register(ops.DayOfWeekName) + def visit_DayOfWeekName(self, op, *, arg): + return self.f.date_format(arg, "%W") + + @visit_node.register(ops.StrRight) + def visit_StrRight(self, op, *, arg, nchars): + return 
self.f.substr(arg, -nchars) + + @visit_node.register(ops.EndsWith) + def visit_EndsWith(self, op, *, arg, end): + return self.f.substr(arg, -self.f.length(end)).eq(end) + + @visit_node.register(ops.Repeat) + def visit_Repeat(self, op, *, arg, times): + return self.f.array_join(self.f.repeat(arg, times), "") + + @visit_node.register(ops.DateTruncate) + @visit_node.register(ops.TimestampTruncate) + def visit_DateTimestampTruncate(self, op, *, arg, unit): + _truncate_precisions = { + # ms unit is not yet officially documented but it works + "ms": "millisecond", + "s": "second", + "m": "minute", + "h": "hour", + "D": "day", + "W": "week", + "M": "month", + "Q": "quarter", + "Y": "year", + } + + if (precision := _truncate_precisions.get(unit.short)) is None: + raise com.UnsupportedOperationError( + f"Unsupported truncate unit {op.unit!r}" + ) + return self.f.date_trunc(precision, arg) + + @visit_node.register(ops.DateFromYMD) + def visit_DateFromYMD(self, op, *, year, month, day): + return self.f.from_iso8601_date( + self.f.format("%04d-%02d-%02d", year, month, day) + ) + + @visit_node.register(ops.TimeFromHMS) + def visit_TimeFromHMS(self, op, *, hours, minutes, seconds): + return self.cast( + self.f.format("%02d:%02d:%02d", hours, minutes, seconds), dt.time + ) + + @visit_node.register(ops.TimestampFromYMDHMS) + def visit_TimestampFromYMDHMS( + self, op, *, year, month, day, hours, minutes, seconds + ): + return self.cast( + self.f.from_iso8601_timestamp( + self.f.format( + "%04d-%02d-%02dT%02d:%02d:%02d", + year, + month, + day, + hours, + minutes, + seconds, ) - ).limit(0) + ), + dt.timestamp, + ) + + @visit_node.register(ops.TimestampFromUNIX) + def visit_TimestampFromUNIX(self, op, *, arg, unit): + short = unit.short + if short == "ms": + res = self.f.from_unixtime(self.f.floor(arg / 1_000)) + elif short == "s": + res = self.f.from_unixtime(arg) + elif short == "us": + res = self.f.from_unixtime_nanos((arg - arg % 1_000_000) * 1_000) + elif short == "ns": + res = self.f.from_unixtime_nanos(arg - arg % 1_000_000_000) + else: + raise com.UnsupportedOperationError(f"{unit!r} unit is not supported") + return self.cast(res, op.dtype) + + @visit_node.register(ops.StructColumn) + def visit_StructColumn(self, op, *, names, values): + return self.cast(sge.Struct(expressions=list(values)), op.dtype) + + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_floating(): + if math.isfinite(value): + return self.cast(value, dtype) + return super().visit_NonNullLiteral(op, value=value, dtype=dtype) + elif dtype.is_struct(): + items = [ + self.visit_Literal(ops.Literal(v, fdtype), value=v, dtype=fdtype) + for fdtype, v in zip(dtype.types, value.values()) + ] + return self.cast(sge.Struct(expressions=items), dtype) + elif dtype.is_timestamp(): + return self.cast(self.f.from_iso8601_timestamp(value.isoformat()), dtype) + elif dtype.is_date(): + return self.f.from_iso8601_date(value.isoformat()) + elif dtype.is_time(): + return self.cast(value.isoformat(), dtype) + elif dtype.is_interval(): + return sge.Interval( + this=sge.convert(str(value)), unit=self.v[dtype.resolution.upper()] + ) + elif dtype.is_binary(): + return self.f.from_hex(value.hex()) + else: + return None + + @visit_node.register(ops.Log) + def visit_Log(self, op, *, arg, base): + return self.f.log(base, arg, dialect=self.dialect) + + @visit_node.register(ops.MapGet) + def visit_MapGet(self, op, *, arg, key, default): + return self.f.coalesce(self.f.element_at(arg, key), default) - op_schema = list(op.schema.items()) - rows = [ - 
tuple( - translator.translate(ops.Literal(col, dtype=type_)).label(name) - for col, (name, type_) in zip(row, op_schema) + @visit_node.register(ops.MapContains) + def visit_MapContains(self, op, *, arg, key): + return self.f.contains(self.f.map_keys(arg), key) + + @visit_node.register(ops.ExtractFile) + def visit_ExtractProtocol(self, op, *, arg): + return self.f.concat_ws( + "?", + self.f.nullif(self.f.url_extract_path(arg), ""), + self.f.nullif(self.f.url_extract_query(arg), ""), + ) + + @visit_node.register(ops.ExtractQuery) + def visit_ExtractQuery(self, op, *, arg, key): + if key is None: + return self.f.url_extract_query(arg) + return self.f.url_extract_parameter(arg, key) + + @visit_node.register(ops.Cot) + def visit_Cot(self, op, *, arg): + return 1.0 / self.f.tan(arg) + + @visit_node.register(ops.StringAscii) + def visit_StringAscii(self, op, *, arg): + return self.f.codepoint( + sge.Cast( + this=self.f.substr(arg, 1, 2), + to=sge.DataType( + this=sge.DataType.Type.VARCHAR, + expressions=[sge.DataTypeParam(this=sge.convert(1))], + ), + ) + ) + + @visit_node.register(ops.ArrayStringJoin) + def visit_ArrayStringJoin(self, op, *, sep, arg): + return self.f.array_join(arg, sep) + + @visit_node.register(ops.First) + def visit_First(self, op, *, arg, where): + return self.f.element_at(self.agg.array_agg(arg, where=where), 1) + + @visit_node.register(ops.Last) + def visit_Last(self, op, *, arg, where): + return self.f.element_at(self.agg.array_agg(arg, where=where), -1) + + @visit_node.register(ops.ArrayZip) + def visit_ArrayZip(self, op, *, arg): + max_zip_arguments = 5 + chunks = ( + (len(chunk), self.f.zip(*chunk) if len(chunk) > 1 else chunk[0]) + for chunk in toolz.partition_all(max_zip_arguments, arg) + ) + + def combine_zipped(left, right): + left_n, left_chunk = left + x, y = map(sg.to_identifier, "xy") + + lhs = list(map(x.__getitem__, range(left_n))) if left_n > 1 else [x] + + right_n, right_chunk = right + rhs = list(map(y.__getitem__, range(right_n))) if right_n > 1 else [y] + + zipped_chunk = self.f.zip_with( + left_chunk, + right_chunk, + sge.Lambda(this=self.f.row(*lhs, *rhs), expressions=[x, y]), + ) + return left_n + right_n, zipped_chunk + + all_n, chunk = reduce(combine_zipped, chunks) + assert all_n == len(op.dtype.value_type) + return chunk + + @visit_node.register(ops.ExtractMicrosecond) + def visit_ExtractMicrosecond(self, op, *, arg): + # trino only seems to store milliseconds, but the result of formatting + # always pads the right with 000 + return self.cast(self.f.date_format(arg, "%f"), dt.int32) + + @visit_node.register(ops.TimeDelta) + @visit_node.register(ops.DateDelta) + @visit_node.register(ops.TimestampDelta) + def visit_TemporalDelta(self, op, *, part, left, right): + # trino truncates _after_ the delta, whereas many other backends + # truncate each operand + dialect = self.dialect + return self.f.date_diff( + part, + self.f.date_trunc(part, right, dialect=dialect), + self.f.date_trunc(part, left, dialect=dialect), + dialect=dialect, + ) + + @visit_node.register(ops.IntervalFromInteger) + def visit_IntervalFromInteger(self, op, *, arg, unit): + unit = op.unit.short + if unit in ("Y", "Q", "M", "W"): + raise com.UnsupportedOperationError(f"Interval unit {unit!r} not supported") + return self.f.parse_duration( + self.f.concat( + self.cast(arg, dt.String(nullable=op.arg.dtype.nullable)), unit.lower() ) - for row in op.data.to_frame().itertuples(index=False) - ] - columns = translator._schema_to_sqlalchemy_columns(op.schema) - return sa.values(*columns, 
name=op.name).data(rows).select().subquery() + ) + + @visit_node.register(ops.TimestampRange) + @visit_node.register(ops.IntegerRange) + def visit_Range(self, op, *, start, stop, step): + def zero_value(dtype): + if dtype.is_interval(): + # the unit doesn't matter here, because e.g. 0d = 0s + return self.f.parse_duration("0s") + return 0 + + def interval_sign(v): + zero = self.f.parse_duration("0s") + return sge.Case( + ifs=[ + self.if_(v.eq(zero), 0), + self.if_(v < zero, -1), + self.if_(v > zero, 1), + ] + ) + + def _sign(value, dtype): + if dtype.is_interval(): + return interval_sign(value) + return self.f.sign(value) + + step_dtype = op.step.dtype + zero = zero_value(step_dtype) + return self.if_( + sg.and_( + self.f.nullif(step, zero).is_(sg.not_(NULL)), + _sign(step, step_dtype).eq(_sign(stop - start, step_dtype)), + ), + self.f.array_remove(self.f.sequence(start, stop, step), stop), + self.f.array(), + ) + + @visit_node.register(ops.ArrayIndex) + def visit_ArrayIndex(self, op, *, arg, index): + return self.f.element_at(arg, index + 1) + + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + from_ = op.arg.dtype + if from_.is_integer() and to.is_interval(): + return self.visit_IntervalFromInteger( + ops.IntervalFromInteger(op.arg, unit=to.unit), + arg=arg, + unit=to.unit, + ) + elif from_.is_integer() and to.is_timestamp(): + return self.f.from_unixtime(arg, to.timezone or "UTC") + return super().visit_Cast(op, arg=arg, to=to) + + @visit_node.register(ops.CountDistinctStar) + def visit_CountDistinctStar(self, op, *, arg, where): + make_col = partial(sg.column, table=arg.alias_or_name, quoted=self.quoted) + row = self.f.row(*map(make_col, op.arg.schema.names)) + return self.agg.count(sge.Distinct(expressions=[row]), where=where) + + @visit_node.register(ops.ArrayConcat) + def visit_ArrayConcat(self, op, *, arg): + return self.f.concat(*arg) + + @visit_node.register(ops.StringContains) + def visit_StringContains(self, op, *, haystack, needle): + return self.f.strpos(haystack, needle) > 0 + + @visit_node.register(ops.RegexExtract) + def visit_RegexpExtract(self, op, *, arg, pattern, index): + # sqlglot doesn't support the third `group` argument for trino so work + # around that limitation using an anonymous function + return sge.Anonymous(this="regexp_extract", expressions=[arg, pattern, index]) + + @visit_node.register(ops.Quantile) + @visit_node.register(ops.MultiQuantile) + @visit_node.register(ops.Median) + @visit_node.register(ops.RowID) + @visit_node.register(ops.TimestampBucket) + def visit_Undefined(self, op, **kw): + return super().visit_Undefined(op, **kw) + + +_SIMPLE_OPS = { + ops.Pi: "pi", + ops.E: "e", + ops.RegexReplace: "regexp_replace", + ops.Map: "map", + ops.MapKeys: "map_keys", + ops.MapLength: "cardinality", + ops.MapMerge: "map_concat", + ops.MapValues: "map_values", + ops.Log2: "log2", + ops.Log10: "log10", + ops.IsNan: "is_nan", + ops.IsInf: "is_infinite", + ops.StringToTimestamp: "date_parse", + ops.Strftime: "date_format", + ops.ExtractEpochSeconds: "to_unixtime", + ops.ExtractWeekOfYear: "week_of_year", + ops.ExtractDayOfYear: "day_of_year", + ops.ExtractMillisecond: "millisecond", + ops.ArrayUnion: "array_union", + ops.ArrayRemove: "array_remove", + ops.ArrayFlatten: "flatten", + ops.ArraySort: "array_sort", + ops.ArrayDistinct: "array_distinct", + ops.ArrayLength: "cardinality", + ops.ArrayCollect: "array_agg", + ops.ArrayIntersect: "array_intersect", + ops.BitAnd: "bitwise_and_agg", + ops.BitOr: "bitwise_or_agg", + ops.TypeOf: "typeof", 
+ ops.Levenshtein: "levenshtein_distance", + ops.ExtractProtocol: "url_extract_protocol", + ops.ExtractHost: "url_extract_host", + ops.ExtractPath: "url_extract_path", + ops.ExtractFragment: "url_extract_fragment", + ops.RPad: "rpad", + ops.LPad: "lpad", + ops.ArrayPosition: "array_position", +} + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @TrinoCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) + + else: + + @TrinoCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) + setattr(TrinoCompiler, f"visit_{_op.__name__}", _fmt) -class TrinoSQLCompiler(AlchemyCompiler): - cheap_in_memory_tables = False - translator_class = TrinoSQLExprTranslator - null_limit = sa.literal_column("ALL") - table_set_formatter_class = TrinoTableSetFormatter +del _op, _name, _fmt diff --git a/ibis/backends/trino/converter.py b/ibis/backends/trino/converter.py new file mode 100644 index 000000000000..5488b6083219 --- /dev/null +++ b/ibis/backends/trino/converter.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +import datetime + +from ibis.formats.pandas import PandasData + + +class TrinoPandasData(PandasData): + @classmethod + def convert_Interval(cls, s, dtype, pandas_dtype): + def parse_trino_timedelta(value): + # format is 'days hour:minute:second.millisecond' + days, rest = value.split(" ", 1) + hms, millis = rest.split(".", 1) + hours, minutes, seconds = hms.split(":") + return datetime.timedelta( + days=int(days), + hours=int(hours), + minutes=int(minutes), + seconds=int(seconds), + milliseconds=int(millis), + ) + + return s.map(parse_trino_timedelta, na_action="ignore") diff --git a/ibis/backends/trino/datatypes.py b/ibis/backends/trino/datatypes.py deleted file mode 100644 index d34bc2c81997..000000000000 --- a/ibis/backends/trino/datatypes.py +++ /dev/null @@ -1,149 +0,0 @@ -from __future__ import annotations - -from datetime import time, timedelta -from typing import Any - -import sqlalchemy.types as sat -import trino.client -from sqlalchemy.ext.compiler import compiles -from trino.sqlalchemy.datatype import DOUBLE, JSON, MAP, TIMESTAMP -from trino.sqlalchemy.datatype import ROW as _ROW - -import ibis.expr.datatypes as dt -from ibis.backends.base.sql.alchemy.datatypes import AlchemyType -from ibis.backends.base.sqlglot.datatypes import TrinoType as SqlglotTrinoType - - -class ROW(_ROW): - _result_is_tuple = hasattr(trino.client, "NamedRowTuple") - - def result_processor(self, dialect, coltype: str) -> None: - if not coltype.lower().startswith("row"): - return None - - def process( - value, - result_is_tuple: bool = self._result_is_tuple, - names: tuple[str, ...] 
= tuple(name for name, _ in self.attr_types), - ) -> dict[str, Any] | None: - if value is None or not result_is_tuple: - return value - else: - return dict(zip(names, value)) - - return process - - -class INTERVAL(sat.Interval): - def result_processor(self, dialect, coltype: str) -> None: - def process(value): - if value is None: - return value - - # TODO: support year-month intervals - days, duration = value.split(" ", 1) - t = time.fromisoformat(duration) - return timedelta( - days=int(days), - hours=t.hour, - minutes=t.minute, - seconds=t.second, - microseconds=t.microsecond, - ) - - return process - - -@compiles(TIMESTAMP) -def compiles_timestamp(typ, compiler, **kw): - result = "TIMESTAMP" - - if (prec := typ.precision) is not None: - result += f"({prec:d})" - - if typ.timezone: - result += " WITH TIME ZONE" - - return result - - -@compiles(ROW) -def _compiles_row(element, compiler, **kw): - # TODO: @compiles should live in the dialect - quote = compiler.dialect.identifier_preparer.quote - content = ", ".join( - f"{quote(field)} {compiler.process(typ, **kw)}" - for field, typ in element.attr_types - ) - return f"ROW({content})" - - -@compiles(MAP) -def compiles_map(typ, compiler, **kw): - # TODO: @compiles should live in the dialect - key_type = compiler.process(typ.key_type, **kw) - value_type = compiler.process(typ.value_type, **kw) - return f"MAP({key_type}, {value_type})" - - -@compiles(DOUBLE) -@compiles(sat.REAL, "trino") -def _floating(element, compiler, **kw): - return type(element).__name__.upper() - - -class TrinoType(AlchemyType): - dialect = "trino" - source_types = { - DOUBLE: dt.Float64, - sat.REAL: dt.Float32, - JSON: dt.JSON, - } - - @classmethod - def to_ibis(cls, typ, nullable=True): - if dtype := cls.source_types.get(type(typ)): - return dtype(nullable=nullable) - elif isinstance(typ, sat.NUMERIC): - return dt.Decimal(typ.precision or 18, typ.scale or 3, nullable=nullable) - elif isinstance(typ, sat.ARRAY): - value_dtype = cls.to_ibis(typ.item_type) - return dt.Array(value_dtype, nullable=nullable) - elif isinstance(typ, ROW): - fields = ((k, cls.to_ibis(v)) for k, v in typ.attr_types) - return dt.Struct.from_tuples(fields, nullable=nullable) - elif isinstance(typ, MAP): - return dt.Map( - cls.to_ibis(typ.key_type), - cls.to_ibis(typ.value_type), - nullable=nullable, - ) - elif isinstance(typ, TIMESTAMP): - return dt.Timestamp( - timezone="UTC" if typ.timezone else None, - scale=typ.precision, - nullable=nullable, - ) - else: - return super().to_ibis(typ, nullable=nullable) - - @classmethod - def from_ibis(cls, dtype): - if isinstance(dtype, dt.Float64): - return DOUBLE() - elif isinstance(dtype, dt.Float32): - return sat.REAL() - elif dtype.is_string(): - return sat.VARCHAR() - elif dtype.is_struct(): - return ROW((name, cls.from_ibis(typ)) for name, typ in dtype.fields.items()) - elif dtype.is_map(): - return MAP(cls.from_ibis(dtype.key_type), cls.from_ibis(dtype.value_type)) - elif dtype.is_timestamp(): - return TIMESTAMP(precision=dtype.scale, timezone=bool(dtype.timezone)) - else: - return super().from_ibis(dtype) - - @classmethod - def from_string(cls, type_string, nullable=True): - return SqlglotTrinoType.from_string(type_string, nullable=nullable) diff --git a/ibis/backends/trino/registry.py b/ibis/backends/trino/registry.py deleted file mode 100644 index 5c5f45658e3d..000000000000 --- a/ibis/backends/trino/registry.py +++ /dev/null @@ -1,600 +0,0 @@ -from __future__ import annotations - -import operator -from functools import partial, reduce -from typing 
import Literal - -import sqlalchemy as sa -import toolz -from sqlalchemy.ext.compiler import compiles -from sqlalchemy.sql.expression import FunctionElement -from trino.sqlalchemy.datatype import DOUBLE - -import ibis -import ibis.common.exceptions as com -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -from ibis.backends.base.sql.alchemy.registry import ( - _literal as _alchemy_literal, -) -from ibis.backends.base.sql.alchemy.registry import ( - array_filter, - array_map, - fixed_arity, - reduction, - sqlalchemy_operation_registry, - sqlalchemy_window_functions_registry, - try_cast, - unary, - varargs, -) -from ibis.backends.postgres.registry import _corr, _covar -from ibis.backends.trino.datatypes import INTERVAL - -operation_registry = sqlalchemy_operation_registry.copy() -operation_registry.update(sqlalchemy_window_functions_registry) - - -def _array(t, elements): - return t.translate(ibis.array(elements).op()) - - -class make_array(FunctionElement): - pass - - -@compiles(make_array, "default") -def compile_make_array(element, compiler, **kw): - return f"ARRAY[{compiler.process(element.clauses, **kw)}]" - - -def _literal(t, op): - value = op.value - dtype = op.dtype - - if value is None: - return sa.null() - elif dtype.is_struct(): - elements = ( - t.translate(ops.Literal(element, dtype=field_type)) - for element, field_type in zip(value.values(), dtype.types) - ) - return sa.cast(sa.func.row(*elements), t.get_sqla_type(dtype)) - elif dtype.is_array(): - value_type = dtype.value_type - return make_array( - *(t.translate(ops.Literal(element, dtype=value_type)) for element in value) - ) - elif dtype.is_map(): - return sa.func.map(_array(t, value.keys()), _array(t, value.values())) - elif dtype.is_float64(): - return sa.literal(float(value), type_=DOUBLE()) - elif dtype.is_integer(): - return sa.literal(int(value), type_=t.get_sqla_type(dtype)) - elif dtype.is_timestamp(): - return sa.cast( - sa.func.from_iso8601_timestamp(value.isoformat()), t.get_sqla_type(dtype) - ) - elif dtype.is_date(): - return sa.func.from_iso8601_date(value.isoformat()) - elif dtype.is_time(): - return sa.cast(sa.literal(str(value)), t.get_sqla_type(dtype)) - elif dtype.is_interval(): - return sa.literal_column( - f"INTERVAL '{value}' {dtype.resolution.upper()}", type_=INTERVAL - ) - - return _alchemy_literal(t, op) - - -def _arbitrary(t, op): - if op.how != "first": - raise com.UnsupportedOperationError( - 'Trino only supports how="first" for `arbitrary` reduction' - ) - return reduction(sa.func.arbitrary)(t, op) - - -def _json_get_item(t, op): - arg = t.translate(op.arg) - index = t.translate(op.index) - fmt = "%d" if op.index.dtype.is_integer() else '"%s"' - return sa.func.json_extract(arg, sa.func.format(f"$[{fmt}]", index)) - - -def _group_concat(t, op): - if not isinstance(op.sep, ops.Literal): - raise com.UnsupportedOperationError( - "Trino group concat separator must be a literal value" - ) - - arg = sa.func.array_agg(t.translate(op.arg)) - if (where := op.where) is not None: - arg = arg.filter(t.translate(where)) - return sa.func.array_join(arg, t.translate(op.sep)) - - -def _array_column(t, op): - args = ", ".join( - str(t.translate(arg).compile(compile_kwargs={"literal_binds": True})) - for arg in op.exprs - ) - return sa.literal_column(f"ARRAY[{args}]", type_=t.get_sqla_type(op.dtype)) - - -_truncate_precisions = { - # ms unit is not yet officially documented in Trino's public documentation, - # but it just works. 
- "ms": "millisecond", - "s": "second", - "m": "minute", - "h": "hour", - "D": "day", - "W": "week", - "M": "month", - "Q": "quarter", - "Y": "year", -} - - -def _timestamp_truncate(t, op): - sa_arg = t.translate(op.arg) - try: - precision = _truncate_precisions[op.unit.short] - except KeyError: - raise com.UnsupportedOperationError(f"Unsupported truncate unit {op.unit!r}") - return sa.func.date_trunc(precision, sa_arg) - - -def _timestamp_from_unix(t, op): - arg, unit = op.args - arg = t.translate(arg) - - unit_short = unit.short - if unit_short == "ms": - try: - arg //= 1_000 - except TypeError: - arg = sa.func.floor(arg / 1_000) - res = sa.func.from_unixtime(arg) - elif unit_short == "s": - res = sa.func.from_unixtime(arg) - elif unit_short == "us": - res = sa.func.from_unixtime_nanos((arg - arg % 1_000_000) * 1_000) - elif unit_short == "ns": - res = sa.func.from_unixtime_nanos(arg - arg % 1_000_000_000) - else: - raise com.UnsupportedOperationError(f"{unit!r} unit is not supported") - return sa.cast(res, t.get_sqla_type(op.dtype)) - - -if_ = getattr(sa.func, "if") - - -def _neg_idx_to_pos(array, idx): - arg_length = sa.func.cardinality(array) - return if_(idx < 0, arg_length + sa.func.greatest(idx, -arg_length), idx) - - -def _array_slice(t, op): - arg = t.translate(op.arg) - - arg_length = sa.func.cardinality(arg) - - if (start := op.start) is None: - start = 0 - else: - start = sa.func.least(arg_length, _neg_idx_to_pos(arg, t.translate(start))) - - if (stop := op.stop) is None: - stop = arg_length - else: - stop = _neg_idx_to_pos(arg, t.translate(stop)) - - length = stop - start - return sa.func.slice(arg, start + 1, length, type_=arg.type) - - -def _extract_url_query(t, op): - arg = t.translate(op.arg) - key = op.key - if key is None: - result = sa.func.url_extract_query(arg) - else: - result = sa.func.url_extract_parameter(arg, t.translate(key)) - return sa.func.nullif(result, "") - - -def _round(t, op): - arg = t.translate(op.arg) - if (digits := op.digits) is not None: - return sa.func.round(arg, t.translate(digits)) - return sa.func.round(arg) - - -def _unnest(t, op): - arg = op.arg - name = arg.name - row_type = op.arg.dtype.value_type - names = getattr(row_type, "names", (name,)) - rd = sa.func.unnest(t.translate(arg)).table_valued(*names).render_derived() - # when unnesting a single column, unwrap the single ROW field access that - # would otherwise be generated, but keep the ROW if the array's element - # type is struct - if not row_type.is_struct(): - assert ( - len(names) == 1 - ), f"got non-struct dtype {row_type} with more than one name: {len(names)}" - return rd.c[0] - row = sa.func.row(*(rd.c[name] for name in names)) - return sa.cast(row, t.get_sqla_type(row_type)) - - -def _ifelse(t, op): - return if_( - t.translate(op.bool_expr), - t.translate(op.true_expr), - t.translate(op.false_null_expr), - type_=t.get_sqla_type(op.dtype), - ) - - -def _cot(t, op): - arg = t.translate(op.arg) - return 1.0 / sa.func.tan(arg, type_=t.get_sqla_type(op.arg.dtype)) - - -@compiles(array_map, "trino") -def compiles_list_apply(element, compiler, **kw): - *args, signature, result = map(partial(compiler.process, **kw), element.clauses) - return f"transform({', '.join(args)}, {signature} -> {result})" - - -def _array_map(t, op): - return array_map( - t.translate(op.arg), sa.literal_column(f"({op.param})"), t.translate(op.body) - ) - - -@compiles(array_filter, "trino") -def compiles_list_filter(element, compiler, **kw): - *args, signature, result = map(partial(compiler.process, **kw), 
element.clauses) - return f"filter({', '.join(args)}, {signature} -> {result})" - - -def _array_filter(t, op): - return array_filter( - t.translate(op.arg), sa.literal_column(f"({op.param})"), t.translate(op.body) - ) - - -def _first_last(t, op, *, offset: Literal[-1, 1]): - return sa.func.element_at(t._reduction(sa.func.array_agg, op), offset) - - -def _zip(t, op): - # more than one chunk means more than 5 arguments to zip, which trino - # doesn't support - # - # help trino out by reducing in chunks of 5 using zip_with - max_zip_arguments = 5 - chunks = ( - (len(chunk), sa.func.zip(*chunk) if len(chunk) > 1 else chunk[0]) - for chunk in toolz.partition_all(max_zip_arguments, map(t.translate, op.arg)) - ) - - def combine_zipped(left, right): - left_n, left_chunk = left - lhs = ( - ", ".join(f"x[{i:d}]" for i in range(1, left_n + 1)) if left_n > 1 else "x" - ) - - right_n, right_chunk = right - rhs = ( - ", ".join(f"y[{i:d}]" for i in range(1, right_n + 1)) - if right_n > 1 - else "y" - ) - - zipped_chunk = sa.func.zip_with( - left_chunk, right_chunk, sa.literal_column(f"(x, y) -> ROW({lhs}, {rhs})") - ) - return left_n + right_n, zipped_chunk - - all_n, chunk = reduce(combine_zipped, chunks) - - dtype = op.dtype - - assert all_n == len(dtype.value_type) - - return sa.type_coerce(chunk, t.get_sqla_type(dtype)) - - -@compiles(try_cast, "trino") -def compiles_try_cast(element, compiler, **kw): - return "TRY_CAST({} AS {})".format( - compiler.process(element.clauses.clauses[0], **kw), - compiler.visit_typeclause(element), - ) - - -def _try_cast(t, op): - arg = t.translate(op.arg) - to = t.get_sqla_type(op.to) - return try_cast(arg, type_=to) - - -def _array_intersect(t, op): - x = ops.Argument(name="x", shape=op.left.shape, dtype=op.left.dtype.value_type) - return t.translate( - ops.ArrayFilter(op.left, param=x.param, body=ops.ArrayContains(op.right, x)) - ) - - -_temporal_delta = fixed_arity( - lambda part, left, right: sa.func.date_diff( - part, sa.func.date_trunc(part, right), sa.func.date_trunc(part, left) - ), - 3, -) - - -def _interval_from_integer(t, op): - unit = op.unit.short - if unit in ("Y", "Q", "M", "W"): - raise com.UnsupportedOperationError(f"Interval unit {unit!r} not supported") - arg = sa.func.concat( - t.translate(ops.Cast(op.arg, dt.String(nullable=op.arg.dtype.nullable))), - unit.lower(), - ) - return sa.type_coerce(sa.func.parse_duration(arg), INTERVAL) - - -def zero_value(dtype): - if dtype.is_interval(): - # the unit doesn't matter here, because e.g. 
0d = 0s - return sa.func.parse_duration("0s") - return 0 - - -def interval_sign(v): - zero = sa.func.parse_duration("0s") - return sa.case((v == zero, 0), (v < zero, -1), (v > zero, 1)) - - -def _sign(value, dtype): - if dtype.is_interval(): - return interval_sign(value) - return sa.func.sign(value) - - -def _range(t, op): - start = t.translate(op.start) - stop = t.translate(op.stop) - step = t.translate(op.step) - satype = t.get_sqla_type(op.dtype) - zero = zero_value(op.step.dtype) - return if_( - sa.and_( - sa.func.nullif(step, zero).is_not(None), - _sign(step, op.step.dtype) == _sign(stop - start, op.step.dtype), - ), - sa.func.array_remove( - sa.func.sequence(start, stop, step, type_=satype), stop, type_=satype - ), - sa.literal_column("ARRAY[]"), - ) - - -operation_registry.update( - { - # conditional expressions - # static checks are not happy with using "if" as a property - ops.IfElse: _ifelse, - # boolean reductions - ops.Any: reduction(sa.func.bool_or), - ops.All: reduction(sa.func.bool_and), - ops.ArgMin: reduction(sa.func.min_by), - ops.ArgMax: reduction(sa.func.max_by), - # array ops - ops.Correlation: _corr, - ops.Covariance: _covar, - ops.ExtractMillisecond: unary(sa.func.millisecond), - ops.Arbitrary: _arbitrary, - ops.ApproxCountDistinct: reduction(sa.func.approx_distinct), - ops.ApproxMedian: reduction(lambda arg: sa.func.approx_percentile(arg, 0.5)), - ops.RegexExtract: fixed_arity(sa.func.regexp_extract, 3), - ops.RegexReplace: fixed_arity(sa.func.regexp_replace, 3), - ops.RegexSearch: fixed_arity( - lambda arg, pattern: sa.func.regexp_position(arg, pattern) != -1, 2 - ), - ops.GroupConcat: _group_concat, - ops.BitAnd: reduction(sa.func.bitwise_and_agg), - ops.BitOr: reduction(sa.func.bitwise_or_agg), - ops.BitXor: reduction( - lambda arg: sa.func.reduce_agg( - arg, - 0, - sa.text("(a, b) -> bitwise_xor(a, b)"), - sa.text("(a, b) -> bitwise_xor(a, b)"), - ) - ), - ops.BitwiseAnd: fixed_arity(sa.func.bitwise_and, 2), - ops.BitwiseOr: fixed_arity(sa.func.bitwise_or, 2), - ops.BitwiseXor: fixed_arity(sa.func.bitwise_xor, 2), - ops.BitwiseLeftShift: fixed_arity(sa.func.bitwise_left_shift, 2), - ops.BitwiseRightShift: fixed_arity(sa.func.bitwise_right_shift, 2), - ops.BitwiseNot: unary(sa.func.bitwise_not), - ops.ArrayCollect: reduction(sa.func.array_agg), - ops.ArrayConcat: varargs(sa.func.concat), - ops.ArrayLength: unary(sa.func.cardinality), - ops.ArrayIndex: fixed_arity( - lambda arg, index: sa.func.element_at(arg, index + 1), 2 - ), - ops.Array: _array_column, - ops.ArrayRepeat: fixed_arity( - lambda arg, times: sa.func.flatten(sa.func.repeat(arg, times)), 2 - ), - ops.ArraySlice: _array_slice, - ops.ArrayMap: _array_map, - ops.ArrayFilter: _array_filter, - ops.ArrayContains: fixed_arity( - lambda arr, el: if_( - arr != sa.null(), - sa.func.coalesce(sa.func.contains(arr, el), sa.false()), - sa.null(), - ), - 2, - ), - ops.ArrayPosition: fixed_arity( - lambda lst, el: sa.func.array_position(lst, el) - 1, 2 - ), - ops.ArrayDistinct: fixed_arity(sa.func.array_distinct, 1), - ops.ArraySort: fixed_arity(sa.func.array_sort, 1), - ops.ArrayRemove: fixed_arity(sa.func.array_remove, 2), - ops.ArrayUnion: fixed_arity(sa.func.array_union, 2), - ops.ArrayFlatten: unary(sa.func.flatten), - ops.JSONGetItem: _json_get_item, - ops.ExtractDayOfYear: unary(sa.func.day_of_year), - ops.ExtractWeekOfYear: unary(sa.func.week_of_year), - ops.DayOfWeekIndex: unary( - lambda arg: sa.cast( - sa.cast(sa.func.day_of_week(arg) + 6, sa.SMALLINT) % 7, sa.SMALLINT - ) - ), - ops.DayOfWeekName: 
unary(lambda arg: sa.func.date_format(arg, "%W")), - ops.ExtractEpochSeconds: unary(sa.func.to_unixtime), - ops.Translate: fixed_arity(sa.func.translate, 3), - ops.StrRight: fixed_arity(lambda arg, nchars: sa.func.substr(arg, -nchars), 2), - ops.StringSplit: fixed_arity(sa.func.split, 2), - ops.Repeat: fixed_arity( - lambda value, count: sa.func.array_join(sa.func.repeat(value, count), ""), 2 - ), - ops.DateTruncate: _timestamp_truncate, - ops.TimestampTruncate: _timestamp_truncate, - ops.DateFromYMD: fixed_arity( - lambda y, m, d: sa.func.from_iso8601_date( - sa.func.format("%04d-%02d-%02d", y, m, d) - ), - 3, - ), - ops.TimeFromHMS: fixed_arity( - lambda h, m, s: sa.cast(sa.func.format("%02d:%02d:%02d", h, m, s), sa.TIME), - 3, - ), - ops.TimestampFromYMDHMS: fixed_arity( - lambda y, mo, d, h, m, s: sa.cast( - sa.func.from_iso8601_timestamp( - sa.func.format("%04d-%02d-%02dT%02d:%02d:%02d", y, mo, d, h, m, s) - ), - sa.TIMESTAMP(timezone=False), - ), - 6, - ), - ops.Strftime: fixed_arity(sa.func.date_format, 2), - ops.StringToTimestamp: fixed_arity(sa.func.date_parse, 2), - ops.TimestampNow: fixed_arity(sa.func.now, 0), - ops.TimestampFromUNIX: _timestamp_from_unix, - ops.StructField: lambda t, op: t.translate(op.arg).op(".")(sa.text(op.field)), - ops.StructColumn: lambda t, op: sa.cast( - sa.func.row(*map(t.translate, op.values)), t.get_sqla_type(op.dtype) - ), - ops.Literal: _literal, - ops.IsNan: unary(sa.func.is_nan), - ops.IsInf: unary(sa.func.is_infinite), - ops.Log: fixed_arity(lambda arg, base: sa.func.log(base, arg), 2), - ops.Log2: unary(sa.func.log2), - ops.Log10: unary(sa.func.log10), - ops.MapLength: unary(sa.func.cardinality), - ops.MapGet: fixed_arity( - lambda arg, key, default: sa.func.coalesce( - sa.func.element_at(arg, key), default - ), - 3, - ), - ops.MapKeys: unary(sa.func.map_keys), - ops.MapValues: unary(sa.func.map_values), - ops.Map: fixed_arity(sa.func.map, 2), - ops.MapMerge: fixed_arity(sa.func.map_concat, 2), - ops.MapContains: fixed_arity( - lambda arg, key: sa.func.contains(sa.func.map_keys(arg), key), 2 - ), - ops.ExtractProtocol: unary( - lambda arg: sa.func.nullif(sa.func.url_extract_protocol(arg), "") - ), - ops.ExtractHost: unary( - lambda arg: sa.func.nullif(sa.func.url_extract_host(arg), "") - ), - ops.ExtractPath: unary( - lambda arg: sa.func.nullif(sa.func.url_extract_path(arg), "") - ), - ops.ExtractFragment: unary( - lambda arg: sa.func.nullif(sa.func.url_extract_fragment(arg), "") - ), - ops.ExtractFile: unary( - lambda arg: sa.func.concat_ws( - "?", - sa.func.nullif(sa.func.url_extract_path(arg), ""), - sa.func.nullif(sa.func.url_extract_query(arg), ""), - ) - ), - ops.ExtractQuery: _extract_url_query, - ops.Cot: _cot, - ops.Round: _round, - ops.Pi: fixed_arity(sa.func.pi, 0), - ops.E: fixed_arity(sa.func.e, 0), - ops.Quantile: reduction(sa.func.approx_percentile), - ops.MultiQuantile: reduction(sa.func.approx_percentile), - ops.StringAscii: unary( - lambda d: sa.func.codepoint( - sa.func.cast(sa.func.substr(d, 1, 2), sa.VARCHAR(1)) - ) - ), - ops.TypeOf: unary(sa.func.typeof), - ops.Unnest: _unnest, - ops.ArrayStringJoin: fixed_arity( - lambda sep, arr: sa.func.array_join(arr, sep), 2 - ), - ops.StartsWith: fixed_arity(sa.func.starts_with, 2), - ops.Argument: lambda _, op: sa.literal_column(op.param), - ops.First: partial(_first_last, offset=1), - ops.Last: partial(_first_last, offset=-1), - ops.ArrayZip: _zip, - ops.TryCast: _try_cast, - ops.ExtractMicrosecond: fixed_arity( - # trino only seems to store milliseconds, but the result of - # 
formatting always pads the right with 000 - lambda arg: sa.cast(sa.func.date_format(arg, "%f"), sa.INTEGER()), - 1, - ), - ops.Levenshtein: fixed_arity(sa.func.levenshtein_distance, 2), - ops.ArrayIntersect: _array_intersect, - # trino truncates _after_ the delta, whereas many other backends - # truncates each operand - ops.TimeDelta: _temporal_delta, - ops.DateDelta: _temporal_delta, - ops.TimestampDelta: _temporal_delta, - ops.TimestampAdd: fixed_arity(operator.add, 2), - ops.TimestampSub: fixed_arity(operator.sub, 2), - ops.TimestampDiff: fixed_arity(lambda x, y: sa.type_coerce(x - y, INTERVAL), 2), - ops.DateAdd: fixed_arity(operator.add, 2), - ops.DateSub: fixed_arity(operator.sub, 2), - ops.DateDiff: fixed_arity(lambda x, y: sa.type_coerce(x - y, INTERVAL), 2), - ops.IntervalAdd: fixed_arity(operator.add, 2), - ops.IntervalSubtract: fixed_arity(operator.sub, 2), - ops.IntervalFromInteger: _interval_from_integer, - ops.IntegerRange: _range, - ops.TimestampRange: _range, - ops.RegexSplit: fixed_arity(sa.func.regexp_split, 2), - } -) - -_invalid_operations = { - # ibis.expr.operations.reductions - ops.MultiQuantile, - ops.Quantile, -} - -operation_registry = { - k: v for k, v in operation_registry.items() if k not in _invalid_operations -} diff --git a/ibis/backends/trino/tests/conftest.py b/ibis/backends/trino/tests/conftest.py index da6af3583daf..c8f0839255e7 100644 --- a/ibis/backends/trino/tests/conftest.py +++ b/ibis/backends/trino/tests/conftest.py @@ -2,10 +2,11 @@ import os import subprocess -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import pytest import sqlglot as sg +import sqlglot.expressions as sge import ibis import ibis.expr.datatypes as dt @@ -42,10 +43,7 @@ class TestConf(ServiceBackendTest): supports_structs = True supports_map = True supports_tpch = True - deps = ("sqlalchemy", "trino.sqlalchemy") - - _tpch_data_schema = "tpch.tiny" - _tpch_query_schema = "hive.ibis_sf1" + deps = ("trino",) def preload(self): # copy files to the minio host @@ -72,7 +70,8 @@ def preload(self): def _transform_tpch_sql(self, parsed): def add_catalog_and_schema(node): if isinstance(node, sg.exp.Table): - catalog, db = self._tpch_query_schema.split(".") + catalog = "hive" + db = "ibis_sf1" return node.__class__( db=db, catalog=catalog, @@ -93,11 +92,10 @@ def load_tpch(self) -> None: to match the DuckDB TPC-H query conventions. 
""" con = self.connection - query_schema = self._tpch_query_schema - data_schema = self._tpch_data_schema - database, schema = query_schema.split(".") + database = "hive" + schema = "ibis_sf1" - tables = con.list_tables(schema=self._tpch_data_schema) + tables = con.list_tables(schema="tiny", database="tpch") con.create_schema(schema, database=database, force=True) prefixes = {"partsupp": "ps"} @@ -110,7 +108,7 @@ def load_tpch(self) -> None: prefix = prefixes.get(table, table[0]) t = ( - con.table(table, schema=data_schema) + con.table(table, schema="tiny", database="tpch") .rename(f"{prefix}_{{}}".format) # https://github.com/trinodb/trino/issues/19477 .mutate( @@ -118,16 +116,29 @@ def load_tpch(self) -> None: ) ) - sql = ibis.to_sql(t, dialect="trino") - c.exec_driver_sql( - f"CREATE OR REPLACE VIEW {query_schema}.{table} AS {sql}" - ) + sql = sge.Create( + kind="VIEW", + this=sg.table(table, db=schema, catalog=database), + expression=self.connection._to_sqlglot(t), + replace=True, + ).sql("trino", pretty=True) + + c.execute(sql) + + def _load_data(self, **_: Any) -> None: + """Load test data into a backend.""" + with self.connection.begin() as cur: + for stmt in self.ddl_script: + cur.execute(stmt) def _tpch_table(self, name: str): - return self.connection.table( - self.default_identifier_case_fn(name), - schema=self._tpch_query_schema, + from ibis import _ + + table = self.connection.table( + self.default_identifier_case_fn(name), schema="ibis_sf1", database="hive" ) + table = table.mutate(s.across(s.of_type("double"), _.cast("decimal(15, 2)"))) + return table @property def test_files(self) -> Iterable[Path]: diff --git a/ibis/backends/trino/tests/test_client.py b/ibis/backends/trino/tests/test_client.py index 7b0f01142c86..31e40314c24e 100644 --- a/ibis/backends/trino/tests/test_client.py +++ b/ibis/backends/trino/tests/test_client.py @@ -41,7 +41,8 @@ def test_table_properties(tmp_name): ) assert t.schema() == schema with con.begin() as c: - ddl = c.exec_driver_sql(f"SHOW CREATE TABLE {tmp_name}").scalar() + c.execute(f"SHOW CREATE TABLE {tmp_name}") + [(ddl,)] = c.fetchall() assert "ORC" in ddl assert "bucketed_by" in ddl @@ -78,20 +79,20 @@ def test_con_source(source, expected): schema="default", source=source, ) - assert con.con.url.query["source"] == expected + assert con.con.source == expected @pytest.mark.parametrize( - ("schema", "table"), + ("database", "schema", "table"), [ # tables known to exist - ("system.metadata", "table_comments"), - ("tpcds.sf1", "store"), - ("tpch.sf1", "nation"), + ("system", "metadata", "table_comments"), + ("tpcds", "sf1", "store"), + ("tpch", "sf1", "nation"), ], ) -def test_cross_schema_table_access(con, schema, table): - t = con.table(table, schema=schema) +def test_cross_schema_table_access(con, database, schema, table): + t = con.table(table, schema=schema, database=database) assert t.count().execute() @@ -115,9 +116,8 @@ def geometric_mean(x) -> float: result_n, result = expr.execute().squeeze().tolist() with con.begin() as c: - expected_n, expected = c.exec_driver_sql( - "SELECT COUNT(*), GEOMETRIC_MEAN(price) FROM diamonds" - ).one() + c.execute("SELECT COUNT(*), GEOMETRIC_MEAN(price) FROM diamonds") + [(expected_n, expected)] = c.fetchall() # check the count assert result_n > 0 @@ -148,28 +148,14 @@ def test_create_table_timestamp(): assert table not in con.list_tables() -def test_table_access_from_connection_without_catalog_or_schema(): - con = ibis.trino.connect() - # can't use the `system` catalog to test here, because the trino 
sqlalchemy - # dialect defaults to `system` if no catalog is passed, so it wouldn't be a - # useful test - assert con.current_database != "tpch" - assert con.current_schema is None - - t = con.table("region", schema="tpch.sf1") - - assert con.current_database != "tpch" - assert con.current_schema is None - - assert t.count().execute() - - def test_table_access_database_schema(con): t = con.table("region", schema="sf1", database="tpch") assert t.count().execute() - with pytest.raises(exc.IbisError, match="Cannot specify both"): + with pytest.raises(exc.IbisError, match='Table not found: tpch."tpch.sf1".region'): con.table("region", schema="tpch.sf1", database="tpch") - with pytest.raises(exc.IbisError, match="Cannot specify both"): + with pytest.raises( + exc.IbisError, match='Table not found: system."tpch.sf1".region' + ): con.table("region", schema="tpch.sf1", database="system") diff --git a/ibis/backends/trino/tests/test_datatypes.py b/ibis/backends/trino/tests/test_datatypes.py index 85c435e117d3..fc7164fcfa87 100644 --- a/ibis/backends/trino/tests/test_datatypes.py +++ b/ibis/backends/trino/tests/test_datatypes.py @@ -4,7 +4,7 @@ from pytest import param import ibis.expr.datatypes as dt -from ibis.backends.trino.datatypes import TrinoType +from ibis.backends.base.sqlglot.datatypes import TrinoType dtypes = [ ("interval year to month", dt.Interval(unit="M")), diff --git a/ibis/expr/rewrites.py b/ibis/expr/rewrites.py index 840cb59459e7..e59be96407a5 100644 --- a/ibis/expr/rewrites.py +++ b/ibis/expr/rewrites.py @@ -18,6 +18,7 @@ d = Namespace(deferred, module=ops) +x = var("x") y = var("y") name = var("name") diff --git a/poetry.lock b/poetry.lock index 9a15d9383518..60300ad138b3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6727,7 +6727,6 @@ files = [ python-dateutil = "*" pytz = "*" requests = ">=2.31.0" -sqlalchemy = {version = ">=1.3", optional = true, markers = "extra == \"sqlalchemy\""} tzlocal = "*" [package.extras] @@ -7375,10 +7374,10 @@ pyspark = ["packaging", "pyspark", "sqlalchemy"] risingwave = ["psycopg2", "sqlalchemy", "sqlalchemy-risingwave", "sqlalchemy-views"] snowflake = ["packaging", "snowflake-connector-python"] sqlite = ["regex", "sqlalchemy", "sqlalchemy-views"] -trino = ["sqlalchemy", "sqlalchemy-views", "trino"] +trino = ["trino"] visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "2191160a6ebf9c3e237ada9bba709eb3a912f31f60a25138c9c6b5aace96ee9f" +content-hash = "083f8f6a6d3dab493009395aabe051d1758183ba4e2588fc505aac883e3beafe" diff --git a/pyproject.toml b/pyproject.toml index 1d047429fb62..18fe8098a3dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,7 +91,7 @@ sqlalchemy = { version = ">=1.4,<3", optional = true } sqlalchemy-exasol = { version = ">=4.6.0", optional = true } sqlalchemy-views = { version = ">=0.3.1,<1", optional = true } sqlalchemy-risingwave = { version = ">=1.0.0,<2", optional = true } -trino = { version = ">=0.321,<1", optional = true, extras = ["sqlalchemy"] } +trino = { version = ">=0.321,<1", optional = true } [tool.poetry.group.dev.dependencies] codespell = { version = ">=2.2.6,<3", extras = [ @@ -205,7 +205,7 @@ risingwave = [ pyspark = ["pyspark", "sqlalchemy", "packaging"] snowflake = ["snowflake-connector-python", "packaging"] sqlite = ["regex", "sqlalchemy", "sqlalchemy-views"] -trino = ["trino", "sqlalchemy", "sqlalchemy-views"] +trino = ["trino"] # non-backend extras visualization = ["graphviz"] decompiler = ["black"] From c293b2357f90534a4d15f55ed2313190bff20655 Mon Sep 
17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 5 Jan 2024 11:51:19 -0500 Subject: [PATCH 047/161] fix(polars): force null sorting to match the rest of ibis --- ibis/backends/polars/compiler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py index 97a45438609c..f9acc6d01664 100644 --- a/ibis/backends/polars/compiler.py +++ b/ibis/backends/polars/compiler.py @@ -234,9 +234,9 @@ def sort(op, **kw): by = [key.name for key in op.keys] descending = [key.descending for key in op.keys] try: - lf = lf.sort(by, descending=descending) + lf = lf.sort(by, descending=descending, nulls_last=True) except TypeError: # pragma: no cover - lf = lf.sort(by, reverse=descending) # pragma: no cover + lf = lf.sort(by, reverse=descending, nulls_last=True) # pragma: no cover return lf From c8563b1f6b1cafe2ac9eca8e580ecbe64134a928 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 5 Jan 2024 11:53:43 -0500 Subject: [PATCH 048/161] test(pandas): ignore array size warning --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 18fe8098a3dd..d13c16e55bbe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -292,6 +292,8 @@ filterwarnings = [ 'ignore:`np\.bool` is a deprecated alias for the builtin `bool`:DeprecationWarning', # numpy, coming from a pandas call 'ignore:In the future `np\.bool` will be defined as the corresponding NumPy scalar:FutureWarning', + # pandas by way of polars when comparing arrays + 'ignore:The truth value of an empty array is ambiguous.:DeprecationWarning', # druid 'ignore:Dialect druid.rest will not make use of SQL compilation caching:', # ibis From 2876676740fed75b81cd6c81fedc140d9131e4ed Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 6 Jan 2024 06:05:39 -0500 Subject: [PATCH 049/161] refactor(postgres): port to sqlglot (#7877) Port the postgres backend to sqlglot. 
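
The backend now connects with psycopg2 directly and compiles expressions
through the shared sqlglot compiler instead of a SQLAlchemy dialect. A minimal
sketch of the user-facing surface after this change (the connection parameters
and the table/column names below are illustrative placeholders, not part of
the diff):

    import ibis

    # arguments mirror the new do_connect() signature; psycopg2 is used
    # under the hood instead of a SQLAlchemy engine
    con = ibis.postgres.connect(
        host="localhost",
        port=5432,
        user="postgres",
        password="postgres",
        database="ibis_testing",
    )

    t = con.table("functional_alltypes")  # hypothetical table name
    expr = t.group_by("string_col").aggregate(avg_double=t.double_col.mean())

    # compilation now goes through the sqlglot compiler
    print(ibis.to_sql(expr, dialect="postgres"))

The SQLAlchemy-era pieces (the GeoAlchemy2 dependency plus
ibis/backends/postgres/registry.py, datatypes.py, and udf.py, along with
ibis/backends/base/sql/registry/geospatial.py) are deleted; geospatial
literals are instead emitted by the generic sqlglot compiler via
ST_GeomFromText.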
--- .github/workflows/ibis-backends.yml | 350 ++++--- ibis/backends/base/sql/registry/geospatial.py | 148 --- ibis/backends/base/sqlglot/__init__.py | 2 + ibis/backends/base/sqlglot/compiler.py | 99 +- ibis/backends/base/sqlglot/datatypes.py | 49 +- ibis/backends/clickhouse/compiler.py | 2 + ibis/backends/conftest.py | 1 - ibis/backends/datafusion/__init__.py | 6 +- ibis/backends/datafusion/compiler.py | 2 +- ibis/backends/duckdb/__init__.py | 4 +- ibis/backends/duckdb/compiler.py | 7 + ibis/backends/duckdb/tests/test_register.py | 3 +- ibis/backends/postgres/__init__.py | 719 ++++++++++++--- ibis/backends/postgres/compiler.py | 621 ++++++++++++- ibis/backends/postgres/converter.py | 24 + ibis/backends/postgres/datatypes.py | 88 -- ibis/backends/postgres/registry.py | 864 ------------------ ibis/backends/postgres/tests/conftest.py | 45 +- .../test_client/test_compile_toplevel/out.sql | 5 +- .../test_analytic_functions/out.sql | 12 +- .../test_cast/double_to_int16/out.sql | 3 + .../test_cast/double_to_int8/out.sql | 3 + .../string_to_decimal_no_params/out.sql | 3 + .../string_to_decimal_params/out.sql | 3 + .../test_cast/string_to_double/out.sql | 3 + .../test_cast/string_to_float/out.sql | 3 + .../test_functions/test_date_cast/out.sql | 3 + .../test_timestamp_cast_noop/out1.sql | 3 + .../test_timestamp_cast_noop/out2.sql | 3 + .../test_union_cte/False/out.sql | 2 +- .../test_union_cte/True/out.sql | 2 +- .../test_geospatial/test_geo_equals/out1.sql | 9 + .../test_geospatial/test_geo_equals/out2.sql | 3 + .../test_geospatial/test_geo_equals/out3.sql | 3 + .../linestring-geography/out.sql | 6 + .../linestring-geometry/out.sql | 6 + .../linestring-none/out.sql | 6 + .../linestring-srid/out.sql | 6 + .../multilinestring-geography/out.sql | 6 + .../multilinestring-geometry/out.sql | 6 + .../multilinestring-none/out.sql | 6 + .../multilinestring-srid/out.sql | 6 + .../multipoint-geography/out.sql | 6 + .../multipoint-geometry/out.sql | 6 + .../multipoint-none/out.sql | 6 + .../multipoint-srid/out.sql | 6 + .../multipolygon-geography/out.sql | 11 + .../multipolygon-geometry/out.sql | 11 + .../multipolygon-none/out.sql | 10 + .../multipolygon-srid/out.sql | 11 + .../point-geography/out.sql | 6 + .../point-geometry/out.sql | 6 + .../point-none/out.sql | 6 + .../point-srid/out.sql | 6 + .../polygon-geography/out.sql | 11 + .../polygon-geometry/out.sql | 11 + .../polygon-none/out.sql | 10 + .../polygon-srid/out.sql | 11 + .../polygon_single-geography/out.sql | 6 + .../polygon_single-geometry/out.sql | 6 + .../polygon_single-none/out.sql | 6 + .../polygon_single-srid/out.sql | 6 + .../linestring_contains/out.sql | 3 + .../linestring_end_point/out.sql | 3 + .../linestring_length/out.sql | 3 + .../linestring_start_point/out.sql | 3 + .../multipolygon_n_points/out.sql | 3 + .../test_geo_ops_smoke/point_set_srid/out.sql | 3 + .../test_geo_ops_smoke/point_srid/out.sql | 3 + .../test_geo_ops_smoke/point_x/out.sql | 3 + .../test_geo_ops_smoke/point_y/out.sql | 3 + .../test_geo_ops_smoke/polygon_area/out.sql | 3 + .../polygon_perimeter/out.sql | 3 + .../expr0/out.sql | 6 + .../expr1/out.sql | 6 + .../expr2/out.sql | 6 + .../expr3/out.sql | 6 + .../expr4/out.sql | 6 + .../expr5/out.sql | 6 + .../expr6/out.sql | 6 + .../expr7/out.sql | 6 + .../shp0/out.sql | 6 + .../shp1/out.sql | 6 + .../shp2/out.sql | 6 + .../shp3/out.sql | 6 + .../shp4/out.sql | 6 + .../shp5/out.sql | 6 + .../shp6/out.sql | 6 + .../shp7/out.sql | 6 + .../shp8/out.sql | 6 + .../test_select_linestring_geodata/out.sql | 3 + 
.../test_select_multipolygon_geodata/out.sql | 3 + .../test_select_point_geodata/out.sql | 3 + .../test_select_polygon_geodata/out.sql | 3 + ibis/backends/postgres/tests/test_client.py | 89 +- .../backends/postgres/tests/test_functions.py | 209 ++--- .../postgres/tests/test_geospatial.py | 169 ++-- ibis/backends/postgres/tests/test_postgis.py | 33 +- ibis/backends/postgres/tests/test_udf.py | 13 +- ibis/backends/postgres/udf.py | 201 ---- ibis/backends/tests/errors.py | 12 + .../test_default_limit/postgres/out.sql | 5 + .../test_disable_query_limit/postgres/out.sql | 5 + .../postgres/out.sql | 3 + .../test_respect_set_limit/postgres/out.sql | 10 + .../test_group_by_has_index/postgres/out.sql | 8 +- .../test_sql/test_isin_bug/postgres/out.sql | 18 +- .../test_union_aliasing/postgres/out.sql | 161 ++-- ibis/backends/tests/test_aggregation.py | 3 +- ibis/backends/tests/test_array.py | 19 +- ibis/backends/tests/test_asof_join.py | 4 +- ibis/backends/tests/test_generic.py | 2 +- ibis/backends/tests/test_join.py | 5 + ibis/backends/tests/test_numeric.py | 15 +- ibis/backends/tests/test_sql.py | 2 +- ibis/backends/tests/test_temporal.py | 22 +- .../test_h22/test_tpc_h22/duckdb/h22.sql | 18 +- .../test_h22/test_tpc_h22/snowflake/h22.sql | 24 +- .../test_h22/test_tpc_h22/trino/h22.sql | 24 +- ibis/common/exceptions.py | 8 +- ibis/expr/datatypes/value.py | 48 +- ibis/expr/operations/udf.py | 1 - ibis/expr/rewrites.py | 53 ++ ibis/expr/types/core.py | 2 +- ibis/tests/expr/test_value_exprs.py | 16 +- poetry.lock | 4 +- pyproject.toml | 9 +- 127 files changed, 2439 insertions(+), 2226 deletions(-) delete mode 100644 ibis/backends/base/sql/registry/geospatial.py create mode 100644 ibis/backends/postgres/converter.py delete mode 100644 ibis/backends/postgres/datatypes.py delete mode 100644 ibis/backends/postgres/registry.py create mode 100644 ibis/backends/postgres/tests/snapshots/test_functions/test_cast/double_to_int16/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_functions/test_cast/double_to_int8/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_decimal_no_params/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_decimal_params/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_double/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_float/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_functions/test_date_cast/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_functions/test_timestamp_cast_noop/out1.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_functions/test_timestamp_cast_noop/out2.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_equals/out1.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_equals/out2.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_equals/out3.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-geography/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-geometry/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-none/out.sql create mode 100644 
ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-srid/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-geography/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-geometry/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-none/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-srid/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-geography/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-geometry/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-none/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-srid/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-geography/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-geometry/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-none/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-srid/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-geography/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-geometry/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-none/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-srid/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-geography/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-geometry/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-none/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-srid/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-geography/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-geometry/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-none/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-srid/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_contains/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_end_point/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_length/out.sql create mode 100644 
ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_start_point/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/multipolygon_n_points/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_set_srid/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_srid/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_x/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_y/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/polygon_area/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/polygon_perimeter/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr0/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr1/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr2/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr3/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr4/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr5/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr6/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr7/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp0/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp1/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp2/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp3/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp4/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp5/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp6/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp7/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp8/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_postgis/test_select_linestring_geodata/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_postgis/test_select_multipolygon_geodata/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_postgis/test_select_point_geodata/out.sql create mode 100644 ibis/backends/postgres/tests/snapshots/test_postgis/test_select_polygon_geodata/out.sql delete mode 100644 ibis/backends/postgres/udf.py create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/postgres/out.sql create mode 
100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/postgres/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/postgres/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/postgres/out.sql diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index bc06cd0ab9df..b299dff14f93 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -103,26 +103,26 @@ jobs: # - geospatial # sys-deps: # - libgeos-dev - # - name: postgres - # title: PostgreSQL - # extras: - # - postgres - # - geospatial - # services: - # - postgres - # sys-deps: - # - libgeos-dev - # - name: postgres - # title: PostgreSQL + Torch - # extras: - # - postgres - # - geospatial - # additional_deps: - # - torch - # services: - # - postgres - # sys-deps: - # - libgeos-dev + - name: postgres + title: PostgreSQL + extras: + - postgres + - geospatial + services: + - postgres + sys-deps: + - libgeos-dev + - name: postgres + title: PostgreSQL + Torch + extras: + - postgres + - geospatial + additional_deps: + - torch + services: + - postgres + sys-deps: + - libgeos-dev # - name: impala # title: Impala # extras: @@ -206,30 +206,30 @@ jobs: - examples services: - clickhouse - # - os: windows-latest - # backend: - # name: postgres - # title: PostgreSQL - # extras: - # - postgres - # - geospatial - # services: - # - postgres - # sys-deps: - # - libgeos-dev - # - os: windows-latest - # backend: - # name: postgres - # title: PostgreSQL + Torch - # extras: - # - postgres - # - geospatial - # additional_deps: - # - torch - # services: - # - postgres - # sys-deps: - # - libgeos-dev + - os: windows-latest + backend: + name: postgres + title: PostgreSQL + extras: + - postgres + - geospatial + services: + - postgres + sys-deps: + - libgeos-dev + - os: windows-latest + backend: + name: postgres + title: PostgreSQL + Torch + extras: + - postgres + - geospatial + additional_deps: + - torch + services: + - postgres + sys-deps: + - libgeos-dev # - os: windows-latest # backend: # name: impala @@ -407,134 +407,131 @@ jobs: if: matrix.backend.services != null && failure() run: docker compose logs - # test_backends_min_version: - # name: ${{ matrix.backend.title }} Min Version ${{ matrix.os }} python-${{ matrix.python-version }} - # runs-on: ${{ matrix.os }} - # env: - # SQLALCHEMY_WARN_20: "1" - # strategy: - # fail-fast: false - # matrix: - # os: - # - ubuntu-latest - # - windows-latest - # python-version: - # - "3.9" - # - "3.11" - # backend: - # - name: dask - # title: Dask - # deps: - # - "dask[array,dataframe]@2022.9.1" - # - "pandas@1.5.3" - # extras: - # - dask - # - name: postgres - # title: PostgreSQL - # deps: - # - "psycopg2@2.8.4" - # - "GeoAlchemy2@0.6.3" - # - "geopandas@0.6" - # - "Shapely@2" - # services: - # - postgres - # extras: - # - postgres - # - geospatial - # exclude: - # - os: windows-latest - # backend: - # name: postgres - # title: PostgreSQL - # deps: - # - "psycopg2@2.8.4" - # - "GeoAlchemy2@0.6.3" - # - "geopandas@0.6" - # - "Shapely@2" - # services: - # - postgres - # extras: - # - postgres - # - geospatial - # - python-version: "3.11" - # backend: - # name: postgres - # title: PostgreSQL - # deps: - # - "psycopg2@2.8.4" - # - "GeoAlchemy2@0.6.3" - # - "geopandas@0.6" - # - "Shapely@2" - # services: - # - postgres - # extras: - # - postgres - # - geospatial - # steps: - # - name: checkout - # uses: actions/checkout@v4 - # - # - name: 
install libgeos for shapely - # if: matrix.backend.name == 'postgres' - # run: | - # sudo apt-get update -y -qq - # sudo apt-get install -qq -y build-essential libgeos-dev - # - # - uses: extractions/setup-just@v1 - # env: - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # - # - name: download backend data - # run: just download-data - # - # - name: start services - # if: matrix.backend.services != null - # run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} - # - # - name: install python - # uses: actions/setup-python@v5 - # id: install_python - # with: - # python-version: ${{ matrix.python-version }} - # - # - name: install poetry - # run: python -m pip install --upgrade pip 'poetry==1.7.1' - # - # - name: remove lonboard - # # it requires a version of pandas that min versions are not compatible with - # run: poetry remove lonboard - # - # - name: install minimum versions - # run: poetry add --lock --optional ${{ join(matrix.backend.deps, ' ') }} - # - # - name: checkout the lock file - # run: git checkout poetry.lock - # - # - name: lock with no updates - # # poetry add is aggressive and will update other dependencies like - # # numpy and pandas so we keep the pyproject.toml edits and then relock - # # without updating anything except the requested versions - # run: poetry lock --no-update - # - # - name: install ibis - # run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" - # - # - name: run tests - # run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup - # - # - name: check that no untracked files were produced - # shell: bash - # run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . - # - # - name: upload code coverage - # if: success() - # uses: codecov/codecov-action@v4 - # with: - # flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} - # - # - name: Show docker compose logs on fail - # if: matrix.backend.services != null && failure() - # run: docker compose logs + test_backends_min_version: + name: ${{ matrix.backend.title }} Min Version ${{ matrix.os }} python-${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + env: + SQLALCHEMY_WARN_20: "1" + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + - windows-latest + python-version: + - "3.9" + - "3.11" + backend: + # - name: dask + # title: Dask + # deps: + # - "dask[array,dataframe]@2022.9.1" + # - "pandas@1.5.3" + # extras: + # - dask + - name: postgres + title: PostgreSQL + deps: + - "psycopg2@2.8.4" + - "geopandas@0.6" + - "Shapely@2" + services: + - postgres + extras: + - postgres + - geospatial + exclude: + - os: windows-latest + backend: + name: postgres + title: PostgreSQL + deps: + - "psycopg2@2.8.4" + - "geopandas@0.6" + - "Shapely@2" + services: + - postgres + extras: + - postgres + - geospatial + - python-version: "3.11" + backend: + name: postgres + title: PostgreSQL + deps: + - "psycopg2@2.8.4" + - "geopandas@0.6" + - "Shapely@2" + services: + - postgres + extras: + - postgres + - geospatial + steps: + - name: checkout + uses: actions/checkout@v4 + + - name: install libgeos for shapely + if: matrix.backend.name == 'postgres' + run: | + sudo apt-get update -y -qq + sudo apt-get install -qq -y build-essential libgeos-dev + + - uses: extractions/setup-just@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: download backend data + run: just download-data + + - name: start services 
+ if: matrix.backend.services != null + run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} + + - name: install python + uses: actions/setup-python@v5 + id: install_python + with: + python-version: ${{ matrix.python-version }} + + - name: install poetry + run: python -m pip install --upgrade pip 'poetry==1.7.1' + + - name: remove lonboard + # it requires a version of pandas that min versions are not compatible with + run: poetry remove lonboard + + - name: install minimum versions + run: poetry add --lock --optional ${{ join(matrix.backend.deps, ' ') }} + + - name: checkout the lock file + run: git checkout poetry.lock + + - name: lock with no updates + # poetry add is aggressive and will update other dependencies like + # numpy and pandas so we keep the pyproject.toml edits and then relock + # without updating anything except the requested versions + run: poetry lock --no-update + + - name: install ibis + run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" + + - name: run tests + run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup + + - name: check that no untracked files were produced + shell: bash + run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . + + - name: upload code coverage + if: success() + uses: codecov/codecov-action@v4 + with: + flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} + + - name: Show docker compose logs on fail + if: matrix.backend.services != null && failure() + run: docker compose logs # test_pyspark: # name: PySpark ${{ matrix.os }} python-${{ matrix.python-version }} @@ -676,15 +673,6 @@ jobs: # extras: # - geospatial # - mysql - # - name: postgres - # title: PostgreSQL - # services: - # - postgres - # extras: - # - geospatial - # - postgres - # sys-deps: - # - libgeos-dev # - name: sqlite # title: SQLite # extras: diff --git a/ibis/backends/base/sql/registry/geospatial.py b/ibis/backends/base/sql/registry/geospatial.py deleted file mode 100644 index 7e8fb4da5a96..000000000000 --- a/ibis/backends/base/sql/registry/geospatial.py +++ /dev/null @@ -1,148 +0,0 @@ -from __future__ import annotations - -from collections.abc import Iterable -from typing import TYPE_CHECKING, TypeVar - -import ibis.expr.datatypes as dt -from ibis.common import exceptions as ex - -if TYPE_CHECKING: - import ibis.expr.operations as ops - -# TODO(kszucs): move this module to the base sql backend - -NumberType = TypeVar("NumberType", int, float) -# Geometry primitives (2D) -PointType = Iterable[NumberType] -LineStringType = list[PointType] -PolygonType = list[LineStringType] -# Multipart geometries (2D) -MultiPointType = list[PointType] -MultiLineStringType = list[LineStringType] -MultiPolygonType = list[PolygonType] - - -def _format_point_value(value: PointType) -> str: - """Convert a iterable with a point to text.""" - return " ".join(str(v) for v in value) - - -def _format_linestring_value(value: LineStringType, nested=False) -> str: - """Convert a iterable with a linestring to text.""" - template = "({})" if nested else "{}" - if not isinstance(value[0], (tuple, list)): - msg = "{} structure expected: LineStringType".format( - "Data" if not nested else "Inner data" - ) - raise ex.IbisInputError(msg) - return template.format(", ".join(_format_point_value(point) for point in value)) - - -def _format_polygon_value(value: PolygonType, nested=False) -> str: - """Convert a 
iterable with a polygon to text.""" - template = "({})" if nested else "{}" - if not isinstance(value[0][0], (tuple, list)): - msg = "{} data structure expected: PolygonType".format( - "Data" if not nested else "Inner data" - ) - raise ex.IbisInputError(msg) - - return template.format( - ", ".join(_format_linestring_value(line, nested=True) for line in value) - ) - - -def _format_multipoint_value(value: MultiPointType) -> str: - """Convert a iterable with a multipoint to text.""" - if not isinstance(value[0], (tuple, list)): - raise ex.IbisInputError("Data structure expected: MultiPointType") - return ", ".join(f"({_format_point_value(point)})" for point in value) - - -def _format_multilinestring_value(value: MultiLineStringType) -> str: - """Convert a iterable with a multilinestring to text.""" - if not isinstance(value[0][0], (tuple, list)): - raise ex.IbisInputError("Data structure expected: MultiLineStringType") - return ", ".join(f"({_format_linestring_value(line)})" for line in value) - - -def _format_multipolygon_value(value: MultiPolygonType) -> str: - """Convert a iterable with a multipolygon to text.""" - if not isinstance(value[0][0], (tuple, list)): - raise ex.IbisInputError("Data structure expected: MultiPolygonType") - return ", ".join(_format_polygon_value(polygon, nested=True) for polygon in value) - - -def _format_geo_metadata(op, value: str, inline_metadata: bool = False) -> str: - """Format a geometry/geography text when it is necessary.""" - srid = op.args[1].srid - geotype = op.args[1].geotype - - if inline_metadata: - value = "'{}{}'{}".format( - f"SRID={srid};" if srid else "", - value, - f"::{geotype}" if geotype else "", - ) - return value - - geofunc = "ST_GeogFromText" if geotype == "geography" else "ST_GeomFromText" - - value = repr(value) - if srid: - value += f", {srid}" - - return f"{geofunc}({value})" - - -def translate_point(value: Iterable) -> str: - """Translate a point to WKT.""" - return f"POINT ({_format_point_value(value)})" - - -def translate_linestring(value: list) -> str: - """Translate a linestring to WKT.""" - return f"LINESTRING ({_format_linestring_value(value)})" - - -def translate_polygon(value: list) -> str: - """Translate a polygon to WKT.""" - return f"POLYGON ({_format_polygon_value(value)})" - - -def translate_multilinestring(value: list) -> str: - """Translate a multilinestring to WKT.""" - return f"MULTILINESTRING ({_format_multilinestring_value(value)})" - - -def translate_multipoint(value: list) -> str: - """Translate a multipoint to WKT.""" - return f"MULTIPOINT ({_format_multipoint_value(value)})" - - -def translate_multipolygon(value: list) -> str: - """Translate a multipolygon to WKT.""" - return f"MULTIPOLYGON ({_format_multipolygon_value(value)})" - - -def translate_literal(op: ops.Literal, inline_metadata: bool = False) -> str: - value = op.value - dtype = op.dtype - - if isinstance(value, dt._WellKnownText): - result = value.text - elif dtype.is_point(): - result = translate_point(value) - elif dtype.is_linestring(): - result = translate_linestring(value) - elif dtype.is_polygon(): - result = translate_polygon(value) - elif dtype.is_multilinestring(): - result = translate_multilinestring(value) - elif dtype.is_multipoint(): - result = translate_multipoint(value) - elif dtype.is_multipolygon(): - result = translate_multipolygon(value) - else: - raise ex.UnboundExpressionError("Geo Spatial type not supported.") - return _format_geo_metadata(op, result, inline_metadata) diff --git a/ibis/backends/base/sqlglot/__init__.py 
b/ibis/backends/base/sqlglot/__init__.py index b9055e3aa292..1ce1c33c7d56 100644 --- a/ibis/backends/base/sqlglot/__init__.py +++ b/ibis/backends/base/sqlglot/__init__.py @@ -9,6 +9,7 @@ import ibis import ibis.expr.operations as ops import ibis.expr.schema as sch +from ibis import util from ibis.backends.base import BaseBackend from ibis.backends.base.sqlglot.compiler import STAR @@ -258,6 +259,7 @@ def _cursor_batches( while batch := cursor.fetchmany(chunk_size): yield batch + @util.experimental def to_pyarrow_batches( self, expr: ir.Expr, diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index b7e8dbc9a83c..527ef9388a7c 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -26,6 +26,7 @@ add_order_by_to_empty_ranking_window_functions, empty_in_values_right_side, one_to_zero_index, + replace_bucket, replace_scalar_parameter, unwrap_scalar_parameter, ) @@ -89,13 +90,16 @@ def map(self, keys, values): class ColGen: - __slots__ = () + __slots__ = ("table",) + + def __init__(self, table: str | None = None) -> None: + self.table = table def __getattr__(self, name: str) -> sge.Column: - return sg.column(name) + return sg.column(name, table=self.table) def __getitem__(self, key: str) -> sge.Column: - return sg.column(key) + return sg.column(key, table=self.table) def paren(expr): @@ -127,6 +131,7 @@ class SQLGlotCompiler(abc.ABC): add_order_by_to_empty_ranking_window_functions, one_to_zero_index, add_one_to_nth_value_input, + replace_bucket, ) """A sequence of rewrites to apply to the expression tree before compilation.""" @@ -260,6 +265,10 @@ def visit_Field(self, op, *, rel, name): self._gen_valid_name(name), table=rel.alias_or_name, quoted=self.quoted ) + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + return self.cast(arg, to) + @visit_node.register(ops.ScalarSubquery) def visit_ScalarSubquery(self, op, *, rel): return rel.this.subquery() @@ -384,8 +393,13 @@ def visit_DefaultLiteral(self, op, *, value, dtype): return sge.Struct.from_arg_list(items) elif dtype.is_uuid(): return self.cast(str(value), dtype) - else: - raise NotImplementedError(f"Unsupported type: {dtype!r}") + elif dtype.is_geospatial(): + args = [value.wkt] + if (srid := dtype.srid) is not None: + args.append(srid) + return self.f.st_geomfromtext(*args) + + raise NotImplementedError(f"Unsupported type: {dtype!r}") @visit_node.register(ops.BitwiseNot) def visit_BitwiseNot(self, op, *, arg): @@ -562,9 +576,20 @@ def visit_LStrip(self, op, *, arg): @visit_node.register(ops.Substring) def visit_Substring(self, op, *, arg, start, length): - if_pos = sge.Substring(this=arg, start=start + 1, length=length) - if_neg = sge.Substring(this=arg, start=start, length=length) - return self.if_(start >= 0, if_pos, if_neg) + start += 1 + arg_length = self.f.length(arg) + + if length is None: + return self.if_( + start >= 1, + self.f.substring(arg, start), + self.f.substring(arg, start + arg_length), + ) + return self.if_( + start >= 1, + self.f.substring(arg, start, length), + self.f.substring(arg, start + arg_length, length), + ) @visit_node.register(ops.StringFind) def visit_StringFind(self, op, *, arg, substr, start, end): @@ -629,16 +654,42 @@ def visit_CountStar(self, op, *, arg, where): @visit_node.register(ops.Sum) def visit_Sum(self, op, *, arg, where): - arg = self.cast(arg, op.dtype) if op.arg.dtype.is_boolean() else arg + if op.arg.dtype.is_boolean(): + arg = self.cast(arg, dt.int32) return self.agg.sum(arg, 
where=where) + @visit_node.register(ops.Mean) + def visit_Mean(self, op, *, arg, where): + if op.arg.dtype.is_boolean(): + arg = self.cast(arg, dt.int32) + return self.agg.avg(arg, where=where) + + @visit_node.register(ops.Min) + def visit_Min(self, op, *, arg, where): + if op.arg.dtype.is_boolean(): + return self.agg.bool_and(arg, where=where) + return self.agg.min(arg, where=where) + + @visit_node.register(ops.Max) + def visit_Max(self, op, *, arg, where): + if op.arg.dtype.is_boolean(): + return self.agg.bool_or(arg, where=where) + return self.agg.max(arg, where=where) + ### Stats @visit_node.register(ops.Quantile) @visit_node.register(ops.MultiQuantile) def visit_Quantile(self, op, *, arg, quantile, where): suffix = "cont" if op.arg.dtype.is_numeric() else "disc" - return self.agg[f"quantile_{suffix}"](arg, quantile, where=where) + funcname = f"percentile_{suffix}" + expr = sge.WithinGroup( + this=self.f[funcname](quantile), + expression=sge.Order(expressions=[sge.Ordered(this=arg)]), + ) + if where is not None: + expr = sge.Filter(this=expr, expression=sge.Where(this=where)) + return expr @visit_node.register(ops.Variance) @visit_node.register(ops.StandardDev) @@ -784,13 +835,18 @@ def visit_Argument(self, op, *, name: str, shape, dtype): def visit_RowID(self, op, *, table): return sg.column(op.name, table=table.alias_or_name, quoted=self.quoted) + def __sql_name__(self, op: ops.ScalarUDF | ops.AggUDF) -> str: + # not actually a table, but easier to quote individual namespace + # components this way + return sg.table(op.__func_name__, db=op.__udf_namespace__).sql(self.dialect) + @visit_node.register(ops.ScalarUDF) def visit_ScalarUDF(self, op, **kw): - return self.f[op.__full_name__](*kw.values()) + return self.f[self.__sql_name__(op)](*kw.values()) @visit_node.register(ops.AggUDF) def visit_AggUDF(self, op, *, where, **kw): - return self.agg[op.__full_name__](*kw.values(), where=where) + return self.agg[self.__sql_name__(op)](*kw.values(), where=where) @visit_node.register(ops.TimeDelta) @visit_node.register(ops.DateDelta) @@ -1113,18 +1169,22 @@ def visit_SQLStringView(self, op, *, query: str, name: str, child): def visit_SQLQueryResult(self, op, *, query, schema, source): return sg.parse_one(query, read=self.dialect).subquery() + @visit_node.register(ops.Unnest) + def visit_Unnest(self, op, *, arg): + return sge.Explode(this=arg) + @visit_node.register(ops.JoinTable) def visit_JoinTable(self, op, *, parent, index): return parent - @visit_node.register(ops.Cast) - def visit_Cast(self, op, *, arg, to): - return self.cast(arg, to) - @visit_node.register(ops.Value) def visit_Undefined(self, op, **_): raise com.OperationNotDefinedError(type(op).__name__) + @visit_node.register(ops.RegexExtract) + def visit_RegexExtract(self, op, *, arg, pattern, index): + return self.f.regexp_extract(arg, pattern, index, dialect=self.dialect) + _SIMPLE_OPS = { ops.All: "bool_and", @@ -1153,16 +1213,11 @@ def visit_Undefined(self, op, **_): ops.Sign: "sign", ops.ApproxCountDistinct: "approx_distinct", ops.Median: "median", - ops.Mean: "avg", - ops.Max: "max", - ops.Min: "min", ops.ArgMin: "argmin", ops.ArgMax: "argmax", ops.First: "first", ops.Last: "last", ops.Count: "count", - ops.All: "bool_and", - ops.Any: "bool_or", ops.ArrayCollect: "array_agg", ops.GroupConcat: "group_concat", ops.StringContains: "contains", @@ -1205,7 +1260,6 @@ def visit_Undefined(self, op, **_): ops.Unnest: "explode", ops.RegexSplit: "regexp_split", ops.ArrayContains: "array_contains", - ops.RegexExtract: "regexp_extract", } 
_BINARY_INFIX_OPS = { @@ -1215,6 +1269,7 @@ def visit_Undefined(self, op, **_): ops.Multiply: sge.Mul, ops.Divide: sge.Div, ops.Modulus: sge.Mod, + ops.Power: sge.Pow, # Comparisons ops.GreaterEqual: sge.GTE, ops.Greater: sge.GT, diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index b3a9b643c1ed..21f2242251c0 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -148,6 +148,14 @@ def to_ibis(cls, typ: sge.DataType, nullable: bool | None = None) -> dt.DataType """Convert a sqlglot type to an ibis type.""" typecode = typ.this + # broken sqlglot thing + if isinstance(typecode, sge.Interval): + typ = sge.DataType( + this=sge.DataType.Type.INTERVAL, + expressions=[sge.IntervalSpan(this=typecode.unit)], + ) + typecode = typ.this + if method := getattr(cls, f"_from_sqlglot_{typecode.name}", None): dtype = method(*typ.expressions) else: @@ -237,7 +245,11 @@ def _from_sqlglot_INTERVAL( if isinstance(precision_or_span, str): return dt.Interval(precision_or_span, nullable=nullable) elif isinstance(precision_or_span, sge.IntervalSpan): - return dt.Interval(unit=precision_or_span.this.this, nullable=nullable) + if (expression := precision_or_span.expression) is not None: + unit = expression.this + else: + unit = precision_or_span.this.this + return dt.Interval(unit=unit, nullable=nullable) elif precision_or_span is None: raise com.IbisTypeError("Interval precision is None") else: @@ -262,7 +274,11 @@ def _from_sqlglot_DECIMAL( return dt.Decimal(precision, scale, nullable=cls.default_nullable) @classmethod - def _from_sqlglot_GEOMETRY(cls) -> sge.DataType: + def _from_sqlglot_GEOMETRY( + cls, arg: sge.DataTypeParam | None = None + ) -> sge.DataType: + if arg is not None: + return getattr(dt, str(arg))(nullable=cls.default_nullable) return dt.GeoSpatial(geotype="geometry", nullable=cls.default_nullable) @classmethod @@ -306,13 +322,19 @@ def _from_ibis_Decimal(cls, dtype: dt.Decimal) -> sge.DataType: if (scale := dtype.scale) is None: scale = cls.default_decimal_scale - return sge.DataType( - this=typecode.DECIMAL, - expressions=[ - sge.DataTypeParam(this=sge.Literal.number(precision)), - sge.DataTypeParam(this=sge.Literal.number(scale)), - ], - ) + expressions = [] + + if precision is not None: + expressions.append(sge.DataTypeParam(this=sge.Literal.number(precision))) + + if scale is not None: + if precision is None: + raise com.IbisTypeError( + "Decimal scale cannot be specified without precision" + ) + expressions.append(sge.DataTypeParam(this=sge.Literal.number(scale))) + + return sge.DataType(this=typecode.DECIMAL, expressions=expressions or None) @classmethod def _from_ibis_Timestamp(cls, dtype: dt.Timestamp) -> sge.DataType: @@ -341,6 +363,7 @@ def _from_ibis_GeoSpatial(cls, dtype: dt.GeoSpatial): class PostgresType(SqlglotType): dialect = "postgres" default_interval_precision = "s" + default_temporal_scale = 6 unknown_type_strings = FrozenDict( { @@ -359,6 +382,14 @@ class PostgresType(SqlglotType): } ) + @classmethod + def _from_ibis_Map(cls, dtype: dt.Map) -> sge.DataType: + if not dtype.key_type.is_string(): + raise com.IbisTypeError("Postgres only supports string keys in maps") + if not dtype.value_type.is_string(): + raise com.IbisTypeError("Postgres only supports string values in maps") + return sge.DataType(this=typecode.HSTORE) + class DataFusionType(PostgresType): unknown_type_strings = { diff --git a/ibis/backends/clickhouse/compiler.py b/ibis/backends/clickhouse/compiler.py index 
7fdb5428f1d8..a0919312f78a 100644 --- a/ibis/backends/clickhouse/compiler.py +++ b/ibis/backends/clickhouse/compiler.py @@ -27,6 +27,8 @@ ClickHouse.Generator.TRANSFORMS |= { exp.ArraySize: rename_func("length"), exp.ArraySort: rename_func("arraySort"), + exp.LogicalAnd: rename_func("min"), + exp.LogicalOr: rename_func("max"), } diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index 51f8153a8f1f..d9475b533ea5 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -539,7 +539,6 @@ def ddl_con(ddl_backend): "mssql", "mysql", "oracle", - "postgres", "risingwave", "sqlite", ) diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index d47d5fc7c317..c73ef96dc0b8 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -169,8 +169,10 @@ def _compile_pyarrow_udf(self, udf_node): udf_node.__func__, input_types=[PyArrowType.from_ibis(arg.dtype) for arg in udf_node.args], return_type=PyArrowType.from_ibis(udf_node.dtype), - volatility=getattr(udf_node, "config", {}).get("volatility", "volatile"), - name=udf_node.__full_name__, + volatility=getattr(udf_node, "__config__", {}).get( + "volatility", "volatile" + ), + name=udf_node.__func_name__, ) def _compile_elementwise_udf(self, udf_node): diff --git a/ibis/backends/datafusion/compiler.py b/ibis/backends/datafusion/compiler.py index d82ad23e5597..7b5e77e735ab 100644 --- a/ibis/backends/datafusion/compiler.py +++ b/ibis/backends/datafusion/compiler.py @@ -147,7 +147,7 @@ def visit_StandardDev(self, op, *, arg, how, where): def visit_ScalarUDF(self, op, **kw): input_type = op.__input_type__ if input_type in (InputType.PYARROW, InputType.BUILTIN): - return self.f[op.__full_name__](*kw.values()) + return self.f[op.__func_name__](*kw.values()) else: raise NotImplementedError( f"DataFusion only supports PyArrow UDFs: got a {input_type.name.lower()} UDF" diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index b25257e3979d..6bd467185640 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -1412,9 +1412,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: # only register if we haven't already done so if (name := op.name) not in self.list_tables(): - table = op.data.to_pyarrow(schema) - table = getattr(table, "obj", table) - self.con.register(name, table) + self.con.register(name, op.data.to_pyarrow(schema)) def _register_udfs(self, expr: ir.Expr) -> None: import ibis.expr.operations as ops diff --git a/ibis/backends/duckdb/compiler.py b/ibis/backends/duckdb/compiler.py index 86d60785895f..082dd45dfe7f 100644 --- a/ibis/backends/duckdb/compiler.py +++ b/ibis/backends/duckdb/compiler.py @@ -341,6 +341,13 @@ def visit_TimestampNow(self, op): def visit_RegexExtract(self, op, *, arg, pattern, index): return self.f.regexp_extract(arg, pattern, index, dialect=self.dialect) + @visit_node.register(ops.Quantile) + @visit_node.register(ops.MultiQuantile) + def visit_Quantile(self, op, *, arg, quantile, where): + suffix = "cont" if op.arg.dtype.is_numeric() else "disc" + funcname = f"percentile_{suffix}" + return self.agg[funcname](arg, quantile, where=where) + _SIMPLE_OPS = { ops.ArrayPosition: "list_indexof", diff --git a/ibis/backends/duckdb/tests/test_register.py b/ibis/backends/duckdb/tests/test_register.py index 16b8f2344d79..c1f7b6cfa8f4 100644 --- a/ibis/backends/duckdb/tests/test_register.py +++ b/ibis/backends/duckdb/tests/test_register.py @@ -49,7 +49,8 @@ def 
test_read_parquet(con, data_dir): reason="nix on linux cannot download duckdb extensions or data due to sandboxing", ) def test_load_spatial_when_geo_column(tmpdir): - pytest.importorskip("geoalchemy2") + pytest.importorskip("geopandas") + pytest.importorskip("shapely") path = str(tmpdir.join("test_load_spatial.ddb")) diff --git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py index 252b94a807da..7e25f97c9432 100644 --- a/ibis/backends/postgres/__init__.py +++ b/ibis/backends/postgres/__init__.py @@ -2,24 +2,38 @@ from __future__ import annotations +import contextlib import inspect import textwrap -from typing import TYPE_CHECKING, Callable, Literal - -import sqlalchemy as sa - +from functools import partial +from itertools import repeat, takewhile +from operator import itemgetter +from typing import TYPE_CHECKING, Any, Callable +from urllib.parse import parse_qs, urlparse + +import psycopg2 +import sqlglot as sg +import sqlglot.expressions as sge +from psycopg2 import extras + +import ibis +import ibis.common.exceptions as com import ibis.common.exceptions as exc +import ibis.expr.datatypes as dt import ibis.expr.operations as ops +import ibis.expr.schema as sch +import ibis.expr.types as ir from ibis import util -from ibis.backends.base.sql.alchemy import AlchemyCanCreateSchema, BaseAlchemyBackend -from ibis.backends.postgres.compiler import PostgreSQLCompiler -from ibis.backends.postgres.datatypes import PostgresType +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.base.sqlglot.compiler import TRUE, C, ColGen, F +from ibis.backends.postgres.compiler import PostgresCompiler from ibis.common.exceptions import InvalidDecoratorError if TYPE_CHECKING: from collections.abc import Iterable - import ibis.expr.datatypes as dt + import pandas as pd + import pyarrow as pa def _verify_source_line(func_name: str, line: str): @@ -28,12 +42,162 @@ def _verify_source_line(func_name: str, line: str): return line -class Backend(BaseAlchemyBackend, AlchemyCanCreateSchema): +class Backend(SQLGlotBackend): name = "postgres" - compiler = PostgreSQLCompiler - supports_create_or_replace = False + compiler = PostgresCompiler() supports_python_udfs = True + def _from_url(self, url: str, **kwargs): + """Connect to a backend using a URL `url`. + + Parameters + ---------- + url + URL with which to connect to a backend. 
+ kwargs + Additional keyword arguments + + Returns + ------- + BaseBackend + A backend instance + """ + + url = urlparse(url) + database, *schema = url.path[1:].split("/", 1) + query_params = parse_qs(url.query) + connect_args = { + "user": url.username, + "password": url.password or "", + "host": url.hostname, + "database": database or "", + "schema": schema[0] if schema else "", + } + + for name, value in query_params.items(): + if len(value) > 1: + connect_args[name] = value + elif len(value) == 1: + connect_args[name] = value[0] + else: + raise com.IbisError(f"Invalid URL parameter: {name}") + + kwargs.update(connect_args) + self._convert_kwargs(kwargs) + + if "user" in kwargs and not kwargs["user"]: + del kwargs["user"] + + if "host" in kwargs and not kwargs["host"]: + del kwargs["host"] + + if "database" in kwargs and not kwargs["database"]: + del kwargs["database"] + + if "schema" in kwargs and not kwargs["schema"]: + del kwargs["schema"] + + if "password" in kwargs and kwargs["password"] is None: + del kwargs["password"] + + return self.connect(**kwargs) + + def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: + schema = op.schema + if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]: + raise exc.IbisTypeError( + "Postgres cannot yet reliably handle `null` typed columns; " + f"got null typed columns: {null_columns}" + ) + + # only register if we haven't already done so + if (name := op.name) not in self.list_tables(): + quoted = self.compiler.quoted + column_defs = [ + sg.exp.ColumnDef( + this=sg.to_identifier(colname, quoted=quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [ + sg.exp.ColumnConstraint( + kind=sg.exp.NotNullColumnConstraint() + ) + ] + ), + ) + for colname, typ in schema.items() + ] + + create_stmt = sg.exp.Create( + kind="TABLE", + this=sg.exp.Schema( + this=sg.to_identifier(name, quoted=quoted), expressions=column_defs + ), + properties=sg.exp.Properties(expressions=[sge.TemporaryProperty()]), + ) + create_stmt_sql = create_stmt.sql(self.name) + + columns = schema.keys() + df = op.data.to_frame() + data = df.itertuples(index=False) + cols = ", ".join( + ident.sql(self.name) + for ident in map(partial(sg.to_identifier, quoted=quoted), columns) + ) + specs = ", ".join(repeat("%s", len(columns))) + table = sg.table(name, quoted=quoted) + sql = f"INSERT INTO {table.sql(self.name)} ({cols}) VALUES ({specs})" + with self.begin() as cur: + cur.execute(create_stmt_sql) + extras.execute_batch(cur, sql, data, 128) + + @contextlib.contextmanager + def begin(self): + con = self.con + cursor = con.cursor() + try: + yield cursor + except Exception: + con.rollback() + raise + else: + con.commit() + finally: + cursor.close() + + def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: + import pandas as pd + + from ibis.backends.postgres.converter import PostgresPandasData + + try: + df = pd.DataFrame.from_records( + cursor, columns=schema.names, coerce_float=True + ) + except Exception: + # clean up the cursor if we fail to create the DataFrame + # + # in the sqlite case failing to close the cursor results in + # artificially locked tables + cursor.close() + raise + df = PostgresPandasData.convert_table(df, schema) + return df + + @property + def version(self): + version = f"{self.con.server_version:0>6}" + major = int(version[:2]) + minor = int(version[2:4]) + patch = int(version[4:]) + pieces = [major] + if minor: + pieces.append(minor) + pieces.append(patch) + 
return ".".join(map(str, pieces)) + def do_connect( self, host: str | None = None, @@ -42,8 +206,6 @@ def do_connect( port: int = 5432, database: str | None = None, schema: str | None = None, - url: str | None = None, - driver: Literal["psycopg2"] = "psycopg2", ) -> None: """Create an Ibis client connected to PostgreSQL database. @@ -61,12 +223,6 @@ def do_connect( Database to connect to schema PostgreSQL schema to use. If `None`, use the default `search_path`. - url - SQLAlchemy connection string. - - If passed, the other connection arguments are ignored. - driver - Database driver Examples -------- @@ -99,41 +255,35 @@ def do_connect( year : int32 month : int32 """ - if driver != "psycopg2": - raise NotImplementedError("psycopg2 is currently the only supported driver") - alchemy_url = self._build_alchemy_url( - url=url, + self.con = psycopg2.connect( host=host, port=port, user=user, password=password, database=database, - driver=f"postgresql+{driver}", - ) - - connect_args = {} - if schema is not None: - connect_args["options"] = f"-csearch_path={schema}" - - engine = sa.create_engine( - alchemy_url, connect_args=connect_args, poolclass=sa.pool.StaticPool + options=(f"-csearch_path={schema}" * (schema is not None)) or None, ) - @sa.event.listens_for(engine, "connect") - def connect(dbapi_connection, connection_record): - with dbapi_connection.cursor() as cur: - cur.execute("SET TIMEZONE = UTC") + with self.begin() as cur: + cur.execute("SET TIMEZONE = UTC") - super().do_connect(engine) + self._temp_views = set() - def list_tables(self, like=None, schema=None): + def list_tables( + self, + like: str | None = None, + database: str | None = None, + schema: str | None = None, + ) -> list[str]: """List the tables in the database. Parameters ---------- like A pattern to use for listing tables. + database + (deprecated) The database to perform the list against. schema The schema to perform the list against. @@ -144,59 +294,92 @@ def list_tables(self, like=None, schema=None): types of `table`. 
::: """ - tables = self.inspector.get_table_names(schema=schema) - views = self.inspector.get_view_names(schema=schema) - return self._filter_with_like(tables + views, like) + if database is not None: + util.warn_deprecated( + "database", + instead="Use the `schema` keyword argument instead", + as_of="7.1", + removed_in="8.0", + ) + + conditions = [TRUE] + + if database is not None: + conditions = C.table_catalog.eq(sge.convert(database)) + + if schema is not None: + conditions = C.table_schema.eq(sge.convert(schema)) + + col = "table_name" + sql = ( + sg.select(col) + .from_(sg.table("tables", db="information_schema")) + .distinct() + .where(*conditions) + .sql(self.name) + ) + + with self._safe_raw_sql(sql) as cur: + out = cur.fetchall() + + return self._filter_with_like(map(itemgetter(0), out), like) def list_databases(self, like=None) -> list[str]: # http://dba.stackexchange.com/a/1304/58517 - dbs = sa.table( - "pg_database", - sa.column("datname", sa.TEXT()), - sa.column("datistemplate", sa.BOOLEAN()), - schema="pg_catalog", + dbs = ( + sg.select(C.datname) + .from_(sg.table("pg_database", db="pg_catalog")) + .where(sg.not_(C.datistemplate)) ) - query = sa.select(dbs.c.datname).where(sa.not_(dbs.c.datistemplate)) - with self.begin() as con: - databases = list(con.execute(query).scalars()) + with self._safe_raw_sql(dbs) as cur: + databases = list(map(itemgetter(0), cur)) return self._filter_with_like(databases, like) @property def current_database(self) -> str: - return self._scalar_query(sa.select(sa.func.current_database())) + with self._safe_raw_sql(sg.select(F.current_database())) as cur: + (db,) = cur.fetchone() + return db @property def current_schema(self) -> str: - return self._scalar_query(sa.select(sa.func.current_schema())) + with self._safe_raw_sql(sg.select(F.current_schema())) as cur: + (schema,) = cur.fetchone() + return schema def function(self, name: str, *, schema: str | None = None) -> Callable: - query = sa.text( - """ -SELECT - n.nspname as schema, - pg_catalog.pg_get_function_result(p.oid) as return_type, - string_to_array(pg_catalog.pg_get_function_arguments(p.oid), ', ') as signature, - CASE p.prokind - WHEN 'a' THEN 'agg' - WHEN 'w' THEN 'window' - WHEN 'p' THEN 'proc' - ELSE 'func' - END as "Type" -FROM pg_catalog.pg_proc p -LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace -WHERE p.proname = :name -""" - + "AND n.nspname OPERATOR(pg_catalog.~) :schema COLLATE pg_catalog.default" - * (schema is not None) - ).bindparams(name=name, schema=f"^({schema})$") + n = ColGen(table="n") + p = ColGen(table="p") + f = self.compiler.f + + predicates = [p.proname.eq(name)] + + if schema is not None: + predicates.append(n.nspname.rlike(sge.convert(f"^({schema})$"))) + + query = ( + sg.select( + f["pg_catalog.pg_get_function_result"](p.oid).as_("return_type"), + f.string_to_array( + f["pg_catalog.pg_get_function_arguments"](p.oid), ", " + ).as_("signature"), + ) + .from_(sg.table("pg_proc", db="pg_catalog").as_("p")) + .join( + sg.table("pg_namespace", db="pg_catalog").as_("n"), + on=n.oid.eq(p.pronamespace), + join_type="LEFT", + ) + .where(sg.and_(*predicates)) + ) def split_name_type(arg: str) -> tuple[str, dt.DataType]: name, typ = arg.split(" ", 1) - return name, PostgresType.from_string(typ) + return name, self.compiler.type_mapper.from_string(typ) - with self.begin() as con: - rows = con.execute(query).mappings().fetchall() + with self._safe_raw_sql(query) as cur: + rows = cur.fetchall() if not rows: name = f"{schema}.{name}" if schema else name @@ -204,9 
+387,9 @@ def split_name_type(arg: str) -> tuple[str, dt.DataType]: elif len(rows) > 1: raise exc.AmbiguousUDFError(name) - [row] = rows - return_type = PostgresType.from_string(row["return_type"]) - signature = list(map(split_name_type, row["signature"])) + [(raw_return_type, signature)] = rows + return_type = self.compiler.type_mapper.from_string(raw_return_type) + signature = list(map(split_name_type, signature)) # dummy callable def fake_func(*args, **kwargs): @@ -230,23 +413,50 @@ def _get_udf_source(self, udf_node: ops.ScalarUDF): config = udf_node.__config__ func = udf_node.__func__ func_name = func.__name__ + + lines, _ = inspect.getsourcelines(func) + iter_lines = iter(lines) + + function_premable_lines = list( + takewhile(lambda line: not line.lstrip().startswith("def "), iter_lines) + ) + + if len(function_premable_lines) > 1: + raise InvalidDecoratorError( + name=func_name, lines="".join(function_premable_lines) + ) + + source = textwrap.dedent( + "".join(map(partial(_verify_source_line, func_name), iter_lines)) + ).strip() + + type_mapper = self.compiler.type_mapper + argnames = udf_node.argnames return dict( name=udf_node.__func_name__, - ident=udf_node.__full_name__, + ident=self.compiler.__sql_name__(udf_node), signature=", ".join( - f"{argname} {self._compile_type(arg.dtype)}" - for argname, arg in zip(udf_node.argnames, udf_node.args) + f"{argname} {type_mapper.to_string(arg.dtype)}" + for argname, arg in zip(argnames, udf_node.args) ), - return_type=self._compile_type(udf_node.dtype), + return_type=type_mapper.to_string(udf_node.dtype), language=config.get("language", "plpython3u"), - source="\n".join( - _verify_source_line(func_name, line) - for line in textwrap.dedent(inspect.getsource(func)).splitlines() - if not line.strip().startswith("@udf") - ), - args=", ".join(udf_node.argnames), + source=source, + args=", ".join(argnames), ) + def _compile_builtin_udf(self, udf_node: ops.ScalarUDF) -> None: + """No op.""" + + def _compile_pyarrow_udf(self, udf_node: ops.ScalarUDF) -> None: + raise NotImplementedError("pyarrow UDFs are not supported in Postgres") + + def _compile_pandas_udf(self, udf_node: ops.ScalarUDF) -> str: + raise NotImplementedError("pandas UDFs are not supported in Postgres") + + def _define_udf_translation_rules(self, expr: ir.Expr) -> None: + """No-op, these are defined in the compiler.""" + def _compile_python_udf(self, udf_node: ops.ScalarUDF) -> str: return """\ CREATE OR REPLACE FUNCTION {ident}({signature}) @@ -257,36 +467,97 @@ def _compile_python_udf(self, udf_node: ops.ScalarUDF) -> str: return {name}({args}) $$""".format(**self._get_udf_source(udf_node)) + def _register_udfs(self, expr: ir.Expr) -> None: + udf_sources = [] + for udf_node in expr.op().find(ops.ScalarUDF): + compile_func = getattr( + self, f"_compile_{udf_node.__input_type__.name.lower()}_udf" + ) + if sql := compile_func(udf_node): + udf_sources.append(sql) + if udf_sources: + # define every udf in one execution to avoid the overhead of + # database round trips per udf + with self._safe_raw_sql(";\n".join(udf_sources)): + pass + + def get_schema( + self, name: str, schema: str | None = None, database: str | None = None + ): + a = ColGen(table="a") + c = ColGen(table="c") + n = ColGen(table="n") + + format_type = self.compiler.f["pg_catalog.format_type"] + + type_info = ( + sg.select( + a.attname.as_("column_name"), + format_type(a.atttypid, a.atttypmod).as_("data_type"), + sg.not_(a.attnotnull).as_("nullable"), + ) + .from_(sg.table("pg_attribute", db="pg_catalog").as_("a")) + 
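+            # pg_attribute holds one row per column; joining pg_class and
+            # pg_namespace lets us filter by table name and (optionally) schema,
+            # and the attnum > 0 / NOT attisdropped predicates below skip
+            # system columns and dropped columns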
.join( + sg.table("pg_class", db="pg_catalog").as_("c"), + on=c.oid.eq(a.attrelid), + join_type="INNER", + ) + .join( + sg.table("pg_namespace", db="pg_catalog").as_("n"), + on=n.oid.eq(c.relnamespace), + join_type="INNER", + ) + .where( + a.attnum > 0, + sg.not_(a.attisdropped), + n.nspname.eq(schema) if schema is not None else TRUE, + c.relname.eq(name), + ) + .order_by(a.attnum) + ) + + type_mapper = self.compiler.type_mapper + + with self._safe_raw_sql(type_info) as cur: + rows = cur.fetchall() + + if not rows: + raise com.IbisError(f"Table not found: {name!r}") + + return sch.Schema( + { + col: type_mapper.from_string(typestr, nullable=nullable) + for col, typestr, nullable in rows + } + ) + def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]: - name = util.gen_name("postgres_metadata") - type_info_sql = """\ -SELECT - attname, - format_type(atttypid, atttypmod) AS type -FROM pg_attribute -WHERE attrelid = CAST(:name AS regclass) - AND attnum > 0 - AND NOT attisdropped -ORDER BY attnum""" - if self.inspector.has_table(query): - query = f"TABLE {query}" - - text = sa.text(type_info_sql).bindparams(name=name) - with self.begin() as con: - con.exec_driver_sql(f"CREATE TEMPORARY VIEW {name} AS {query}") - try: - yield from ( - (col, PostgresType.from_string(typestr)) - for col, typestr in con.execute(text) - ) - finally: - con.exec_driver_sql(f"DROP VIEW IF EXISTS {name}") + name = util.gen_name(f"{self.name}_metadata") - def _get_temp_view_definition( - self, name: str, definition: sa.sql.compiler.Compiled - ) -> str: - yield f"DROP VIEW IF EXISTS {name}" - yield f"CREATE TEMPORARY VIEW {name} AS {definition}" + create_stmt = sge.Create( + kind="VIEW", + this=sg.table(name), + expression=sg.parse_one(query, read=self.name), + properties=sge.Properties(expressions=[sge.TemporaryProperty()]), + ) + drop_stmt = sge.Drop(kind="VIEW", this=sg.table(name), exists=True).sql( + self.name + ) + + with self._safe_raw_sql(create_stmt): + pass + try: + yield from self.get_schema(name).items() + finally: + with self._safe_raw_sql(drop_stmt): + pass + + def _get_temp_view_definition(self, name: str, definition): + drop = sge.Drop( + kind="VIEW", exists=True, this=sg.table(name), cascade=True + ).sql(self.name) + create = super()._get_temp_view_definition(name, definition) + return f"{drop}; {create}" def create_schema( self, name: str, database: str | None = None, force: bool = False @@ -295,19 +566,221 @@ def create_schema( raise exc.UnsupportedOperationError( "Postgres does not support creating a schema in a different database" ) - if_not_exists = "IF NOT EXISTS " * force - name = self._quote(name) - with self.begin() as con: - con.exec_driver_sql(f"CREATE SCHEMA {if_not_exists}{name}") + sql = sge.Create( + kind="SCHEMA", this=sg.table(name, catalog=database), exists=force + ) + with self._safe_raw_sql(sql): + pass def drop_schema( - self, name: str, database: str | None = None, force: bool = False + self, + name: str, + database: str | None = None, + force: bool = False, + cascade: bool = False, ) -> None: if database is not None and database != self.current_database: raise exc.UnsupportedOperationError( "Postgres does not support dropping a schema in a different database" ) - name = self._quote(name) - if_exists = "IF EXISTS " * force - with self.begin() as con: - con.exec_driver_sql(f"DROP SCHEMA {if_exists}{name}") + + sql = sge.Drop( + kind="SCHEMA", + this=sg.table(name, catalog=database), + exists=force, + cascade=cascade, + ) + with self._safe_raw_sql(sql): + pass + + def 
create_table( + self, + name: str, + obj: pd.DataFrame | pa.Table | ir.Table | None = None, + *, + schema: ibis.Schema | None = None, + database: str | None = None, + temp: bool = False, + overwrite: bool = False, + ): + """Create a table in Postgres. + + Parameters + ---------- + name + Name of the table to create + obj + The data with which to populate the table; optional, but at least + one of `obj` or `schema` must be specified + schema + The schema of the table to create; optional, but at least one of + `obj` or `schema` must be specified + database + The name of the database in which to create the table; if not + passed, the current database is used. + temp + Create a temporary table + overwrite + If `True`, replace the table if it already exists, otherwise fail + if the table exists + """ + if obj is None and schema is None: + raise ValueError("Either `obj` or `schema` must be specified") + + if database is not None and database != self.current_database: + raise com.UnsupportedOperationError( + "Creating tables in other databases is not supported by Postgres" + ) + else: + database = None + + properties = [] + + if temp: + properties.append(sge.TemporaryProperty()) + + if obj is not None: + if not isinstance(obj, ir.Expr): + table = ibis.memtable(obj) + else: + table = obj + + self._run_pre_execute_hooks(table) + + query = self._to_sqlglot(table) + else: + query = None + + column_defs = [ + sge.ColumnDef( + this=sg.to_identifier(colname, quoted=self.compiler.quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [sge.ColumnConstraint(kind=sge.NotNullColumnConstraint())] + ), + ) + for colname, typ in (schema or table.schema()).items() + ] + + if overwrite: + temp_name = util.gen_name(f"{self.name}_table") + else: + temp_name = name + + table = sg.table(temp_name, catalog=database, quoted=self.compiler.quoted) + target = sge.Schema(this=table, expressions=column_defs) + + create_stmt = sge.Create( + kind="TABLE", + this=target, + properties=sge.Properties(expressions=properties), + ) + + this = sg.table(name, catalog=database, quoted=self.compiler.quoted) + with self._safe_raw_sql(create_stmt) as cur: + if query is not None: + insert_stmt = sge.Insert(this=table, expression=query).sql(self.name) + cur.execute(insert_stmt) + + if overwrite: + cur.execute( + sge.Drop(kind="TABLE", this=this, exists=True).sql(self.name) + ) + cur.execute( + f"ALTER TABLE IF EXISTS {table.sql(self.name)} RENAME TO {this.sql(self.name)}" + ) + + if schema is None: + return self.table(name, schema=database) + + # preserve the input schema if it was provided + return ops.DatabaseTable( + name, schema=schema, source=self, namespace=ops.Namespace(database=database) + ).to_expr() + + def drop_table( + self, + name: str, + database: str | None = None, + schema: str | None = None, + force: bool = False, + ) -> None: + if database is not None and database != self.current_database: + raise com.UnsupportedOperationError( + "Droppping tables in other databases is not supported by Postgres" + ) + else: + database = None + drop_stmt = sg.exp.Drop( + kind="TABLE", + this=sg.table( + name, db=schema, catalog=database, quoted=self.compiler.quoted + ), + exists=force, + ) + with self._safe_raw_sql(drop_stmt): + pass + + @contextlib.contextmanager + def _safe_raw_sql(self, *args, **kwargs): + with contextlib.closing(self.raw_sql(*args, **kwargs)) as result: + yield result + + def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: + with 
contextlib.suppress(AttributeError): + query = query.sql(dialect=self.name) + + con = self.con + cursor = con.cursor() + + try: + # try to load hstore, uuid and ipaddress extensions + with contextlib.suppress(psycopg2.ProgrammingError): + extras.register_hstore(cursor) + with contextlib.suppress(psycopg2.ProgrammingError): + extras.register_uuid(conn_or_curs=cursor) + with contextlib.suppress(psycopg2.ProgrammingError): + extras.register_ipaddress(cursor) + except Exception: + cursor.close() + raise + + try: + cursor.execute(query, **kwargs) + except Exception: + con.rollback() + cursor.close() + raise + else: + con.commit() + return cursor + + def _to_sqlglot( + self, expr: ir.Expr, limit: str | None = None, params=None, **kwargs: Any + ): + table_expr = expr.as_table() + conversions = { + name: table_expr[name].as_ewkb() + for name, typ in table_expr.schema().items() + if typ.is_geospatial() + } + + if conversions: + table_expr = table_expr.mutate(**conversions) + return super()._to_sqlglot(table_expr, limit=limit, params=params) + + def truncate_table(self, name: str, database: str | None = None) -> None: + """Delete all rows from a table. + + Parameters + ---------- + name + Table name + database + Schema name + """ + ident = sg.table(name, db=database).sql(self.name) + with self._safe_raw_sql(f"TRUNCATE TABLE {ident}"): + pass diff --git a/ibis/backends/postgres/compiler.py b/ibis/backends/postgres/compiler.py index 48a5f24b0111..18b9090663af 100644 --- a/ibis/backends/postgres/compiler.py +++ b/ibis/backends/postgres/compiler.py @@ -1,39 +1,620 @@ from __future__ import annotations +import string +from functools import partial, reduce, singledispatchmethod + +import sqlglot as sg +import sqlglot.expressions as sge +from public import public +from sqlglot.dialects import Postgres +from sqlglot.dialects.dialect import rename_func + +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt import ibis.expr.operations as ops import ibis.expr.rules as rlz -from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator -from ibis.backends.postgres.datatypes import PostgresType -from ibis.backends.postgres.registry import operation_registry +from ibis.backends.base.sqlglot.compiler import NULL, STAR, SQLGlotCompiler, paren +from ibis.backends.base.sqlglot.datatypes import PostgresType from ibis.expr.rewrites import rewrite_sample +Postgres.Generator.TRANSFORMS |= { + sge.Map: rename_func("hstore"), + sge.Split: rename_func("string_to_array"), + sge.RegexpSplit: rename_func("regexp_split_to_array"), + sge.DateFromParts: rename_func("make_date"), + sge.ArraySize: rename_func("cardinality"), + sge.Pow: rename_func("pow"), +} + class PostgresUDFNode(ops.Value): shape = rlz.shape_like("args") -class PostgreSQLExprTranslator(AlchemyExprTranslator): - _registry = operation_registry.copy() - _rewrites = AlchemyExprTranslator._rewrites.copy() - _has_reduction_filter_syntax = True - _supports_tuple_syntax = True - _dialect_name = "postgresql" - - # it does support it, but we can't use it because of support for pivot - supports_unnest_in_select = False +@public +class PostgresCompiler(SQLGlotCompiler): + __slots__ = () + dialect = "postgres" type_mapper = PostgresType + rewrites = rewrite_sample, *SQLGlotCompiler.rewrites + quoted = True + + NAN = sge.Literal.number("'NaN'::double precision") + POS_INF = sge.Literal.number("'Inf'::double precision") + NEG_INF = sge.Literal.number("'-Inf'::double precision") + + def _aggregate(self, funcname: str, *args, where): + expr 
= self.f[funcname](*args) + if where is not None: + return sge.Filter(this=expr, expression=sge.Where(this=where)) + return expr + + @singledispatchmethod + def visit_node(self, op, **kwargs): + return super().visit_node(op, **kwargs) + + @visit_node.register(ops.Mode) + def visit_Mode(self, op, *, arg, where): + expr = self.f.mode() + expr = sge.WithinGroup( + this=expr, + expression=sge.Order(expressions=[sge.Ordered(this=arg)]), + ) + if where is not None: + expr = sge.Filter(this=expr, expression=sge.Where(this=where)) + return expr + + def visit_ArgMinMax(self, op, *, arg, key, where, desc: bool): + conditions = [arg.is_(sg.not_(NULL)), key.is_(sg.not_(NULL))] + + if where is not None: + conditions.append(where) + + agg = self.agg.array_agg( + sge.Ordered(this=sge.Order(this=arg, expressions=[key]), desc=desc), + where=sg.and_(*conditions), + ) + return paren(agg)[0] + + @visit_node.register(ops.ArgMin) + def visit_ArgMin(self, op, *, arg, key, where): + return self.visit_ArgMinMax(op, arg=arg, key=key, where=where, desc=False) + + @visit_node.register(ops.ArgMax) + def visit_ArgMax(self, op, *, arg, key, where): + return self.visit_ArgMinMax(op, arg=arg, key=key, where=where, desc=True) + + @visit_node.register(ops.Sum) + def visit_Sum(self, op, *, arg, where): + arg = ( + self.cast(self.cast(arg, dt.int32), op.dtype) + if op.arg.dtype.is_boolean() + else arg + ) + return self.agg.sum(arg, where=where) + + @visit_node.register(ops.IsNan) + def visit_IsNan(self, op, *, arg): + return arg.eq(self.cast(sge.convert("NaN"), op.arg.dtype)) + + @visit_node.register(ops.IsInf) + def visit_IsInf(self, op, *, arg): + return arg.isin(self.POS_INF, self.NEG_INF) + + @visit_node.register(ops.CountDistinctStar) + def visit_CountDistinctStar(self, op, *, where, arg): + # use a tuple because postgres doesn't accept COUNT(DISTINCT a, b, c, ...) + # + # this turns the expression into COUNT(DISTINCT ROW(a, b, c, ...)) + row = sge.Tuple( + expressions=list( + map(partial(sg.column, quoted=self.quoted), op.arg.schema.keys()) + ) + ) + return self.agg.count(sge.Distinct(expressions=[row]), where=where) + + @visit_node.register(ops.Correlation) + def visit_Correlation(self, op, *, left, right, how, where): + if how == "sample": + raise com.UnsupportedOperationError( + f"{self.dialect} only implements `pop` correlation coefficient" + ) + + # TODO: rewrite rule? 
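+        # postgres' corr() aggregate only works on double precision inputs,
+        # so boolean operands are widened to int32 (preserving nullability)
+        # before being passed to the aggregate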
+ if (left_type := op.left.dtype).is_boolean(): + left = self.cast(left, dt.Int32(nullable=left_type.nullable)) + + if (right_type := op.right.dtype).is_boolean(): + right = self.cast(right, dt.Int32(nullable=right_type.nullable)) + + return self.agg.corr(left, right, where=where) + + @visit_node.register(ops.ApproxMedian) + def visit_ApproxMedian(self, op, *, arg, where): + return self.visit_Median(op, arg=arg, where=where) + + @visit_node.register(ops.Median) + def visit_Median(self, op, *, arg, where): + return self.visit_Quantile(op, arg=arg, quantile=sge.convert(0.5), where=where) + + @visit_node.register(ops.ApproxCountDistinct) + def visit_ApproxCountDistinct(self, op, *, arg, where): + return self.agg.count(sge.Distinct(expressions=[arg]), where=where) + + def array_func(self, *args): + return sge.Anonymous(this=sg.to_identifier("array"), expressions=list(args)) + + @visit_node.register(ops.IntegerRange) + @visit_node.register(ops.TimestampRange) + def visit_Range(self, op, *, start, stop, step): + def zero_value(dtype): + if dtype.is_interval(): + return self.f.make_interval() + return 0 + + def interval_sign(v): + zero = self.f.make_interval() + return sge.Case( + ifs=[ + self.if_(v.eq(zero), 0), + self.if_(v < zero, -1), + self.if_(v > zero, 1), + ], + default=NULL, + ) + + def _sign(value, dtype): + if dtype.is_interval(): + return interval_sign(value) + return self.f.sign(value) + + step_dtype = op.step.dtype + return self.if_( + sg.and_( + self.f.nullif(step, zero_value(step_dtype)).is_(sg.not_(NULL)), + _sign(step, step_dtype).eq(_sign(stop - start, step_dtype)), + ), + self.f.array_remove( + self.array_func( + sg.select(STAR).from_(self.f.generate_series(start, stop, step)) + ), + stop, + ), + self.cast(self.f.array(), op.dtype), + ) + + @visit_node.register(ops.ArrayConcat) + def visit_ArrayConcat(self, op, *, arg): + return reduce(self.f.array_cat, map(partial(self.cast, to=op.dtype), arg)) + + @visit_node.register(ops.ArrayContains) + def visit_ArrayContains(self, op, *, arg, other): + return sge.ArrayContains( + this=arg, expression=self.f.array(self.cast(other, op.arg.dtype.value_type)) + ) + + @visit_node.register(ops.ArrayFilter) + def visit_ArrayFilter(self, op, *, arg, body, param): + return self.array_func( + sg.select(sg.column(param, quoted=self.quoted)) + .from_(sge.Unnest(expressions=[arg], alias=param)) + .where(body) + ) + + @visit_node.register(ops.ArrayMap) + def visit_ArrayMap(self, op, *, arg, body, param): + return self.array_func( + sg.select(body).from_(sge.Unnest(expressions=[arg], alias=param)) + ) + + @visit_node.register(ops.ArrayPosition) + def visit_ArrayPosition(self, op, *, arg, other): + t = sge.Unnest(expressions=[arg], alias="value", offset=True) + idx = sg.column("ordinality") + value = sg.column("value") + return self.f.coalesce( + sg.select(idx).from_(t).where(value.eq(other)).limit(1).subquery(), 0 + ) + + @visit_node.register(ops.ArraySort) + def visit_ArraySort(self, op, *, arg): + return self.array_func( + sg.select("x").from_(sge.Unnest(expressions=[arg], alias="x")).order_by("x") + ) + + @visit_node.register(ops.ArrayRepeat) + def visit_ArrayRepeat(self, op, *, arg, times): + i = sg.to_identifier("i") + length = self.f.cardinality(arg) + return self.array_func( + sg.select(arg[i % length + 1]).from_( + self.f.generate_series(0, length * times - 1).as_(i.name) + ) + ) + + @visit_node.register(ops.ArrayDistinct) + def visit_ArrayDistinct(self, op, *, arg): + return self.if_( + arg.is_(NULL), + NULL, + 
self.array_func(sg.select(sge.Explode(this=arg)).distinct()), + ) + + @visit_node.register(ops.ArrayUnion) + def visit_ArrayUnion(self, op, *, left, right): + return self.array_func( + sg.union( + sg.select(sge.Explode(this=left)), sg.select(sge.Explode(this=right)) + ) + ) + + @visit_node.register(ops.ArrayIntersect) + def visit_ArrayIntersect(self, op, *, left, right): + return self.array_func( + sg.intersect( + sg.select(sge.Explode(this=left)), sg.select(sge.Explode(this=right)) + ) + ) + + @visit_node.register(ops.Log2) + def visit_Log2(self, op, *, arg): + return self.cast( + self.f.log( + self.cast(sge.convert(2), dt.decimal), + arg if op.arg.dtype.is_decimal() else self.cast(arg, dt.decimal), + ), + op.dtype, + ) + + @visit_node.register(ops.Log) + def visit_Log(self, op, *, arg, base): + if base is not None: + if not op.base.dtype.is_decimal(): + base = self.cast(base, dt.decimal) + else: + base = self.cast(sge.convert(self.f.exp(1)), dt.decimal) + + if not op.arg.dtype.is_decimal(): + arg = self.cast(arg, dt.decimal) + return self.cast(self.f.log(base, arg), op.dtype) + + @visit_node.register(ops.StructField) + def visit_StructField(self, op, *, arg, field): + idx = op.arg.dtype.names.index(field) + 1 + # postgres doesn't have anonymous structs :( + # + # this works around ibis not having a way to tell sqlglot to transform + # an exploded array(row) into the equivalent unnest(t) _ (col1, ..., colN) + # element + # + # but also postgres should really support anonymous structs + return self.cast( + self.f.json_extract(self.f.to_jsonb(arg), sge.convert(f"f{idx:d}")), + op.dtype, + ) + + @visit_node.register(ops.StructColumn) + def visit_StructColumn(self, op, *, names, values): + return self.f.row(*map(self.cast, values, op.dtype.types)) + + @visit_node.register(ops.ToJSONArray) + def visit_ToJSONArray(self, op, *, arg): + return self.if_( + self.f.json_typeof(arg).eq(sge.convert("array")), + self.array_func(sg.select(STAR).from_(self.f.json_array_elements(arg))), + NULL, + ) + + @visit_node.register(ops.Map) + def visit_Map(self, op, *, keys, values): + return self.f.map(self.f.array(*keys), self.f.array(*values)) + + @visit_node.register(ops.MapLength) + def visit_MapLength(self, op, *, arg): + return self.f.cardinality(self.f.akeys(arg)) + + @visit_node.register(ops.MapGet) + def visit_MapGet(self, op, *, arg, key, default): + return self.if_(self.f.exist(arg, key), self.f.json_extract(arg, key), default) + + @visit_node.register(ops.MapMerge) + def visit_MapMerge(self, op, *, left, right): + return sge.DPipe(this=left, expression=right) + + @visit_node.register(ops.TypeOf) + def visit_TypeOf(self, op, *, arg): + typ = self.cast(self.f.pg_typeof(arg), dt.string) + return self.if_( + typ.eq(sge.convert("unknown")), + "null" if op.arg.dtype.is_null() else "text", + typ, + ) + + @visit_node.register(ops.Round) + def visit_Round(self, op, *, arg, digits): + if digits is None: + return self.f.round(arg) + + result = self.f.round(self.cast(arg, dt.decimal), digits) + if op.arg.dtype.is_decimal(): + return result + return self.cast(result, dt.float64) + + @visit_node.register(ops.Modulus) + def visit_Modulus(self, op, *, left, right): + # postgres doesn't allow modulus of double precision values, so upcast and + # then downcast later if necessary + if not op.dtype.is_integer(): + left = self.cast(left, dt.decimal) + right = self.cast(right, dt.decimal) + + result = left % right + if op.dtype.is_float64(): + return self.cast(result, dt.float64) + else: + return result + + 
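A rough usage sketch of the rule above (illustrative only, not part of the patch; it assumes `ibis.to_sql` accepts a `dialect` argument and uses made-up table and column names): a float64 modulus should go through the upcast-then-downcast path described in `visit_Modulus`:

    import ibis

    t = ibis.table({"a": "float64", "b": "float64"}, name="t")
    # expected to render roughly as
    # CAST(CAST("a" AS DECIMAL) % CAST("b" AS DECIMAL) AS DOUBLE PRECISION)
    print(ibis.to_sql(t.a % t.b, dialect="postgres"))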
@visit_node.register(ops.RegexExtract) + def visit_RegexExtract(self, op, *, arg, pattern, index): + pattern = self.f.concat("(", pattern, ")") + matches = self.f.regexp_match(arg, pattern) + return self.if_(arg.rlike(pattern), paren(matches)[index], NULL) + + @visit_node.register(ops.FindInSet) + def visit_FindInSet(self, op, *, needle, values): + return self.f.coalesce( + self.f.array_position(self.f.array(*values), needle), + 0, + ) + + @visit_node.register(ops.StringContains) + def visit_StringContains(self, op, *, haystack, needle): + return self.f.strpos(haystack, needle) > 0 + + @visit_node.register(ops.EndsWith) + def visit_EndsWith(self, op, *, arg, end): + return self.f.right(arg, self.f.length(end)).eq(end) + + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_binary(): + return self.cast("".join(map(r"\x{:0>2x}".format, value)), dt.binary) + elif dtype.is_time(): + to_int32 = partial(self.cast, to=dt.int32) + to_float64 = partial(self.cast, to=dt.float64) + + return self.f.make_time( + to_int32(value.hour), + to_int32(value.minute), + to_float64(value.second + value.microsecond / 1e6), + ) + elif dtype.is_json(): + return self.cast(value, dt.json) + return None + + @visit_node.register(ops.TimestampFromYMDHMS) + def visit_TimestampFromYMDHMS( + self, op, *, year, month, day, hours, minutes, seconds + ): + to_int32 = partial(self.cast, to=dt.int32) + return self.f.make_timestamp( + to_int32(year), + to_int32(month), + to_int32(day), + to_int32(hours), + to_int32(minutes), + self.cast(seconds, dt.float64), + ) + + @visit_node.register(ops.DateFromYMD) + def visit_DateFromYMD(self, op, *, year, month, day): + to_int32 = partial(self.cast, to=dt.int32) + return self.f.datefromparts(to_int32(year), to_int32(month), to_int32(day)) + + @visit_node.register(ops.TimestampBucket) + def visit_TimestampBucket(self, op, *, arg, interval, offset): + origin = self.f.make_timestamp( + *map(partial(self.cast, to=dt.int32), (1970, 1, 1, 0, 0, 0)) + ) + + if offset is not None: + origin += offset + + return self.f.date_bin(interval, arg, origin) + + @visit_node.register(ops.DayOfWeekIndex) + def visit_DayOfWeekIndex(self, op, *, arg): + return self.cast(self.f.extract("dow", arg) + 6, dt.int16) % 7 + + @visit_node.register(ops.DayOfWeekName) + def visit_DayOfWeekName(self, op, *, arg): + return self.f.trim(self.f.to_char(arg, "Day"), string.whitespace) + + @visit_node.register(ops.ExtractSecond) + def visit_ExtractSecond(self, op, *, arg): + return self.cast(self.f.floor(self.f.extract("second", arg)), op.dtype) + + @visit_node.register(ops.ExtractMillisecond) + def visit_ExtractMillisecond(self, op, *, arg): + return self.cast( + self.f.floor(self.f.extract("millisecond", arg)) % 1_000, op.dtype + ) + + @visit_node.register(ops.ExtractMicrosecond) + def visit_ExtractMicrosecond(self, op, *, arg): + return self.f.extract("microsecond", arg) % 1_000_000 + + @visit_node.register(ops.ExtractDayOfYear) + def visit_ExtractDayOfYear(self, op, *, arg): + return self.f.extract("doy", arg) + + @visit_node.register(ops.ExtractWeekOfYear) + def visit_ExtractWeekOfYear(self, op, *, arg): + return self.f.extract("week", arg) + + @visit_node.register(ops.ExtractEpochSeconds) + def visit_ExtractEpochSeconds(self, op, *, arg): + return self.f.extract("epoch", arg) + + @visit_node.register(ops.ArrayIndex) + def visit_ArrayIndex(self, op, *, arg, index): + index = self.if_(index < 0, self.f.cardinality(arg) + index, index) + return paren(arg)[index + 1] + + @visit_node.register(ops.ArraySlice) + 
def visit_ArraySlice(self, op, *, arg, start, stop): + neg_to_pos_index = lambda n, index: self.if_(index < 0, n + index, index) + + arg_length = self.f.cardinality(arg) + + if start is None: + start = 0 + else: + start = self.f.least(arg_length, neg_to_pos_index(arg_length, start)) + + if stop is None: + stop = arg_length + else: + stop = neg_to_pos_index(arg_length, stop) + + slice_expr = sge.Slice(this=start + 1, expression=stop) + return paren(arg)[slice_expr] + + @visit_node.register(ops.IntervalFromInteger) + def visit_IntervalFromInteger(self, op, *, arg, unit): + plural = unit.plural + if plural == "minutes": + plural = "mins" + arg = self.cast(arg, dt.int32) + elif plural == "seconds": + plural = "secs" + arg = self.cast(arg, dt.float64) + elif plural == "milliseconds": + plural = "secs" + arg /= 1e3 + elif plural == "microseconds": + plural = "secs" + arg /= 1e6 + elif plural == "nanoseconds": + plural = "secs" + arg /= 1e9 + else: + arg = self.cast(arg, dt.int32) + + key = sg.to_identifier(plural) + + return self.f.make_interval(sge.Kwarg(this=key, expression=arg)) + + @visit_node.register(ops.Cast) + @visit_node.register(ops.TryCast) + def visit_Cast(self, op, *, arg, to): + from_ = op.arg.dtype + + if from_.is_timestamp() and to.is_integer(): + return self.f.extract("epoch", arg) + elif from_.is_integer() and to.is_timestamp(): + arg = self.f.to_timestamp(arg) + if (timezone := to.timezone) is not None: + arg = self.f.timezone(timezone, arg) + return arg + elif from_.is_integer() and to.is_interval(): + unit = to.unit + return self.visit_IntervalFromInteger( + ops.IntervalFromInteger(op.arg, unit), arg=arg, unit=unit + ) + elif from_.is_string() and to.is_binary(): + # Postgres and Python use the words "decode" and "encode" in + # opposite ways, sweet! 
+ return self.f.decode(arg, "escape") + elif from_.is_binary() and to.is_string(): + return self.f.encode(arg, "escape") + + return self.cast(arg, op.to) + + @visit_node.register(ops.RowID) + @visit_node.register(ops.TimeDelta) + @visit_node.register(ops.ArrayFlatten) + def visit_Undefined(self, op, **_): + raise com.OperationNotDefinedError(type(op).__name__) + + +_SIMPLE_OPS = { + ops.ArrayCollect: "array_agg", + ops.ArrayRemove: "array_remove", + ops.BitAnd: "bit_and", + ops.BitOr: "bit_or", + ops.BitXor: "bit_xor", + ops.GeoArea: "st_area", + ops.GeoAsBinary: "st_asbinary", + ops.GeoAsEWKB: "st_asewkb", + ops.GeoAsEWKT: "st_asewkt", + ops.GeoAsText: "st_astext", + ops.GeoAzimuth: "st_azimuth", + ops.GeoBuffer: "st_buffer", + ops.GeoCentroid: "st_centroid", + ops.GeoContains: "st_contains", + ops.GeoContainsProperly: "st_contains", + ops.GeoCoveredBy: "st_coveredby", + ops.GeoCovers: "st_covers", + ops.GeoCrosses: "st_crosses", + ops.GeoDFullyWithin: "st_dfullywithin", + ops.GeoDWithin: "st_dwithin", + ops.GeoDifference: "st_difference", + ops.GeoDisjoint: "st_disjoint", + ops.GeoDistance: "st_distance", + ops.GeoEndPoint: "st_endpoint", + ops.GeoEnvelope: "st_envelope", + ops.GeoEquals: "st_equals", + ops.GeoGeometryN: "st_geometryn", + ops.GeoGeometryType: "st_geometrytype", + ops.GeoIntersection: "st_intersection", + ops.GeoIntersects: "st_intersects", + ops.GeoIsValid: "st_isvalid", + ops.GeoLength: "st_length", + ops.GeoLineLocatePoint: "st_linelocatepoint", + ops.GeoLineMerge: "st_linemerge", + ops.GeoLineSubstring: "st_linesubstring", + ops.GeoNPoints: "st_npoints", + ops.GeoOrderingEquals: "st_orderingequals", + ops.GeoOverlaps: "st_overlaps", + ops.GeoPerimeter: "st_perimeter", + ops.GeoSRID: "st_srid", + ops.GeoSetSRID: "st_setsrid", + ops.GeoSimplify: "st_simplify", + ops.GeoStartPoint: "st_startpoint", + ops.GeoTouches: "st_touches", + ops.GeoTransform: "st_transform", + ops.GeoUnaryUnion: "st_union", + ops.GeoUnion: "st_union", + ops.GeoWithin: "st_within", + ops.GeoX: "st_x", + ops.GeoY: "st_y", + ops.LPad: "lpad", + ops.MapContains: "exist", + ops.MapKeys: "akeys", + ops.MapValues: "avals", + ops.RPad: "rpad", + ops.RegexSearch: "regexp_like", + ops.StringAscii: "ascii", + ops.TimeFromHMS: "make_time", +} + + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + @PostgresCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) -rewrites = PostgreSQLExprTranslator.rewrites + else: + @PostgresCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) -@rewrites(ops.Any) -@rewrites(ops.All) -def _any_all_no_op(expr): - return expr + setattr(PostgresCompiler, f"visit_{_op.__name__}", _fmt) -class PostgreSQLCompiler(AlchemyCompiler): - translator_class = PostgreSQLExprTranslator - rewrites = AlchemyCompiler.rewrites | rewrite_sample +del _op, _name, _fmt diff --git a/ibis/backends/postgres/converter.py b/ibis/backends/postgres/converter.py new file mode 100644 index 000000000000..7dc5dc640b75 --- /dev/null +++ b/ibis/backends/postgres/converter.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from ibis.formats.pandas import PandasData + + +class PostgresPandasData(PandasData): + @classmethod + def convert_GeoSpatial(cls, s, dtype, pandas_type): + import geopandas as gpd + import shapely as shp + + return gpd.GeoSeries(shp.from_wkb(s.map(bytes, 
na_action="ignore"))) + + convert_Point = ( + convert_LineString + ) = ( + convert_Polygon + ) = ( + convert_MultiLineString + ) = convert_MultiPoint = convert_MultiPolygon = convert_GeoSpatial + + @classmethod + def convert_Binary(cls, s, dtype, pandas_type): + return s.map(bytes, na_action="ignore") diff --git a/ibis/backends/postgres/datatypes.py b/ibis/backends/postgres/datatypes.py deleted file mode 100644 index 4a2bd2c1f71f..000000000000 --- a/ibis/backends/postgres/datatypes.py +++ /dev/null @@ -1,88 +0,0 @@ -from __future__ import annotations - -import sqlalchemy as sa -import sqlalchemy.dialects.postgresql as psql -import sqlalchemy.types as sat - -import ibis.expr.datatypes as dt -from ibis.backends.base.sql.alchemy.datatypes import AlchemyType -from ibis.backends.base.sqlglot.datatypes import PostgresType as SqlglotPostgresType - -_from_postgres_types = { - psql.DOUBLE_PRECISION: dt.Float64, - psql.UUID: dt.UUID, - psql.MACADDR: dt.MACADDR, - psql.INET: dt.INET, - psql.JSONB: dt.JSON, - psql.JSON: dt.JSON, - psql.TSVECTOR: dt.Unknown, - psql.BYTEA: dt.Binary, - psql.UUID: dt.UUID, -} - - -_postgres_interval_fields = { - "YEAR": "Y", - "MONTH": "M", - "DAY": "D", - "HOUR": "h", - "MINUTE": "m", - "SECOND": "s", - "YEAR TO MONTH": "M", - "DAY TO HOUR": "h", - "DAY TO MINUTE": "m", - "DAY TO SECOND": "s", - "HOUR TO MINUTE": "m", - "HOUR TO SECOND": "s", - "MINUTE TO SECOND": "s", -} - - -class PostgresType(AlchemyType): - dialect = "postgresql" - - @classmethod - def from_ibis(cls, dtype: dt.DataType) -> sat.TypeEngine: - if dtype.is_floating(): - if isinstance(dtype, dt.Float64): - return psql.DOUBLE_PRECISION - else: - return psql.REAL - elif dtype.is_array(): - # Unwrap the array element type because sqlalchemy doesn't allow arrays of - # arrays. This doesn't affect the underlying data. 
- while dtype.is_array(): - dtype = dtype.value_type - return sa.ARRAY(cls.from_ibis(dtype)) - elif dtype.is_map(): - if not (dtype.key_type.is_string() and dtype.value_type.is_string()): - raise TypeError( - f"PostgreSQL only supports map, got: {dtype}" - ) - return psql.HSTORE() - elif dtype.is_uuid(): - return psql.UUID() - else: - return super().from_ibis(dtype) - - @classmethod - def to_ibis(cls, typ: sat.TypeEngine, nullable: bool = True) -> dt.DataType: - if dtype := _from_postgres_types.get(type(typ)): - return dtype(nullable=nullable) - elif isinstance(typ, psql.HSTORE): - return dt.Map(dt.string, dt.string, nullable=nullable) - elif isinstance(typ, psql.INTERVAL): - field = typ.fields.upper() - if (unit := _postgres_interval_fields.get(field, None)) is None: - raise ValueError(f"Unknown PostgreSQL interval field {field!r}") - elif unit in {"Y", "M"}: - raise ValueError( - "Variable length intervals are not yet supported with PostgreSQL" - ) - return dt.Interval(unit=unit, nullable=nullable) - else: - return super().to_ibis(typ, nullable=nullable) - - @classmethod - def from_string(cls, type_string: str) -> PostgresType: - return SqlglotPostgresType.from_string(type_string) diff --git a/ibis/backends/postgres/registry.py b/ibis/backends/postgres/registry.py deleted file mode 100644 index 1cc3e028a47e..000000000000 --- a/ibis/backends/postgres/registry.py +++ /dev/null @@ -1,864 +0,0 @@ -from __future__ import annotations - -import functools -import itertools -import locale -import operator -import platform -import re -import string - -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql as pg -from sqlalchemy.ext.compiler import compiles -from sqlalchemy.sql.functions import GenericFunction - -import ibis.backends.base.sql.registry.geospatial as geo -import ibis.common.exceptions as com -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops - -# used for literal translate -from ibis.backends.base.sql.alchemy import ( - fixed_arity, - get_sqla_table, - reduction, - sqlalchemy_operation_registry, - sqlalchemy_window_functions_registry, - unary, - varargs, -) -from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported -from ibis.backends.base.sql.alchemy.registry import ( - _bitwise_op, - _extract, - geospatial_functions, - get_col, -) - -operation_registry = sqlalchemy_operation_registry.copy() -operation_registry.update(sqlalchemy_window_functions_registry) - -if geospatial_supported: - operation_registry.update(geospatial_functions) - - -_truncate_precisions = { - "us": "microseconds", - "ms": "milliseconds", - "s": "second", - "m": "minute", - "h": "hour", - "D": "day", - "W": "week", - "M": "month", - "Q": "quarter", - "Y": "year", -} - - -def _timestamp_truncate(t, op): - sa_arg = t.translate(op.arg) - try: - precision = _truncate_precisions[op.unit.short] - except KeyError: - raise com.UnsupportedOperationError(f"Unsupported truncate unit {op.unit!r}") - return sa.func.date_trunc(precision, sa_arg) - - -def _timestamp_bucket(t, op): - arg = t.translate(op.arg) - interval = t.translate(op.interval) - - origin = sa.literal_column("timestamp '1970-01-01 00:00:00'") - - if op.offset is not None: - origin = origin + t.translate(op.offset) - return sa.func.date_bin(interval, arg, origin) - - -def _typeof(t, op): - sa_arg = t.translate(op.arg) - typ = sa.cast(sa.func.pg_typeof(sa_arg), sa.TEXT) - - # select pg_typeof('thing') returns unknown so we have to check the child's - # type for nullness - return sa.case( - ((typ == "unknown") & 
(op.arg.dtype != dt.null), "text"), - ((typ == "unknown") & (op.arg.dtype == dt.null), "null"), - else_=typ, - ) - - -_strftime_to_postgresql_rules = { - "%a": "TMDy", # TM does it in a locale dependent way - "%A": "TMDay", - "%w": "D", # 1-based day of week, see below for how we make this 0-based - "%d": "DD", # day of month - "%-d": "FMDD", # - is no leading zero for Python same for FM in postgres - "%b": "TMMon", # Sep - "%B": "TMMonth", # September - "%m": "MM", # 01 - "%-m": "FMMM", # 1 - "%y": "YY", # 15 - "%Y": "YYYY", # 2015 - "%H": "HH24", # 09 - "%-H": "FMHH24", # 9 - "%I": "HH12", # 09 - "%-I": "FMHH12", # 9 - "%p": "AM", # AM or PM - "%M": "MI", # zero padded minute - "%-M": "FMMI", # Minute - "%S": "SS", # zero padded second - "%-S": "FMSS", # Second - "%f": "US", # zero padded microsecond - "%z": "OF", # utf offset - "%Z": "TZ", # uppercase timezone name - "%j": "DDD", # zero padded day of year - "%-j": "FMDDD", # day of year - "%U": "WW", # 1-based week of year - # 'W': ?, # meh -} - -try: - _strftime_to_postgresql_rules.update( - { - "%c": locale.nl_langinfo(locale.D_T_FMT), # locale date and time - "%x": locale.nl_langinfo(locale.D_FMT), # locale date - "%X": locale.nl_langinfo(locale.T_FMT), # locale time - } - ) -except AttributeError: - HAS_LANGINFO = False -else: - HAS_LANGINFO = True - - -# translate strftime spec into mostly equivalent PostgreSQL spec -_scanner = re.Scanner( # type: ignore # re does have a Scanner attribute - # double quotes need to be escaped - [('"', lambda *_: r"\"")] - + [ - ( - "|".join( - map( - "(?:{})".format, - itertools.chain( - _strftime_to_postgresql_rules.keys(), - [ - # "%e" is in the C standard and Python actually - # generates this if your spec contains "%c" but we - # don't officially support it as a specifier so we - # need to special case it in the scanner - "%e", - r"\s+", - rf"[{re.escape(string.punctuation)}]", - rf"[^{re.escape(string.punctuation)}\s]+", - ], - ), - ) - ), - lambda _, token: token, - ) - ] -) - - -_lexicon_values = frozenset(_strftime_to_postgresql_rules.values()) - -_locale_specific_formats = frozenset(["%c", "%x", "%X"]) -_strftime_blacklist = frozenset(["%w", "%U", "%e"]) | _locale_specific_formats - - -def _reduce_tokens(tokens, arg): - # current list of tokens - curtokens = [] - - # reduced list of tokens that accounts for blacklisted values - reduced = [] - - non_special_tokens = frozenset(_strftime_to_postgresql_rules) - _strftime_blacklist - - # TODO: how much of a hack is this? - for token in tokens: - if token in _locale_specific_formats and not HAS_LANGINFO: - raise com.UnsupportedOperationError( - f"Format string component {token!r} is not supported on {platform.system()}" - ) - # we are a non-special token %A, %d, etc. 
- if token in non_special_tokens: - curtokens.append(_strftime_to_postgresql_rules[token]) - - # we have a string like DD, to escape this we - # surround it with double quotes - elif token in _lexicon_values: - curtokens.append(f'"{token}"') - - # we have a token that needs special treatment - elif token in _strftime_blacklist: - if token == "%w": - value = sa.extract("dow", arg) # 0 based day of week - elif token == "%U": - value = sa.cast(sa.func.to_char(arg, "WW"), sa.SMALLINT) - 1 - elif token in ("%c", "%x", "%X"): - # re scan and tokenize this pattern - try: - new_pattern = _strftime_to_postgresql_rules[token] - except KeyError: - raise ValueError( - "locale specific date formats (%%c, %%x, %%X) are " - "not yet implemented for %s" % platform.system() - ) - - new_tokens, _ = _scanner.scan(new_pattern) - value = functools.reduce( - sa.sql.ColumnElement.concat, - _reduce_tokens(new_tokens, arg), - ) - elif token == "%e": - # pad with spaces instead of zeros - value = sa.func.replace(sa.func.to_char(arg, "DD"), "0", " ") - - reduced += [ - sa.func.to_char(arg, "".join(curtokens)), - sa.cast(value, sa.TEXT), - ] - - # empty current token list in case there are more tokens - del curtokens[:] - - # uninteresting text - else: - curtokens.append(token) - # append result to r if we had more tokens or if we have no - # blacklisted tokens - if curtokens: - reduced.append(sa.func.to_char(arg, "".join(curtokens))) - return reduced - - -def _strftime(arg, pattern): - tokens, _ = _scanner.scan(pattern.value) - reduced = _reduce_tokens(tokens, arg) - return functools.reduce(sa.sql.ColumnElement.concat, reduced) - - -def _find_in_set(t, op): - # TODO - # this operation works with any type, not just strings. should the - # operation itself also have this property? - return ( - sa.func.coalesce( - sa.func.array_position( - pg.array(list(map(t.translate, op.values))), - t.translate(op.needle), - ), - 0, - ) - - 1 - ) - - -def _log(t, op): - arg, base = op.args - sa_arg = t.translate(arg) - if base is not None: - sa_base = t.translate(base) - return sa.cast( - sa.func.log(sa.cast(sa_base, sa.NUMERIC), sa.cast(sa_arg, sa.NUMERIC)), - t.get_sqla_type(op.dtype), - ) - return sa.func.ln(sa_arg) - - -def _regex_extract(arg, pattern, index): - # wrap in parens to support 0th group being the whole string - pattern = "(" + pattern + ")" - # arrays are 1-based in postgres - index = index + 1 - does_match = sa.func.textregexeq(arg, pattern) - matches = sa.func.regexp_match(arg, pattern, type_=pg.ARRAY(sa.TEXT)) - return sa.case((does_match, matches[index]), else_=None) - - -def _array_repeat(t, op): - """Repeat an array.""" - arg = t.translate(op.arg) - times = t.translate(op.times) - - array_length = sa.func.cardinality(arg) - array = sa.sql.elements.Grouping(arg) if isinstance(op.arg, ops.Literal) else arg - - # sequence from 1 to the total number of elements desired in steps of 1. 
- series = sa.func.generate_series(1, times * array_length).table_valued() - - # if our current index modulo the array's length is a multiple of the - # array's length, then the index is the array's length - index = sa.func.coalesce( - sa.func.nullif(series.column % array_length, 0), array_length - ) - - # tie it all together in a scalar subquery and collapse that into an ARRAY - return sa.func.array(sa.select(array[index]).scalar_subquery()) - - -def _table_column(t, op): - ctx = t.context - table = op.table - - sa_table = get_sqla_table(ctx, table) - out_expr = get_col(sa_table, op) - - # If the column does not originate from the table set in the current SELECT - # context, we should format as a subquery - if t.permit_subquery and ctx.is_foreign_expr(table): - return sa.select(out_expr) - - return out_expr - - -def _round(t, op): - arg, digits = op.args - sa_arg = t.translate(arg) - - if digits is None: - return sa.func.round(sa_arg) - - # postgres doesn't allow rounding of double precision values to a specific - # number of digits (though simple truncation on doubles is allowed) so - # we cast to numeric and then cast back if necessary - result = sa.func.round(sa.cast(sa_arg, sa.NUMERIC), t.translate(digits)) - if digits is not None and arg.dtype.is_decimal(): - return result - result = sa.cast(result, pg.DOUBLE_PRECISION()) - return result - - -def _mod(t, op): - left, right = map(t.translate, op.args) - - # postgres doesn't allow modulus of double precision values, so upcast and - # then downcast later if necessary - if not op.dtype.is_integer(): - left = sa.cast(left, sa.NUMERIC) - right = sa.cast(right, sa.NUMERIC) - - result = left % right - if op.dtype.is_float64(): - return sa.cast(result, pg.DOUBLE_PRECISION()) - else: - return result - - -def _neg_idx_to_pos(array, idx): - return sa.case((idx < 0, sa.func.cardinality(array) + idx), else_=idx) - - -def _array_slice(*, index_converter, array_length, func): - def translate(t, op): - arg = t.translate(op.arg) - - arg_length = array_length(arg) - - if (start := op.start) is None: - start = 0 - else: - start = t.translate(start) - start = sa.func.least(arg_length, index_converter(arg, start)) - - if (stop := op.stop) is None: - stop = arg_length - else: - stop = index_converter(arg, t.translate(stop)) - - return func(arg, start + 1, stop) - - return translate - - -def _array_index(*, index_converter, func): - def translate(t, op): - sa_array = t.translate(op.arg) - sa_index = t.translate(op.index) - if isinstance(op.arg, ops.Literal): - sa_array = sa.sql.elements.Grouping(sa_array) - return func(sa_array, index_converter(sa_array, sa_index) + 1) - - return translate - - -def _literal(t, op): - dtype = op.dtype - value = op.value - - if value is None: - return ( - sa.null() if dtype.is_null() else sa.cast(sa.null(), t.get_sqla_type(dtype)) - ) - if dtype.is_interval(): - return sa.literal_column(f"INTERVAL '{value} {dtype.resolution}'") - elif dtype.is_geospatial(): - # inline_metadata ex: 'SRID=4326;POINT( ... 
)' - return sa.literal_column(geo.translate_literal(op, inline_metadata=True)) - elif dtype.is_array(): - return pg.array(value) - elif dtype.is_map(): - return pg.hstore(list(value.keys()), list(value.values())) - elif dtype.is_time(): - return sa.func.make_time( - value.hour, value.minute, value.second + value.microsecond / 1e6 - ) - elif dtype.is_date(): - return sa.func.make_date(value.year, value.month, value.day) - elif dtype.is_timestamp(): - if (tz := dtype.timezone) is not None: - return sa.func.to_timestamp(value.timestamp()).op("AT TIME ZONE")(tz) - return sa.cast(sa.literal(value.isoformat()), sa.TIMESTAMP()) - else: - return sa.literal(value) - - -def _string_agg(t, op): - agg = sa.func.string_agg(t.translate(op.arg), t.translate(op.sep)) - if (where := op.where) is not None: - return agg.filter(t.translate(where)) - return agg - - -def _corr(t, op): - if op.how == "sample": - raise ValueError( - f"{t.__class__.__name__} only implements population correlation " - "coefficient" - ) - return _binary_variance_reduction(sa.func.corr)(t, op) - - -def _covar(t, op): - suffix = {"sample": "samp", "pop": "pop"} - how = suffix.get(op.how, "samp") - func = getattr(sa.func, f"covar_{how}") - return _binary_variance_reduction(func)(t, op) - - -def _mode(t, op): - arg = op.arg - if (where := op.where) is not None: - arg = ops.IfElse(where, arg, None) - return sa.func.mode().within_group(t.translate(arg)) - - -def _quantile(t, op): - arg = op.arg - if (where := op.where) is not None: - arg = ops.IfElse(where, arg, None) - if arg.dtype.is_numeric(): - func = sa.func.percentile_cont - else: - func = sa.func.percentile_disc - return func(t.translate(op.quantile)).within_group(t.translate(arg)) - - -def _median(t, op): - arg = op.arg - if (where := op.where) is not None: - arg = ops.IfElse(where, arg, None) - - if arg.dtype.is_numeric(): - func = sa.func.percentile_cont - else: - func = sa.func.percentile_disc - return func(0.5).within_group(t.translate(arg)) - - -def _binary_variance_reduction(func): - def variance_compiler(t, op): - x = op.left - if (x_type := x.dtype).is_boolean(): - x = ops.Cast(x, dt.Int32(nullable=x_type.nullable)) - - y = op.right - if (y_type := y.dtype).is_boolean(): - y = ops.Cast(y, dt.Int32(nullable=y_type.nullable)) - - if t._has_reduction_filter_syntax: - result = func(t.translate(x), t.translate(y)) - - if (where := op.where) is not None: - return result.filter(t.translate(where)) - return result - else: - if (where := op.where) is not None: - x = ops.IfElse(where, x, None) - y = ops.IfElse(where, y, None) - return func(t.translate(x), t.translate(y)) - - return variance_compiler - - -def _arg_min_max(sort_func): - def translate(t, op: ops.ArgMin | ops.ArgMax) -> str: - arg = t.translate(op.arg) - key = t.translate(op.key) - - conditions = [arg != sa.null(), key != sa.null()] - - agg = sa.func.array_agg(pg.aggregate_order_by(arg, sort_func(key))) - - if (where := op.where) is not None: - conditions.append(t.translate(where)) - return agg.filter(sa.and_(*conditions))[1] - - return translate - - -def _arbitrary(t, op): - if (how := op.how) == "heavy": - raise com.UnsupportedOperationError( - f"postgres backend doesn't support how={how!r} for the arbitrary() aggregate" - ) - func = getattr(sa.func, op.how) - return t._reduction(func, op) - - -class struct_field(GenericFunction): - inherit_cache = True - - -@compiles(struct_field) -def compile_struct_field_postgresql(element, compiler, **kw): - arg, field = element.clauses - return f"({compiler.process(arg, 
**kw)}).{field.name}" - - -def _struct_field(t, op): - arg = op.arg - idx = arg.dtype.names.index(op.field) + 1 - field_name = sa.literal_column(f"f{idx:d}") - return struct_field(t.translate(arg), field_name, type_=t.get_sqla_type(op.dtype)) - - -def _struct_column(t, op): - types = op.dtype.types - return sa.func.row( - # we have to cast here, otherwise postgres refuses to allow the statement - *map(t.translate, map(ops.Cast, op.values, types)), - type_=t.get_sqla_type( - dt.Struct({f"f{i:d}": typ for i, typ in enumerate(types, start=1)}) - ), - ) - - -def _unnest(t, op): - arg = op.arg - row_type = arg.dtype.value_type - - types = getattr(row_type, "types", (row_type,)) - - is_struct = row_type.is_struct() - derived = ( - sa.func.unnest(t.translate(arg)) - .table_valued( - *( - sa.column(f"f{i:d}", stype) - for i, stype in enumerate(map(t.get_sqla_type, types), start=1) - ) - ) - .render_derived(with_types=is_struct) - ) - - # wrap in a row column so that we can return a single column from this rule - if not is_struct: - return derived.c[0] - return sa.func.row(*derived.c) - - -def _array_sort(arg): - flat = sa.func.unnest(arg).column_valued() - return sa.func.array(sa.select(flat).order_by(flat).scalar_subquery()) - - -def _array_position(haystack, needle): - t = ( - sa.func.unnest(haystack) - .table_valued("value", with_ordinality="idx", name="haystack") - .render_derived() - ) - idx = t.c.idx - 1 - return sa.func.coalesce( - sa.select(idx).where(t.c.value == needle).limit(1).scalar_subquery(), -1 - ) - - -def _array_map(t, op): - return sa.func.array( - # this translates to the function call, with column names the same as - # the parameter names in the lambda - sa.select(t.translate(op.body)) - .select_from( - # unnest the input array - sa.func.unnest(t.translate(op.arg)) - # name the columns of the result the same as the lambda parameter - # so that we can reference them as such in the outer query - .table_valued(op.param) - .render_derived() - ) - .scalar_subquery() - ) - - -def _array_filter(t, op): - param = op.param - return sa.func.array( - sa.select(sa.column(param, type_=t.get_sqla_type(op.arg.dtype.value_type))) - .select_from( - sa.func.unnest(t.translate(op.arg)).table_valued(param).render_derived() - ) - .where(t.translate(op.body)) - .scalar_subquery() - ) - - -def zero_value(dtype): - if dtype.is_interval(): - return sa.func.make_interval() - return 0 - - -def interval_sign(v): - zero = sa.func.make_interval() - return sa.case((v == zero, 0), (v < zero, -1), (v > zero, 1)) - - -def _sign(value, dtype): - if dtype.is_interval(): - return interval_sign(value) - return sa.func.sign(value) - - -def _range(t, op): - start = t.translate(op.start) - stop = t.translate(op.stop) - step = t.translate(op.step) - satype = t.get_sqla_type(op.dtype) - seq = sa.func.generate_series(start, stop, step, type_=satype) - zero = zero_value(op.step.dtype) - return sa.case( - ( - sa.and_( - sa.func.nullif(step, zero).is_not(None), - _sign(step, op.step.dtype) == _sign(stop - start, op.step.dtype), - ), - sa.func.array_remove( - sa.func.array(sa.select(seq).scalar_subquery()), stop, type_=satype - ), - ), - else_=sa.cast(pg.array([]), satype), - ) - - -operation_registry.update( - { - ops.Literal: _literal, - # We override this here to support time zones - ops.TableColumn: _table_column, - ops.Argument: lambda t, op: sa.column( - op.param, type_=t.get_sqla_type(op.dtype) - ), - # types - ops.TypeOf: _typeof, - # Floating - ops.IsNan: fixed_arity(lambda arg: arg == float("nan"), 1), - 
ops.IsInf: fixed_arity( - lambda arg: sa.or_(arg == float("inf"), arg == float("-inf")), 1 - ), - # boolean reductions - ops.Any: reduction(sa.func.bool_or), - ops.All: reduction(sa.func.bool_and), - # strings - ops.GroupConcat: _string_agg, - ops.Capitalize: unary(sa.func.initcap), - ops.RegexSearch: fixed_arity(lambda x, y: x.op("~")(y), 2), - # postgres defaults to replacing only the first occurrence - ops.RegexReplace: fixed_arity( - lambda string, pattern, replacement: sa.func.regexp_replace( - string, pattern, replacement, "g" - ), - 3, - ), - ops.Translate: fixed_arity(sa.func.translate, 3), - ops.RegexExtract: fixed_arity(_regex_extract, 3), - ops.StringSplit: fixed_arity( - lambda col, sep: sa.func.string_to_array( - col, sep, type_=sa.ARRAY(col.type) - ), - 2, - ), - ops.FindInSet: _find_in_set, - # math - ops.Log: _log, - ops.Log2: unary(lambda x: sa.func.log(2, x)), - ops.Log10: unary(sa.func.log), - ops.Round: _round, - ops.Modulus: _mod, - # dates and times - ops.DateFromYMD: fixed_arity(sa.func.make_date, 3), - ops.DateTruncate: _timestamp_truncate, - ops.TimestampTruncate: _timestamp_truncate, - ops.TimestampBucket: _timestamp_bucket, - ops.IntervalFromInteger: ( - lambda t, op: t.translate(op.arg) - * sa.text(f"INTERVAL '1 {op.dtype.resolution}'") - ), - ops.DateAdd: fixed_arity(operator.add, 2), - ops.DateSub: fixed_arity(operator.sub, 2), - ops.DateDiff: fixed_arity(operator.sub, 2), - ops.TimestampAdd: fixed_arity(operator.add, 2), - ops.TimestampSub: fixed_arity(operator.sub, 2), - ops.TimestampDiff: fixed_arity(operator.sub, 2), - ops.Strftime: fixed_arity(_strftime, 2), - ops.ExtractEpochSeconds: fixed_arity( - lambda arg: sa.cast(sa.extract("epoch", arg), sa.INTEGER), 1 - ), - ops.ExtractDayOfYear: _extract("doy"), - ops.ExtractWeekOfYear: _extract("week"), - # extracting the second gives us the fractional part as well, so smash that - # with a cast to SMALLINT - ops.ExtractSecond: fixed_arity( - lambda arg: sa.cast(sa.func.floor(sa.extract("second", arg)), sa.SMALLINT), - 1, - ), - # we get total number of milliseconds including seconds with extract so we - # mod 1000 - ops.ExtractMillisecond: fixed_arity( - lambda arg: sa.cast( - sa.func.floor(sa.extract("millisecond", arg)) % 1000, - sa.SMALLINT, - ), - 1, - ), - ops.DayOfWeekIndex: fixed_arity( - lambda arg: sa.cast( - sa.cast(sa.extract("dow", arg) + 6, sa.SMALLINT) % 7, sa.SMALLINT - ), - 1, - ), - ops.DayOfWeekName: fixed_arity( - lambda arg: sa.func.trim(sa.func.to_char(arg, "Day")), 1 - ), - ops.TimeFromHMS: fixed_arity(sa.func.make_time, 3), - # array operations - ops.ArrayLength: unary(sa.func.cardinality), - ops.ArrayCollect: reduction(sa.func.array_agg), - ops.Array: (lambda t, op: pg.array(list(map(t.translate, op.exprs)))), - ops.ArraySlice: _array_slice( - index_converter=_neg_idx_to_pos, - array_length=sa.func.cardinality, - func=lambda arg, start, stop: arg[start:stop], - ), - ops.ArrayIndex: _array_index( - index_converter=_neg_idx_to_pos, func=lambda arg, index: arg[index] - ), - ops.ArrayConcat: varargs(lambda *args: functools.reduce(operator.add, args)), - ops.ArrayRepeat: _array_repeat, - ops.Unnest: _unnest, - ops.Covariance: _covar, - ops.Correlation: _corr, - ops.BitwiseXor: _bitwise_op("#"), - ops.Mode: _mode, - ops.ApproxMedian: _median, - ops.Median: _median, - ops.Quantile: _quantile, - ops.MultiQuantile: _quantile, - ops.TimestampNow: lambda t, op: sa.literal_column( - "CURRENT_TIMESTAMP", type_=t.get_sqla_type(op.dtype) - ), - ops.MapGet: fixed_arity( - lambda arg, key, default: 
sa.case( - (arg.has_key(key), arg[key]), else_=default - ), - 3, - ), - ops.MapContains: fixed_arity(pg.HSTORE.Comparator.has_key, 2), - ops.MapKeys: unary(pg.HSTORE.Comparator.keys), - ops.MapValues: unary(pg.HSTORE.Comparator.vals), - ops.MapMerge: fixed_arity(operator.add, 2), - ops.MapLength: unary(lambda arg: sa.func.cardinality(arg.keys())), - ops.Map: fixed_arity(pg.hstore, 2), - ops.ArgMin: _arg_min_max(sa.asc), - ops.ArgMax: _arg_min_max(sa.desc), - ops.ToJSONArray: unary( - lambda arg: sa.case( - ( - sa.func.json_typeof(arg) == "array", - sa.func.array( - sa.select( - sa.func.json_array_elements(arg).column_valued() - ).scalar_subquery() - ), - ), - else_=sa.null(), - ) - ), - ops.ArrayStringJoin: fixed_arity( - lambda sep, arr: sa.func.array_to_string(arr, sep), 2 - ), - ops.Strip: unary(lambda arg: sa.func.trim(arg, string.whitespace)), - ops.LStrip: unary(lambda arg: sa.func.ltrim(arg, string.whitespace)), - ops.RStrip: unary(lambda arg: sa.func.rtrim(arg, string.whitespace)), - ops.StartsWith: fixed_arity(lambda arg, prefix: arg.op("^@")(prefix), 2), - ops.Arbitrary: _arbitrary, - ops.StructColumn: _struct_column, - ops.StructField: _struct_field, - ops.First: reduction(sa.func.first), - ops.Last: reduction(sa.func.last), - ops.ExtractMicrosecond: fixed_arity( - lambda arg: sa.extract("microsecond", arg) % 1_000_000, 1 - ), - ops.Levenshtein: fixed_arity(sa.func.levenshtein, 2), - ops.ArraySort: fixed_arity(_array_sort, 1), - ops.ArrayIntersect: fixed_arity( - lambda left, right: sa.func.array( - sa.intersect( - sa.select(sa.func.unnest(left).column_valued()), - sa.select(sa.func.unnest(right).column_valued()), - ).scalar_subquery() - ), - 2, - ), - ops.ArrayRemove: fixed_arity( - lambda left, right: sa.func.array( - sa.except_( - sa.select(sa.func.unnest(left).column_valued()), sa.select(right) - ).scalar_subquery() - ), - 2, - ), - ops.ArrayUnion: fixed_arity( - lambda left, right: sa.func.array( - sa.union( - sa.select(sa.func.unnest(left).column_valued()), - sa.select(sa.func.unnest(right).column_valued()), - ).scalar_subquery() - ), - 2, - ), - ops.ArrayDistinct: fixed_arity( - lambda arg: sa.case( - (arg.is_(sa.null()), sa.null()), - else_=sa.func.array( - sa.select( - sa.distinct(sa.func.unnest(arg).column_valued()) - ).scalar_subquery() - ), - ), - 1, - ), - ops.ArrayPosition: fixed_arity(_array_position, 2), - ops.ArrayMap: _array_map, - ops.ArrayFilter: _array_filter, - ops.IntegerRange: _range, - ops.TimestampRange: _range, - ops.RegexSplit: fixed_arity(sa.func.regexp_split_to_array, 2), - } -) diff --git a/ibis/backends/postgres/tests/conftest.py b/ibis/backends/postgres/tests/conftest.py index 23d3c8f2d27d..7fd19bcabdd8 100644 --- a/ibis/backends/postgres/tests/conftest.py +++ b/ibis/backends/postgres/tests/conftest.py @@ -17,10 +17,8 @@ from typing import TYPE_CHECKING, Any import pytest -import sqlalchemy as sa import ibis -from ibis.backends.conftest import init_database from ibis.backends.tests.base import ServiceBackendTest if TYPE_CHECKING: @@ -50,22 +48,13 @@ class TestConf(ServiceBackendTest): supports_structs = False rounding_method = "half_to_even" service_name = "postgres" - deps = "psycopg2", "sqlalchemy" + deps = ("psycopg2",) @property def test_files(self) -> Iterable[Path]: return self.data_dir.joinpath("csv").glob("*.csv") - def _load_data( - self, - *, - user: str = PG_USER, - password: str = PG_PASS, - host: str = PG_HOST, - port: int = PG_PORT, - database: str = IBIS_TEST_POSTGRES_DB, - **_: Any, - ) -> None: + def _load_data(self, **_: Any) -> 
None: """Load test data into a PostgreSQL backend instance. Parameters @@ -75,21 +64,14 @@ def _load_data( script_dir Location of scripts defining schemas """ - init_database( - url=sa.engine.make_url( - f"postgresql://{user}:{password}@{host}:{port:d}/{database}" - ), - database=database, - schema=self.ddl_script, - isolation_level="AUTOCOMMIT", - recreate=False, - ) + with self.connection._safe_raw_sql(";".join(self.ddl_script)): + pass @staticmethod - def connect(*, tmpdir, worker_id, port: int | None = None, **kw): + def connect(*, tmpdir, worker_id, **kw): return ibis.postgres.connect( host=PG_HOST, - port=port or PG_PORT, + port=PG_PORT, user=PG_USER, password=PG_PASS, database=IBIS_TEST_POSTGRES_DB, @@ -103,13 +85,8 @@ def con(tmp_path_factory, data_dir, worker_id): @pytest.fixture(scope="module") -def db(con): - return con.database() - - -@pytest.fixture(scope="module") -def alltypes(db): - return db.functional_alltypes +def alltypes(con): + return con.table("functional_alltypes") @pytest.fixture(scope="module") @@ -127,12 +104,6 @@ def gdf(geotable): return geotable.execute() -@pytest.fixture(scope="module") -def alltypes_sqla(con, alltypes): - name = alltypes.op().name - return con._get_sqla_table(name) - - @pytest.fixture(scope="module") def intervals(con): return con.table("intervals") diff --git a/ibis/backends/postgres/tests/snapshots/test_client/test_compile_toplevel/out.sql b/ibis/backends/postgres/tests/snapshots/test_client/test_compile_toplevel/out.sql index cfbcf133a863..c0b4a0b83304 100644 --- a/ibis/backends/postgres/tests/snapshots/test_client/test_compile_toplevel/out.sql +++ b/ibis/backends/postgres/tests/snapshots/test_client/test_compile_toplevel/out.sql @@ -1,2 +1,3 @@ -SELECT sum(t0.foo) AS "Sum(foo)" -FROM t0 AS t0 \ No newline at end of file +SELECT + SUM("t0"."foo") AS "Sum(foo)" +FROM "t0" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_functions/test_analytic_functions/out.sql b/ibis/backends/postgres/tests/snapshots/test_functions/test_analytic_functions/out.sql index c00dec1bed25..4c9ed5e3c8fe 100644 --- a/ibis/backends/postgres/tests/snapshots/test_functions/test_analytic_functions/out.sql +++ b/ibis/backends/postgres/tests/snapshots/test_functions/test_analytic_functions/out.sql @@ -1,7 +1,7 @@ SELECT - RANK() OVER (ORDER BY t0.double_col ASC) - 1 AS rank, - DENSE_RANK() OVER (ORDER BY t0.double_col ASC) - 1 AS dense_rank, - CUME_DIST() OVER (ORDER BY t0.double_col ASC) AS cume_dist, - NTILE(7) OVER (ORDER BY t0.double_col ASC) - 1 AS ntile, - PERCENT_RANK() OVER (ORDER BY t0.double_col ASC) AS percent_rank -FROM functional_alltypes AS t0 \ No newline at end of file + RANK() OVER (ORDER BY "t0"."double_col" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "rank", + DENSE_RANK() OVER (ORDER BY "t0"."double_col" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "dense_rank", + CUME_DIST() OVER (ORDER BY "t0"."double_col" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "cume_dist", + NTILE(7) OVER (ORDER BY "t0"."double_col" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "ntile", + PERCENT_RANK() OVER (ORDER BY "t0"."double_col" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "percent_rank" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/double_to_int16/out.sql 
b/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/double_to_int16/out.sql new file mode 100644 index 000000000000..18531aedee79 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/double_to_int16/out.sql @@ -0,0 +1,3 @@ +SELECT + CAST("t0"."double_col" AS SMALLINT) AS "Cast(double_col, int16)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/double_to_int8/out.sql b/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/double_to_int8/out.sql new file mode 100644 index 000000000000..7b0c85dba24e --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/double_to_int8/out.sql @@ -0,0 +1,3 @@ +SELECT + CAST("t0"."double_col" AS SMALLINT) AS "Cast(double_col, int8)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_decimal_no_params/out.sql b/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_decimal_no_params/out.sql new file mode 100644 index 000000000000..f35429ea1219 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_decimal_no_params/out.sql @@ -0,0 +1,3 @@ +SELECT + CAST("t0"."string_col" AS DECIMAL) AS "Cast(string_col, decimal)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_decimal_params/out.sql b/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_decimal_params/out.sql new file mode 100644 index 000000000000..5d76cb2e8739 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_decimal_params/out.sql @@ -0,0 +1,3 @@ +SELECT + CAST("t0"."string_col" AS DECIMAL(9, 3)) AS "Cast(string_col, decimal(9, 3))" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_double/out.sql b/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_double/out.sql new file mode 100644 index 000000000000..5a1d036fe36a --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_double/out.sql @@ -0,0 +1,3 @@ +SELECT + CAST("t0"."string_col" AS DOUBLE PRECISION) AS "Cast(string_col, float64)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_float/out.sql b/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_float/out.sql new file mode 100644 index 000000000000..c87a4ad2c2ab --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_functions/test_cast/string_to_float/out.sql @@ -0,0 +1,3 @@ +SELECT + CAST("t0"."string_col" AS REAL) AS "Cast(string_col, float32)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_functions/test_date_cast/out.sql b/ibis/backends/postgres/tests/snapshots/test_functions/test_date_cast/out.sql new file mode 100644 index 000000000000..86944fb34d13 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_functions/test_date_cast/out.sql @@ -0,0 +1,3 @@ +SELECT + CAST("t0"."date_string_col" AS DATE) AS "Cast(date_string_col, date)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git 
a/ibis/backends/postgres/tests/snapshots/test_functions/test_timestamp_cast_noop/out1.sql b/ibis/backends/postgres/tests/snapshots/test_functions/test_timestamp_cast_noop/out1.sql new file mode 100644 index 000000000000..cffb9b1135c6 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_functions/test_timestamp_cast_noop/out1.sql @@ -0,0 +1,3 @@ +SELECT + "t0"."timestamp_col" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_functions/test_timestamp_cast_noop/out2.sql b/ibis/backends/postgres/tests/snapshots/test_functions/test_timestamp_cast_noop/out2.sql new file mode 100644 index 000000000000..d732cb470e76 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_functions/test_timestamp_cast_noop/out2.sql @@ -0,0 +1,3 @@ +SELECT + TO_TIMESTAMP("t0"."int_col") AS "Cast(int_col, timestamp)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/False/out.sql b/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/False/out.sql index 34761d9a76e0..5b727f7ca817 100644 --- a/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/False/out.sql +++ b/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/False/out.sql @@ -1 +1 @@ -WITH anon_2 AS (SELECT t2.string_col AS string_col, sum(t2.double_col) AS metric FROM functional_alltypes AS t2 GROUP BY 1), anon_3 AS (SELECT t3.string_col AS string_col, sum(t3.double_col) AS metric FROM functional_alltypes AS t3 GROUP BY 1), anon_1 AS (SELECT t2.string_col AS string_col, t2.metric AS metric FROM (SELECT anon_2.string_col AS string_col, anon_2.metric AS metric FROM anon_2 UNION ALL SELECT anon_3.string_col AS string_col, anon_3.metric AS metric FROM anon_3) AS t2), anon_4 AS (SELECT t3.string_col AS string_col, sum(t3.double_col) AS metric FROM functional_alltypes AS t3 GROUP BY 1) SELECT t1.string_col, t1.metric FROM (SELECT anon_1.string_col AS string_col, anon_1.metric AS metric FROM anon_1 UNION ALL SELECT anon_4.string_col AS string_col, anon_4.metric AS metric FROM anon_4) AS t1 \ No newline at end of file +SELECT "t6"."string_col", "t6"."metric" FROM ( SELECT "t4"."string_col", "t4"."metric" FROM ( SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 UNION ALL SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 ) AS "t4" UNION ALL SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 ) AS "t6" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/True/out.sql b/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/True/out.sql index 6ce31e7468bb..1388747c56f0 100644 --- a/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/True/out.sql +++ b/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/True/out.sql @@ -1 +1 @@ -WITH anon_2 AS (SELECT t2.string_col AS string_col, sum(t2.double_col) AS metric FROM functional_alltypes AS t2 GROUP BY 1), anon_3 AS (SELECT t3.string_col AS string_col, sum(t3.double_col) AS metric FROM functional_alltypes AS t3 GROUP BY 1), anon_1 AS (SELECT t2.string_col AS string_col, t2.metric AS metric FROM (SELECT anon_2.string_col AS string_col, anon_2.metric AS metric FROM anon_2 UNION SELECT anon_3.string_col AS string_col, 
anon_3.metric AS metric FROM anon_3) AS t2), anon_4 AS (SELECT t3.string_col AS string_col, sum(t3.double_col) AS metric FROM functional_alltypes AS t3 GROUP BY 1) SELECT t1.string_col, t1.metric FROM (SELECT anon_1.string_col AS string_col, anon_1.metric AS metric FROM anon_1 UNION SELECT anon_4.string_col AS string_col, anon_4.metric AS metric FROM anon_4) AS t1 \ No newline at end of file +SELECT "t6"."string_col", "t6"."metric" FROM ( SELECT "t4"."string_col", "t4"."metric" FROM ( SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 UNION SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 ) AS "t4" UNION SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 ) AS "t6" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_equals/out1.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_equals/out1.sql new file mode 100644 index 000000000000..aa14d4bdad31 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_equals/out1.sql @@ -0,0 +1,9 @@ +SELECT + "t0"."id", + ST_ASEWKB("t0"."geo_point") AS "geo_point", + ST_ASEWKB("t0"."geo_linestring") AS "geo_linestring", + ST_ASEWKB("t0"."geo_polygon") AS "geo_polygon", + ST_ASEWKB("t0"."geo_multipolygon") AS "geo_multipolygon", + ST_Y("t0"."geo_point") AS "Location_Latitude", + ST_Y("t0"."geo_point") AS "Latitude" +FROM "geo" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_equals/out2.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_equals/out2.sql new file mode 100644 index 000000000000..cea9603098e7 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_equals/out2.sql @@ -0,0 +1,3 @@ +SELECT + "t0"."geo_point" = "t0"."geo_point" AS "Equals(geo_point, geo_point)" +FROM "geo" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_equals/out3.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_equals/out3.sql new file mode 100644 index 000000000000..c8ade437f35f --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_equals/out3.sql @@ -0,0 +1,3 @@ +SELECT + ST_EQUALS("t0"."geo_point", "t0"."geo_point") AS "tmp" +FROM "geo" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-geography/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-geography/out.sql new file mode 100644 index 000000000000..384653def536 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-geography/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('LINESTRING (30 10, 10 30, 40 40)', 4326) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-geometry/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-geometry/out.sql new file mode 100644 index 000000000000..384653def536 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-geometry/out.sql @@ -0,0 +1,6 @@ +SELECT + 
ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('LINESTRING (30 10, 10 30, 40 40)', 4326) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-none/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-none/out.sql new file mode 100644 index 000000000000..16f93230793e --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-none/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('LINESTRING (30 10, 10 30, 40 40)') AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-srid/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-srid/out.sql new file mode 100644 index 000000000000..384653def536 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/linestring-srid/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('LINESTRING (30 10, 10 30, 40 40)', 4326) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-geography/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-geography/out.sql new file mode 100644 index 000000000000..231ae306b595 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-geography/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 30 10))', 4326) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-geometry/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-geometry/out.sql new file mode 100644 index 000000000000..231ae306b595 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-geometry/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 30 10))', 4326) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-none/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-none/out.sql new file mode 100644 index 000000000000..3d6d6e59e03b --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-none/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 30 10))') AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-srid/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-srid/out.sql new file mode 100644 index 000000000000..231ae306b595 --- /dev/null +++ 
b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multilinestring-srid/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 30 10))', 4326) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-geography/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-geography/out.sql new file mode 100644 index 000000000000..0acd04127b7e --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-geography/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('MULTIPOINT (10 40, 40 30, 20 20, 30 10)', 4326) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-geometry/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-geometry/out.sql new file mode 100644 index 000000000000..0acd04127b7e --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-geometry/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('MULTIPOINT (10 40, 40 30, 20 20, 30 10)', 4326) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-none/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-none/out.sql new file mode 100644 index 000000000000..12a5a5bf5a4e --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-none/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('MULTIPOINT (10 40, 40 30, 20 20, 30 10)') AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-srid/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-srid/out.sql new file mode 100644 index 000000000000..0acd04127b7e --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipoint-srid/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('MULTIPOINT (10 40, 40 30, 20 20, 30 10)', 4326) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-geography/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-geography/out.sql new file mode 100644 index 000000000000..93994732f2a6 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-geography/out.sql @@ -0,0 +1,11 @@ +SELECT + ST_ASEWKB( + "t0"."" + ) AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT( + 'MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35, 30 20, 20 15, 20 25, 30 20, 20 35)))', + 4326 + ) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-geometry/out.sql 
b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-geometry/out.sql new file mode 100644 index 000000000000..93994732f2a6 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-geometry/out.sql @@ -0,0 +1,11 @@ +SELECT + ST_ASEWKB( + "t0"."" + ) AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT( + 'MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35, 30 20, 20 15, 20 25, 30 20, 20 35)))', + 4326 + ) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-none/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-none/out.sql new file mode 100644 index 000000000000..f2844bcefad4 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-none/out.sql @@ -0,0 +1,10 @@ +SELECT + ST_ASEWKB( + "t0"."" + ) AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT( + 'MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35, 30 20, 20 15, 20 25, 30 20, 20 35)))' + ) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-srid/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-srid/out.sql new file mode 100644 index 000000000000..93994732f2a6 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/multipolygon-srid/out.sql @@ -0,0 +1,11 @@ +SELECT + ST_ASEWKB( + "t0"."" + ) AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT( + 'MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35, 30 20, 20 15, 20 25, 30 20, 20 35)))', + 4326 + ) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-geography/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-geography/out.sql new file mode 100644 index 000000000000..9880e4359114 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-geography/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (30 10)', 4326) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-geometry/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-geometry/out.sql new file mode 100644 index 000000000000..9880e4359114 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-geometry/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (30 10)', 4326) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-none/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-none/out.sql new file mode 100644 index 000000000000..e7f1dd649849 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-none/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (30 10)') AS "" +) AS "t0" \ No newline 
at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-srid/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-srid/out.sql new file mode 100644 index 000000000000..9880e4359114 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/point-srid/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (30 10)', 4326) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-geography/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-geography/out.sql new file mode 100644 index 000000000000..8336f188def1 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-geography/out.sql @@ -0,0 +1,11 @@ +SELECT + ST_ASEWKB( + "t0"."" + ) AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT( + 'POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10, 20 30, 35 35, 30 20, 20 30, 35 10))', + 4326 + ) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-geometry/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-geometry/out.sql new file mode 100644 index 000000000000..8336f188def1 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-geometry/out.sql @@ -0,0 +1,11 @@ +SELECT + ST_ASEWKB( + "t0"."" + ) AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT( + 'POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10, 20 30, 35 35, 30 20, 20 30, 35 10))', + 4326 + ) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-none/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-none/out.sql new file mode 100644 index 000000000000..0ae929972525 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-none/out.sql @@ -0,0 +1,10 @@ +SELECT + ST_ASEWKB( + "t0"."" + ) AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT( + 'POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10, 20 30, 35 35, 30 20, 20 30, 35 10))' + ) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-srid/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-srid/out.sql new file mode 100644 index 000000000000..8336f188def1 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon-srid/out.sql @@ -0,0 +1,11 @@ +SELECT + ST_ASEWKB( + "t0"."" + ) AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT( + 'POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10, 20 30, 35 35, 30 20, 20 30, 35 10))', + 4326 + ) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-geography/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-geography/out.sql new file mode 100644 index 000000000000..22a502649186 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-geography/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( 
+ SELECT + ST_GEOMFROMTEXT('POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))', 4326) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-geometry/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-geometry/out.sql new file mode 100644 index 000000000000..22a502649186 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-geometry/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))', 4326) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-none/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-none/out.sql new file mode 100644 index 000000000000..730d9bb92ef5 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-none/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))') AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-srid/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-srid/out.sql new file mode 100644 index 000000000000..22a502649186 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_literals_smoke/polygon_single-srid/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))', 4326) AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_contains/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_contains/out.sql new file mode 100644 index 000000000000..3310ba250b2c --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_contains/out.sql @@ -0,0 +1,3 @@ +SELECT + ST_CONTAINS("t0"."geo_linestring", "t0"."geo_point") AS "GeoContains(geo_linestring, geo_point)" +FROM "geo" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_end_point/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_end_point/out.sql new file mode 100644 index 000000000000..fe76257dc4eb --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_end_point/out.sql @@ -0,0 +1,3 @@ +SELECT + ST_ASEWKB(ST_ENDPOINT("t0"."geo_linestring")) AS "GeoEndPoint(geo_linestring)" +FROM "geo" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_length/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_length/out.sql new file mode 100644 index 000000000000..d5276eec3b7b --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_length/out.sql @@ -0,0 +1,3 @@ +SELECT + ST_LENGTH("t0"."geo_linestring") AS "GeoLength(geo_linestring)" +FROM "geo" AS "t0" \ No newline at 
end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_start_point/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_start_point/out.sql new file mode 100644 index 000000000000..1b2daba5bb61 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/linestring_start_point/out.sql @@ -0,0 +1,3 @@ +SELECT + ST_ASEWKB(ST_STARTPOINT("t0"."geo_linestring")) AS "GeoStartPoint(geo_linestring)" +FROM "geo" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/multipolygon_n_points/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/multipolygon_n_points/out.sql new file mode 100644 index 000000000000..c4677d695775 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/multipolygon_n_points/out.sql @@ -0,0 +1,3 @@ +SELECT + ST_NPOINTS("t0"."geo_multipolygon") AS "GeoNPoints(geo_multipolygon)" +FROM "geo" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_set_srid/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_set_srid/out.sql new file mode 100644 index 000000000000..556d7bd96994 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_set_srid/out.sql @@ -0,0 +1,3 @@ +SELECT + ST_ASEWKB(ST_SETSRID("t0"."geo_point", 4326)) AS "GeoSetSRID(geo_point, 4326)" +FROM "geo" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_srid/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_srid/out.sql new file mode 100644 index 000000000000..e2a132394a0b --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_srid/out.sql @@ -0,0 +1,3 @@ +SELECT + ST_SRID("t0"."geo_point") AS "GeoSRID(geo_point)" +FROM "geo" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_x/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_x/out.sql new file mode 100644 index 000000000000..39342ae28992 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_x/out.sql @@ -0,0 +1,3 @@ +SELECT + ST_X("t0"."geo_point") AS "GeoX(geo_point)" +FROM "geo" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_y/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_y/out.sql new file mode 100644 index 000000000000..6cbd1d831d6e --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/point_y/out.sql @@ -0,0 +1,3 @@ +SELECT + ST_Y("t0"."geo_point") AS "GeoY(geo_point)" +FROM "geo" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/polygon_area/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/polygon_area/out.sql new file mode 100644 index 000000000000..bd730a23dee2 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/polygon_area/out.sql @@ -0,0 +1,3 @@ +SELECT + ST_AREA("t0"."geo_polygon") AS "GeoArea(geo_polygon)" 
+FROM "geo" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/polygon_perimeter/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/polygon_perimeter/out.sql new file mode 100644 index 000000000000..675821a3d303 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_geo_ops_smoke/polygon_perimeter/out.sql @@ -0,0 +1,3 @@ +SELECT + ST_PERIMETER("t0"."geo_polygon") AS "GeoPerimeter(geo_polygon)" +FROM "geo" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr0/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr0/out.sql new file mode 100644 index 000000000000..9e1fccff2c8d --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr0/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."p") AS "p" +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (0 0)') AS "p" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr1/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr1/out.sql new file mode 100644 index 000000000000..4e4e332e87c7 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr1/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."p") AS "p" +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (0 0)', 4326) AS "p" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr2/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr2/out.sql new file mode 100644 index 000000000000..4e4e332e87c7 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr2/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."p") AS "p" +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (0 0)', 4326) AS "p" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr3/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr3/out.sql new file mode 100644 index 000000000000..63dc27e2cea0 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr3/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."p") AS "p" +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (1 1)', 4326) AS "p" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr4/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr4/out.sql new file mode 100644 index 000000000000..98a279ff518c --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr4/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."p") AS "p" +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (2 2)', 4326) AS "p" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr5/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr5/out.sql new file mode 
100644 index 000000000000..4e4e332e87c7 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr5/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."p") AS "p" +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (0 0)', 4326) AS "p" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr6/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr6/out.sql new file mode 100644 index 000000000000..63dc27e2cea0 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr6/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."p") AS "p" +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (1 1)', 4326) AS "p" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr7/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr7/out.sql new file mode 100644 index 000000000000..98a279ff518c --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr7/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."p") AS "p" +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (2 2)', 4326) AS "p" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp0/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp0/out.sql new file mode 100644 index 000000000000..fdae5ceda977 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp0/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (0 0)') AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp1/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp1/out.sql new file mode 100644 index 000000000000..b415da2fa321 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp1/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (1 1)') AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp2/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp2/out.sql new file mode 100644 index 000000000000..61eaed43848f --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp2/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (2 2)') AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp3/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp3/out.sql new file mode 100644 index 000000000000..c34726cf1776 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp3/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('LINESTRING (0 0, 1 1, 2 2)') AS "" +) AS 
"t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp4/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp4/out.sql new file mode 100644 index 000000000000..2a64abfadac5 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp4/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('LINESTRING (2 2, 1 1, 0 0)') AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp5/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp5/out.sql new file mode 100644 index 000000000000..cba5f9d62498 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp5/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('POLYGON ((0 0, 1 1, 2 2, 0 0))') AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp6/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp6/out.sql new file mode 100644 index 000000000000..f30ed0e5b25a --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp6/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('MULTIPOLYGON (((0 0, 1 1, 2 2, 0 0)))') AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp7/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp7/out.sql new file mode 100644 index 000000000000..65785b17019f --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp7/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('MULTILINESTRING ((0 0, 1 1, 2 2), (2 2, 1 1, 0 0))') AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp8/out.sql b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp8/out.sql new file mode 100644 index 000000000000..f481c00ddfe9 --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp8/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASEWKB("t0"."") AS "" +FROM ( + SELECT + ST_GEOMFROMTEXT('MULTIPOINT (0 0, 1 1, 2 2)') AS "" +) AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_postgis/test_select_linestring_geodata/out.sql b/ibis/backends/postgres/tests/snapshots/test_postgis/test_select_linestring_geodata/out.sql new file mode 100644 index 000000000000..994b32c6a43b --- /dev/null +++ b/ibis/backends/postgres/tests/snapshots/test_postgis/test_select_linestring_geodata/out.sql @@ -0,0 +1,3 @@ +SELECT + ST_ASEWKB("t0"."geo_linestring") AS "geo_linestring" +FROM "geo" AS "t0" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_postgis/test_select_multipolygon_geodata/out.sql b/ibis/backends/postgres/tests/snapshots/test_postgis/test_select_multipolygon_geodata/out.sql new file mode 100644 index 
000000000000..f043a8fcca89
--- /dev/null
+++ b/ibis/backends/postgres/tests/snapshots/test_postgis/test_select_multipolygon_geodata/out.sql
@@ -0,0 +1,3 @@
+SELECT
+  ST_ASEWKB("t0"."geo_multipolygon") AS "geo_multipolygon"
+FROM "geo" AS "t0"
\ No newline at end of file
diff --git a/ibis/backends/postgres/tests/snapshots/test_postgis/test_select_point_geodata/out.sql b/ibis/backends/postgres/tests/snapshots/test_postgis/test_select_point_geodata/out.sql
new file mode 100644
index 000000000000..9849ac707de2
--- /dev/null
+++ b/ibis/backends/postgres/tests/snapshots/test_postgis/test_select_point_geodata/out.sql
@@ -0,0 +1,3 @@
+SELECT
+  ST_ASEWKB("t0"."geo_point") AS "geo_point"
+FROM "geo" AS "t0"
\ No newline at end of file
diff --git a/ibis/backends/postgres/tests/snapshots/test_postgis/test_select_polygon_geodata/out.sql b/ibis/backends/postgres/tests/snapshots/test_postgis/test_select_polygon_geodata/out.sql
new file mode 100644
index 000000000000..b2b91e534792
--- /dev/null
+++ b/ibis/backends/postgres/tests/snapshots/test_postgis/test_select_polygon_geodata/out.sql
@@ -0,0 +1,3 @@
+SELECT
+  ST_ASEWKB("t0"."geo_polygon") AS "geo_polygon"
+FROM "geo" AS "t0"
\ No newline at end of file
diff --git a/ibis/backends/postgres/tests/test_client.py b/ibis/backends/postgres/tests/test_client.py
index 0029a5925d1b..25afed4a0b9e 100644
--- a/ibis/backends/postgres/tests/test_client.py
+++ b/ibis/backends/postgres/tests/test_client.py
@@ -19,17 +19,15 @@ import pandas as pd
 import pandas.testing as tm
 import pytest
+import sqlglot as sg
 from pytest import param
 import ibis
+import ibis.common.exceptions as com
 import ibis.expr.datatypes as dt
 import ibis.expr.types as ir
-from ibis.tests.util import assert_equal
 pytest.importorskip("psycopg2")
-sa = pytest.importorskip("sqlalchemy")
-
-from sqlalchemy.dialects import postgresql  # noqa: E402
 POSTGRES_TEST_DB = os.environ.get("IBIS_TEST_POSTGRES_DATABASE", "ibis_testing")
 IBIS_POSTGRES_HOST = os.environ.get("IBIS_TEST_POSTGRES_HOST", "localhost")
@@ -80,33 +78,6 @@ def test_list_databases(con):
     assert POSTGRES_TEST_DB in con.list_databases()
-def test_schema_type_conversion(con):
-    typespec = [
-        # name, type, nullable
-        ("json", postgresql.JSON, True, dt.JSON),
-        ("jsonb", postgresql.JSONB, True, dt.JSON),
-        ("uuid", postgresql.UUID, True, dt.UUID),
-        ("macaddr", postgresql.MACADDR, True, dt.MACADDR),
-        ("inet", postgresql.INET, True, dt.INET),
-    ]
-
-    sqla_types = []
-    ibis_types = []
-    for name, t, nullable, ibis_type in typespec:
-        sqla_types.append(sa.Column(name, t, nullable=nullable))
-        ibis_types.append((name, ibis_type(nullable=nullable)))
-
-    # Create a table with placeholder stubs for JSON, JSONB, and UUID.
-    table = sa.Table("tname", sa.MetaData(), *sqla_types)
-
-    # Check that we can correctly create a schema with dt.any for the
-    # missing types.
-    schema = con._schema_from_sqla_table(table)
-    expected = ibis.schema(ibis_types)
-
-    assert_equal(schema, expected)
-
-
 def test_interval_films_schema(con):
     t = con.table("films")
     assert t.len.type() == dt.Interval(unit="m")
@@ -131,13 +102,10 @@ def test_all_interval_types_execute(intervals, column, expected_dtype):
     assert issubclass(series.dtype.type, np.timedelta64)
-@pytest.mark.xfail(
-    raises=ValueError, reason="Year and month interval types not yet supported"
-)
 def test_unsupported_intervals(con):
     t = con.table("not_supported_intervals")
     assert t["a"].type() == dt.Interval("Y")
-    assert t["b"].type() == dt.Interval("M")
+    assert t["b"].type() == dt.Interval("Y")
     assert t["g"].type() == dt.Interval("M")
@@ -152,12 +120,13 @@ def test_create_and_drop_table(con, temp_table, params):
         ]
     )
-    con.create_table(temp_table, schema=sch, **params)
+    t = con.create_table(temp_table, schema=sch, **params)
+    assert t is not None
     assert con.table(temp_table, **params) is not None
     con.drop_table(temp_table, **params)
-    with pytest.raises(sa.exc.NoSuchTableError):
+    with pytest.raises(com.IbisError):
         con.table(temp_table, **params)
@@ -187,8 +156,8 @@ def test_create_and_drop_table(con, temp_table, params):
         ("date", dt.date),
         ("time", dt.time),
         ("time without time zone", dt.time),
-        ("timestamp without time zone", dt.timestamp),
-        ("timestamp with time zone", dt.Timestamp("UTC")),
+        ("timestamp without time zone", dt.Timestamp(scale=6)),
+        ("timestamp with time zone", dt.Timestamp("UTC", scale=6)),
         ("interval", dt.Interval("s")),
         ("numeric", dt.decimal),
         ("numeric(3, 2)", dt.Decimal(3, 2)),
@@ -200,9 +169,9 @@ def test_create_and_drop_table(con, temp_table, params):
     ],
 )
 def test_get_schema_from_query(con, pg_type, expected_type):
-    name = con._quote(ibis.util.guid())
-    with con.begin() as c:
-        c.exec_driver_sql(f"CREATE TEMP TABLE {name} (x {pg_type}, y {pg_type}[])")
+    name = sg.table(ibis.util.guid()).sql("postgres")
+    with con._safe_raw_sql(f"CREATE TEMP TABLE {name} (x {pg_type}, y {pg_type}[])"):
+        pass
     expected_schema = ibis.schema(dict(x=expected_type, y=dt.Array(expected_type)))
     result_schema = con._get_schema_using_query(f"SELECT x, y FROM {name}")
     assert result_schema == expected_schema
@@ -216,6 +185,7 @@ def test_unknown_column_type(con, col):
 def test_insert_with_cte(con):
     X = con.create_table("X", schema=ibis.schema(dict(id="int")), temp=True)
+    assert "X" in con.list_tables()
     expr = X.join(X.mutate(a=X["id"] + 1), ["id"])
     Y = con.create_table("Y", expr, temp=True)
     assert Y.execute().empty
@@ -228,32 +198,29 @@ def test_connect_url_with_empty_host():
 @pytest.fixture(scope="module")
 def contz(con):
-    with con.begin() as c:
-        tz = c.exec_driver_sql("SHOW TIMEZONE").scalar()
-        c.exec_driver_sql("SET TIMEZONE TO 'America/New_York'")
+    (tz,) = con.raw_sql("SHOW TIMEZONE").fetchone()
+    con.raw_sql("SET TIMEZONE TO 'America/New_York'")
     yield con
-    with con.begin() as c:
-        c.exec_driver_sql(f"SET TIMEZONE TO '{tz}'")
+    con.raw_sql(f"SET TIMEZONE TO '{tz}'")
-def test_timezone_from_column(contz, snapshot):
-    with contz.begin() as c:
-        c.exec_driver_sql(
-            """
-            CREATE TEMPORARY TABLE x (
-                id BIGINT,
-                ts_tz TIMESTAMP WITH TIME ZONE NOT NULL,
-                ts_no_tz TIMESTAMP WITHOUT TIME ZONE NOT NULL
-            );
-            INSERT INTO x VALUES
-                (1, '2018-01-01 00:00:01+00', '2018-01-01 00:00:02');
-            CREATE TEMPORARY TABLE y AS SELECT 1::BIGINT AS id;
-            """
-        )
+def test_timezone_from_column(con, contz, snapshot):
+    con.raw_sql(
+        """
+        CREATE TEMPORARY TABLE x (
+            id BIGINT,
+            ts_tz TIMESTAMP WITH TIME ZONE NOT NULL,
+            ts_no_tz TIMESTAMP WITHOUT TIME ZONE NOT NULL
+        );
+        INSERT INTO x VALUES
+            (1, '2018-01-01 00:00:01+00', '2018-01-01 00:00:02');
+        CREATE TEMPORARY TABLE y AS SELECT 1::BIGINT AS id;
+        """
+    )
     case = (
         contz.table("x")
diff --git a/ibis/backends/postgres/tests/test_functions.py b/ibis/backends/postgres/tests/test_functions.py
index 5ade3026bd81..d4929de48b6d 100644
--- a/ibis/backends/postgres/tests/test_functions.py
+++ b/ibis/backends/postgres/tests/test_functions.py
@@ -12,64 +12,35 @@ from pytest import param
 import ibis
-import ibis.common.exceptions as exc
 import ibis.expr.datatypes as dt
 import ibis.expr.types as ir
 from ibis import config
 from ibis import literal as L
-from ibis.backends.conftest import WINDOWS
 pytest.importorskip("psycopg2")
-sa = pytest.importorskip("sqlalchemy")
-
-from sqlalchemy.dialects import postgresql  # noqa: E402
 @pytest.mark.parametrize(
-    ("left_func", "right_func"),
+    ("expr_fn"),
     [
+        param(lambda t: t.double_col.cast("int8"), id="double_to_int8"),
+        param(lambda t: t.double_col.cast("int16"), id="double_to_int16"),
+        param(lambda t: t.string_col.cast("double"), id="string_to_double"),
+        param(lambda t: t.string_col.cast("float32"), id="string_to_float"),
+        param(lambda t: t.string_col.cast("decimal"), id="string_to_decimal_no_params"),
         param(
-            lambda t: t.double_col.cast("int8"),
-            lambda at: sa.cast(at.c.double_col, sa.SMALLINT),
-            id="double_to_int8",
-        ),
-        param(
-            lambda t: t.double_col.cast("int16"),
-            lambda at: sa.cast(at.c.double_col, sa.SMALLINT),
-            id="double_to_int16",
-        ),
-        param(
-            lambda t: t.string_col.cast("double"),
-            lambda at: sa.cast(at.c.string_col, postgresql.DOUBLE_PRECISION),
-            id="string_to_double",
-        ),
-        param(
-            lambda t: t.string_col.cast("float32"),
-            lambda at: sa.cast(at.c.string_col, postgresql.REAL),
-            id="string_to_float",
-        ),
-        param(
-            lambda t: t.string_col.cast("decimal"),
-            lambda at: sa.cast(at.c.string_col, sa.NUMERIC()),
-            id="string_to_decimal_no_params",
-        ),
-        param(
-            lambda t: t.string_col.cast("decimal(9, 3)"),
-            lambda at: sa.cast(at.c.string_col, sa.NUMERIC(9, 3)),
-            id="string_to_decimal_params",
+            lambda t: t.string_col.cast("decimal(9, 3)"), id="string_to_decimal_params"
         ),
     ],
 )
-def test_cast(alltypes, alltypes_sqla, translate, left_func, right_func):
-    left = left_func(alltypes)
-    right = right_func(alltypes_sqla.alias("t0"))
-    assert str(translate(left.op()).compile()) == str(right.compile())
+def test_cast(alltypes, expr_fn, snapshot):
+    expr = expr_fn(alltypes)
+    snapshot.assert_match(expr.compile(), "out.sql")
-def test_date_cast(alltypes, alltypes_sqla, translate):
+def test_date_cast(alltypes, snapshot):
     result = alltypes.date_string_col.cast("date")
-    expected = sa.cast(alltypes_sqla.alias("t0").c.date_string_col, sa.DATE)
-    assert str(translate(result.op())) == str(expected)
+    snapshot.assert_match(result.compile(), "out.sql")
@@ -90,29 +61,24 @@ def test_date_cast(alltypes, alltypes_sqla, translate):
         "month",
     ],
 )
-def test_noop_cast(alltypes, alltypes_sqla, translate, column):
+def test_noop_cast(alltypes, column):
     col = alltypes[column]
-    result = col.cast(col.type())
-    expected = alltypes_sqla.alias("t0").c[column]
-    assert result.equals(col)
-    assert str(translate(result.op())) == str(expected)
+    assert col.cast(col.type()).equals(col)
-def test_timestamp_cast_noop(alltypes, alltypes_sqla, translate):
+def test_timestamp_cast_noop(alltypes, snapshot):
     # See GH #592
-    result1 = alltypes.timestamp_col.cast("timestamp")
+    timestamp_col_type = alltypes.timestamp_col.type()
+    result1 = alltypes.timestamp_col.cast(timestamp_col_type)
     result2 = alltypes.int_col.cast("timestamp")
     assert isinstance(result1, ir.TimestampColumn)
     assert isinstance(result2, ir.TimestampColumn)
-    expected1 = alltypes_sqla.alias("t0").c.timestamp_col
-    expected2 = sa.cast(
-        sa.func.to_timestamp(alltypes_sqla.alias("t0").c.int_col), sa.TIMESTAMP()
-    )
+    assert result1.type() == timestamp_col_type
-    assert str(translate(result1.op())) == str(expected1)
-    assert str(translate(result2.op())) == str(expected2)
+    snapshot.assert_match(result1.compile(), "out1.sql")
+    snapshot.assert_match(result2.compile(), "out2.sql")
@@ -120,35 +86,48 @@ def test_timestamp_cast_noop(alltypes, alltypes_sqla, translate):
     [
         # there could be pathological failure at midnight somewhere, but
         # that's okay
-        "%Y%m%d %H",
+        param("%Y%m%d %H", id="hourly"),
         # test quoting behavior
-        'DD BAR %w FOO "DD"',
-        'DD BAR %w FOO "D',
-        'DD BAR "%w" FOO "D',
-        'DD BAR "%d" FOO "D',
+        param(
+            'DD BAR %w FOO "DD"',
+            id="quoted-dd",
+            marks=pytest.mark.xfail(reason="broken in sqlglot"),
+        ),
+        param(
+            'DD BAR %w FOO "D',
+            id="w",
+            marks=pytest.mark.xfail(reason="broken in sqlglot"),
+        ),
+        param(
+            'DD BAR "%w" FOO "D',
+            id="quoted-w",
+            marks=pytest.mark.xfail(reason="broken in sqlglot"),
+        ),
+        param(
+            'DD BAR "%d" FOO "D',
+            id="quoted-d",
+            marks=pytest.mark.xfail(reason="broken in sqlglot"),
+        ),
         param(
             'DD BAR "%c" FOO "D',
             marks=pytest.mark.xfail(
-                condition=WINDOWS,
-                raises=exc.UnsupportedOperationError,
-                reason="Locale-specific format specs not available on Windows",
+                reason="Locale-specific format specs not implemented in sqlglot"
             ),
+            id="quoted-c",
         ),
         param(
             'DD BAR "%x" FOO "D',
             marks=pytest.mark.xfail(
-                condition=WINDOWS,
-                raises=exc.UnsupportedOperationError,
-                reason="Locale-specific format specs not available on Windows",
+                reason="Locale-specific format specs not implemented in sqlglot"
            ),
+            id="quoted-x",
        ),
         param(
             'DD BAR "%X" FOO "D',
             marks=pytest.mark.xfail(
-                condition=WINDOWS,
-                raises=exc.UnsupportedOperationError,
-                reason="Locale-specific format specs not available on Windows",
+                reason="Locale-specific format specs not implemented in sqlglot"
             ),
+            id="quoted-X",
         ),
     ],
 )
@@ -196,11 +175,6 @@ def test_typeof(con, value, expected):
     assert con.execute(value.typeof()) == expected
-@pytest.mark.parametrize(("value", "expected"), [(0, None), (5.5, 5.5)])
-def test_nullif_zero(con, value, expected):
-    assert con.execute(L(value).nullif(0)) == expected
-
-
 @pytest.mark.parametrize(("value", "expected"), [("foo_bar", 7), ("", 0)])
 def test_string_length(con, value, expected):
     assert con.execute(L(value).length()) == expected
@@ -526,12 +500,7 @@ def test_union_cte(alltypes, distinct, snapshot):
     expr2 = expr1.view()
     expr3 = expr1.view()
     expr = expr1.union(expr2, distinct=distinct).union(expr3, distinct=distinct)
-    result = " ".join(
-        line.strip()
-        for line in str(
-            expr.compile().compile(compile_kwargs={"literal_binds": True})
-        ).splitlines()
-    )
+    result = " ".join(line.strip() for line in expr.compile().splitlines())
     snapshot.assert_match(result, "out.sql")
@@ -698,15 +667,12 @@ def test_not_exists(alltypes, df):
 def test_interactive_repr_shows_error(alltypes):
-    # #591.
Doing this in PostgreSQL because so many built-in functions are - # not available - expr = alltypes.int_col.convert_base(10, 2) with config.option_context("interactive", True): result = repr(expr) - assert "no translation rule" in result.lower() + assert "OperationNotDefinedError('BaseConvert')" in result def test_subquery(alltypes, df): @@ -1014,9 +980,7 @@ def test_array_concat_mixed_types(array_types): @pytest.fixture def t(con, temp_table): with con.begin() as c: - c.exec_driver_sql( - f"CREATE TABLE {con._quote(temp_table)} (id SERIAL PRIMARY KEY, name TEXT)" - ) + c.execute(f"CREATE TABLE {temp_table} (id SERIAL PRIMARY KEY, name TEXT)") return con.table(temp_table) @@ -1026,11 +990,11 @@ def s(con, t, temp_table2): assert temp_table != temp_table2 with con.begin() as c: - c.exec_driver_sql( + c.execute( f""" - CREATE TABLE {con._quote(temp_table2)} ( + CREATE TABLE {temp_table2} ( id SERIAL PRIMARY KEY, - left_t_id INTEGER REFERENCES {con._quote(temp_table)}, + left_t_id INTEGER REFERENCES {temp_table}, cost DOUBLE PRECISION ) """ @@ -1040,43 +1004,13 @@ def s(con, t, temp_table2): @pytest.fixture def trunc(con, temp_table): - quoted = con._quote(temp_table) + quoted = temp_table with con.begin() as c: - c.exec_driver_sql(f"CREATE TABLE {quoted} (id SERIAL PRIMARY KEY, name TEXT)") - c.exec_driver_sql(f"INSERT INTO {quoted} (name) VALUES ('a'), ('b'), ('c')") + c.execute(f"CREATE TABLE {quoted} (id SERIAL PRIMARY KEY, name TEXT)") + c.execute(f"INSERT INTO {quoted} (name) VALUES ('a'), ('b'), ('c')") return con.table(temp_table) -def test_semi_join(con, t, s): - t_a = con._get_sqla_table(t.op().name).alias("t0") - s_a = con._get_sqla_table(s.op().name).alias("t1") - - expr = t.semi_join(s, t.id == s.id) - result = expr.compile().compile(compile_kwargs={"literal_binds": True}) - base = ( - sa.select(t_a.c.id, t_a.c.name) - .where(sa.exists(sa.select(1).where(t_a.c.id == s_a.c.id))) - .subquery() - ) - expected = sa.select(base.c.id, base.c.name) - assert str(result) == str(expected) - - -def test_anti_join(con, t, s): - t_a = con._get_sqla_table(t.op().name).alias("t0") - s_a = con._get_sqla_table(s.op().name).alias("t1") - - expr = t.anti_join(s, t.id == s.id) - result = expr.compile().compile(compile_kwargs={"literal_binds": True}) - base = ( - sa.select(t_a.c.id, t_a.c.name) - .where(~sa.exists(sa.select(1).where(t_a.c.id == s_a.c.id))) - .subquery() - ) - expected = sa.select(base.c.id, base.c.name) - assert str(result) == str(expected) - - def test_create_table_from_expr(con, trunc, temp_table2): con.create_table(temp_table2, obj=trunc) t = con.table(temp_table2) @@ -1193,10 +1127,6 @@ def tz(request): @pytest.fixture def tzone_compute(con, temp_table, tz): - schema = ibis.schema([("ts", dt.Timestamp(tz)), ("b", "double"), ("c", "string")]) - con.create_table(temp_table, schema=schema, temp=True) - t = con.table(temp_table) - n = 10 df = pd.DataFrame( { @@ -1206,19 +1136,14 @@ def tzone_compute(con, temp_table, tz): } ) - df.to_sql( - temp_table, - con.con, - index=False, - if_exists="append", - dtype={"ts": sa.TIMESTAMP(timezone=True), "b": sa.FLOAT, "c": sa.TEXT}, + schema = ibis.schema( + {"ts": dt.Timestamp(timezone=tz, scale=6), "b": "float64", "c": "string"} ) - - yield t + return con.create_table(temp_table, df, schema=schema, temp=True) def test_ts_timezone_is_preserved(tzone_compute, tz): - assert dt.Timestamp(tz).equals(tzone_compute.ts.type()) + assert dt.Timestamp(tz, scale=6).equals(tzone_compute.ts.type()) def test_timestamp_with_timezone_select(tzone_compute, tz): 
@@ -1228,8 +1153,9 @@ def test_timestamp_with_timezone_select(tzone_compute, tz): def test_timestamp_type_accepts_all_timezones(con): with con.begin() as c: - cur = c.exec_driver_sql("SELECT name FROM pg_timezone_names").fetchall() - assert all(dt.Timestamp(row.name).timezone == row.name for row in cur) + c.execute("SELECT name FROM pg_timezone_names") + rows = c.fetchall() + assert all(dt.Timestamp(timezone=row).timezone == row for (row,) in rows) @pytest.mark.parametrize( @@ -1328,8 +1254,8 @@ def test_string_to_binary_cast(con): "FROM functional_alltypes LIMIT 10" ) with con.begin() as c: - cur = c.exec_driver_sql(sql_string) - raw_data = [row[0][0] for row in cur] + c.execute(sql_string) + raw_data = [row[0][0] for row in c.fetchall()] expected = pd.Series(raw_data, name=name) tm.assert_series_equal(result, expected) @@ -1345,8 +1271,9 @@ def test_string_to_binary_round_trip(con): "FROM functional_alltypes LIMIT 10" ) with con.begin() as c: - cur = c.exec_driver_sql(sql_string) - expected = pd.Series([row[0][0] for row in cur], name=name) + c.execute(sql_string) + rows = [row[0] for (row,) in c.fetchall()] + expected = pd.Series(rows, name=name) tm.assert_series_equal(result, expected) diff --git a/ibis/backends/postgres/tests/test_geospatial.py b/ibis/backends/postgres/tests/test_geospatial.py index 97a5de9ae266..f6fc3fd36232 100644 --- a/ibis/backends/postgres/tests/test_geospatial.py +++ b/ibis/backends/postgres/tests/test_geospatial.py @@ -7,13 +7,13 @@ from pytest import param import ibis +import ibis.expr.datatypes as dt pytestmark = pytest.mark.geospatial # TODO find a way to just run for the backends that support geo, without # skipping if dependencies are missing -pytest.importorskip("geoalchemy2") pytest.importorskip("geopandas") shapely = pytest.importorskip("shapely") @@ -53,42 +53,39 @@ @pytest.mark.parametrize( - ("expr", "expected"), + "expr", [ - (point_0, "'POINT (0.0 0.0)'"), - (point_0_4326, "'SRID=4326;POINT (0.0 0.0)'"), - (point_geom_0, "'SRID=4326;POINT (0.0 0.0)'::geometry"), - (point_geom_1, "'SRID=4326;POINT (1.0 1.0)'::geometry"), - (point_geom_2, "'SRID=4326;POINT (2.0 2.0)'::geometry"), - (point_geog_0, "'SRID=4326;POINT (0.0 0.0)'::geography"), - (point_geog_1, "'SRID=4326;POINT (1.0 1.0)'::geography"), - (point_geog_2, "'SRID=4326;POINT (2.0 2.0)'::geography"), + point_0, + point_0_4326, + point_geom_0, + point_geom_1, + point_geom_2, + point_geog_0, + point_geog_1, + point_geog_2, ], ) -def test_literal_geospatial_explicit(con, expr, expected): - result = str(con.compile(expr)) - assert result == f"SELECT {expected} AS p" +def test_literal_geospatial_explicit(con, expr, snapshot): + result = con.compile(expr) + snapshot.assert_match(result, "out.sql") @pytest.mark.parametrize( - ("shp", "expected"), + "shp", [ - (shp_point_0, "(0 0)"), - (shp_point_1, "(1 1)"), - (shp_point_2, "(2 2)"), - (shp_linestring_0, "(0 0, 1 1, 2 2)"), - (shp_linestring_1, "(2 2, 1 1, 0 0)"), - (shp_polygon_0, "((0 0, 1 1, 2 2, 0 0))"), - (shp_multipolygon_0, "(((0 0, 1 1, 2 2, 0 0)))"), - (shp_multilinestring_0, "((0 0, 1 1, 2 2), (2 2, 1 1, 0 0))"), - (shp_multipoint_0, "(0 0, 1 1, 2 2)"), + shp_point_0, + shp_point_1, + shp_point_2, + shp_linestring_0, + shp_linestring_1, + shp_polygon_0, + shp_multipolygon_0, + shp_multilinestring_0, + shp_multipoint_0, ], ) -def test_literal_geospatial_inferred(con, shp, expected): - result = str(con.compile(ibis.literal(shp).name("result"))) - name = type(shp).__name__.upper() - pair = f"{name} {expected}" - assert result == f"SELECT 
{pair!r} AS result" +def test_literal_geospatial_inferred(con, shp, snapshot): + snapshot.assert_match(con.compile(ibis.literal(shp)), "out.sql") @pytest.mark.parametrize( @@ -238,12 +235,12 @@ def test_get_point(geotable, expr_fn, expected): testing.assert_almost_equal(result, expected, decimal=2) -@pytest.mark.parametrize(("arg", "expected"), [(polygon_0, [1.98] * 5)]) -def test_area(geotable, arg, expected): +def test_area(con, geotable): """Testing for geo spatial area operation.""" - expr = geotable[geotable.id, arg.area().name("tmp")] - result = expr.execute()["tmp"] - testing.assert_almost_equal(result, expected, decimal=2) + expr = geotable.select("id", tmp=polygon_0.area()) + result = expr.execute()["tmp"].values + expected = np.array([con.execute(polygon_0).area] * len(result)) + assert pytest.approx(result) == expected @pytest.mark.parametrize( @@ -364,58 +361,36 @@ def test_geo_dataframe(geotable): @pytest.mark.parametrize( "modifier", [ - {}, - {"srid": "4326"}, - {"srid": "4326", "geo_type": "geometry"}, - {"srid": "4326", "geo_type": "geography"}, + param({}, id="none"), + param({"srid": 4326}, id="srid"), + param({"srid": 4326, "geotype": "geometry"}, id="geometry"), + param({"srid": 4326, "geotype": "geography"}, id="geography"), ], ) @pytest.mark.parametrize( - ("shape", "value", "expected"), + ("shape", "value"), [ # Geometry primitives (2D) - param("point", (30, 10), "(30.0 10.0)", id="point"), - param( - "linestring", - ((30, 10), (10, 30), (40, 40)), - "(30.0 10.0, 10.0 30.0, 40.0 40.0)", - id="linestring", - ), + param("point", (30, 10), id="point"), + param("linestring", ((30, 10), (10, 30), (40, 40)), id="linestring"), param( "polygon", ( ((35, 10), (45, 45), (15, 40), (10, 20), (35, 10)), ((20, 30), (35, 35), (30, 20), (20, 30)), ), - ( - "((35.0 10.0, 45.0 45.0, 15.0 40.0, 10.0 20.0, 35.0 10.0), " - "(20.0 30.0, 35.0 35.0, 30.0 20.0, 20.0 30.0))" - ), id="polygon", ), param( "polygon", (((30, 10), (40, 40), (20, 40), (10, 20), (30, 10)),), - "((30.0 10.0, 40.0 40.0, 20.0 40.0, 10.0 20.0, 30.0 10.0))", id="polygon_single", ), # Multipart geometries (2D) - param( - "multipoint", - ((10, 40), (40, 30), (20, 20), (30, 10)), - "((10.0 40.0), (40.0 30.0), (20.0 20.0), (30.0 10.0))", - id="multipoint", - ), + param("multipoint", ((10, 40), (40, 30), (20, 20), (30, 10)), id="multipoint"), param( "multilinestring", - ( - ((10, 10), (20, 20), (10, 40)), - ((40, 40), (30, 30), (40, 20), (30, 10)), - ), - ( - "((10.0 10.0, 20.0 20.0, 10.0 40.0), " - "(40.0 40.0, 30.0 30.0, 40.0 20.0, 30.0 10.0))" - ), + (((10, 10), (20, 20), (10, 40)), ((40, 40), (30, 30), (40, 20), (30, 10))), id="multilinestring", ), param( @@ -423,38 +398,19 @@ def test_geo_dataframe(geotable): ( (((40, 40), (20, 45), (45, 30), (40, 40)),), ( - ( - (20, 35), - (10, 30), - (10, 10), - (30, 5), - (45, 20), - (20, 35), - ), + ((20, 35), (10, 30), (10, 10), (30, 5), (45, 20), (20, 35)), ((30, 20), (20, 15), (20, 25), (30, 20)), ), ), - ( - "(((40.0 40.0, 20.0 45.0, 45.0 30.0, 40.0 40.0)), " - "((20.0 35.0, 10.0 30.0, 10.0 10.0, 30.0 5.0, 45.0 20.0, 20.0 35.0), " - "(30.0 20.0, 20.0 15.0, 20.0 25.0, 30.0 20.0)))" - ), id="multipolygon", ), ], ) -def test_geo_literals_smoke(con, shape, value, modifier, expected): +def test_geo_literals_smoke(con, shape, value, modifier, snapshot): """Smoke tests for geo spatial literals.""" - srid = f";{modifier['srid']}" if "srid" in modifier else "" - geo_type = f":{modifier['geo_type']}" if "geo_type" in modifier else "" - expr_type = f"{shape.upper()} {srid}{geo_type}" - 
expr = ibis.literal(value, type=expr_type).name("tmp") - prefix = f"SRID={modifier['srid']};" if "srid" in modifier else "" - suffix = f"::{modifier['geo_type']}" if "geo_type" in modifier else "" + expr = ibis.literal(value, type=getattr(dt, shape).copy(**modifier)) - result = str(con.compile(expr)) - expected = f"SELECT '{prefix}{shape.upper()} {expected}'{suffix} AS tmp" - assert result == expected + snapshot.assert_match(con.compile(expr), "out.sql") @pytest.mark.parametrize( @@ -514,47 +470,34 @@ def test_geo_literals_smoke(con, shape, value, modifier, expected): id="multipolygon_n_rings", marks=pytest.mark.notimpl(["postgres"]), ), - # TODO: the mock tests don't support multipoint and multilinestring - # yet, but once they do, add some more tests here. ], ) -def test_geo_ops_smoke(geotable, fn_expr): +def test_geo_ops_smoke(geotable, fn_expr, snapshot): """Smoke tests for geo spatial operations.""" - assert str(fn_expr(geotable).compile()) + snapshot.assert_match(fn_expr(geotable).compile(), "out.sql") -def test_geo_equals(geotable): +def test_geo_equals(geotable, snapshot): # Fix https://github.com/ibis-project/ibis/pull/2956 expr = geotable.mutate( - [ - geotable.geo_point.y().name("Location_Latitude"), - geotable.geo_point.y().name("Latitude"), - ] + Location_Latitude=geotable.geo_point.y(), Latitude=geotable.geo_point.y() ) - result = str(expr.compile().compile()) - - assert result == ( - "SELECT t0.id, ST_AsEWKB(t0.geo_point) AS geo_point, " - "ST_AsEWKB(t0.geo_linestring) AS geo_linestring, " - "ST_AsEWKB(t0.geo_polygon) AS geo_polygon, " - "ST_AsEWKB(t0.geo_multipolygon) AS geo_multipolygon, " - 'ST_Y(t0.geo_point) AS "Location_Latitude", ' - 'ST_Y(t0.geo_point) AS "Latitude" \n' - "FROM geo AS t0" - ) + snapshot.assert_match(expr.compile(), "out1.sql") # simple test using == - expected = "SELECT t0.geo_point = t0.geo_point AS tmp \nFROM geo AS t0" expr = geotable.geo_point == geotable.geo_point - assert str(expr.name("tmp").compile().compile()) == expected - assert expr.execute().all() + snapshot.assert_match(expr.compile(), "out2.sql") + result = expr.execute() + assert not result.empty + assert result.all() # using geo_equals - expected = "SELECT ST_Equals(t0.geo_point, t0.geo_point) AS tmp \nFROM geo AS t0" expr = geotable.geo_point.geo_equals(geotable.geo_point).name("tmp") - assert str(expr.compile().compile()) == expected - assert expr.execute().all() + snapshot.assert_match(expr.compile(), "out3.sql") + result = expr.execute() + assert not result.empty + assert result.all() # equals returns a boolean object assert geotable.geo_point.equals(geotable.geo_point) diff --git a/ibis/backends/postgres/tests/test_postgis.py b/ibis/backends/postgres/tests/test_postgis.py index 8c96db87d323..34d4972a80f8 100644 --- a/ibis/backends/postgres/tests/test_postgis.py +++ b/ibis/backends/postgres/tests/test_postgis.py @@ -8,12 +8,9 @@ from numpy import testing pytest.importorskip("psycopg2") -pytest.importorskip("geoalchemy2") gpd = pytest.importorskip("geopandas") pytest.importorskip("shapely") -sa = pytest.importorskip("sqlalchemy") - pytestmark = pytest.mark.geospatial @@ -29,44 +26,38 @@ def test_empty_select(geotable): assert len(result) == 0 -def test_select_point_geodata(geotable): +def test_select_point_geodata(geotable, snapshot): expr = geotable["geo_point"] sqla_expr = expr.compile() - compiled = str(sqla_expr.compile(compile_kwargs={"literal_binds": True})) - expected = "SELECT ST_AsEWKB(t0.geo_point) AS geo_point \nFROM geo AS t0" - assert compiled == expected + compiled = 
str(sqla_expr) + snapshot.assert_match(compiled, "out.sql") data = expr.execute() assert data.geom_type.iloc[0] == "Point" -def test_select_linestring_geodata(geotable): +def test_select_linestring_geodata(geotable, snapshot): expr = geotable["geo_linestring"] sqla_expr = expr.compile() - compiled = str(sqla_expr.compile(compile_kwargs={"literal_binds": True})) - expected = "SELECT ST_AsEWKB(t0.geo_linestring) AS geo_linestring \nFROM geo AS t0" - assert compiled == expected + compiled = str(sqla_expr) + snapshot.assert_match(compiled, "out.sql") data = expr.execute() assert data.geom_type.iloc[0] == "LineString" -def test_select_polygon_geodata(geotable): +def test_select_polygon_geodata(geotable, snapshot): expr = geotable["geo_polygon"] sqla_expr = expr.compile() - compiled = str(sqla_expr.compile(compile_kwargs={"literal_binds": True})) - expected = "SELECT ST_AsEWKB(t0.geo_polygon) AS geo_polygon \nFROM geo AS t0" - assert compiled == expected + compiled = str(sqla_expr) + snapshot.assert_match(compiled, "out.sql") data = expr.execute() assert data.geom_type.iloc[0] == "Polygon" -def test_select_multipolygon_geodata(geotable): +def test_select_multipolygon_geodata(geotable, snapshot): expr = geotable["geo_multipolygon"] sqla_expr = expr.compile() - compiled = str(sqla_expr.compile(compile_kwargs={"literal_binds": True})) - expected = ( - "SELECT ST_AsEWKB(t0.geo_multipolygon) AS geo_multipolygon \nFROM geo AS t0" - ) - assert compiled == expected + compiled = str(sqla_expr) + snapshot.assert_match(compiled, "out.sql") data = expr.execute() assert data.geom_type.iloc[0] == "MultiPolygon" diff --git a/ibis/backends/postgres/tests/test_udf.py b/ibis/backends/postgres/tests/test_udf.py index cd682972409b..8563f2e65336 100644 --- a/ibis/backends/postgres/tests/test_udf.py +++ b/ibis/backends/postgres/tests/test_udf.py @@ -14,17 +14,14 @@ from ibis.util import guid pytest.importorskip("psycopg2") -sa = pytest.importorskip("sqlalchemy") @pytest.fixture(scope="session") def test_schema(con): schema_name = f"udf_test_{guid()}" - with con.begin() as c: - c.exec_driver_sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}") + con.create_schema(schema_name, force=True) yield schema_name - with con.begin() as c: - c.exec_driver_sql(f"DROP SCHEMA IF EXISTS {schema_name} CASCADE") + con.drop_schema(schema_name, force=True, cascade=True) @pytest.fixture(scope="session") @@ -74,9 +71,9 @@ def sql_define_udf(test_schema): @pytest.fixture(scope="session") def con_for_udf(con, sql_table_setup, sql_define_udf, sql_define_py_udf, test_schema): with con.begin() as c: - c.exec_driver_sql(sql_table_setup) - c.exec_driver_sql(sql_define_udf) - c.exec_driver_sql(sql_define_py_udf) + c.execute(sql_table_setup) + c.execute(sql_define_udf) + c.execute(sql_define_py_udf) yield con diff --git a/ibis/backends/postgres/udf.py b/ibis/backends/postgres/udf.py deleted file mode 100644 index c0d793d87976..000000000000 --- a/ibis/backends/postgres/udf.py +++ /dev/null @@ -1,201 +0,0 @@ -from __future__ import annotations - -import collections -import inspect -import itertools -from textwrap import dedent -from typing import TYPE_CHECKING, Any, Callable - -import sqlalchemy as sa -from sqlalchemy.dialects.postgresql import dialect - -import ibis -import ibis.expr.datatypes as dt -import ibis.expr.rules as rlz -from ibis import IbisError -from ibis.backends.postgres.compiler import PostgreSQLExprTranslator, PostgresUDFNode -from ibis.backends.postgres.datatypes import PostgresType -from ibis.legacy.udf.validate import 
validate_output_type - -if TYPE_CHECKING: - from collections.abc import MutableMapping, Sequence - -_udf_name_cache: MutableMapping[str, Any] = collections.defaultdict(itertools.count) - -_postgres_dialect = dialect() - - -class PostgresUDFError(IbisError): - pass - - -def _ibis_to_postgres_str(ibis_type): - """Map an ibis DataType to a Postgres-appropriate string.""" - satype = PostgresType.from_ibis(ibis_type) - if callable(satype): - satype = satype() - return satype.compile(dialect=_postgres_dialect) - - -def _create_udf_node( - name: str, - fields: dict[str, Any], -) -> type[PostgresUDFNode]: - """Create a new UDF node type. - - Parameters - ---------- - name - Then name of the UDF node - fields - Mapping of class member name to definition - - Returns - ------- - type[PostgresUDFNode] - A new PostgresUDFNode subclass - """ - definition = next(_udf_name_cache[name]) - external_name = f"{name}_{definition:d}" - return type(external_name, (PostgresUDFNode,), fields) - - -def existing_udf(name, input_types, output_type, schema=None, parameters=None): - """Create an ibis function that refers to an existing Postgres UDF.""" - - if parameters is None: - parameters = [f"v{i}" for i in range(len(input_types))] - elif len(input_types) != len(parameters): - raise ValueError( - ( - "Length mismatch in arguments to existing_udf: " - "len(input_types)={}, len(parameters)={}" - ).format(len(input_types), len(parameters)) - ) - - validate_output_type(output_type) - - udf_node_fields = { - name: rlz.ValueOf(type_) for name, type_ in zip(parameters, input_types) - } - udf_node_fields["name"] = name - udf_node_fields["dtype"] = output_type - - udf_node = _create_udf_node(name, udf_node_fields) - - def _translate_udf(t, op): - func_obj = sa.func - if schema is not None: - func_obj = getattr(func_obj, schema) - func_obj = getattr(func_obj, name) - - sa_args = [t.translate(arg) for arg in op.args] - - return func_obj(*sa_args) - - PostgreSQLExprTranslator.add_operation(udf_node, _translate_udf) - - def wrapped(*args, **kwargs): - node = udf_node(*args, **kwargs) - return node.to_expr() - - return wrapped - - -def udf( - client: ibis.backends.postgres.Backend, - python_func: Callable[..., Any], - in_types: Sequence[dt.DataType], - out_type: dt.DataType, - schema: str | None = None, - replace: bool = False, - name: str | None = None, - language: str = "plpythonu", -): - """Define a UDF in the database. 
- - Parameters - ---------- - client - A postgres Backend instance - python_func - Python function - in_types - Input DataTypes - out_type - Output DataType - schema - The postgres schema in which to define the UDF - replace - Replace UDF in database if already exists - name - Name for the UDF to be defined in database - language - The language to use for the UDF - - Returns - ------- - Callable - The ibis UDF object as a wrapped function - """ - if name is None: - internal_name = python_func.__name__ - else: - internal_name = name - signature = inspect.signature(python_func) - parameter_names = signature.parameters.keys() - replace_text = " OR REPLACE " if replace else "" - schema_fragment = (schema + ".") if schema else "" - template = """CREATE {replace} FUNCTION -{schema_fragment}{name}({signature}) -RETURNS {return_type} -LANGUAGE {language} -AS $$ -{func_definition} -return {internal_name}({args}) -$$; -""" - - postgres_signature = ", ".join( - f"{name} {_ibis_to_postgres_str(type_)}" - for name, type_ in zip(parameter_names, in_types) - ) - return_type = _ibis_to_postgres_str(out_type) - # If function definition is indented extra, - # Postgres UDF will fail with indentation error. - func_definition = dedent(inspect.getsource(python_func)) - if func_definition.strip().startswith("@"): - raise PostgresUDFError( - "Use of decorators on a function to be turned into Postgres UDF " - "is not supported. The body of the UDF must be wholly " - "self-contained. " - ) - # Since the decorator syntax does not first bind - # the function name to the wrapped function but instead includes - # the decorator(s). Therefore, the decorators themselves will - # be included in the string coming from `inspect.getsource()`. - # Since the decorator objects are not defined, execution of the - # UDF results in a NameError. 
- - formatted_sql = template.format( - replace=replace_text, - schema_fragment=schema_fragment, - name=internal_name, - signature=postgres_signature, - return_type=return_type, - language=language, - func_definition=func_definition, - # for internal_name, need to make sure this works if passing - # name parameter - internal_name=python_func.__name__, - args=", ".join(parameter_names), - ) - with client.begin() as con: - con.exec_driver_sql(formatted_sql) - return existing_udf( - name=internal_name, - input_types=in_types, - output_type=out_type, - schema=schema, - parameters=parameter_names, - ) diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py index 769f746654e0..ca70557f6321 100644 --- a/ibis/backends/tests/errors.py +++ b/ibis/backends/tests/errors.py @@ -79,3 +79,15 @@ from trino.exceptions import TrinoUserError except ImportError: TrinoUserError = None + +try: + from psycopg2.errors import DivisionByZero as PsycoPg2DivisionByZero + from psycopg2.errors import IndeterminateDatatype as PsycoPg2IndeterminateDatatype + from psycopg2.errors import ( + InvalidTextRepresentation as PsycoPg2InvalidTextRepresentation, + ) + from psycopg2.errors import SyntaxError as PsycoPg2SyntaxError +except ImportError: + PsycoPg2SyntaxError = ( + PsycoPg2IndeterminateDatatype + ) = PsycoPg2InvalidTextRepresentation = PsycoPg2DivisionByZero = None diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/postgres/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/postgres/out.sql new file mode 100644 index 000000000000..b309cd65374d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/postgres/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/postgres/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/postgres/out.sql new file mode 100644 index 000000000000..b309cd65374d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/postgres/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/postgres/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/postgres/out.sql new file mode 100644 index 000000000000..6bd0ba8c995d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/postgres/out.sql @@ -0,0 +1,3 @@ +SELECT + SUM("t0"."bigint_col") AS "Sum(bigint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/postgres/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/postgres/out.sql new file mode 100644 index 000000000000..97338646649f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/postgres/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + "t0"."id", + "t0"."bool_col" + FROM "functional_alltypes" AS "t0" + LIMIT 10 +) AS "t2" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/postgres/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/postgres/out.sql index 
fc16f2428d16..d3969647c9ea 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/postgres/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/postgres/out.sql @@ -1,5 +1,5 @@ SELECT - CASE t0.continent + CASE "t0"."continent" WHEN 'NA' THEN 'North America' WHEN 'SA' @@ -15,8 +15,8 @@ SELECT WHEN 'AN' THEN 'Antarctica' ELSE 'Unknown continent' - END AS cont, - SUM(t0.population) AS total_pop -FROM countries AS t0 + END AS "cont", + SUM("t0"."population") AS "total_pop" +FROM "countries" AS "t0" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/postgres/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/postgres/out.sql index 3f66295a7f5a..c1611d8cecc3 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/postgres/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/postgres/out.sql @@ -1,13 +1,9 @@ SELECT - t0.x IN ( + "t0"."x" IN ( SELECT - t1.x - FROM ( - SELECT - t0.x AS x - FROM t AS t0 - WHERE - t0.x > 2 - ) AS t1 - ) AS "InColumn(x, x)" -FROM t AS t0 \ No newline at end of file + "t0"."x" + FROM "t" AS "t0" + WHERE + "t0"."x" > 2 + ) AS "InSubquery(x)" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/postgres/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/postgres/out.sql index 014e47475a3d..c674f81521f2 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/postgres/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/postgres/out.sql @@ -1,79 +1,96 @@ -WITH t0 AS ( - SELECT - t7.field_of_study AS field_of_study, - ROW(anon_2.f1, anon_2.f2) AS __pivoted__ - FROM humanities AS t7 - JOIN UNNEST(ARRAY[ROW(CAST('1970-71' AS TEXT), CAST(t7."1970-71" AS BIGINT)), ROW(CAST('1975-76' AS TEXT), CAST(t7."1975-76" AS BIGINT)), ROW(CAST('1980-81' AS TEXT), CAST(t7."1980-81" AS BIGINT)), ROW(CAST('1985-86' AS TEXT), CAST(t7."1985-86" AS BIGINT)), ROW(CAST('1990-91' AS TEXT), CAST(t7."1990-91" AS BIGINT)), ROW(CAST('1995-96' AS TEXT), CAST(t7."1995-96" AS BIGINT)), ROW(CAST('2000-01' AS TEXT), CAST(t7."2000-01" AS BIGINT)), ROW(CAST('2005-06' AS TEXT), CAST(t7."2005-06" AS BIGINT)), ROW(CAST('2010-11' AS TEXT), CAST(t7."2010-11" AS BIGINT)), ROW(CAST('2011-12' AS TEXT), CAST(t7."2011-12" AS BIGINT)), ROW(CAST('2012-13' AS TEXT), CAST(t7."2012-13" AS BIGINT)), ROW(CAST('2013-14' AS TEXT), CAST(t7."2013-14" AS BIGINT)), ROW(CAST('2014-15' AS TEXT), CAST(t7."2014-15" AS BIGINT)), ROW(CAST('2015-16' AS TEXT), CAST(t7."2015-16" AS BIGINT)), ROW(CAST('2016-17' AS TEXT), CAST(t7."2016-17" AS BIGINT)), ROW(CAST('2017-18' AS TEXT), CAST(t7."2017-18" AS BIGINT)), ROW(CAST('2018-19' AS TEXT), CAST(t7."2018-19" AS BIGINT)), ROW(CAST('2019-20' AS TEXT), CAST(t7."2019-20" AS BIGINT))]) AS anon_2(f1 TEXT, f2 BIGINT) - ON TRUE -), t1 AS ( - SELECT - t0.field_of_study AS field_of_study, - ( - t0.__pivoted__ - ).f1 AS years, - ( - t0.__pivoted__ - ).f2 AS degrees - FROM t0 -), t2 AS ( - SELECT - t1.field_of_study AS field_of_study, - t1.years AS years, - t1.degrees AS degrees, - FIRST_VALUE(t1.degrees) OVER (PARTITION BY t1.field_of_study ORDER BY t1.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, - LAST_VALUE(t1.degrees) OVER (PARTITION BY t1.field_of_study ORDER BY t1.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees - FROM t1 -), t3 AS ( - SELECT - t2.field_of_study AS 
field_of_study, - t2.years AS years, - t2.degrees AS degrees, - t2.earliest_degrees AS earliest_degrees, - t2.latest_degrees AS latest_degrees, - t2.latest_degrees - t2.earliest_degrees AS diff - FROM t2 -), t4 AS ( - SELECT - t3.field_of_study AS field_of_study, - FIRST(t3.diff) AS diff - FROM t3 - GROUP BY - 1 -), anon_1 AS ( +SELECT + "t10"."field_of_study", + "t10"."diff" +FROM ( SELECT - t4.field_of_study AS field_of_study, - t4.diff AS diff - FROM t4 + "t5"."field_of_study", + "t5"."diff" + FROM ( + SELECT + "t4"."field_of_study", + FIRST("t4"."diff") AS "diff" + FROM ( + SELECT + "t3"."field_of_study", + "t3"."years", + "t3"."degrees", + "t3"."earliest_degrees", + "t3"."latest_degrees", + "t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff" + FROM ( + SELECT + "t2"."field_of_study", + "t2"."years", + "t2"."degrees", + FIRST("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees", + LAST("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees" + FROM ( + SELECT + "t1"."field_of_study", + CAST(TO_JSONB("t1"."__pivoted__") -> 'f1' AS VARCHAR) AS "years", + CAST(TO_JSONB("t1"."__pivoted__") -> 'f2' AS BIGINT) AS "degrees" + FROM ( + SELECT + "t0"."field_of_study", + UNNEST( + ARRAY[ROW(CAST('1970-71' AS VARCHAR), CAST("t0"."1970-71" AS BIGINT)), ROW(CAST('1975-76' AS VARCHAR), CAST("t0"."1975-76" AS BIGINT)), ROW(CAST('1980-81' AS VARCHAR), CAST("t0"."1980-81" AS BIGINT)), ROW(CAST('1985-86' AS VARCHAR), CAST("t0"."1985-86" AS BIGINT)), ROW(CAST('1990-91' AS VARCHAR), CAST("t0"."1990-91" AS BIGINT)), ROW(CAST('1995-96' AS VARCHAR), CAST("t0"."1995-96" AS BIGINT)), ROW(CAST('2000-01' AS VARCHAR), CAST("t0"."2000-01" AS BIGINT)), ROW(CAST('2005-06' AS VARCHAR), CAST("t0"."2005-06" AS BIGINT)), ROW(CAST('2010-11' AS VARCHAR), CAST("t0"."2010-11" AS BIGINT)), ROW(CAST('2011-12' AS VARCHAR), CAST("t0"."2011-12" AS BIGINT)), ROW(CAST('2012-13' AS VARCHAR), CAST("t0"."2012-13" AS BIGINT)), ROW(CAST('2013-14' AS VARCHAR), CAST("t0"."2013-14" AS BIGINT)), ROW(CAST('2014-15' AS VARCHAR), CAST("t0"."2014-15" AS BIGINT)), ROW(CAST('2015-16' AS VARCHAR), CAST("t0"."2015-16" AS BIGINT)), ROW(CAST('2016-17' AS VARCHAR), CAST("t0"."2016-17" AS BIGINT)), ROW(CAST('2017-18' AS VARCHAR), CAST("t0"."2017-18" AS BIGINT)), ROW(CAST('2018-19' AS VARCHAR), CAST("t0"."2018-19" AS BIGINT)), ROW(CAST('2019-20' AS VARCHAR), CAST("t0"."2019-20" AS BIGINT))] + ) AS "__pivoted__" + FROM "humanities" AS "t0" + ) AS "t1" + ) AS "t2" + ) AS "t3" + ) AS "t4" + GROUP BY + 1 + ) AS "t5" ORDER BY - t4.diff DESC + "t5"."diff" DESC NULLS LAST LIMIT 10 -), t5 AS ( + UNION ALL SELECT - t4.field_of_study AS field_of_study, - t4.diff AS diff - FROM t4 + "t5"."field_of_study", + "t5"."diff" + FROM ( + SELECT + "t4"."field_of_study", + FIRST("t4"."diff") AS "diff" + FROM ( + SELECT + "t3"."field_of_study", + "t3"."years", + "t3"."degrees", + "t3"."earliest_degrees", + "t3"."latest_degrees", + "t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff" + FROM ( + SELECT + "t2"."field_of_study", + "t2"."years", + "t2"."degrees", + FIRST("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees", + LAST("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND 
UNBOUNDED FOLLOWING) AS "latest_degrees" + FROM ( + SELECT + "t1"."field_of_study", + CAST(TO_JSONB("t1"."__pivoted__") -> 'f1' AS VARCHAR) AS "years", + CAST(TO_JSONB("t1"."__pivoted__") -> 'f2' AS BIGINT) AS "degrees" + FROM ( + SELECT + "t0"."field_of_study", + UNNEST( + ARRAY[ROW(CAST('1970-71' AS VARCHAR), CAST("t0"."1970-71" AS BIGINT)), ROW(CAST('1975-76' AS VARCHAR), CAST("t0"."1975-76" AS BIGINT)), ROW(CAST('1980-81' AS VARCHAR), CAST("t0"."1980-81" AS BIGINT)), ROW(CAST('1985-86' AS VARCHAR), CAST("t0"."1985-86" AS BIGINT)), ROW(CAST('1990-91' AS VARCHAR), CAST("t0"."1990-91" AS BIGINT)), ROW(CAST('1995-96' AS VARCHAR), CAST("t0"."1995-96" AS BIGINT)), ROW(CAST('2000-01' AS VARCHAR), CAST("t0"."2000-01" AS BIGINT)), ROW(CAST('2005-06' AS VARCHAR), CAST("t0"."2005-06" AS BIGINT)), ROW(CAST('2010-11' AS VARCHAR), CAST("t0"."2010-11" AS BIGINT)), ROW(CAST('2011-12' AS VARCHAR), CAST("t0"."2011-12" AS BIGINT)), ROW(CAST('2012-13' AS VARCHAR), CAST("t0"."2012-13" AS BIGINT)), ROW(CAST('2013-14' AS VARCHAR), CAST("t0"."2013-14" AS BIGINT)), ROW(CAST('2014-15' AS VARCHAR), CAST("t0"."2014-15" AS BIGINT)), ROW(CAST('2015-16' AS VARCHAR), CAST("t0"."2015-16" AS BIGINT)), ROW(CAST('2016-17' AS VARCHAR), CAST("t0"."2016-17" AS BIGINT)), ROW(CAST('2017-18' AS VARCHAR), CAST("t0"."2017-18" AS BIGINT)), ROW(CAST('2018-19' AS VARCHAR), CAST("t0"."2018-19" AS BIGINT)), ROW(CAST('2019-20' AS VARCHAR), CAST("t0"."2019-20" AS BIGINT))] + ) AS "__pivoted__" + FROM "humanities" AS "t0" + ) AS "t1" + ) AS "t2" + ) AS "t3" + ) AS "t4" + GROUP BY + 1 + ) AS "t5" WHERE - t4.diff < 0 -), anon_3 AS ( - SELECT - t5.field_of_study AS field_of_study, - t5.diff AS diff - FROM t5 + "t5"."diff" < 0 ORDER BY - t5.diff ASC + "t5"."diff" ASC LIMIT 10 -) -SELECT - t6.field_of_study, - t6.diff -FROM ( - SELECT - anon_1.field_of_study AS field_of_study, - anon_1.diff AS diff - FROM anon_1 - UNION ALL - SELECT - anon_3.field_of_study AS field_of_study, - anon_3.diff AS diff - FROM anon_3 -) AS t6 \ No newline at end of file +) AS "t10" \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index e008089e632d..24fa0c702fd1 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -1097,7 +1097,7 @@ def test_quantile( raises=com.OperationNotDefinedError, ), pytest.mark.notyet( - ["duckdb", "snowflake"], + ["postgres", "duckdb", "snowflake"], raises=com.UnsupportedOperationError, reason="backend only implements population correlation coefficient", ), @@ -1112,7 +1112,6 @@ def test_quantile( ), pytest.mark.notyet( [ - "postgres", "risingwave", "snowflake", "oracle", diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 72272e4a3411..9385b84e966d 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -21,6 +21,8 @@ ClickHouseDatabaseError, GoogleBadRequest, PolarsComputeError, + PsycoPg2IndeterminateDatatype, + PsycoPg2SyntaxError, Py4JJavaError, PySparkAnalysisException, TrinoUserError, @@ -98,11 +100,6 @@ def test_array_concat_variadic(con): # Issues #2370 @pytest.mark.notimpl(["flink"], raises=com.OperationNotDefinedError) -@pytest.mark.notyet( - ["postgres"], - raises=sa.exc.ProgrammingError, - reason="backend can't infer the type of an empty array", -) @pytest.mark.notyet( ["risingwave"], raises=sa.exc.InternalError, @@ -536,9 +533,7 @@ def test_array_filter(con, input, output): @builtin_array -@pytest.mark.notimpl( - ["mssql", "polars", 
"postgres"], raises=com.OperationNotDefinedError -) +@pytest.mark.notimpl(["mssql", "polars"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) @pytest.mark.never(["impala"], reason="array_types table isn't defined") @pytest.mark.notimpl( @@ -810,7 +805,8 @@ def test_array_intersect(con, data): raises=ClickHouseDatabaseError, reason="ClickHouse won't accept dicts for struct type values", ) -@pytest.mark.notimpl(["postgres", "risingwave"], raises=sa.exc.ProgrammingError) +@pytest.mark.notimpl(["risingwave"], raises=sa.exc.ProgrammingError) +@pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError) @pytest.mark.notimpl(["datafusion", "flink"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError @@ -873,7 +869,8 @@ def test_zip(backend): raises=ClickHouseDatabaseError, reason="https://github.com/ClickHouse/ClickHouse/issues/41112", ) -@pytest.mark.notimpl(["postgres", "risingwave"], raises=sa.exc.ProgrammingError) +@pytest.mark.notimpl(["risingwave"], raises=sa.exc.ProgrammingError) +@pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError) @pytest.mark.notimpl(["datafusion", "flink"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["polars"], @@ -933,7 +930,7 @@ def flatten_data(): @pytest.mark.notyet( ["postgres", "risingwave"], reason="Postgres doesn't truly support arrays of arrays", - raises=com.OperationNotDefinedError, + raises=(com.OperationNotDefinedError, PsycoPg2IndeterminateDatatype), ) @pytest.mark.parametrize( ("column", "expected"), diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index 904948e03051..1250e55ca35f 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -84,7 +84,7 @@ def time_keyed_right(time_keyed_df2): ("forward", operator.le), ], ) -@pytest.mark.notimpl(["datafusion", "snowflake", "trino"]) +@pytest.mark.notimpl(["datafusion", "snowflake", "trino", "postgres"]) def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): on = op(time_left["time"], time_right["time"]) expr = time_left.asof_join(time_right, on=on, predicates="group") @@ -112,7 +112,7 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op @pytest.mark.broken( ["clickhouse"], raises=AssertionError, reason="`time` is truncated to seconds" ) -@pytest.mark.notimpl(["datafusion", "snowflake", "trino"]) +@pytest.mark.notimpl(["datafusion", "snowflake", "trino", "postgres"]) def test_keyed_asof_join_with_tolerance( con, time_keyed_left, diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 0a6c98d2fd27..f389e52c3411 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -24,6 +24,7 @@ GoogleBadRequest, ImpalaHiveServer2Error, Py4JJavaError, + PsycoPg2InvalidTextRepresentation, SnowflakeProgrammingError, TrinoUserError, ) @@ -1381,7 +1382,6 @@ def hash_256(col): "mssql", "mysql", "oracle", - "postgres", "risingwave", "pyspark", "snowflake", diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index b1c3939bf515..95087ba64d8e 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -183,6 +183,11 @@ def test_semi_join_topk(batting, awards_players, func): @pytest.mark.notimpl(["dask", "druid", "exasol"]) +@pytest.mark.notimpl( + ["postgres"], + raises=com.IbisTypeError, + 
reason="postgres can't handle null types columns", +) def test_join_with_pandas(batting, awards_players): batting_filt = batting[lambda t: t.yearID < 1900] awards_players_filt = awards_players[lambda t: t.yearID < 1900].execute() diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 4dfc06dcd4f4..217f3cff10de 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -22,6 +22,7 @@ ExaQueryError, GoogleBadRequest, ImpalaHiveServer2Error, + PsycoPg2DivisionByZero, Py4JError, SnowflakeProgrammingError, TrinoUserError, @@ -261,8 +262,9 @@ def test_numeric_literal(con, backend, expr, expected_types): "trino": decimal.Decimal("1.1"), "dask": decimal.Decimal("1.1"), "duckdb": decimal.Decimal("1.1"), - "postgres": 1.1, + "risingwave": 1.1, + "postgres": decimal.Decimal("1.1"), "pandas": decimal.Decimal("1.1"), "pyspark": decimal.Decimal("1.1"), "mysql": 1.1, @@ -314,8 +316,8 @@ def test_numeric_literal(con, backend, expr, expected_types): "sqlite": 1.1, "trino": decimal.Decimal("1.1"), "duckdb": decimal.Decimal("1.100000000"), - "postgres": 1.1, "risingwave": 1.1, + "postgres": decimal.Decimal("1.1"), "pandas": decimal.Decimal("1.1"), "pyspark": decimal.Decimal("1.1"), "mysql": 1.1, @@ -363,8 +365,8 @@ def test_numeric_literal(con, backend, expr, expected_types): "bigquery": decimal.Decimal("1.1"), "sqlite": 1.1, "dask": decimal.Decimal("1.1"), - "postgres": 1.1, "risingwave": 1.1, + "postgres": decimal.Decimal("1.1"), "pandas": decimal.Decimal("1.1"), "pyspark": decimal.Decimal("1.1"), "mysql": 1.1, @@ -420,8 +422,8 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": float("inf"), "sqlite": float("inf"), - "postgres": float("nan"), "risingwave": float("nan"), + "postgres": decimal.Decimal("Infinity"), "pandas": decimal.Decimal("Infinity"), "dask": decimal.Decimal("Infinity"), "impala": float("inf"), @@ -506,8 +508,8 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": float("-inf"), "sqlite": float("-inf"), - "postgres": float("nan"), "risingwave": float("nan"), + "postgres": decimal.Decimal("-Infinity"), "pandas": decimal.Decimal("-Infinity"), "dask": decimal.Decimal("-Infinity"), "impala": float("-inf"), @@ -1459,8 +1461,8 @@ def test_floating_mod(backend, alltypes, df): reason="returns NULL when dividing by zero", ) @pytest.mark.notyet(["mssql"], raises=(sa.exc.OperationalError, sa.exc.DataError)) -@pytest.mark.notyet(["postgres"], raises=sa.exc.DataError) @pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) +@pytest.mark.notyet(["postgres"], raises=PsycoPg2DivisionByZero) @pytest.mark.notimpl(["exasol"], raises=(sa.exc.DBAPIError, com.IbisTypeError)) def test_divide_by_zero(backend, alltypes, df, column, denominator): expr = alltypes[column] / denominator @@ -1513,6 +1515,7 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "flink", "snowflake", "trino", + "postgres", ], reason="Not SQLAlchemy backends", ) diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 0ef9d20ba505..0ca72f6a45a5 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -31,7 +31,7 @@ reason="structs not supported in the backend", ) no_struct_literals = pytest.mark.notimpl( - ["postgres", "mssql", "oracle"], reason="struct literals are not yet implemented" + ["mssql", "oracle"], reason="struct literals are not yet implemented" ) not_sql = pytest.mark.never( ["pandas", "dask"], diff --git 
a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index c48ea004f6b8..e8a34654835e 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -1029,7 +1029,6 @@ def convert_to_offset(x): "dask", "impala", "mysql", - "postgres", "risingwave", "snowflake", "sqlite", @@ -1054,7 +1053,6 @@ def convert_to_offset(x): pytest.mark.notimpl( [ "sqlite", - "postgres", "risingwave", "polars", "mysql", @@ -1653,7 +1651,6 @@ def test_interval_add_cast_column(backend, alltypes, df): ), pytest.mark.notimpl( [ - "postgres", "risingwave", ], raises=AttributeError, @@ -1687,12 +1684,7 @@ def test_interval_add_cast_column(backend, alltypes, df): ], ) @pytest.mark.notimpl( - [ - "datafusion", - "mssql", - "oracle", - ], - raises=com.OperationNotDefinedError, + ["datafusion", "mssql", "oracle"], raises=com.OperationNotDefinedError ) @pytest.mark.broken( ["druid"], @@ -1857,7 +1849,6 @@ def test_integer_to_timestamp(backend, con, unit): [ "dask", "pandas", - "postgres", "risingwave", "clickhouse", "sqlite", @@ -2758,8 +2749,8 @@ def test_timestamp_precision_output(con, ts, scale, unit): raises=com.OperationNotDefinedError, ) @pytest.mark.notyet( - ["postgres", "risingwave"], - reason="postgres doesn't have any easy way to accurately compute the delta in specific units", + ["risingwave"], + reason="risingwave doesn't have any easy way to accurately compute the delta in specific units", raises=com.OperationNotDefinedError, ) @pytest.mark.parametrize( @@ -2776,7 +2767,12 @@ def test_timestamp_precision_output(con, ts, scale, unit): ["clickhouse"], raises=com.OperationNotDefinedError, reason="time types not yet implemented in ibis for the clickhouse backend", - ) + ), + pytest.mark.notyet( + ["postgres"], + reason="postgres doesn't have any easy way to accurately compute the delta in specific units", + raises=com.OperationNotDefinedError, + ), ], ), param(ibis.date("1992-09-30"), ibis.date("1992-10-01"), "day", 1, id="date"), diff --git a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql index 323185ab0e0d..25009decccf0 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql @@ -10,17 +10,21 @@ FROM ( FROM ( SELECT CASE - WHEN CAST(0 AS TINYINT) >= 0 + WHEN ( + CAST(0 AS TINYINT) + 1 + ) >= 1 THEN SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) - ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT), CAST(2 AS TINYINT)) + ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1 + LENGTH(t0.c_phone), CAST(2 AS TINYINT)) END AS cntrycode, t0.c_acctbal FROM customer AS t0 WHERE CASE - WHEN CAST(0 AS TINYINT) >= 0 + WHEN ( + CAST(0 AS TINYINT) + 1 + ) >= 1 THEN SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) - ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT), CAST(2 AS TINYINT)) + ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1 + LENGTH(t0.c_phone), CAST(2 AS TINYINT)) END IN ('13', '31', '23', '29', '30', '18', '17') AND t0.c_acctbal > ( SELECT @@ -39,9 +43,11 @@ FROM ( WHERE t0.c_acctbal > CAST(0.0 AS DOUBLE) AND CASE - WHEN CAST(0 AS TINYINT) >= 0 + WHEN ( + CAST(0 AS TINYINT) + 1 + ) >= 1 THEN SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) - ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT), CAST(2 AS TINYINT)) + ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1 + LENGTH(t0.c_phone), CAST(2 AS TINYINT)) END IN ('13', 
'31', '23', '29', '30', '18', '17') ) AS t3 ) diff --git a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql index ec0032c198a4..acb134eac72f 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql @@ -9,7 +9,13 @@ FROM ( SUM("t6"."c_acctbal") AS "totacctbal" FROM ( SELECT - IFF(0 >= 0, SUBSTRING("t2"."c_phone", 0 + 1, 2), SUBSTRING("t2"."c_phone", 0, 2)) AS "cntrycode", + IFF( + ( + 0 + 1 + ) >= 1, + SUBSTRING("t2"."c_phone", 0 + 1, 2), + SUBSTRING("t2"."c_phone", 0 + 1 + LENGTH("t2"."c_phone"), 2) + ) AS "cntrycode", "t2"."c_acctbal" FROM ( SELECT @@ -24,7 +30,13 @@ FROM ( FROM "CUSTOMER" AS "t0" ) AS "t2" WHERE - IFF(0 >= 0, SUBSTRING("t2"."c_phone", 0 + 1, 2), SUBSTRING("t2"."c_phone", 0, 2)) IN ('13', '31', '23', '29', '30', '18', '17') + IFF( + ( + 0 + 1 + ) >= 1, + SUBSTRING("t2"."c_phone", 0 + 1, 2), + SUBSTRING("t2"."c_phone", 0 + 1 + LENGTH("t2"."c_phone"), 2) + ) IN ('13', '31', '23', '29', '30', '18', '17') AND "t2"."c_acctbal" > ( SELECT AVG("t3"."c_acctbal") AS "Mean(c_acctbal)" @@ -41,7 +53,13 @@ FROM ( FROM "CUSTOMER" AS "t0" WHERE "t0"."C_ACCTBAL" > 0.0 - AND IFF(0 >= 0, SUBSTRING("t0"."C_PHONE", 0 + 1, 2), SUBSTRING("t0"."C_PHONE", 0, 2)) IN ('13', '31', '23', '29', '30', '18', '17') + AND IFF( + ( + 0 + 1 + ) >= 1, + SUBSTRING("t0"."C_PHONE", 0 + 1, 2), + SUBSTRING("t0"."C_PHONE", 0 + 1 + LENGTH("t0"."C_PHONE"), 2) + ) IN ('13', '31', '23', '29', '30', '18', '17') ) AS "t3" ) AND NOT ( diff --git a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql index 10f4cf53dd9c..1d8794cc6e66 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql @@ -9,7 +9,13 @@ FROM ( SUM("t6"."c_acctbal") AS "totacctbal" FROM ( SELECT - IF(0 >= 0, SUBSTRING("t2"."c_phone", 0 + 1, 2), SUBSTRING("t2"."c_phone", 0, 2)) AS "cntrycode", + IF( + ( + 0 + 1 + ) >= 1, + SUBSTRING("t2"."c_phone", 0 + 1, 2), + SUBSTRING("t2"."c_phone", 0 + 1 + LENGTH("t2"."c_phone"), 2) + ) AS "cntrycode", "t2"."c_acctbal" FROM ( SELECT @@ -24,7 +30,13 @@ FROM ( FROM "customer" AS "t0" ) AS "t2" WHERE - IF(0 >= 0, SUBSTRING("t2"."c_phone", 0 + 1, 2), SUBSTRING("t2"."c_phone", 0, 2)) IN ('13', '31', '23', '29', '30', '18', '17') + IF( + ( + 0 + 1 + ) >= 1, + SUBSTRING("t2"."c_phone", 0 + 1, 2), + SUBSTRING("t2"."c_phone", 0 + 1 + LENGTH("t2"."c_phone"), 2) + ) IN ('13', '31', '23', '29', '30', '18', '17') AND "t2"."c_acctbal" > ( SELECT AVG("t3"."c_acctbal") AS "Mean(c_acctbal)" @@ -41,7 +53,13 @@ FROM ( FROM "customer" AS "t0" WHERE CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) > CAST(0.0 AS DOUBLE) - AND IF(0 >= 0, SUBSTRING("t0"."c_phone", 0 + 1, 2), SUBSTRING("t0"."c_phone", 0, 2)) IN ('13', '31', '23', '29', '30', '18', '17') + AND IF( + ( + 0 + 1 + ) >= 1, + SUBSTRING("t0"."c_phone", 0 + 1, 2), + SUBSTRING("t0"."c_phone", 0 + 1 + LENGTH("t0"."c_phone"), 2) + ) IN ('13', '31', '23', '29', '30', '18', '17') ) AS "t3" ) AND NOT ( diff --git a/ibis/common/exceptions.py b/ibis/common/exceptions.py index 2922e5a784f3..feb89b76cd34 100644 --- a/ibis/common/exceptions.py +++ b/ibis/common/exceptions.py @@ -135,12 +135,12 @@ def __str__(self): class InvalidDecoratorError(IbisError): - def __init__(self, name: str, line: str): 
- super().__init__(name, line) + def __init__(self, name: str, lines: str): + super().__init__(name, lines) def __str__(self) -> str: - name, line = self.args - return f"Only the `@udf` decorator is allowed in user-defined function: `{name}`; found line {line}" + name, lines = self.args + return f"Only the `@udf` decorator is allowed in user-defined function: `{name}`; found lines {lines}" def mark_as_unsupported(f: Callable) -> Callable: diff --git a/ibis/expr/datatypes/value.py b/ibis/expr/datatypes/value.py index 97886ffe6154..d29ad385b0fc 100644 --- a/ibis/expr/datatypes/value.py +++ b/ibis/expr/datatypes/value.py @@ -8,6 +8,8 @@ import json import uuid from collections.abc import Mapping, Sequence +from functools import partial +from operator import attrgetter from typing import Any import toolz @@ -232,18 +234,6 @@ def infer_shapely_multipolygon(value) -> dt.MultiPolygon: del infer.register -@public -class _WellKnownText: - def __init__(self, text: str): - self.text = text - - def __str__(self): - return self.text - - def __repr__(self): - return self.text - - # TODO(kszucs): should raise ValueError instead of TypeError def normalize(typ, value): """Ensure that the Python type underlying a literal resolves to a single type.""" @@ -307,18 +297,36 @@ def normalize(typ, value): ) return frozendict({k: normalize(t, value[k]) for k, t in dtype.items()}) elif dtype.is_geospatial(): + import shapely as shp + if isinstance(value, (tuple, list)): if dtype.is_point(): - return tuple(normalize(dt.float64, item) for item in value) - elif dtype.is_linestring() or dtype.is_multipoint(): - return tuple(normalize(dt.point, item) for item in value) - elif dtype.is_polygon() or dtype.is_multilinestring(): - return tuple(normalize(dt.linestring, item) for item in value) + return shp.Point(value) + elif dtype.is_linestring(): + return shp.LineString(value) + elif dtype.is_polygon(): + return shp.Polygon( + toolz.concat( + map( + attrgetter("coords"), + map(partial(normalize, dt.linestring), value), + ) + ) + ) + elif dtype.is_multipoint(): + return shp.MultiPoint(tuple(map(partial(normalize, dt.point), value))) + elif dtype.is_multilinestring(): + return shp.MultiLineString( + tuple(map(partial(normalize, dt.linestring), value)) + ) elif dtype.is_multipolygon(): - return tuple(normalize(dt.polygon, item) for item in value) - elif isinstance(value, _WellKnownText): + return shp.MultiPolygon(map(partial(normalize, dt.polygon), value)) + else: + raise IbisTypeError(f"Unsupported geospatial type: {dtype}") + elif isinstance(value, shp.geometry.base.BaseGeometry): return value - return _WellKnownText(value.wkt) + else: + return shp.from_wkt(value) elif dtype.is_date(): return normalize_datetime(value).date() elif dtype.is_time(): diff --git a/ibis/expr/operations/udf.py b/ibis/expr/operations/udf.py index 1097a51e55f6..7d900df7c813 100644 --- a/ibis/expr/operations/udf.py +++ b/ibis/expr/operations/udf.py @@ -116,7 +116,6 @@ def _make_node( "__udf_namespace__": schema, "__module__": fn.__module__, "__func_name__": func_name, - "__full_name__": ".".join(filter(None, (schema, func_name))), } ) diff --git a/ibis/expr/rewrites.py b/ibis/expr/rewrites.py index e59be96407a5..e62a339d5d78 100644 --- a/ibis/expr/rewrites.py +++ b/ibis/expr/rewrites.py @@ -240,6 +240,59 @@ def add_one_to_nth_value_input(_, **__): return _.copy(nth=nth) +@replace(ops.Bucket) +def replace_bucket(_): + cases = [] + results = [] + + if _.closed == "left": + l_cmp = ops.LessEqual + r_cmp = ops.Less + else: + l_cmp = ops.Less + r_cmp = 
ops.LessEqual + + user_num_buckets = len(_.buckets) - 1 + + bucket_id = 0 + if _.include_under: + if user_num_buckets > 0: + cmp = ops.Less if _.close_extreme else r_cmp + else: + cmp = ops.LessEqual if _.closed == "right" else ops.Less + cases.append(cmp(_.arg, _.buckets[0])) + results.append(bucket_id) + bucket_id += 1 + + for j, (lower, upper) in enumerate(zip(_.buckets, _.buckets[1:])): + if _.close_extreme and ( + (_.closed == "right" and j == 0) + or (_.closed == "left" and j == (user_num_buckets - 1)) + ): + cases.append( + ops.And(ops.LessEqual(lower, _.arg), ops.LessEqual(_.arg, upper)) + ) + results.append(bucket_id) + else: + cases.append(ops.And(l_cmp(lower, _.arg), r_cmp(_.arg, upper))) + results.append(bucket_id) + bucket_id += 1 + + if _.include_over: + if user_num_buckets > 0: + cmp = ops.Less if _.close_extreme else l_cmp + else: + cmp = ops.Less if _.closed == "right" else ops.LessEqual + + cases.append(cmp(_.buckets[-1], _.arg)) + results.append(bucket_id) + bucket_id += 1 + + return ops.SearchedCase( + cases=tuple(cases), results=tuple(results), default=ops.NULL + ) + + # TODO(kszucs): schema comparison should be updated to not distinguish between # different column order @replace(p.Project(y @ p.Relation) & Check(_.schema == y.schema)) diff --git a/ibis/expr/types/core.py b/ibis/expr/types/core.py index be7944a912f1..a77bfcfdc59a 100644 --- a/ibis/expr/types/core.py +++ b/ibis/expr/types/core.py @@ -83,7 +83,7 @@ def __repr__(self) -> str: except TranslationError as e: lines = [ "Translation to backend failed", - f"Error message: {e.args[0]}", + f"Error message: {repr(e)}", "Expression repr follows:", self._repr(), ] diff --git a/ibis/tests/expr/test_value_exprs.py b/ibis/tests/expr/test_value_exprs.py index bb9e3e8ba8f6..ebb86c7a4d11 100644 --- a/ibis/tests/expr/test_value_exprs.py +++ b/ibis/tests/expr/test_value_exprs.py @@ -115,6 +115,16 @@ def test_listeral_with_unhashable_values(value, expected_type, expected_value): ("foo", "string"), (ipaddress.ip_address("1.2.3.4"), "inet"), (ipaddress.ip_address("::1"), "inet"), + ], +) +def test_literal_with_explicit_type(value, expected_type): + expr = ibis.literal(value, type=expected_type) + assert expr.type().equals(dt.validate_type(expected_type)) + + +@pytest.mark.parametrize( + ["value", "expected_type"], + [ (list(pointA), "point"), (tuple(pointA), "point"), (list(lineAB), "linestring"), @@ -133,7 +143,8 @@ def test_listeral_with_unhashable_values(value, expected_type, expected_value): param(234234, "decimal(9, 3)", id="decimal_int"), ], ) -def test_literal_with_explicit_type(value, expected_type): +def test_literal_with_explicit_geotype(value, expected_type): + pytest.importorskip("shapely") expr = ibis.literal(value, type=expected_type) assert expr.type().equals(dt.validate_type(expected_type)) @@ -247,6 +258,9 @@ def test_list_and_tuple_literals(): # it works! 
repr(expr) + +def test_list_and_tuple_literals_geotype(): + pytest.importorskip("shapely") # test using explicit type point = ibis.literal((1, 2, 1000), type="point") assert point.type() == dt.point diff --git a/poetry.lock b/poetry.lock index 60300ad138b3..a63dc0e775be 100644 --- a/poetry.lock +++ b/poetry.lock @@ -7369,7 +7369,7 @@ mysql = ["pymysql", "sqlalchemy", "sqlalchemy-views"] oracle = ["oracledb", "packaging", "sqlalchemy", "sqlalchemy-views"] pandas = ["regex"] polars = ["packaging", "polars"] -postgres = ["psycopg2", "sqlalchemy", "sqlalchemy-views"] +postgres = ["psycopg2"] pyspark = ["packaging", "pyspark", "sqlalchemy"] risingwave = ["psycopg2", "sqlalchemy", "sqlalchemy-risingwave", "sqlalchemy-views"] snowflake = ["packaging", "snowflake-connector-python"] @@ -7380,4 +7380,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "083f8f6a6d3dab493009395aabe051d1758183ba4e2588fc505aac883e3beafe" +content-hash = "19f28c10cf57b43986df07521c076f7d3942fd1b61aa73d5cc5e7350f3a6842e" diff --git a/pyproject.toml b/pyproject.toml index d13c16e55bbe..f39175ac8ab9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -195,13 +195,8 @@ mysql = ["sqlalchemy", "pymysql", "sqlalchemy-views"] oracle = ["sqlalchemy", "oracledb", "packaging", "sqlalchemy-views"] pandas = ["regex"] polars = ["polars", "packaging"] -postgres = ["psycopg2", "sqlalchemy", "sqlalchemy-views"] -risingwave = [ - "psycopg2", - "sqlalchemy", - "sqlalchemy-views", - "sqlalchemy-risingwave", -] +risingwave = ["psycopg2"] +postgres = ["psycopg2"] pyspark = ["pyspark", "sqlalchemy", "packaging"] snowflake = ["snowflake-connector-python", "packaging"] sqlite = ["regex", "sqlalchemy", "sqlalchemy-views"] From abdebfd7f65c04e1a1d734029ab35d749f0d37c2 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 6 Jan 2024 07:34:22 -0500 Subject: [PATCH 050/161] refactor(mysql): port to sqlglot (#7926) Port the MySQL backend to sqlglot. 
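As a rough illustration of what this port changes (a sketch only, not part of the
diff below; the table name, columns, and expression are made up), compiling an
ibis expression against MySQL now goes through the sqlglot-based MySQLCompiler
added in ibis/backends/mysql/compiler.py rather than the SQLAlchemy translator:

    import ibis

    # Hypothetical schema and expression, used only to show the compile path.
    t = ibis.table({"c_phone": "string", "c_acctbal": "float64"}, name="customer")
    expr = t.filter(t.c_phone.substr(0, 2).isin(["13", "31"]))

    # ibis.to_sql renders the expression with the dialect's compiler; after this
    # patch that is the sqlglot-based MySQLCompiler instead of SQLAlchemy.
    print(ibis.to_sql(expr, dialect="mysql"))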
--- .github/workflows/ibis-backends.yml | 61 +-- docker/mysql/startup.sql | 2 + ibis/backends/base/sql/alchemy/__init__.py | 25 - ibis/backends/base/sql/alchemy/datatypes.py | 145 ----- ibis/backends/base/sql/alchemy/geospatial.py | 10 - ibis/backends/base/sqlglot/datatypes.py | 10 + ibis/backends/base/sqlglot/rewrites.py | 2 +- ibis/backends/conftest.py | 1 - ibis/backends/mssql/tests/test_client.py | 9 - ibis/backends/mysql/__init__.py | 500 +++++++++++++++--- ibis/backends/mysql/compiler.py | 430 ++++++++++++++- ibis/backends/mysql/converter.py | 26 + ibis/backends/mysql/datatypes.py | 101 ---- ibis/backends/mysql/registry.py | 265 ---------- ibis/backends/mysql/tests/conftest.py | 60 +-- ibis/backends/mysql/tests/test_client.py | 73 +-- ibis/backends/tests/errors.py | 7 + .../test_default_limit/mysql/out.sql | 5 + .../test_disable_query_limit/mysql/out.sql | 5 + .../mysql/out.sql | 19 + .../test_respect_set_limit/mysql/out.sql | 10 + .../test_group_by_has_index/mysql/out.sql | 8 +- .../test_sql/test_isin_bug/mysql/out.sql | 18 +- ibis/backends/tests/test_aggregation.py | 10 +- ibis/backends/tests/test_array.py | 27 +- ibis/backends/tests/test_asof_join.py | 16 +- ibis/backends/tests/test_client.py | 14 +- ibis/backends/tests/test_export.py | 3 +- ibis/backends/tests/test_generic.py | 16 +- ibis/backends/tests/test_join.py | 15 +- ibis/backends/tests/test_numeric.py | 67 +-- ibis/backends/tests/test_sql.py | 2 +- ibis/backends/tests/test_string.py | 14 +- ibis/backends/tests/test_temporal.py | 93 +--- ibis/backends/tests/test_window.py | 10 +- ibis/formats/pandas.py | 5 +- poetry.lock | 26 +- pyproject.toml | 6 +- requirements-dev.txt | 3 +- 39 files changed, 1085 insertions(+), 1034 deletions(-) delete mode 100644 ibis/backends/base/sql/alchemy/geospatial.py create mode 100644 ibis/backends/mysql/converter.py delete mode 100644 ibis/backends/mysql/datatypes.py delete mode 100644 ibis/backends/mysql/registry.py create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/mysql/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/mysql/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/mysql/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/mysql/out.sql diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index b299dff14f93..bda3dbff3ae1 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -94,15 +94,15 @@ jobs: extras: - polars - deltalake - # - name: mysql - # title: MySQL - # services: - # - mysql - # extras: - # - mysql - # - geospatial - # sys-deps: - # - libgeos-dev + - name: mysql + title: MySQL + services: + - mysql + extras: + - mysql + - geospatial + sys-deps: + - libgeos-dev - name: postgres title: PostgreSQL extras: @@ -186,17 +186,17 @@ jobs: # extras: # - risingwave exclude: - # - os: windows-latest - # backend: - # name: mysql - # title: MySQL - # extras: - # - mysql - # - geospatial - # services: - # - mysql - # sys-deps: - # - libgeos-dev + - os: windows-latest + backend: + name: mysql + title: MySQL + extras: + - mysql + - geospatial + services: + - mysql + sys-deps: + - libgeos-dev - os: windows-latest backend: name: clickhouse @@ -315,13 +315,13 @@ jobs: # extras: # - risingwave steps: - # - name: update and install system dependencies - # if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null - # run: | - # 
set -euo pipefail - # - # sudo apt-get update -qq -y - # sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }} + - name: update and install system dependencies + if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null + run: | + set -euo pipefail + + sudo apt-get update -qq -y + sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }} - name: install sqlite if: matrix.os == 'windows-latest' && matrix.backend.name == 'sqlite' @@ -666,13 +666,6 @@ jobs: # - freetds-dev # - unixodbc-dev # - tdsodbc - # - name: mysql - # title: MySQL - # services: - # - mysql - # extras: - # - geospatial - # - mysql # - name: sqlite # title: SQLite # extras: diff --git a/docker/mysql/startup.sql b/docker/mysql/startup.sql index 06d40f979281..29982e2f7d89 100644 --- a/docker/mysql/startup.sql +++ b/docker/mysql/startup.sql @@ -1,3 +1,5 @@ CREATE USER 'ibis'@'localhost' IDENTIFIED BY 'ibis'; +CREATE SCHEMA IF NOT EXISTS test_schema; GRANT CREATE, DROP ON *.* TO 'ibis'@'%'; +GRANT CREATE,SELECT,DROP ON `test_schema`.* TO 'ibis'@'%'; FLUSH PRIVILEGES; diff --git a/ibis/backends/base/sql/alchemy/__init__.py b/ibis/backends/base/sql/alchemy/__init__.py index 6088e75447b8..ec64b484061a 100644 --- a/ibis/backends/base/sql/alchemy/__init__.py +++ b/ibis/backends/base/sql/alchemy/__init__.py @@ -22,7 +22,6 @@ from ibis import util from ibis.backends.base import CanCreateSchema from ibis.backends.base.sql import BaseSQLBackend -from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported from ibis.backends.base.sql.alchemy.query_builder import AlchemyCompiler from ibis.backends.base.sql.alchemy.registry import ( fixed_arity, @@ -204,28 +203,6 @@ def _safe_raw_sql(self, *args, **kwargs): with self.begin() as con: yield con.execute(*args, **kwargs) - # TODO(kszucs): move to ibis.formats.pandas - @staticmethod - def _to_geodataframe(df, schema): - """Convert `df` to a `GeoDataFrame`. - - Required libraries for geospatial support must be installed and - a geospatial column is present in the dataframe. 
- """ - import geopandas as gpd - from geoalchemy2 import shape - - geom_col = None - for name, dtype in schema.items(): - if dtype.is_geospatial(): - if not geom_col: - geom_col = name - df[name] = df[name].map(shape.to_shape, na_action="ignore") - if geom_col: - df[geom_col] = gpd.array.GeometryArray(df[geom_col].values) - df = gpd.GeoDataFrame(df, geometry=geom_col) - return df - def fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: import pandas as pd @@ -241,8 +218,6 @@ def fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: cursor.close() raise df = PandasData.convert_table(df, schema) - if not df.empty and geospatial_supported: - return self._to_geodataframe(df, schema) return df @contextlib.contextmanager diff --git a/ibis/backends/base/sql/alchemy/datatypes.py b/ibis/backends/base/sql/alchemy/datatypes.py index 1608faee29aa..d78739264dc3 100644 --- a/ibis/backends/base/sql/alchemy/datatypes.py +++ b/ibis/backends/base/sql/alchemy/datatypes.py @@ -1,90 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING - import sqlalchemy as sa import sqlalchemy.types as sat -import toolz from sqlalchemy.ext.compiler import compiles import ibis.expr.datatypes as dt -from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported from ibis.backends.base.sqlglot.datatypes import SqlglotType -from ibis.common.collections import FrozenDict from ibis.formats import TypeMapper -if TYPE_CHECKING: - from collections.abc import Mapping - -if geospatial_supported: - import geoalchemy2 as ga - - -class ArrayType(sat.UserDefinedType): - def __init__(self, value_type: sat.TypeEngine): - self.value_type = sat.to_instance(value_type) - - def result_processor(self, dialect, coltype) -> None: - if not coltype.lower().startswith("array"): - return None - - inner_processor = ( - self.value_type.result_processor(dialect, coltype[len("array(") : -1]) - or toolz.identity - ) - - return lambda v: v if v is None else list(map(inner_processor, v)) - - -@compiles(ArrayType, "default") -def compiles_array(element, compiler, **kw): - return f"ARRAY({compiler.process(element.value_type, **kw)})" - - -@compiles(sat.FLOAT, "duckdb") -def compiles_float(element, compiler, **kw): - precision = element.precision - if precision is None or 1 <= precision <= 24: - return "FLOAT" - elif 24 < precision <= 53: - return "DOUBLE" - else: - raise ValueError( - "FLOAT precision must be between 1 and 53 inclusive, or `None`" - ) - - -class StructType(sat.UserDefinedType): - cache_ok = True - - def __init__(self, fields: Mapping[str, sat.TypeEngine]) -> None: - self.fields = FrozenDict( - {name: sat.to_instance(typ) for name, typ in fields.items()} - ) - - -@compiles(StructType, "default") -def compiles_struct(element, compiler, **kw): - quote = compiler.dialect.identifier_preparer.quote - content = ", ".join( - f"{quote(field)} {compiler.process(typ, **kw)}" - for field, typ in element.fields.items() - ) - return f"STRUCT({content})" - - -class MapType(sat.UserDefinedType): - def __init__(self, key_type: sat.TypeEngine, value_type: sat.TypeEngine): - self.key_type = sat.to_instance(key_type) - self.value_type = sat.to_instance(value_type) - - -@compiles(MapType, "default") -def compiles_map(element, compiler, **kw): - key_type = compiler.process(element.key_type, **kw) - value_type = compiler.process(element.value_type, **kw) - return f"MAP({key_type}, {value_type})" - class UInt64(sat.Integer): pass @@ -102,30 +25,14 @@ class UInt8(sat.Integer): pass -@compiles(UInt64, 
"postgresql") -@compiles(UInt32, "postgresql") -@compiles(UInt16, "postgresql") -@compiles(UInt8, "postgresql") @compiles(UInt64, "mssql") @compiles(UInt32, "mssql") @compiles(UInt16, "mssql") @compiles(UInt8, "mssql") -@compiles(UInt64, "mysql") -@compiles(UInt32, "mysql") -@compiles(UInt16, "mysql") -@compiles(UInt8, "mysql") -@compiles(UInt64, "snowflake") -@compiles(UInt32, "snowflake") -@compiles(UInt16, "snowflake") -@compiles(UInt8, "snowflake") @compiles(UInt64, "sqlite") @compiles(UInt32, "sqlite") @compiles(UInt16, "sqlite") @compiles(UInt8, "sqlite") -@compiles(UInt64, "trino") -@compiles(UInt32, "trino") -@compiles(UInt16, "trino") -@compiles(UInt8, "trino") def compile_uint(element, compiler, **kw): dialect_name = compiler.dialect.name raise TypeError( @@ -220,17 +127,6 @@ class Unknown(sa.Text): 53: dt.Float64, } -_GEOSPATIAL_TYPES = { - "POINT": dt.Point, - "LINESTRING": dt.LineString, - "POLYGON": dt.Polygon, - "MULTILINESTRING": dt.MultiLineString, - "MULTIPOINT": dt.MultiPoint, - "MULTIPOLYGON": dt.MultiPolygon, - "GEOMETRY": dt.Geometry, - "GEOGRAPHY": dt.Geography, -} - class AlchemyType(TypeMapper): @classmethod @@ -261,25 +157,6 @@ def from_ibis(cls, dtype: dt.DataType) -> sat.TypeEngine: return sat.NUMERIC(dtype.precision, dtype.scale) elif dtype.is_timestamp(): return sat.TIMESTAMP(timezone=bool(dtype.timezone)) - elif dtype.is_array(): - return ArrayType(cls.from_ibis(dtype.value_type)) - elif dtype.is_struct(): - fields = {k: cls.from_ibis(v) for k, v in dtype.fields.items()} - return StructType(fields) - elif dtype.is_map(): - return MapType( - cls.from_ibis(dtype.key_type), cls.from_ibis(dtype.value_type) - ) - elif dtype.is_geospatial(): - if geospatial_supported: - if dtype.geotype == "geometry": - return ga.Geometry - elif dtype.geotype == "geography": - return ga.Geography - else: - return ga.types._GISType - else: - raise TypeError("geospatial types are not supported") else: return _to_sqlalchemy_types[type(dtype)] @@ -306,32 +183,10 @@ def to_ibis(cls, typ: sat.TypeEngine, nullable: bool = True) -> dt.DataType: return dt.Decimal(typ.precision, typ.scale, nullable=nullable) elif isinstance(typ, sat.Numeric): return dt.Decimal(typ.precision, typ.scale, nullable=nullable) - elif isinstance(typ, ArrayType): - return dt.Array(cls.to_ibis(typ.value_type), nullable=nullable) - elif isinstance(typ, sat.ARRAY): - ndim = typ.dimensions - if ndim is not None and ndim != 1: - raise NotImplementedError("Nested array types not yet supported") - return dt.Array(cls.to_ibis(typ.item_type), nullable=nullable) - elif isinstance(typ, StructType): - fields = {k: cls.to_ibis(v) for k, v in typ.fields.items()} - return dt.Struct(fields, nullable=nullable) - elif isinstance(typ, MapType): - return dt.Map( - cls.to_ibis(typ.key_type), - cls.to_ibis(typ.value_type), - nullable=nullable, - ) elif isinstance(typ, sa.DateTime): timezone = "UTC" if typ.timezone else None return dt.Timestamp(timezone, nullable=nullable) elif isinstance(typ, sat.String): return dt.String(nullable=nullable) - elif geospatial_supported and isinstance(typ, ga.types._GISType): - name = typ.geometry_type.upper() - try: - return _GEOSPATIAL_TYPES[name](geotype=typ.name, nullable=nullable) - except KeyError: - raise ValueError(f"Unrecognized geometry type: {name}") else: raise TypeError(f"Unable to convert type: {typ!r}") diff --git a/ibis/backends/base/sql/alchemy/geospatial.py b/ibis/backends/base/sql/alchemy/geospatial.py deleted file mode 100644 index 41b86ca00e1b..000000000000 --- 
a/ibis/backends/base/sql/alchemy/geospatial.py +++ /dev/null @@ -1,10 +0,0 @@ -from __future__ import annotations - -from importlib.util import find_spec as _find_spec - -geospatial_supported = ( - _find_spec("geoalchemy2") is not None - and _find_spec("geopandas") is not None - and _find_spec("shapely") is not None -) -__all__ = ["geospatial_supported"] diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index 21f2242251c0..8b379d1d0db9 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -50,7 +50,9 @@ typecode.TEXT: dt.String, typecode.TIME: dt.Time, typecode.TIMETZ: dt.Time, + typecode.TINYBLOB: dt.Binary, typecode.TINYINT: dt.Int8, + typecode.TINYTEXT: dt.String, typecode.UBIGINT: dt.UInt64, typecode.UINT: dt.UInt32, typecode.USMALLINT: dt.UInt16, @@ -400,6 +402,10 @@ class DataFusionType(PostgresType): class MySQLType(SqlglotType): dialect = "mysql" + # these are mysql's defaults, see + # https://dev.mysql.com/doc/refman/8.0/en/fixed-point-types.html + default_decimal_precision = 10 + default_decimal_scale = 0 unknown_type_strings = FrozenDict( { @@ -428,6 +434,10 @@ def _from_sqlglot_DATETIME(cls) -> dt.Timestamp: def _from_sqlglot_TIMESTAMP(cls) -> dt.Timestamp: return dt.Timestamp(timezone="UTC", nullable=cls.default_nullable) + @classmethod + def _from_ibis_String(cls, dtype: dt.String) -> sge.DataType: + return sge.DataType(this=typecode.TEXT) + class DuckDBType(SqlglotType): dialect = "duckdb" diff --git a/ibis/backends/base/sqlglot/rewrites.py b/ibis/backends/base/sqlglot/rewrites.py index 522380d9111e..c6b02d23423c 100644 --- a/ibis/backends/base/sqlglot/rewrites.py +++ b/ibis/backends/base/sqlglot/rewrites.py @@ -176,7 +176,7 @@ def rewrite_empty_order_by_window(_, y): @replace(p.WindowFunction(p.RowNumber | p.NTile, y)) def exclude_unsupported_window_frame_from_row_number(_, y): - return ops.Subtract(_.copy(frame=y.copy(start=None, end=None)), 1) + return ops.Subtract(_.copy(frame=y.copy(start=None, end=0)), 1) @replace( diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index d9475b533ea5..afa2479a44a0 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -537,7 +537,6 @@ def ddl_con(ddl_backend): keep=( "exasol", "mssql", - "mysql", "oracle", "risingwave", "sqlite", diff --git a/ibis/backends/mssql/tests/test_client.py b/ibis/backends/mssql/tests/test_client.py index 12012ac929d6..b26c78a53c38 100644 --- a/ibis/backends/mssql/tests/test_client.py +++ b/ibis/backends/mssql/tests/test_client.py @@ -7,7 +7,6 @@ import ibis import ibis.expr.datatypes as dt from ibis import udf -from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported DB_TYPES = [ # Exact numbers @@ -53,10 +52,6 @@ ] -skipif_no_geospatial_deps = pytest.mark.skipif( - not geospatial_supported, reason="geospatial dependencies not installed" -) - broken_sqlalchemy_autoload = pytest.mark.xfail( reason="scale not inferred by sqlalchemy autoload" ) @@ -65,10 +60,6 @@ @pytest.mark.parametrize( ("server_type", "expected_type"), DB_TYPES - + [ - param("GEOMETRY", dt.geometry, marks=[skipif_no_geospatial_deps]), - param("GEOGRAPHY", dt.geography, marks=[skipif_no_geospatial_deps]), - ] + [ param( "DATETIME2(4)", dt.timestamp(scale=4), marks=[broken_sqlalchemy_autoload] diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py index 7ea409b73701..a52edbe5fba8 100644 --- a/ibis/backends/mysql/__init__.py +++ b/ibis/backends/mysql/__init__.py @@ -2,31 +2,101 
@@ from __future__ import annotations +import atexit +import contextlib +import re import warnings -from typing import TYPE_CHECKING, Literal +from functools import cached_property, partial +from itertools import repeat +from operator import itemgetter +from typing import TYPE_CHECKING, Any +from urllib.parse import parse_qs, urlparse import pymysql -import sqlalchemy as sa -from sqlalchemy.dialects import mysql +import sqlglot as sg +import sqlglot.expressions as sge +import ibis +import ibis.common.exceptions as com +import ibis.expr.operations as ops import ibis.expr.schema as sch +import ibis.expr.types as ir from ibis import util from ibis.backends.base import CanCreateDatabase -from ibis.backends.base.sql.alchemy import BaseAlchemyBackend +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.base.sqlglot.compiler import TRUE, C from ibis.backends.mysql.compiler import MySQLCompiler -from ibis.backends.mysql.datatypes import MySQLDateTime, MySQLType if TYPE_CHECKING: - from collections.abc import Iterable + from collections.abc import Iterable, Mapping + + import pandas as pd + import pyarrow as pa import ibis.expr.datatypes as dt -class Backend(BaseAlchemyBackend, CanCreateDatabase): +class Backend(SQLGlotBackend, CanCreateDatabase): name = "mysql" - compiler = MySQLCompiler + compiler = MySQLCompiler() supports_create_or_replace = False + def _from_url(self, url: str, **kwargs): + """Connect to a backend using a URL `url`. + + Parameters + ---------- + url + URL with which to connect to a backend. + kwargs + Additional keyword arguments + + Returns + ------- + BaseBackend + A backend instance + """ + + url = urlparse(url) + database, *_ = url.path[1:].split("/", 1) + query_params = parse_qs(url.query) + connect_args = { + "user": url.username, + "password": url.password or "", + "host": url.hostname, + "database": database or "", + } + + for name, value in query_params.items(): + if len(value) > 1: + connect_args[name] = value + elif len(value) == 1: + connect_args[name] = value[0] + else: + raise com.IbisError(f"Invalid URL parameter: {name}") + + kwargs.update(connect_args) + self._convert_kwargs(kwargs) + + if "user" in kwargs and not kwargs["user"]: + del kwargs["user"] + + if "host" in kwargs and not kwargs["host"]: + del kwargs["host"] + + if "database" in kwargs and not kwargs["database"]: + del kwargs["database"] + + if "password" in kwargs and kwargs["password"] is None: + del kwargs["password"] + + return self.connect(**kwargs) + + @cached_property + def version(self): + matched = re.search(r"(\d+)\.(\d+)\.(\d+)", self.con.server_version) + return ".".join(matched.groups()) + def do_connect( self, host: str = "localhost", @@ -34,8 +104,7 @@ def do_connect( password: str | None = None, port: int = 3306, database: str | None = None, - url: str | None = None, - driver: Literal["pymysql"] = "pymysql", + autocommit: bool = True, **kwargs, ) -> None: """Create an Ibis client using the passed connection parameters. @@ -52,15 +121,10 @@ def do_connect( Port database Database to connect to - url - Complete SQLAlchemy connection string. If passed, the other - connection arguments are ignored. - driver - Python MySQL database driver + autocommit + Autocommit mode kwargs - Additional keyword arguments passed to `connect_args` in - `sqlalchemy.create_engine`. Use these to pass dialect specific - arguments. 
+ Additional keyword arguments passed to `pymysql.connect` Examples -------- @@ -92,96 +156,362 @@ def do_connect( year : int32 month : int32 """ - if driver != "pymysql": - raise NotImplementedError("pymysql is currently the only supported driver") - alchemy_url = self._build_alchemy_url( - url=url, + con = pymysql.connect( + user=user, host=host, port=port, - user=user, password=password, database=database, - driver=f"mysql+{driver}", + autocommit=autocommit, + conv=pymysql.converters.conversions, + **kwargs, ) - engine = sa.create_engine( - alchemy_url, poolclass=sa.pool.StaticPool, connect_args=kwargs - ) - - @sa.event.listens_for(engine, "connect") - def connect(dbapi_connection, connection_record): - with dbapi_connection.cursor() as cur: - try: - cur.execute("SET @@session.time_zone = 'UTC'") - except (sa.exc.OperationalError, pymysql.err.OperationalError): - warnings.warn("Unable to set session timezone to UTC.") + with contextlib.closing(con.cursor()) as cur: + try: + cur.execute("SET @@session.time_zone = 'UTC'") + except Exception as e: # noqa: BLE001 + warnings.warn(f"Unable to set session timezone to UTC: {e}") - super().do_connect(engine) + self.con = con + self._temp_views = set() @property def current_database(self) -> str: - return self._scalar_query(sa.select(sa.func.database())) - - @staticmethod - def _new_sa_metadata(): - meta = sa.MetaData() - - @sa.event.listens_for(meta, "column_reflect") - def column_reflect(inspector, table, column_info): - if isinstance(column_info["type"], mysql.DATETIME): - column_info["type"] = MySQLDateTime() - if isinstance(column_info["type"], mysql.DOUBLE): - column_info["type"] = mysql.DOUBLE(asdecimal=False) - if isinstance(column_info["type"], mysql.FLOAT): - column_info["type"] = mysql.FLOAT(asdecimal=False) - - return meta + with self._safe_raw_sql(sg.select(self.compiler.f.database())) as cur: + [(database,)] = cur.fetchall() + return database def list_databases(self, like: str | None = None) -> list[str]: # In MySQL, "database" and "schema" are synonymous - databases = self.inspector.get_schema_names() + with self._safe_raw_sql("SHOW DATABASES") as cur: + databases = list(map(itemgetter(0), cur.fetchall())) return self._filter_with_like(databases, like) - def _metadata(self, table: str) -> Iterable[tuple[str, dt.DataType]]: - with self.begin() as con: - result = con.exec_driver_sql(f"DESCRIBE {table}").mappings().all() + def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]: + table = util.gen_name("mysql_metadata") - for field in result: - name = field["Field"] - type_string = field["Type"] - is_nullable = field["Null"] == "YES" - yield name, MySQLType.from_string(type_string, nullable=is_nullable) + with self.begin() as cur: + cur.execute(f"CREATE TEMPORARY TABLE {table} AS {query}") + try: + cur.execute(f"DESCRIBE {table}") + result = cur.fetchall() + finally: + cur.execute(f"DROP TABLE {table}") - def _get_schema_using_query(self, query: str): - table = f"__ibis_mysql_metadata_{util.guid()}" + type_mapper = self.compiler.type_mapper + return ( + (name, type_mapper.from_string(type_string, nullable=is_nullable == "YES")) + for name, type_string, is_nullable, *_ in result + ) - with self.begin() as con: - con.exec_driver_sql(f"CREATE TEMPORARY TABLE {table} AS {query}") - result = con.exec_driver_sql(f"DESCRIBE {table}").mappings().all() - con.exec_driver_sql(f"DROP TABLE {table}") + def get_schema( + self, name: str, schema: str | None = None, database: str | None = None + ) -> sch.Schema: + table = sg.table(name, 
db=schema, catalog=database, quoted=True).sql(self.name) - fields = {} - for field in result: - name = field["Field"] - type_string = field["Type"] - is_nullable = field["Null"] == "YES" - fields[name] = MySQLType.from_string(type_string, nullable=is_nullable) + with self.begin() as cur: + cur.execute(f"DESCRIBE {table}") + result = cur.fetchall() + + type_mapper = self.compiler.type_mapper + fields = { + name: type_mapper.from_string(type_string, nullable=is_nullable == "YES") + for name, type_string, is_nullable, *_ in result + } return sch.Schema(fields) - def _get_temp_view_definition( - self, name: str, definition: sa.sql.compiler.Compiled - ) -> str: - yield f"CREATE OR REPLACE VIEW {name} AS {definition}" + def _get_temp_view_definition(self, name: str, definition: str) -> str: + return sge.Create( + kind="VIEW", + replace=True, + this=sg.to_identifier(name, quoted=self.compiler.quoted), + expression=definition, + ) def create_database(self, name: str, force: bool = False) -> None: - name = self._quote(name) - if_exists = "IF NOT EXISTS " * force - with self.begin() as con: - con.exec_driver_sql(f"CREATE DATABASE {if_exists}{name}") + sql = sge.Create(kind="DATABASE", exist=force, this=sg.to_identifier(name)).sql( + self.name + ) + with self.begin() as cur: + cur.execute(sql) def drop_database(self, name: str, force: bool = False) -> None: - name = self._quote(name) - if_exists = "IF EXISTS " * force - with self.begin() as con: - con.exec_driver_sql(f"DROP DATABASE {if_exists}{name}") + sql = sge.Drop(kind="DATABASE", exist=force, this=sg.to_identifier(name)).sql( + self.name + ) + with self.begin() as cur: + cur.execute(sql) + + @contextlib.contextmanager + def begin(self): + con = self.con + cur = con.cursor() + try: + yield cur + except Exception: + con.rollback() + raise + else: + con.commit() + finally: + cur.close() + + @contextlib.contextmanager + def _safe_raw_sql(self, *args, **kwargs): + with contextlib.closing(self.raw_sql(*args, **kwargs)) as result: + yield result + + def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: + with contextlib.suppress(AttributeError): + query = query.sql(dialect=self.name) + + con = self.con + cursor = con.cursor() + + try: + cursor.execute(query, **kwargs) + except Exception: + con.rollback() + cursor.close() + raise + else: + con.commit() + return cursor + + def list_tables( + self, like: str | None = None, schema: str | None = None + ) -> list[str]: + """List the tables in the database. + + Parameters + ---------- + like + A pattern to use for listing tables. + schema + The schema to perform the list against. 
+ """ + conditions = [TRUE] + + if schema is not None: + conditions = C.table_schema.eq(sge.convert(schema)) + + col = "table_name" + sql = ( + sg.select(col) + .from_(sg.table("tables", db="information_schema")) + .distinct() + .where(*conditions) + .sql(self.name, pretty=True) + ) + + with self._safe_raw_sql(sql) as cur: + out = cur.fetchall() + + return self._filter_with_like(map(itemgetter(0), out), like) + + def execute( + self, expr: ir.Expr, limit: str | None = "default", **kwargs: Any + ) -> Any: + """Execute an expression.""" + + self._run_pre_execute_hooks(expr) + table = expr.as_table() + sql = self.compile(table, limit=limit, **kwargs) + + schema = table.schema() + + with self._safe_raw_sql(sql) as cur: + result = self._fetch_from_cursor(cur, schema) + return expr.__pandas_result__(result) + + def create_table( + self, + name: str, + obj: pd.DataFrame | pa.Table | ir.Table | None = None, + *, + schema: ibis.Schema | None = None, + database: str | None = None, + temp: bool = False, + overwrite: bool = False, + ) -> ir.Table: + if obj is None and schema is None: + raise ValueError("Either `obj` or `schema` must be specified") + + if database is not None and database != self.current_database: + raise com.UnsupportedOperationError( + "Creating tables in other databases is not supported by Postgres" + ) + else: + database = None + + properties = [] + + if temp: + properties.append(sge.TemporaryProperty()) + + if obj is not None: + if not isinstance(obj, ir.Expr): + table = ibis.memtable(obj) + else: + table = obj + + self._run_pre_execute_hooks(table) + + query = self._to_sqlglot(table) + else: + query = None + + column_defs = [ + sge.ColumnDef( + this=sg.to_identifier(colname, quoted=self.compiler.quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [sge.ColumnConstraint(kind=sge.NotNullColumnConstraint())] + ), + ) + for colname, typ in (schema or table.schema()).items() + ] + + if overwrite: + temp_name = util.gen_name(f"{self.name}_table") + else: + temp_name = name + + table = sg.table(temp_name, catalog=database, quoted=self.compiler.quoted) + target = sge.Schema(this=table, expressions=column_defs) + + create_stmt = sge.Create( + kind="TABLE", + this=target, + properties=sge.Properties(expressions=properties), + ) + + this = sg.table(name, catalog=database, quoted=self.compiler.quoted) + with self._safe_raw_sql(create_stmt) as cur: + if query is not None: + insert_stmt = sge.Insert(this=table, expression=query).sql(self.name) + cur.execute(insert_stmt) + + if overwrite: + cur.execute( + sge.Drop(kind="TABLE", this=this, exists=True).sql(self.name) + ) + cur.execute( + f"ALTER TABLE IF EXISTS {table.sql(self.name)} RENAME TO {this.sql(self.name)}" + ) + + if schema is None: + return self.table(name, schema=database) + + # preserve the input schema if it was provided + return ops.DatabaseTable( + name, schema=schema, source=self, namespace=ops.Namespace(database=database) + ).to_expr() + + def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: + schema = op.schema + if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]: + raise com.IbisTypeError( + "MySQL cannot yet reliably handle `null` typed columns; " + f"got null typed columns: {null_columns}" + ) + + # only register if we haven't already done so + if (name := op.name) not in self.list_tables(): + quoted = self.compiler.quoted + column_defs = [ + sg.exp.ColumnDef( + this=sg.to_identifier(colname, quoted=quoted), + 
kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [ + sg.exp.ColumnConstraint( + kind=sg.exp.NotNullColumnConstraint() + ) + ] + ), + ) + for colname, typ in schema.items() + ] + + create_stmt = sg.exp.Create( + kind="TABLE", + this=sg.exp.Schema( + this=sg.to_identifier(name, quoted=quoted), expressions=column_defs + ), + properties=sg.exp.Properties(expressions=[sge.TemporaryProperty()]), + ) + create_stmt_sql = create_stmt.sql(self.name) + + columns = schema.keys() + df = op.data.to_frame() + data = df.itertuples(index=False) + cols = ", ".join( + ident.sql(self.name) + for ident in map(partial(sg.to_identifier, quoted=quoted), columns) + ) + specs = ", ".join(repeat("%s", len(columns))) + table = sg.table(name, quoted=quoted) + sql = f"INSERT INTO {table.sql(self.name)} ({cols}) VALUES ({specs})" + with self.begin() as cur: + cur.execute(create_stmt_sql) + + if not df.empty: + cur.executemany(sql, data) + + @util.experimental + def to_pyarrow_batches( + self, + expr: ir.Expr, + *, + params: Mapping[ir.Scalar, Any] | None = None, + limit: int | str | None = None, + chunk_size: int = 1_000_000, + **_: Any, + ) -> pa.ipc.RecordBatchReader: + import pyarrow as pa + + self._run_pre_execute_hooks(expr) + + schema = expr.as_table().schema() + with self._safe_raw_sql( + self.compile(expr, limit=limit, params=params) + ) as cursor: + df = self._fetch_from_cursor(cursor, schema) + table = pa.Table.from_pandas( + df, schema=schema.to_pyarrow(), preserve_index=False + ) + return table.to_reader(max_chunksize=chunk_size) + + def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: + import pandas as pd + + from ibis.backends.mysql.converter import MySQLPandasData + + try: + df = pd.DataFrame.from_records( + cursor, columns=schema.names, coerce_float=True + ) + except Exception: + # clean up the cursor if we fail to create the DataFrame + # + # in the sqlite case failing to close the cursor results in + # artificially locked tables + cursor.close() + raise + df = MySQLPandasData.convert_table(df, schema) + return df + + def _register_temp_view_cleanup(self, name: str) -> None: + def drop(self, name: str, query: str): + self.raw_sql(query) + self._temp_views.discard(name) + + query = sge.Drop(this=sg.table(name), kind="VIEW", exists=True) + atexit.register(drop, self, name=name, query=query) diff --git a/ibis/backends/mysql/compiler.py b/ibis/backends/mysql/compiler.py index 529dfe84b211..d053c83c4300 100644 --- a/ibis/backends/mysql/compiler.py +++ b/ibis/backends/mysql/compiler.py @@ -1,28 +1,422 @@ from __future__ import annotations -import sqlalchemy as sa +import string +from functools import partial, reduce, singledispatchmethod -from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator -from ibis.backends.mysql.datatypes import MySQLType -from ibis.backends.mysql.registry import operation_registry -from ibis.expr.rewrites import rewrite_sample +import sqlglot as sg +import sqlglot.expressions as sge +from public import public +from sqlglot.dialects import MySQL +from sqlglot.dialects.dialect import rename_func +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis.backends.base.sqlglot.compiler import NULL, STAR, SQLGlotCompiler +from ibis.backends.base.sqlglot.datatypes import MySQLType +from ibis.backends.base.sqlglot.rewrites import ( + exclude_unsupported_window_frame_from_ops, + exclude_unsupported_window_frame_from_row_number, + 
rewrite_empty_order_by_window, + rewrite_first_to_first_value, + rewrite_last_to_last_value, +) +from ibis.common.patterns import replace +from ibis.expr.rewrites import p, rewrite_sample -class MySQLExprTranslator(AlchemyExprTranslator): - # https://dev.mysql.com/doc/refman/8.0/en/spatial-function-reference.html - _registry = operation_registry.copy() - _rewrites = AlchemyExprTranslator._rewrites.copy() - _integer_to_timestamp = sa.func.from_unixtime - native_json_type = False - _dialect_name = "mysql" +MySQL.Generator.TRANSFORMS |= { + sge.LogicalOr: rename_func("max"), + sge.LogicalAnd: rename_func("min"), + sge.VariancePop: rename_func("var_pop"), + sge.Variance: rename_func("var_samp"), + sge.Stddev: rename_func("stddev_pop"), + sge.StddevPop: rename_func("stddev_pop"), + sge.StddevSamp: rename_func("stddev_samp"), + sge.RegexpLike: ( + lambda _, e: f"({e.this.sql('mysql')} RLIKE {e.expression.sql('mysql')})" + ), +} + + +@replace(p.Limit) +def rewrite_limit(_, **kwargs): + """Rewrite limit for MySQL to include a large upper bound. + + From the MySQL docs @ https://dev.mysql.com/doc/refman/8.0/en/select.html + + > To retrieve all rows from a certain offset up to the end of the result + > set, you can use some large number for the second parameter. This statement + > retrieves all rows from the 96th row to the last: + > + > SELECT * FROM tbl LIMIT 95,18446744073709551615; + """ + if _.n is None and _.offset is not None: + some_large_number = (1 << 64) - 1 + return _.copy(n=some_large_number) + return _ + + +@public +class MySQLCompiler(SQLGlotCompiler): + __slots__ = () + + dialect = "mysql" type_mapper = MySQLType + rewrites = ( + rewrite_limit, + rewrite_sample, + rewrite_first_to_first_value, + rewrite_last_to_last_value, + exclude_unsupported_window_frame_from_ops, + exclude_unsupported_window_frame_from_row_number, + rewrite_empty_order_by_window, + *SQLGlotCompiler.rewrites, + ) + quoted = True + + @property + def NAN(self): + raise NotImplementedError("MySQL does not support NaN") + + @property + def POS_INF(self): + raise NotImplementedError("MySQL does not support Infinity") + + NEG_INF = POS_INF + + def _aggregate(self, funcname: str, *args, where): + func = self.f[funcname] + if where is not None: + args = tuple(self.if_(where, arg, NULL) for arg in args) + return func(*args) + + @singledispatchmethod + def visit_node(self, op, **kwargs): + return super().visit_node(op, **kwargs) + + @staticmethod + def _minimize_spec(start, end, spec): + if ( + start is None + and isinstance(getattr(end, "value", None), ops.Literal) + and end.value.value == 0 + and end.following + ): + return None + return spec + + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + from_ = op.arg.dtype + if (from_.is_json() or from_.is_string()) and to.is_json(): + # MariaDB does not support casting to JSON because it's an alias + # for TEXT (except when casting of course!) 
+ return arg + elif from_.is_integer() and to.is_interval(): + return self.visit_IntervalFromInteger( + ops.IntervalFromInteger(op.arg, unit=to.unit), arg=arg, unit=to.unit + ) + elif from_.is_integer() and to.is_timestamp(): + return self.f.from_unixtime(arg) + return super().visit_Cast(op, arg=arg, to=to) + + @visit_node.register(ops.TimestampDiff) + def visit_TimestampDiff(self, op, *, left, right): + return self.f.timestampdiff( + sge.Var(this="SECOND"), right, left, dialect=self.dialect + ) + + @visit_node.register(ops.DateDiff) + def visit_DateDiff(self, op, *, left, right): + return self.f.timestampdiff( + sge.Var(this="DAY"), right, left, dialect=self.dialect + ) + + @visit_node.register(ops.ApproxCountDistinct) + def visit_ApproxCountDistinct(self, op, *, arg, where): + if where is not None: + arg = self.if_(where, arg) + return self.f.count(sge.Distinct(expressions=[arg])) + + @visit_node.register(ops.CountStar) + def visit_CountStar(self, op, *, arg, where): + if where is not None: + return self.f.sum(self.cast(where, op.dtype)) + return self.f.count(STAR) + + @visit_node.register(ops.CountDistinct) + def visit_CountDistinct(self, op, *, arg, where): + if where is not None: + arg = self.if_(where, arg) + return self.f.count(sge.Distinct(expressions=[arg])) + + @visit_node.register(ops.CountDistinctStar) + def visit_CountDistinctStar(self, op, *, arg, where): + if where is not None: + raise com.UnsupportedOperationError( + "Filtered table count distinct is not supported in MySQL" + ) + func = partial(sg.column, table=arg.alias_or_name, quoted=self.quoted) + return self.f.count( + sge.Distinct(expressions=list(map(func, op.arg.schema.keys()))) + ) + + @visit_node.register(ops.GroupConcat) + def visit_GroupConcat(self, op, *, arg, sep, where): + if not isinstance(op.sep, ops.Literal): + raise com.UnsupportedOperationError( + "Only string literal separators are supported" + ) + if where is not None: + arg = self.if_(where, arg) + return self.f.group_concat(arg, sep) + + @visit_node.register(ops.DayOfWeekIndex) + def visit_DayOfWeekIndex(self, op, *, arg): + return (self.f.dayofweek(arg) + 5) % 7 + + @visit_node.register(ops.Literal) + def visit_Literal(self, op, *, value, dtype): + # avoid casting NULL: the set of types allowed by MySQL and + # MariaDB when casting is a strict subset of allowed types in other + # contexts like CREATE TABLE + if value is None: + return NULL + return super().visit_Literal(op, value=value, dtype=dtype) + + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_decimal() and not value.is_finite(): + raise com.UnsupportedOperationError( + "MySQL does not support NaN or infinity" + ) + elif dtype.is_binary(): + return self.f.unhex(value.hex()) + elif dtype.is_date(): + return self.f.date(value.isoformat()) + elif dtype.is_timestamp(): + return self.f.timestamp(value.isoformat()) + elif dtype.is_time(): + return self.f.maketime( + value.hour, value.minute, value.second + value.microsecond / 1e6 + ) + elif dtype.is_array() or dtype.is_struct() or dtype.is_map(): + raise com.UnsupportedBackendType( + "MySQL does not support arrays, structs or maps" + ) + elif dtype.is_string(): + return sge.convert(value.replace("\\", "\\\\")) + return None + + @visit_node.register(ops.JSONGetItem) + def visit_JSONGetItem(self, op, *, arg, index): + if op.index.dtype.is_integer(): + path = self.f.concat("$[", self.cast(index, dt.string), "]") + else: + path = self.f.concat("$.", index) + return self.f.json_extract(arg, path) + + 
@visit_node.register(ops.DateFromYMD) + def visit_DateFromYMD(self, op, *, year, month, day): + return self.f.str_to_date( + self.f.concat( + self.f.lpad(year, 4, "0"), + self.f.lpad(month, 2, "0"), + self.f.lpad(day, 2, "0"), + ), + "%Y%m%d", + ) + + @visit_node.register(ops.FindInSet) + def visit_FindInSet(self, op, *, needle, values): + return self.f.find_in_set(needle, self.f.concat_ws(",", values)) + + @visit_node.register(ops.EndsWith) + def visit_EndsWith(self, op, *, arg, end): + to = sge.DataType(this=sge.DataType.Type.BINARY) + return self.f.right(arg, self.f.char_length(end)).eq(sge.Cast(this=end, to=to)) + + @visit_node.register(ops.StartsWith) + def visit_StartsWith(self, op, *, arg, start): + to = sge.DataType(this=sge.DataType.Type.BINARY) + return self.f.left(arg, self.f.length(start)).eq(sge.Cast(this=start, to=to)) + + @visit_node.register(ops.RegexSearch) + def visit_RegexSearch(self, op, *, arg, pattern): + return arg.rlike(pattern) + + @visit_node.register(ops.RegexExtract) + def visit_RegexExtract(self, op, *, arg, pattern, index): + extracted = self.f.regexp_substr(arg, pattern) + return self.if_( + arg.rlike(pattern), + self.if_( + index.eq(0), + extracted, + self.f.regexp_replace( + extracted, pattern, rf"\\{index.sql(self.dialect)}" + ), + ), + NULL, + ) + + @visit_node.register(ops.Equals) + def visit_Equals(self, op, *, left, right): + if op.left.dtype.is_string(): + assert op.right.dtype.is_string(), op.right.dtype + to = sge.DataType(this=sge.DataType.Type.BINARY) + return sge.Cast(this=left, to=to).eq(right) + return super().visit_Equals(op, left=left, right=right) + + @visit_node.register(ops.StringContains) + def visit_StringContains(self, op, *, haystack, needle): + return self.f.instr(haystack, needle) > 0 + + @visit_node.register(ops.StringFind) + def visit_StringFind(self, op, *, arg, substr, start, end): + if end is not None: + raise NotImplementedError( + "`end` argument is not implemented for MySQL `StringValue.find`" + ) + substr = sge.Cast(this=substr, to=sge.DataType(this=sge.DataType.Type.BINARY)) + + if start is not None: + return self.f.locate(substr, arg, start + 1) + return self.f.locate(substr, arg) + + @visit_node.register(ops.Capitalize) + def visit_Capitalize(self, op, *, arg): + return self.f.concat( + self.f.upper(self.f.left(arg, 1)), self.f.lower(self.f.substr(arg, 2)) + ) + + def visit_LRStrip(self, op, *, arg, position): + return reduce( + lambda arg, char: self.f.trim(this=arg, position=position, expression=char), + map( + partial(self.cast, to=dt.string), + map(self.f.unhex, map(self.f.hex, string.whitespace.encode())), + ), + arg, + ) + + @visit_node.register(ops.DateTruncate) + @visit_node.register(ops.TimestampTruncate) + def visit_DateTimestampTruncate(self, op, *, arg, unit): + truncate_formats = { + "s": "%Y-%m-%d %H:%i:%s", + "m": "%Y-%m-%d %H:%i:00", + "h": "%Y-%m-%d %H:00:00", + "D": "%Y-%m-%d", + # 'W': 'week', + "M": "%Y-%m-01", + "Y": "%Y-01-01", + } + if (format := truncate_formats.get(unit.short)) is None: + raise com.UnsupportedOperationError(f"Unsupported truncate unit {op.unit}") + return self.f.date_format(arg, format) + + @visit_node.register(ops.TimeDelta) + @visit_node.register(ops.DateDelta) + def visit_DateTimeDelta(self, op, *, left, right, part): + return self.f.timestampdiff( + sge.Var(this=part.this), right, left, dialect=self.dialect + ) + + @visit_node.register(ops.ExtractMillisecond) + def visit_ExtractMillisecond(self, op, *, arg): + return self.f.floor(self.f.extract(sge.Var(this="microsecond"), 
arg) / 1_000) + + @visit_node.register(ops.ExtractMicrosecond) + def visit_ExtractMicrosecond(self, op, *, arg): + return self.f.floor(self.f.extract(sge.Var(this="microsecond"), arg)) + + @visit_node.register(ops.Strip) + def visit_Strip(self, op, *, arg): + return self.visit_LRStrip(op, arg=arg, position="BOTH") + + @visit_node.register(ops.LStrip) + def visit_LStrip(self, op, *, arg): + return self.visit_LRStrip(op, arg=arg, position="LEADING") + + @visit_node.register(ops.RStrip) + def visit_RStrip(self, op, *, arg): + return self.visit_LRStrip(op, arg=arg, position="TRAILING") + + @visit_node.register(ops.IntervalFromInteger) + def visit_IntervalFromInteger(self, op, *, arg, unit): + return sge.Interval(this=arg, unit=sge.convert(op.resolution.upper())) + + @visit_node.register(ops.TimestampAdd) + def visit_TimestampAdd(self, op, *, left, right): + if op.right.dtype.unit.short == "ms": + right = sge.Interval( + this=right.this * 1_000, unit=sge.Var(this="MICROSECOND") + ) + return self.f.date_add(left, right, dialect=self.dialect) + + @visit_node.register(ops.ApproxMedian) + @visit_node.register(ops.Arbitrary) + @visit_node.register(ops.ArgMax) + @visit_node.register(ops.ArgMin) + @visit_node.register(ops.ArrayCollect) + @visit_node.register(ops.Array) + @visit_node.register(ops.ArrayFlatten) + @visit_node.register(ops.ArrayMap) + @visit_node.register(ops.Covariance) + @visit_node.register(ops.First) + @visit_node.register(ops.Last) + @visit_node.register(ops.Levenshtein) + @visit_node.register(ops.Median) + @visit_node.register(ops.Mode) + @visit_node.register(ops.MultiQuantile) + @visit_node.register(ops.Quantile) + @visit_node.register(ops.RegexReplace) + @visit_node.register(ops.RegexSplit) + @visit_node.register(ops.RowID) + @visit_node.register(ops.StringSplit) + @visit_node.register(ops.StructColumn) + @visit_node.register(ops.TimestampBucket) + @visit_node.register(ops.TimestampDelta) + @visit_node.register(ops.Translate) + @visit_node.register(ops.Unnest) + def visit_Undefined(self, op, **_): + raise com.OperationNotDefinedError(type(op).__name__) + + +_SIMPLE_OPS = { + ops.BitAnd: "bit_and", + ops.BitOr: "bit_or", + ops.BitXor: "bit_xor", + ops.DayOfWeekName: "dayname", + ops.Log10: "log10", + ops.LPad: "lpad", + ops.RPad: "rpad", + ops.StringAscii: "ascii", + ops.StringContains: "instr", + ops.ExtractWeekOfYear: "weekofyear", + ops.ExtractEpochSeconds: "unix_timestamp", + ops.ExtractDayOfYear: "dayofyear", + ops.Strftime: "date_format", + ops.StringToTimestamp: "str_to_date", + ops.Log2: "log2", +} + + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @MySQLCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) + + else: + @MySQLCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) -rewrites = MySQLExprTranslator.rewrites + setattr(MySQLCompiler, f"visit_{_op.__name__}", _fmt) -class MySQLCompiler(AlchemyCompiler): - translator_class = MySQLExprTranslator - support_values_syntax_in_select = False - null_limit = None - rewrites = AlchemyCompiler.rewrites | rewrite_sample +del _op, _name, _fmt diff --git a/ibis/backends/mysql/converter.py b/ibis/backends/mysql/converter.py new file mode 100644 index 000000000000..ffa277c56de4 --- /dev/null +++ b/ibis/backends/mysql/converter.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +import 
datetime + +from ibis.formats.pandas import PandasData + + +class MySQLPandasData(PandasData): + @classmethod + def convert_Time(cls, s, dtype, pandas_type): + def convert(timedelta): + comps = timedelta.components + return datetime.time( + hour=comps.hours, + minute=comps.minutes, + second=comps.seconds, + microsecond=comps.microseconds, + ) + + return s.map(convert, na_action="ignore") + + @classmethod + def convert_Timestamp(cls, s, dtype, pandas_type): + if s.dtype == "object": + s = s.replace("0000-00-00 00:00:00", None) + return super().convert_Timestamp(s, dtype, pandas_type) diff --git a/ibis/backends/mysql/datatypes.py b/ibis/backends/mysql/datatypes.py deleted file mode 100644 index 05ef1cc5a496..000000000000 --- a/ibis/backends/mysql/datatypes.py +++ /dev/null @@ -1,101 +0,0 @@ -from __future__ import annotations - -import sqlalchemy.types as sat -from sqlalchemy.dialects import mysql - -import ibis.expr.datatypes as dt -from ibis.backends.base.sql.alchemy.datatypes import UUID, AlchemyType -from ibis.backends.base.sqlglot.datatypes import MySQLType as SqlglotMySQLType - - -class MySQLDateTime(mysql.DATETIME): - """Custom DATETIME type for MySQL that handles zero values.""" - - def result_processor(self, *_): - return lambda v: None if v == "0000-00-00 00:00:00" else v - - -_to_mysql_types = { - dt.Boolean: mysql.BOOLEAN, - dt.Int8: mysql.TINYINT, - dt.Int16: mysql.SMALLINT, - dt.Int32: mysql.INTEGER, - dt.Int64: mysql.BIGINT, - dt.Float16: mysql.FLOAT, - dt.Float32: mysql.FLOAT, - dt.Float64: mysql.DOUBLE, - dt.String: mysql.TEXT, - dt.JSON: mysql.JSON, - dt.Timestamp: MySQLDateTime, -} - -_from_mysql_types = { - mysql.BIGINT: dt.Int64, - mysql.BINARY: dt.Binary, - mysql.BLOB: dt.Binary, - mysql.BOOLEAN: dt.Boolean, - mysql.DATETIME: dt.Timestamp, - mysql.DOUBLE: dt.Float64, - mysql.FLOAT: dt.Float32, - mysql.INTEGER: dt.Int32, - mysql.JSON: dt.JSON, - mysql.LONGBLOB: dt.Binary, - mysql.LONGTEXT: dt.String, - mysql.MEDIUMBLOB: dt.Binary, - mysql.MEDIUMINT: dt.Int32, - mysql.MEDIUMTEXT: dt.String, - mysql.REAL: dt.Float64, - mysql.SMALLINT: dt.Int16, - mysql.TEXT: dt.String, - mysql.DATE: dt.Date, - mysql.TINYBLOB: dt.Binary, - mysql.TINYINT: dt.Int8, - mysql.VARBINARY: dt.Binary, - mysql.VARCHAR: dt.String, - mysql.ENUM: dt.String, - mysql.CHAR: dt.String, - mysql.TIME: dt.Time, - mysql.YEAR: dt.Int8, - MySQLDateTime: dt.Timestamp, - UUID: dt.UUID, -} - - -class MySQLType(AlchemyType): - dialect = "mysql" - - @classmethod - def from_ibis(cls, dtype): - try: - return _to_mysql_types[type(dtype)] - except KeyError: - return super().from_ibis(dtype) - - @classmethod - def to_ibis(cls, typ, nullable=True): - if isinstance(typ, (sat.NUMERIC, mysql.NUMERIC, mysql.DECIMAL)): - # https://dev.mysql.com/doc/refman/8.0/en/fixed-point-types.html - return dt.Decimal(typ.precision or 10, typ.scale or 0, nullable=nullable) - elif isinstance(typ, mysql.BIT): - if 1 <= (length := typ.length) <= 8: - return dt.Int8(nullable=nullable) - elif 9 <= length <= 16: - return dt.Int16(nullable=nullable) - elif 17 <= length <= 32: - return dt.Int32(nullable=nullable) - elif 33 <= length <= 64: - return dt.Int64(nullable=nullable) - else: - raise ValueError(f"Invalid MySQL BIT length: {length:d}") - elif isinstance(typ, mysql.TIMESTAMP): - return dt.Timestamp(timezone="UTC", nullable=nullable) - elif isinstance(typ, mysql.SET): - return dt.Array(dt.string, nullable=nullable) - elif dtype := _from_mysql_types.get(type(typ)): - return dtype(nullable=nullable) - else: - return super().to_ibis(typ, 
nullable=nullable) - - @classmethod - def from_string(cls, type_string, nullable=True): - return SqlglotMySQLType.from_string(type_string, nullable=nullable) diff --git a/ibis/backends/mysql/registry.py b/ibis/backends/mysql/registry.py deleted file mode 100644 index 9b326cc2e63b..000000000000 --- a/ibis/backends/mysql/registry.py +++ /dev/null @@ -1,265 +0,0 @@ -from __future__ import annotations - -import contextlib -import functools -import operator -import string - -import sqlalchemy as sa -from sqlalchemy.ext.compiler import compiles -from sqlalchemy.sql.functions import GenericFunction - -import ibis -import ibis.common.exceptions as com -import ibis.expr.operations as ops -from ibis.backends.base.sql.alchemy import ( - fixed_arity, - sqlalchemy_operation_registry, - sqlalchemy_window_functions_registry, - unary, -) -from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported -from ibis.backends.base.sql.alchemy.registry import ( - geospatial_functions, -) - -operation_registry = sqlalchemy_operation_registry.copy() - -# NOTE: window functions are available from MySQL 8 and MariaDB 10.2 -operation_registry.update(sqlalchemy_window_functions_registry) - -if geospatial_supported: - operation_registry.update(geospatial_functions) - -_truncate_formats = { - "s": "%Y-%m-%d %H:%i:%s", - "m": "%Y-%m-%d %H:%i:00", - "h": "%Y-%m-%d %H:00:00", - "D": "%Y-%m-%d", - # 'W': 'week', - "M": "%Y-%m-01", - "Y": "%Y-01-01", -} - - -def _truncate(t, op): - sa_arg = t.translate(op.arg) - try: - fmt = _truncate_formats[op.unit.short] - except KeyError: - raise com.UnsupportedOperationError(f"Unsupported truncate unit {op.unit}") - return sa.func.date_format(sa_arg, fmt) - - -def _round(t, op): - sa_arg = t.translate(op.arg) - - if op.digits is None: - sa_digits = 0 - else: - sa_digits = t.translate(op.digits) - - return sa.func.round(sa_arg, sa_digits) - - -def _interval_from_integer(t, op): - if op.unit.short in {"ms", "ns"}: - raise com.UnsupportedOperationError( - f"MySQL does not allow operation with INTERVAL offset {op.unit}" - ) - - sa_arg = t.translate(op.arg) - text_unit = op.dtype.resolution.upper() - - # XXX: Is there a better way to handle this? I.e. can we somehow use - # the existing bind parameter produced by translate and reuse its name in - # the string passed to sa.text? 
- if isinstance(sa_arg, sa.sql.elements.BindParameter): - return sa.text(f"INTERVAL :arg {text_unit}").bindparams(arg=sa_arg.value) - return sa.text(f"INTERVAL {sa_arg} {text_unit}") - - -def _literal(_, op): - dtype = op.dtype - value = op.value - if value is None: - return sa.null() - if dtype.is_interval(): - if dtype.unit.short in {"ms", "ns"}: - raise com.UnsupportedOperationError( - f"MySQL does not allow operation with INTERVAL offset {dtype.unit}" - ) - text_unit = dtype.resolution.upper() - sa_text = sa.text(f"INTERVAL :value {text_unit}") - return sa_text.bindparams(value=value) - elif dtype.is_binary(): - # the cast to BINARY is necessary here, otherwise the data come back as - # Python strings - # - # This lets the database handle encoding rather than ibis - return sa.cast(sa.literal(value), type_=sa.BINARY()) - elif dtype.is_date(): - return sa.func.date(value.isoformat()) - elif dtype.is_timestamp(): - # TODO: timezones - return sa.func.timestamp(value.isoformat()) - elif dtype.is_time(): - return sa.func.maketime( - value.hour, value.minute, value.second + value.microsecond / 1e6 - ) - else: - with contextlib.suppress(AttributeError): - value = value.to_pydatetime() - - return sa.literal(value) - - -def _group_concat(t, op): - if op.where is not None: - arg = t.translate(ops.IfElse(op.where, op.arg, ibis.NA)) - else: - arg = t.translate(op.arg) - sep = t.translate(op.sep) - return sa.func.group_concat(arg.op("SEPARATOR")(sep)) - - -def _json_get_item(t, op): - arg = t.translate(op.arg) - index = t.translate(op.index) - if op.index.dtype.is_integer(): - path = "$[" + sa.cast(index, sa.TEXT) + "]" - else: - path = "$." + index - return sa.func.json_extract(arg, path) - - -def _regex_extract(arg, pattern, index): - return sa.func.IF( - arg.op("REGEXP")(pattern), - sa.func.IF( - index == 0, - sa.func.REGEXP_SUBSTR(arg, pattern), - sa.func.REGEXP_REPLACE( - sa.func.REGEXP_SUBSTR(arg, pattern), pattern, rf"\{index.value}" - ), - ), - None, - ) - - -def _string_find(t, op): - arg = t.translate(op.arg) - substr = t.translate(op.substr) - - if op_start := op.start: - start = t.translate(op_start) - return sa.func.locate(substr, arg, start) - 1 - - return sa.func.locate(substr, arg) - 1 - - -class _mysql_trim(GenericFunction): - inherit_cache = True - - def __init__(self, input, side: str) -> None: - super().__init__(input) - self.type = sa.VARCHAR() - self.side = side - - -@compiles(_mysql_trim, "mysql") -def compiles_mysql_trim(element, compiler, **kw): - arg = compiler.function_argspec(element, **kw) - side = element.side.upper() - # has to be called once for every whitespace character because mysql - # interprets `char` literally, not as a set of characters like Python - return functools.reduce( - lambda arg, char: f"TRIM({side} '{char}' FROM {arg})", string.whitespace, arg - ) - - -def _temporal_delta(t, op): - left = t.translate(op.left) - right = t.translate(op.right) - part = sa.literal_column(op.part.value.upper()) - return sa.func.timestampdiff(part, right, left) - - -operation_registry.update( - { - ops.Literal: _literal, - # static checks are not happy with using "if" as a property - ops.IfElse: fixed_arity(getattr(sa.func, "if"), 3), - # strings - ops.StringFind: _string_find, - ops.FindInSet: ( - lambda t, op: ( - sa.func.find_in_set( - t.translate(op.needle), - sa.func.concat_ws(",", *map(t.translate, op.values)), - ) - - 1 - ) - ), - # LIKE in mysql is case insensitive - ops.StartsWith: fixed_arity( - lambda arg, start: sa.type_coerce( - arg.op("LIKE 
BINARY")(sa.func.concat(start, "%")), sa.BOOLEAN() - ), - 2, - ), - ops.EndsWith: fixed_arity( - lambda arg, end: sa.type_coerce( - arg.op("LIKE BINARY")(sa.func.concat("%", end)), sa.BOOLEAN() - ), - 2, - ), - ops.RegexSearch: fixed_arity( - lambda x, y: sa.type_coerce(x.op("REGEXP")(y), sa.BOOLEAN()), 2 - ), - ops.RegexExtract: fixed_arity(_regex_extract, 3), - # math - ops.Log: fixed_arity(lambda arg, base: sa.func.log(base, arg), 2), - ops.Log2: unary(sa.func.log2), - ops.Log10: unary(sa.func.log10), - ops.Round: _round, - # dates and times - ops.DateAdd: fixed_arity(operator.add, 2), - ops.DateSub: fixed_arity(operator.sub, 2), - ops.DateDiff: fixed_arity(sa.func.datediff, 2), - ops.TimestampAdd: fixed_arity(operator.add, 2), - ops.TimestampSub: fixed_arity(operator.sub, 2), - ops.TimestampDiff: fixed_arity( - lambda left, right: sa.func.timestampdiff(sa.text("SECOND"), right, left), 2 - ), - ops.StringToTimestamp: fixed_arity( - lambda arg, format_str: sa.func.str_to_date(arg, format_str), 2 - ), - ops.DateTruncate: _truncate, - ops.TimestampTruncate: _truncate, - ops.IntervalFromInteger: _interval_from_integer, - ops.Strftime: fixed_arity(sa.func.date_format, 2), - ops.ExtractDayOfYear: unary(sa.func.dayofyear), - ops.ExtractEpochSeconds: unary(sa.func.UNIX_TIMESTAMP), - ops.ExtractWeekOfYear: unary(sa.func.weekofyear), - ops.ExtractMicrosecond: fixed_arity( - lambda arg: sa.func.floor(sa.extract("microsecond", arg)), 1 - ), - ops.ExtractMillisecond: fixed_arity( - lambda arg: sa.func.floor(sa.extract("microsecond", arg) / 1000), 1 - ), - ops.TimestampNow: fixed_arity(sa.func.now, 0), - # others - ops.GroupConcat: _group_concat, - ops.DayOfWeekIndex: fixed_arity( - lambda arg: (sa.func.dayofweek(arg) + 5) % 7, 1 - ), - ops.DayOfWeekName: fixed_arity(lambda arg: sa.func.dayname(arg), 1), - ops.JSONGetItem: _json_get_item, - ops.Strip: unary(lambda arg: _mysql_trim(arg, "both")), - ops.LStrip: unary(lambda arg: _mysql_trim(arg, "leading")), - ops.RStrip: unary(lambda arg: _mysql_trim(arg, "trailing")), - ops.TimeDelta: _temporal_delta, - ops.DateDelta: _temporal_delta, - } -) diff --git a/ibis/backends/mysql/tests/conftest.py b/ibis/backends/mysql/tests/conftest.py index 8c0ad8007051..c7cadc448fd3 100644 --- a/ibis/backends/mysql/tests/conftest.py +++ b/ibis/backends/mysql/tests/conftest.py @@ -4,11 +4,9 @@ from typing import TYPE_CHECKING, Any import pytest -import sqlalchemy as sa -from packaging.version import parse as parse_version import ibis -from ibis.backends.conftest import TEST_TABLES, init_database +from ibis.backends.conftest import TEST_TABLES from ibis.backends.tests.base import ServiceBackendTest if TYPE_CHECKING: @@ -26,29 +24,18 @@ class TestConf(ServiceBackendTest): # mysql has the same rounding behavior as postgres check_dtype = False returned_timestamp_unit = "s" - supports_arrays = False - supports_arrays_outside_of_select = supports_arrays + supports_arrays = supports_arrays_outside_of_select = False native_bool = False supports_structs = False rounding_method = "half_to_even" service_name = "mysql" - deps = "pymysql", "sqlalchemy" + deps = ("pymysql",) + supports_window_operations = True @property def test_files(self) -> Iterable[Path]: return self.data_dir.joinpath("csv").glob("*.csv") - @property - def supports_window_operations(self) -> bool: - con = self.connection - with con.begin() as c: - version = c.execute(sa.select(sa.func.version())).scalar() - - # mariadb supports window operations after version 10.2 - # mysql supports window operations after version 
8 - min_version = "10.2" if "MariaDB" in version else "8.0" - return parse_version(con.version) >= parse_version(min_version) - def _load_data( self, *, @@ -68,16 +55,10 @@ def _load_data( script_dir Location of scripts defining schemas """ - engine = init_database( - url=sa.engine.make_url( - f"mysql+pymysql://{user}:{password}@{host}:{port:d}?local_infile=1", - ), - database=database, - schema=self.ddl_script, - isolation_level="AUTOCOMMIT", - recreate=False, - ) - with engine.begin() as con: + with self.connection.begin() as cur: + for stmt in self.ddl_script: + cur.execute(stmt) + for table in TEST_TABLES: csv_path = self.data_dir / "csv" / f"{table}.csv" lines = [ @@ -88,7 +69,7 @@ def _load_data( "LINES TERMINATED BY '\\n'", "IGNORE 1 LINES", ] - con.exec_driver_sql("\n".join(lines)) + cur.execute("\n".join(lines)) @staticmethod def connect(*, tmpdir, worker_id, **kw): @@ -98,31 +79,12 @@ def connect(*, tmpdir, worker_id, **kw): password=MYSQL_PASS, database=IBIS_TEST_MYSQL_DB, port=MYSQL_PORT, + local_infile=1, + autocommit=True, **kw, ) -@pytest.fixture(scope="session") -def setup_privs(): - engine = sa.create_engine(f"mysql+pymysql://root:@{MYSQL_HOST}:{MYSQL_PORT:d}") - with engine.begin() as con: - # allow the ibis user to use any database - con.exec_driver_sql("CREATE SCHEMA IF NOT EXISTS `test_schema`") - con.exec_driver_sql( - f"GRANT CREATE,SELECT,DROP ON `test_schema`.* TO `{MYSQL_USER}`@`%%`" - ) - yield - with engine.begin() as con: - con.exec_driver_sql("DROP SCHEMA IF EXISTS `test_schema`") - - @pytest.fixture(scope="session") def con(tmp_path_factory, data_dir, worker_id): return TestConf.load_data(data_dir, tmp_path_factory, worker_id).connection - - -@pytest.fixture(scope="session") -def con_nodb(): - return ibis.mysql.connect( - host=MYSQL_HOST, user=MYSQL_USER, password=MYSQL_PASS, port=MYSQL_PORT - ) diff --git a/ibis/backends/mysql/tests/test_client.py b/ibis/backends/mysql/tests/test_client.py index 4c73faeb9c3d..18ad4a39c8af 100644 --- a/ibis/backends/mysql/tests/test_client.py +++ b/ibis/backends/mysql/tests/test_client.py @@ -7,22 +7,14 @@ import pandas as pd import pandas.testing as tm import pytest -import sqlalchemy as sa -from packaging.version import parse as vparse +import sqlglot as sg from pytest import param -from sqlalchemy.dialects import mysql import ibis import ibis.expr.datatypes as dt from ibis import udf -from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported from ibis.util import gen_name -if geospatial_supported: - import geoalchemy2 -else: - geoalchemy2 = None - MYSQL_TYPES = [ param("tinyint", dt.int8, id="tinyint"), param("int1", dt.int8, id="int1"), @@ -69,30 +61,20 @@ param("set('a', 'b', 'c', 'd')", dt.Array(dt.string), id="set"), param("mediumblob", dt.binary, id="mediumblob"), param("blob", dt.binary, id="blob"), - param( - "uuid", - dt.uuid, - marks=[ - pytest.mark.xfail( - condition=vparse(sa.__version__) < vparse("2"), - reason="geoalchemy2 0.14.x doesn't work", - ) - ], - id="uuid", - ), + param("uuid", dt.uuid, id="uuid"), ] @pytest.mark.parametrize(("mysql_type", "expected_type"), MYSQL_TYPES) def test_get_schema_from_query(con, mysql_type, expected_type): raw_name = ibis.util.guid() - name = con._quote(raw_name) + name = sg.to_identifier(raw_name, quoted=True).sql("mysql") expected_schema = ibis.schema(dict(x=expected_type)) # temporary tables get cleaned up by the db when the session ends, so we # don't need to explicitly drop the table with con.begin() as c: - c.exec_driver_sql(f"CREATE TEMPORARY TABLE 
{name} (x {mysql_type})") + c.execute(f"CREATE TEMPORARY TABLE {name} (x {mysql_type})") result_schema = con._get_schema_using_query(f"SELECT * FROM {name}") assert result_schema == expected_schema @@ -105,29 +87,23 @@ def test_get_schema_from_query(con, mysql_type, expected_type): def test_blob_type(con, coltype): tmp = f"tmp_{ibis.util.guid()}" with con.begin() as c: - c.exec_driver_sql(f"CREATE TEMPORARY TABLE {tmp} (a {coltype})") + c.execute(f"CREATE TEMPORARY TABLE {tmp} (a {coltype})") t = con.table(tmp) assert t.schema() == ibis.schema({"a": dt.binary}) @pytest.fixture(scope="session") -def tmp_t(con_nodb): - with con_nodb.begin() as c: - c.exec_driver_sql("CREATE TABLE IF NOT EXISTS test_schema.t (x INET6)") - yield - with con_nodb.begin() as c: - c.exec_driver_sql("DROP TABLE IF EXISTS test_schema.t") - - -@pytest.mark.usefixtures("setup_privs", "tmp_t") -@pytest.mark.xfail( - geospatial_supported and vparse(geoalchemy2.__version__) > vparse("0.13.3"), - reason="geoalchemy2 issues when using 0.14.x", - raises=sa.exc.OperationalError, -) -def test_get_schema_from_query_other_schema(con_nodb): - t = con_nodb.table("t", schema="test_schema") - assert t.schema() == ibis.schema({"x": dt.string}) +def tmp_t(con): + with con.begin() as c: + c.execute("CREATE TABLE IF NOT EXISTS test_schema.t (x INET6)") + yield "t" + with con.begin() as c: + c.execute("DROP TABLE IF EXISTS test_schema.t") + + +def test_get_schema_from_query_other_schema(con, tmp_t): + t = con.table(tmp_t, schema="test_schema") + assert t.schema() == ibis.schema({"x": dt.inet}) def test_zero_timestamp_data(con): @@ -137,11 +113,11 @@ def test_zero_timestamp_data(con): name CHAR(10) NULL, tradedate DATETIME NOT NULL, date DATETIME NULL - ); + ) """ with con.begin() as c: - c.exec_driver_sql(sql) - c.exec_driver_sql( + c.execute(sql) + c.execute( """ INSERT INTO ztmp_date_issue VALUES ('C', '2018-10-22', 0), @@ -166,12 +142,11 @@ def test_zero_timestamp_data(con): @pytest.fixture(scope="module") def enum_t(con): name = gen_name("mysql_enum_test") - t = sa.Table( - name, sa.MetaData(), sa.Column("sml", mysql.ENUM("small", "medium", "large")) - ) - with con.begin() as bind: - t.create(bind=bind) - bind.execute(t.insert().values(sml="small")) + with con.begin() as cur: + cur.execute( + f"CREATE TEMPORARY TABLE {name} (sml ENUM('small', 'medium', 'large'))" + ) + cur.execute(f"INSERT INTO {name} VALUES ('small')") yield con.table(name) con.drop_table(name, force=True) diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py index ca70557f6321..229ad1577282 100644 --- a/ibis/backends/tests/errors.py +++ b/ibis/backends/tests/errors.py @@ -91,3 +91,10 @@ PsycoPg2SyntaxError = ( PsycoPg2IndeterminateDatatype ) = PsycoPg2InvalidTextRepresentation = PsycoPg2DivisionByZero = None + +try: + from pymysql.err import NotSupportedError as MySQLNotSupportedError + from pymysql.err import OperationalError as MySQLOperationalError + from pymysql.err import ProgrammingError as MySQLProgrammingError +except ImportError: + MySQLNotSupportedError = MySQLProgrammingError = MySQLOperationalError = None diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/mysql/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/mysql/out.sql new file mode 100644 index 000000000000..b4d624684bfc --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/mysql/out.sql @@ -0,0 +1,5 @@ +SELECT + `t0`.`id`, + `t0`.`bool_col` = 1 AS `bool_col` +FROM `functional_alltypes` AS 
`t0` +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/mysql/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/mysql/out.sql new file mode 100644 index 000000000000..b4d624684bfc --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/mysql/out.sql @@ -0,0 +1,5 @@ +SELECT + `t0`.`id`, + `t0`.`bool_col` = 1 AS `bool_col` +FROM `functional_alltypes` AS `t0` +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/mysql/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/mysql/out.sql new file mode 100644 index 000000000000..d93091fd3aba --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/mysql/out.sql @@ -0,0 +1,19 @@ +SELECT + SUM(`t1`.`bigint_col`) AS `Sum(bigint_col)` +FROM ( + SELECT + `t0`.`id`, + `t0`.`bool_col` = 1 AS `bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month` + FROM `functional_alltypes` AS `t0` +) AS `t1` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/mysql/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/mysql/out.sql new file mode 100644 index 000000000000..1c3fb645041c --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/mysql/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + `t0`.`id`, + `t0`.`bool_col` = 1 AS `bool_col` + FROM `functional_alltypes` AS `t0` + LIMIT 10 +) AS `t2` +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/mysql/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/mysql/out.sql index fc16f2428d16..ac006b1d5f25 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/mysql/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/mysql/out.sql @@ -1,5 +1,5 @@ SELECT - CASE t0.continent + CASE `t0`.`continent` WHEN 'NA' THEN 'North America' WHEN 'SA' @@ -15,8 +15,8 @@ SELECT WHEN 'AN' THEN 'Antarctica' ELSE 'Unknown continent' - END AS cont, - SUM(t0.population) AS total_pop -FROM countries AS t0 + END AS `cont`, + SUM(`t0`.`population`) AS `total_pop` +FROM `countries` AS `t0` GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/mysql/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/mysql/out.sql index a3042e85b3e7..db5ddb124e86 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/mysql/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/mysql/out.sql @@ -1,13 +1,9 @@ SELECT - t0.x IN ( + `t0`.`x` IN ( SELECT - t1.x - FROM ( - SELECT - t0.x AS x - FROM t AS t0 - WHERE - t0.x > 2 - ) AS t1 - ) AS `InColumn(x, x)` -FROM t AS t0 \ No newline at end of file + `t0`.`x` + FROM `t` AS `t0` + WHERE + `t0`.`x` > 2 + ) AS `InSubquery(x)` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 24fa0c702fd1..d6a3a4d0f0d1 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -18,6 +18,7 @@ 
ClickHouseDatabaseError, ExaQueryError, GoogleBadRequest, + MySQLNotSupportedError, PolarsInvalidOperationError, Py4JError, PySparkAnalysisException, @@ -948,7 +949,6 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): "datafusion", "impala", "mssql", - "mysql", "polars", "sqlite", "druid", @@ -957,6 +957,7 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): ], raises=com.OperationNotDefinedError, ), + pytest.mark.notyet(["mysql"], raises=com.UnsupportedBackendType), pytest.mark.notyet( ["snowflake"], reason="backend doesn't implement array of quantiles as input", @@ -1359,6 +1360,7 @@ def test_date_quantile(alltypes, func): "::", id="expr", marks=[ + pytest.mark.notyet(["mysql"], raises=com.UnsupportedOperationError), pytest.mark.notyet( ["bigquery"], raises=GoogleBadRequest, @@ -1367,10 +1369,6 @@ def test_date_quantile(alltypes, func): pytest.mark.broken( ["pyspark"], raises=TypeError, reason="Column is not iterable" ), - pytest.mark.broken( - ["mysql"], - raises=sa.exc.ProgrammingError, - ), ], ), ], @@ -1680,7 +1678,7 @@ def test_grouped_case(backend, con): @pytest.mark.notyet(["druid"], raises=sa.exc.ProgrammingError) @pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) @pytest.mark.notyet(["trino"], raises=TrinoUserError) -@pytest.mark.notyet(["mysql"], raises=sa.exc.NotSupportedError) +@pytest.mark.notyet(["mysql"], raises=MySQLNotSupportedError) @pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notyet(["pyspark"], raises=PySparkAnalysisException) def test_group_concat_over_window(backend, con): diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 9385b84e966d..b14afa5a9441 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -20,6 +20,7 @@ from ibis.backends.tests.errors import ( ClickHouseDatabaseError, GoogleBadRequest, + MySQLOperationalError, PolarsComputeError, PsycoPg2IndeterminateDatatype, PsycoPg2SyntaxError, @@ -30,10 +31,11 @@ pytestmark = [ pytest.mark.never( - ["sqlite", "mysql", "mssql", "exasol"], + ["sqlite", "mssql", "exasol"], reason="No array support", raises=Exception, ), + pytest.mark.never(["mysql"], reason="No array support", raises=(com.UnsupportedBackendType, com.OperationNotDefinedError, MySQLOperationalError)), pytest.mark.notyet(["impala"], reason="No array support", raises=Exception), pytest.mark.notimpl(["druid", "oracle"], raises=Exception), ] @@ -162,7 +164,11 @@ def test_array_index(con, idx): pytest.mark.never( ["mysql"], reason="array types are unsupported", - raises=com.OperationNotDefinedError, + raises=( + com.OperationNotDefinedError, + MySQLOperationalError, + com.UnsupportedBackendType, + ), ), pytest.mark.never( ["sqlite"], reason="array types are unsupported", raises=NotImplementedError @@ -419,7 +425,6 @@ def test_array_slice(backend, start, stop): "polars", "snowflake", "sqlite", - "mysql", ], raises=com.OperationNotDefinedError, ) @@ -429,9 +434,7 @@ def test_array_slice(backend, start, stop): reason="Operation 'ArrayMap' is not implemented for this backend", ) @pytest.mark.notimpl( - ["sqlite"], - raises=NotImplementedError, - reason="Unsupported type: Array: ...", + ["sqlite"], raises=NotImplementedError, reason="Unsupported type: Array: ..." 
) @pytest.mark.parametrize( ("input", "output"), @@ -485,7 +488,6 @@ def test_array_map(con, input, output): "pandas", "polars", "snowflake", - "mysql", ], raises=com.OperationNotDefinedError, ) @@ -637,7 +639,7 @@ def test_array_remove(con, a): @builtin_array @pytest.mark.notimpl( - ["dask", "datafusion", "impala", "mssql", "polars", "mysql"], + ["dask", "datafusion", "impala", "mssql", "polars"], raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( @@ -755,8 +757,9 @@ def test_array_union(con, a, b, expected_array): ) +@builtin_array @pytest.mark.notimpl( - ["dask", "datafusion", "impala", "mssql", "pandas", "polars", "mysql", "flink"], + ["dask", "datafusion", "impala", "mssql", "pandas", "polars", "flink"], raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( @@ -1083,7 +1086,6 @@ def test_unnest_empty_array(con): "polars", "snowflake", "sqlite", - "mysql", "dask", "pandas", ], @@ -1110,7 +1112,6 @@ def test_array_map_with_conflicting_names(backend, con): "polars", "snowflake", "sqlite", - "mysql", "dask", "pandas", ], @@ -1288,9 +1289,11 @@ def test_timestamp_range_zero_step(con, start, stop, step, tzinfo): def test_repr_timestamp_array(con, monkeypatch): monkeypatch.setattr(ibis.options, "interactive", True) - monkeypatch.setattr(ibis.options, "default_backend", con) assert ibis.options.interactive is True + + monkeypatch.setattr(ibis.options, "default_backend", con) assert ibis.options.default_backend is con + expr = ibis.array(pd.date_range("2010-01-01", "2010-01-03", freq="D").tolist()) assert "No translation rule" not in repr(expr) diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index 1250e55ca35f..172075a0a860 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -78,13 +78,9 @@ def time_keyed_right(time_keyed_df2): @pytest.mark.parametrize( - ("direction", "op"), - [ - ("backward", operator.ge), - ("forward", operator.le), - ], + ("direction", "op"), [("backward", operator.ge), ("forward", operator.le)] ) -@pytest.mark.notimpl(["datafusion", "snowflake", "trino", "postgres"]) +@pytest.mark.notyet(["datafusion", "snowflake", "trino", "postgres", "mysql"]) def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): on = op(time_left["time"], time_right["time"]) expr = time_left.asof_join(time_right, on=on, predicates="group") @@ -103,16 +99,12 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op @pytest.mark.parametrize( - ("direction", "op"), - [ - ("backward", operator.ge), - ("forward", operator.le), - ], + ("direction", "op"), [("backward", operator.ge), ("forward", operator.le)] ) @pytest.mark.broken( ["clickhouse"], raises=AssertionError, reason="`time` is truncated to seconds" ) -@pytest.mark.notimpl(["datafusion", "snowflake", "trino", "postgres"]) +@pytest.mark.notyet(["datafusion", "snowflake", "trino", "postgres", "mysql"]) def test_keyed_asof_join_with_tolerance( con, time_keyed_left, diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 3904ff82fbba..ab0581a4d736 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -70,10 +70,7 @@ def _create_temp_table_with_schema(backend, con, temp_table_name, schema, data=N @pytest.mark.parametrize( "sch", [ - param( - None, - id="no schema", - ), + param(None, id="no schema"), param( ibis.schema( [ @@ -102,12 +99,7 @@ def test_create_table(backend, con, temp_table, lamduh, sch): } ) - obj = lamduh(df) 
- con.create_table( - temp_table, - obj, - schema=sch, - ) + con.create_table(temp_table, lamduh(df), schema=sch) result = ( con.table(temp_table).execute().sort_values("first_name").reset_index(drop=True) ) @@ -1124,7 +1116,7 @@ def test_repr_mimebundle(alltypes, interactive, expr_type, monkeypatch): @pytest.mark.never( - ["postgres", "mysql", "bigquery", "duckdb"], + ["postgres", "bigquery", "duckdb"], reason="These backends explicitly do support Geo operations", ) @pytest.mark.parametrize("op", [ops.GeoDistance, ops.GeoAsText, ops.GeoUnaryUnion]) diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index 4b678b7fb3c0..8f37499438c2 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -13,6 +13,7 @@ from ibis.backends.tests.errors import ( DuckDBNotImplementedException, DuckDBParserException, + MySQLOperationalError, PyDeltaTableError, PySparkAnalysisException, SnowflakeProgrammingError, @@ -356,7 +357,7 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError), pytest.mark.notyet(["trino"], raises=TrinoUserError), pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError), - pytest.mark.notyet(["mysql"], raises=sa.exc.OperationalError), + pytest.mark.notyet(["mysql"], raises=MySQLOperationalError), pytest.mark.notyet( ["pyspark"], raises=PySparkAnalysisException, diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index f389e52c3411..a47d2c004907 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -24,7 +24,7 @@ GoogleBadRequest, ImpalaHiveServer2Error, Py4JJavaError, - PsycoPg2InvalidTextRepresentation, + MySQLProgrammingError, SnowflakeProgrammingError, TrinoUserError, ) @@ -1188,10 +1188,6 @@ def test_distinct_on_keep(backend, on, keep): idx=ibis.row_number().over(order_by=_.one, rows=(None, 0)) ) - requires_cache = backend.name() in ("mysql", "impala") - - if requires_cache: - t = t.cache() expr = t.distinct(on=on, keep=keep).order_by(ibis.asc("idx")) result = expr.execute() df = t.execute() @@ -1267,10 +1263,6 @@ def test_distinct_on_keep_is_none(backend, on): idx=ibis.row_number().over(order_by=_.one, rows=(None, 0)) ) - requires_cache = backend.name() in ("mysql", "impala") - - if requires_cache: - t = t.cache() expr = t.distinct(on=on, keep=None).order_by(ibis.asc("idx")) result = expr.execute() df = t.execute() @@ -1380,7 +1372,6 @@ def hash_256(col): "druid", "impala", "mssql", - "mysql", "oracle", "risingwave", "pyspark", @@ -1405,6 +1396,7 @@ def hash_256(col): pytest.mark.notyet(["trino"], raises=TrinoUserError), pytest.mark.broken(["polars"], reason="casts to 1672531200000000000"), pytest.mark.broken(["datafusion"], reason="casts to 1672531200000000"), + pytest.mark.broken(["mysql"], reason="returns 20230101000000"), ], ), ], @@ -1684,7 +1676,7 @@ def test_static_table_slice(backend, slc, expected_count_fn): ) @pytest.mark.notyet( ["mysql"], - raises=sa.exc.ProgrammingError, + raises=MySQLProgrammingError, reason="backend doesn't support dynamic limit/offset", ) @pytest.mark.notyet( @@ -1747,7 +1739,7 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): @pytest.mark.notyet( ["mysql"], - raises=sa.exc.ProgrammingError, + raises=MySQLProgrammingError, reason="backend doesn't support dynamic limit/offset", ) @pytest.mark.notyet( diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index 
95087ba64d8e..1f264c34d455 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -256,11 +256,6 @@ def test_join_with_pandas_non_null_typed_columns(batting, awards_players): param( "outer", marks=[ - pytest.mark.notyet( - ["mysql"], - raises=sa.exc.ProgrammingError, - reason="MySQL doesn't support full outer joins natively", - ), pytest.mark.notyet( ["sqlite"], condition=vparse(sqlite3.sqlite_version) < vparse("3.39"), @@ -298,13 +293,9 @@ def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_valu assert len(result) == len(expected) -outer_join_nullability_failures = [ - pytest.mark.notyet( - ["mysql"], - raises=sa.exc.ProgrammingError, - reason="mysql doesn't support full outer joins", - ) -] + [pytest.mark.notyet(["sqlite"])] * (vparse(sqlite3.sqlite_version) < vparse("3.39")) +outer_join_nullability_failures = [pytest.mark.notyet(["sqlite"])] * ( + vparse(sqlite3.sqlite_version) < vparse("3.39") +) @pytest.mark.notimpl( diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 217f3cff10de..17c9cd7d348e 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -22,6 +22,7 @@ ExaQueryError, GoogleBadRequest, ImpalaHiveServer2Error, + MySQLOperationalError, PsycoPg2DivisionByZero, Py4JError, SnowflakeProgrammingError, @@ -267,7 +268,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "postgres": decimal.Decimal("1.1"), "pandas": decimal.Decimal("1.1"), "pyspark": decimal.Decimal("1.1"), - "mysql": 1.1, + "mysql": decimal.Decimal("1"), "mssql": 1.1, "druid": 1.1, "datafusion": decimal.Decimal("1.1"), @@ -320,7 +321,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "postgres": decimal.Decimal("1.1"), "pandas": decimal.Decimal("1.1"), "pyspark": decimal.Decimal("1.1"), - "mysql": 1.1, + "mysql": decimal.Decimal("1.1"), "clickhouse": decimal.Decimal("1.1"), "dask": decimal.Decimal("1.1"), "mssql": 1.1, @@ -369,7 +370,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "postgres": decimal.Decimal("1.1"), "pandas": decimal.Decimal("1.1"), "pyspark": decimal.Decimal("1.1"), - "mysql": 1.1, "clickhouse": decimal.Decimal( "1.10000000000000003193790845333396190208" ), @@ -388,6 +388,7 @@ def test_numeric_literal(con, backend, expr, expected_types): }, marks=[ pytest.mark.notimpl(["exasol"], raises=ExaQueryError), + pytest.mark.notimpl(["mysql"], raises=MySQLOperationalError), pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError), pytest.mark.broken( ["impala"], @@ -456,12 +457,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "An error occurred while calling z:org.apache.spark.sql.functions.lit.", raises=Py4JError, ), - pytest.mark.broken( - ["mysql"], - "(pymysql.err.OperationalError) (1054, \"Unknown column 'Infinity' in 'field list'\")" - "[SQL: SELECT %(param_1)s AS `Decimal('Infinity')`]", - raises=sa.exc.OperationalError, - ), + pytest.mark.notyet(["mysql"], raises=com.UnsupportedOperationError), pytest.mark.broken( ["mssql"], "(pymssql._pymssql.ProgrammingError) (207, b\"Invalid column name 'Infinity'." 
@@ -542,12 +538,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "An error occurred while calling z:org.apache.spark.sql.functions.lit.", raises=Py4JError, ), - pytest.mark.broken( - ["mysql"], - "(pymysql.err.OperationalError) (1054, \"Unknown column 'Infinity' in 'field list'\")" - "[SQL: SELECT %(param_1)s AS `Decimal('-Infinity')`]", - raises=sa.exc.OperationalError, - ), + pytest.mark.notyet(["mysql"], raises=com.UnsupportedOperationError), pytest.mark.broken( ["mssql"], "(pymssql._pymssql.ProgrammingError) (207, b\"Invalid column name 'Infinity'." @@ -630,12 +621,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "An error occurred while calling z:org.apache.spark.sql.functions.lit.", raises=Py4JError, ), - pytest.mark.broken( - ["mysql"], - "(pymysql.err.OperationalError) (1054, \"Unknown column 'NaN' in 'field list'\")" - "[SQL: SELECT %(param_1)s AS `Decimal('NaN')`]", - raises=sa.exc.OperationalError, - ), + pytest.mark.notyet(["mysql"], raises=com.UnsupportedOperationError), pytest.mark.broken( ["mssql"], "(pymssql._pymssql.ProgrammingError) (207, b\"Invalid column name 'NaN'." @@ -744,25 +730,15 @@ def test_decimal_literal(con, backend, expr, expected_types, expected_result): id="float-literal", marks=[ pytest.mark.notimpl( - ["exasol"], - raises=com.OperationNotDefinedError, - ), - pytest.mark.notimpl( - ["druid"], - raises=com.OperationNotDefinedError, - ), + ["exasol", "druid"], raises=com.OperationNotDefinedError + ) ], ), param( lambda t: ibis.literal(np.nan), lambda t: np.nan, id="nan-literal", - marks=[ - pytest.mark.notimpl( - ["druid"], - raises=com.OperationNotDefinedError, - ) - ], + marks=[pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError)], ), param( lambda t: ibis.literal(np.inf), @@ -770,13 +746,8 @@ def test_decimal_literal(con, backend, expr, expected_types, expected_result): id="inf-literal", marks=[ pytest.mark.notimpl( - ["exasol"], - raises=com.OperationNotDefinedError, - ), - pytest.mark.notimpl( - ["druid"], - raises=com.OperationNotDefinedError, - ), + ["exasol", "druid"], raises=com.OperationNotDefinedError + ) ], ), param( @@ -785,13 +756,8 @@ def test_decimal_literal(con, backend, expr, expected_types, expected_result): id="-inf-literal", marks=[ pytest.mark.notimpl( - ["exasol"], - raises=com.OperationNotDefinedError, - ), - pytest.mark.notimpl( - ["druid"], - raises=com.OperationNotDefinedError, - ), + ["exasol", "druid"], raises=com.OperationNotDefinedError + ) ], ), ], @@ -821,9 +787,9 @@ def test_decimal_literal(con, backend, expr, expected_types, expected_result): ], ) @pytest.mark.notimpl( - ["mysql", "sqlite", "mssql", "oracle", "flink"], - raises=com.OperationNotDefinedError, + ["sqlite", "mssql", "oracle", "flink"], raises=com.OperationNotDefinedError ) +@pytest.mark.notimpl(["mysql"], raises=(MySQLOperationalError, NotImplementedError)) def test_isnan_isinf( backend, con, @@ -1516,6 +1482,7 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "snowflake", "trino", "postgres", + "mysql", ], reason="Not SQLAlchemy backends", ) diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 0ca72f6a45a5..92c034b8124c 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -27,7 +27,7 @@ ) no_structs = pytest.mark.never( ["impala", "mysql", "sqlite", "mssql"], - raises=(NotImplementedError, sa.exc.CompileError), + raises=(NotImplementedError, sa.exc.CompileError, exc.UnsupportedBackendType), reason="structs not supported in 
the backend", ) no_struct_literals = pytest.mark.notimpl( diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index cde2dc86d1bc..9cc3e5dbd37d 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -13,7 +13,6 @@ import ibis.expr.datatypes as dt from ibis.backends.tests.errors import ClickHouseDatabaseError, PySparkPythonException from ibis.common.annotations import ValidationError -from ibis.common.exceptions import OperationNotDefinedError @pytest.mark.parametrize( @@ -932,7 +931,7 @@ def test_substr_with_null_values(backend, alltypes, df): marks=[ pytest.mark.notyet( ["clickhouse", "snowflake", "trino"], - raises=OperationNotDefinedError, + raises=com.OperationNotDefinedError, reason="doesn't support `USERINFO`", ) ], @@ -945,7 +944,7 @@ def test_substr_with_null_values(backend, alltypes, df): marks=[ pytest.mark.notyet( ["snowflake"], - raises=OperationNotDefinedError, + raises=com.OperationNotDefinedError, reason="host is netloc", ), pytest.mark.broken( @@ -1010,12 +1009,15 @@ def test_capitalize(con): @pytest.mark.notimpl( ["dask", "pandas", "polars", "druid", "oracle", "flink"], - raises=OperationNotDefinedError, + raises=com.OperationNotDefinedError, ) @pytest.mark.notyet( - ["impala", "mssql", "mysql", "sqlite", "exasol"], + ["impala", "mssql", "sqlite", "exasol"], reason="no arrays", - raises=OperationNotDefinedError, + raises=com.OperationNotDefinedError, +) +@pytest.mark.never( + ["mysql"], raises=com.OperationNotDefinedError, reason="no array support" ) def test_array_string_join(con): s = ibis.array(["a", "b", "c"]) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index e8a34654835e..4a395dfbac4b 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -10,6 +10,7 @@ import pandas as pd import pytest import sqlalchemy as sa +import sqlglot as sg from pytest import param import ibis @@ -24,6 +25,8 @@ GoogleBadRequest, ImpalaHiveServer2Error, ImpalaOperationalError, + MySQLOperationalError, + MySQLProgrammingError, PolarsComputeError, PolarsPanicException, Py4JJavaError, @@ -393,9 +396,9 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): param( "W", marks=[ - pytest.mark.notimpl(["mysql"], raises=com.UnsupportedOperationError), pytest.mark.notimpl(["impala"], raises=AssertionError), pytest.mark.broken(["sqlite"], raises=AssertionError), + pytest.mark.notimpl(["mysql"], raises=com.UnsupportedOperationError), pytest.mark.broken( ["polars"], raises=AssertionError, @@ -623,12 +626,8 @@ def test_timestamp_truncate(backend, alltypes, df, unit): param( "W", marks=[ - pytest.mark.notimpl( - ["mysql"], - raises=com.UnsupportedOperationError, - reason="Unsupported truncate unit W", - ), pytest.mark.broken(["impala"], raises=AssertionError), + pytest.mark.notyet(["mysql"], raises=com.UnsupportedOperationError), pytest.mark.never( ["flink"], raises=Py4JJavaError, @@ -824,7 +823,7 @@ def test_date_truncate(backend, alltypes, df, unit): pd.Timedelta, marks=[ pytest.mark.notimpl( - ["mysql", "clickhouse"], raises=com.UnsupportedOperationError + ["clickhouse"], raises=com.UnsupportedOperationError ), pytest.mark.notimpl( ["pyspark"], @@ -1028,7 +1027,6 @@ def convert_to_offset(x): [ "dask", "impala", - "mysql", "risingwave", "snowflake", "sqlite", @@ -1036,6 +1034,7 @@ def convert_to_offset(x): ], raises=com.OperationNotDefinedError, ), + pytest.mark.notimpl(["mysql"], raises=sg.ParseError), pytest.mark.notimpl( ["druid"], 
raises=ValidationError, @@ -1055,13 +1054,13 @@ def convert_to_offset(x): "sqlite", "risingwave", "polars", - "mysql", "impala", "snowflake", "bigquery", ], raises=com.OperationNotDefinedError, ), + pytest.mark.notimpl(["mysql"], raises=sg.ParseError), pytest.mark.notimpl( ["druid"], raises=ValidationError, @@ -1574,7 +1573,7 @@ def test_timestamp_comparison_filter_numpy(backend, con, alltypes, df, func_name @pytest.mark.notimpl( - ["sqlite", "snowflake", "mssql", "oracle"], + ["sqlite", "snowflake", "mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError, ) @pytest.mark.broken( @@ -1587,7 +1586,6 @@ def test_timestamp_comparison_filter_numpy(backend, con, alltypes, df, func_name raises=Py4JJavaError, reason="ParseException: Encountered '+ INTERVAL CAST'", ) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_interval_add_cast_scalar(backend, alltypes): timestamp_date = alltypes.timestamp_col.date() delta = ibis.literal(10).cast("interval('D')") @@ -1601,7 +1599,7 @@ def test_interval_add_cast_scalar(backend, alltypes): ["pyspark"], reason="PySpark does not support casting columns to intervals" ) @pytest.mark.notimpl( - ["sqlite", "snowflake", "mssql", "oracle"], + ["sqlite", "snowflake", "mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( @@ -1609,7 +1607,6 @@ def test_interval_add_cast_scalar(backend, alltypes): raises=AttributeError, reason="'StringColumn' object has no attribute 'date'", ) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_interval_add_cast_column(backend, alltypes, df): timestamp_date = alltypes.timestamp_col.date() delta = alltypes.bigint_col.cast("interval('D')") @@ -2024,16 +2021,6 @@ def test_now_from_projection(alltypes): @pytest.mark.notimpl( ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00936 missing expression" ) -@pytest.mark.broken( - ["mysql"], - raises=sa.exc.ProgrammingError, - reason=( - '(pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; ' - "check the manual that corresponds to your MariaDB server version for " - "the right syntax to use near ' 2, 4) AS `DateFromYMD(2022, 2, 4)`' at line 1\")" - "[SQL: SELECT date(%(param_1)s, %(param_2)s, %(param_3)s) AS `DateFromYMD(2022, 2, 4)`]" - ), -) @pytest.mark.notyet(["impala"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.notimpl( @@ -2065,7 +2052,9 @@ def test_date_literal(con, backend): } -@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl( + ["pandas", "dask", "pyspark", "mysql"], raises=com.OperationNotDefinedError +) @pytest.mark.notimpl( ["druid"], raises=sa.exc.ProgrammingError, @@ -2076,11 +2065,6 @@ def test_date_literal(con, backend): "make_timestamp(, , , , , )" ), ) -@pytest.mark.broken( - ["mysql"], - raises=sa.exc.OperationalError, - reason="(pymysql.err.OperationalError) (1305, 'FUNCTION ibis_testing.make_timestamp does not exist')", -) @pytest.mark.notimpl( ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00904: MAKE TIMESTAMP invalid" ) @@ -2106,11 +2090,6 @@ def test_timestamp_literal(con, backend): @pytest.mark.notimpl( ["pandas", "mysql", "dask", "pyspark"], raises=com.OperationNotDefinedError ) -@pytest.mark.notimpl( - ["mysql"], - raises=sa.exc.OperationalError, - reason="FUNCTION ibis_testing.make_timestamp does not exist", -) @pytest.mark.notimpl( ["sqlite"], raises=com.UnsupportedOperationError, @@ -2181,29 +2160,11 @@ def 
test_timestamp_with_timezone_literal(con, timezone, expected): @pytest.mark.notimpl( - [ - "pandas", - "datafusion", - "dask", - "pyspark", - "polars", - ], + ["pandas", "datafusion", "dask", "pyspark", "polars", "mysql"], raises=com.OperationNotDefinedError, ) @pytest.mark.notyet(["clickhouse", "impala"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) -@pytest.mark.broken( - [ - "mysql", - ], - raises=sa.exc.ProgrammingError, - reason=( - '(pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that ' - "corresponds to your MariaDB server version for the right syntax to use near ' 20, 0) AS " - "`TimeFromHMS(16, 20, 0)`' at line 1\")" - "[SQL: SELECT time(%(param_1)s, %(param_2)s, %(param_3)s) AS `TimeFromHMS(16, 20, 0)`]" - ), -) @pytest.mark.broken( ["druid"], raises=sa.exc.ProgrammingError, reason="SQL parse failed" ) @@ -2323,7 +2284,7 @@ def test_extract_time_from_timestamp(con, microsecond): "AttributeError: 'TextClause' object has no attribute 'label'" "If SQLAlchemy >=2 is installed, test fails with the following exception:" "NotImplementedError", - raises=(NotImplementedError, AttributeError), + raises=MySQLProgrammingError, ) @pytest.mark.broken( ["bigquery", "duckdb"], @@ -2359,15 +2320,6 @@ def test_interval_literal(con, backend): @pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) -@pytest.mark.broken( - ["mysql"], - raises=sa.exc.ProgrammingError, - reason=( - '(pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual ' - "that corresponds to your MariaDB server version for the right syntax to use near " - "' CAST(EXTRACT(month FROM t0.timestamp_col) AS SIGNED INTEGER), CAST(EXTRACT(d...' 
at line 1\")" - ), -) @pytest.mark.broken( ["druid"], raises=AttributeError, @@ -2393,17 +2345,14 @@ def test_date_column_from_ymd(backend, con, alltypes, df): backend.assert_series_equal(golden, result.timestamp_col) -@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl( + ["pandas", "dask", "pyspark", "mysql"], raises=com.OperationNotDefinedError +) @pytest.mark.broken( ["druid"], raises=AttributeError, reason="StringColumn' object has no attribute 'year'", ) -@pytest.mark.broken( - ["mysql"], - raises=sa.exc.OperationalError, - reason="(pymysql.err.OperationalError) (1305, 'FUNCTION ibis_testing.make_timestamp does not exist')", -) @pytest.mark.notimpl( ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00904 make timestamp invalid" ) @@ -2689,6 +2638,11 @@ def test_large_timestamp(con): reason="doesn't support nanoseconds", raises=sa.exc.ProgrammingError, ), + pytest.mark.notyet( + ["mysql"], + reason="doesn't support nanoseconds", + raises=MySQLOperationalError, + ), pytest.mark.notyet( ["bigquery"], reason=( @@ -2711,7 +2665,6 @@ def test_large_timestamp(con): ), ], ) -@pytest.mark.notyet(["mysql"], raises=AssertionError) @pytest.mark.broken( ["druid"], raises=sa.exc.ProgrammingError, diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 432b18594d61..99ddaf6f5b46 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -16,6 +16,7 @@ ClickHouseDatabaseError, GoogleBadRequest, ImpalaHiveServer2Error, + MySQLOperationalError, Py4JJavaError, PySparkAnalysisException, SnowflakeProgrammingError, @@ -863,7 +864,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): raises=com.UnsupportedOperationError, reason="Flink engine does not support generic window clause with no order by", ), - pytest.mark.broken(["mysql"], raises=sa.exc.OperationalError), pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError), pytest.mark.notyet( ["snowflake"], @@ -916,7 +916,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): raises=com.UnsupportedOperationError, reason="Flink engine does not support generic window clause with no order by", ), - pytest.mark.broken(["mysql"], raises=sa.exc.OperationalError), pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError), pytest.mark.notyet( ["snowflake"], @@ -1046,6 +1045,11 @@ def test_ungrouped_unbounded_window( reason="Feature is not yet implemented: window frame in `RANGE` mode is not supported yet", ) @pytest.mark.notyet(["mssql"], raises=sa.exc.ProgrammingError) +@pytest.mark.broken( + ["mysql"], + raises=MySQLOperationalError, + reason="https://github.com/tobymao/sqlglot/issues/2779", +) def test_grouped_bounded_range_window(backend, alltypes, df): # Explanation of the range window spec below: # @@ -1224,7 +1228,7 @@ def test_first_last(backend): ["impala"], raises=ImpalaHiveServer2Error, reason="not supported by Impala" ) @pytest.mark.notyet( - ["mysql"], raises=sa.exc.ProgrammingError, reason="not supported by MySQL" + ["mysql"], raises=MySQLOperationalError, reason="not supported by MySQL" ) @pytest.mark.notyet( ["mssql", "oracle", "polars", "snowflake", "sqlite"], diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py index e202b48f0621..0522e965c1d8 100644 --- a/ibis/formats/pandas.py +++ b/ibis/formats/pandas.py @@ -152,9 +152,6 @@ def convert_table(cls, df, schema): def convert_column(cls, obj, dtype): pandas_type = PandasType.from_ibis(dtype) - if obj.dtype == pandas_type and 
dtype.is_primitive(): - return obj - method_name = f"convert_{dtype.__class__.__name__}" convert_method = getattr(cls, method_name, cls.convert_default) @@ -185,6 +182,8 @@ def convert_GeoSpatial(cls, s, dtype, pandas_type): @classmethod def convert_default(cls, s, dtype, pandas_type): + if s.dtype == pandas_type and dtype.is_primitive(): + return s try: return s.astype(pandas_type) except Exception: # noqa: BLE001 diff --git a/poetry.lock b/poetry.lock index a63dc0e775be..16f45735ba6b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1891,24 +1891,6 @@ tqdm = "*" [package.extras] test = ["build", "mypy", "pytest", "ruff", "twine", "types-requests"] -[[package]] -name = "geoalchemy2" -version = "0.14.3" -description = "Using SQLAlchemy with Spatial Databases" -optional = true -python-versions = ">=3.7" -files = [ - {file = "GeoAlchemy2-0.14.3-py3-none-any.whl", hash = "sha256:a727198394fcc4760a27c4c5bff8b9f4f79324ec2dd98c4c1b8a7026b8918d81"}, - {file = "GeoAlchemy2-0.14.3.tar.gz", hash = "sha256:79c432b10dd8c48422f794eaf9a1200929de14f41d2396923bfe92f4c6abaf89"}, -] - -[package.dependencies] -packaging = "*" -SQLAlchemy = ">=1.4" - -[package.extras] -shapely = ["Shapely (>=1.7)"] - [[package]] name = "geojson" version = "3.1.0" @@ -7350,7 +7332,7 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\ cffi = ["cffi (>=1.11)"] [extras] -all = ["black", "clickhouse-connect", "dask", "datafusion", "db-dtypes", "deltalake", "duckdb", "geoalchemy2", "geopandas", "google-cloud-bigquery", "google-cloud-bigquery-storage", "graphviz", "impyla", "oracledb", "packaging", "pins", "polars", "psycopg2", "pydata-google-auth", "pydruid", "pymysql", "pyodbc", "pyspark", "regex", "shapely", "snowflake-connector-python", "sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-risingwave", "sqlalchemy-views", "trino"] +all = ["black", "clickhouse-connect", "dask", "datafusion", "db-dtypes", "deltalake", "duckdb", "geopandas", "google-cloud-bigquery", "google-cloud-bigquery-storage", "graphviz", "impyla", "oracledb", "packaging", "pins", "polars", "psycopg2", "pydata-google-auth", "pydruid", "pymysql", "pyodbc", "pyspark", "regex", "shapely", "snowflake-connector-python", "sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views", "trino"] bigquery = ["db-dtypes", "google-cloud-bigquery", "google-cloud-bigquery-storage", "pydata-google-auth"] clickhouse = ["clickhouse-connect"] dask = ["dask", "regex"] @@ -7362,10 +7344,10 @@ duckdb = ["duckdb"] examples = ["pins"] exasol = ["sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views"] flink = [] -geospatial = ["geoalchemy2", "geopandas", "shapely"] +geospatial = ["geopandas", "shapely"] impala = ["impyla", "sqlalchemy"] mssql = ["pyodbc", "sqlalchemy", "sqlalchemy-views"] -mysql = ["pymysql", "sqlalchemy", "sqlalchemy-views"] +mysql = ["pymysql"] oracle = ["oracledb", "packaging", "sqlalchemy", "sqlalchemy-views"] pandas = ["regex"] polars = ["packaging", "polars"] @@ -7380,4 +7362,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "19f28c10cf57b43986df07521c076f7d3942fd1b61aa73d5cc5e7350f3a6842e" +content-hash = "f48fea54ccdbe62885012b0c4b7d4ea605d83ea59ea67405e28ecd51d066fe53" diff --git a/pyproject.toml b/pyproject.toml index f39175ac8ab9..27c944bc3fc6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,6 @@ datafusion = { version = ">=0.6,<35", optional = true } db-dtypes = { version = ">=0.3,<2", optional = true } deltalake = { version = ">=0.9.0,<1", optional = true } duckdb = { 
version = ">=0.8.1,<1", optional = true } -geoalchemy2 = { version = ">=0.6.3,<1", optional = true } geopandas = { version = ">=0.6,<1", optional = true } google-cloud-bigquery = { version = ">=3,<4", optional = true } google-cloud-bigquery-storage = { version = ">=2,<3", optional = true } @@ -150,7 +149,6 @@ all = [ "db-dtypes", "duckdb", "deltalake", - "geoalchemy2", "geopandas", "google-cloud-bigquery", "google-cloud-bigquery-storage", @@ -188,10 +186,10 @@ druid = ["pydruid", "sqlalchemy"] duckdb = ["duckdb"] exasol = ["sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views"] flink = [] -geospatial = ["geoalchemy2", "geopandas", "shapely"] +geospatial = ["geopandas", "shapely"] impala = ["impyla", "sqlalchemy"] mssql = ["sqlalchemy", "pyodbc", "sqlalchemy-views"] -mysql = ["sqlalchemy", "pymysql", "sqlalchemy-views"] +mysql = ["pymysql"] oracle = ["sqlalchemy", "oracledb", "packaging", "sqlalchemy-views"] pandas = ["regex"] polars = ["polars", "packaging"] diff --git a/requirements-dev.txt b/requirements-dev.txt index ae9b4c7d1d43..648abb407bab 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -63,8 +63,7 @@ fonttools==4.47.2 ; python_version >= "3.10" and python_version < "3.13" frozenlist==1.4.1 ; python_version >= "3.9" and python_version < "4.0" fsspec==2023.12.2 ; python_version >= "3.9" and python_version < "4.0" gcsfs==2023.12.2.post1 ; python_version >= "3.9" and python_version < "4.0" -gdown==5.0.1 ; python_version >= "3.10" and python_version < "3.13" -geoalchemy2==0.14.3 ; python_version >= "3.9" and python_version < "4.0" +gdown==4.7.1 ; python_version >= "3.10" and python_version < "3.13" geojson==3.1.0 ; python_version >= "3.10" and python_version < "3.13" geopandas==0.14.3 ; python_version >= "3.9" and python_version < "4.0" google-api-core==2.16.1 ; python_version >= "3.9" and python_version < "4.0" From 15ba46c9da53c2303dc524dbda7dde6c2361fde7 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 9 Jan 2024 11:35:16 -0500 Subject: [PATCH 051/161] refactor(sqlglot): remove duplicated simple compilation rules and sort --- ibis/backends/base/sqlglot/compiler.py | 105 ++++++++++++------------- 1 file changed, 51 insertions(+), 54 deletions(-) diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index 527ef9388a7c..40a6afe5934b 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -1187,79 +1187,76 @@ def visit_RegexExtract(self, op, *, arg, pattern, index): _SIMPLE_OPS = { + ops.Abs: "abs", + ops.Acos: "acos", ops.All: "bool_and", ops.Any: "bool_or", + ops.ApproxCountDistinct: "approx_distinct", ops.ArgMax: "max_by", ops.ArgMin: "min_by", - ops.Power: "pow", - ops.IsNan: "isnan", - ops.IsInf: "isinf", - ops.Abs: "abs", - ops.Exp: "exp", - ops.Sqrt: "sqrt", - ops.Ln: "ln", - ops.Log2: "log2", - ops.Log10: "log", - ops.Acos: "acos", + ops.ArrayCollect: "array_agg", + ops.ArrayContains: "array_contains", + ops.ArrayFlatten: "flatten", + ops.ArrayLength: "array_size", + ops.ArraySort: "array_sort", + ops.ArrayStringJoin: "array_to_string", ops.Asin: "asin", - ops.Atan: "atan", ops.Atan2: "atan2", + ops.Atan: "atan", + ops.Capitalize: "initcap", ops.Cos: "cos", - ops.Sin: "sin", - ops.Tan: "tan", ops.Cot: "cot", - ops.Pi: "pi", - ops.RandomScalar: "random", - ops.Sign: "sign", - ops.ApproxCountDistinct: "approx_distinct", - ops.Median: "median", - ops.ArgMin: "argmin", - ops.ArgMax: "argmax", - ops.First: "first", - ops.Last: "last", 
ops.Count: "count", - ops.ArrayCollect: "array_agg", + ops.CumeDist: "cume_dist", + ops.Date: "date", + ops.DateFromYMD: "datefromparts", + ops.Degrees: "degrees", + ops.DenseRank: "dense_rank", + ops.Exp: "exp", + ops.First: "first", + ops.FirstValue: "first_value", ops.GroupConcat: "group_concat", - ops.StringContains: "contains", - ops.StringLength: "length", - ops.Lowercase: "lower", - ops.Uppercase: "upper", - ops.StartsWith: "starts_with", - ops.StrRight: "right", ops.IfElse: "if", - ops.ArrayLength: "length", - ops.NullIf: "nullif", - ops.Repeat: "repeat", - ops.Map: "map", + ops.IsInf: "isinf", + ops.IsNan: "isnan", ops.JSONGetItem: "json_extract", - ops.ArrayFlatten: "flatten", - ops.NTile: "ntile", - ops.Degrees: "degrees", - ops.Radians: "radians", - ops.FirstValue: "first_value", + ops.Last: "last", ops.LastValue: "last_value", - ops.NthValue: "nth_value", + ops.Levenshtein: "levenshtein", + ops.Ln: "ln", + ops.Log10: "log", + ops.Log2: "log2", + ops.Lowercase: "lower", + ops.Map: "map", + ops.Median: "median", ops.MinRank: "rank", - ops.DenseRank: "dense_rank", + ops.NTile: "ntile", + ops.NthValue: "nth_value", + ops.NullIf: "nullif", ops.PercentRank: "percent_rank", - ops.CumeDist: "cume_dist", - ops.ArrayLength: "array_size", - ops.ArraySort: "array_sort", - ops.Capitalize: "initcap", - ops.Translate: "translate", - ops.StringReplace: "replace", - ops.Reverse: "reverse", - ops.StringSplit: "split", + ops.Pi: "pi", + ops.Power: "pow", + ops.Radians: "radians", + ops.RandomScalar: "random", ops.RegexSearch: "regexp_like", - ops.DateFromYMD: "datefromparts", - ops.Date: "date", + ops.RegexSplit: "regexp_split", + ops.Repeat: "repeat", + ops.Reverse: "reverse", ops.RowNumber: "row_number", + ops.Sign: "sign", + ops.Sin: "sin", + ops.Sqrt: "sqrt", + ops.StartsWith: "starts_with", + ops.StrRight: "right", + ops.StringContains: "contains", + ops.StringLength: "length", + ops.StringReplace: "replace", + ops.StringSplit: "split", ops.StringToTimestamp: "str_to_time", - ops.ArrayStringJoin: "array_to_string", - ops.Levenshtein: "levenshtein", + ops.Tan: "tan", + ops.Translate: "translate", ops.Unnest: "explode", - ops.RegexSplit: "regexp_split", - ops.ArrayContains: "array_contains", + ops.Uppercase: "upper", } _BINARY_INFIX_OPS = { From 22493cecfa265aeb25493e4a4d853bc4e9882abf Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jan 2024 06:39:12 -0500 Subject: [PATCH 052/161] chore(deps): bump sqlglot and regen sql --- .../snapshots/test_h01/test_tpc_h01/snowflake/h01.sql | 2 +- .../snapshots/test_h03/test_tpc_h03/snowflake/h03.sql | 4 ++-- .../snapshots/test_h04/test_tpc_h04/snowflake/h04.sql | 4 ++-- .../snapshots/test_h05/test_tpc_h05/snowflake/h05.sql | 4 ++-- .../snapshots/test_h06/test_tpc_h06/snowflake/h06.sql | 4 ++-- .../snapshots/test_h07/test_tpc_h07/snowflake/h07.sql | 2 +- .../snapshots/test_h08/test_tpc_h08/snowflake/h08.sql | 2 +- .../snapshots/test_h10/test_tpc_h10/snowflake/h10.sql | 4 ++-- .../snapshots/test_h12/test_tpc_h12/snowflake/h12.sql | 4 ++-- .../snapshots/test_h14/test_tpc_h14/snowflake/h14.sql | 4 ++-- .../snapshots/test_h15/test_tpc_h15/snowflake/h15.sql | 8 ++++---- .../snapshots/test_h20/test_tpc_h20/snowflake/h20.sql | 4 ++-- 12 files changed, 23 insertions(+), 23 deletions(-) diff --git a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql index 369c228deff0..64bd3bdf481e 100644 --- 
a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql @@ -51,7 +51,7 @@ FROM ( "t0"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t0" WHERE - "t0"."L_SHIPDATE" <= DATEFROMPARTS(1998, 9, 2) + "t0"."L_SHIPDATE" <= DATE_FROM_PARTS(1998, 9, 2) ) AS "t1" GROUP BY 1, diff --git a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql index 0c8dab8a1e70..7b063e4514b7 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql @@ -131,8 +131,8 @@ FROM ( ) AS "t11" WHERE "t11"."c_mktsegment" = 'BUILDING' - AND "t11"."o_orderdate" < DATEFROMPARTS(1995, 3, 15) - AND "t11"."l_shipdate" > DATEFROMPARTS(1995, 3, 15) + AND "t11"."o_orderdate" < DATE_FROM_PARTS(1995, 3, 15) + AND "t11"."l_shipdate" > DATE_FROM_PARTS(1995, 3, 15) ) AS "t12" GROUP BY 1, diff --git a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql index fe25f373c30c..1886853be37a 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql @@ -42,8 +42,8 @@ FROM ( "t1"."L_COMMITDATE" < "t1"."L_RECEIPTDATE" ) ) - AND "t2"."o_orderdate" >= DATEFROMPARTS(1993, 7, 1) - AND "t2"."o_orderdate" < DATEFROMPARTS(1993, 10, 1) + AND "t2"."o_orderdate" >= DATE_FROM_PARTS(1993, 7, 1) + AND "t2"."o_orderdate" < DATE_FROM_PARTS(1993, 10, 1) ) AS "t4" GROUP BY 1 diff --git a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql index b1a604664723..badae658d4b5 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql @@ -185,8 +185,8 @@ FROM ( ) AS "t23" WHERE "t23"."r_name" = 'ASIA' - AND "t23"."o_orderdate" >= DATEFROMPARTS(1994, 1, 1) - AND "t23"."o_orderdate" < DATEFROMPARTS(1995, 1, 1) + AND "t23"."o_orderdate" >= DATE_FROM_PARTS(1994, 1, 1) + AND "t23"."o_orderdate" < DATE_FROM_PARTS(1995, 1, 1) ) AS "t24" GROUP BY 1 diff --git a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql index 5d0be126fb13..16b0662e7346 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql @@ -20,8 +20,8 @@ FROM ( "t0"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t0" WHERE - "t0"."L_SHIPDATE" >= DATEFROMPARTS(1994, 1, 1) - AND "t0"."L_SHIPDATE" < DATEFROMPARTS(1995, 1, 1) + "t0"."L_SHIPDATE" >= DATE_FROM_PARTS(1994, 1, 1) + AND "t0"."L_SHIPDATE" < DATE_FROM_PARTS(1995, 1, 1) AND "t0"."L_DISCOUNT" BETWEEN 0.05 AND 0.07 AND "t0"."L_QUANTITY" < 24 ) AS "t1" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql index ce954992953d..8bf2ec92c1f2 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql @@ 
-124,7 +124,7 @@ FROM ( ) ) ) - AND "t22"."l_shipdate" BETWEEN DATEFROMPARTS(1995, 1, 1) AND DATEFROMPARTS(1996, 12, 31) + AND "t22"."l_shipdate" BETWEEN DATE_FROM_PARTS(1995, 1, 1) AND DATE_FROM_PARTS(1996, 12, 31) ) AS "t23" GROUP BY 1, diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql index e6b90d1f7a6e..ab47e8ef7f9f 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql @@ -126,7 +126,7 @@ FROM ( ) AS "t30" WHERE "t30"."r_name" = 'AMERICA' - AND "t30"."o_orderdate" BETWEEN DATEFROMPARTS(1995, 1, 1) AND DATEFROMPARTS(1996, 12, 31) + AND "t30"."o_orderdate" BETWEEN DATE_FROM_PARTS(1995, 1, 1) AND DATE_FROM_PARTS(1996, 12, 31) AND "t30"."p_type" = 'ECONOMY ANODIZED STEEL' ) AS "t31" GROUP BY diff --git a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql index 49e710a61584..055718392e8d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql @@ -155,8 +155,8 @@ FROM ( ON "t8"."c_nationkey" = "t11"."n_nationkey" ) AS "t15" WHERE - "t15"."o_orderdate" >= DATEFROMPARTS(1993, 10, 1) - AND "t15"."o_orderdate" < DATEFROMPARTS(1994, 1, 1) + "t15"."o_orderdate" >= DATE_FROM_PARTS(1993, 10, 1) + AND "t15"."o_orderdate" < DATE_FROM_PARTS(1994, 1, 1) AND "t15"."l_returnflag" = 'R' ) AS "t16" GROUP BY diff --git a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql index d0b4e47f354a..e6baf40f88e8 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql @@ -104,8 +104,8 @@ FROM ( "t7"."l_shipmode" IN ('MAIL', 'SHIP') AND "t7"."l_commitdate" < "t7"."l_receiptdate" AND "t7"."l_shipdate" < "t7"."l_commitdate" - AND "t7"."l_receiptdate" >= DATEFROMPARTS(1994, 1, 1) - AND "t7"."l_receiptdate" < DATEFROMPARTS(1995, 1, 1) + AND "t7"."l_receiptdate" >= DATE_FROM_PARTS(1994, 1, 1) + AND "t7"."l_receiptdate" < DATE_FROM_PARTS(1995, 1, 1) ) AS "t8" GROUP BY 1 diff --git a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql index 46bd271c3296..bc75afe8d8dd 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql @@ -98,6 +98,6 @@ FROM ( ON "t4"."l_partkey" = "t5"."p_partkey" ) AS "t7" WHERE - "t7"."l_shipdate" >= DATEFROMPARTS(1995, 9, 1) - AND "t7"."l_shipdate" < DATEFROMPARTS(1995, 10, 1) + "t7"."l_shipdate" >= DATE_FROM_PARTS(1995, 9, 1) + AND "t7"."l_shipdate" < DATE_FROM_PARTS(1995, 10, 1) ) AS "t8" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql index 014b7f420d8d..8c5d56f9f2b9 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql @@ -52,8 +52,8 @@ FROM ( "t1"."L_COMMENT" AS "l_comment" FROM 
"LINEITEM" AS "t1" WHERE - "t1"."L_SHIPDATE" >= DATEFROMPARTS(1996, 1, 1) - AND "t1"."L_SHIPDATE" < DATEFROMPARTS(1996, 4, 1) + "t1"."L_SHIPDATE" >= DATE_FROM_PARTS(1996, 1, 1) + AND "t1"."L_SHIPDATE" < DATE_FROM_PARTS(1996, 4, 1) ) AS "t4" GROUP BY 1 @@ -112,8 +112,8 @@ WHERE "t1"."L_COMMENT" AS "l_comment" FROM "LINEITEM" AS "t1" WHERE - "t1"."L_SHIPDATE" >= DATEFROMPARTS(1996, 1, 1) - AND "t1"."L_SHIPDATE" < DATEFROMPARTS(1996, 4, 1) + "t1"."L_SHIPDATE" >= DATE_FROM_PARTS(1996, 1, 1) + AND "t1"."L_SHIPDATE" < DATE_FROM_PARTS(1996, 4, 1) ) AS "t4" GROUP BY 1 diff --git a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql index 4d18b54ab6c0..1d5530c72b48 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql @@ -83,8 +83,8 @@ WHERE WHERE "t4"."L_PARTKEY" = "t6"."ps_partkey" AND "t4"."L_SUPPKEY" = "t6"."ps_suppkey" - AND "t4"."L_SHIPDATE" >= DATEFROMPARTS(1994, 1, 1) - AND "t4"."L_SHIPDATE" < DATEFROMPARTS(1995, 1, 1) + AND "t4"."L_SHIPDATE" >= DATE_FROM_PARTS(1994, 1, 1) + AND "t4"."L_SHIPDATE" < DATE_FROM_PARTS(1995, 1, 1) ) AS "t11" ) * 0.5 ) From 3f5a04b1e0a5a6ee14e0e9ae1e58e99994d8dd5e Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jan 2024 06:37:53 -0500 Subject: [PATCH 053/161] fix(duckdb): add `flip_coordinates` translation to sqlglot duckdb backend --- ibis/backends/duckdb/compiler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ibis/backends/duckdb/compiler.py b/ibis/backends/duckdb/compiler.py index 082dd45dfe7f..524e18ce9e0b 100644 --- a/ibis/backends/duckdb/compiler.py +++ b/ibis/backends/duckdb/compiler.py @@ -384,6 +384,7 @@ def visit_Quantile(self, op, *, arg, quantile, where): ops.GeoEndPoint: "st_endpoint", ops.GeoEnvelope: "st_envelope", ops.GeoEquals: "st_equals", + ops.GeoFlipCoordinates: "st_flipcoordinates", ops.GeoGeometryType: "st_geometrytype", ops.GeoIntersection: "st_intersection", ops.GeoIntersects: "st_intersects", From fb0aa604bc67c35bc23b8f21b40b2bca954f626a Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jan 2024 06:38:14 -0500 Subject: [PATCH 054/161] fix(snowflake): use `_safe_raw_sql` for `insert` implementation --- ibis/backends/snowflake/__init__.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index aa1258f24b7b..6096e2f75313 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -1062,8 +1062,12 @@ def insert( columns=[sg.column(col, quoted=True) for col in obj.columns], dialect=self.name, ) - with self.begin() as cur: - if overwrite: - cur.execute(f"TRUNCATE TABLE {table.sql(self.name)}") - cur.execute(query.sql(self.name)) + statements = [] + if overwrite: + statements.append(f"TRUNCATE TABLE {table.sql(self.name)}") + statements.append(query.sql(self.name)) + + statement = ";".join(statements) + with self._safe_raw_sql(statement): + pass From c2821375a3dc46ce8be6226784587cbd0e21faca Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jan 2024 07:46:16 -0500 Subject: [PATCH 055/161] fix(mysql): remove not-allowed frame clause from rank window function --- ibis/backends/mysql/compiler.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 
deletion(-) diff --git a/ibis/backends/mysql/compiler.py b/ibis/backends/mysql/compiler.py index d053c83c4300..848916e8f1c1 100644 --- a/ibis/backends/mysql/compiler.py +++ b/ibis/backends/mysql/compiler.py @@ -22,7 +22,7 @@ rewrite_last_to_last_value, ) from ibis.common.patterns import replace -from ibis.expr.rewrites import p, rewrite_sample +from ibis.expr.rewrites import p, rewrite_sample, y MySQL.Generator.TRANSFORMS |= { sge.LogicalOr: rename_func("max"), @@ -56,6 +56,13 @@ def rewrite_limit(_, **kwargs): return _ +@replace(p.WindowFunction(p.MinRank | p.DenseRank, y @ p.WindowFrame(start=None))) +def exclude_unsupported_window_frame_from_rank(_, y): + return ops.Subtract( + _.copy(frame=y.copy(start=None, end=0, order_by=y.order_by or (ops.NULL,))), 1 + ) + + @public class MySQLCompiler(SQLGlotCompiler): __slots__ = () @@ -68,6 +75,7 @@ class MySQLCompiler(SQLGlotCompiler): rewrite_first_to_first_value, rewrite_last_to_last_value, exclude_unsupported_window_frame_from_ops, + exclude_unsupported_window_frame_from_rank, exclude_unsupported_window_frame_from_row_number, rewrite_empty_order_by_window, *SQLGlotCompiler.rewrites, From 3c9dbd20977317df4d1f2cad63e24836a5c8bf17 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jan 2024 06:38:52 -0500 Subject: [PATCH 056/161] test(postgres): use DBAPI instead of sqlalchemy apis in timezone test --- .../test_timezone_from_column/out.sql | 27 +++++++------- ibis/backends/postgres/tests/test_client.py | 36 ++++++++++--------- 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/ibis/backends/postgres/tests/snapshots/test_client/test_timezone_from_column/out.sql b/ibis/backends/postgres/tests/snapshots/test_client/test_timezone_from_column/out.sql index b4b6f555f34c..dca5e2864aef 100644 --- a/ibis/backends/postgres/tests/snapshots/test_client/test_timezone_from_column/out.sql +++ b/ibis/backends/postgres/tests/snapshots/test_client/test_timezone_from_column/out.sql @@ -1,15 +1,14 @@ -WITH t0 AS ( - SELECT - t2.id AS id, - t2.ts_tz AS tz, - t2.ts_no_tz AS no_tz - FROM x AS t2 -) SELECT - t0.id, - CAST(t0.tz AS TIMESTAMPTZ) AS tz, - CAST(t0.no_tz AS TIMESTAMP) AS no_tz, - t1.id AS id_right -FROM t0 -LEFT OUTER JOIN y AS t1 - ON t0.id = t1.id \ No newline at end of file + "t4"."id", + "t4"."tz", + "t4"."no_tz", + "t2"."id" AS "id_right" +FROM ( + SELECT + "t0"."id", + "t0"."ts_tz" AS "tz", + "t0"."ts_no_tz" AS "no_tz" + FROM "x" AS "t0" +) AS "t4" +LEFT OUTER JOIN "y" AS "t2" + ON "t4"."id" = "t2"."id" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/test_client.py b/ibis/backends/postgres/tests/test_client.py index 25afed4a0b9e..eb40ecb3d9cf 100644 --- a/ibis/backends/postgres/tests/test_client.py +++ b/ibis/backends/postgres/tests/test_client.py @@ -198,29 +198,33 @@ def test_connect_url_with_empty_host(): @pytest.fixture(scope="module") def contz(con): - (tz,) = con.raw_sql("SHOW TIMEZONE").fetchone() - con.raw_sql("SET TIMEZONE TO 'America/New_York'") + with con.begin() as c: + c.execute("SHOW TIMEZONE") + [(tz,)] = c.fetchall() + c.execute("SET TIMEZONE TO 'America/New_York'") yield con - con.raw_sql(f"SET TIMEZONE TO '{tz}'") + with con.begin() as c: + c.execute(f"SET TIMEZONE TO '{tz}'") -def test_timezone_from_column(con, contz, snapshot): - con.raw_sql( - """ - CREATE TEMPORARY TABLE x ( - id BIGINT, - ts_tz TIMESTAMP WITH TIME ZONE NOT NULL, - ts_no_tz TIMESTAMP WITHOUT TIME ZONE NOT NULL - ); +def test_timezone_from_column(contz, snapshot): + with 
contz.begin() as c: + c.execute( + """ + CREATE TEMPORARY TABLE x ( + id BIGINT, + ts_tz TIMESTAMP WITH TIME ZONE NOT NULL, + ts_no_tz TIMESTAMP WITHOUT TIME ZONE NOT NULL + ); - INSERT INTO x VALUES - (1, '2018-01-01 00:00:01+00', '2018-01-01 00:00:02'); + INSERT INTO x VALUES + (1, '2018-01-01 00:00:01+00', '2018-01-01 00:00:02'); - CREATE TEMPORARY TABLE y AS SELECT 1::BIGINT AS id; - """ - ) + CREATE TEMPORARY TABLE y AS SELECT 1::BIGINT AS id; + """ + ) case = ( contz.table("x") From 922875458d3821f8c30d98851e2c94f446f591d7 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jan 2024 08:00:01 -0500 Subject: [PATCH 057/161] test(postgres): remove test that no longer works --- ibis/backends/postgres/tests/test_client.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ibis/backends/postgres/tests/test_client.py b/ibis/backends/postgres/tests/test_client.py index eb40ecb3d9cf..03601d77efe3 100644 --- a/ibis/backends/postgres/tests/test_client.py +++ b/ibis/backends/postgres/tests/test_client.py @@ -191,11 +191,6 @@ def test_insert_with_cte(con): assert Y.execute().empty -def test_connect_url_with_empty_host(): - con = ibis.connect("postgres:///ibis_testing") - assert con.con.url.host is None - - @pytest.fixture(scope="module") def contz(con): with con.begin() as c: From f28f1a57fd410d66e2fb3ff66f4842f8b47a8d33 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jan 2024 07:59:05 -0500 Subject: [PATCH 058/161] test(pandas): use the correct error type when xfailing for compound-sort-key rank --- ibis/backends/tests/test_window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 99ddaf6f5b46..d12960f729bc 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -1329,7 +1329,7 @@ def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df): @pytest.mark.broken( ["pandas"], raises=TypeError, - reason="'<' not supported between instances of 'bool' and 'NoneType'", + reason="pandas rank impl cannot handle compound sort keys with null", ) @pytest.mark.notimpl( ["risingwave"], From 1f92b2bed406f8a1b665c9f24557941ab3718ed2 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jan 2024 05:12:13 -0500 Subject: [PATCH 059/161] refactor(ir): give unbound tables namespaces --- ibis/backends/base/sqlglot/compiler.py | 39 ++++++++++++------- .../test_h01/test_tpc_h01/snowflake/h01.sql | 2 +- .../test_h01/test_tpc_h01/trino/h01.sql | 2 +- .../test_h02/test_tpc_h02/snowflake/h02.sql | 18 ++++----- .../test_h02/test_tpc_h02/trino/h02.sql | 18 ++++----- .../test_h03/test_tpc_h03/snowflake/h03.sql | 6 +-- .../test_h03/test_tpc_h03/trino/h03.sql | 6 +-- .../test_h04/test_tpc_h04/snowflake/h04.sql | 4 +- .../test_h04/test_tpc_h04/trino/h04.sql | 4 +- .../test_h05/test_tpc_h05/snowflake/h05.sql | 12 +++--- .../test_h05/test_tpc_h05/trino/h05.sql | 12 +++--- .../test_h06/test_tpc_h06/snowflake/h06.sql | 2 +- .../test_h06/test_tpc_h06/trino/h06.sql | 2 +- .../test_h07/test_tpc_h07/snowflake/h07.sql | 12 +++--- .../test_h07/test_tpc_h07/trino/h07.sql | 12 +++--- .../test_h08/test_tpc_h08/snowflake/h08.sql | 16 ++++---- .../test_h08/test_tpc_h08/trino/h08.sql | 16 ++++---- .../test_h09/test_tpc_h09/snowflake/h09.sql | 12 +++--- .../test_h09/test_tpc_h09/trino/h09.sql | 12 +++--- .../test_h10/test_tpc_h10/snowflake/h10.sql | 8 
++-- .../test_h10/test_tpc_h10/trino/h10.sql | 8 ++-- .../test_h11/test_tpc_h11/snowflake/h11.sql | 12 +++--- .../test_h11/test_tpc_h11/trino/h11.sql | 12 +++--- .../test_h12/test_tpc_h12/snowflake/h12.sql | 4 +- .../test_h12/test_tpc_h12/trino/h12.sql | 4 +- .../test_h13/test_tpc_h13/snowflake/h13.sql | 4 +- .../test_h13/test_tpc_h13/trino/h13.sql | 4 +- .../test_h14/test_tpc_h14/snowflake/h14.sql | 4 +- .../test_h14/test_tpc_h14/trino/h14.sql | 4 +- .../test_h15/test_tpc_h15/snowflake/h15.sql | 8 ++-- .../test_h15/test_tpc_h15/trino/h15.sql | 8 ++-- .../test_h16/test_tpc_h16/snowflake/h16.sql | 6 +-- .../test_h16/test_tpc_h16/trino/h16.sql | 6 +-- .../test_h17/test_tpc_h17/snowflake/h17.sql | 6 +-- .../test_h17/test_tpc_h17/trino/h17.sql | 6 +-- .../test_h18/test_tpc_h18/snowflake/h18.sql | 8 ++-- .../test_h18/test_tpc_h18/trino/h18.sql | 8 ++-- .../test_h19/test_tpc_h19/snowflake/h19.sql | 4 +- .../test_h19/test_tpc_h19/trino/h19.sql | 4 +- .../test_h20/test_tpc_h20/snowflake/h20.sql | 10 ++--- .../test_h20/test_tpc_h20/trino/h20.sql | 10 ++--- .../test_h21/test_tpc_h21/snowflake/h21.sql | 12 +++--- .../test_h21/test_tpc_h21/trino/h21.sql | 12 +++--- .../test_h22/test_tpc_h22/snowflake/h22.sql | 6 +-- .../test_h22/test_tpc_h22/trino/h22.sql | 6 +-- ibis/expr/operations/relations.py | 11 +++--- ibis/expr/types/core.py | 4 +- ibis/tests/expr/test_table.py | 14 +++++++ 48 files changed, 225 insertions(+), 195 deletions(-) diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index 40a6afe5934b..e71344c78086 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -2,13 +2,12 @@ import abc import calendar -import functools import itertools import math import operator import string from collections.abc import Iterator, Mapping -from functools import partial, singledispatchmethod +from functools import partial, reduce, singledispatchmethod from itertools import starmap from typing import TYPE_CHECKING, Any, Callable @@ -244,8 +243,7 @@ def fn(node, _, **kwargs): } op = op.replace( - replace_scalar_parameter(params) - | functools.reduce(operator.or_, self.rewrites) + replace_scalar_parameter(params) | reduce(operator.or_, self.rewrites) ) op = sqlize(op) # apply translate rules in topological order @@ -553,7 +551,7 @@ def visit_DayOfWeekName(self, op, *, arg): # Saturday == 6 return sge.Case( this=(self.f.dayofweek(arg) + 6) % 7, - ifs=list(starmap(self.if_, enumerate(calendar.day_name))), + ifs=list(itertools.starmap(self.if_, enumerate(calendar.day_name))), ) @visit_node.register(ops.IntervalFromInteger) @@ -874,10 +872,11 @@ def visit_ArrayConcat(self, op, *, arg): def _dedup_name( self, key: str, value: sge.Expression ) -> Iterator[sge.Alias | sge.Column]: + """Don't alias columns that are already named the same as their alias.""" return ( - value.as_(key, quoted=self.quoted) - if not isinstance(value, sge.Column) or key != value.name - else value + value + if isinstance(value, sge.Column) and key == value.name + else value.as_(key, quoted=self.quoted) ) @visit_node.register(Select) @@ -906,15 +905,29 @@ def visit_DummyTable(self, op, *, values): return sg.select(*starmap(self._dedup_name, values.items())) @visit_node.register(ops.UnboundTable) - def visit_UnboundTable(self, op, *, name: str, schema: sch.Schema): - return sg.table(name, quoted=self.quoted) + def visit_UnboundTable( + self, op, *, name: str, schema: sch.Schema, namespace: ops.Namespace + ) -> sg.Table: + return sg.table( + name, 
db=namespace.schema, catalog=namespace.database, quoted=self.quoted + ) @visit_node.register(ops.InMemoryTable) - def visit_InMemoryTable(self, op, *, name: str, schema: sch.Schema, data): + def visit_InMemoryTable( + self, op, *, name: str, schema: sch.Schema, data + ) -> sg.Table: return sg.table(name, quoted=self.quoted) @visit_node.register(ops.DatabaseTable) - def visit_DatabaseTable(self, op, *, name, namespace, schema, source): + def visit_DatabaseTable( + self, + op, + *, + name: str, + schema: sch.Schema, + source: Any, + namespace: ops.Namespace, + ) -> sg.Table: return sg.table( name, db=namespace.schema, catalog=namespace.database, quoted=self.quoted ) @@ -1108,7 +1121,7 @@ def visit_DropNa(self, op, *, parent, how, subset): ] if subset: - predicate = functools.reduce( + predicate = reduce( sg.and_ if how == "any" else sg.or_, (sg.not_(col.is_(NULL)) for col in subset), ) diff --git a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql index 64bd3bdf481e..377ed49ad95e 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql @@ -49,7 +49,7 @@ FROM ( "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t0"."L_SHIPMODE" AS "l_shipmode", "t0"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t0" WHERE "t0"."L_SHIPDATE" <= DATE_FROM_PARTS(1998, 9, 2) ) AS "t1" diff --git a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/trino/h01.sql b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/trino/h01.sql index 5f0048dce3fb..2de867ca8838 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/trino/h01.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/trino/h01.sql @@ -49,7 +49,7 @@ FROM ( "t0"."l_shipinstruct", "t0"."l_shipmode", "t0"."l_comment" - FROM "lineitem" AS "t0" + FROM "hive"."ibis_sf1"."lineitem" AS "t0" WHERE "t0"."l_shipdate" <= FROM_ISO8601_DATE('1998-09-02') ) AS "t1" diff --git a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql index e4d7499a9126..59ffed3b4cae 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql @@ -48,7 +48,7 @@ FROM ( "t0"."P_CONTAINER" AS "p_container", "t0"."P_RETAILPRICE" AS "p_retailprice", "t0"."P_COMMENT" AS "p_comment" - FROM "PART" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS "t0" ) AS "t10" INNER JOIN ( SELECT @@ -57,7 +57,7 @@ FROM ( "t1"."PS_AVAILQTY" AS "ps_availqty", "t1"."PS_SUPPLYCOST" AS "ps_supplycost", "t1"."PS_COMMENT" AS "ps_comment" - FROM "PARTSUPP" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PARTSUPP" AS "t1" ) AS "t11" ON "t10"."p_partkey" = "t11"."ps_partkey" INNER JOIN ( @@ -69,7 +69,7 @@ FROM ( "t2"."S_PHONE" AS "s_phone", "t2"."S_ACCTBAL" AS "s_acctbal", "t2"."S_COMMENT" AS "s_comment" - FROM "SUPPLIER" AS "t2" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS "t2" ) AS "t13" ON "t13"."s_suppkey" = "t11"."ps_suppkey" INNER JOIN ( @@ -78,7 +78,7 @@ FROM ( "t3"."N_NAME" AS "n_name", "t3"."N_REGIONKEY" AS "n_regionkey", "t3"."N_COMMENT" AS "n_comment" - FROM "NATION" AS "t3" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t3" ) AS "t15" ON "t13"."s_nationkey" = 
"t15"."n_nationkey" INNER JOIN ( @@ -86,7 +86,7 @@ FROM ( "t4"."R_REGIONKEY" AS "r_regionkey", "t4"."R_NAME" AS "r_name", "t4"."R_COMMENT" AS "r_comment" - FROM "REGION" AS "t4" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."REGION" AS "t4" ) AS "t17" ON "t15"."n_regionkey" = "t17"."r_regionkey" ) AS "t26" @@ -146,7 +146,7 @@ WHERE "t1"."PS_AVAILQTY" AS "ps_availqty", "t1"."PS_SUPPLYCOST" AS "ps_supplycost", "t1"."PS_COMMENT" AS "ps_comment" - FROM "PARTSUPP" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PARTSUPP" AS "t1" ) AS "t12" INNER JOIN ( SELECT @@ -157,7 +157,7 @@ WHERE "t2"."S_PHONE" AS "s_phone", "t2"."S_ACCTBAL" AS "s_acctbal", "t2"."S_COMMENT" AS "s_comment" - FROM "SUPPLIER" AS "t2" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS "t2" ) AS "t14" ON "t14"."s_suppkey" = "t12"."ps_suppkey" INNER JOIN ( @@ -166,7 +166,7 @@ WHERE "t3"."N_NAME" AS "n_name", "t3"."N_REGIONKEY" AS "n_regionkey", "t3"."N_COMMENT" AS "n_comment" - FROM "NATION" AS "t3" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t3" ) AS "t16" ON "t14"."s_nationkey" = "t16"."n_nationkey" INNER JOIN ( @@ -174,7 +174,7 @@ WHERE "t4"."R_REGIONKEY" AS "r_regionkey", "t4"."R_NAME" AS "r_name", "t4"."R_COMMENT" AS "r_comment" - FROM "REGION" AS "t4" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."REGION" AS "t4" ) AS "t18" ON "t16"."n_regionkey" = "t18"."r_regionkey" ) AS "t27" diff --git a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql index d76a0c18cfed..b44f31b764fc 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql @@ -48,7 +48,7 @@ FROM ( "t0"."p_container", CAST("t0"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", "t0"."p_comment" - FROM "part" AS "t0" + FROM "hive"."ibis_sf1"."part" AS "t0" ) AS "t14" INNER JOIN ( SELECT @@ -57,7 +57,7 @@ FROM ( "t1"."ps_availqty", CAST("t1"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", "t1"."ps_comment" - FROM "partsupp" AS "t1" + FROM "hive"."ibis_sf1"."partsupp" AS "t1" ) AS "t15" ON "t14"."p_partkey" = "t15"."ps_partkey" INNER JOIN ( @@ -69,7 +69,7 @@ FROM ( "t2"."s_phone", CAST("t2"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", "t2"."s_comment" - FROM "supplier" AS "t2" + FROM "hive"."ibis_sf1"."supplier" AS "t2" ) AS "t17" ON "t17"."s_suppkey" = "t15"."ps_suppkey" INNER JOIN ( @@ -78,7 +78,7 @@ FROM ( "t3"."n_name", "t3"."n_regionkey", "t3"."n_comment" - FROM "nation" AS "t3" + FROM "hive"."ibis_sf1"."nation" AS "t3" ) AS "t10" ON "t17"."s_nationkey" = "t10"."n_nationkey" INNER JOIN ( @@ -86,7 +86,7 @@ FROM ( "t4"."r_regionkey", "t4"."r_name", "t4"."r_comment" - FROM "region" AS "t4" + FROM "hive"."ibis_sf1"."region" AS "t4" ) AS "t12" ON "t10"."n_regionkey" = "t12"."r_regionkey" ) AS "t26" @@ -146,7 +146,7 @@ WHERE "t1"."ps_availqty", CAST("t1"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", "t1"."ps_comment" - FROM "partsupp" AS "t1" + FROM "hive"."ibis_sf1"."partsupp" AS "t1" ) AS "t16" INNER JOIN ( SELECT @@ -157,7 +157,7 @@ WHERE "t2"."s_phone", CAST("t2"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", "t2"."s_comment" - FROM "supplier" AS "t2" + FROM "hive"."ibis_sf1"."supplier" AS "t2" ) AS "t18" ON "t18"."s_suppkey" = "t16"."ps_suppkey" INNER JOIN ( @@ -166,7 +166,7 @@ WHERE "t3"."n_name", "t3"."n_regionkey", "t3"."n_comment" - FROM "nation" AS "t3" + FROM "hive"."ibis_sf1"."nation" AS "t3" ) AS "t11" ON "t18"."s_nationkey" = 
"t11"."n_nationkey" INNER JOIN ( @@ -174,7 +174,7 @@ WHERE "t4"."r_regionkey", "t4"."r_name", "t4"."r_comment" - FROM "region" AS "t4" + FROM "hive"."ibis_sf1"."region" AS "t4" ) AS "t13" ON "t11"."n_regionkey" = "t13"."r_regionkey" ) AS "t27" diff --git a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql index 7b063e4514b7..24678d9406a7 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql @@ -91,7 +91,7 @@ FROM ( "t0"."C_ACCTBAL" AS "c_acctbal", "t0"."C_MKTSEGMENT" AS "c_mktsegment", "t0"."C_COMMENT" AS "c_comment" - FROM "CUSTOMER" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS "t0" ) AS "t6" INNER JOIN ( SELECT @@ -104,7 +104,7 @@ FROM ( "t1"."O_CLERK" AS "o_clerk", "t1"."O_SHIPPRIORITY" AS "o_shippriority", "t1"."O_COMMENT" AS "o_comment" - FROM "ORDERS" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS "t1" ) AS "t7" ON "t6"."c_custkey" = "t7"."o_custkey" INNER JOIN ( @@ -125,7 +125,7 @@ FROM ( "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t2"."L_SHIPMODE" AS "l_shipmode", "t2"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t2" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t2" ) AS "t8" ON "t8"."l_orderkey" = "t7"."o_orderkey" ) AS "t11" diff --git a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql index ca724d740022..d1905163479d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql @@ -91,7 +91,7 @@ FROM ( CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", "t0"."c_mktsegment", "t0"."c_comment" - FROM "customer" AS "t0" + FROM "hive"."ibis_sf1"."customer" AS "t0" ) AS "t6" INNER JOIN ( SELECT @@ -104,7 +104,7 @@ FROM ( "t1"."o_clerk", "t1"."o_shippriority", "t1"."o_comment" - FROM "orders" AS "t1" + FROM "hive"."ibis_sf1"."orders" AS "t1" ) AS "t7" ON "t6"."c_custkey" = "t7"."o_custkey" INNER JOIN ( @@ -125,7 +125,7 @@ FROM ( "t2"."l_shipinstruct", "t2"."l_shipmode", "t2"."l_comment" - FROM "lineitem" AS "t2" + FROM "hive"."ibis_sf1"."lineitem" AS "t2" ) AS "t8" ON "t8"."l_orderkey" = "t7"."o_orderkey" ) AS "t11" diff --git a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql index 1886853be37a..bd81df86629f 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql @@ -27,13 +27,13 @@ FROM ( "t0"."O_CLERK" AS "o_clerk", "t0"."O_SHIPPRIORITY" AS "o_shippriority", "t0"."O_COMMENT" AS "o_comment" - FROM "ORDERS" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS "t0" ) AS "t2" WHERE EXISTS( SELECT 1 AS "1" - FROM "LINEITEM" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t1" WHERE ( "t1"."L_ORDERKEY" = "t2"."o_orderkey" diff --git a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/trino/h04.sql b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/trino/h04.sql index 10bf14955d70..dbf6ee6ae809 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/trino/h04.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/trino/h04.sql @@ -27,13 +27,13 @@ FROM ( 
"t0"."o_clerk", "t0"."o_shippriority", "t0"."o_comment" - FROM "orders" AS "t0" + FROM "hive"."ibis_sf1"."orders" AS "t0" ) AS "t2" WHERE EXISTS( SELECT 1 AS "1" - FROM "lineitem" AS "t1" + FROM "hive"."ibis_sf1"."lineitem" AS "t1" WHERE ( "t1"."l_orderkey" = "t2"."o_orderkey" diff --git a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql index badae658d4b5..045a902f9d0b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql @@ -115,7 +115,7 @@ FROM ( "t0"."C_ACCTBAL" AS "c_acctbal", "t0"."C_MKTSEGMENT" AS "c_mktsegment", "t0"."C_COMMENT" AS "c_comment" - FROM "CUSTOMER" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS "t0" ) AS "t12" INNER JOIN ( SELECT @@ -128,7 +128,7 @@ FROM ( "t1"."O_CLERK" AS "o_clerk", "t1"."O_SHIPPRIORITY" AS "o_shippriority", "t1"."O_COMMENT" AS "o_comment" - FROM "ORDERS" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS "t1" ) AS "t13" ON "t12"."c_custkey" = "t13"."o_custkey" INNER JOIN ( @@ -149,7 +149,7 @@ FROM ( "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t2"."L_SHIPMODE" AS "l_shipmode", "t2"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t2" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t2" ) AS "t14" ON "t14"."l_orderkey" = "t13"."o_orderkey" INNER JOIN ( @@ -161,7 +161,7 @@ FROM ( "t3"."S_PHONE" AS "s_phone", "t3"."S_ACCTBAL" AS "s_acctbal", "t3"."S_COMMENT" AS "s_comment" - FROM "SUPPLIER" AS "t3" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS "t3" ) AS "t15" ON "t14"."l_suppkey" = "t15"."s_suppkey" INNER JOIN ( @@ -170,7 +170,7 @@ FROM ( "t4"."N_NAME" AS "n_name", "t4"."N_REGIONKEY" AS "n_regionkey", "t4"."N_COMMENT" AS "n_comment" - FROM "NATION" AS "t4" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t4" ) AS "t16" ON "t12"."c_nationkey" = "t15"."s_nationkey" AND "t15"."s_nationkey" = "t16"."n_nationkey" @@ -179,7 +179,7 @@ FROM ( "t5"."R_REGIONKEY" AS "r_regionkey", "t5"."R_NAME" AS "r_name", "t5"."R_COMMENT" AS "r_comment" - FROM "REGION" AS "t5" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."REGION" AS "t5" ) AS "t17" ON "t16"."n_regionkey" = "t17"."r_regionkey" ) AS "t23" diff --git a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql index c1bbad1c8935..9c3e856b2a91 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql @@ -115,7 +115,7 @@ FROM ( CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", "t0"."c_mktsegment", "t0"."c_comment" - FROM "customer" AS "t0" + FROM "hive"."ibis_sf1"."customer" AS "t0" ) AS "t14" INNER JOIN ( SELECT @@ -128,7 +128,7 @@ FROM ( "t1"."o_clerk", "t1"."o_shippriority", "t1"."o_comment" - FROM "orders" AS "t1" + FROM "hive"."ibis_sf1"."orders" AS "t1" ) AS "t15" ON "t14"."c_custkey" = "t15"."o_custkey" INNER JOIN ( @@ -149,7 +149,7 @@ FROM ( "t2"."l_shipinstruct", "t2"."l_shipmode", "t2"."l_comment" - FROM "lineitem" AS "t2" + FROM "hive"."ibis_sf1"."lineitem" AS "t2" ) AS "t16" ON "t16"."l_orderkey" = "t15"."o_orderkey" INNER JOIN ( @@ -161,7 +161,7 @@ FROM ( "t3"."s_phone", CAST("t3"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", "t3"."s_comment" - FROM "supplier" AS "t3" + FROM "hive"."ibis_sf1"."supplier" AS "t3" ) AS "t17" ON "t16"."l_suppkey" = 
"t17"."s_suppkey" INNER JOIN ( @@ -170,7 +170,7 @@ FROM ( "t4"."n_name", "t4"."n_regionkey", "t4"."n_comment" - FROM "nation" AS "t4" + FROM "hive"."ibis_sf1"."nation" AS "t4" ) AS "t12" ON "t14"."c_nationkey" = "t17"."s_nationkey" AND "t17"."s_nationkey" = "t12"."n_nationkey" @@ -179,7 +179,7 @@ FROM ( "t5"."r_regionkey", "t5"."r_name", "t5"."r_comment" - FROM "region" AS "t5" + FROM "hive"."ibis_sf1"."region" AS "t5" ) AS "t13" ON "t12"."n_regionkey" = "t13"."r_regionkey" ) AS "t23" diff --git a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql index 16b0662e7346..e2f8ef6be299 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql @@ -18,7 +18,7 @@ FROM ( "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t0"."L_SHIPMODE" AS "l_shipmode", "t0"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t0" WHERE "t0"."L_SHIPDATE" >= DATE_FROM_PARTS(1994, 1, 1) AND "t0"."L_SHIPDATE" < DATE_FROM_PARTS(1995, 1, 1) diff --git a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/trino/h06.sql b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/trino/h06.sql index 1984dc9737ef..88d450e7cba0 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/trino/h06.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/trino/h06.sql @@ -18,7 +18,7 @@ FROM ( "t0"."l_shipinstruct", "t0"."l_shipmode", "t0"."l_comment" - FROM "lineitem" AS "t0" + FROM "hive"."ibis_sf1"."lineitem" AS "t0" WHERE "t0"."l_shipdate" >= FROM_ISO8601_DATE('1994-01-01') AND "t0"."l_shipdate" < FROM_ISO8601_DATE('1995-01-01') diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql index 8bf2ec92c1f2..c3b31dcec500 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql @@ -38,7 +38,7 @@ FROM ( "t0"."S_PHONE" AS "s_phone", "t0"."S_ACCTBAL" AS "s_acctbal", "t0"."S_COMMENT" AS "s_comment" - FROM "SUPPLIER" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS "t0" ) AS "t10" INNER JOIN ( SELECT @@ -58,7 +58,7 @@ FROM ( "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t1"."L_SHIPMODE" AS "l_shipmode", "t1"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t1" ) AS "t11" ON "t10"."s_suppkey" = "t11"."l_suppkey" INNER JOIN ( @@ -72,7 +72,7 @@ FROM ( "t2"."O_CLERK" AS "o_clerk", "t2"."O_SHIPPRIORITY" AS "o_shippriority", "t2"."O_COMMENT" AS "o_comment" - FROM "ORDERS" AS "t2" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS "t2" ) AS "t12" ON "t12"."o_orderkey" = "t11"."l_orderkey" INNER JOIN ( @@ -85,7 +85,7 @@ FROM ( "t3"."C_ACCTBAL" AS "c_acctbal", "t3"."C_MKTSEGMENT" AS "c_mktsegment", "t3"."C_COMMENT" AS "c_comment" - FROM "CUSTOMER" AS "t3" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS "t3" ) AS "t13" ON "t13"."c_custkey" = "t12"."o_custkey" INNER JOIN ( @@ -94,7 +94,7 @@ FROM ( "t4"."N_NAME" AS "n_name", "t4"."N_REGIONKEY" AS "n_regionkey", "t4"."N_COMMENT" AS "n_comment" - FROM "NATION" AS "t4" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t4" ) AS "t14" ON "t10"."s_nationkey" = "t14"."n_nationkey" INNER JOIN ( @@ -103,7 +103,7 @@ FROM 
( "t4"."N_NAME" AS "n_name", "t4"."N_REGIONKEY" AS "n_regionkey", "t4"."N_COMMENT" AS "n_comment" - FROM "NATION" AS "t4" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t4" ) AS "t16" ON "t13"."c_nationkey" = "t16"."n_nationkey" ) AS "t22" diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql index 74c153ef4979..b484929c34d4 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql @@ -38,7 +38,7 @@ FROM ( "t0"."s_phone", CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", "t0"."s_comment" - FROM "supplier" AS "t0" + FROM "hive"."ibis_sf1"."supplier" AS "t0" ) AS "t12" INNER JOIN ( SELECT @@ -58,7 +58,7 @@ FROM ( "t1"."l_shipinstruct", "t1"."l_shipmode", "t1"."l_comment" - FROM "lineitem" AS "t1" + FROM "hive"."ibis_sf1"."lineitem" AS "t1" ) AS "t13" ON "t12"."s_suppkey" = "t13"."l_suppkey" INNER JOIN ( @@ -72,7 +72,7 @@ FROM ( "t2"."o_clerk", "t2"."o_shippriority", "t2"."o_comment" - FROM "orders" AS "t2" + FROM "hive"."ibis_sf1"."orders" AS "t2" ) AS "t14" ON "t14"."o_orderkey" = "t13"."l_orderkey" INNER JOIN ( @@ -85,7 +85,7 @@ FROM ( CAST("t3"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", "t3"."c_mktsegment", "t3"."c_comment" - FROM "customer" AS "t3" + FROM "hive"."ibis_sf1"."customer" AS "t3" ) AS "t15" ON "t15"."c_custkey" = "t14"."o_custkey" INNER JOIN ( @@ -94,7 +94,7 @@ FROM ( "t4"."n_name", "t4"."n_regionkey", "t4"."n_comment" - FROM "nation" AS "t4" + FROM "hive"."ibis_sf1"."nation" AS "t4" ) AS "t10" ON "t12"."s_nationkey" = "t10"."n_nationkey" INNER JOIN ( @@ -103,7 +103,7 @@ FROM ( "t4"."n_name", "t4"."n_regionkey", "t4"."n_comment" - FROM "nation" AS "t4" + FROM "hive"."ibis_sf1"."nation" AS "t4" ) AS "t16" ON "t15"."c_nationkey" = "t16"."n_nationkey" ) AS "t22" diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql index ab47e8ef7f9f..9dac02ababb7 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql @@ -35,7 +35,7 @@ FROM ( "t0"."P_CONTAINER" AS "p_container", "t0"."P_RETAILPRICE" AS "p_retailprice", "t0"."P_COMMENT" AS "p_comment" - FROM "PART" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS "t0" ) AS "t14" INNER JOIN ( SELECT @@ -55,7 +55,7 @@ FROM ( "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t1"."L_SHIPMODE" AS "l_shipmode", "t1"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t1" ) AS "t15" ON "t14"."p_partkey" = "t15"."l_partkey" INNER JOIN ( @@ -67,7 +67,7 @@ FROM ( "t2"."S_PHONE" AS "s_phone", "t2"."S_ACCTBAL" AS "s_acctbal", "t2"."S_COMMENT" AS "s_comment" - FROM "SUPPLIER" AS "t2" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS "t2" ) AS "t16" ON "t16"."s_suppkey" = "t15"."l_suppkey" INNER JOIN ( @@ -81,7 +81,7 @@ FROM ( "t3"."O_CLERK" AS "o_clerk", "t3"."O_SHIPPRIORITY" AS "o_shippriority", "t3"."O_COMMENT" AS "o_comment" - FROM "ORDERS" AS "t3" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS "t3" ) AS "t17" ON "t15"."l_orderkey" = "t17"."o_orderkey" INNER JOIN ( @@ -94,7 +94,7 @@ FROM ( "t4"."C_ACCTBAL" AS "c_acctbal", "t4"."C_MKTSEGMENT" AS "c_mktsegment", "t4"."C_COMMENT" AS "c_comment" - FROM "CUSTOMER" AS "t4" + FROM 
"SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS "t4" ) AS "t18" ON "t17"."o_custkey" = "t18"."c_custkey" INNER JOIN ( @@ -103,7 +103,7 @@ FROM ( "t5"."N_NAME" AS "n_name", "t5"."N_REGIONKEY" AS "n_regionkey", "t5"."N_COMMENT" AS "n_comment" - FROM "NATION" AS "t5" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t5" ) AS "t19" ON "t18"."c_nationkey" = "t19"."n_nationkey" INNER JOIN ( @@ -111,7 +111,7 @@ FROM ( "t6"."R_REGIONKEY" AS "r_regionkey", "t6"."R_NAME" AS "r_name", "t6"."R_COMMENT" AS "r_comment" - FROM "REGION" AS "t6" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."REGION" AS "t6" ) AS "t21" ON "t19"."n_regionkey" = "t21"."r_regionkey" INNER JOIN ( @@ -120,7 +120,7 @@ FROM ( "t5"."N_NAME" AS "n_name", "t5"."N_REGIONKEY" AS "n_regionkey", "t5"."N_COMMENT" AS "n_comment" - FROM "NATION" AS "t5" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t5" ) AS "t22" ON "t16"."s_nationkey" = "t22"."n_nationkey" ) AS "t30" diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql index 80eaa5b0522c..d54d0e87e9ae 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql @@ -35,7 +35,7 @@ FROM ( "t0"."p_container", CAST("t0"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", "t0"."p_comment" - FROM "part" AS "t0" + FROM "hive"."ibis_sf1"."part" AS "t0" ) AS "t17" INNER JOIN ( SELECT @@ -55,7 +55,7 @@ FROM ( "t1"."l_shipinstruct", "t1"."l_shipmode", "t1"."l_comment" - FROM "lineitem" AS "t1" + FROM "hive"."ibis_sf1"."lineitem" AS "t1" ) AS "t18" ON "t17"."p_partkey" = "t18"."l_partkey" INNER JOIN ( @@ -67,7 +67,7 @@ FROM ( "t2"."s_phone", CAST("t2"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", "t2"."s_comment" - FROM "supplier" AS "t2" + FROM "hive"."ibis_sf1"."supplier" AS "t2" ) AS "t19" ON "t19"."s_suppkey" = "t18"."l_suppkey" INNER JOIN ( @@ -81,7 +81,7 @@ FROM ( "t3"."o_clerk", "t3"."o_shippriority", "t3"."o_comment" - FROM "orders" AS "t3" + FROM "hive"."ibis_sf1"."orders" AS "t3" ) AS "t20" ON "t18"."l_orderkey" = "t20"."o_orderkey" INNER JOIN ( @@ -94,7 +94,7 @@ FROM ( CAST("t4"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", "t4"."c_mktsegment", "t4"."c_comment" - FROM "customer" AS "t4" + FROM "hive"."ibis_sf1"."customer" AS "t4" ) AS "t21" ON "t20"."o_custkey" = "t21"."c_custkey" INNER JOIN ( @@ -103,7 +103,7 @@ FROM ( "t5"."n_name", "t5"."n_regionkey", "t5"."n_comment" - FROM "nation" AS "t5" + FROM "hive"."ibis_sf1"."nation" AS "t5" ) AS "t14" ON "t21"."c_nationkey" = "t14"."n_nationkey" INNER JOIN ( @@ -111,7 +111,7 @@ FROM ( "t6"."r_regionkey", "t6"."r_name", "t6"."r_comment" - FROM "region" AS "t6" + FROM "hive"."ibis_sf1"."region" AS "t6" ) AS "t16" ON "t14"."n_regionkey" = "t16"."r_regionkey" INNER JOIN ( @@ -120,7 +120,7 @@ FROM ( "t5"."n_name", "t5"."n_regionkey", "t5"."n_comment" - FROM "nation" AS "t5" + FROM "hive"."ibis_sf1"."nation" AS "t5" ) AS "t22" ON "t19"."s_nationkey" = "t22"."n_nationkey" ) AS "t30" diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql index b828b08644bc..7d854d04d97f 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql @@ -43,7 +43,7 @@ FROM ( "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t0"."L_SHIPMODE" AS 
"l_shipmode", "t0"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t0" ) AS "t12" INNER JOIN ( SELECT @@ -54,7 +54,7 @@ FROM ( "t1"."S_PHONE" AS "s_phone", "t1"."S_ACCTBAL" AS "s_acctbal", "t1"."S_COMMENT" AS "s_comment" - FROM "SUPPLIER" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS "t1" ) AS "t13" ON "t13"."s_suppkey" = "t12"."l_suppkey" INNER JOIN ( @@ -64,7 +64,7 @@ FROM ( "t2"."PS_AVAILQTY" AS "ps_availqty", "t2"."PS_SUPPLYCOST" AS "ps_supplycost", "t2"."PS_COMMENT" AS "ps_comment" - FROM "PARTSUPP" AS "t2" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PARTSUPP" AS "t2" ) AS "t14" ON "t14"."ps_suppkey" = "t12"."l_suppkey" AND "t14"."ps_partkey" = "t12"."l_partkey" INNER JOIN ( @@ -78,7 +78,7 @@ FROM ( "t3"."P_CONTAINER" AS "p_container", "t3"."P_RETAILPRICE" AS "p_retailprice", "t3"."P_COMMENT" AS "p_comment" - FROM "PART" AS "t3" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS "t3" ) AS "t15" ON "t15"."p_partkey" = "t12"."l_partkey" INNER JOIN ( @@ -92,7 +92,7 @@ FROM ( "t4"."O_CLERK" AS "o_clerk", "t4"."O_SHIPPRIORITY" AS "o_shippriority", "t4"."O_COMMENT" AS "o_comment" - FROM "ORDERS" AS "t4" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS "t4" ) AS "t16" ON "t16"."o_orderkey" = "t12"."l_orderkey" INNER JOIN ( @@ -101,7 +101,7 @@ FROM ( "t5"."N_NAME" AS "n_name", "t5"."N_REGIONKEY" AS "n_regionkey", "t5"."N_COMMENT" AS "n_comment" - FROM "NATION" AS "t5" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t5" ) AS "t17" ON "t13"."s_nationkey" = "t17"."n_nationkey" ) AS "t23" diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql index 3e652f95bc61..e8149adf2755 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql @@ -43,7 +43,7 @@ FROM ( "t0"."l_shipinstruct", "t0"."l_shipmode", "t0"."l_comment" - FROM "lineitem" AS "t0" + FROM "hive"."ibis_sf1"."lineitem" AS "t0" ) AS "t13" INNER JOIN ( SELECT @@ -54,7 +54,7 @@ FROM ( "t1"."s_phone", CAST("t1"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", "t1"."s_comment" - FROM "supplier" AS "t1" + FROM "hive"."ibis_sf1"."supplier" AS "t1" ) AS "t14" ON "t14"."s_suppkey" = "t13"."l_suppkey" INNER JOIN ( @@ -64,7 +64,7 @@ FROM ( "t2"."ps_availqty", CAST("t2"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", "t2"."ps_comment" - FROM "partsupp" AS "t2" + FROM "hive"."ibis_sf1"."partsupp" AS "t2" ) AS "t15" ON "t15"."ps_suppkey" = "t13"."l_suppkey" AND "t15"."ps_partkey" = "t13"."l_partkey" INNER JOIN ( @@ -78,7 +78,7 @@ FROM ( "t3"."p_container", CAST("t3"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", "t3"."p_comment" - FROM "part" AS "t3" + FROM "hive"."ibis_sf1"."part" AS "t3" ) AS "t16" ON "t16"."p_partkey" = "t13"."l_partkey" INNER JOIN ( @@ -92,7 +92,7 @@ FROM ( "t4"."o_clerk", "t4"."o_shippriority", "t4"."o_comment" - FROM "orders" AS "t4" + FROM "hive"."ibis_sf1"."orders" AS "t4" ) AS "t17" ON "t17"."o_orderkey" = "t13"."l_orderkey" INNER JOIN ( @@ -101,7 +101,7 @@ FROM ( "t5"."n_name", "t5"."n_regionkey", "t5"."n_comment" - FROM "nation" AS "t5" + FROM "hive"."ibis_sf1"."nation" AS "t5" ) AS "t12" ON "t14"."s_nationkey" = "t12"."n_nationkey" ) AS "t23" diff --git a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql index 
055718392e8d..3714cb16807d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql @@ -107,7 +107,7 @@ FROM ( "t0"."C_ACCTBAL" AS "c_acctbal", "t0"."C_MKTSEGMENT" AS "c_mktsegment", "t0"."C_COMMENT" AS "c_comment" - FROM "CUSTOMER" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS "t0" ) AS "t8" INNER JOIN ( SELECT @@ -120,7 +120,7 @@ FROM ( "t1"."O_CLERK" AS "o_clerk", "t1"."O_SHIPPRIORITY" AS "o_shippriority", "t1"."O_COMMENT" AS "o_comment" - FROM "ORDERS" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS "t1" ) AS "t9" ON "t8"."c_custkey" = "t9"."o_custkey" INNER JOIN ( @@ -141,7 +141,7 @@ FROM ( "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t2"."L_SHIPMODE" AS "l_shipmode", "t2"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t2" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t2" ) AS "t10" ON "t10"."l_orderkey" = "t9"."o_orderkey" INNER JOIN ( @@ -150,7 +150,7 @@ FROM ( "t3"."N_NAME" AS "n_name", "t3"."N_REGIONKEY" AS "n_regionkey", "t3"."N_COMMENT" AS "n_comment" - FROM "NATION" AS "t3" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t3" ) AS "t11" ON "t8"."c_nationkey" = "t11"."n_nationkey" ) AS "t15" diff --git a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql index d30ac72d2fd8..df0d20c1dcd9 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql @@ -107,7 +107,7 @@ FROM ( CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", "t0"."c_mktsegment", "t0"."c_comment" - FROM "customer" AS "t0" + FROM "hive"."ibis_sf1"."customer" AS "t0" ) AS "t9" INNER JOIN ( SELECT @@ -120,7 +120,7 @@ FROM ( "t1"."o_clerk", "t1"."o_shippriority", "t1"."o_comment" - FROM "orders" AS "t1" + FROM "hive"."ibis_sf1"."orders" AS "t1" ) AS "t10" ON "t9"."c_custkey" = "t10"."o_custkey" INNER JOIN ( @@ -141,7 +141,7 @@ FROM ( "t2"."l_shipinstruct", "t2"."l_shipmode", "t2"."l_comment" - FROM "lineitem" AS "t2" + FROM "hive"."ibis_sf1"."lineitem" AS "t2" ) AS "t11" ON "t11"."l_orderkey" = "t10"."o_orderkey" INNER JOIN ( @@ -150,7 +150,7 @@ FROM ( "t3"."n_name", "t3"."n_regionkey", "t3"."n_comment" - FROM "nation" AS "t3" + FROM "hive"."ibis_sf1"."nation" AS "t3" ) AS "t8" ON "t9"."c_nationkey" = "t8"."n_nationkey" ) AS "t15" diff --git a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql index 8c99ccb39f9c..3dae0694734f 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql @@ -48,7 +48,7 @@ FROM ( "t0"."PS_AVAILQTY" AS "ps_availqty", "t0"."PS_SUPPLYCOST" AS "ps_supplycost", "t0"."PS_COMMENT" AS "ps_comment" - FROM "PARTSUPP" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PARTSUPP" AS "t0" ) AS "t6" INNER JOIN ( SELECT @@ -59,7 +59,7 @@ FROM ( "t1"."S_PHONE" AS "s_phone", "t1"."S_ACCTBAL" AS "s_acctbal", "t1"."S_COMMENT" AS "s_comment" - FROM "SUPPLIER" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS "t1" ) AS "t7" ON "t6"."ps_suppkey" = "t7"."s_suppkey" INNER JOIN ( @@ -68,7 +68,7 @@ FROM ( "t2"."N_NAME" AS "n_name", "t2"."N_REGIONKEY" AS "n_regionkey", "t2"."N_COMMENT" AS "n_comment" - FROM "NATION" AS "t2" + FROM 
"SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t2" ) AS "t8" ON "t8"."n_nationkey" = "t7"."s_nationkey" ) AS "t11" @@ -126,7 +126,7 @@ WHERE "t0"."PS_AVAILQTY" AS "ps_availqty", "t0"."PS_SUPPLYCOST" AS "ps_supplycost", "t0"."PS_COMMENT" AS "ps_comment" - FROM "PARTSUPP" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PARTSUPP" AS "t0" ) AS "t6" INNER JOIN ( SELECT @@ -137,7 +137,7 @@ WHERE "t1"."S_PHONE" AS "s_phone", "t1"."S_ACCTBAL" AS "s_acctbal", "t1"."S_COMMENT" AS "s_comment" - FROM "SUPPLIER" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS "t1" ) AS "t7" ON "t6"."ps_suppkey" = "t7"."s_suppkey" INNER JOIN ( @@ -146,7 +146,7 @@ WHERE "t2"."N_NAME" AS "n_name", "t2"."N_REGIONKEY" AS "n_regionkey", "t2"."N_COMMENT" AS "n_comment" - FROM "NATION" AS "t2" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t2" ) AS "t8" ON "t8"."n_nationkey" = "t7"."s_nationkey" ) AS "t11" diff --git a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql index 4d85a7eb8a1b..066c3e445817 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql @@ -48,7 +48,7 @@ FROM ( "t0"."ps_availqty", CAST("t0"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", "t0"."ps_comment" - FROM "partsupp" AS "t0" + FROM "hive"."ibis_sf1"."partsupp" AS "t0" ) AS "t7" INNER JOIN ( SELECT @@ -59,7 +59,7 @@ FROM ( "t1"."s_phone", CAST("t1"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", "t1"."s_comment" - FROM "supplier" AS "t1" + FROM "hive"."ibis_sf1"."supplier" AS "t1" ) AS "t8" ON "t7"."ps_suppkey" = "t8"."s_suppkey" INNER JOIN ( @@ -68,7 +68,7 @@ FROM ( "t2"."n_name", "t2"."n_regionkey", "t2"."n_comment" - FROM "nation" AS "t2" + FROM "hive"."ibis_sf1"."nation" AS "t2" ) AS "t6" ON "t6"."n_nationkey" = "t8"."s_nationkey" ) AS "t11" @@ -126,7 +126,7 @@ WHERE "t0"."ps_availqty", CAST("t0"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", "t0"."ps_comment" - FROM "partsupp" AS "t0" + FROM "hive"."ibis_sf1"."partsupp" AS "t0" ) AS "t7" INNER JOIN ( SELECT @@ -137,7 +137,7 @@ WHERE "t1"."s_phone", CAST("t1"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", "t1"."s_comment" - FROM "supplier" AS "t1" + FROM "hive"."ibis_sf1"."supplier" AS "t1" ) AS "t8" ON "t7"."ps_suppkey" = "t8"."s_suppkey" INNER JOIN ( @@ -146,7 +146,7 @@ WHERE "t2"."n_name", "t2"."n_regionkey", "t2"."n_comment" - FROM "nation" AS "t2" + FROM "hive"."ibis_sf1"."nation" AS "t2" ) AS "t6" ON "t6"."n_nationkey" = "t8"."s_nationkey" ) AS "t11" diff --git a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql index e6baf40f88e8..3a07fd0399a2 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql @@ -76,7 +76,7 @@ FROM ( "t0"."O_CLERK" AS "o_clerk", "t0"."O_SHIPPRIORITY" AS "o_shippriority", "t0"."O_COMMENT" AS "o_comment" - FROM "ORDERS" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS "t0" ) AS "t4" INNER JOIN ( SELECT @@ -96,7 +96,7 @@ FROM ( "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t1"."L_SHIPMODE" AS "l_shipmode", "t1"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t1" ) AS "t5" ON "t4"."o_orderkey" = "t5"."l_orderkey" ) AS "t7" diff --git 
a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql index 444f5d44b978..e09b830eaf5e 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql @@ -76,7 +76,7 @@ FROM ( "t0"."o_clerk", "t0"."o_shippriority", "t0"."o_comment" - FROM "orders" AS "t0" + FROM "hive"."ibis_sf1"."orders" AS "t0" ) AS "t4" INNER JOIN ( SELECT @@ -96,7 +96,7 @@ FROM ( "t1"."l_shipinstruct", "t1"."l_shipmode", "t1"."l_comment" - FROM "lineitem" AS "t1" + FROM "hive"."ibis_sf1"."lineitem" AS "t1" ) AS "t5" ON "t4"."o_orderkey" = "t5"."l_orderkey" ) AS "t7" diff --git a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql index 159d83733a08..2a93f8a9369d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql @@ -38,7 +38,7 @@ FROM ( "t0"."C_ACCTBAL" AS "c_acctbal", "t0"."C_MKTSEGMENT" AS "c_mktsegment", "t0"."C_COMMENT" AS "c_comment" - FROM "CUSTOMER" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS "t0" ) AS "t4" LEFT OUTER JOIN ( SELECT @@ -51,7 +51,7 @@ FROM ( "t1"."O_CLERK" AS "o_clerk", "t1"."O_SHIPPRIORITY" AS "o_shippriority", "t1"."O_COMMENT" AS "o_comment" - FROM "ORDERS" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS "t1" ) AS "t5" ON "t4"."c_custkey" = "t5"."o_custkey" AND NOT ( diff --git a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql index dff39fad956b..2e0da41dd7b2 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql @@ -38,7 +38,7 @@ FROM ( CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", "t0"."c_mktsegment", "t0"."c_comment" - FROM "customer" AS "t0" + FROM "hive"."ibis_sf1"."customer" AS "t0" ) AS "t4" LEFT OUTER JOIN ( SELECT @@ -51,7 +51,7 @@ FROM ( "t1"."o_clerk", "t1"."o_shippriority", "t1"."o_comment" - FROM "orders" AS "t1" + FROM "hive"."ibis_sf1"."orders" AS "t1" ) AS "t5" ON "t4"."c_custkey" = "t5"."o_custkey" AND NOT ( diff --git a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql index bc75afe8d8dd..308333259aec 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql @@ -80,7 +80,7 @@ FROM ( "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t0"."L_SHIPMODE" AS "l_shipmode", "t0"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t0" ) AS "t4" INNER JOIN ( SELECT @@ -93,7 +93,7 @@ FROM ( "t1"."P_CONTAINER" AS "p_container", "t1"."P_RETAILPRICE" AS "p_retailprice", "t1"."P_COMMENT" AS "p_comment" - FROM "PART" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS "t1" ) AS "t5" ON "t4"."l_partkey" = "t5"."p_partkey" ) AS "t7" diff --git a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql index be7f2a998089..1a4327c3b6e3 100644 --- 
a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql @@ -80,7 +80,7 @@ FROM ( "t0"."l_shipinstruct", "t0"."l_shipmode", "t0"."l_comment" - FROM "lineitem" AS "t0" + FROM "hive"."ibis_sf1"."lineitem" AS "t0" ) AS "t4" INNER JOIN ( SELECT @@ -93,7 +93,7 @@ FROM ( "t1"."p_container", CAST("t1"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", "t1"."p_comment" - FROM "part" AS "t1" + FROM "hive"."ibis_sf1"."part" AS "t1" ) AS "t5" ON "t4"."l_partkey" = "t5"."p_partkey" ) AS "t7" diff --git a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql index 8c5d56f9f2b9..bbbce8c1d553 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql @@ -24,7 +24,7 @@ FROM ( "t0"."S_PHONE" AS "s_phone", "t0"."S_ACCTBAL" AS "s_acctbal", "t0"."S_COMMENT" AS "s_comment" - FROM "SUPPLIER" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS "t0" ) AS "t3" INNER JOIN ( SELECT @@ -50,7 +50,7 @@ FROM ( "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t1"."L_SHIPMODE" AS "l_shipmode", "t1"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t1" WHERE "t1"."L_SHIPDATE" >= DATE_FROM_PARTS(1996, 1, 1) AND "t1"."L_SHIPDATE" < DATE_FROM_PARTS(1996, 4, 1) @@ -84,7 +84,7 @@ WHERE "t0"."S_PHONE" AS "s_phone", "t0"."S_ACCTBAL" AS "s_acctbal", "t0"."S_COMMENT" AS "s_comment" - FROM "SUPPLIER" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS "t0" ) AS "t3" INNER JOIN ( SELECT @@ -110,7 +110,7 @@ WHERE "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t1"."L_SHIPMODE" AS "l_shipmode", "t1"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t1" WHERE "t1"."L_SHIPDATE" >= DATE_FROM_PARTS(1996, 1, 1) AND "t1"."L_SHIPDATE" < DATE_FROM_PARTS(1996, 4, 1) diff --git a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql index 423ef0b3245b..eabc65ac19cf 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql @@ -24,7 +24,7 @@ FROM ( "t0"."s_phone", CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", "t0"."s_comment" - FROM "supplier" AS "t0" + FROM "hive"."ibis_sf1"."supplier" AS "t0" ) AS "t4" INNER JOIN ( SELECT @@ -50,7 +50,7 @@ FROM ( "t1"."l_shipinstruct", "t1"."l_shipmode", "t1"."l_comment" - FROM "lineitem" AS "t1" + FROM "hive"."ibis_sf1"."lineitem" AS "t1" WHERE "t1"."l_shipdate" >= FROM_ISO8601_DATE('1996-01-01') AND "t1"."l_shipdate" < FROM_ISO8601_DATE('1996-04-01') @@ -84,7 +84,7 @@ WHERE "t0"."s_phone", CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", "t0"."s_comment" - FROM "supplier" AS "t0" + FROM "hive"."ibis_sf1"."supplier" AS "t0" ) AS "t4" INNER JOIN ( SELECT @@ -110,7 +110,7 @@ WHERE "t1"."l_shipinstruct", "t1"."l_shipmode", "t1"."l_comment" - FROM "lineitem" AS "t1" + FROM "hive"."ibis_sf1"."lineitem" AS "t1" WHERE "t1"."l_shipdate" >= FROM_ISO8601_DATE('1996-01-01') AND "t1"."l_shipdate" < FROM_ISO8601_DATE('1996-04-01') diff --git a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql 
b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql index ed1cd26910ad..842af99dbc5a 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql @@ -48,7 +48,7 @@ FROM ( "t0"."PS_AVAILQTY" AS "ps_availqty", "t0"."PS_SUPPLYCOST" AS "ps_supplycost", "t0"."PS_COMMENT" AS "ps_comment" - FROM "PARTSUPP" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PARTSUPP" AS "t0" ) AS "t5" INNER JOIN ( SELECT @@ -61,7 +61,7 @@ FROM ( "t2"."P_CONTAINER" AS "p_container", "t2"."P_RETAILPRICE" AS "p_retailprice", "t2"."P_COMMENT" AS "p_comment" - FROM "PART" AS "t2" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS "t2" ) AS "t7" ON "t7"."p_partkey" = "t5"."ps_partkey" ) AS "t9" @@ -75,7 +75,7 @@ FROM ( "t9"."ps_suppkey" IN ( SELECT "t1"."S_SUPPKEY" AS "s_suppkey" - FROM "SUPPLIER" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS "t1" WHERE "t1"."S_COMMENT" LIKE '%Customer%Complaints%' ) diff --git a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql index f1681099f881..a775b1a392c4 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql @@ -48,7 +48,7 @@ FROM ( "t0"."ps_availqty", CAST("t0"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", "t0"."ps_comment" - FROM "partsupp" AS "t0" + FROM "hive"."ibis_sf1"."partsupp" AS "t0" ) AS "t6" INNER JOIN ( SELECT @@ -61,7 +61,7 @@ FROM ( "t2"."p_container", CAST("t2"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", "t2"."p_comment" - FROM "part" AS "t2" + FROM "hive"."ibis_sf1"."part" AS "t2" ) AS "t7" ON "t7"."p_partkey" = "t6"."ps_partkey" ) AS "t9" @@ -75,7 +75,7 @@ FROM ( "t9"."ps_suppkey" IN ( SELECT "t1"."s_suppkey" - FROM "supplier" AS "t1" + FROM "hive"."ibis_sf1"."supplier" AS "t1" WHERE "t1"."s_comment" LIKE '%Customer%Complaints%' ) diff --git a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql index 67ed46944982..c5f3e273aba1 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql @@ -72,7 +72,7 @@ FROM ( "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t0"."L_SHIPMODE" AS "l_shipmode", "t0"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t0" ) AS "t4" INNER JOIN ( SELECT @@ -85,7 +85,7 @@ FROM ( "t1"."P_CONTAINER" AS "p_container", "t1"."P_RETAILPRICE" AS "p_retailprice", "t1"."P_COMMENT" AS "p_comment" - FROM "PART" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS "t1" ) AS "t5" ON "t5"."p_partkey" = "t4"."l_partkey" ) AS "t7" @@ -114,7 +114,7 @@ FROM ( "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t0"."L_SHIPMODE" AS "l_shipmode", "t0"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t0" WHERE "t0"."L_PARTKEY" = "t7"."p_partkey" ) AS "t8" diff --git a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql index 3c5f39dcc1d8..657b48da6ca8 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql +++ 
b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql @@ -72,7 +72,7 @@ FROM ( "t0"."l_shipinstruct", "t0"."l_shipmode", "t0"."l_comment" - FROM "lineitem" AS "t0" + FROM "hive"."ibis_sf1"."lineitem" AS "t0" ) AS "t4" INNER JOIN ( SELECT @@ -85,7 +85,7 @@ FROM ( "t1"."p_container", CAST("t1"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", "t1"."p_comment" - FROM "part" AS "t1" + FROM "hive"."ibis_sf1"."part" AS "t1" ) AS "t5" ON "t5"."p_partkey" = "t4"."l_partkey" ) AS "t7" @@ -114,7 +114,7 @@ FROM ( "t0"."l_shipinstruct", "t0"."l_shipmode", "t0"."l_comment" - FROM "lineitem" AS "t0" + FROM "hive"."ibis_sf1"."lineitem" AS "t0" WHERE "t0"."l_partkey" = "t7"."p_partkey" ) AS "t8" diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql index 51c9e215cf29..84bae2c4079c 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql @@ -93,7 +93,7 @@ FROM ( "t0"."C_ACCTBAL" AS "c_acctbal", "t0"."C_MKTSEGMENT" AS "c_mktsegment", "t0"."C_COMMENT" AS "c_comment" - FROM "CUSTOMER" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS "t0" ) AS "t6" INNER JOIN ( SELECT @@ -106,7 +106,7 @@ FROM ( "t1"."O_CLERK" AS "o_clerk", "t1"."O_SHIPPRIORITY" AS "o_shippriority", "t1"."O_COMMENT" AS "o_comment" - FROM "ORDERS" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS "t1" ) AS "t7" ON "t6"."c_custkey" = "t7"."o_custkey" INNER JOIN ( @@ -127,7 +127,7 @@ FROM ( "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t2"."L_SHIPMODE" AS "l_shipmode", "t2"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t2" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t2" ) AS "t8" ON "t7"."o_orderkey" = "t8"."l_orderkey" ) AS "t12" @@ -157,7 +157,7 @@ FROM ( "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t2"."L_SHIPMODE" AS "l_shipmode", "t2"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t2" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t2" ) AS "t5" GROUP BY 1 diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql index f84e31100199..87e8f68633c3 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql @@ -93,7 +93,7 @@ FROM ( CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", "t0"."c_mktsegment", "t0"."c_comment" - FROM "customer" AS "t0" + FROM "hive"."ibis_sf1"."customer" AS "t0" ) AS "t6" INNER JOIN ( SELECT @@ -106,7 +106,7 @@ FROM ( "t1"."o_clerk", "t1"."o_shippriority", "t1"."o_comment" - FROM "orders" AS "t1" + FROM "hive"."ibis_sf1"."orders" AS "t1" ) AS "t7" ON "t6"."c_custkey" = "t7"."o_custkey" INNER JOIN ( @@ -127,7 +127,7 @@ FROM ( "t2"."l_shipinstruct", "t2"."l_shipmode", "t2"."l_comment" - FROM "lineitem" AS "t2" + FROM "hive"."ibis_sf1"."lineitem" AS "t2" ) AS "t8" ON "t7"."o_orderkey" = "t8"."l_orderkey" ) AS "t12" @@ -157,7 +157,7 @@ FROM ( "t2"."l_shipinstruct", "t2"."l_shipmode", "t2"."l_comment" - FROM "lineitem" AS "t2" + FROM "hive"."ibis_sf1"."lineitem" AS "t2" ) AS "t5" GROUP BY 1 diff --git a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql index 2eaa6ff3f403..4d2f688bde96 100644 --- 
a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql @@ -74,7 +74,7 @@ FROM ( "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t0"."L_SHIPMODE" AS "l_shipmode", "t0"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t0" ) AS "t4" INNER JOIN ( SELECT @@ -87,7 +87,7 @@ FROM ( "t1"."P_CONTAINER" AS "p_container", "t1"."P_RETAILPRICE" AS "p_retailprice", "t1"."P_COMMENT" AS "p_comment" - FROM "PART" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS "t1" ) AS "t5" ON "t5"."p_partkey" = "t4"."l_partkey" ) AS "t7" diff --git a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql index 033059993529..7f859fefa591 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql @@ -74,7 +74,7 @@ FROM ( "t0"."l_shipinstruct", "t0"."l_shipmode", "t0"."l_comment" - FROM "lineitem" AS "t0" + FROM "hive"."ibis_sf1"."lineitem" AS "t0" ) AS "t4" INNER JOIN ( SELECT @@ -87,7 +87,7 @@ FROM ( "t1"."p_container", CAST("t1"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", "t1"."p_comment" - FROM "part" AS "t1" + FROM "hive"."ibis_sf1"."part" AS "t1" ) AS "t5" ON "t5"."p_partkey" = "t4"."l_partkey" ) AS "t7" diff --git a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql index 1d5530c72b48..fb4b2b507e07 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql @@ -23,7 +23,7 @@ FROM ( "t0"."S_PHONE" AS "s_phone", "t0"."S_ACCTBAL" AS "s_acctbal", "t0"."S_COMMENT" AS "s_comment" - FROM "SUPPLIER" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS "t0" ) AS "t8" INNER JOIN ( SELECT @@ -31,7 +31,7 @@ FROM ( "t2"."N_NAME" AS "n_name", "t2"."N_REGIONKEY" AS "n_regionkey", "t2"."N_COMMENT" AS "n_comment" - FROM "NATION" AS "t2" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t2" ) AS "t9" ON "t8"."s_nationkey" = "t9"."n_nationkey" ) AS "t13" @@ -47,13 +47,13 @@ WHERE "t1"."PS_AVAILQTY" AS "ps_availqty", "t1"."PS_SUPPLYCOST" AS "ps_supplycost", "t1"."PS_COMMENT" AS "ps_comment" - FROM "PARTSUPP" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PARTSUPP" AS "t1" ) AS "t6" WHERE "t6"."ps_partkey" IN ( SELECT "t3"."P_PARTKEY" AS "p_partkey" - FROM "PART" AS "t3" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS "t3" WHERE "t3"."P_NAME" LIKE 'forest%' ) @@ -79,7 +79,7 @@ WHERE "t4"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t4"."L_SHIPMODE" AS "l_shipmode", "t4"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t4" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t4" WHERE "t4"."L_PARTKEY" = "t6"."ps_partkey" AND "t4"."L_SUPPKEY" = "t6"."ps_suppkey" diff --git a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql index 22520f016c64..d9e0e24998e1 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql @@ -23,7 +23,7 @@ FROM ( "t0"."s_phone", CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", "t0"."s_comment" - FROM "supplier" AS "t0" + 
FROM "hive"."ibis_sf1"."supplier" AS "t0" ) AS "t10" INNER JOIN ( SELECT @@ -31,7 +31,7 @@ FROM ( "t2"."n_name", "t2"."n_regionkey", "t2"."n_comment" - FROM "nation" AS "t2" + FROM "hive"."ibis_sf1"."nation" AS "t2" ) AS "t8" ON "t10"."s_nationkey" = "t8"."n_nationkey" ) AS "t13" @@ -47,13 +47,13 @@ WHERE "t1"."ps_availqty", CAST("t1"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", "t1"."ps_comment" - FROM "partsupp" AS "t1" + FROM "hive"."ibis_sf1"."partsupp" AS "t1" ) AS "t7" WHERE "t7"."ps_partkey" IN ( SELECT "t3"."p_partkey" - FROM "part" AS "t3" + FROM "hive"."ibis_sf1"."part" AS "t3" WHERE "t3"."p_name" LIKE 'forest%' ) @@ -79,7 +79,7 @@ WHERE "t4"."l_shipinstruct", "t4"."l_shipmode", "t4"."l_comment" - FROM "lineitem" AS "t4" + FROM "hive"."ibis_sf1"."lineitem" AS "t4" WHERE "t4"."l_partkey" = "t7"."ps_partkey" AND "t4"."l_suppkey" = "t7"."ps_suppkey" diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql index 5b785b3ea5c9..6ca53704e385 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql @@ -32,7 +32,7 @@ FROM ( "t0"."S_PHONE" AS "s_phone", "t0"."S_ACCTBAL" AS "s_acctbal", "t0"."S_COMMENT" AS "s_comment" - FROM "SUPPLIER" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS "t0" ) AS "t8" INNER JOIN ( SELECT @@ -52,7 +52,7 @@ FROM ( "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t1"."L_SHIPMODE" AS "l_shipmode", "t1"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t1" ) AS "t9" ON "t8"."s_suppkey" = "t9"."l_suppkey" INNER JOIN ( @@ -66,7 +66,7 @@ FROM ( "t2"."O_CLERK" AS "o_clerk", "t2"."O_SHIPPRIORITY" AS "o_shippriority", "t2"."O_COMMENT" AS "o_comment" - FROM "ORDERS" AS "t2" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS "t2" ) AS "t12" ON "t12"."o_orderkey" = "t9"."l_orderkey" INNER JOIN ( @@ -75,7 +75,7 @@ FROM ( "t3"."N_NAME" AS "n_name", "t3"."N_REGIONKEY" AS "n_regionkey", "t3"."N_COMMENT" AS "n_comment" - FROM "NATION" AS "t3" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t3" ) AS "t13" ON "t8"."s_nationkey" = "t13"."n_nationkey" ) AS "t17" @@ -104,7 +104,7 @@ FROM ( "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t1"."L_SHIPMODE" AS "l_shipmode", "t1"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t1" ) AS "t10" WHERE ( @@ -136,7 +136,7 @@ FROM ( "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", "t1"."L_SHIPMODE" AS "l_shipmode", "t1"."L_COMMENT" AS "l_comment" - FROM "LINEITEM" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t1" ) AS "t11" WHERE ( diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql index c7c0686a5465..46bc135ee050 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql @@ -32,7 +32,7 @@ FROM ( "t0"."s_phone", CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", "t0"."s_comment" - FROM "supplier" AS "t0" + FROM "hive"."ibis_sf1"."supplier" AS "t0" ) AS "t9" INNER JOIN ( SELECT @@ -52,7 +52,7 @@ FROM ( "t1"."l_shipinstruct", "t1"."l_shipmode", "t1"."l_comment" - FROM "lineitem" AS "t1" + FROM "hive"."ibis_sf1"."lineitem" AS "t1" ) AS "t10" ON "t9"."s_suppkey" = 
"t10"."l_suppkey" INNER JOIN ( @@ -66,7 +66,7 @@ FROM ( "t2"."o_clerk", "t2"."o_shippriority", "t2"."o_comment" - FROM "orders" AS "t2" + FROM "hive"."ibis_sf1"."orders" AS "t2" ) AS "t13" ON "t13"."o_orderkey" = "t10"."l_orderkey" INNER JOIN ( @@ -75,7 +75,7 @@ FROM ( "t3"."n_name", "t3"."n_regionkey", "t3"."n_comment" - FROM "nation" AS "t3" + FROM "hive"."ibis_sf1"."nation" AS "t3" ) AS "t8" ON "t9"."s_nationkey" = "t8"."n_nationkey" ) AS "t17" @@ -104,7 +104,7 @@ FROM ( "t1"."l_shipinstruct", "t1"."l_shipmode", "t1"."l_comment" - FROM "lineitem" AS "t1" + FROM "hive"."ibis_sf1"."lineitem" AS "t1" ) AS "t11" WHERE ( @@ -136,7 +136,7 @@ FROM ( "t1"."l_shipinstruct", "t1"."l_shipmode", "t1"."l_comment" - FROM "lineitem" AS "t1" + FROM "hive"."ibis_sf1"."lineitem" AS "t1" ) AS "t12" WHERE ( diff --git a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql index acb134eac72f..44771b5d4bda 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql @@ -27,7 +27,7 @@ FROM ( "t0"."C_ACCTBAL" AS "c_acctbal", "t0"."C_MKTSEGMENT" AS "c_mktsegment", "t0"."C_COMMENT" AS "c_comment" - FROM "CUSTOMER" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS "t0" ) AS "t2" WHERE IFF( @@ -50,7 +50,7 @@ FROM ( "t0"."C_ACCTBAL" AS "c_acctbal", "t0"."C_MKTSEGMENT" AS "c_mktsegment", "t0"."C_COMMENT" AS "c_comment" - FROM "CUSTOMER" AS "t0" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS "t0" WHERE "t0"."C_ACCTBAL" > 0.0 AND IFF( @@ -66,7 +66,7 @@ FROM ( EXISTS( SELECT 1 AS "1" - FROM "ORDERS" AS "t1" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS "t1" WHERE "t1"."O_CUSTKEY" = "t2"."c_custkey" ) diff --git a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql index 1d8794cc6e66..f2aae175e83b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql @@ -27,7 +27,7 @@ FROM ( CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", "t0"."c_mktsegment", "t0"."c_comment" - FROM "customer" AS "t0" + FROM "hive"."ibis_sf1"."customer" AS "t0" ) AS "t2" WHERE IF( @@ -50,7 +50,7 @@ FROM ( CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", "t0"."c_mktsegment", "t0"."c_comment" - FROM "customer" AS "t0" + FROM "hive"."ibis_sf1"."customer" AS "t0" WHERE CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) > CAST(0.0 AS DOUBLE) AND IF( @@ -66,7 +66,7 @@ FROM ( EXISTS( SELECT 1 AS "1" - FROM "orders" AS "t1" + FROM "hive"."ibis_sf1"."orders" AS "t1" WHERE "t1"."o_custkey" = "t2"."c_custkey" ) diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index 65627c0d506b..b7e62cf056b4 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -371,17 +371,18 @@ class PhysicalTable(Relation): values = FrozenDict() -@public -class UnboundTable(PhysicalTable): - schema: Schema - - @public class Namespace(Concrete): database: Optional[str] = None schema: Optional[str] = None +@public +class UnboundTable(PhysicalTable): + schema: Schema + namespace: Namespace = Namespace() + + @public class DatabaseTable(PhysicalTable): schema: Schema diff --git a/ibis/expr/types/core.py b/ibis/expr/types/core.py index a77bfcfdc59a..710439af562d 100644 --- 
a/ibis/expr/types/core.py +++ b/ibis/expr/types/core.py @@ -593,7 +593,9 @@ def unbind(self) -> ir.Table: from ibis.expr.analysis import p, c from ibis.common.deferred import _ - rule = p.DatabaseTable >> c.UnboundTable(name=_.name, schema=_.schema) + rule = p.DatabaseTable >> c.UnboundTable( + name=_.name, schema=_.schema, namespace=_.namespace + ) return self.op().replace(rule).to_expr() def as_table(self) -> ir.Table: diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index bed1cb84168e..4418425505a7 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -2018,3 +2018,17 @@ def test_invalid_distinct_empty_key(): t = ibis.table(dict(a="int", b="string"), name="t") with pytest.raises(com.IbisInputError): t.distinct(on="c", keep="first") + + +def test_unbind_with_namespace(): + schema = ibis.schema({"a": "int"}) + ns = ops.Namespace(database="db", schema="sch") + + t_op = ops.DatabaseTable(name="t", schema=schema, source=None, namespace=ns) + t = t_op.to_expr() + s = t.unbind() + + expected = ops.UnboundTable(name="t", schema=schema, namespace=ns).to_expr() + + assert s.op() == expected.op() + assert s.equals(expected) From 7a69243aa0c61f940d702d2e7f3b63ee4650e290 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jan 2024 09:17:08 -0500 Subject: [PATCH 060/161] chore(duckdb/mysql): remove dead code and comment --- ibis/backends/duckdb/__init__.py | 6 ------ ibis/backends/mysql/tests/test_client.py | 6 +----- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 6bd467185640..d115967f49c8 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -1459,12 +1459,6 @@ def _compile_builtin_udf(self, udf_node: ops.ScalarUDF) -> None: def _compile_pandas_udf(self, _: ops.ScalarUDF) -> None: raise NotImplementedError("duckdb doesn't support pandas UDFs") - def _get_compiled_statement(self, view, definition): - # TODO: remove this once duckdb supports CTAS prepared statements - return super()._get_compiled_statement( - view, definition, compile_kwargs={"literal_binds": True} - ) - def insert( self, table_name: str, diff --git a/ibis/backends/mysql/tests/test_client.py b/ibis/backends/mysql/tests/test_client.py index 18ad4a39c8af..d3db0f4972fb 100644 --- a/ibis/backends/mysql/tests/test_client.py +++ b/ibis/backends/mysql/tests/test_client.py @@ -53,11 +53,7 @@ # mariadb doesn't have a distinct json type param("json", dt.string, id="json"), param("enum('small', 'medium', 'large')", dt.string, id="enum"), - # con.table(name) first parses the type correctly as ibis inet using sqlglot, - # then convert these types to sqlalchemy types then a sqlalchemy table to - # get the ibis schema again from the alchemy types, but alchemy doesn't - # support inet6 so it gets converted to string eventually - # ("inet6", dt.inet), + param("inet6", dt.inet, id="inet"), param("set('a', 'b', 'c', 'd')", dt.Array(dt.string), id="set"), param("mediumblob", dt.binary, id="mediumblob"), param("blob", dt.binary, id="blob"), From b4fecd501f4563eddcc02815f97a0f2ff36cb15c Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jan 2024 09:38:33 -0500 Subject: [PATCH 061/161] refactor(sqlglot): remove duplicate `StringAscii` definitions --- ibis/backends/base/sqlglot/compiler.py | 1 + ibis/backends/clickhouse/compiler.py | 3 --- ibis/backends/datafusion/compiler.py | 1 - 
ibis/backends/duckdb/compiler.py | 1 - ibis/backends/mysql/compiler.py | 1 - ibis/backends/postgres/compiler.py | 1 - ibis/backends/snowflake/compiler.py | 1 - 7 files changed, 1 insertion(+), 8 deletions(-) diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index e71344c78086..618776523e49 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -1261,6 +1261,7 @@ def visit_RegexExtract(self, op, *, arg, pattern, index): ops.Sqrt: "sqrt", ops.StartsWith: "starts_with", ops.StrRight: "right", + ops.StringAscii: "ascii", ops.StringContains: "contains", ops.StringLength: "length", ops.StringReplace: "replace", diff --git a/ibis/backends/clickhouse/compiler.py b/ibis/backends/clickhouse/compiler.py index a0919312f78a..4be20f852335 100644 --- a/ibis/backends/clickhouse/compiler.py +++ b/ibis/backends/clickhouse/compiler.py @@ -713,17 +713,14 @@ def visit_Undefined(self, op, **_): ops.RStrip: "trimRight", ops.RandomScalar: "randCanonical", ops.RegexReplace: "replaceRegexpAll", - ops.Repeat: "repeat", ops.RowNumber: "row_number", ops.StartsWith: "startsWith", ops.StrRight: "right", ops.Strftime: "formatDateTime", - ops.StringAscii: "ascii", ops.StringLength: "length", ops.StringReplace: "replaceAll", ops.Strip: "trimBoth", ops.TimestampNow: "now", - ops.Translate: "translate", ops.TypeOf: "toTypeName", ops.Unnest: "arrayJoin", } diff --git a/ibis/backends/datafusion/compiler.py b/ibis/backends/datafusion/compiler.py index 7b5e77e735ab..916cc45f0cb8 100644 --- a/ibis/backends/datafusion/compiler.py +++ b/ibis/backends/datafusion/compiler.py @@ -531,7 +531,6 @@ def visit_Aggregate(self, op, *, parent, groups, metrics): ops.Last: "last_value", ops.Median: "median", ops.RPad: "rpad", - ops.StringAscii: "ascii", ops.StringLength: "character_length", ops.RegexSplit: "regex_split", } diff --git a/ibis/backends/duckdb/compiler.py b/ibis/backends/duckdb/compiler.py index 524e18ce9e0b..5e6572842867 100644 --- a/ibis/backends/duckdb/compiler.py +++ b/ibis/backends/duckdb/compiler.py @@ -365,7 +365,6 @@ def visit_Quantile(self, op, *, arg, quantile, where): ops.MapValues: "map_values", ops.Mode: "mode", ops.RPad: "rpad", - ops.StringAscii: "ascii", ops.TimeFromHMS: "make_time", ops.TypeOf: "typeof", ops.GeoPoint: "st_point", diff --git a/ibis/backends/mysql/compiler.py b/ibis/backends/mysql/compiler.py index 848916e8f1c1..b4aae86ed161 100644 --- a/ibis/backends/mysql/compiler.py +++ b/ibis/backends/mysql/compiler.py @@ -399,7 +399,6 @@ def visit_Undefined(self, op, **_): ops.Log10: "log10", ops.LPad: "lpad", ops.RPad: "rpad", - ops.StringAscii: "ascii", ops.StringContains: "instr", ops.ExtractWeekOfYear: "weekofyear", ops.ExtractEpochSeconds: "unix_timestamp", diff --git a/ibis/backends/postgres/compiler.py b/ibis/backends/postgres/compiler.py index 18b9090663af..a020961ab88a 100644 --- a/ibis/backends/postgres/compiler.py +++ b/ibis/backends/postgres/compiler.py @@ -595,7 +595,6 @@ def visit_Undefined(self, op, **_): ops.MapValues: "avals", ops.RPad: "rpad", ops.RegexSearch: "regexp_like", - ops.StringAscii: "ascii", ops.TimeFromHMS: "make_time", } diff --git a/ibis/backends/snowflake/compiler.py b/ibis/backends/snowflake/compiler.py index a61bd7c7ef84..2f082cfb1737 100644 --- a/ibis/backends/snowflake/compiler.py +++ b/ibis/backends/snowflake/compiler.py @@ -650,7 +650,6 @@ def visit_Undefined(self, op, **_): ops.Median: "median", ops.Mode: "mode", ops.RPad: "rpad", - ops.StringAscii: "ascii", ops.StringToTimestamp: 
"to_timestamp_tz", ops.TimeFromHMS: "time_from_parts", ops.TimestampFromYMDHMS: "timestamp_from_parts", From a5e86dc148f3ef3c16753ce6e3f9056da0f12f88 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jan 2024 09:45:38 -0500 Subject: [PATCH 062/161] chore(sqlglot): deduplicate pad functions --- ibis/backends/base/sqlglot/compiler.py | 2 ++ ibis/backends/clickhouse/compiler.py | 2 -- ibis/backends/datafusion/compiler.py | 2 -- ibis/backends/duckdb/compiler.py | 2 -- ibis/backends/mysql/compiler.py | 2 -- ibis/backends/postgres/compiler.py | 2 -- ibis/backends/snowflake/compiler.py | 2 -- ibis/backends/trino/compiler.py | 2 -- 8 files changed, 2 insertions(+), 14 deletions(-) diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index 618776523e49..b34e4550aa9c 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -1233,6 +1233,7 @@ def visit_RegexExtract(self, op, *, arg, pattern, index): ops.IsInf: "isinf", ops.IsNan: "isnan", ops.JSONGetItem: "json_extract", + ops.LPad: "lpad", ops.Last: "last", ops.LastValue: "last_value", ops.Levenshtein: "levenshtein", @@ -1249,6 +1250,7 @@ def visit_RegexExtract(self, op, *, arg, pattern, index): ops.PercentRank: "percent_rank", ops.Pi: "pi", ops.Power: "pow", + ops.RPad: "rpad", ops.Radians: "radians", ops.RandomScalar: "random", ops.RegexSearch: "regexp_like", diff --git a/ibis/backends/clickhouse/compiler.py b/ibis/backends/clickhouse/compiler.py index 4be20f852335..e4121a2ef9b2 100644 --- a/ibis/backends/clickhouse/compiler.py +++ b/ibis/backends/clickhouse/compiler.py @@ -696,7 +696,6 @@ def visit_Undefined(self, op, **_): ops.IsInf: "isInfinite", ops.IsNan: "isNaN", ops.IsNull: "isNull", - ops.LPad: "leftPad", ops.LStrip: "trimLeft", ops.Last: "anyLast", ops.Ln: "log", @@ -709,7 +708,6 @@ def visit_Undefined(self, op, **_): ops.Median: "quantileExactExclusive", ops.NotNull: "isNotNull", ops.NullIf: "nullIf", - ops.RPad: "rightPad", ops.RStrip: "trimRight", ops.RandomScalar: "randCanonical", ops.RegexReplace: "replaceRegexpAll", diff --git a/ibis/backends/datafusion/compiler.py b/ibis/backends/datafusion/compiler.py index 916cc45f0cb8..0bd4c005451b 100644 --- a/ibis/backends/datafusion/compiler.py +++ b/ibis/backends/datafusion/compiler.py @@ -527,10 +527,8 @@ def visit_Aggregate(self, op, *, parent, groups, metrics): ops.Cot: "cot", ops.ExtractMicrosecond: "extract_microsecond", ops.First: "first_value", - ops.LPad: "lpad", ops.Last: "last_value", ops.Median: "median", - ops.RPad: "rpad", ops.StringLength: "character_length", ops.RegexSplit: "regex_split", } diff --git a/ibis/backends/duckdb/compiler.py b/ibis/backends/duckdb/compiler.py index 5e6572842867..391f2d633cd5 100644 --- a/ibis/backends/duckdb/compiler.py +++ b/ibis/backends/duckdb/compiler.py @@ -358,13 +358,11 @@ def visit_Quantile(self, op, *, arg, quantile, where): ops.Hash: "hash", ops.IntegerRange: "range", ops.TimestampRange: "range", - ops.LPad: "lpad", ops.MapKeys: "map_keys", ops.MapLength: "cardinality", ops.MapMerge: "map_concat", ops.MapValues: "map_values", ops.Mode: "mode", - ops.RPad: "rpad", ops.TimeFromHMS: "make_time", ops.TypeOf: "typeof", ops.GeoPoint: "st_point", diff --git a/ibis/backends/mysql/compiler.py b/ibis/backends/mysql/compiler.py index b4aae86ed161..4c7914b0491b 100644 --- a/ibis/backends/mysql/compiler.py +++ b/ibis/backends/mysql/compiler.py @@ -397,8 +397,6 @@ def visit_Undefined(self, op, **_): ops.BitXor: "bit_xor", 
ops.DayOfWeekName: "dayname", ops.Log10: "log10", - ops.LPad: "lpad", - ops.RPad: "rpad", ops.StringContains: "instr", ops.ExtractWeekOfYear: "weekofyear", ops.ExtractEpochSeconds: "unix_timestamp", diff --git a/ibis/backends/postgres/compiler.py b/ibis/backends/postgres/compiler.py index a020961ab88a..121940220faa 100644 --- a/ibis/backends/postgres/compiler.py +++ b/ibis/backends/postgres/compiler.py @@ -589,11 +589,9 @@ def visit_Undefined(self, op, **_): ops.GeoWithin: "st_within", ops.GeoX: "st_x", ops.GeoY: "st_y", - ops.LPad: "lpad", ops.MapContains: "exist", ops.MapKeys: "akeys", ops.MapValues: "avals", - ops.RPad: "rpad", ops.RegexSearch: "regexp_like", ops.TimeFromHMS: "make_time", } diff --git a/ibis/backends/snowflake/compiler.py b/ibis/backends/snowflake/compiler.py index 2f082cfb1737..dd2784ef2795 100644 --- a/ibis/backends/snowflake/compiler.py +++ b/ibis/backends/snowflake/compiler.py @@ -646,10 +646,8 @@ def visit_Undefined(self, op, **_): ops.BitwiseXor: "bitxor", ops.EndsWith: "endswith", ops.Hash: "hash", - ops.LPad: "lpad", ops.Median: "median", ops.Mode: "mode", - ops.RPad: "rpad", ops.StringToTimestamp: "to_timestamp_tz", ops.TimeFromHMS: "time_from_parts", ops.TimestampFromYMDHMS: "timestamp_from_parts", diff --git a/ibis/backends/trino/compiler.py b/ibis/backends/trino/compiler.py index 3ea36c3b81f3..f53671f5b291 100644 --- a/ibis/backends/trino/compiler.py +++ b/ibis/backends/trino/compiler.py @@ -497,8 +497,6 @@ def visit_Undefined(self, op, **kw): ops.ExtractHost: "url_extract_host", ops.ExtractPath: "url_extract_path", ops.ExtractFragment: "url_extract_fragment", - ops.RPad: "rpad", - ops.LPad: "lpad", ops.ArrayPosition: "array_position", } From 69885db80fb0215e354618d947024ee967537ad6 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jan 2024 09:41:41 -0500 Subject: [PATCH 063/161] refactor(sqlglot): make anonymous functions easier to use and remove `array_func` hack --- ibis/backends/base/sqlglot/compiler.py | 45 ++++++++++++++++++++------ ibis/backends/postgres/compiler.py | 25 ++++++-------- ibis/backends/snowflake/compiler.py | 8 ++--- ibis/backends/trino/compiler.py | 2 +- 4 files changed, 49 insertions(+), 31 deletions(-) diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index b34e4550aa9c..139daa782268 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -31,6 +31,8 @@ ) if TYPE_CHECKING: + from collections.abc import Iterable + import ibis.expr.schema as sch import ibis.expr.types as ir from ibis.backends.base.sqlglot.datatypes import SqlglotType @@ -59,32 +61,55 @@ def __getitem__(self, key: str) -> sge.Var: return sge.Var(this=key) +class AnonymousFuncGen: + __slots__ = () + + def __getattr__(self, name: str) -> Callable[..., sge.Anonymous]: + return lambda *args: sge.Anonymous( + this=name, expressions=list(map(sge.convert, args)) + ) + + def __getitem__(self, key: str) -> Callable[..., sge.Anonymous]: + return getattr(self, key) + + class FuncGen: - __slots__ = ("namespace",) + __slots__ = ("namespace", "anon") def __init__(self, namespace: str | None = None) -> None: self.namespace = namespace + self.anon = AnonymousFuncGen() - def __getattr__(self, name: str) -> partial: + def __getattr__(self, name: str) -> Callable[..., sge.Func]: name = ".".join(filter(None, (self.namespace, name))) return lambda *args, **kwargs: sg.func(name, *map(sge.convert, args), **kwargs) - def __getitem__(self, key: str) -> 
partial: + def __getitem__(self, key: str) -> Callable[..., sge.Func]: return getattr(self, key) - def array(self, *args): - return sge.Array.from_arg_list(list(map(sge.convert, args))) + def array(self, *args: Any) -> sge.Array: + if not args: + return sge.Array(expressions=[]) + + first, *rest = args + + if isinstance(first, sge.Select): + assert ( + not rest + ), "only one argument allowed when `first` is a select statement" + + return sge.Array(expressions=list(map(sge.convert, (first, *rest)))) - def tuple(self, *args): - return sg.func("tuple", *map(sge.convert, args)) + def tuple(self, *args: Any) -> sge.Anonymous: + return self.anon.tuple(*args) - def exists(self, query): + def exists(self, query: sge.Expression) -> sge.Exists: return sge.Exists(this=query) - def concat(self, *args): + def concat(self, *args: Any) -> sge.Concat: return sge.Concat(expressions=list(map(sge.convert, args))) - def map(self, keys, values): + def map(self, keys: Iterable, values: Iterable) -> sge.Map: return sge.Map(keys=keys, values=values) diff --git a/ibis/backends/postgres/compiler.py b/ibis/backends/postgres/compiler.py index 121940220faa..a3c7e06475ee 100644 --- a/ibis/backends/postgres/compiler.py +++ b/ibis/backends/postgres/compiler.py @@ -142,9 +142,6 @@ def visit_Median(self, op, *, arg, where): def visit_ApproxCountDistinct(self, op, *, arg, where): return self.agg.count(sge.Distinct(expressions=[arg]), where=where) - def array_func(self, *args): - return sge.Anonymous(this=sg.to_identifier("array"), expressions=list(args)) - @visit_node.register(ops.IntegerRange) @visit_node.register(ops.TimestampRange) def visit_Range(self, op, *, start, stop, step): @@ -176,7 +173,7 @@ def _sign(value, dtype): _sign(step, step_dtype).eq(_sign(stop - start, step_dtype)), ), self.f.array_remove( - self.array_func( + self.f.array( sg.select(STAR).from_(self.f.generate_series(start, stop, step)) ), stop, @@ -196,7 +193,7 @@ def visit_ArrayContains(self, op, *, arg, other): @visit_node.register(ops.ArrayFilter) def visit_ArrayFilter(self, op, *, arg, body, param): - return self.array_func( + return self.f.array( sg.select(sg.column(param, quoted=self.quoted)) .from_(sge.Unnest(expressions=[arg], alias=param)) .where(body) @@ -204,7 +201,7 @@ def visit_ArrayFilter(self, op, *, arg, body, param): @visit_node.register(ops.ArrayMap) def visit_ArrayMap(self, op, *, arg, body, param): - return self.array_func( + return self.f.array( sg.select(body).from_(sge.Unnest(expressions=[arg], alias=param)) ) @@ -219,7 +216,7 @@ def visit_ArrayPosition(self, op, *, arg, other): @visit_node.register(ops.ArraySort) def visit_ArraySort(self, op, *, arg): - return self.array_func( + return self.f.array( sg.select("x").from_(sge.Unnest(expressions=[arg], alias="x")).order_by("x") ) @@ -227,7 +224,7 @@ def visit_ArraySort(self, op, *, arg): def visit_ArrayRepeat(self, op, *, arg, times): i = sg.to_identifier("i") length = self.f.cardinality(arg) - return self.array_func( + return self.f.array( sg.select(arg[i % length + 1]).from_( self.f.generate_series(0, length * times - 1).as_(i.name) ) @@ -238,20 +235,18 @@ def visit_ArrayDistinct(self, op, *, arg): return self.if_( arg.is_(NULL), NULL, - self.array_func(sg.select(sge.Explode(this=arg)).distinct()), + self.f.array(sg.select(sge.Explode(this=arg)).distinct()), ) @visit_node.register(ops.ArrayUnion) def visit_ArrayUnion(self, op, *, left, right): - return self.array_func( - sg.union( - sg.select(sge.Explode(this=left)), sg.select(sge.Explode(this=right)) - ) + return 
self.f.anon.array( + sg.union(sg.select(self.f.explode(left)), sg.select(self.f.explode(right))) ) @visit_node.register(ops.ArrayIntersect) def visit_ArrayIntersect(self, op, *, left, right): - return self.array_func( + return self.f.anon.array( sg.intersect( sg.select(sge.Explode(this=left)), sg.select(sge.Explode(this=right)) ) @@ -302,7 +297,7 @@ def visit_StructColumn(self, op, *, names, values): def visit_ToJSONArray(self, op, *, arg): return self.if_( self.f.json_typeof(arg).eq(sge.convert("array")), - self.array_func(sg.select(STAR).from_(self.f.json_array_elements(arg))), + self.f.array(sg.select(STAR).from_(self.f.json_array_elements(arg))), NULL, ) diff --git a/ibis/backends/snowflake/compiler.py b/ibis/backends/snowflake/compiler.py index dd2784ef2795..09c53c17f0be 100644 --- a/ibis/backends/snowflake/compiler.py +++ b/ibis/backends/snowflake/compiler.py @@ -129,11 +129,9 @@ def visit_Literal(self, op, *, value, dtype): if value.tzinfo is not None: return self.f.timestamp_tz_from_parts(*args, dtype.timezone) else: - # workaround sqlglot not supporting more than 6 arguments - return sge.Anonymous( - this=sg.to_identifier("timestamp_from_parts"), - expressions=list(map(sge.convert, args)), - ) + # workaround sqlglot not supporting more than 6 arguments by + # using an anonymous function + return self.f.anon.timestamp_from_parts(*args) elif dtype.is_time(): nanos = value.microsecond * 1_000 return self.f.time_from_parts(value.hour, value.minute, value.second, nanos) diff --git a/ibis/backends/trino/compiler.py b/ibis/backends/trino/compiler.py index f53671f5b291..2d214c685110 100644 --- a/ibis/backends/trino/compiler.py +++ b/ibis/backends/trino/compiler.py @@ -451,7 +451,7 @@ def visit_StringContains(self, op, *, haystack, needle): def visit_RegexpExtract(self, op, *, arg, pattern, index): # sqlglot doesn't support the third `group` argument for trino so work # around that limitation using an anonymous function - return sge.Anonymous(this="regexp_extract", expressions=[arg, pattern, index]) + return self.f.anon.regexp_extract(arg, pattern, index) @visit_node.register(ops.Quantile) @visit_node.register(ops.MultiQuantile) From bedfce63465b948c3e9a3e8123828172d1f30452 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jan 2024 07:17:38 -0500 Subject: [PATCH 064/161] test(backends): make null results try_cast test agnostic to nan vs None --- ibis/backends/tests/test_generic.py | 33 +++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index a47d2c004907..5479d4c26bea 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -44,12 +44,11 @@ } -@pytest.mark.broken(["impala", "bigquery"], "assert nan is None") @pytest.mark.notyet(["flink"], "The runtime does not support untyped `NULL` values.") def test_null_literal(con, backend): expr = ibis.null() result = con.execute(expr) - assert result is None + assert pd.isna(result) with contextlib.suppress(com.OperationNotDefinedError): backend_name = backend.name() @@ -1402,8 +1401,34 @@ def hash_256(col): ], ids=str, ) -def test_try_cast_expected(con, from_val, to_type, expected): - assert con.execute(ibis.literal(from_val).try_cast(to_type)) == expected +def test_try_cast(con, from_val, to_type, expected): + expr = ibis.literal(from_val).try_cast(to_type) + result = con.execute(expr) + assert result == expected + + +@pytest.mark.notimpl( + [ + 
"pandas", + "dask", + "druid", + "impala", + "mssql", + "oracle", + "pyspark", + "snowflake", + "sqlite", + "exasol", + ] +) +@pytest.mark.notyet(["flink"], reason="casts to nan") +@pytest.mark.notyet(["datafusion"]) +@pytest.mark.notimpl(["postgres"], raises=PsycoPg2InvalidTextRepresentation) +@pytest.mark.notyet(["mysql"], reason="returns 0") +def test_try_cast_returns_null(con): + expr = ibis.literal("a").try_cast("int") + result = con.execute(expr) + assert pd.isna(result) @pytest.mark.notimpl( From 2b7c4157a15f8875da70896cedab52be6edb12f1 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 13 Jan 2024 06:17:55 -0500 Subject: [PATCH 065/161] refactor(sqlglot): use a more backend-agnostic expression for non-finite constants --- ibis/backends/base/sqlglot/compiler.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index 139daa782268..792d0c4cd3c3 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -9,7 +9,7 @@ from collections.abc import Iterator, Mapping from functools import partial, reduce, singledispatchmethod from itertools import starmap -from typing import TYPE_CHECKING, Any, Callable +from typing import TYPE_CHECKING, Any, Callable, ClassVar import sqlglot as sg import sqlglot.expressions as sge @@ -165,13 +165,19 @@ class SQLGlotCompiler(abc.ABC): quoted: bool | None = None """Whether to always quote identifiers.""" - NAN = sge.Literal.number("'NaN'::double") + NAN: ClassVar[sge.Expression] = sge.Cast( + this=sge.convert("NaN"), to=sge.DataType(this=sge.DataType.Type.DOUBLE) + ) """Backend's NaN literal.""" - POS_INF = sge.Literal.number("'Inf'::double") + POS_INF: ClassVar[sge.Expression] = sge.Cast( + this=sge.convert("Inf"), to=sge.DataType(this=sge.DataType.Type.DOUBLE) + ) """Backend's positive infinity literal.""" - NEG_INF = sge.Literal.number("'-Inf'::double") + NEG_INF: ClassVar[sge.Expression] = sge.Cast( + this=sge.convert("-Inf"), to=sge.DataType(this=sge.DataType.Type.DOUBLE) + ) """Backend's negative infinity literal.""" def __init__(self) -> None: From dbce2934d75e71b1bd1112bcf8a232a2291757c8 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 13 Jan 2024 06:59:34 -0500 Subject: [PATCH 066/161] refactor(sqlglot): clean up `explode` usage --- ibis/backends/base/sqlglot/compiler.py | 4 ---- ibis/backends/postgres/compiler.py | 6 ++---- ibis/backends/snowflake/compiler.py | 2 +- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index 792d0c4cd3c3..913c5e4d2187 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -1213,10 +1213,6 @@ def visit_SQLStringView(self, op, *, query: str, name: str, child): def visit_SQLQueryResult(self, op, *, query, schema, source): return sg.parse_one(query, read=self.dialect).subquery() - @visit_node.register(ops.Unnest) - def visit_Unnest(self, op, *, arg): - return sge.Explode(this=arg) - @visit_node.register(ops.JoinTable) def visit_JoinTable(self, op, *, parent, index): return parent diff --git a/ibis/backends/postgres/compiler.py b/ibis/backends/postgres/compiler.py index a3c7e06475ee..e6b44829a49e 100644 --- a/ibis/backends/postgres/compiler.py +++ b/ibis/backends/postgres/compiler.py @@ -233,9 +233,7 @@ def visit_ArrayRepeat(self, op, *, arg, 
times): @visit_node.register(ops.ArrayDistinct) def visit_ArrayDistinct(self, op, *, arg): return self.if_( - arg.is_(NULL), - NULL, - self.f.array(sg.select(sge.Explode(this=arg)).distinct()), + arg.is_(NULL), NULL, self.f.array(sg.select(self.f.explode(arg)).distinct()) ) @visit_node.register(ops.ArrayUnion) @@ -248,7 +246,7 @@ def visit_ArrayUnion(self, op, *, left, right): def visit_ArrayIntersect(self, op, *, left, right): return self.f.anon.array( sg.intersect( - sg.select(sge.Explode(this=left)), sg.select(sge.Explode(this=right)) + sg.select(self.f.explode(left)), sg.select(self.f.explode(right)) ) ) diff --git a/ibis/backends/snowflake/compiler.py b/ibis/backends/snowflake/compiler.py index 09c53c17f0be..3ec0b5f02571 100644 --- a/ibis/backends/snowflake/compiler.py +++ b/ibis/backends/snowflake/compiler.py @@ -495,7 +495,7 @@ def visit_Unnest(self, op, *, arg): split = self.f.split( self.f.array_to_string(self.f.nullif(arg, self.f.array()), sep), sep ) - expr = self.f.nullif(sge.Explode(this=split), "") + expr = self.f.nullif(self.f.explode(split), "") return self.cast(expr, op.dtype) @visit_node.register(ops.Quantile) From 55dcf57a6b3ac3955f6ba6d42af03d3c4be23529 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 8 Jan 2024 15:56:13 +0100 Subject: [PATCH 067/161] refactor(pyspark): reimplement the backend using the new relational operations an spark SQL Summary ------- Port the Spark backend to use the new relational operations as well as move to Spark SQL from PySpark and take advantage of the new SQLGlot backend. Test suite changes ------------------ ``` OLD: 1223 passed, 33 skipped, 29316 deselected, 285 xfailed in 375.24s (0:06:15) NEW: 1334 passed, 33 skipped, 30002 deselected, 155 xfailed in 333.94s (0:05:33) ``` Advantages ---------- - Spark SQL is faster than PySpark. - Spark SQL has broader feature support than PySpark, for example it properly supports subqueries. - We can reuse the base SQLGlot backend making the PySpark compiler as thin as 500 lines of code. I kept the previous datatype and schema mappers since some of the DDL operations directly work on the PySpark session and Catalog objects, most of these would be harder to express in Spark SQL. 
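Example
-------

A rough usage sketch of the reworked backend (illustrative only, not part of this patch): the table and column names below are made up, and the exact `connect` signature should be checked against the backend's `do_connect`, but it shows the intended flow — expressions are rendered as Spark SQL text through the SQLGlot compiler and executed via the wrapped `SparkSession`, rather than being translated node-by-node into PySpark DataFrame calls.

```python
from pyspark.sql import SparkSession

import ibis

spark = SparkSession.builder.getOrCreate()
con = ibis.pyspark.connect(spark)  # wraps an existing SparkSession

t = con.table("events")  # assumes a table named "events" is registered
expr = t.group_by("user_id").aggregate(total=t.amount.sum())

# The SQLGlot-based compiler emits a Spark SQL string (dialect "spark");
# executing `expr` hands that string to the wrapped session.
print(con.compile(expr))
```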
--- .github/workflows/ibis-backends.yml | 136 +- ibis/backends/base/__init__.py | 3 + ibis/backends/base/sqlglot/__init__.py | 6 +- ibis/backends/base/sqlglot/compiler.py | 3 +- ibis/backends/base/sqlglot/datatypes.py | 6 + ibis/backends/duckdb/__init__.py | 2 + ibis/backends/duckdb/compiler.py | 1 + ibis/backends/mysql/__init__.py | 2 + ibis/backends/mysql/converter.py | 3 +- ibis/backends/pyspark/__init__.py | 430 ++- ibis/backends/pyspark/client.py | 137 - ibis/backends/pyspark/compiler.py | 2547 +++-------------- ibis/backends/pyspark/converter.py | 31 + ibis/backends/pyspark/datatypes.py | 32 +- ibis/backends/pyspark/ddl.py | 221 -- .../pyspark/tests/test_aggregation.py | 2 +- ibis/backends/pyspark/tests/test_array.py | 66 +- ibis/backends/pyspark/tests/test_basic.py | 191 +- ibis/backends/pyspark/tests/test_ddl.py | 57 +- ibis/backends/pyspark/tests/test_null.py | 8 +- .../pyspark/tests/test_timecontext.py | 116 - ibis/backends/pyspark/tests/test_window.py | 36 +- .../tests/test_window_context_adjustment.py | 427 --- ibis/backends/pyspark/timecontext.py | 76 - ibis/backends/tests/errors.py | 3 +- .../test_default_limit/pyspark/out.sql | 5 + .../test_disable_query_limit/pyspark/out.sql | 5 + .../pyspark/out.sql | 3 + .../test_respect_set_limit/pyspark/out.sql | 10 + .../test_union_aliasing/pyspark/out.sql | 134 + ibis/backends/tests/test_aggregation.py | 47 +- ibis/backends/tests/test_array.py | 6 +- ibis/backends/tests/test_asof_join.py | 8 +- ibis/backends/tests/test_dot_sql.py | 2 +- ibis/backends/tests/test_export.py | 9 +- ibis/backends/tests/test_generic.py | 38 +- ibis/backends/tests/test_join.py | 3 - ibis/backends/tests/test_map.py | 2 +- ibis/backends/tests/test_numeric.py | 38 +- ibis/backends/tests/test_param.py | 8 +- ibis/backends/tests/test_sql.py | 4 +- ibis/backends/tests/test_string.py | 47 +- ibis/backends/tests/test_temporal.py | 135 +- ibis/backends/tests/test_timecontext.py | 3 +- ibis/backends/tests/test_uuid.py | 24 +- ibis/backends/tests/test_vectorized_udf.py | 7 +- ibis/backends/tests/test_window.py | 34 +- ibis/expr/operations/temporal.py | 4 +- 48 files changed, 1102 insertions(+), 4016 deletions(-) delete mode 100644 ibis/backends/pyspark/client.py create mode 100644 ibis/backends/pyspark/converter.py delete mode 100644 ibis/backends/pyspark/ddl.py delete mode 100644 ibis/backends/pyspark/tests/test_timecontext.py delete mode 100644 ibis/backends/pyspark/tests/test_window_context_adjustment.py delete mode 100644 ibis/backends/pyspark/timecontext.py create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/pyspark/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/pyspark/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/pyspark/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/pyspark/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_union_aliasing/pyspark/out.sql diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index bda3dbff3ae1..c0ea4ec5f909 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -533,73 +533,73 @@ jobs: if: matrix.backend.services != null && failure() run: docker compose logs - # test_pyspark: - # name: PySpark ${{ matrix.os }} python-${{ matrix.python-version }} - # runs-on: ${{ matrix.os }} - # strategy: - # fail-fast: false - # matrix: - # os: - # - 
ubuntu-latest - # python-version: - # - "3.10" - # steps: - # - name: checkout - # uses: actions/checkout@v4 - # - # - uses: actions/setup-java@v4 - # with: - # distribution: microsoft - # java-version: 17 - # - # - uses: extractions/setup-just@v1 - # env: - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # - # - name: download backend data - # run: just download-data - # - # - name: install python - # uses: actions/setup-python@v5 - # id: install_python - # with: - # python-version: ${{ matrix.python-version }} - # - # - name: install poetry - # run: python -m pip install --upgrade pip 'poetry==1.7.1' - # - # - name: remove lonboard - # # it requires a version of pandas that pyspark is not compatible with - # run: poetry remove lonboard - # - # - name: install maximum versions of pandas and numpy - # run: poetry add --lock 'pandas@<2' 'numpy<1.24' - # - # - name: checkout the lock file - # run: git checkout poetry.lock - # - # - name: lock with no updates - # # poetry add is aggressive and will update other dependencies like - # # numpy and pandas so we keep the pyproject.toml edits and then relock - # # without updating anything except the requested versions - # run: poetry lock --no-update - # - # - name: install ibis - # run: poetry install --without dev --without docs --extras pyspark - # - # - name: run tests - # run: just ci-check -m pyspark - # - # - name: check that no untracked files were produced - # shell: bash - # run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . - # - # - name: upload code coverage - # # only upload coverage for jobs that aren't mostly xfails - # if: success() && matrix.python-version != '3.11' - # uses: codecov/codecov-action@v4 - # with: - # flags: backend,pyspark,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} + test_pyspark: + name: PySpark ${{ matrix.os }} python-${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + python-version: + - "3.10" + steps: + - name: checkout + uses: actions/checkout@v4 + + - uses: actions/setup-java@v4 + with: + distribution: microsoft + java-version: 17 + + - uses: extractions/setup-just@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: download backend data + run: just download-data + + - name: install python + uses: actions/setup-python@v5 + id: install_python + with: + python-version: ${{ matrix.python-version }} + + - name: install poetry + run: python -m pip install --upgrade pip 'poetry==1.7.1' + + - name: remove lonboard + # it requires a version of pandas that pyspark is not compatible with + run: poetry remove lonboard + + - name: install maximum versions of pandas and numpy + run: poetry add --lock 'pandas@<2' 'numpy<1.24' + + - name: checkout the lock file + run: git checkout poetry.lock + + - name: lock with no updates + # poetry add is aggressive and will update other dependencies like + # numpy and pandas so we keep the pyproject.toml edits and then relock + # without updating anything except the requested versions + run: poetry lock --no-update + + - name: install ibis + run: poetry install --without dev --without docs --extras pyspark + + - name: run tests + run: just ci-check -m pyspark + + - name: check that no untracked files were produced + shell: bash + run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . 
+ + - name: upload code coverage + # only upload coverage for jobs that aren't mostly xfails + if: success() && matrix.python-version != '3.11' + uses: codecov/codecov-action@v4 + with: + flags: backend,pyspark,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} # gen_lockfile_sqlalchemy2: # name: Generate Poetry Lockfile for SQLAlchemy 2 @@ -765,6 +765,6 @@ jobs: # - test_backends_min_version - test_backends # - test_backends_sqlalchemy2 - # - test_pyspark + - test_pyspark steps: - run: exit 0 diff --git a/ibis/backends/base/__init__.py b/ibis/backends/base/__init__.py index 7190d831ba68..7411077514d1 100644 --- a/ibis/backends/base/__init__.py +++ b/ibis/backends/base/__init__.py @@ -820,6 +820,8 @@ def db_identity(self) -> str: parts.extend(f"{k}={v}" for k, v in self._con_kwargs.items()) return "_".join(map(str, parts)) + # TODO(kszucs): this should be a classmethod returning with a new backend + # instance which does instantiate the connection def connect(self, *args, **kwargs) -> BaseBackend: """Connect to the database. @@ -857,6 +859,7 @@ def _from_url(self, url: str, **kwargs) -> BaseBackend: def _convert_kwargs(kwargs: MutableMapping) -> None: """Manipulate keyword arguments to `.connect` method.""" + # TODO(kszucs): should call self.connect(*self._con_args, **self._con_kwargs) def reconnect(self) -> None: """Reconnect to the database already configured with connect.""" self.do_connect(*self._con_args, **self._con_kwargs) diff --git a/ibis/backends/base/sqlglot/__init__.py b/ibis/backends/base/sqlglot/__init__.py index 1ce1c33c7d56..3f94f50a7e9a 100644 --- a/ibis/backends/base/sqlglot/__init__.py +++ b/ibis/backends/base/sqlglot/__init__.py @@ -35,6 +35,7 @@ def has_operation(cls, operation: type[ops.Value]) -> bool: dispatcher = cls.compiler.visit_node.register.__self__.dispatcher return dispatcher.dispatch(operation) is not dispatcher.dispatch(object) + # TODO(kszucs): get_schema() is not registered as an abstract method def table( self, name: str, schema: str | None = None, database: str | None = None ) -> ir.Table: @@ -90,7 +91,7 @@ def compile( ): """Compile an Ibis expression to a SQL string.""" query = self._to_sqlglot(expr, limit=limit, params=params, **kwargs) - sql = query.sql(dialect=self.name, pretty=True) + sql = query.sql(dialect=self.compiler.dialect, pretty=True) self._log(sql) return sql @@ -118,6 +119,7 @@ def sql( schema = self._get_schema_using_query(query) return ops.SQLQueryResult(query, ibis.schema(schema), self).to_expr() + # TODO(kszucs): should be removed in favor of _get_schema_using_query() @abc.abstractmethod def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: """Return the metadata of a SQL query.""" @@ -223,6 +225,8 @@ def execute( schema = table.schema() + # TODO(kszucs): these methods should be abstractmethods or this default + # implementation should be removed with self._safe_raw_sql(sql) as cur: result = self._fetch_from_cursor(cur, schema) return expr.__pandas_result__(result) diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index 913c5e4d2187..6cf6b4e75639 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -362,7 +362,7 @@ def visit_DefaultLiteral(self, op, *, value, dtype): if math.isnan(value): return self.NAN elif math.isinf(value): - return self.POS_INF if value < 0 else self.NEG_INF + return self.POS_INF if value > 0 else self.NEG_INF return sge.convert(value) elif dtype.is_decimal(): return 
self.cast(str(value), dtype) @@ -864,6 +864,7 @@ def visit_Argument(self, op, *, name: str, shape, dtype): def visit_RowID(self, op, *, table): return sg.column(op.name, table=table.alias_or_name, quoted=self.quoted) + # TODO(kszucs): this should be renamed to something UDF related def __sql_name__(self, op: ops.ScalarUDF | ops.AggUDF) -> str: # not actually a table, but easier to quote individual namespace # components this way diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index 8b379d1d0db9..e342ebcd080e 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -606,3 +606,9 @@ def _from_ibis_Struct(cls, dtype: dt.Struct) -> sge.DataType: class SQLiteType(SqlglotType): dialect = "sqlite" + + +class PySparkType(SqlglotType): + dialect = "spark" + default_decimal_precision = 38 + default_decimal_scale = 18 diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index d115967f49c8..d7111410cb28 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -100,6 +100,7 @@ def current_schema(self) -> str: [(schema,)] = cur.fetchall() return schema + # TODO(kszucs): should be moved to the base SQLGLot backend def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: with contextlib.suppress(AttributeError): query = query.sql(dialect=self.name) @@ -440,6 +441,7 @@ def _load_extensions( cur.install_extension(extension, force_install=force_install) cur.load_extension(extension) + # TODO(kszucs): should be a classmethod def _from_url(self, url: str, **kwargs) -> BaseBackend: """Connect to a backend using a URL `url`. diff --git a/ibis/backends/duckdb/compiler.py b/ibis/backends/duckdb/compiler.py index 391f2d633cd5..e42bb5e733b0 100644 --- a/ibis/backends/duckdb/compiler.py +++ b/ibis/backends/duckdb/compiler.py @@ -64,6 +64,7 @@ def visit_ArrayRepeat(self, op, *, arg, times): func = sge.Lambda(this=arg, expressions=[sg.to_identifier("_")]) return self.f.flatten(self.f.list_apply(self.f.range(times), func)) + # TODO(kszucs): this could be moved to the base SQLGlotCompiler @visit_node.register(ops.Sample) def visit_Sample( self, op, *, parent, fraction: float, method: str, seed: int | None, **_ diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py index a52edbe5fba8..cae87e456800 100644 --- a/ibis/backends/mysql/__init__.py +++ b/ibis/backends/mysql/__init__.py @@ -258,6 +258,8 @@ def begin(self): finally: cur.close() + # TODO(kszucs): should make it an abstract method or remove the use of it + # from .execute() @contextlib.contextmanager def _safe_raw_sql(self, *args, **kwargs): with contextlib.closing(self.raw_sql(*args, **kwargs)) as result: diff --git a/ibis/backends/mysql/converter.py b/ibis/backends/mysql/converter.py index ffa277c56de4..4f2010225a5b 100644 --- a/ibis/backends/mysql/converter.py +++ b/ibis/backends/mysql/converter.py @@ -6,6 +6,7 @@ class MySQLPandasData(PandasData): + # TODO(kszucs): this could be reused at other backends, like pyspark @classmethod def convert_Time(cls, s, dtype, pandas_type): def convert(timedelta): @@ -14,7 +15,7 @@ def convert(timedelta): hour=comps.hours, minute=comps.minutes, second=comps.seconds, - microsecond=comps.microseconds, + microsecond=comps.milliseconds * 1000 + comps.microseconds, ) return s.map(convert, na_action="ignore") diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index 755ddbe3d653..0a9182061d03 100644 --- 
a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -1,5 +1,7 @@ from __future__ import annotations +import atexit +import contextlib import os from pathlib import Path from typing import TYPE_CHECKING, Any @@ -7,8 +9,10 @@ import pyspark import sqlalchemy as sa import sqlglot as sg +import sqlglot.expressions as sge from pyspark import SparkConf from pyspark.sql import DataFrame, SparkSession +from pyspark.sql.functions import PandasUDFType, pandas_udf import ibis.common.exceptions as com import ibis.config @@ -17,22 +21,12 @@ import ibis.expr.types as ir from ibis import util from ibis.backends.base import CanCreateDatabase -from ibis.backends.base.df.scope import Scope -from ibis.backends.base.df.timecontext import canonicalize_context, localize_context -from ibis.backends.base.sql import BaseSQLBackend -from ibis.backends.base.sql.compiler import Compiler, TableSetFormatter -from ibis.backends.base.sql.ddl import ( - CreateDatabase, - DropTable, - TruncateTable, - is_fully_qualified, -) -from ibis.backends.pyspark import ddl -from ibis.backends.pyspark.client import PySparkTable -from ibis.backends.pyspark.compiler import PySparkExprTranslator -from ibis.backends.pyspark.datatypes import PySparkType -from ibis.common.temporal import normalize_timezone -from ibis.formats.pandas import PandasData +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.pyspark.compiler import PySparkCompiler +from ibis.backends.pyspark.converter import PySparkPandasData +from ibis.backends.pyspark.datatypes import PySparkSchema, PySparkType +from ibis.expr.operations.udf import InputType +from ibis.legacy.udf.vectorized import _coerce_to_series if TYPE_CHECKING: from collections.abc import Mapping, Sequence @@ -92,35 +86,9 @@ def __exit__(self, exc_type, exc_value, traceback): """No-op for compatibility.""" -class PySparkTableSetFormatter(TableSetFormatter): - def _format_in_memory_table(self, op): - # we don't need to compile the table to a VALUES statement because the - # table has been registered already by createOrReplaceTempView. - # - # The only place where the SQL API is currently used is DDL operations - return op.name - - -class PySparkCompiler(Compiler): - cheap_in_memory_tables = True - table_set_formatter_class = PySparkTableSetFormatter - - -class PySparkPandasData(PandasData): - @classmethod - def convert_Timestamp_element(cls, dtype): - def converter(value, dtype=dtype): - if (tz := dtype.timezone) is not None: - return value.astimezone(normalize_timezone(tz)) - - return value.astimezone(normalize_timezone("UTC")).replace(tzinfo=None) - - return converter - - -class Backend(BaseSQLBackend, CanCreateDatabase): - compiler = PySparkCompiler +class Backend(SQLGlotBackend, CanCreateDatabase): name = "pyspark" + compiler = PySparkCompiler() _sqlglot_dialect = "spark" class Options(ibis.config.Config): @@ -179,6 +147,13 @@ def do_connect(self, session: SparkSession) -> None: # local time to UTC with microsecond resolution. 
        # https://spark.apache.org/docs/latest/sql-pyspark-pandas-with-arrow.html#timestamp-with-time-zone-semantics
         self._session.conf.set("spark.sql.session.timeZone", "UTC")
+        self._session.conf.set("spark.sql.mapKeyDedupPolicy", "LAST_WIN")
+        self._temp_views = set()
+
+    def _metadata(self, query: str):
+        cursor = self.raw_sql(query)
+        struct_dtype = PySparkType.to_ibis(cursor.query.schema)
+        return struct_dtype.items()
 
     @property
     def version(self):
@@ -188,6 +163,18 @@ def version(self):
     def current_database(self) -> str:
         return self._catalog.currentDatabase()
 
+    @contextlib.contextmanager
+    def _active_database(self, name: str | None) -> None:
+        if name is None:
+            yield
+            return
+        current = self.current_database
+        try:
+            self._catalog.setCurrentDatabase(name)
+            yield
+        finally:
+            self._catalog.setCurrentDatabase(current)
+
     def list_databases(self, like: str | None = None) -> list[str]:
         databases = [db.name for db in self._catalog.listDatabases()]
         return self._filter_with_like(databases, like)
@@ -201,97 +188,65 @@ def list_tables(
         ]
         return self._filter_with_like(tables, like)
 
-    def compile(self, expr, timecontext=None, params=None, *args, **kwargs):
-        """Compile an ibis expression to a PySpark DataFrame object."""
+    def _wrap_udf_to_return_pandas(self, func, output_dtype):
+        def wrapper(*args):
+            return _coerce_to_series(func(*args), output_dtype)
+
+        return wrapper
+
+    def _register_udfs(self, expr: ir.Expr) -> None:
+        node = expr.op()
+        for udf in node.find(ops.ScalarUDF):
+            udf_name = self.compiler.__sql_name__(udf)
+            udf_func = self._wrap_udf_to_return_pandas(udf.__func__, udf.dtype)
+            udf_return = PySparkType.from_ibis(udf.dtype)
+            if udf.__input_type__ != InputType.PANDAS:
+                raise NotImplementedError(
+                    "Only Pandas UDFs are supported in the PySpark backend"
+                )
+            spark_udf = pandas_udf(udf_func, udf_return, PandasUDFType.SCALAR)
+            self._session.udf.register(udf_name, spark_udf)
+
+        for udf in node.find(ops.ElementWiseVectorizedUDF):
+            udf_name = self.compiler.__sql_name__(udf)
+            udf_func = self._wrap_udf_to_return_pandas(udf.func, udf.return_type)
+            udf_return = PySparkType.from_ibis(udf.return_type)
+            spark_udf = pandas_udf(udf_func, udf_return, PandasUDFType.SCALAR)
+            self._session.udf.register(udf_name, spark_udf)
+
+        for udf in node.find(ops.ReductionVectorizedUDF):
+            udf_name = self.compiler.__sql_name__(udf)
+            udf_func = self._wrap_udf_to_return_pandas(udf.func, udf.return_type)
+            udf_func = udf.func
+            udf_return = PySparkType.from_ibis(udf.return_type)
+            spark_udf = pandas_udf(udf_func, udf_return, PandasUDFType.GROUPED_AGG)
+            self._session.udf.register(udf_name, spark_udf)
 
-        if timecontext is not None:
-            session_timezone = self._session.conf.get("spark.sql.session.timeZone")
-            # Since spark use session timezone for tz-naive timestamps
-            # we localize tz-naive context here to match that behavior
-            timecontext = localize_context(
-                canonicalize_context(timecontext), session_timezone
-            )
-
-        # Insert params in scope
-        scope = Scope(
-            {
-                param.op(): raw_value
-                for param, raw_value in ({} if params is None else params).items()
-            },
-            timecontext,
-        )
-        return PySparkExprTranslator().translate(
-            expr.op(),
-            scope=scope,
-            timecontext=timecontext,
-            session=getattr(self, "_session", None),
-        )
-
-    def execute(self, expr: ir.Expr, **kwargs: Any) -> Any:
-        """Execute an expression."""
-        table_expr = expr.as_table()
-        df = self.compile(table_expr, **kwargs).toPandas()
-
-        return expr.__pandas_result__(df)
+    def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
+        schema =
PySparkSchema.from_ibis(op.schema) + df = self._session.createDataFrame(data=op.data.to_frame(), schema=schema) + df.createOrReplaceTempView(op.name) - def _fully_qualified_name(self, name, database): - if is_fully_qualified(name): - return name + def _register_temp_view_cleanup(self, name: str) -> None: + def drop(self, name: str): + self._session.catalog.dropTempView(name) + self._temp_views.discard(name) - return sg.table(name, db=database, quoted=True).sql(dialect="spark") + atexit.register(drop, self, name=name) - def close(self): - """Close Spark connection and drop any temporary objects.""" - self._context.stop() + def _fetch_from_cursor(self, cursor, schema): + df = cursor.query.toPandas() # blocks until finished + return PySparkPandasData.convert_table(df, schema) - def fetch_from_cursor(self, cursor, schema): - return cursor.query.toPandas() # blocks until finished + def _safe_raw_sql(self, query: str) -> _PySparkCursor: + return self.raw_sql(query) - def raw_sql(self, query: str) -> _PySparkCursor: + def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> _PySparkCursor: + with contextlib.suppress(AttributeError): + query = query.sql(dialect=self._sqlglot_dialect) query = self._session.sql(query) return _PySparkCursor(query) - def _get_schema_using_query(self, query): - cursor = self.raw_sql(f"SELECT * FROM ({query}) t0 LIMIT 0") - return sch.Schema(PySparkType.to_ibis(cursor.query.schema)) - - def _get_jtable(self, name, database=None): - get_table = self._catalog._jcatalog.getTable - try: - jtable = get_table(self._fully_qualified_name(name, database)) - except pyspark.sql.utils.AnalysisException as e1: - try: - jtable = get_table(self._fully_qualified_name(name, database=None)) - except pyspark.sql.utils.AnalysisException as e2: - raise com.IbisInputError(str(e2)) from e1 - return jtable - - def table(self, name: str, database: str | None = None) -> ir.Table: - """Return a table expression from a table or view in the database. - - Parameters - ---------- - name - Table name - database - Database in which the table resides - - Returns - ------- - Table - Table named `name` from `database` - """ - jtable = self._get_jtable(name, database) - name, database = jtable.name(), jtable.database() - - qualified_name = self._fully_qualified_name(name, database) - - schema = self.get_schema(qualified_name) - node = ops.DatabaseTable( - name, schema, self, namespace=ops.Namespace(database=database) - ) - return PySparkTable(node) - def create_database( self, name: str, @@ -309,8 +264,21 @@ def create_database( force Whether to append `IF NOT EXISTS` to the database creation SQL """ - statement = CreateDatabase(name, path=path, can_exist=force) - return self.raw_sql(statement.compile()) + if path is not None: + properties = sge.Properties( + expressions=[sge.LocationProperty(this=sge.convert(str(path)))] + ) + else: + properties = None + + sql = sge.Create( + kind="DATABASE", + exist=force, + this=sg.to_identifier(name), + properties=properties, + ) + with self._safe_raw_sql(sql): + pass def drop_database(self, name: str, force: bool = False) -> Any: """Drop a Spark database. 
@@ -323,12 +291,14 @@ def drop_database(self, name: str, force: bool = False) -> Any: If False, Spark throws exception if database is not empty or database does not exist """ - statement = ddl.DropDatabase(name, must_exist=not force, cascade=force) - return self.raw_sql(statement.compile()) + sql = sge.Drop(kind="DATABASE", exist=force, this=sg.to_identifier(name)) + with self._safe_raw_sql(sql): + pass def get_schema( self, table_name: str, + schema: str | None = None, database: str | None = None, ) -> sch.Schema: """Return a Schema object for the indicated table and database. @@ -337,6 +307,9 @@ def get_schema( ---------- table_name Table name. May be fully qualified + schema + Spark does not have a schema argument for its table() method, + so this must be None database Spark does not have a database argument for its table() method, so this must be None @@ -346,13 +319,15 @@ def get_schema( Schema An ibis schema """ - if database is not None: + if schema is not None: raise com.UnsupportedArgumentError( - "Spark does not support the `database` argument for `get_schema`" + "Spark does not support the `schema` argument for `get_schema`" ) - df = self._session.table(table_name) - struct = PySparkType.to_ibis(df.schema) + with self._active_database(database): + df = self._session.table(table_name) + struct = PySparkType.to_ibis(df.schema) + return sch.Schema(struct) def create_table( @@ -394,51 +369,43 @@ def create_table( -------- >>> con.create_table("new_table_name", table_expr) # quartodoc: +SKIP # doctest: +SKIP """ - import pandas as pd - import pyarrow as pa - import pyarrow_hotfix # noqa: F401 - - if obj is None and schema is None: - raise com.IbisError("The schema or obj parameter is required") if temp is True: raise NotImplementedError( "PySpark backend does not yet support temporary tables" ) + if obj is not None: - if isinstance(obj, pa.Table): - obj = obj.to_pandas() - if isinstance(obj, pd.DataFrame): - spark_df = self._session.createDataFrame(obj) - mode = "overwrite" if overwrite else "error" - spark_df.write.saveAsTable(name, format=format, mode=mode) - return None - else: - self._register_in_memory_tables(obj) - - ast = self.compiler.to_ast(obj) - select = ast.queries[0] - - statement = ddl.CTAS( - name, - select, - database=database, - can_exist=overwrite, - format=format, - ) + table = obj if isinstance(obj, ir.Expr) else ibis.memtable(obj) + query = self.compile(table) + mode = "overwrite" if overwrite else "error" + with self._active_database(database): + self._run_pre_execute_hooks(table) + df = self._session.sql(query) + df.write.saveAsTable(name, format=format, mode=mode) + elif schema is not None: + schema = PySparkSchema.from_ibis(schema) + with self._active_database(database): + self._catalog.createTable(name, schema=schema, format=format) else: - statement = ddl.CreateTableWithSchema( - name, - schema, - database=database, - format=format, - can_exist=overwrite, - ) + raise com.IbisError("The schema or obj parameter is required") - self.raw_sql(statement.compile()) return self.table(name, database=database) - def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: - self.compile(op.to_expr()).createOrReplaceTempView(op.name) + # TODO(kszucs): should have this implementation in the base sqlglot backend + def truncate_table(self, name: str, database: str | None = None) -> None: + """Delete all rows from an existing table. 
+ + Parameters + ---------- + name + Table name + database + Database name + """ + table = sg.table(name, db=database) + query = f"TRUNCATE TABLE {table}" + with self._safe_raw_sql(query): + pass def create_view( self, @@ -446,9 +413,10 @@ def create_view( obj: ir.Table, *, database: str | None = None, + schema: str | None = None, overwrite: bool = False, ) -> ir.Table: - """Create a Spark view from a table expression. + """Create a temporary Spark view from a table expression. Parameters ---------- @@ -458,6 +426,8 @@ def create_view( Expression to use for the view database Database name + schema + Schema name overwrite Replace an existing view of the same name if it exists @@ -466,61 +436,19 @@ def create_view( Table The created view """ - ast = self.compiler.to_ast(obj) - select = ast.queries[0] - statement = ddl.CreateView( - name, select, database=database, can_exist=overwrite, temporary=True + src = sge.Create( + this=sg.table( + name, db=schema, catalog=database, quoted=self.compiler.quoted + ), + kind="TEMPORARY VIEW", + replace=overwrite, + expression=self.compile(obj), ) - self.raw_sql(statement.compile()) + self._register_in_memory_tables(obj) + with self._safe_raw_sql(src): + pass return self.table(name, database=database) - def drop_table( - self, - name: str, - *, - database: str | None = None, - force: bool = False, - ) -> None: - """Drop a table.""" - self.drop_table_or_view(name, database=database, force=force) - - def drop_view( - self, - name: str, - *, - database: str | None = None, - force: bool = False, - ): - """Drop a view.""" - self.drop_table_or_view(name, database=database, force=force) - - def drop_table_or_view( - self, - name: str, - *, - database: str | None = None, - force: bool = False, - ) -> None: - """Drop a Spark table or view. - - Parameters - ---------- - name - Table or view name - database - Database name - force - Database may throw exception if table does not exist - - Examples - -------- - >>> table = "my_table" - >>> db = "operations" - >>> con.drop_table_or_view(table, db, force=True) # quartodoc: +SKIP # doctest: +SKIP - """ - statement = DropTable(name, database=database, must_exist=not force) - self.raw_sql(statement.compile()) - def rename_table(self, old_name: str, new_name: str) -> None: """Rename an existing table. @@ -531,21 +459,15 @@ def rename_table(self, old_name: str, new_name: str) -> None: new_name The new name of the table. """ - statement = ddl.RenameTable(old_name, new_name) - self.raw_sql(statement.compile()) - - def truncate_table(self, name: str, database: str | None = None) -> None: - """Delete all rows from an existing table. - - Parameters - ---------- - name - Table name - database - Database name - """ - statement = TruncateTable(name, database=database) - self.raw_sql(statement.compile()) + old = sg.table(old_name, quoted=True) + new = sg.table(new_name, quoted=True) + query = sge.AlterTable( + this=old, + exists=False, + actions=[sge.RenameTable(this=new, exists=True)], + ) + with self._safe_raw_sql(query): + pass def insert( self, @@ -553,8 +475,6 @@ def insert( obj: ir.Table | pd.DataFrame | None = None, database: str | None = None, overwrite: bool = False, - values: Any | None = None, - validate: bool = True, ) -> Any: """Insert data into an existing table. 
@@ -566,10 +486,15 @@ def insert( # Completely overwrite contents >>> con.insert(table, table_expr, overwrite=True) # quartodoc: +SKIP # doctest: +SKIP """ - table = self.table(table_name, database=database) - return table.insert( - obj=obj, overwrite=overwrite, values=values, validate=validate - ) + + if isinstance(obj, ir.Expr): + df = self._session.sql(self.compile(obj)) + else: + table = ibis.memtable(obj) + df = self._session.createDataFrame(table.op().data.to_frame()) + + with self._active_database(database): + df.write.insertInto(table_name, overwrite=overwrite) def compute_stats( self, @@ -590,15 +515,14 @@ def compute_stats( rows, size in bytes). """ maybe_noscan = " NOSCAN" * noscan - name = self._fully_qualified_name(name, database) - return self.raw_sql(f"ANALYZE TABLE {name} COMPUTE STATISTICS{maybe_noscan}") - - @classmethod - def has_operation(cls, operation: type[ops.Value]) -> bool: - return operation in PySparkExprTranslator._registry + table = sg.table(name, db=database, quoted=self.compiler.quoted).sql( + dialect=self._sqlglot_dialect + ) + return self.raw_sql(f"ANALYZE TABLE {table} COMPUTE STATISTICS{maybe_noscan}") def _load_into_cache(self, name, expr): - t = expr.compile().cache() + query = self.compile(expr) + t = self._session.sql(query).cache() assert t.is_cached t.createOrReplaceTempView(name) # store the underlying spark dataframe so we can release memory when @@ -768,7 +692,6 @@ def register( ir.Table The just-registered table """ - if isinstance(source, (str, Path)): first = str(source) elif isinstance(source, (list, tuple)): @@ -798,9 +721,6 @@ def _register_failure(self): f"please call one of {msg} directly" ) - def _to_sql(self, expr: ir.Expr, **kwargs) -> str: - raise NotImplementedError(f"Backend '{self.name}' backend doesn't support SQL") - @util.experimental def to_delta( self, diff --git a/ibis/backends/pyspark/client.py b/ibis/backends/pyspark/client.py deleted file mode 100644 index 969dc3f9ad37..000000000000 --- a/ibis/backends/pyspark/client.py +++ /dev/null @@ -1,137 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any - -import sqlglot as sg - -import ibis.common.exceptions as com -import ibis.expr.datatypes as dt -import ibis.expr.types as ir -from ibis.backends.pyspark import ddl - -if TYPE_CHECKING: - from collections.abc import Iterable, Mapping - - import pandas as pd - - -class PySparkTable(ir.Table): - @property - def _qualified_name(self) -> str: - op = self.op() - return sg.table( - op.name, db=op.namespace.schema, catalog=op.namespace.database, quoted=True - ).sql(dialect="spark") - - @property - def _database(self) -> str: - return self.op().namespace - - @property - def _unqualified_name(self) -> str: - return self.name - - @property - def name(self): - return self.op().name - - @property - def _client(self): - return self.op().source - - def compute_stats(self, noscan: bool = False): - """Invoke the Spark `ANALYZE TABLE COMPUTE STATISTICS` command. - - Parameters - ---------- - noscan - If `True`, collect only basic statistics for the table (number of - rows, size in bytes). 
- - See Also - -------- - [`pyspark.Backend.compute_stats`](../backends/pyspark.qmd#ibis.backends.pyspark.Backend.compute_stats) - """ - return self._client.compute_stats(self._qualified_name, noscan=noscan) - - def drop(self): - """Drop the table from the database.""" - self._client.drop_table_or_view(self._qualified_name) - - def truncate(self): - """Truncate the table, removing all data.""" - self._client.truncate_table(self._qualified_name) - - @staticmethod - def _validate_compatible(from_schema, to_schema): - if set(from_schema.names) != set(to_schema.names): - raise com.IbisInputError("Schemas have different names") - - for name in from_schema: - lt = from_schema[name] - rt = to_schema[name] - if not dt.castable(lt, rt): - raise com.IbisInputError(f"Cannot safely cast {lt!r} to {rt!r}") - - def insert( - self, - obj: ir.Table | pd.DataFrame | None = None, - overwrite: bool = False, - values: Iterable[Any] | None = None, - validate: bool = True, - ): - """Insert data into the table. - - Parameters - ---------- - obj - Table expression or pandas DataFrame - overwrite - If True, will replace existing contents of table - values - Values to insert. Not implemented currently. - validate - If True, do more rigorous validation that schema of table being - inserted is compatible with the existing table - - Examples - -------- - >>> t.insert(table_expr) # quartodoc: +SKIP # doctest: +SKIP - - # Completely overwrite contents - >>> t.insert(table_expr, overwrite=True) # quartodoc: +SKIP # doctest: +SKIP - """ - import pandas as pd - - if isinstance(obj, pd.DataFrame): - spark_df = self._session.createDataFrame(obj) - spark_df.insertInto(self.name, overwrite=overwrite) - return None - - expr = obj - - if values is not None: - raise NotImplementedError - - if validate: - existing_schema = self.schema() - insert_schema = expr.schema() - if not insert_schema.equals(existing_schema): - self._validate_compatible(insert_schema, existing_schema) - - ast = self._client.compiler.to_ast(expr) - select = ast.queries[0] - statement = ddl.InsertSelect(self._qualified_name, select, overwrite=overwrite) - return self._client.raw_sql(statement.compile()) - - def alter(self, tbl_properties: Mapping[str, str] | None = None) -> Any: - """Change settings and parameters of the table. 
- - Parameters - ---------- - tbl_properties - Spark table properties - """ - - stmt = ddl.AlterTable(self._qualified_name, tbl_properties=tbl_properties) - return self._client.raw_sql(stmt.compile()) diff --git a/ibis/backends/pyspark/compiler.py b/ibis/backends/pyspark/compiler.py index f1d5e76b84b6..bc2bbf2b7584 100644 --- a/ibis/backends/pyspark/compiler.py +++ b/ibis/backends/pyspark/compiler.py @@ -1,2152 +1,491 @@ from __future__ import annotations -import collections -import enum -import functools -import operator -from functools import partial, reduce - -import pyspark -import pyspark.sql.functions as F -import pyspark.sql.types as pt -import toolz -from packaging.version import parse as vparse -from pyspark.sql import Window -from pyspark.sql.functions import PandasUDFType, pandas_udf +import calendar +import itertools +import re +from functools import singledispatchmethod +import sqlglot as sg +import sqlglot.expressions as sge +from public import public + +import ibis import ibis.common.exceptions as com -import ibis.expr.analysis as an import ibis.expr.datatypes as dt import ibis.expr.operations as ops -import ibis.expr.types as ir -from ibis import interval -from ibis.backends.base.df.timecontext import adjust_context -from ibis.backends.pandas.execution import execute -from ibis.backends.pyspark.datatypes import PySparkType -from ibis.backends.pyspark.timecontext import ( - combine_time_context, - filter_by_time_context, -) -from ibis.common.collections import frozendict +from ibis.backends.base.sqlglot.compiler import NULL, STAR, SQLGlotCompiler +from ibis.backends.base.sqlglot.datatypes import PySparkType +from ibis.backends.base.sqlglot.rewrites import Window, p +from ibis.common.patterns import replace from ibis.config import options -from ibis.expr.operations.udf import InputType -from ibis.util import any_of, guid - - -class AggregationContext(enum.Enum): - ENTIRE = 0 - WINDOW = 1 - GROUP = 2 - - -class PySparkExprTranslator: - _registry = {} - - @classmethod - def compiles(cls, klass): - def decorator(f): - cls._registry[klass] = f - return f - - return decorator - - def translate(self, op, *, scope, timecontext, **kwargs): - """Translate Ibis expression into a PySpark object. - - All translated expressions are cached within scope. If an expression is - found within scope, it's returned. Otherwise, the it's translated and - cached for future reference. - - Parameters - ---------- - op - An ibis operation. 
- scope - dictionary mapping from operation to translated result - timecontext - time context associated with expr - kwargs - parameters passed as keyword args - - Returns - ------- - pyspark.sql.DataFrame - translated PySpark DataFrame or Column object - """ - # TODO(cpcloud): remove the udf instance checking when going to sqlglot - if isinstance(op, ops.ScalarUDF): - formatter = compile_scalar_udf - result = formatter(self, op, scope=scope, timecontext=timecontext, **kwargs) - return result - elif ( - not isinstance(op, ops.ScalarParameter) - and (result := scope.get_value(op, timecontext)) is not None - ): - return result - elif (formatter := self._registry.get(type(op))) is not None: - result = formatter(self, op, scope=scope, timecontext=timecontext, **kwargs) - scope.set_value(op, timecontext, result) - return result - else: - raise com.OperationNotDefinedError(f"No translation rule for {type(op)}") - - -compiles = PySparkExprTranslator.compiles - - -# TODO(kszucs): there are plenty of repetitions in this file which should be -# reduced at some point - - -@compiles(ops.DatabaseTable) -def compile_datasource(t, op, *, timecontext, **kwargs): - df = op.source._session.table(op.name) - return filter_by_time_context(df, timecontext).alias(op.name) - - -@compiles(ops.SQLQueryResult) -def compile_sql_query_result(t, op, **kwargs): - query, _, client = op.args - return client._session.sql(query) - - -def _can_be_replaced_by_column_name(column, table): - """Return whether the given `column` can be replaced by its literal name. - - `True` when `column` and `table[column.get_name()]` are semantically equivalent. - """ - # Each check below is necessary to distinguish a pure projection from - # other valid selections, such as a mutation that assigns a new column - # or changes the value of an existing column. - return ( - isinstance(column, ops.TableColumn) - and column.table == table - and column.name in table.schema - # TODO(kszucs): do we really need this condition? - and column == table.to_expr()[column.name].op() - ) - - -@compiles(ops.Alias) -def compile_alias(t, op, **kwargs): - arg = t.translate(op.arg, **kwargs) - if isinstance(arg, pyspark.sql.Column): - return arg.alias(op.name) - else: - return arg - - -@compiles(ops.Selection) -def compile_selection(t, op, *, scope, timecontext, **kwargs): - # In selection, there could be multiple children that point to the - # same root table. e.g. window with different sizes on a table. - # We need to get the 'combined' time range that is a superset of every - # time context among child nodes, and pass this as context to - # source table to get all data within time context loaded. - arg_timecontexts = [ - adjust_context(node, scope=scope, timecontext=timecontext) # , **kwargs) - for node in op.selections - if timecontext - ] - adjusted_timecontext = combine_time_context(arg_timecontexts) - # If this is a sort or filter node, op.selections is empty - # in this case, we use the original timecontext - if not adjusted_timecontext: - adjusted_timecontext = timecontext - src_table = t.translate( - op.table, scope=scope, timecontext=adjusted_timecontext, **kwargs - ) - - col_in_selection_order = [] - col_to_drop = [] - result_table = src_table - - for predicate in op.predicates: - col = t.translate(predicate, scope=scope, timecontext=timecontext, **kwargs) - # Due to an upstream Spark issue (SPARK-33057) we cannot - # directly use filter with a window operation. 
The workaround - # here is to assign a temporary column for the filter predicate, - # do the filtering, and then drop the temporary column. - filter_column = f"predicate_{guid()}" - result_table = result_table.withColumn(filter_column, col) - result_table = result_table.filter(F.col(filter_column)) - result_table = result_table.drop(filter_column) - - for selection in op.selections: - if isinstance(selection, ops.TableNode): - col_in_selection_order.extend(selection.schema.names) - elif isinstance(selection, ops.Value): - # If the selection is a straightforward projection of a table - # column from the root table itself (i.e. excluding mutations and - # renames), we can get the selection name directly. - if _can_be_replaced_by_column_name(selection, op.table): - col_in_selection_order.append(selection.name) - else: - col = t.translate( - selection, - scope=scope, - timecontext=adjusted_timecontext, - **kwargs, - ) - col = col.alias(selection.name) - col_in_selection_order.append(col) - else: - raise NotImplementedError( - f"Unrecognized type in selections: {type(selection)}" - ) - if col_in_selection_order: - result_table = result_table[col_in_selection_order] +from ibis.util import gen_name - if col_to_drop: - result_table = result_table.drop(*col_to_drop) - if op.sort_keys: - sort_cols = [ - t.translate(key, scope=scope, timecontext=timecontext, **kwargs) - for key in op.sort_keys - ] - result_table = result_table.sort(*sort_cols) - - return filter_by_time_context(result_table, timecontext, adjusted_timecontext) - - -@compiles(ops.SortKey) -def compile_sort_desc(t, op, **kwargs): - col = t.translate(op.expr, **kwargs) - return col.asc() if op.ascending else col.desc() - - -def compile_nan_as_null(compile_func): - @functools.wraps(compile_func) - def wrapper(t, op, *args, **kwargs): - compiled = compile_func(t, op, *args, **kwargs) - if options.pyspark.treat_nan_as_null and isinstance(op.dtype, dt.Floating): - return F.nanvl(compiled, F.lit(None)) +@replace(p.Limit) +def offset_to_filter(_): + # spark doesn't support dynamic limit, so raise an error if either limit or + # offset is not a literal expression + if isinstance(_.n, ops.Value) and _.n.find(ops.Relation): + raise com.UnsupportedOperationError( + "PySpark backend does not support dynamic limit." + ) + if isinstance(_.offset, ops.Value) and _.offset.find(ops.Relation): + raise com.UnsupportedOperationError( + "PySpark backend does not support dynamic offset." 
+ ) + if _.offset == 0: + return _ + # spark doesn't support offset by default, so we need to emulate it by first + # generating row numbers and then filtering out the first N rows + field_name = gen_name("ibis_row_number") + rel = _.parent.to_expr() + rel = rel.mutate(ibis.row_number().name(field_name)) + rel = rel.filter(rel[field_name] > _.offset) + return _.copy(parent=rel, offset=0) + + +@public +class PySparkCompiler(SQLGlotCompiler): + __slots__ = () + + dialect = "spark" + type_mapper = PySparkType + rewrites = (offset_to_filter, *SQLGlotCompiler.rewrites) + quoted = True + + def _aggregate(self, funcname: str, *args, where): + func = self.f[funcname] + if where is not None: + args = tuple(self.if_(where, arg, NULL) for arg in args) + return func(*args) + + @singledispatchmethod + def visit_node(self, op, **kwargs): + return super().visit_node(op, **kwargs) + + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_floating(): + result = super().visit_NonNullLiteral(op, value=value, dtype=dtype) + if options.pyspark.treat_nan_as_null: + return self.f.nanvl(result, sge.NULL) + else: + return result + elif dtype.is_string(): + value = value.replace("\\", "\\\\") + return sge.convert(value) + elif dtype.is_binary(): + return self.f.unhex(value.hex()) + elif dtype.is_decimal(): + if value.is_finite(): + return self.cast(str(value), dtype) + else: + return self.cast(str(value), dt.float64) + elif dtype.is_uuid(): + return sge.convert(str(value)) else: - return compiled - - return wrapper - - -@compiles(ops.TableColumn) -@compile_nan_as_null -def compile_column(t, op, **kwargs): - name = op.name - table = op.table - try: - name = f"`{table.name}`.`{name}`" - except AttributeError: - spark_df = t.translate(table, **kwargs) - return spark_df[name] - else: - return F.col(name) - - -@compiles(ops.StructField) -def compile_struct_field(t, op, **kwargs): - arg = t.translate(op.arg, **kwargs) - return arg[op.field] + return None + @visit_node.register(ops.Field) + def visit_Field(self, op, *, rel, name): + result = super().visit_Field(op, rel=rel, name=name) + if op.dtype.is_floating() and options.pyspark.treat_nan_as_null: + return self.f.nanvl(result, sge.NULL) + else: + return result -@compiles(ops.StructColumn) -def compile_struct_column(t, op, **kwargs): - return F.struct( - *( - t.translate(col, **kwargs).alias(name) - for name, col in zip(op.names, op.values) + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + if to.is_json(): + if op.arg.dtype.is_string(): + return arg + else: + return self.f.to_json(arg) + else: + return self.cast(arg, to) + + @visit_node.register(ops.IsNull) + def visit_IsNull(self, op, *, arg): + is_null = arg.is_(sge.NULL) + is_nan = self.f.isnan(arg) + if op.arg.dtype.is_floating(): + return sg.or_(is_null, is_nan) + else: + return is_null + + @visit_node.register(ops.NotNull) + def visit_NotNull(self, op, *, arg): + is_not_null = arg.is_(sg.not_(sge.NULL)) + is_not_nan = sg.not_(self.f.isnan(arg)) + if op.arg.dtype.is_floating(): + return sg.and_(is_not_null, is_not_nan) + else: + return is_not_null + + @visit_node.register(ops.IsInf) + def visit_IsInf(self, op, *, arg): + if op.arg.dtype.is_floating(): + return sg.or_(arg == self.POS_INF, arg == self.NEG_INF) + return sge.FALSE + + @visit_node.register(ops.Xor) + def visit_Xor(self, op, left, right): + return (left | right) & ~(left & right) + + @visit_node.register(ops.Time) + def visit_Time(self, op, *, arg): + return arg - self.f.anon.date_trunc("day", arg) + + 
@visit_node.register(ops.IntervalFromInteger) + def visit_IntervalFromInteger(self, op, *, arg, unit): + arg = self.f.concat(arg, sge.convert(f" {unit.plural}")) + typ = sge.DataType(this=sge.DataType.Type.INTERVAL) + return sg.cast(sge.convert(arg), to=typ) + + @visit_node.register(ops.DayOfWeekIndex) + def visit_DayOfWeekIndex(self, op, *, arg): + return (self.f.dayofweek(arg) + 5) % 7 + + @visit_node.register(ops.DayOfWeekName) + def visit_DayOfWeekName(self, op, *, arg): + return sge.Case( + this=(self.f.dayofweek(arg) + 5) % 7, + ifs=list(itertools.starmap(self.if_, enumerate(calendar.day_name))), ) - ) + @visit_node.register(ops.ExtractDayOfYear) + def visit_ExtractDayOfYear(self, op, *, arg): + return self.cast(self.f.dayofyear(arg), op.dtype) -@compiles(ops.SelfReference) -def compile_self_reference(t, op, **kwargs): - return t.translate(op.table, **kwargs).alias(op.name) + @visit_node.register(ops.ExtractMillisecond) + def visit_ExtractMillisecond(self, op, *, arg): + return self.cast(self.f.date_format(arg, "SSS"), op.dtype) + @visit_node.register(ops.ExtractMicrosecond) + def visit_ExtractMicrosecond(self, op, *, arg): + raise com.UnsupportedOperationError( + "PySpark backend does not support extracting microseconds." + ) -@compiles(ops.Cast) -def compile_cast(t, op, **kwargs): - if op.to.is_interval(): - if isinstance(op.arg, ops.Literal): - return interval(op.arg.value, op.to.unit).op() + @visit_node.register(ops.ExtractEpochSeconds) + def visit_ExtractEpochSeconds(self, op, *, arg): + return self.f.unix_timestamp(self.cast(arg, dt.timestamp)) + + @visit_node.register(ops.TimestampFromUNIX) + def visit_TimestampFromUNIX(self, op, *, arg, unit): + if not op.unit: + return self.f.to_timestamp(self.f.from_unixtime(arg)) + elif op.unit.short == "s": + fmt = "yyyy-MM-dd HH:mm:ss" + return self.f.to_timestamp(self.f.from_unixtime(arg, fmt), fmt) else: raise com.UnsupportedArgumentError( - "Casting to intervals is only supported for literals " - f"in the PySpark backend. {type(op.arg)} not allowed." + "PySpark backend does not support timestamp from unix time with " + f"unit {op.unit.short}. Supported unit is s." ) - cast_type = PySparkType.from_ibis(op.to) - - src_column = t.translate(op.arg, **kwargs) - return src_column.cast(cast_type) - - -@compiles(ops.Limit) -def compile_limit(t, op, **kwargs): - if (n := op.n) is not None and not isinstance(n, int): - raise com.UnsupportedArgumentError( - "Dynamic LIMIT is not implemented upstream in PySpark" - ) - if not isinstance(offset := op.offset, int): - raise com.UnsupportedArgumentError( - "Dynamic OFFSET is not implemented upstream in PySpark" - ) - if n != 0 and offset != 0: - raise com.UnsupportedArgumentError( - "PySpark backend does not support non-zero offset values for " - f"the limit operation. Got offset {offset:d}." 
- ) - df = t.translate(op.table, **kwargs) - - if n is not None: - return df.limit(n) - else: - return df - - -@compiles(ops.Sample) -def compile_sample(t, op, **kwargs): - df = t.translate(op.table, **kwargs) - return df.sample(fraction=op.fraction, seed=op.seed) - - -@compiles(ops.And) -def compile_and(t, op, **kwargs): - return t.translate(op.left, **kwargs) & t.translate(op.right, **kwargs) - - -@compiles(ops.Or) -def compile_or(t, op, **kwargs): - return t.translate(op.left, **kwargs) | t.translate(op.right, **kwargs) - - -@compiles(ops.Xor) -def compile_xor(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - return (left | right) & ~(left & right) - - -@compiles(ops.Equals) -def compile_equals(t, op, **kwargs): - return t.translate(op.left, **kwargs) == t.translate(op.right, **kwargs) - - -@compiles(ops.Not) -def compile_not(t, op, **kwargs): - return ~t.translate(op.arg, **kwargs) - - -@compiles(ops.NotEquals) -def compile_not_equals(t, op, **kwargs): - return t.translate(op.left, **kwargs) != t.translate(op.right, **kwargs) - - -@compiles(ops.Greater) -def compile_greater(t, op, **kwargs): - return t.translate(op.left, **kwargs) > t.translate(op.right, **kwargs) - - -@compiles(ops.GreaterEqual) -def compile_greater_equal(t, op, **kwargs): - return t.translate(op.left, **kwargs) >= t.translate(op.right, **kwargs) - - -@compiles(ops.Less) -def compile_less(t, op, **kwargs): - return t.translate(op.left, **kwargs) < t.translate(op.right, **kwargs) - - -@compiles(ops.LessEqual) -def compile_less_equal(t, op, **kwargs): - return t.translate(op.left, **kwargs) <= t.translate(op.right, **kwargs) - - -@compiles(ops.Between) -def compile_between(t, op, **kwargs): - arg = t.translate(op.arg, **kwargs) - lower_bound = t.translate(op.lower_bound, **kwargs) - upper_bound = t.translate(op.upper_bound, **kwargs) - return arg.between(lower_bound, upper_bound) - - -@compiles(ops.Multiply) -def compile_multiply(t, op, **kwargs): - return t.translate(op.left, **kwargs) * t.translate(op.right, **kwargs) - - -@compiles(ops.Subtract) -def compile_subtract(t, op, **kwargs): - return t.translate(op.left, **kwargs) - t.translate(op.right, **kwargs) - - -@compiles(ops.Literal) -@compile_nan_as_null -def compile_literal(t, op, *, raw=False, **kwargs): - """If raw is True, don't wrap the result with F.lit().""" - - value = op.value - dtype = op.dtype - - if value is None: - return F.lit(None) - - if raw: - return value - - if dtype.is_interval(): - # execute returns a Timedelta and value is nanoseconds - return execute(op).value - - if isinstance(value, collections.abc.Set): - # Don't wrap set with F.lit - if isinstance(value, frozenset): - # Spark doesn't like frozenset - return set(value) + @visit_node.register(ops.TimestampTruncate) + @visit_node.register(ops.DateTruncate) + @visit_node.register(ops.TimeTruncate) + def visit_TimestampTruncate(self, op, *, arg, unit): + if unit.short == "ns": + raise com.UnsupportedOperationError( + f"{unit!r} unit is not supported in timestamp {type(op)}" + ) + return self.f.anon.date_trunc(unit.singular, arg) + + @visit_node.register(ops.CountStar) + def visit_CountStar(self, op, *, arg, where): + if where is not None: + return self.f.sum(self.cast(where, op.dtype)) + return self.f.count(STAR) + + @visit_node.register(ops.CountDistinct) + def visit_CountDistinct(self, op, *, arg, where): + if where is not None: + arg = self.if_(where, arg, NULL) + return self.f.count(sge.Distinct(expressions=[arg])) + + 
@visit_node.register(ops.CountDistinctStar) + def visit_CountDistinctStar(self, op, *, arg, where): + if where is None: + return self.f.count(sge.Distinct(expressions=[STAR])) + + cols = [ + self.if_( + where, + sg.column(name, table=arg.alias_or_name, quoted=self.quoted), + sge.NULL, + ) + for name in op.arg.schema + ] + return self.f.count(sge.Distinct(expressions=cols)) + + @visit_node.register(ops.First) + def visit_First(self, op, *, arg, where): + if where is not None: + arg = self.if_(where, arg, NULL) + return self.f.first(arg, sge.TRUE) + + @visit_node.register(ops.Last) + def visit_Last(self, op, *, arg, where): + if where is not None: + arg = self.if_(where, arg, NULL) + return self.f.last(arg, sge.TRUE) + + @visit_node.register(ops.Arbitrary) + def visit_Arbitrary(self, op, *, arg, how, where): + if where is not None: + arg = self.if_(where, arg, NULL) + if how == "first": + return self.f.first(arg, sge.TRUE) + elif how == "last": + return self.f.last(arg, sge.TRUE) else: - return value - elif dtype.is_array(): - return F.array(*map(F.lit, value)) - elif dtype.is_struct(): - return F.struct(*(F.lit(val).alias(name) for name, val in value.items())) - elif dtype.is_timestamp(): - return F.from_utc_timestamp(F.lit(str(value)), tz="UTC") - elif dtype.is_decimal(): - return F.lit(value.normalize()) - else: - return F.lit(value) - - -@compiles(ops.Aggregation) -def compile_aggregation(t, op, **kwargs): - src_table = t.translate(op.table, **kwargs) - - if op.having: - raise com.UnsupportedOperationError( - "The PySpark backend does not support `having` because the underlying " - "PySpark API does not support it. Use a filter on the aggregation " - "expression instead." - ) - - if op.predicates: - predicate = reduce(ops.And, op.predicates) - src_table = src_table.filter(t.translate(predicate, **kwargs)) - - if op.by: - aggcontext = AggregationContext.GROUP - bys = [t.translate(b, **kwargs) for b in op.by] - src_table = src_table.groupby(*bys) - else: - aggcontext = AggregationContext.ENTIRE - - aggs = [t.translate(m, aggcontext=aggcontext, **kwargs) for m in op.metrics] - return src_table.agg(*aggs) - - -@compiles(ops.Union) -def compile_union(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - result = left.union(right) - return result.distinct() if op.distinct else result - - -@compiles(ops.Intersection) -def compile_intersection(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - return left.intersect(right) if op.distinct else left.intersectAll(right) - - -@compiles(ops.Difference) -def compile_difference(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - return left.subtract(right) if op.distinct else left.exceptAll(right) - - -@compiles(ops.InColumn) -def in_column(t, op, **kwargs): - value = t.translate(op.value, **kwargs) - options = t.translate(op.options, **kwargs) - return value.isin(options) - - -@compiles(ops.InValues) -def in_values(t, op, **kwargs): - value = t.translate(op.value, **kwargs) - options = [t.translate(option, **kwargs) for option in op.options] - return value.isin(options) - - -@compiles(ops.StartsWith) -def compile_startswith(t, op, **kwargs): - col = t.translate(op.arg, **kwargs) - start = t.translate(op.start, **kwargs) - return col.startswith(start) - - -@compiles(ops.EndsWith) -def compile_endswith(t, op, **kwargs): - col = t.translate(op.arg, **kwargs) - end = t.translate(op.end, **kwargs) - return 
col.endswith(end) - - -def _is_table(table): - # TODO(kszucs): is has a pretty misleading name, should be removed - try: - return isinstance(table.arg, ops.TableNode) - except AttributeError: - return False - - -def compile_aggregator( - t, op, *, fn, aggcontext=None, where_excludes: tuple[str, ...] = (), **kwargs -): - if (where := getattr(op, "where", None)) is not None: - condition = t.translate(where, **kwargs) - else: - condition = None - - def translate_arg(arg, include_where: bool): - src_col = t.translate(arg, **kwargs) - - if include_where and condition is not None: - src_col = F.when(condition, src_col) - return src_col + raise com.UnsupportedOperationError( + f"PySpark backend does not support arbitrary with how={how}. " + "Supported values are `first` and `last`." + ) - src_inputs = tuple( - (argname, arg) - for argname, arg in zip(op.argnames, op.args) - if argname != "where" - ) - src_cols = tuple( - translate_arg( - arg, include_where=(not where_excludes) or argname not in where_excludes + @visit_node.register(ops.Median) + def visit_Median(self, op, *, arg, where): + return self.agg.percentile(arg, 0.5, where=where) + + @visit_node.register(ops.GroupConcat) + def visit_GroupConcat(self, op, *, arg, sep, where): + if where is not None: + arg = self.if_(where, arg, sge.NULL) + collected = self.f.collect_list(arg) + collected = self.if_(self.f.size(collected).eq(0), sge.NULL, collected) + return self.f.array_join(collected, sep) + + @visit_node.register(ops.Correlation) + def visit_Correlation(self, op, *, left, right, how, where): + if (left_type := op.left.dtype).is_boolean(): + left = self.cast(left, dt.Int32(nullable=left_type.nullable)) + if (right_type := op.right.dtype).is_boolean(): + right = self.cast(right, dt.Int32(nullable=right_type.nullable)) + return self.agg.corr(left, right, where=where) + + def _build_sequence(self, start, stop, step, zero): + seq = self.f.sequence(start, stop, step) + length = self.f.size(seq) + last_element = self.f.element_at(seq, length) + # slice off the last element if we'd be inclusive on the right + seq = self.if_(last_element.eq(stop), self.f.slice(seq, 1, length - 1), seq) + return self.if_( + step.neq(zero) & self.f.signum(step).eq(self.f.signum(stop - start)), + seq, + self.f.array(), ) - for argname, arg in src_inputs - if isinstance(arg, ops.Node) - ) - - col = fn(*src_cols) - if aggcontext: - return col - else: - # We are trying to compile a expr such as some_col.max() - # to a Spark expression. 
- # Here we get the root table df of that column and compile - # the expr to: - # df.select(max(some_col)) - if _is_table(op): - (src_col,) = src_cols - return src_col.select(col) - table_op = an.find_first_base_table(op) - return t.translate(table_op, **kwargs).select(col) - - -@compiles(ops.GroupConcat) -def compile_group_concat(t, op, **kwargs): - sep = t.translate(op.sep, raw=True, **kwargs) - - def fn(col, _): - collected = F.collect_list(col) - return F.array_join( - F.when(F.size(collected) == 0, F.lit(None)).otherwise(collected), - sep, - ) - - return compile_aggregator(t, op, fn=fn, **kwargs) - - -@compiles(ops.Any) -def compile_any(t, op, *, aggcontext=None, **kwargs): - return compile_aggregator(t, op, fn=F.max, aggcontext=aggcontext, **kwargs) - - -@compiles(ops.All) -def compile_all(t, op, *args, **kwargs): - return compile_aggregator(t, op, *args, fn=F.min, **kwargs) - - -@compiles(ops.Count) -def compile_count(t, op, **kwargs): - return compile_aggregator(t, op, fn=F.count, **kwargs) - - -@compiles(ops.CountDistinct) -def compile_count_distinct(t, op, **kwargs): - return compile_aggregator(t, op, fn=F.count_distinct, **kwargs) - - -@compiles(ops.CountStar) -def compile_count_star(t, op, aggcontext=None, **kwargs): - src_table = t.translate(op.arg, **kwargs) - - src_col = F.lit(1) - - if (where := op.where) is not None: - src_col = F.when(t.translate(where, **kwargs), src_col) - - col = F.count(src_col) - if aggcontext is not None: - return col - else: - return src_table.select(col) - - -@compiles(ops.CountDistinctStar) -def compile_count_distinct_star(t, op, aggcontext=None, **kwargs): - src_table = t.translate(op.arg, **kwargs) - src_col = F.struct(*map(F.col, op.arg.schema.names)) - - if (where := op.where) is not None: - src_col = F.when(t.translate(where, **kwargs), src_col) - - src_col = F.countDistinct(src_col) - if aggcontext is not None: - return src_col - else: - return src_table.select(src_col) - - -@compiles(ops.Max) -def compile_max(t, op, **kwargs): - return compile_aggregator(t, op, fn=F.max, **kwargs) - - -@compiles(ops.Min) -def compile_min(t, op, **kwargs): - return compile_aggregator(t, op, fn=F.min, **kwargs) - -@compiles(ops.Mean) -def compile_mean(t, op, **kwargs): - return compile_aggregator(t, op, fn=F.mean, **kwargs) - - -@compiles(ops.Sum) -def compile_sum(t, op, **kwargs): - return compile_aggregator(t, op, fn=F.sum, **kwargs) - - -@compiles(ops.ApproxCountDistinct) -def compile_approx_count_distinct(t, op, **kwargs): - return compile_aggregator(t, op, fn=F.approx_count_distinct, **kwargs) - - -@compiles(ops.ApproxMedian) -def compile_approx_median(t, op, **kwargs): - return compile_aggregator( - t, op, fn=lambda arg: F.percentile_approx(arg, 0.5), **kwargs - ) - - -@compiles(ops.StandardDev) -def compile_std(t, op, **kwargs): - how = op.how - - if how == "sample": - fn = F.stddev_samp - elif how == "pop": - fn = F.stddev_pop - else: - raise com.TranslationError(f"Unexpected 'how' in translation: {how}") - - return compile_aggregator(t, op, fn=fn, **kwargs) - - -@compiles(ops.Variance) -def compile_variance(t, op, **kwargs): - how = op.how - - if how == "sample": - fn = F.var_samp - elif how == "pop": - fn = F.var_pop - else: - raise com.TranslationError(f"Unexpected 'how' in translation: {how}") - - return compile_aggregator(t, op, fn=fn, **kwargs) - - -@compiles(ops.Covariance) -def compile_covariance(t, op, **kwargs): - how = op.how - - fn = {"sample": F.covar_samp, "pop": F.covar_pop}[how] - - new_op = op.__class__( - left=ops.Cast(op.left, 
to=dt.float64), - right=ops.Cast(op.right, to=dt.float64), - how=how, - where=op.where, - ) - return compile_aggregator(t, new_op, fn=fn, **kwargs) - - -@compiles(ops.Correlation) -def compile_correlation(t, op, **kwargs): - if (how := op.how) == "pop": - raise ValueError("PySpark only implements sample correlation") - - new_op = op.__class__( - left=ops.Cast(op.left, to=dt.float64), - right=ops.Cast(op.right, to=dt.float64), - how=how, - where=op.where, - ) - return compile_aggregator(t, new_op, fn=F.corr, **kwargs) - - -@compiles(ops.Arbitrary) -def compile_arbitrary(t, op, **kwargs): - how = op.how - - if how == "first": - fn = functools.partial(F.first, ignorenulls=True) - elif how == "last": - fn = functools.partial(F.last, ignorenulls=True) - else: - raise com.UnsupportedOperationError( - f"PySpark backend does not support how={how!r}" + @visit_node.register(ops.IntegerRange) + def visit_IntegerRange(self, op, *, start, stop, step): + zero = sge.convert(0) + return self._build_sequence(start, stop, step, zero) + + @visit_node.register(ops.TimestampRange) + def visit_TimestampRange(self, op, *, start, stop, step): + unit = op.step.dtype.resolution + zero = sge.Interval(this=sge.convert(0), unit=unit) + return self._build_sequence(start, stop, step, zero) + + @visit_node.register(ops.Sample) + def visit_Sample( + self, op, *, parent, fraction: float, method: str, seed: int | None, **_ + ): + if seed is not None: + raise com.UnsupportedOperationError( + "PySpark backend does not support sampling with seed." + ) + sample = sge.TableSample( + this=parent, + percent=sge.convert(fraction * 100.0), ) + return sg.select(STAR).from_(sample) - return compile_aggregator(t, op, fn=fn, **kwargs) - - -@compiles(ops.First) -def compile_first(t, op, **kwargs): - fn = functools.partial(F.first, ignorenulls=True) - return compile_aggregator(t, op, fn=fn, **kwargs) - - -@compiles(ops.Last) -def compile_last(t, op, **kwargs): - fn = functools.partial(F.last, ignorenulls=True) - return compile_aggregator(t, op, fn=fn, **kwargs) - - -@compiles(ops.Coalesce) -def compile_coalesce(t, op, **kwargs): - kwargs["raw"] = False # override to force column literals - - src_columns = [] - - for arg in op.arg: - col = t.translate(arg, **kwargs) - - if arg.dtype.is_floating(): - col = F.when(F.isnan(col), F.lit(None)).otherwise(col) - - src_columns.append(col) - - if len(src_columns) == 1: - return src_columns[0] - else: - return F.coalesce(*src_columns) - - -@compiles(ops.Greatest) -def compile_greatest(t, op, **kwargs): - kwargs["raw"] = False # override to force column literals - src_columns = [t.translate(col, **kwargs) for col in op.arg] - if len(src_columns) == 1: - return src_columns[0] - else: - return F.greatest(*src_columns) - - -@compiles(ops.Least) -def compile_least(t, op, **kwargs): - kwargs["raw"] = False # override to force column literals - src_columns = [t.translate(col, **kwargs) for col in op.arg] - if len(src_columns) == 1: - return src_columns[0] - else: - return F.least(*src_columns) - - -@compiles(ops.Abs) -def compile_abs(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.abs(src_column) - - -@compiles(ops.Clip) -def compile_clip(t, op, **kwargs): - col = t.translate(op.arg, **kwargs) - upper = t.translate(op.upper, **kwargs) if op.upper is not None else float("inf") - lower = t.translate(op.lower, **kwargs) if op.lower is not None else float("-inf") - - def column_min(value, limit): - """Return values greater than or equal to `limit`.""" - return F.when((value < limit) & 
~F.isnull(value), limit).otherwise(value) - - def column_max(value, limit): - """Return values less than or equal to `limit`.""" - return F.when((value > limit) & ~F.isnull(value), limit).otherwise(value) - - def clip(column, lower_value, upper_value): - return column_max(column_min(column, F.lit(lower_value)), F.lit(upper_value)) - - return clip(col, lower, upper).cast(PySparkType.from_ibis(op.dtype)) - - -@compiles(ops.Round) -def compile_round(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - scale = t.translate(op.digits, **kwargs, raw=True) if op.digits is not None else 0 - rounded = F.round(src_column, scale=scale) - if scale == 0: - rounded = rounded.astype("long") - return rounded - - -@compiles(ops.Ceil) -def compile_ceil(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.ceil(src_column) - - -@compiles(ops.Floor) -def compile_floor(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.floor(src_column) - - -@compiles(ops.Exp) -def compile_exp(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.exp(src_column) - - -@compiles(ops.Sign) -def compile_sign(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - - return F.when(src_column == 0, F.lit(0.0)).otherwise( - F.when(src_column > 0, F.lit(1.0)).otherwise(-1.0) - ) - - -@compiles(ops.Sqrt) -def compile_sqrt(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.sqrt(src_column) - - -@compiles(ops.Log) -def compile_log(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - raw_base = t.translate(op.base, **kwargs, raw=True) - try: - base = float(raw_base) - except TypeError: - return F.log(src_column) / F.log(raw_base) - else: - return F.log(base, src_column) - - -@compiles(ops.Ln) -def compile_ln(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.log(src_column) - - -@compiles(ops.Log2) -def compile_log2(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.log2(src_column) - - -@compiles(ops.Log10) -def compile_log10(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.log10(src_column) - - -@compiles(ops.Modulus) -def compile_modulus(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - return left % right - - -@compiles(ops.Negate) -def compile_negate(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - if op.dtype.is_boolean(): - return ~src_column - return -src_column - - -@compiles(ops.Add) -def compile_add(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - return left + right - - -@compiles(ops.Divide) -def compile_divide(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - return left / right - - -@compiles(ops.FloorDivide) -def compile_floor_divide(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - return F.floor(left / right) - - -@compiles(ops.Power) -def compile_power(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - return F.pow(left, right) - - -@compiles(ops.IsNan) -def compile_isnan(t, op, **kwargs): - arg = op.arg - if arg.dtype.is_floating(): - src_column = t.translate(arg, **kwargs) - return F.isnull(src_column) | F.isnan(src_column) - return F.lit(False) - - -@compiles(ops.IsInf) -def compile_isinf(t, op, **kwargs): - arg = op.arg - if arg.dtype.is_floating(): - inf = 
float("inf") - return t.translate(arg, **kwargs).isin([inf, -inf]) - return F.lit(False) - - -@compiles(ops.Uppercase) -def compile_uppercase(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.upper(src_column) - - -@compiles(ops.Lowercase) -def compile_lowercase(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.lower(src_column) - - -@compiles(ops.Reverse) -def compile_reverse(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.reverse(src_column) - - -@compiles(ops.Strip) -def compile_strip(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.trim(src_column) - - -@compiles(ops.LStrip) -def compile_lstrip(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.ltrim(src_column) + @visit_node.register(ops.WindowBoundary) + def visit_WindowBoundary(self, op, *, value, preceding): + if isinstance(op.value, ops.Literal) and op.value.value == 0: + value = "CURRENT ROW" + side = None + else: + side = "PRECEDING" if preceding else "FOLLOWING" + return {"value": value, "side": side} + + def __sql_name__(self, op) -> str: + if isinstance(op, (ops.ScalarUDF, ops.AggUDF)): + func = op.__func__ + name = op.__func_name__ + elif isinstance(op, (ops.ElementWiseVectorizedUDF, ops.ReductionVectorizedUDF)): + func = op.func + name = op.func.__name__ + else: + raise TypeError(f"Cannot get SQL name for {type(op).__name__}") + if not name.isidentifier(): + # replace invalid characters with underscores + name = re.sub("[^0-9a-zA-Z_]", "", name) -@compiles(ops.RStrip) -def compile_rstrip(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.rtrim(src_column) + # generate unique name for all functions; this is necessary because + # of lambda functions and because kwargs passed to VectorizedUDF nodes + # are encoded as part of the closure + name = f"{name}_{hash(func):X}" + return f"ibis_udf_{name}" -@compiles(ops.Capitalize) -def compile_capitalize(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.initcap(src_column) + @visit_node.register(ops.ElementWiseVectorizedUDF) + @visit_node.register(ops.ReductionVectorizedUDF) + def visit_VectorizedUDF(self, op, *, func, func_args, input_type, return_type): + return self.f[self.__sql_name__(op)](*func_args) + @visit_node.register(ops.MapGet) + def visit_MapGet(self, op, *, arg, key, default): + if default is None: + return arg[key] + else: + return self.if_(self.f.map_contains_key(arg, key), arg[key], default) + + @visit_node.register(ops.ArrayZip) + def visit_ArrayZip(self, op, *, arg): + return self.f.arrays_zip(*arg) + + @visit_node.register(ops.ArrayMap) + def visit_ArrayMap(self, op, *, arg, body, param): + param = sge.Identifier(this=param) + func = sge.Lambda(this=body, expressions=[param]) + return self.f.transform(arg, func) + + @visit_node.register(ops.ArrayFilter) + def visit_ArrayFilter(self, op, *, arg, body, param): + param = sge.Identifier(this=param) + func = sge.Lambda(this=self.if_(body, param, sge.NULL), expressions=[param]) + transform = self.f.transform(arg, func) + func = sge.Lambda(this=param.is_(sg.not_(NULL)), expressions=[param]) + return self.f.filter(transform, func) + + @visit_node.register(ops.ArrayIndex) + def visit_ArrayIndex(self, op, *, arg, index): + return self.f.element_at(arg, index + 1) + + @visit_node.register(ops.ArrayPosition) + def visit_ArrayPosition(self, op, *, arg, other): + return self.f.array_position(arg, other) + + @visit_node.register(ops.ArrayRepeat) + def 
visit_ArrayRepeat(self, op, *, arg, times): + return self.f.flatten(self.f.array_repeat(arg, times)) + + @visit_node.register(ops.ArraySlice) + def visit_ArraySlice(self, op, *, arg, start, stop): + size = self.f.array_size(arg) + start = self.if_(start < 0, self.if_(start < -size, 0, size + start), start) + if stop is None: + stop = size + else: + stop = self.if_(stop < 0, self.if_(stop < -size, 0, size + stop), stop) -@compiles(ops.Substring) -def compile_substring(t, op, raw: bool = False, **kwargs): - src_column = t.translate(op.arg, raw=raw, **kwargs) - start = t.translate(op.start, **kwargs, raw=True) + 1 - length = t.translate(op.length, **kwargs, raw=True) + length = self.if_(stop < start, 0, stop - start) + return self.f.slice(arg, start + 1, length) - if any_of((start, length), pyspark.sql.Column): - raise NotImplementedError( - "Specifying `start` or `length` with column expressions is not supported." + @visit_node.register(ops.ArrayContains) + def visit_ArrayContains(self, op, *, arg, other): + return self.if_( + arg.is_(NULL), + sge.NULL, + self.f.coalesce(self.f.array_contains(arg, other), sge.FALSE), ) - if start < 0: - raise NotImplementedError("`start < 0` is not supported.") - - return src_column.substr(start, length) - - -@compiles(ops.StringLength) -def compile_string_length(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.length(src_column) - - -@compiles(ops.StrRight) -def compile_str_right(t, op, **kwargs): - @F.udf("string") - def str_right(s, nchars): - return s[-nchars:] - - src_column = t.translate(op.arg, **kwargs) - nchars_column = t.translate(op.nchars, **kwargs) - return str_right(src_column, nchars_column) - - -@compiles(ops.Repeat) -def compile_repeat(t, op, **kwargs): - @F.udf("string") - def repeat(s, times): - return s * times - - src_column = t.translate(op.arg, **kwargs) - times_column = t.translate(op.times, **kwargs) - return repeat(src_column, times_column) - - -@compiles(ops.StringFind) -def compile_string_find(t, op, **kwargs): - @F.udf("long") - def str_find(s, substr, start, end): - return s.find(substr, start, end) - - src_column = t.translate(op.arg, **kwargs) - substr_column = t.translate(op.substr, **kwargs) - start_column = t.translate(op.start, **kwargs) if op.start else F.lit(None) - end_column = t.translate(op.end, **kwargs) if op.end else F.lit(None) - return str_find(src_column, substr_column, start_column, end_column) - - -@compiles(ops.Translate) -def compile_translate(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - from_str = op.from_str.value - to_str = op.to_str.value - return F.translate(src_column, from_str, to_str) - - -@compiles(ops.LPad) -def compile_lpad(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - length = op.length.value - pad = op.pad.value - return F.lpad(src_column, length, pad) - - -@compiles(ops.RPad) -def compile_rpad(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - length = op.length.value - pad = op.pad.value - return F.rpad(src_column, length, pad) - - -@compiles(ops.StringJoin) -def compile_string_join(t, op, **kwargs): - @F.udf("string") - def join(sep, arr): - return sep.join(arr) - - sep_column = t.translate(op.sep, **kwargs) - arg = [t.translate(arg, **kwargs) for arg in op.arg] - return join(sep_column, F.array(arg)) + @visit_node.register(ops.ArrayStringJoin) + def visit_ArrayStringJoin(self, op, *, arg, sep): + return self.f.concat_ws(sep, arg) + @visit_node.register(ops.StringFind) + def visit_StringFind(self, op, *, arg, 
substr, start, end): + if end is not None: + raise com.UnsupportedOperationError( + "String find doesn't support `end` argument" + ) -@compiles(ops.RegexSearch) -def compile_regex_search(t, op, **kwargs): - import re + if start is not None: + arg = self.f.substr(arg, start + 1) + pos = self.f.instr(arg, substr) + return self.if_(pos > 0, pos + start, 0) - @F.udf("boolean") - def regex_search(s, pattern): - return re.search(pattern, s) is not None + return self.f.instr(arg, substr) - src_column = t.translate(op.arg, **kwargs) - pattern = t.translate(op.pattern, **kwargs) - return regex_search(src_column, pattern) + @visit_node.register(ops.RegexReplace) + def visit_RegexReplace(self, op, *, arg, pattern, replacement): + return self.f.regexp_replace(arg, pattern, replacement) + @visit_node.register(ops.JSONGetItem) + def visit_JSONGetItem(self, op, *, arg, index): + if op.index.dtype.is_integer(): + fmt = "$[%s]" + else: + fmt = "$.%s" + path = self.f.format_string(fmt, index) + return self.f.get_json_object(arg, path) + + @visit_node.register(Window) + def visit_Window(self, op, *, func, group_by, order_by, **kwargs): + if isinstance(op.func, ops.Analytic): + # spark disallows specifying boundaries for lead/lag + if order_by: + order = sge.Order(expressions=order_by) + else: + # pyspark requires an order by clause for lag/lead + order = sge.Order(expressions=[sge.NULL]) + return sge.Window(this=func, partition_by=group_by, order=order) + else: + return super().visit_node( + op, func=func, group_by=group_by, order_by=order_by, **kwargs + ) -@compiles(ops.RegexExtract) -def compile_regex_extract(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - pattern = op.pattern.value - idx = op.index.value - return F.regexp_extract(src_column, pattern, idx) + @visit_node.register(ops.JoinLink) + def visit_JoinLink(self, op, **kwargs): + if op.how == "asof": + raise com.UnsupportedOperationError( + "ASOF joins are not supported by Spark SQL yet and LATERAL joins " + "raise an analysis error if the lateral subquery is limited which " + "would be necessary to emulate ASOF joins. Once this is fixed " + "upstream, we can add support for ASOF joins." 
+ ) + return super().visit_JoinLink(op, **kwargs) + + @visit_node.register(ops.RowID) + @visit_node.register(ops.TimestampBucket) + def visit_Undefined(self, op, **_): + raise com.OperationNotDefinedError(type(op).__name__) + + +_SIMPLE_OPS = { + ops.ArrayDistinct: "array_distinct", + ops.ArrayFlatten: "flatten", + ops.ArrayIntersect: "array_intersect", + ops.ArrayRemove: "array_remove", + ops.ArraySort: "array_sort", + ops.ArrayUnion: "array_union", + ops.EndsWith: "endswith", + ops.Hash: "hash", + ops.Log10: "log10", + ops.LStrip: "ltrim", + ops.RStrip: "rtrim", + ops.MapLength: "size", + ops.MapContains: "map_contains_key", + ops.MapMerge: "map_concat", + ops.MapKeys: "map_keys", + ops.MapValues: "map_values", +} -@compiles(ops.RegexReplace) -def compile_regex_replace(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - pattern = op.pattern.value - replacement = op.replacement.value - return F.regexp_replace(src_column, pattern, replacement) +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + @PySparkCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) -@compiles(ops.StringReplace) -def compile_string_replace(*args, **kwargs): - return compile_regex_replace(*args, **kwargs) + setattr(PySparkCompiler, f"visit_{_op.__name__}", _fmt) -@compiles(ops.StringSplit) -def compile_string_split(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - delimiter = op.delimiter.value - return F.split(src_column, delimiter) - - -@compiles(ops.StringConcat) -def compile_string_concat(t, op, **kwargs): - kwargs["raw"] = False # override to force column literals - src_columns = [t.translate(arg, **kwargs) for arg in op.arg] - return F.concat(*src_columns) - - -@compiles(ops.StringAscii) -def compile_string_ascii(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.ascii(src_column) - - -@compiles(ops.StringSQLLike) -def compile_string_like(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - pattern = op.pattern.value - return src_column.like(pattern) - - -@compiles(ops.InnerJoin) -def compile_inner_join(t, op, **kwargs): - return compile_join(t, op, **kwargs, how="inner") - - -@compiles(ops.LeftJoin) -def compile_left_join(t, op, **kwargs): - return compile_join(t, op, **kwargs, how="left") - - -@compiles(ops.RightJoin) -def compile_right_join(t, op, **kwargs): - return compile_join(t, op, **kwargs, how="right") - - -@compiles(ops.OuterJoin) -def compile_outer_join(t, op, **kwargs): - return compile_join(t, op, **kwargs, how="outer") - - -@compiles(ops.LeftSemiJoin) -def compile_left_semi_join(t, op, **kwargs): - return compile_join(t, op, **kwargs, how="leftsemi") - - -@compiles(ops.LeftAntiJoin) -def compile_left_anti_join(t, op, **kwargs): - return compile_join(t, op, **kwargs, how="leftanti") - - -def compile_join(t, op, how, **kwargs): - left_df = t.translate(op.left, **kwargs) - right_df = t.translate(op.right, **kwargs) - - pred_columns = reduce( - operator.and_, map(partial(t.translate, **kwargs), op.predicates) - ) - return left_df.join(right_df, pred_columns, how) - - -@compiles(ops.Distinct) -def compile_distinct(t, op, **kwargs): - return t.translate(op.table, **kwargs).distinct() - - -def _canonicalize_interval(t, interval, **kwargs): - """Convert interval to integer timestamp of second. - - When pyspark cast timestamp to integer type, it uses the number of - seconds since epoch. 
Therefore, we need cast ibis interval - correspondingly. - """ - if isinstance(interval, ir.IntervalScalar): - t.translate(interval.op(), **kwargs) - return None - - elif isinstance(interval, int): - return interval - else: - raise com.UnsupportedOperationError( - f"type {type(interval)} is not supported in preceding /following " - "in window." - ) - - -@compiles(ops.WindowBoundary) -def compile_window_boundary(t, boundary, **kwargs): - if boundary.value.dtype.is_interval(): - value = t.translate(boundary.value, **kwargs) - # TODO(kszucs): the value can be a literal which is a bug - value = value.value if isinstance(value, ops.Literal) else value - # value is in nanoseconds and spark uses seconds since epoch - value = int(value / 1e9) - else: - value = boundary.value.value - - return -value if boundary.preceding else value - - -@compiles(ops.WindowFunction) -def compile_window_function(t, op, **kwargs): - grouping_keys = [ - key.name if isinstance(key, ops.TableColumn) else t.translate(key, **kwargs) - for key in op.frame.group_by - ] - - # Timestamp needs to be cast to long for window bounds in spark - ordering_keys = [ - F.col(sort.name).cast("long") if sort.dtype.is_timestamp() else sort.name - for sort in op.frame.order_by - ] - aggcontext = AggregationContext.WINDOW - pyspark_window = Window.partitionBy(grouping_keys).orderBy(ordering_keys) - - # If the operand is a shift op (e.g. lead, lag), Spark will set the window - # bounds. Only set window bounds here if not a shift operation. - func = op.func.__window_op__ - if not isinstance(func, ops.ShiftBase): - if op.frame.start is None: - win_start = Window.unboundedPreceding - else: - win_start = t.translate(op.frame.start, **kwargs) - if op.frame.end is None: - win_end = Window.unboundedFollowing - else: - win_end = t.translate(op.frame.end, **kwargs) - - if op.frame.how == "range": - pyspark_window = pyspark_window.rangeBetween(win_start, win_end) - else: - pyspark_window = pyspark_window.rowsBetween(win_start, win_end) - - result = t.translate(func, **kwargs, aggcontext=aggcontext).over(pyspark_window) - - if isinstance(func, (ops.RankBase, ops.NTile)): - # result must be cast to long type for Rank / RowNumber - return result.astype("long") - 1 - else: - return result - - -def _handle_shift_operation(t, op, fn, **kwargs): - src_column = t.translate(op.arg, **kwargs) - default = op.default.value if op.default is not None else op.default - offset = op.offset.value if op.offset is not None else op.offset - - if offset: - return fn(src_column, count=offset, default=default) - else: - return fn(src_column, default=default) - - -@compiles(ops.Lag) -def compile_lag(t, op, **kwargs): - return _handle_shift_operation(t, op, fn=F.lag, **kwargs) - - -@compiles(ops.Lead) -def compile_lead(t, op, **kwargs): - return _handle_shift_operation(t, op, fn=F.lead, **kwargs) - - -@compiles(ops.MinRank) -def compile_rank(t, op, **kwargs): - return F.rank() - - -@compiles(ops.DenseRank) -def compile_dense_rank(t, op, **kwargs): - return F.dense_rank() - - -@compiles(ops.PercentRank) -def compile_percent_rank(t, op, **kwargs): - return F.percent_rank() - - -@compiles(ops.CumeDist) -def compile_cume_dist(t, op, **kwargs): - raise com.UnsupportedOperationError( - "PySpark backend does not support cume_dist with Ibis." 
- ) - - -@compiles(ops.NTile) -def compile_ntile(t, op, **kwargs): - buckets = op.buckets.value - return F.ntile(buckets) - - -@compiles(ops.FirstValue) -def compile_first_value(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.first(src_column) - - -@compiles(ops.LastValue) -def compile_last_value(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.last(src_column) - - -@compiles(ops.NthValue) -def compile_nth_value(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - nth = t.translate(op.nth, **kwargs, raw=True) - return F.nth_value(src_column, nth + 1) - - -@compiles(ops.RowNumber) -def compile_row_number(t, op, **kwargs): - return F.row_number() - - -# -------------------------- Temporal Operations ---------------------------- - -# Ibis value to PySpark value -_time_unit_mapping = { - "Y": "year", - "Q": "quarter", - "M": "month", - "W": "week", - "D": "day", - "h": "hour", - "m": "minute", - "s": "second", -} - - -@compiles(ops.Date) -def compile_date(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.to_date(src_column).cast("timestamp") - - -def _extract_component_from_datetime(t, op, extract_fn, **kwargs): - date_col = t.translate(op.arg, **kwargs) - return extract_fn(date_col).cast("integer") - - -@compiles(ops.ExtractYear) -def compile_extract_year(t, op, **kwargs): - return _extract_component_from_datetime(t, op, extract_fn=F.year, **kwargs) - - -@compiles(ops.ExtractMonth) -def compile_extract_month(t, op, **kwargs): - return _extract_component_from_datetime(t, op, extract_fn=F.month, **kwargs) - - -@compiles(ops.ExtractDay) -def compile_extract_day(t, op, **kwargs): - return _extract_component_from_datetime(t, op, extract_fn=F.dayofmonth, **kwargs) - - -@compiles(ops.ExtractDayOfYear) -def compile_extract_day_of_year(t, op, **kwargs): - return _extract_component_from_datetime(t, op, extract_fn=F.dayofyear, **kwargs) - - -@compiles(ops.ExtractQuarter) -def compile_extract_quarter(t, op, **kwargs): - return _extract_component_from_datetime(t, op, extract_fn=F.quarter, **kwargs) - - -@compiles(ops.ExtractEpochSeconds) -def compile_extract_epoch_seconds(t, op, **kwargs): - return _extract_component_from_datetime( - t, op, extract_fn=F.unix_timestamp, **kwargs - ) - - -@compiles(ops.ExtractWeekOfYear) -def compile_extract_week_of_year(t, op, **kwargs): - return _extract_component_from_datetime(t, op, extract_fn=F.weekofyear, **kwargs) - - -@compiles(ops.ExtractHour) -def compile_extract_hour(t, op, **kwargs): - return _extract_component_from_datetime(t, op, extract_fn=F.hour, **kwargs) - - -@compiles(ops.ExtractMinute) -def compile_extract_minute(t, op, **kwargs): - return _extract_component_from_datetime(t, op, extract_fn=F.minute, **kwargs) - - -@compiles(ops.ExtractSecond) -def compile_extract_second(t, op, **kwargs): - return _extract_component_from_datetime(t, op, extract_fn=F.second, **kwargs) - - -@compiles(ops.ExtractMicrosecond) -def compile_extract_microsecond(t, op, **kwargs): - raise com.UnsupportedOperationError( - "PySpark backend does not support extracting microseconds." - ) - - -@compiles(ops.ExtractMillisecond) -def compile_extract_millisecond(t, op, **kwargs): - raise com.UnsupportedOperationError( - "PySpark backend does not support extracting milliseconds." 
- ) - - -@compiles(ops.DateTruncate) -def compile_date_truncate(t, op, **kwargs): - try: - unit = _time_unit_mapping[op.unit.short] - except KeyError: - raise com.UnsupportedOperationError( - f"{op.unit!r} unit is not supported in timestamp truncate" - ) - - src_column = t.translate(op.arg, **kwargs) - return F.date_trunc(unit, src_column) - - -@compiles(ops.TimestampTruncate) -def compile_timestamp_truncate(t, op, **kwargs): - return compile_date_truncate(t, op, **kwargs) - - -@compiles(ops.Strftime) -def compile_strftime(t, op, **kwargs): - format_str = op.format_str.value - - @pandas_udf("string", PandasUDFType.SCALAR) - def strftime(timestamps): - return timestamps.dt.strftime(format_str) - - src_column = t.translate(op.arg, **kwargs) - return strftime(src_column) - - -@compiles(ops.TimestampFromUNIX) -def compile_timestamp_from_unix(t, op, **kwargs): - unixtime = t.translate(op.arg, **kwargs) - if not op.unit: - return F.to_timestamp(F.from_unixtime(unixtime)) - elif op.unit.short == "s": - fmt = "yyyy-MM-dd HH:mm:ss" - return F.to_timestamp(F.from_unixtime(unixtime, fmt), fmt) - else: - raise com.UnsupportedArgumentError( - "PySpark backend does not support timestamp from unix time with " - f"unit {op.unit.short}. Supported unit is s." - ) - - -@compiles(ops.TimestampNow) -def compile_timestamp_now(t, op, **kwargs): - return F.current_timestamp() - - -@compiles(ops.StringToTimestamp) -def compile_string_to_timestamp(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - fmt = op.format_str.value - return F.to_timestamp(src_column, fmt) - - -@compiles(ops.DayOfWeekIndex) -def compile_day_of_week_index(t, op, **kwargs): - @pandas_udf("short", PandasUDFType.SCALAR) - def day_of_week(s): - return s.dt.dayofweek - - src_column = t.translate(op.arg, **kwargs) - return day_of_week(src_column.cast("timestamp")) - - -@compiles(ops.DayOfWeekName) -def compiles_day_of_week_name(t, op, **kwargs): - @pandas_udf("string", PandasUDFType.SCALAR) - def day_name(s): - return s.dt.day_name() - - src_column = t.translate(op.arg, **kwargs) - return day_name(src_column.cast("timestamp")) - - -def _get_interval_col(t, op, allowed_units=None, **kwargs): - import pandas as pd - - dtype = op.dtype - if not dtype.is_interval(): - raise com.UnsupportedArgumentError( - f"{dtype} expression cannot be converted to interval column. " - "Must be Interval dtype." - ) - if allowed_units and dtype.unit.short not in allowed_units: - raise com.UnsupportedArgumentError( - f'Interval unit "{dtype.unit.short}" is not allowed. Allowed units are: ' - f"{allowed_units}" - ) - - # if interval expression is a binary op, translate expression into - # an interval column and return - if isinstance(op, ops.IntervalBinary): - return t.translate(op, **kwargs) - - # otherwise, translate expression into a literal op and construct - # interval column from literal value and dtype - if isinstance(op, ops.Alias): - op = op.arg - - # TODO(kszucs): t.translate should never return with an ibis operation; - # I assume this is required for special case when casting to intervals, - # see the implementation of ops.Cast compilation - if not isinstance(op, ops.Literal): - op = t.translate(op, **kwargs) - - if isinstance(op.value, pd.Timedelta): - td_nanos = op.value.value - if td_nanos % 1000 != 0: - raise com.UnsupportedArgumentError( - "Interval with nanoseconds is not supported. The " - "smallest unit supported by Spark is microseconds." 
- ) - td_micros = td_nanos // 1000 - return F.expr(f"INTERVAL {td_micros} MICROSECOND") - else: - return F.expr(f"INTERVAL {op.value} {_time_unit_mapping[dtype.unit.short]}") - - -def _compile_datetime_binop(t, op, *, fn, **kwargs): - left = t.translate(op.left, **kwargs) - right = _get_interval_col(t, op.right, **kwargs) - return fn(left, right) - - -@compiles(ops.DateAdd) -def compile_date_add(t, op, **kwargs): - allowed_units = ["Y", "W", "M", "D"] - return _compile_datetime_binop( - t, - op, - fn=lambda lhs, rhs: (lhs + rhs).cast("timestamp"), - allowed_units=allowed_units, - **kwargs, - ) - - -@compiles(ops.DateSub) -def compile_date_sub(t, op, **kwargs): - allowed_units = ["Y", "W", "M", "D"] - return _compile_datetime_binop( - t, - op, - fn=lambda lhs, rhs: (lhs - rhs).cast("timestamp"), - allowed_units=allowed_units, - **kwargs, - ) - - -if vparse(pyspark.__version__) >= vparse("3.3"): - - @compiles(ops.DateDiff) - def compile_date_diff(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - - return F.concat( - F.lit("INTERVAL '"), F.datediff(left, right), F.lit("' DAY") - ).cast( - pt.DayTimeIntervalType( - pt.DayTimeIntervalType.DAY, pt.DayTimeIntervalType.DAY - ) - ) - - -@compiles(ops.TimestampAdd) -def compile_timestamp_add(t, op, **kwargs): - allowed_units = ["Y", "W", "M", "D", "h", "m", "s"] - return _compile_datetime_binop( - t, - op, - fn=lambda lhs, rhs: (lhs + rhs).cast("timestamp"), - allowed_units=allowed_units, - **kwargs, - ) - - -@compiles(ops.TimestampSub) -def compile_timestamp_sub(t, op, **kwargs): - allowed_units = ["Y", "W", "M", "D", "h", "m", "s"] - return _compile_datetime_binop( - t, - op, - fn=lambda lhs, rhs: (lhs - rhs).cast("timestamp"), - allowed_units=allowed_units, - **kwargs, - ) - - -@compiles(ops.TimestampDiff) -def compile_timestamp_diff(t, op, **kwargs): - raise com.UnsupportedOperationError( - "PySpark backend does not support TimestampDiff as there is no " - "timedelta type." - ) - - -def _compile_interval_binop(t, op, fn, **kwargs): - left = _get_interval_col(t, op.left, **kwargs) - right = _get_interval_col(t, op.right, **kwargs) - - return fn(left, right) - - -@compiles(ops.IntervalAdd) -def compile_interval_add(t, op, **kwargs): - return _compile_interval_binop(t, op, fn=operator.add, **kwargs) - - -@compiles(ops.IntervalSubtract) -def compile_interval_subtract(t, op, **kwargs): - return _compile_interval_binop(t, op, fn=operator.sub, **kwargs) - - -@compiles(ops.IntervalFromInteger) -def compile_interval_from_integer(t, op, **kwargs): - raise com.UnsupportedOperationError( - "Interval from integer column is unsupported for the PySpark backend." 
- ) - - -# -------------------------- Array Operations ---------------------------- - - -@compiles(ops.Array) -def compile_array_column(t, op, **kwargs): - cols = [t.translate(col, **kwargs) for col in op.exprs] - return F.array(cols) - - -@compiles(ops.ArrayLength) -def compile_array_length(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.size(src_column) - - -@compiles(ops.ArraySlice) -def compile_array_slice(t, op, **kwargs): - start = op.start.value if op.start is not None else op.start - stop = op.stop.value if op.stop is not None else op.stop - spark_type = PySparkType.from_ibis(op.arg.dtype) - - @F.udf(spark_type) - def array_slice(array): - return array[start:stop] - - src_column = t.translate(op.arg, **kwargs) - return array_slice(src_column) - - -@compiles(ops.ArrayIndex) -def compile_array_index(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - index = op.index.value + 1 - return F.element_at(src_column, index) - - -@compiles(ops.ArrayConcat) -def compile_array_concat(t, op, **kwargs): - return F.concat(*map(partial(t.translate, **kwargs), op.arg)) - - -@compiles(ops.ArrayRepeat) -def compile_array_repeat(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - times = op.times.value - return F.flatten(F.array_repeat(src_column, times)) - - -@compiles(ops.ArrayCollect) -def compile_array_collect(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - if (where := op.where) is not None: - src_column = F.when(t.translate(where, **kwargs), src_column) - return F.collect_list(src_column) - - -@compiles(ops.Argument) -def compile_argument(t, op, arg_columns, **kwargs): - return arg_columns[op.param] - - -@compiles(ops.ArrayFilter) -def compile_array_filter(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.filter( - src_column, - lambda x: t.translate(op.body, arg_columns={op.param: x}, **kwargs), - ) - - -@compiles(ops.ArrayMap) -def compile_array_map(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - return F.transform( - src_column, - lambda x: t.translate(op.body, arg_columns={op.param: x}, **kwargs), - ) - - -# --------------------------- Null Operations ----------------------------- - - -@compiles(ops.NullIf) -def compile_null_if(t, op, **kwargs): - col = t.translate(op.arg, **kwargs) - nullif_col = t.translate(op.null_if_expr, **kwargs) - return F.when(col == nullif_col, F.lit(None)).otherwise(col) - - -@compiles(ops.IsNull) -def compile_is_null(t, op, **kwargs): - arg = op.arg - col = t.translate(arg, **kwargs) - result = F.isnull(col) - if arg.dtype.is_floating(): - result |= F.isnan(col) - return result - - -@compiles(ops.NotNull) -def compile_not_null(t, op, **kwargs): - arg = op.arg - col = t.translate(arg, **kwargs) - result = ~F.isnull(col) - if arg.dtype.is_floating(): - result &= ~F.isnan(col) - return result - - -@compiles(ops.DropNa) -def compile_dropna_table(t, op, **kwargs): - table = t.translate(op.table, **kwargs) - - if op.subset is not None: - subset = [col.name for col in op.subset] - else: - subset = None - - return table.dropna(how=op.how, subset=subset) - - -@compiles(ops.FillNa) -def compile_fillna_table(t, op, **kwargs): - table = t.translate(op.table, **kwargs) - raw_replacements = op.replacements - replacements = ( - dict(raw_replacements) - if isinstance(raw_replacements, frozendict) - else raw_replacements.value - ) - return table.fillna(replacements) - - -# ------------------------- User defined function ------------------------ - - 
-@compiles(ops.ElementWiseVectorizedUDF) -def compile_elementwise_udf(t, op, **kwargs): - spark_output_type = PySparkType.from_ibis(op.return_type) - func = op.func - spark_udf = pandas_udf(func, spark_output_type, PandasUDFType.SCALAR) - func_args = (t.translate(arg, **kwargs) for arg in op.func_args) - return spark_udf(*func_args) - - -@compiles(ops.ReductionVectorizedUDF) -def compile_reduction_udf(t, op, *, aggcontext=None, **kwargs): - spark_output_type = PySparkType.from_ibis(op.return_type) - spark_udf = pandas_udf(op.func, spark_output_type, PandasUDFType.GROUPED_AGG) - func_args = (t.translate(arg, **kwargs) for arg in op.func_args) - - col = spark_udf(*func_args) - if aggcontext: - return col - else: - src_table = t.translate(op.func_args[0].table, **kwargs) - return src_table.agg(col) - - -# NB: this is intentionally not using @compiles because @compiles doesn't -# handle subclasses of operations -def compile_scalar_udf(t, op, **kwargs): - if op.__input_type__ != InputType.PANDAS: - raise NotImplementedError("Only Pandas UDFs are support in the PySpark backend") - - import pandas as pd - - make_series = partial(pd.Series, dtype=op.dtype.to_pandas()) - func = toolz.compose(make_series, op.__func__) - spark_dtype = PySparkType.from_ibis(op.dtype) - spark_udf = pandas_udf(func, spark_dtype, PandasUDFType.SCALAR) - return spark_udf(*map(partial(t.translate, **kwargs), op.args)) - - -@compiles(ops.SearchedCase) -def compile_searched_case(t, op, **kwargs): - existing_when = None - - for case, result in zip(op.cases, op.results): - if existing_when is not None: - # Spark allowed chained when statement - when = existing_when.when - else: - when = F.when - - existing_when = when( - t.translate(case, **kwargs), - t.translate(result, **kwargs), - ) - - return existing_when.otherwise(t.translate(op.default, **kwargs)) - - -@compiles(ops.View) -def compile_view(t, op, **kwargs): - name = op.name - child = op.child - # TODO(kszucs): avoid converting to expr - backend = child.to_expr()._find_backend() - tables = backend._session.catalog.listTables() - if any(name == table.name and not table.isTemporary for table in tables): - raise ValueError(f"table or non-temporary view `{name}` already exists") - result = t.translate(child, **kwargs) - result.createOrReplaceTempView(name) - return result.alias(name) - - -@compiles(ops.SQLStringView) -def compile_sql_view(t, op, **kwargs): - # TODO(kszucs): avoid converting to expr - backend = op.child.to_expr()._find_backend() - result = backend._session.sql(op.query) - name = op.name - result.createOrReplaceTempView(name) - return result.alias(name) - - -@compiles(ops.StringContains) -def compile_string_contains(t, op, **kwargs): - haystack = t.translate(op.haystack, **kwargs) - needle = t.translate(op.needle, **kwargs) - return haystack.contains(needle) - - -@compiles(ops.Unnest) -def compile_unnest(t, op, **kwargs): - column = t.translate(op.arg, **kwargs) - return F.explode(column) - - -@compiles(ops.Acos) -@compiles(ops.Asin) -@compiles(ops.Atan) -@compiles(ops.Cos) -@compiles(ops.Sin) -@compiles(ops.Tan) -def compile_trig(t, op, **kwargs): - arg = t.translate(op.arg, **kwargs) - func_name = op.__class__.__name__.lower() - func = getattr(F, func_name) - return func(arg) - - -@compiles(ops.Cot) -def compile_cot(t, op, **kwargs): - arg = t.translate(op.arg, **kwargs) - return 1.0 / F.tan(arg) - - -@compiles(ops.Atan2) -def compile_atan2(t, op, **kwargs): - y, x = (t.translate(arg, **kwargs) for arg in op.args) - return F.atan2(y, x) - - 
-@compiles(ops.Degrees) -def compile_degrees(t, op, **kwargs): - return F.degrees(t.translate(op.arg, **kwargs)) - - -@compiles(ops.Radians) -def compile_radians(t, op, **kwargs): - return F.radians(t.translate(op.arg, **kwargs)) - - -@compiles(ops.IfElse) -def compile_ifelse(t, op, **kwargs): - return F.when( - t.translate(op.bool_expr, **kwargs), - t.translate(op.true_expr, **kwargs), - ).otherwise(t.translate(op.false_null_expr, **kwargs)) - - -@compiles(ops.RandomScalar) -def compile_random(*args, **kwargs): - return F.rand() - - -@compiles(ops.InMemoryTable) -def compile_in_memory_table(t, op, session, **kwargs): - fields = [ - pt.StructField(name, PySparkType.from_ibis(dtype), dtype.nullable) - for name, dtype in op.schema.items() - ] - schema = pt.StructType(fields) - return session.createDataFrame(data=op.data.to_frame(), schema=schema).alias( - op.name - ) - - -@compiles(ops.BitwiseAnd) -def compile_bitwise_and(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - - return left.bitwiseAND(right) - - -@compiles(ops.BitwiseOr) -def compile_bitwise_or(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - - return left.bitwiseOR(right) - - -@compiles(ops.BitwiseXor) -def compile_bitwise_xor(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - - return left.bitwiseXOR(right) - - -@compiles(ops.BitwiseNot) -def compile_bitwise_not(t, op, **kwargs): - arg = t.translate(op.arg, **kwargs) - return F.bitwise_not(arg) - - -@compiles(ops.JSONGetItem) -def compile_json_getitem(t, op, **kwargs): - arg = t.translate(op.arg, **kwargs) - index = t.translate(op.index, raw=True, **kwargs) - if op.index.dtype.is_integer(): - path = f"$[{index}]" - else: - path = f"$.{index}" - return F.get_json_object(arg, path) - - -@compiles(ops.DummyTable) -def compile_dummy_table(t, op, session=None, **kwargs): - return session.range(0, 1).select( - *(t.translate(value, **kwargs) for value in op.values) - ) - - -@compiles(ops.ScalarParameter) -def compile_scalar_parameter(t, op, timecontext=None, scope=None, **kwargs): - assert scope is not None, "scope is None" - raw_value = scope.get_value(op, timecontext) - return F.lit(raw_value).cast(PySparkType.from_ibis(op.dtype)) - - -@compiles(ops.E) -def compile_e(t, op, **kwargs): - return F.exp(F.lit(1)) - - -@compiles(ops.Pi) -def compile_pi(t, op, **kwargs): - return F.acos(F.lit(-1)) - - -@compiles(ops.Quantile) -@compiles(ops.MultiQuantile) -def compile_quantile(t, op, **kwargs): - return compile_aggregator( - t, op, fn=F.percentile_approx, where_excludes=("quantile",), **kwargs - ) - - -@compiles(ops.ArgMin) -def compile_argmin(t, op, **kwargs): - return compile_aggregator(t, op, fn=F.min_by, **kwargs) - - -@compiles(ops.ArgMax) -def compile_argmax(t, op, **kwargs): - return compile_aggregator(t, op, fn=F.max_by, **kwargs) - - -@compiles(ops.ArrayStringJoin) -def compile_array_string_join(t, op, **kwargs): - arg = t.translate(op.arg, **kwargs) - sep = t.translate(op.sep, raw=True, **kwargs) - return F.concat_ws(sep, arg) - - -@compiles(ops.ArrayContains) -def compile_array_contains(t, op, **kwargs): - arg = t.translate(op.arg, **kwargs) - other = t.translate(op.other, **kwargs) - return F.when( - ~F.isnull(arg), F.coalesce(F.array_contains(arg, other), F.lit(False)) - ).otherwise(F.lit(None)) - - -@compiles(ops.ArrayPosition) -def compile_array_pos(t, op, **kwargs): - arg = t.translate(op.arg, **kwargs) - other = 
t.translate(op.other, raw=True, **kwargs) - return F.array_position(arg, other) - 1 - - -@compiles(ops.ArrayDistinct) -def compile_array_distinct(t, op, **kwargs): - arg = t.translate(op.arg, **kwargs) - return F.array_distinct(arg) - - -@compiles(ops.ArraySort) -def compile_array_sort(t, op, **kwargs): - arg = t.translate(op.arg, **kwargs) - return F.array_sort(arg) - - -@compiles(ops.ArrayRemove) -def compile_array_remove(t, op, **kwargs): - arg = t.translate(op.arg, **kwargs) - other = t.translate(op.other, raw=True, **kwargs) - return F.array_remove(arg, other) - - -@compiles(ops.ArrayUnion) -def compile_array_union(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - return F.array_union(left, right) - - -@compiles(ops.ArrayIntersect) -def compile_array_intersect(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - return F.array_intersect(left, right) - - -@compiles(ops.Hash) -def compile_hash_column(t, op, **kwargs): - return F.hash(t.translate(op.arg, **kwargs)) - - -@compiles(ops.HexDigest) -def compile_hexdigest_column(t, op, **kwargs): - how = op.how - arg = t.translate(op.arg, **kwargs) - - if how == "md5": - return F.md5(arg) - elif how == "sha1": - return F.sha1(arg) - elif how in ("sha256", "sha512"): - return F.sha2(arg, int(how[-3:])) - else: - raise NotImplementedError(how) - - -@compiles(ops.ArrayZip) -def compile_zip(t, op, **kwargs): - return F.arrays_zip(*map(partial(t.translate, **kwargs), op.arg)) - - -@compiles(ops.Levenshtein) -def compile_levenshtein(t, op, **kwargs): - left = t.translate(op.left, **kwargs) - right = t.translate(op.right, **kwargs) - return F.levenshtein(left, right) - - -@compiles(ops.ArrayFlatten) -def compile_flatten(t, op, **kwargs): - return F.flatten(t.translate(op.arg, **kwargs)) - - -def _zero_value(dtype): - if dtype.is_interval(): - return F.expr(f"INTERVAL 0 {dtype.resolution}") - return F.lit(0) - - -def _build_sequence(start, stop, step, zero): - seq = F.sequence(start, stop, step) - length = F.size(seq) - last_element = F.element_at(seq, length) - # slice off the last element if we'd be inclusive on the right - seq = F.when(last_element == stop, F.slice(seq, 1, length - 1)).otherwise(seq) - return F.when( - (step != zero) & (F.signum(step) == F.signum(stop - start)), seq - ).otherwise(F.array()) - - -@compiles(ops.IntegerRange) -def compile_integer_range(t, op, **kwargs): - start = t.translate(op.start, **kwargs) - stop = t.translate(op.stop, **kwargs) - step = t.translate(op.step, **kwargs) - - return _build_sequence(start, stop, step, _zero_value(op.step.dtype)) - - -@compiles(ops.TimestampRange) -def compile_timestamp_range(t, op, **kwargs): - start = t.translate(op.start, **kwargs) - stop = t.translate(op.stop, **kwargs) - - if not isinstance(op.step, ops.Literal): - raise com.UnsupportedOperationError( - "`step` argument of timestamp range must be a literal" - ) - - step_value = op.step.value - unit = op.step.dtype.resolution - - step = F.expr(f"INTERVAL {step_value} {unit}") - - return _build_sequence(start, stop, step, _zero_value(op.step.dtype)) - - -@compiles(ops.RegexSplit) -def compile_regex_split(t, op, **kwargs): - src_column = t.translate(op.arg, **kwargs) - if not isinstance(op.pattern, ops.Literal): - raise com.UnsupportedOperationError( - "`pattern` argument of re_split must be a literal" - ) - pattern = t.translate(op.pattern, raw=True, **kwargs) - return F.split(src_column, pattern) +del _op, _name, _fmt diff --git 
a/ibis/backends/pyspark/converter.py b/ibis/backends/pyspark/converter.py new file mode 100644 index 000000000000..610ad9a04637 --- /dev/null +++ b/ibis/backends/pyspark/converter.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +import datetime + +from ibis.common.temporal import normalize_timezone +from ibis.formats.pandas import PandasData + + +class PySparkPandasData(PandasData): + @classmethod + def convert_Time(cls, s, dtype, pandas_type): + def convert(timedelta): + comps = timedelta.components + return datetime.time( + hour=comps.hours, + minute=comps.minutes, + second=comps.seconds, + microsecond=comps.milliseconds * 1000 + comps.microseconds, + ) + + return s.map(convert, na_action="ignore") + + @classmethod + def convert_Timestamp_element(cls, dtype): + def converter(value, dtype=dtype): + if (tz := dtype.timezone) is not None: + return value.astimezone(normalize_timezone(tz)) + + return value.astimezone(normalize_timezone("UTC")).replace(tzinfo=None) + + return converter diff --git a/ibis/backends/pyspark/datatypes.py b/ibis/backends/pyspark/datatypes.py index 5c0b0aa7a231..d45efc9dc2ed 100644 --- a/ibis/backends/pyspark/datatypes.py +++ b/ibis/backends/pyspark/datatypes.py @@ -6,26 +6,14 @@ import ibis.common.exceptions as com import ibis.expr.datatypes as dt -from ibis.backends.base.sql.registry import sql_type_names -from ibis.formats import TypeMapper - -_sql_type_names = dict(sql_type_names, date="date") +import ibis.expr.schema as sch +from ibis.formats import SchemaMapper, TypeMapper # DayTimeIntervalType introduced in Spark 3.2 (at least) but didn't show up in # PySpark until version 3.3 PYSPARK_33 = vparse(pyspark.__version__) >= vparse("3.3") -def type_to_sql_string(tval): - if tval.is_decimal(): - return f"decimal({tval.precision}, {tval.scale})" - name = tval.name.lower() - try: - return _sql_type_names[name] - except KeyError: - raise com.UnsupportedBackendType(name) - - _from_pyspark_dtypes = { pt.BinaryType: dt.Binary, pt.BooleanType: dt.Boolean, @@ -43,6 +31,8 @@ def type_to_sql_string(tval): _to_pyspark_dtypes = {v: k for k, v in _from_pyspark_dtypes.items()} _to_pyspark_dtypes[dt.JSON] = pt.StringType +_to_pyspark_dtypes[dt.UUID] = pt.StringType + if PYSPARK_33: _pyspark_interval_units = { @@ -116,3 +106,17 @@ def from_ibis(cls, dtype): raise com.IbisTypeError( f"Unable to convert dtype {dtype!r} to pyspark type" ) + + +class PySparkSchema(SchemaMapper): + @classmethod + def from_ibis(cls, schema): + fields = [ + pt.StructField(name, PySparkType.from_ibis(dtype), dtype.nullable) + for name, dtype in schema.items() + ] + return pt.StructType(fields) + + @classmethod + def to_ibis(cls, schema): + return sch.Schema({name: PySparkType.to_ibis(typ) for name, typ in schema}) diff --git a/ibis/backends/pyspark/ddl.py b/ibis/backends/pyspark/ddl.py deleted file mode 100644 index f031367c26c2..000000000000 --- a/ibis/backends/pyspark/ddl.py +++ /dev/null @@ -1,221 +0,0 @@ -from __future__ import annotations - -from ibis.backends.base.sql.ddl import ( - CTAS, - AlterTable, - CreateTable, - CreateTableWithSchema, - DropObject, - InsertSelect, - RenameTable, -) -from ibis.backends.base.sql.registry import quote_identifier -from ibis.backends.pyspark.datatypes import type_to_sql_string - -_format_aliases = {"TEXTFILE": "TEXT"} - - -def _sanitize_format(format): - if format is None: - return None - format = format.upper() - format = _format_aliases.get(format, format) - if format not in ( - "TEXT", - "CSV", - "JSON", - "JDBC", - "PARQUET", - "ORC", - "HIVE", - 
"DELTA", - "LIBSVM", - ): - raise ValueError(f"Invalid format: {format!r}") - - return format - - -def format_tblproperties(props): - formatted_props = _format_properties(props) - return f"TBLPROPERTIES {formatted_props}" - - -def _format_properties(props): - tokens = [] - for k, v in sorted(props.items()): - tokens.append(f" '{k}'='{v}'") - - return "(\n{}\n)".format(",\n".join(tokens)) - - -class CreateTable(CreateTable): - """Create a table.""" - - def __init__( - self, - table_name, - database=None, - format="parquet", - can_exist=False, - tbl_properties=None, - ): - super().__init__( - table_name, - database=database, - external=False, - format=format, - can_exist=can_exist, - partition=None, - tbl_properties=tbl_properties, - ) - - def _storage(self): - return f"USING {self.format}" - - -class CreateTableWithSchema(CreateTableWithSchema): - def _storage(self): - return f"USING {self.format}" - - -class CTAS(CTAS): - """Create Table As Select.""" - - def __init__( - self, - table_name, - select, - database=None, - format="parquet", - can_exist=False, - ): - super().__init__( - table_name, - select, - database=database, - format=format, - can_exist=can_exist, - ) - self.select = select - - def _storage(self): - return f"USING {self.format}" - - -class CreateView(CTAS): - """Create a view.""" - - def __init__( - self, - table_name, - select, - database=None, - can_exist=False, - temporary=False, - ): - super().__init__(table_name, select, database=database, can_exist=can_exist) - self.temporary = temporary - - @property - def _pieces(self): - yield "AS" - yield self.select.compile() - - @property - def _prefix(self): - return f"CREATE {self._or_replace_clause()}{self._temporary_clause()}VIEW" - - def _or_replace_clause(self): - return "OR REPLACE " if self.can_exist else "" - - def _temporary_clause(self): - return "TEMPORARY " if self.temporary else "" - - def _if_exists(self): - return "" - - -def format_schema(schema): - elements = [ - _format_schema_element(name, t) for name, t in zip(schema.names, schema.types) - ] - return "({})".format(",\n ".join(elements)) - - -def _format_schema_element(name, t): - return f"{quote_identifier(name, force=True)} {type_to_sql_string(t)}" - - -class DropDatabase(DropObject): - _object_type = "DATABASE" - - def __init__(self, name, must_exist=True, cascade=False): - super().__init__(must_exist=must_exist) - self.name = name - self.cascade = cascade - - def _object_name(self): - return self.name - - def compile(self): - compiled = super().compile() - if self.cascade: - return f"{compiled} CASCADE" - else: - return compiled - - -class DropFunction(DropObject): - _object_type = "TEMPORARY FUNCTION" - - def __init__(self, name, must_exist=True): - super().__init__(must_exist=must_exist) - self.name = name - self.must_exist = must_exist - - def _object_name(self): - return self.name - - -class InsertSelect(InsertSelect): - def __init__(self, table_name, select_expr, database=None, overwrite=False): - super().__init__( - table_name, - select_expr, - database=database, - partition=None, - partition_schema=None, - overwrite=overwrite, - ) - - def compile(self): - if self.overwrite: - cmd = "INSERT OVERWRITE TABLE" - else: - cmd = "INSERT INTO" - - select_query = self.select.compile() - scoped_name = self._get_scoped_name(self.table_name, self.database) - return f"{cmd} {scoped_name}\n{select_query}" - - -class AlterTable(AlterTable): - def __init__(self, table, tbl_properties=None): - super().__init__( - table, - location=None, - format=None, - 
tbl_properties=tbl_properties, - serde_properties=None, - ) - - def compile(self): - props = self._format_properties() - action = f"{self.table} SET{props}" - return self._wrap_command(action) - - -class RenameTable(RenameTable): - def __init__(self, old_name, new_name): - super().__init__(old_name, new_name, dialect="spark") diff --git a/ibis/backends/pyspark/tests/test_aggregation.py b/ibis/backends/pyspark/tests/test_aggregation.py index 09e066cbb668..7ceaddbe30b5 100644 --- a/ibis/backends/pyspark/tests/test_aggregation.py +++ b/ibis/backends/pyspark/tests/test_aggregation.py @@ -19,7 +19,7 @@ def test_aggregation_float_nulls(con, result_fn, expected_fn, monkeypatch): monkeypatch.setattr(ibis.options.pyspark, "treat_nan_as_null", True) table = con.table("null_table") - df = table.compile().toPandas() + df = table.execute() expr = result_fn(table) result = expr.execute() diff --git a/ibis/backends/pyspark/tests/test_array.py b/ibis/backends/pyspark/tests/test_array.py index b4ebb55513ae..8d45e24e9358 100644 --- a/ibis/backends/pyspark/tests/test_array.py +++ b/ibis/backends/pyspark/tests/test_array.py @@ -10,14 +10,20 @@ pytest.importorskip("pyspark") -def test_array_length(con): - table = con.table("array_table") +@pytest.fixture +def t(con): + return con.table("array_table") - result = table.mutate(length=table.array_int.length()).compile() - expected = table.compile().toPandas() - expected["length"] = expected["array_int"].map(lambda a: len(a)).astype("int32") - tm.assert_frame_equal(result.toPandas(), expected) +@pytest.fixture +def df(con): + return con._session.table("array_table").toPandas() + + +def test_array_length(t, df): + result = t.mutate(length=t.array_int.length()).execute() + expected = df.assign(length=df.array_int.map(lambda a: len(a))) + tm.assert_frame_equal(result, expected) def test_array_length_scalar(con): @@ -44,14 +50,10 @@ def test_array_length_scalar(con): (-3, -1), ], ) -def test_array_slice(con, start, stop): - table = con.table("array_table") - - result = table.mutate(sliced=table.array_int[start:stop]).compile() - - expected = table.compile().toPandas() - expected["sliced"] = expected["array_int"].map(lambda a: a[start:stop]) - tm.assert_frame_equal(result.toPandas(), expected) +def test_array_slice(t, df, start, stop): + result = t.mutate(sliced=t.array_int[start:stop]).execute() + expected = df.assign(sliced=df.array_int.map(lambda a: a[start:stop])) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -79,12 +81,10 @@ def test_array_slice_scalar(con, start, stop): @pytest.mark.parametrize("index", [1, 3, 4, 11, -11]) -def test_array_index(con, index): - table = con.table("array_table") - expr = table[table.array_int[index].name("indexed")] +def test_array_index(t, df, index): + expr = t[t.array_int[index].name("indexed")] result = expr.execute() - df = table.compile().toPandas() expected = pd.DataFrame( { "indexed": df.array_int.apply( @@ -106,14 +106,12 @@ def test_array_index_scalar(con, index): @pytest.mark.parametrize("op", [lambda x, y: x + y, lambda x, y: y + x]) -def test_array_concat(con, op): - table = con.table("array_table") - x = table.array_int.cast("array") - y = table.array_str +def test_array_concat(t, df, op): + x = t.array_int.cast("array") + y = t.array_str expr = op(x, y).name("array_result") result = expr.execute() - df = table.compile().toPandas() expected = op(df.array_int.apply(lambda x: list(map(str, x))), df.array_str).rename( "array_result" ) @@ -133,13 +131,10 @@ def test_array_concat_scalar(con, op): 
@pytest.mark.parametrize("n", [1, 3, 4, 7, -2]) # negative returns empty list @pytest.mark.parametrize("mul", [lambda x, n: x * n, lambda x, n: n * x]) -def test_array_repeat(con, n, mul): - table = con.table("array_table") - - expr = table.select(mul(table.array_int, n).name("repeated")) +def test_array_repeat(t, df, n, mul): + expr = t.select(mul(t.array_int, n).name("repeated")) result = expr.execute() - df = table.compile().toPandas() expected = pd.DataFrame({"repeated": df.array_int * n}) tm.assert_frame_equal(result, expected) @@ -155,12 +150,10 @@ def test_array_repeat_scalar(con, n, mul): assert result == expected -def test_array_collect(con): - table = con.table("array_table") - expr = table.group_by(table.key).aggregate(collected=table.array_int.collect()) +def test_array_collect(t, df): + expr = t.group_by(t.key).aggregate(collected=t.array_int.collect()) result = expr.execute().sort_values("key").reset_index(drop=True) - df = table.compile().toPandas() expected = ( df.groupby("key") .array_int.apply(list) @@ -170,13 +163,10 @@ def test_array_collect(con): tm.assert_frame_equal(result, expected) -def test_array_filter(con): - table = con.table("array_table") - expr = table.select( - table.array_int.filter(lambda item: item != 3).name("array_int") - ) +def test_array_filter(t, df): + expr = t.select(t.array_int.filter(lambda item: item != 3).name("array_int")) result = expr.execute() - df = table.compile().toPandas() + df["array_int"] = df["array_int"].apply( lambda ar: [item for item in ar if item != 3] ) diff --git a/ibis/backends/pyspark/tests/test_basic.py b/ibis/backends/pyspark/tests/test_basic.py index 31974cdf5306..f0e99a7c3bea 100644 --- a/ibis/backends/pyspark/tests/test_basic.py +++ b/ibis/backends/pyspark/tests/test_basic.py @@ -11,33 +11,28 @@ pyspark = pytest.importorskip("pyspark") -import pyspark.sql.functions as F # noqa: E402 -from ibis.backends.pyspark.compiler import _can_be_replaced_by_column_name # noqa: E402 +@pytest.fixture +def t(con): + return con.table("basic_table") -def test_basic(con): - table = con.table("basic_table") - result = table.compile().toPandas() - expected = pd.DataFrame({"id": range(10), "str_col": "value"}) +@pytest.fixture +def df(con): + return con._session.table("basic_table").toPandas() - tm.assert_frame_equal(result, expected) +def test_basic(t): + result = t.execute() + expected = pd.DataFrame({"id": range(10), "str_col": "value"}) + tm.assert_frame_equal(result, expected) -def test_projection(con): - table = con.table("basic_table") - result1 = table.mutate(v=table["id"]).compile().toPandas() +def test_projection(t): + result1 = t.mutate(v=t["id"]).execute() expected1 = pd.DataFrame({"id": range(10), "str_col": "value", "v": range(10)}) - result2 = ( - table.mutate(v=table["id"]) - .mutate(v2=table["id"]) - .mutate(id=table["id"] * 2) - .compile() - .toPandas() - ) - + result2 = t.mutate(v=t["id"]).mutate(v2=t["id"]).mutate(id=t["id"] * 2).execute() expected2 = pd.DataFrame( { "id": range(0, 20, 2), @@ -51,83 +46,35 @@ def test_projection(con): tm.assert_frame_equal(result2, expected2) -def test_aggregation_col(con): - table = con.table("basic_table") - result = table["id"].count().execute() - assert result == table.compile().count() - - -def test_aggregation(con): - table = con.table("basic_table") - result = table.aggregate(max=table["id"].max()).compile() - expected = table.compile().agg(F.max("id").alias("max")) +def test_aggregation_col(t, df): + result = t["id"].count().execute() + assert result == len(df) - 
tm.assert_frame_equal(result.toPandas(), expected.toPandas()) +def test_aggregation(t, df): + result = t.aggregate(max=t["id"].max()).execute() + expected = pd.DataFrame({"max": [df.id.max()]}) + tm.assert_frame_equal(result, expected) -def test_group_by(con): - table = con.table("basic_table") - result = table.group_by("id").aggregate(max=table["id"].max()).compile() - expected = table.compile().groupby("id").agg(F.max("id").alias("max")) - tm.assert_frame_equal(result.toPandas(), expected.toPandas()) +def test_group_by(t, df): + result = t.group_by("id").aggregate(max=t["id"].max()).execute() + expected = df[["id"]].assign(max=df.groupby("id").id.max()) + tm.assert_frame_equal(result, expected) -def test_window(con): - table = con.table("basic_table") +def test_window(t, df): w = ibis.window() - result = table.mutate( - grouped_demeaned=table["id"] - table["id"].mean().over(w) - ).compile() - - spark_window = pyspark.sql.Window.partitionBy() - spark_table = table.compile() - expected = spark_table.withColumn( - "grouped_demeaned", - spark_table["id"] - F.mean(spark_table["id"]).over(spark_window), - ) - - tm.assert_frame_equal(result.toPandas(), expected.toPandas()) - - -def test_greatest(con): - table = con.table("basic_table") - result = table.mutate(greatest=ibis.greatest(table.id)).compile() - df = table.compile() - expected = table.compile().withColumn("greatest", df.id) - - tm.assert_frame_equal(result.toPandas(), expected.toPandas()) - - -def test_selection(con): - table = con.table("basic_table") - table = table.mutate(id2=table["id"] * 2) - - result1 = table[["id"]].compile() - result2 = table[["id", "id2"]].compile() - result3 = table[[table, (table.id + 1).name("plus1")]].compile() - result4 = table[[(table.id + 1).name("plus1"), table]].compile() - - df = table.compile() - tm.assert_frame_equal(result1.toPandas(), df[["id"]].toPandas()) - tm.assert_frame_equal(result2.toPandas(), df[["id", "id2"]].toPandas()) - tm.assert_frame_equal( - result3.toPandas(), - df[[df.columns]].withColumn("plus1", df.id + 1).toPandas(), - ) - tm.assert_frame_equal( - result4.toPandas(), - df.withColumn("plus1", df.id + 1)[["plus1", *df.columns]].toPandas(), - ) + result = t.mutate(grouped_demeaned=t["id"] - t["id"].mean().over(w)).execute() + expected = df.assign(grouped_demeaned=df.id - df.id.mean()) + tm.assert_frame_equal(result, expected) -def test_join(con): - table = con.table("basic_table") - result = table.join(table, ["id", "str_col"])[table.id, table.str_col].compile() - spark_table = table.compile() - expected = spark_table.join(spark_table, ["id", "str_col"]) - tm.assert_frame_equal(result.toPandas(), expected.toPandas()) +def test_greatest(t, df): + result = t.mutate(greatest=ibis.greatest(t.id, t.id + 1)).execute() + expected = df.assign(greatest=df.id + 1) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -149,72 +96,26 @@ def test_join(con): ), ], ) -def test_filter(con, filter_fn, expected_fn): - table = con.table("basic_table") - - result = filter_fn(table).compile() - - df = table.compile() - expected = expected_fn(df) - - tm.assert_frame_equal(result.toPandas(), expected.toPandas()) - - -def test_cast(con): - table = con.table("basic_table") - - result = table.mutate(id_string=table.id.cast("string")).compile() +def test_filter(t, df, filter_fn, expected_fn): + result = filter_fn(t).execute().reset_index(drop=True) + expected = expected_fn(df).reset_index(drop=True) + tm.assert_frame_equal(result, expected) - df = table.compile() - df = 
df.withColumn("id_string", df.id.cast("string")) - tm.assert_frame_equal(result.toPandas(), df.toPandas()) +def test_cast(t, df): + result = t.mutate(id_string=t.id.cast("string")).execute() + df = df.assign(id_string=df.id.astype(str)) + tm.assert_frame_equal(result, df) -def test_alias_after_select(con): +def test_alias_after_select(t, df): # Regression test for issue 2136 - table = con.table("basic_table") - table = table[["id"]] + table = t[["id"]] table = table.mutate(id2=table["id"]) - - result = table.compile().toPandas() + result = table.execute() tm.assert_series_equal(result["id"], result["id2"], check_names=False) -@pytest.mark.parametrize( - ("selection_fn", "selection_idx", "expected"), - [ - # selected column id is selections[0], OK to replace since - # id == t['id'] (straightforward column projection) - (lambda t: t[["id"]], 0, True), - # new column v is selections[1], cannot be replaced since it does - # not exist in the root table - (lambda t: t.mutate(v=t["id"]), 1, False), - # new column id is selections[0], cannot be replaced since - # new id != t['id'] - (lambda t: t.mutate(id=t["str_col"]), 0, False), - # new column id is selections[0], OK to replace since - # new id == t['id'] (mutation is no-op) - (lambda t: t.mutate(id=t["id"]), 0, True), - # new column id is selections[0], cannot be replaced since - # new id != t['id'] - (lambda t: t.mutate(id=t["id"] + 1), 0, False), - # new column id is selections[0], OK to replace since - # new id == t['id'] (rename is a no-op) - (lambda t: t.rename({"id": "id"}), 0, True), - # new column id2 is selections[0], cannot be replaced since - # id2 does not exist in the table - (lambda t: t.rename({"id2": "id"}), 0, False), - ], -) -def test_can_be_replaced_by_column_name(selection_fn, selection_idx, expected): - table = ibis.table([("id", "double"), ("str_col", "string")]) - table = selection_fn(table) - selection_to_test = table.op().selections[selection_idx] - result = _can_be_replaced_by_column_name(selection_to_test, table.op().table) - assert result == expected - - def test_interval_columns(con): table = con.table("interval_table") assert table.schema() == ibis.schema( @@ -241,3 +142,9 @@ def test_interval_columns_invalid(con): msg = r"DayTimeIntervalType\(0, 1\) couldn't be converted to Interval" with pytest.raises(IbisTypeError, match=msg): con.table("invalid_interval_table") + + +def test_string_literal_backslash_escaping(con): + expr = ibis.literal("\\d\\e") + result = con.execute(expr) + assert result == "\\d\\e" diff --git a/ibis/backends/pyspark/tests/test_ddl.py b/ibis/backends/pyspark/tests/test_ddl.py index 0e925a7085c4..834b61aa517a 100644 --- a/ibis/backends/pyspark/tests/test_ddl.py +++ b/ibis/backends/pyspark/tests/test_ddl.py @@ -7,7 +7,6 @@ import pytest import ibis -import ibis.common.exceptions as com from ibis import util from ibis.tests.util import assert_equal @@ -73,7 +72,6 @@ def test_truncate_table(con, alltypes, temp_table): expr = alltypes.limit(1) con.create_table(temp_table, obj=expr) - con.truncate_table(temp_table) t = con.table(temp_table) @@ -81,15 +79,6 @@ def test_truncate_table(con, alltypes, temp_table): assert not nrows -def test_truncate_table_expression(con, alltypes, temp_table): - expr = alltypes.limit(1) - - t = con.create_table(temp_table, obj=expr) - t.truncate() - nrows = t.count().execute() - assert not nrows - - def test_ctas_from_table_expr(con, alltypes, temp_table_db): expr = alltypes db, table_name = temp_table_db @@ -120,26 +109,22 @@ def test_insert_table(con, alltypes, 
temp_table, test_data_db):
     db = test_data_db
 
     con.create_table(temp_table, expr.limit(0), database=db)
 
     con.insert(temp_table, expr.limit(10), database=db)
-
-    # check using SparkTable.insert
-    t = con.table(temp_table, database=db)
-    t.insert(expr.limit(10))
-
-    sz = t.count()
-    assert sz.execute() == 20
+    assert con.table(temp_table).count().execute() == 10
 
     # Overwrite and verify only 10 rows now
-    t.insert(expr.limit(10), overwrite=True)
-    assert sz.execute() == 10
+    con.insert(temp_table, expr.limit(10), overwrite=True)
+    assert con.table(temp_table).count().execute() == 10
+
+    con.insert(temp_table, expr.limit(10), database=db, overwrite=False)
+    assert con.table(temp_table).count().execute() == 20
 
 
 def test_insert_validate_types(con, alltypes, test_data_db, temp_table):
     db = test_data_db
     expr = alltypes
-    t = con.create_table(
+    con.create_table(
         temp_table,
         schema=expr["tinyint_col", "int_col", "string_col"].schema(),
         database=db,
@@ -148,27 +133,20 @@ def test_insert_validate_types(con, alltypes, test_data_db, temp_table):
     to_insert = expr[
         expr.tinyint_col, expr.smallint_col.name("int_col"), expr.string_col
     ]
-    t.insert(to_insert.limit(10))
+    con.insert(temp_table, to_insert.limit(10))
 
     to_insert = expr[
         expr.tinyint_col,
         expr.smallint_col.cast("int32").name("int_col"),
         expr.string_col,
     ]
-    t.insert(to_insert.limit(10))
-
-    to_insert = expr[expr.tinyint_col, expr.bigint_col.name("int_col"), expr.string_col]
-
-    limit_expr = to_insert.limit(10)
-    with pytest.raises(com.IbisError):
-        t.insert(limit_expr)
+    con.insert(temp_table, to_insert.limit(10))
 
 
 def test_compute_stats(con, alltypes, temp_table):
-    t = con.create_table(temp_table, alltypes)
-    t.compute_stats()
-    t.compute_stats(noscan=True)
+    con.create_table(temp_table, alltypes)
     con.compute_stats(temp_table)
+    con.compute_stats(temp_table, noscan=True)
 
 
 @pytest.fixture
@@ -194,19 +172,6 @@ def table(con, temp_database):
     con.drop_table(table_name, database=temp_database)
 
 
-def test_change_properties(con, table, temp_database):
-    props = {"foo": "1", "bar": "2"}
-
-    table.alter(tbl_properties=props)
-    tbl_props_rows = con.raw_sql(
-        f"show tblproperties {temp_database}.{table.name}"
-    ).fetchall()
-    for row in tbl_props_rows:
-        key = row.key
-        value = row.value
-        assert value == props[key]
-
-
 @pytest.fixture
 def keyword_t(con):
     yield "distinct"
diff --git a/ibis/backends/pyspark/tests/test_null.py b/ibis/backends/pyspark/tests/test_null.py
index 4603c3a41185..048330d6b39a 100644
--- a/ibis/backends/pyspark/tests/test_null.py
+++ b/ibis/backends/pyspark/tests/test_null.py
@@ -8,19 +8,19 @@
 
 def test_isnull(con):
     table = con.table("null_table")
-    table_pandas = table.compile().toPandas()
+    table_pandas = table.execute()
 
     for col, _ in table_pandas.items():
-        result = table[table[col].isnull()].compile().toPandas().reset_index(drop=True)
+        result = table[table[col].isnull()].execute().reset_index(drop=True)
         expected = table_pandas[table_pandas[col].isnull()].reset_index(drop=True)
 
         tm.assert_frame_equal(result, expected)
 
 
 def test_notnull(con):
     table = con.table("null_table")
-    table_pandas = table.compile().toPandas()
+    table_pandas = table.execute()
 
     for col, _ in table_pandas.items():
-        result = table[table[col].notnull()].compile().toPandas().reset_index(drop=True)
+        result = table[table[col].notnull()].execute().reset_index(drop=True)
         expected = table_pandas[table_pandas[col].notnull()].reset_index(drop=True)
 
         tm.assert_frame_equal(result, expected)
diff --git a/ibis/backends/pyspark/tests/test_timecontext.py 
b/ibis/backends/pyspark/tests/test_timecontext.py deleted file mode 100644 index dfdf670ced1c..000000000000 --- a/ibis/backends/pyspark/tests/test_timecontext.py +++ /dev/null @@ -1,116 +0,0 @@ -from __future__ import annotations - -import pandas as pd -import pandas.testing as tm -import pytest - -import ibis -import ibis.expr.operations as ops -from ibis.backends.base.df.timecontext import adjust_context - -pytest.importorskip("pyspark") - -from ibis.backends.pyspark.compiler import ( # noqa: E402 - compile_window_function, - compiles, -) -from ibis.backends.pyspark.timecontext import combine_time_context # noqa: E402 - - -def test_table_with_timecontext(con): - table = con.table("time_indexed_table") - context = (pd.Timestamp("20170102"), pd.Timestamp("20170103")) - result = table.execute(timecontext=context) - expected = table.execute() - expected = expected[expected.time.between(*context)] - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - ("contexts", "expected"), - [ - ( - [ - (pd.Timestamp("20200102"), pd.Timestamp("20200103")), - (pd.Timestamp("20200101"), pd.Timestamp("20200106")), - ], - (pd.Timestamp("20200101"), pd.Timestamp("20200106")), - ), # superset - ( - [ - (pd.Timestamp("20200101"), pd.Timestamp("20200103")), - (pd.Timestamp("20200102"), pd.Timestamp("20200106")), - ], - (pd.Timestamp("20200101"), pd.Timestamp("20200106")), - ), # overlap - ( - [ - (pd.Timestamp("20200101"), pd.Timestamp("20200103")), - (pd.Timestamp("20200202"), pd.Timestamp("20200206")), - ], - (pd.Timestamp("20200101"), pd.Timestamp("20200206")), - ), # non-overlap - ( - [(pd.Timestamp("20200101"), pd.Timestamp("20200103")), None], - (pd.Timestamp("20200101"), pd.Timestamp("20200103")), - ), # None in input - ([None], None), # None for all - ( - [ - (pd.Timestamp("20200102"), pd.Timestamp("20200103")), - (pd.Timestamp("20200101"), pd.Timestamp("20200106")), - (pd.Timestamp("20200109"), pd.Timestamp("20200110")), - ], - (pd.Timestamp("20200101"), pd.Timestamp("20200110")), - ), # complex - ], -) -def test_combine_time_context(contexts, expected): - assert combine_time_context(contexts) == expected - - -def test_adjust_context_scope(con): - """Test that `adjust_context` has access to `scope` by default.""" - table = con.table("time_indexed_table") - - # Window is the only context-adjusted node that the PySpark backend - # can compile. Ideally we would test the context adjustment logic for - # Window itself, but building this test like that would unfortunately - # affect other tests that involve Window. - # To avoid that, we'll create a dummy subclass of Window and build the - # test around that. - - class CustomWindowFunction(ops.WindowFunction): - pass - - # Tell the Spark backend compiler it should compile CustomWindow just - # like Window - compiles(CustomWindowFunction)(compile_window_function) - - # Create an `adjust_context` function for this subclass that simply checks - # that `scope` is passed in. 
- @adjust_context.register(CustomWindowFunction) - def adjust_context_window_check_scope(op, scope, timecontext): - """Confirms that `scope` is passed in.""" - assert scope is not None - return timecontext - - # Do an operation that will trigger context adjustment - # on a CustomWindow - value_count = table["value"].count() - window = ibis.window( - ibis.interval(hours=1), - 0, - order_by="time", - group_by="key", - ) - frame = window.bind(table) - - # the argument needs to be pull out from the alias - # any extensions must do the same - value_count_over_win = CustomWindowFunction(value_count, frame).to_expr() - - expr = table.mutate(value_count_over_win=value_count_over_win) - - context = (pd.Timestamp("20170105"), pd.Timestamp("20170111")) - expr.execute(timecontext=context) diff --git a/ibis/backends/pyspark/tests/test_window.py b/ibis/backends/pyspark/tests/test_window.py index 964c5815a376..f7d23e69f02c 100644 --- a/ibis/backends/pyspark/tests/test_window.py +++ b/ibis/backends/pyspark/tests/test_window.py @@ -11,6 +11,16 @@ from pyspark.sql.window import Window # noqa: E402 +@pytest.fixture +def t(con): + return con.table("time_indexed_table") + + +@pytest.fixture +def spark_table(con): + return con._session.table("time_indexed_table") + + @pytest.mark.parametrize( ("ibis_windows", "spark_range"), [ @@ -23,11 +33,9 @@ ], indirect=["ibis_windows"], ) -def test_time_indexed_window(con, ibis_windows, spark_range): - table = con.table("time_indexed_table") - result = table.mutate(mean=table["value"].mean().over(ibis_windows[0])).compile() - result_pd = result.toPandas() - spark_table = table.compile() +def test_time_indexed_window(t, spark_table, ibis_windows, spark_range): + result = t.mutate(mean=t["value"].mean().over(ibis_windows[0])).execute() + spark_window = ( Window.partitionBy("key") .orderBy(F.col("time").cast("long")) @@ -37,7 +45,8 @@ def test_time_indexed_window(con, ibis_windows, spark_range): "mean", F.mean(spark_table["value"]).over(spark_window), ).toPandas() - tm.assert_frame_equal(result_pd, expected) + + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -50,15 +59,12 @@ def test_time_indexed_window(con, ibis_windows, spark_range): ], indirect=["ibis_windows"], ) -def test_multiple_windows(con, ibis_windows, spark_range): - table = con.table("time_indexed_table") - result = table.mutate( - mean_1h=table["value"].mean().over(ibis_windows[0]), - mean_2h=table["value"].mean().over(ibis_windows[1]), - ).compile() - result_pd = result.toPandas() +def test_multiple_windows(t, spark_table, ibis_windows, spark_range): + result = t.mutate( + mean_1h=t["value"].mean().over(ibis_windows[0]), + mean_2h=t["value"].mean().over(ibis_windows[1]), + ).execute() - spark_table = table.compile() spark_window = ( Window.partitionBy("key") .orderBy(F.col("time").cast("long")) @@ -80,4 +86,4 @@ def test_multiple_windows(con, ibis_windows, spark_range): ) .toPandas() ) - tm.assert_frame_equal(result_pd, expected) + tm.assert_frame_equal(result, expected) diff --git a/ibis/backends/pyspark/tests/test_window_context_adjustment.py b/ibis/backends/pyspark/tests/test_window_context_adjustment.py deleted file mode 100644 index 4b770483ccb6..000000000000 --- a/ibis/backends/pyspark/tests/test_window_context_adjustment.py +++ /dev/null @@ -1,427 +0,0 @@ -from __future__ import annotations - -import pandas as pd -import pandas.testing as tm -import pytest - -import ibis - -pyspark = pytest.importorskip("pyspark") - -import pyspark.sql.functions as F # noqa: E402 -from 
pyspark.sql.window import Window # noqa: E402 - - -@pytest.mark.parametrize( - ("ibis_windows", "spark_range"), - [ - ([(ibis.interval(hours=1), 0)], (-3600, 0)), # 1h back looking window - ([(ibis.interval(hours=2), 0)], (-7200, 0)), # 2h back looking window - ( - [(0, ibis.interval(hours=1))], - (0, 3600), - ), # 1h forward looking window - ( - [(ibis.interval(hours=1), ibis.interval(hours=1))], - (-3600, 3600), - ), # both forward and trailing - ], - indirect=["ibis_windows"], -) -def test_window_with_timecontext(con, ibis_windows, spark_range): - """Test context adjustment for trailing / range window. - - We expand context according to window sizes, for example, for a table of: - time value - 2020-01-01 a - 2020-01-02 b - 2020-01-03 c - 2020-01-04 d - with context = (2020-01-03, 2002-01-04) trailing count for 1 day will be: - time value count - 2020-01-03 c 2 - 2020-01-04 d 2 - trailing count for 2 days will be: - time value count - 2020-01-03 c 3 - 2020-01-04 d 3 - with context = (2020-01-01, 2002-01-02) count for 1 day forward looking - window will be: - time value count - 2020-01-01 a 2 - 2020-01-02 b 2 - """ - table = con.table("time_indexed_table") - context = ( - pd.Timestamp("20170102 07:00:00", tz="UTC"), - pd.Timestamp("20170103", tz="UTC"), - ) - result_pd = table.mutate( - count=table["value"].count().over(ibis_windows[0]) - ).execute(timecontext=context) - spark_table = table.compile() - spark_window = ( - Window.partitionBy("key") - .orderBy(F.col("time").cast("long")) - .rangeBetween(*spark_range) - ) - expected = spark_table.withColumn( - "count", - F.count(spark_table["value"]).over(spark_window), - ).toPandas() - expected = expected[ - expected.time.between(*(t.tz_convert(None) for t in context)) - ].reset_index(drop=True) - tm.assert_frame_equal(result_pd, expected) - - -@pytest.mark.parametrize( - ("ibis_windows", "spark_range"), - [([(None, 0)], (Window.unboundedPreceding, 0))], - indirect=["ibis_windows"], -) -def test_cumulative_window(con, ibis_windows, spark_range): - """Test context adjustment for cumulative window. - - For cumulative window, by definition we should look back infinitely. - When data is trimmed by time context, we define the limit of looking - back is the start time of given time context. Thus for a table of - time value - 2020-01-01 a - 2020-01-02 b - 2020-01-03 c - 2020-01-04 d - with context = (2020-01-02, 2002-01-03) cumulative count will be: - time value count - 2020-01-02 b 1 - 2020-01-03 c 2 - """ - table = con.table("time_indexed_table") - context = ( - pd.Timestamp("20170102 07:00:00", tz="UTC"), - pd.Timestamp("20170105", tz="UTC"), - ) - result_pd = table.mutate( - count_cum=table["value"].count().over(ibis_windows[0]) - ).execute(timecontext=context) - - spark_table = table.compile(timecontext=context) - spark_window = ( - Window.partitionBy("key") - .orderBy(F.col("time").cast("long")) - .rangeBetween(*spark_range) - ) - expected = spark_table.withColumn( - "count_cum", - F.count(spark_table["value"]).over(spark_window), - ).toPandas() - expected = expected[ - expected.time.between(*(t.tz_convert(None) for t in context)) - ].reset_index(drop=True) - tm.assert_frame_equal(result_pd, expected) - - -@pytest.mark.parametrize( - ("ibis_windows", "spark_range"), - [ - ( - [(ibis.interval(hours=1), 0), (ibis.interval(hours=2), 0)], - [(-3600, 0), (-7200, 0)], - ) - ], - indirect=["ibis_windows"], -) -def test_multiple_trailing_window(con, ibis_windows, spark_range): - """Test context adjustment for multiple trailing window. 
- - When there are multiple window ops, we need to verify contexts are - adjusted correctly for all windows. In this tests we are constructing - one trailing window for 1h and another trailing window for 2h - """ - table = con.table("time_indexed_table") - context = ( - pd.Timestamp("20170102 07:00:00", tz="UTC"), - pd.Timestamp("20170105", tz="UTC"), - ) - result_pd = table.mutate( - count_1h=table["value"].count().over(ibis_windows[0]), - count_2h=table["value"].count().over(ibis_windows[1]), - ).execute(timecontext=context) - - spark_table = table.compile() - spark_window_1h = ( - Window.partitionBy("key") - .orderBy(F.col("time").cast("long")) - .rangeBetween(*spark_range[0]) - ) - spark_window_2h = ( - Window.partitionBy("key") - .orderBy(F.col("time").cast("long")) - .rangeBetween(*spark_range[1]) - ) - expected = ( - spark_table.withColumn( - "count_1h", F.count(spark_table["value"]).over(spark_window_1h) - ) - .withColumn("count_2h", F.count(spark_table["value"]).over(spark_window_2h)) - .toPandas() - ) - expected = expected[ - expected.time.between(*(t.tz_convert(None) for t in context)) - ].reset_index(drop=True) - tm.assert_frame_equal(result_pd, expected) - - -@pytest.mark.parametrize( - ("ibis_windows", "spark_range"), - [ - ( - [(ibis.interval(hours=1), 0), (ibis.interval(hours=2), 0)], - [(-3600, 0), (-7200, 0)], - ) - ], - indirect=["ibis_windows"], -) -def test_chained_trailing_window(con, ibis_windows, spark_range): - """Test context adjustment for chained windows. - - When there are chained window ops, we need to verify contexts are - adjusted correctly for all windows. In this tests we are constructing - one trailing window for 1h and trailing window on the new column for - 2h - """ - table = con.table("time_indexed_table") - context = ( - pd.Timestamp("20170102 07:00:00", tz="UTC"), - pd.Timestamp("20170105", tz="UTC"), - ) - table = table.mutate( - new_col=table["value"].count().over(ibis_windows[0]), - ) - table = table.mutate(count=table["new_col"].count().over(ibis_windows[1])) - result_pd = table.execute(timecontext=context) - - spark_table = table.compile() - spark_window_1h = ( - Window.partitionBy("key") - .orderBy(F.col("time").cast("long")) - .rangeBetween(*spark_range[0]) - ) - spark_window_2h = ( - Window.partitionBy("key") - .orderBy(F.col("time").cast("long")) - .rangeBetween(*spark_range[1]) - ) - spark_table = spark_table.withColumn( - "new_col", F.count(spark_table["value"]).over(spark_window_1h) - ) - spark_table = spark_table.withColumn( - "count", F.count(spark_table["new_col"]).over(spark_window_2h) - ) - expected = spark_table.toPandas() - expected = expected[ - expected.time.between(*(t.tz_convert(None) for t in context)) - ].reset_index(drop=True) - tm.assert_frame_equal(result_pd, expected) - - -@pytest.mark.xfail( - reason="Issue #2457 Adjust context properly for mixed rolling window," - " cumulative window and non window ops", - strict=True, -) -@pytest.mark.parametrize( - ("ibis_windows", "spark_range"), - [ - ( - [(ibis.interval(hours=1), 0), (None, 0)], - [(-3600, 0), (Window.unboundedPreceding, 0)], - ) - ], - indirect=["ibis_windows"], -) -def test_rolling_with_cumulative_window(con, ibis_windows, spark_range): - """Test context adjustment for rolling window and cumulative window. - - cumulative window should calculate only with in user's context, - while rolling window should calculate on expanded context. 
- For a rolling window of 1 day, - time value - 2020-01-01 a - 2020-01-02 b - 2020-01-03 c - 2020-01-04 d - with context = (2020-01-02, 2002-01-03), count will be: - time value roll_count cum_count - 2020-01-02 b 2 1 - 2020-01-03 c 2 2 - """ - table = con.table("time_indexed_table") - context = ( - pd.Timestamp("20170102 07:00:00", tz="UTC"), - pd.Timestamp("20170105", tz="UTC"), - ) - result_pd = table.mutate( - count_1h=table["value"].count().over(ibis_windows[0]), - count_cum=table["value"].count().over(ibis_windows[1]), - ).execute(timecontext=context) - - spark_table = table.compile() - spark_window_1h = ( - Window.partitionBy("key") - .orderBy(F.col("time").cast("long")) - .rangeBetween(*spark_range[0]) - ) - spark_window_cum = ( - Window.partitionBy("key") - .orderBy(F.col("time").cast("long")) - .rangeBetween(*spark_range[1]) - ) - expected = ( - spark_table.withColumn( - "count_1h", F.count(spark_table["value"]).over(spark_window_1h) - ) - .withColumn("count_cum", F.count(spark_table["value"]).over(spark_window_cum)) - .toPandas() - ) - expected = expected[ - expected.time.between(*(t.tz_convert(None) for t in context)) - ].reset_index(drop=True) - tm.assert_frame_equal(result_pd, expected) - - -@pytest.mark.xfail( - reason="Issue #2457 Adjust context properly for mixed rolling window," - " cumulative window and non window ops", - strict=True, -) -@pytest.mark.parametrize( - ("ibis_windows", "spark_range"), - [([(ibis.interval(hours=1), 0)], [(-3600, 0)])], - indirect=["ibis_windows"], -) -def test_rolling_with_non_window_op(con, ibis_windows, spark_range): - """Test context adjustment for rolling window and non window ops. - - non window ops should calculate only with in user's context, - while rolling window should calculate on expanded context. - For a rolling window of 1 day, and a `count` aggregation - time value - 2020-01-01 a - 2020-01-02 b - 2020-01-03 c - 2020-01-04 d - with context = (2020-01-02, 2002-01-04), result will be: - time value roll_count count - 2020-01-02 b 2 3 - 2020-01-03 c 2 3 - 2020-01-04 d 2 3 - Because there are 3 rows within user context (01-02, 01-04), - count should return 3 for every row, rather 4, based on the - adjusted context (01-01, 01-04). 
- """ - table = con.table("time_indexed_table") - context = ( - pd.Timestamp("20170102 07:00:00", tz="UTC"), - pd.Timestamp("20170105", tz="UTC"), - ) - result_pd = table.mutate( - count_1h=table["value"].count().over(ibis_windows[0]), - count=table["value"].count(), - ).execute(timecontext=context) - - spark_table = table.compile() - spark_window_1h = ( - Window.partitionBy("key") - .orderBy(F.col("time").cast("long")) - .rangeBetween(*spark_range[0]) - ) - expected = ( - spark_table.withColumn( - "count_1h", F.count(spark_table["value"]).over(spark_window_1h) - ) - .withColumn("count", F.count(spark_table["value"])) - .toPandas() - ) - expected = expected[ - expected.time.between(*(t.tz_convert(None) for t in context)) - ].reset_index(drop=True) - tm.assert_frame_equal(result_pd, expected) - - -def test_complex_window(con): - """Test window with different sizes mix context adjustment for window op - that require context adjustment and non window op that doesn't adjust - context.""" - table = con.table("time_indexed_table") - context = ( - pd.Timestamp("20170102 07:00:00", tz="UTC"), - pd.Timestamp("20170105", tz="UTC"), - ) - window = ibis.trailing_window( - preceding=ibis.interval(hours=1), order_by="time", group_by="key" - ) - window2 = ibis.trailing_window( - preceding=ibis.interval(hours=2), order_by="time", group_by="key" - ) - window_cum = ibis.cumulative_window(order_by="time", group_by="key") - # context should be adjusted accordingly for each window - result_pd = ( - table.mutate( - count_1h=table["value"].count().over(window), - count_2h=table["value"].count().over(window2), - count_cum=table["value"].count().over(window_cum), - ) - .mutate(count=table["value"].count()) - .execute(timecontext=context) - ) - - df = table.execute() - expected_win_1h = ( - df.set_index("time") - .groupby("key") - .value.rolling("1h", closed="both") - .count() - .rename("count_1h") - .astype(int) - ) - expected_win_2h = ( - df.set_index("time") - .groupby("key") - .value.rolling("2h", closed="both") - .count() - .rename("count_2h") - .astype(int) - ) - expected_cum_win = ( - df.set_index("time") - .groupby("key") - .value.expanding() - .count() - .rename("count_cum") - .astype(int) - ) - df = df.set_index("time") - df = df.assign( - count_1h=expected_win_1h.sort_index(level=["time", "key"]).reset_index( - level="key", drop=True - ) - ) - df = df.assign( - count_2h=expected_win_2h.sort_index(level=["time", "key"]).reset_index( - level="key", drop=True - ) - ) - df = df.assign( - count_cum=expected_cum_win.sort_index(level=["time", "key"]).reset_index( - level="key", drop=True - ) - ) - df["count"] = df.groupby(["key"])["value"].transform("count") - df = df.reset_index() - expected = ( - df[df.time.between(*(t.tz_convert(None) for t in context))] - .sort_values(["key"]) - .reset_index(drop=True) - ) - tm.assert_frame_equal(result_pd, expected) diff --git a/ibis/backends/pyspark/timecontext.py b/ibis/backends/pyspark/timecontext.py deleted file mode 100644 index e2d5c2a4d4d5..000000000000 --- a/ibis/backends/pyspark/timecontext.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -import pyspark.sql.functions as F - -import ibis.common.exceptions as com -from ibis.backends.base.df.timecontext import TimeContext, get_time_col - -if TYPE_CHECKING: - from pyspark.sql.dataframe import DataFrame - - -def filter_by_time_context( - df: DataFrame, - timecontext: TimeContext | None, - adjusted_timecontext: TimeContext | None = None, -) -> DataFrame: - 
"""Filter a Dataframe by given time context.""" - # Return original df if there is no timecontext (timecontext is not used) - # or timecontext and adjusted_timecontext are the same - if (not timecontext) or ( - timecontext and adjusted_timecontext and timecontext == adjusted_timecontext - ): - return df - - time_col = get_time_col() - if time_col in df.columns: - # For py3.8, underlying spark type converter calls utctimetuple() - # and will throw exception for Timestamp type if tz is set. - # See https://github.com/pandas-dev/pandas/issues/32174 - # Dropping tz will cause spark to interpret begin, end with session - # timezone & os env TZ. We convert Timestamp to pydatetime to - # workaround. - begin, end = timecontext - return df.filter( - (F.col(time_col) >= begin.to_pydatetime()) - & (F.col(time_col) < end.to_pydatetime()) - ) - else: - raise com.TranslationError( - f"'time' column missing in Dataframe {df}." - "To use time context, a Timestamp column name 'time' must" - "present in the table. " - ) - - -def combine_time_context( - timecontexts: list[TimeContext], -) -> TimeContext | None: - """Return a combined time context of `timecontexts`. - - The combined time context starts from the earliest begin time - of `timecontexts`, and ends with the latest end time of `timecontexts` - The motivation is to generate a time context that is a superset - to all time contexts. - - Examples - -------- - >>> import pandas as pd - >>> timecontexts = [ - ... (pd.Timestamp("20200102"), pd.Timestamp("20200103")), - ... (pd.Timestamp("20200101"), pd.Timestamp("20200106")), - ... (pd.Timestamp("20200109"), pd.Timestamp("20200110")), - ... ] - >>> combine_time_context(timecontexts) - (Timestamp(...), Timestamp(...)) - >>> timecontexts = [None] - >>> print(combine_time_context(timecontexts)) - None - """ - begin = min((t[0] for t in timecontexts if t), default=None) - end = max((t[1] for t in timecontexts if t), default=None) - if begin and end: - return begin, end - return None diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py index 229ad1577282..798f65c54c1d 100644 --- a/ibis/backends/tests/errors.py +++ b/ibis/backends/tests/errors.py @@ -31,11 +31,12 @@ from pyspark.sql.utils import ( IllegalArgumentException as PySparkIllegalArgumentException, ) + from pyspark.sql.utils import ParseException as PySparkParseException from pyspark.sql.utils import PythonException as PySparkPythonException except ImportError: PySparkAnalysisException = ( PySparkIllegalArgumentException - ) = PySparkPythonException = None + ) = PySparkParseException = PySparkPythonException = None try: from google.api_core.exceptions import BadRequest as GoogleBadRequest diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/pyspark/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/pyspark/out.sql new file mode 100644 index 000000000000..f63de03c314a --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/pyspark/out.sql @@ -0,0 +1,5 @@ +SELECT + `t0`.`id`, + `t0`.`bool_col` +FROM `functional_alltypes` AS `t0` +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/pyspark/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/pyspark/out.sql new file mode 100644 index 000000000000..f63de03c314a --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/pyspark/out.sql @@ -0,0 +1,5 @@ +SELECT + `t0`.`id`, + 
`t0`.`bool_col` +FROM `functional_alltypes` AS `t0` +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/pyspark/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/pyspark/out.sql new file mode 100644 index 000000000000..d8a9c4090dc1 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/pyspark/out.sql @@ -0,0 +1,3 @@ +SELECT + SUM(`t0`.`bigint_col`) AS `Sum(bigint_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/pyspark/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/pyspark/out.sql new file mode 100644 index 000000000000..d4b1b19815b0 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/pyspark/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + `t0`.`id`, + `t0`.`bool_col` + FROM `functional_alltypes` AS `t0` + LIMIT 10 +) AS `t2` +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/pyspark/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/pyspark/out.sql new file mode 100644 index 000000000000..583ea9c73238 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/pyspark/out.sql @@ -0,0 +1,134 @@ +SELECT + `t10`.`field_of_study`, + `t10`.`diff` +FROM ( + SELECT + `t5`.`field_of_study`, + `t5`.`diff` + FROM ( + SELECT + `t4`.`field_of_study`, + FIRST(`t4`.`diff`, TRUE) AS `diff` + FROM ( + SELECT + `t3`.`field_of_study`, + `t3`.`years`, + `t3`.`degrees`, + `t3`.`earliest_degrees`, + `t3`.`latest_degrees`, + `t3`.`latest_degrees` - `t3`.`earliest_degrees` AS `diff` + FROM ( + SELECT + `t2`.`field_of_study`, + `t2`.`years`, + `t2`.`degrees`, + FIRST(`t2`.`degrees`, TRUE) OVER (PARTITION BY `t2`.`field_of_study` ORDER BY `t2`.`years` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `earliest_degrees`, + LAST(`t2`.`degrees`, TRUE) OVER (PARTITION BY `t2`.`field_of_study` ORDER BY `t2`.`years` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `latest_degrees` + FROM ( + SELECT + `t1`.`field_of_study`, + `t1`.`__pivoted__`.`years` AS `years`, + `t1`.`__pivoted__`.`degrees` AS `degrees` + FROM ( + SELECT + `t0`.`field_of_study`, + EXPLODE( + ARRAY( + STRUCT('1970-71' AS `years`, `t0`.`1970-71` AS `degrees`), + STRUCT('1975-76' AS `years`, `t0`.`1975-76` AS `degrees`), + STRUCT('1980-81' AS `years`, `t0`.`1980-81` AS `degrees`), + STRUCT('1985-86' AS `years`, `t0`.`1985-86` AS `degrees`), + STRUCT('1990-91' AS `years`, `t0`.`1990-91` AS `degrees`), + STRUCT('1995-96' AS `years`, `t0`.`1995-96` AS `degrees`), + STRUCT('2000-01' AS `years`, `t0`.`2000-01` AS `degrees`), + STRUCT('2005-06' AS `years`, `t0`.`2005-06` AS `degrees`), + STRUCT('2010-11' AS `years`, `t0`.`2010-11` AS `degrees`), + STRUCT('2011-12' AS `years`, `t0`.`2011-12` AS `degrees`), + STRUCT('2012-13' AS `years`, `t0`.`2012-13` AS `degrees`), + STRUCT('2013-14' AS `years`, `t0`.`2013-14` AS `degrees`), + STRUCT('2014-15' AS `years`, `t0`.`2014-15` AS `degrees`), + STRUCT('2015-16' AS `years`, `t0`.`2015-16` AS `degrees`), + STRUCT('2016-17' AS `years`, `t0`.`2016-17` AS `degrees`), + STRUCT('2017-18' AS `years`, `t0`.`2017-18` AS `degrees`), + STRUCT('2018-19' AS `years`, `t0`.`2018-19` AS `degrees`), + STRUCT('2019-20' AS `years`, `t0`.`2019-20` 
AS `degrees`) + ) + ) AS `__pivoted__` + FROM `humanities` AS `t0` + ) AS `t1` + ) AS `t2` + ) AS `t3` + ) AS `t4` + GROUP BY + 1 + ) AS `t5` + ORDER BY + `t5`.`diff` DESC + LIMIT 10 + UNION ALL + SELECT + `t5`.`field_of_study`, + `t5`.`diff` + FROM ( + SELECT + `t4`.`field_of_study`, + FIRST(`t4`.`diff`, TRUE) AS `diff` + FROM ( + SELECT + `t3`.`field_of_study`, + `t3`.`years`, + `t3`.`degrees`, + `t3`.`earliest_degrees`, + `t3`.`latest_degrees`, + `t3`.`latest_degrees` - `t3`.`earliest_degrees` AS `diff` + FROM ( + SELECT + `t2`.`field_of_study`, + `t2`.`years`, + `t2`.`degrees`, + FIRST(`t2`.`degrees`, TRUE) OVER (PARTITION BY `t2`.`field_of_study` ORDER BY `t2`.`years` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `earliest_degrees`, + LAST(`t2`.`degrees`, TRUE) OVER (PARTITION BY `t2`.`field_of_study` ORDER BY `t2`.`years` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `latest_degrees` + FROM ( + SELECT + `t1`.`field_of_study`, + `t1`.`__pivoted__`.`years` AS `years`, + `t1`.`__pivoted__`.`degrees` AS `degrees` + FROM ( + SELECT + `t0`.`field_of_study`, + EXPLODE( + ARRAY( + STRUCT('1970-71' AS `years`, `t0`.`1970-71` AS `degrees`), + STRUCT('1975-76' AS `years`, `t0`.`1975-76` AS `degrees`), + STRUCT('1980-81' AS `years`, `t0`.`1980-81` AS `degrees`), + STRUCT('1985-86' AS `years`, `t0`.`1985-86` AS `degrees`), + STRUCT('1990-91' AS `years`, `t0`.`1990-91` AS `degrees`), + STRUCT('1995-96' AS `years`, `t0`.`1995-96` AS `degrees`), + STRUCT('2000-01' AS `years`, `t0`.`2000-01` AS `degrees`), + STRUCT('2005-06' AS `years`, `t0`.`2005-06` AS `degrees`), + STRUCT('2010-11' AS `years`, `t0`.`2010-11` AS `degrees`), + STRUCT('2011-12' AS `years`, `t0`.`2011-12` AS `degrees`), + STRUCT('2012-13' AS `years`, `t0`.`2012-13` AS `degrees`), + STRUCT('2013-14' AS `years`, `t0`.`2013-14` AS `degrees`), + STRUCT('2014-15' AS `years`, `t0`.`2014-15` AS `degrees`), + STRUCT('2015-16' AS `years`, `t0`.`2015-16` AS `degrees`), + STRUCT('2016-17' AS `years`, `t0`.`2016-17` AS `degrees`), + STRUCT('2017-18' AS `years`, `t0`.`2017-18` AS `degrees`), + STRUCT('2018-19' AS `years`, `t0`.`2018-19` AS `degrees`), + STRUCT('2019-20' AS `years`, `t0`.`2019-20` AS `degrees`) + ) + ) AS `__pivoted__` + FROM `humanities` AS `t0` + ) AS `t1` + ) AS `t2` + ) AS `t3` + ) AS `t4` + GROUP BY + 1 + ) AS `t5` + WHERE + `t5`.`diff` < 0 + ORDER BY + `t5`.`diff` ASC NULLS LAST + LIMIT 10 +) AS `t10` \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index d6a3a4d0f0d1..e1388ae541cd 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -389,14 +389,6 @@ def mean_and_std(v): lambda t, where: (t.int_col > 0)[where].sum(), id="bool_sum", marks=[ - pytest.mark.notimpl( - ["pyspark"], - raises=PySparkAnalysisException, - reason=( - "pyspark.sql.utils.AnalysisException: " - "function sum requires numeric or interval types, not boolean;" - ), - ), pytest.mark.broken( ["oracle"], raises=sa.exc.DatabaseError, @@ -1076,11 +1068,6 @@ def test_quantile( raises=(ValueError, AttributeError), reason="ClickHouse only implements `sample` correlation coefficient", ), - pytest.mark.notyet( - ["pyspark"], - raises=ValueError, - reason="PySpark only implements sample correlation", - ), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -1169,11 +1156,6 @@ def test_quantile( raises=ValueError, reason="ClickHouse only implements `sample` correlation 
coefficient", ), - pytest.mark.notyet( - ["pyspark"], - raises=ValueError, - reason="PySpark only implements sample correlation", - ), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -1242,7 +1224,7 @@ def test_approx_median(alltypes): ["bigquery", "druid", "sqlite", "exasol"], raises=com.OperationNotDefinedError ) @pytest.mark.notyet( - ["impala", "mysql", "mssql", "druid", "pyspark", "trino"], + ["impala", "mysql", "mssql", "druid", "trino"], raises=com.OperationNotDefinedError, ) @pytest.mark.notyet(["dask"], raises=NotImplementedError) @@ -1290,20 +1272,8 @@ def test_median(alltypes, df): @pytest.mark.parametrize( "func", [ - param( - methodcaller("quantile", 0.5), - id="quantile", - marks=[ - pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) - ], - ), - param( - methodcaller("median"), - id="median", - marks=[ - pytest.mark.notimpl(["pyspark"], raises=com.OperationNotDefinedError) - ], - ), + param(methodcaller("quantile", 0.5), id="quantile"), + param(methodcaller("median"), id="median"), ], ) def test_string_quantile(alltypes, func): @@ -1336,13 +1306,6 @@ def test_string_quantile(alltypes, func): pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) ], ), - param( - methodcaller("median"), - id="median", - marks=[ - pytest.mark.notimpl(["pyspark"], raises=com.OperationNotDefinedError) - ], - ), ], ) def test_date_quantile(alltypes, func): @@ -1366,9 +1329,6 @@ def test_date_quantile(alltypes, func): raises=GoogleBadRequest, reason="Argument 2 to STRING_AGG must be a literal or query parameter", ), - pytest.mark.broken( - ["pyspark"], raises=TypeError, reason="Column is not iterable" - ), ], ), ], @@ -1604,7 +1564,6 @@ def collect_udf(v): backend.assert_frame_equal(result, expected, check_like=True) -@pytest.mark.notimpl(["pyspark"], raises=com.OperationNotDefinedError) def test_binds_are_cast(alltypes): expr = alltypes.aggregate( high_line_count=( diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index b14afa5a9441..760334ef7235 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -8,9 +8,9 @@ import pandas.testing as tm import pytest import pytz -import sqlalchemy as sa import toolz from pytest import param +import sqlalchemy as sa import ibis import ibis.common.exceptions as com @@ -1212,7 +1212,7 @@ def swap(token): pytest.mark.notyet(["polars"], raises=com.UnsupportedOperationError), pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), pytest.mark.notyet( - ["clickhouse", "pyspark", "snowflake"], + ["clickhouse", "snowflake"], raises=com.UnsupportedOperationError, ), pytest.mark.notimpl( @@ -1263,7 +1263,7 @@ def test_timestamp_range(con, start, stop, step, freq, tzinfo): pytest.mark.notyet(["polars"], raises=com.UnsupportedOperationError), pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), pytest.mark.notyet( - ["clickhouse", "pyspark", "snowflake"], + ["clickhouse", "snowflake"], raises=com.UnsupportedOperationError, ), pytest.mark.notyet( diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index 172075a0a860..2665031af72f 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -80,7 +80,9 @@ def time_keyed_right(time_keyed_df2): @pytest.mark.parametrize( ("direction", "op"), [("backward", operator.ge), ("forward", operator.le)] ) -@pytest.mark.notyet(["datafusion", "snowflake", "trino", "postgres", "mysql"]) +@pytest.mark.notyet( + ["datafusion", "snowflake", 
"trino", "postgres", "mysql", "pyspark"] +) def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): on = op(time_left["time"], time_right["time"]) expr = time_left.asof_join(time_right, on=on, predicates="group") @@ -104,7 +106,9 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op @pytest.mark.broken( ["clickhouse"], raises=AssertionError, reason="`time` is truncated to seconds" ) -@pytest.mark.notyet(["datafusion", "snowflake", "trino", "postgres", "mysql"]) +@pytest.mark.notyet( + ["datafusion", "snowflake", "trino", "postgres", "mysql", "pyspark"] +) def test_keyed_asof_join_with_tolerance( con, time_keyed_left, diff --git a/ibis/backends/tests/test_dot_sql.py b/ibis/backends/tests/test_dot_sql.py index 1700403104e3..67c25d63cd49 100644 --- a/ibis/backends/tests/test_dot_sql.py +++ b/ibis/backends/tests/test_dot_sql.py @@ -280,7 +280,7 @@ def test_con_dot_sql_transpile(backend, con, dialect, df): @dot_sql_notimpl @dot_sql_never -@pytest.mark.notimpl(["druid", "flink", "impala", "polars", "pyspark"]) +@pytest.mark.notimpl(["druid", "flink", "impala", "polars"]) @pytest.mark.notyet(["snowflake"], reason="snowflake column names are case insensitive") @pytest.mark.notyet( ["risingwave"], diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index 8f37499438c2..594fd6717b22 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -15,7 +15,7 @@ DuckDBParserException, MySQLOperationalError, PyDeltaTableError, - PySparkAnalysisException, + PySparkParseException, SnowflakeProgrammingError, TrinoUserError, ) @@ -31,7 +31,7 @@ # limit not implemented for flink and pandas backend execution "dask", "pandas", - "pyspark", + "flink", ] ), ], @@ -168,9 +168,6 @@ def test_column_pyarrow_batch_chunk_size(awards_players): @pytest.mark.notimpl(["pandas", "dask"]) -@pytest.mark.broken( - ["pyspark"], raises=AssertionError, reason="chunk_size isn't respected" -) @pytest.mark.broken( ["sqlite"], raises=pa.ArrowException, @@ -360,7 +357,7 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): pytest.mark.notyet(["mysql"], raises=MySQLOperationalError), pytest.mark.notyet( ["pyspark"], - raises=PySparkAnalysisException, + raises=PySparkParseException, reason="precision is out of range", ), pytest.mark.notyet(["exasol"], raises=sa.exc.DBAPIError), diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 5479d4c26bea..573982a6675e 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -27,6 +27,7 @@ MySQLProgrammingError, SnowflakeProgrammingError, TrinoUserError, + PsycoPg2InvalidTextRepresentation ) from ibis.common.annotations import ValidationError @@ -158,6 +159,7 @@ def test_isna(backend, alltypes, col, filt): "druid", "oracle", "exasol", + "pyspark", ], reason="NaN != NULL for these backends", ), @@ -199,7 +201,7 @@ def test_coalesce(con, expr, expected): # TODO(dask) - identicalTo - #2553 -@pytest.mark.notimpl(["clickhouse", "dask", "pyspark", "mssql", "druid", "exasol"]) +@pytest.mark.notimpl(["clickhouse", "dask", "mssql", "druid", "exasol"]) def test_identical_to(backend, alltypes, sorted_df): sorted_alltypes = alltypes.order_by("id") df = sorted_df @@ -316,6 +318,7 @@ def test_filter(backend, alltypes, sorted_df, predicate_fn, expected_fn): "oracle", "exasol", "pandas", + "pyspark", ] ) @pytest.mark.never( @@ -795,7 +798,6 @@ def test_interactive(alltypes, monkeypatch): repr(expr) 
-@pytest.mark.notyet(["pyspark"], reason="no native support for correlated subqueries") def test_correlated_subquery(alltypes): expr = alltypes[_.double_col > _.view().double_col] assert expr.compile() is not None @@ -838,7 +840,6 @@ def test_int_scalar(alltypes): @pytest.mark.notyet( ["clickhouse"], reason="https://github.com/ClickHouse/ClickHouse/issues/6697" ) -@pytest.mark.notyet(["pyspark"]) @pytest.mark.parametrize("method_name", ["any", "notany"]) def test_exists(batting, awards_players, method_name): years = [1980, 1981] @@ -903,7 +904,7 @@ def test_isin_uncorrelated( @pytest.mark.broken(["polars"], reason="incorrect answer") -@pytest.mark.notimpl(["pyspark", "druid", "exasol"]) +@pytest.mark.notimpl(["druid", "exasol"]) @pytest.mark.notyet(["dask"], reason="not supported by the backend") def test_isin_uncorrelated_filter( backend, batting, awards_players, batting_df, awards_players_df @@ -1240,11 +1241,6 @@ def test_distinct_on_keep(backend, on, keep): raises=com.OperationNotDefinedError, reason="backend doesn't implement ops.WindowFunction", ) -@pytest.mark.notimpl( - ["pyspark"], - raises=com.UnsupportedOperationError, - reason="backend doesn't support `having` filters", -) @pytest.mark.notimpl( ["flink"], raises=com.OperationNotDefinedError, @@ -1373,7 +1369,6 @@ def hash_256(col): "mssql", "oracle", "risingwave", - "pyspark", "snowflake", "sqlite", "exasol", @@ -1415,7 +1410,6 @@ def test_try_cast(con, from_val, to_type, expected): "impala", "mssql", "oracle", - "pyspark", "snowflake", "sqlite", "exasol", @@ -1444,7 +1438,6 @@ def test_try_cast_returns_null(con): "oracle", "postgres", "risingwave", - "pyspark", "snowflake", "sqlite", "exasol", @@ -1516,7 +1509,6 @@ def test_try_cast_table(backend, con): "oracle", "postgres", "risingwave", - "pyspark", "snowflake", "sqlite", "exasol", @@ -1532,7 +1524,7 @@ def test_try_cast_table(backend, con): lambda x: x is None or np.isnan(x), marks=[ pytest.mark.notyet( - ["clickhouse", "polars", "flink"], + ["clickhouse", "polars", "flink", "pyspark"], reason="casts this to to a number", ), pytest.mark.notyet(["trino"], raises=TrinoUserError), @@ -1541,7 +1533,9 @@ def test_try_cast_table(backend, con): ], ) def test_try_cast_func(con, from_val, to_type, func): - assert func(con.execute(ibis.literal(from_val).try_cast(to_type))) + expr = ibis.literal(from_val).try_cast(to_type) + result = con.execute(expr) + assert func(result) @pytest.mark.parametrize( @@ -1623,11 +1617,6 @@ def test_try_cast_func(con, from_val, to_type, func): raises=ImpalaHiveServer2Error, reason="impala doesn't support OFFSET without ORDER BY", ), - pytest.mark.notyet( - ["pyspark"], - raises=com.UnsupportedArgumentError, - reason="pyspark doesn't support non-zero offset until version 3.4", - ), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -1656,11 +1645,6 @@ def test_try_cast_func(con, from_val, to_type, func): raises=ImpalaHiveServer2Error, reason="impala doesn't support OFFSET without ORDER BY", ), - pytest.mark.notyet( - ["pyspark"], - raises=com.UnsupportedArgumentError, - reason="pyspark doesn't support non-zero offset until version 3.4", - ), ], ), ], @@ -1895,6 +1879,7 @@ def test_sample_memtable(con, backend): "sqlite", "trino", "exasol", + "pyspark", ] ) def test_sample_with_seed(backend): @@ -1924,9 +1909,6 @@ def test_substitute(backend): @pytest.mark.notimpl( ["dask", "pandas", "polars"], raises=NotImplementedError, reason="not a SQL backend" ) -@pytest.mark.notimpl( - ["pyspark"], reason="pyspark doesn't generate SQL", 
raises=NotImplementedError -) @pytest.mark.notimpl(["druid", "flink"], reason="no sqlglot dialect", raises=ValueError) @pytest.mark.notimpl(["exasol"], raises=ValueError, reason="unknown dialect") @pytest.mark.notimpl( diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index 1f264c34d455..ad0bb2fd7c05 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -319,7 +319,6 @@ def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_valu lambda left: left.filter(lambda t: t.x == 1), "x", id="left-x", - marks=pytest.mark.notimpl(["pyspark"], reason="overlapping columns"), ), param( "right", @@ -334,7 +333,6 @@ def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_valu lambda left: left.filter(lambda t: t.x == 1), "x", id="right-x", - marks=pytest.mark.notimpl(["pyspark"], reason="overlapping columns"), ), param( "outer", @@ -351,7 +349,6 @@ def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_valu "x", id="outer-x", marks=[ - pytest.mark.notimpl(["pyspark"], reason="overlapping columns"), *outer_join_nullability_failures, ], ), diff --git a/ibis/backends/tests/test_map.py b/ibis/backends/tests/test_map.py index 10cc901419d5..4aa30d079620 100644 --- a/ibis/backends/tests/test_map.py +++ b/ibis/backends/tests/test_map.py @@ -19,7 +19,7 @@ ["bigquery", "impala"], reason="Backend doesn't yet implement map types" ), pytest.mark.notimpl( - ["datafusion", "exasol", "pyspark", "polars", "druid", "oracle"], + ["datafusion", "exasol", "polars", "druid", "oracle"], reason="Not yet implemented in ibis", ), ] diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 17c9cd7d348e..4ad19845e538 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -25,6 +25,7 @@ MySQLOperationalError, PsycoPg2DivisionByZero, Py4JError, + PySparkParseException, SnowflakeProgrammingError, TrinoUserError, ) @@ -405,6 +406,11 @@ def test_numeric_literal(con, backend, expr, expected_types): reason="Unsupported precision.", raises=DuckDBParserException, ), + pytest.mark.broken( + ["pyspark"], + reason="Unsupported precision.", + raises=PySparkParseException, + ), pytest.mark.broken( ["trino"], reason="Unsupported precision.", raises=TrinoUserError ), @@ -427,6 +433,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "postgres": decimal.Decimal("Infinity"), "pandas": decimal.Decimal("Infinity"), "dask": decimal.Decimal("Infinity"), + "pyspark": decimal.Decimal("Infinity"), "impala": float("inf"), "exasol": float("inf"), "duckdb": float("inf"), @@ -452,11 +459,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "query_id=20230128_024107_01084_y8zm3)", raises=sa.exc.ProgrammingError, ), - pytest.mark.broken( - ["pyspark"], - "An error occurred while calling z:org.apache.spark.sql.functions.lit.", - raises=Py4JError, - ), pytest.mark.notyet(["mysql"], raises=com.UnsupportedOperationError), pytest.mark.broken( ["mssql"], @@ -508,6 +510,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "postgres": decimal.Decimal("-Infinity"), "pandas": decimal.Decimal("-Infinity"), "dask": decimal.Decimal("-Infinity"), + "pyspark": decimal.Decimal("-Infinity"), "impala": float("-inf"), "exasol": float("-inf"), "duckdb": float("-inf"), @@ -533,11 +536,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "query_id=20230128_024107_01084_y8zm3)", raises=sa.exc.ProgrammingError, ), - 
pytest.mark.broken( - ["pyspark"], - "An error occurred while calling z:org.apache.spark.sql.functions.lit.", - raises=Py4JError, - ), pytest.mark.notyet(["mysql"], raises=com.UnsupportedOperationError), pytest.mark.broken( ["mssql"], @@ -590,6 +588,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "risingwave": float("nan"), "pandas": decimal.Decimal("NaN"), "dask": decimal.Decimal("NaN"), + "pyspark": decimal.Decimal("NaN"), "impala": float("nan"), "exasol": float("nan"), "duckdb": float("nan"), @@ -616,11 +615,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "query_id=20230128_024107_01084_y8zm3)", raises=sa.exc.ProgrammingError, ), - pytest.mark.broken( - ["pyspark"], - "An error occurred while calling z:org.apache.spark.sql.functions.lit.", - raises=Py4JError, - ), pytest.mark.notyet(["mysql"], raises=com.UnsupportedOperationError), pytest.mark.broken( ["mssql"], @@ -1625,11 +1619,6 @@ def test_constants(con, const): assert pytest.approx(result) == getattr(math, const) -pyspark_no_bitshift = pytest.mark.notyet( - ["pyspark"], - reason="pyspark doesn't implement bitshift operators", - raises=com.OperationNotDefinedError, -) flink_no_bitwise = pytest.mark.notyet( ["flink"], reason="Flink doesn't implement bitwise operators", @@ -1685,7 +1674,6 @@ def test_bitwise_columns(backend, con, alltypes, df, op, left_fn, right_fn): ) @pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notimpl(["exasol"], raises=(sa.exc.DBAPIError, ExaQueryError)) -@pyspark_no_bitshift @flink_no_bitwise def test_bitwise_shift(backend, alltypes, df, op, left_fn, right_fn): expr = op(left_fn(alltypes), right_fn(alltypes)).name("tmp") @@ -1703,13 +1691,7 @@ def test_bitwise_shift(backend, alltypes, df, op, left_fn, right_fn): @pytest.mark.parametrize( "op", - [ - param(and_), - param(or_), - param(xor), - param(lshift, marks=pyspark_no_bitshift), - param(rshift, marks=pyspark_no_bitshift), - ], + [and_, or_, xor, lshift, rshift], ) @pytest.mark.parametrize( ("left", "right"), diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index 7fc9696aa9ab..01dad3aa36de 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -65,7 +65,7 @@ def test_timestamp_accepts_date_literals(alltypes): assert expr.compile(params=params) is not None -@pytest.mark.notimpl(["dask", "impala", "pyspark", "druid", "oracle", "exasol"]) +@pytest.mark.notimpl(["dask", "impala", "druid", "oracle", "exasol"]) @pytest.mark.never( ["mysql", "sqlite", "mssql"], reason="backend will never implement array types" ) @@ -82,7 +82,6 @@ def test_scalar_param_array(con): "impala", "postgres", "risingwave", - "pyspark", "druid", "oracle", "exasol", @@ -100,9 +99,7 @@ def test_scalar_param_struct(con): assert result == value["a"] -@pytest.mark.notimpl( - ["datafusion", "impala", "pyspark", "polars", "druid", "oracle", "exasol"] -) +@pytest.mark.notimpl(["datafusion", "impala", "polars", "druid", "oracle", "exasol"]) @pytest.mark.never( ["mysql", "sqlite", "mssql"], reason="mysql and sqlite will never implement map types", @@ -250,7 +247,6 @@ def test_scalar_param_date(backend, alltypes, value): "sqlite", "impala", "oracle", - "pyspark", "mssql", "druid", "exasol", diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 92c034b8124c..4740a0b896ac 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -39,7 +39,7 @@ reason="Not a SQL backend", ) no_sql_extraction = pytest.mark.notimpl( 
- ["pyspark", "polars"], reason="Not clear how to extract SQL from the backend" + ["polars"], reason="Not clear how to extract SQL from the backend" ) @@ -112,7 +112,7 @@ def test_isin_bug(con, snapshot): @pytest.mark.never( - ["pandas", "dask", "polars", "pyspark"], + ["pandas", "dask", "polars"], reason="not SQL", raises=NotImplementedError, ) diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index 9cc3e5dbd37d..bd497b1a31f0 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -11,7 +11,7 @@ import ibis import ibis.common.exceptions as com import ibis.expr.datatypes as dt -from ibis.backends.tests.errors import ClickHouseDatabaseError, PySparkPythonException +from ibis.backends.tests.errors import ClickHouseDatabaseError from ibis.common.annotations import ValidationError @@ -186,7 +186,7 @@ def uses_java_re(t): id="ilike", marks=[ pytest.mark.notimpl( - ["pyspark", "polars"], + ["polars"], raises=com.OperationNotDefinedError, ), pytest.mark.broken( @@ -240,7 +240,6 @@ def uses_java_re(t): ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError, ), - pytest.mark.broken(["pyspark"], raises=PySparkPythonException), pytest.mark.never( ["druid"], reason="No posix support; regex is interpreted literally", @@ -590,7 +589,7 @@ def uses_java_re(t): # pyspark doesn't support `cases` yet marks=[ pytest.mark.notimpl( - ["dask", "pyspark"], + ["dask"], raises=com.OperationNotDefinedError, ), pytest.mark.broken(["druid", "mssql"], raises=sa.exc.ProgrammingError), @@ -605,7 +604,7 @@ def uses_java_re(t): # pyspark doesn't support `cases` yet marks=[ pytest.mark.notimpl( - ["dask", "datafusion", "pyspark"], + ["dask", "datafusion"], raises=com.OperationNotDefinedError, ), pytest.mark.broken(["druid", "mssql"], raises=sa.exc.ProgrammingError), @@ -665,10 +664,6 @@ def uses_java_re(t): lambda t: t.date_string_col.str[2:], id="substr-start-only", marks=[ - pytest.mark.notimpl( - ["pyspark"], - raises=com.OperationNotDefinedError, - ), pytest.mark.broken( ["mssql"], reason="substr requires 3 arguments", @@ -698,7 +693,6 @@ def uses_java_re(t): marks=[ pytest.mark.broken(["druid"], raises=sa.exc.ProgrammingError), pytest.mark.broken(["impala", "flink"], raises=AssertionError), - pytest.mark.notimpl(["pyspark"], raises=NotImplementedError), ], ), param( @@ -707,13 +701,6 @@ def uses_java_re(t): id="expr_slice_begin", # TODO: substring #2553 marks=[ - pytest.mark.notimpl( - ["pyspark"], - raises=NotImplementedError, - reason=( - "Specifying `start` or `length` with column expressions is not supported." - ), - ), pytest.mark.notimpl( ["polars"], raises=com.UnsupportedArgumentError, @@ -735,13 +722,6 @@ def uses_java_re(t): id="expr_slice_end", # TODO: substring #2553 marks=[ - pytest.mark.notimpl( - ["pyspark"], - raises=NotImplementedError, - reason=( - "Specifying `start` or `length` with column expressions is not supported." - ), - ), pytest.mark.notimpl( ["polars"], raises=com.UnsupportedArgumentError, @@ -763,13 +743,6 @@ def uses_java_re(t): id="expr_empty_slice", # TODO: substring #2553 marks=[ - pytest.mark.notimpl( - ["pyspark"], - raises=NotImplementedError, - reason=( - "Specifying `start` or `length` with column expressions is not supported." 
- ), - ), pytest.mark.notimpl( ["polars"], raises=com.UnsupportedArgumentError, @@ -794,13 +767,6 @@ def uses_java_re(t): id="expr_slice_begin_end", # TODO: substring #2553 marks=[ - pytest.mark.notimpl( - ["pyspark"], - raises=NotImplementedError, - reason=( - "Specifying `start` or `length` with column expressions is not supported." - ), - ), pytest.mark.notimpl( ["polars"], raises=com.UnsupportedArgumentError, @@ -1185,11 +1151,6 @@ def test_re_split_column(alltypes): raises=Exception, reason="pyarrow doesn't support splitting on a pattern per row", ) -@pytest.mark.notyet( - ["pyspark"], - raises=com.UnsupportedOperationError, - reason="pyspark only supports pattern constants", -) def test_re_split_column_multiple_patterns(alltypes): expr = ( alltypes.filter(lambda t: t.string_col.isin(("1", "2"))) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 4a395dfbac4b..d627926a7a3e 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -30,7 +30,6 @@ PolarsComputeError, PolarsPanicException, Py4JJavaError, - PySparkIllegalArgumentException, SnowflakeProgrammingError, TrinoUserError, ) @@ -199,11 +198,6 @@ def test_timestamp_extract(backend, alltypes, df, attr): pytest.mark.notimpl( ["druid", "oracle", "exasol"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl( - ["pyspark"], - raises=com.UnsupportedOperationError, - reason="PySpark backend does not support extracting milliseconds.", - ), ], ), param( @@ -272,11 +266,6 @@ def test_timestamp_extract_microseconds(backend, alltypes, df): raises=AttributeError, reason="'StringColumn' object has no attribute 'millisecond'", ) -@pytest.mark.notyet( - ["pyspark"], - raises=com.UnsupportedOperationError, - reason="PySpark backend does not support extracting milliseconds.", -) @pytest.mark.broken(["sqlite"], raises=AssertionError) @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_timestamp_extract_milliseconds(backend, alltypes, df): @@ -478,7 +467,6 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): "clickhouse", "impala", "mysql", - "pyspark", "sqlite", "datafusion", ], @@ -504,7 +492,6 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): "clickhouse", "impala", "mysql", - "pyspark", "sqlite", "trino", "datafusion", @@ -692,11 +679,6 @@ def test_date_truncate(backend, alltypes, df, unit): raises=ValueError, reason="Metadata inference failed in `add`.", ), - pytest.mark.notimpl( - ["pyspark"], - raises=com.UnsupportedOperationError, - reason="Interval from integer column is unsupported for the PySpark backend.", - ), pytest.mark.notyet( ["trino"], raises=com.UnsupportedOperationError, @@ -725,11 +707,6 @@ def test_date_truncate(backend, alltypes, df, unit): raises=TypeError, reason="duration() got an unexpected keyword argument 'months'", ), - pytest.mark.notimpl( - ["pyspark"], - raises=com.UnsupportedOperationError, - reason="Interval from integer column is unsupported for the PySpark backend.", - ), pytest.mark.notyet( ["trino"], raises=com.UnsupportedOperationError, @@ -752,11 +729,6 @@ def test_date_truncate(backend, alltypes, df, unit): raises=ValueError, reason="Metadata inference failed in `add`.", ), - pytest.mark.notimpl( - ["pyspark"], - raises=com.UnsupportedOperationError, - reason="Interval from integer column is unsupported for the PySpark backend.", - ), pytest.mark.notyet( ["trino"], raises=com.UnsupportedOperationError, @@ -774,50 +746,10 @@ def test_date_truncate(backend, 
alltypes, df, unit): ), ], ), - param( - "D", - pd.offsets.DateOffset, - marks=[ - pytest.mark.notimpl( - ["pyspark"], - raises=com.UnsupportedOperationError, - reason="Interval from integer column is unsupported for the PySpark backend.", - ), - ], - ), - param( - "h", - pd.Timedelta, - marks=[ - pytest.mark.notimpl( - ["pyspark"], - raises=com.UnsupportedOperationError, - reason="Interval from integer column is unsupported for the PySpark backend.", - ), - ], - ), - param( - "m", - pd.Timedelta, - marks=[ - pytest.mark.notimpl( - ["pyspark"], - raises=com.UnsupportedOperationError, - reason="Interval from integer column is unsupported for the PySpark backend.", - ), - ], - ), - param( - "s", - pd.Timedelta, - marks=[ - pytest.mark.notimpl( - ["pyspark"], - raises=com.UnsupportedOperationError, - reason="Interval from integer column is unsupported for the PySpark backend.", - ), - ], - ), + param("D", pd.offsets.DateOffset), + param("h", pd.Timedelta), + param("m", pd.Timedelta), + param("s", pd.Timedelta), param( "ms", pd.Timedelta, @@ -825,11 +757,6 @@ def test_date_truncate(backend, alltypes, df, unit): pytest.mark.notimpl( ["clickhouse"], raises=com.UnsupportedOperationError ), - pytest.mark.notimpl( - ["pyspark"], - raises=com.UnsupportedArgumentError, - reason="Interval unit \"ms\" is not allowed. Allowed units are: ['Y', 'W', 'M', 'D', 'h', 'm', 's']", - ), pytest.mark.broken( ["flink"], raises=Py4JJavaError, @@ -849,11 +776,6 @@ def test_date_truncate(backend, alltypes, df, unit): pytest.mark.notimpl( ["clickhouse"], raises=com.UnsupportedOperationError ), - pytest.mark.notimpl( - ["pyspark"], - raises=com.UnsupportedArgumentError, - reason="Interval unit \"us\" is not allowed. Allowed units are: ['Y', 'W', 'M', 'D', 'h', 'm', 's']", - ), pytest.mark.notimpl( ["trino"], raises=AssertionError, @@ -948,13 +870,6 @@ def convert_to_offset(offset, displacement_type=displacement_type): ], raises=com.OperationNotDefinedError, ) -@pytest.mark.notimpl( - [ - "pyspark", - ], - raises=com.UnsupportedOperationError, - reason="Interval from integer column is unsupported for the PySpark backend.", -) @pytest.mark.notimpl( [ "sqlite", @@ -1129,11 +1044,6 @@ def convert_to_offset(x): ["bigquery", "snowflake", "sqlite"], raises=com.OperationNotDefinedError, ), - pytest.mark.notimpl( - ["pyspark"], - raises=com.UnsupportedOperationError, - reason="PySpark backend does not support TimestampDiff as there is no timedelta type.", - ), pytest.mark.notimpl( ["druid"], raises=ValidationError, @@ -1595,9 +1505,6 @@ def test_interval_add_cast_scalar(backend, alltypes): backend.assert_series_equal(result, expected.astype(result.dtype)) -@pytest.mark.never( - ["pyspark"], reason="PySpark does not support casting columns to intervals" -) @pytest.mark.notimpl( ["sqlite", "snowflake", "mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError, @@ -1789,11 +1696,6 @@ def test_integer_to_timestamp(backend, con, unit): "%m/%d/%y", id="mysql_format", marks=[ - pytest.mark.never( - ["pyspark"], - reason="Datetime formatting style is not supported.", - raises=ValueError, - ), pytest.mark.never( ["snowflake"], reason=( @@ -2014,7 +1916,7 @@ def test_now_from_projection(alltypes): } -@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["pandas", "dask"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["druid"], raises=sa.exc.ProgrammingError, reason="SQL parse failed" ) @@ -2187,7 +2089,7 @@ def test_time_literal(con, backend): 
@pytest.mark.notyet( - ["clickhouse", "impala", "pyspark"], + ["clickhouse", "impala"], raises=com.OperationNotDefinedError, reason="backend doesn't have a time datatype", ) @@ -2271,12 +2173,6 @@ def test_extract_time_from_timestamp(con, microsecond): "Encountered: ) Expected: +", raises=ImpalaHiveServer2Error, ) -@pytest.mark.broken( - ["pyspark"], - "Invalid argument, not a string or column: 1000000000 of type . For column literals, " - "use 'lit', 'array', 'struct' or 'create_map' function.", - raises=TypeError, -) @pytest.mark.broken( ["mysql"], "The backend implementation is broken. " @@ -2319,7 +2215,7 @@ def test_interval_literal(con, backend): assert con.execute(expr.typeof()) == INTERVAL_BACKEND_TYPES[backend_name] -@pytest.mark.notimpl(["pandas", "dask", "pyspark"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["pandas", "dask"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["druid"], raises=AttributeError, @@ -2423,16 +2319,6 @@ def test_date_column_from_iso(backend, con, alltypes, df): @pytest.mark.notimpl(["druid", "oracle"], raises=com.OperationNotDefinedError) -@pytest.mark.notyet( - ["pyspark"], - raises=com.UnsupportedOperationError, - reason=" PySpark backend does not support extracting milliseconds.", -) -@pytest.mark.notyet( - ["pyspark"], - raises=com.UnsupportedOperationError, - reason="PySpark backend does not support extracting milliseconds.", -) @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_timestamp_extract_milliseconds_with_big_value(con): timestamp = ibis.timestamp("2021-01-01 01:30:59.333456") @@ -2696,7 +2582,6 @@ def test_timestamp_precision_output(con, ts, scale, unit): "oracle", "pandas", "polars", - "pyspark", "sqlite", ], raises=com.OperationNotDefinedError, @@ -2736,6 +2621,11 @@ def test_timestamp_precision_output(con, ts, scale, unit): 2, id="timestamp", marks=[ + pytest.mark.broken( + ["pyspark"], + raises=AssertionError, + reason="pyspark difference is timezone aware", + ), pytest.mark.notimpl( ["mysql"], raises=com.OperationNotDefinedError, @@ -2997,7 +2887,6 @@ def test_time_literal_sql(dialect, snapshot, micros): reason="clickhouse doesn't support dates before the UNIX epoch", ), pytest.mark.notyet(["datafusion"], raises=Exception), - pytest.mark.notyet(["pyspark"], raises=PySparkIllegalArgumentException), ], ), param( diff --git a/ibis/backends/tests/test_timecontext.py b/ibis/backends/tests/test_timecontext.py index 50b181728d7e..88376a4f961b 100644 --- a/ibis/backends/tests/test_timecontext.py +++ b/ibis/backends/tests/test_timecontext.py @@ -27,6 +27,7 @@ "trino", "druid", "oracle", + "pyspark", ] ) @@ -117,7 +118,7 @@ def test_context_adjustment_filter_before_window( backend.assert_frame_equal(result, expected) -@pytest.mark.notimpl(["duckdb", "pyspark"]) +@pytest.mark.notimpl(["duckdb"]) @pytest.mark.notimpl( ["flink"], raises=com.UnsupportedOperationError, diff --git a/ibis/backends/tests/test_uuid.py b/ibis/backends/tests/test_uuid.py index 9a4dce517afa..ffef15992821 100644 --- a/ibis/backends/tests/test_uuid.py +++ b/ibis/backends/tests/test_uuid.py @@ -28,23 +28,6 @@ "clickhouse": "Nullable(UUID)", } -UUID_EXPECTED_VALUES = { - "pandas": TEST_UUID, - "bigquery": TEST_UUID, - "duckdb": TEST_UUID, - "sqlite": TEST_UUID, - "snowflake": TEST_UUID, - "trino": TEST_UUID, - "postgres": TEST_UUID, - "mysql": TEST_UUID, - "mssql": TEST_UUID, - "dask": TEST_UUID, - "oracle": TEST_UUID, - "flink": TEST_UUID, - "exasol": TEST_UUID, - "clickhouse": TEST_UUID, -} - pytestmark = 
pytest.mark.notimpl( ["druid"], raises=sqlalchemy.exc.CompileError, @@ -55,11 +38,6 @@ ) -@pytest.mark.broken( - ["pyspark"], - "'UUID' object has no attribute '_get_object_id'", - raises=AttributeError, -) @pytest.mark.xfail_version( duckdb=["duckdb<0.7.0"], reason='(duckdb.NotImplementedException) Not implemented Error: Unsupported type: "UUID"', @@ -79,7 +57,7 @@ def test_uuid_literal(con, backend): expr = ibis.literal(RAW_TEST_UUID, type=dt.uuid) result = con.execute(expr) - assert result == UUID_EXPECTED_VALUES[backend_name] + assert result == TEST_UUID with contextlib.suppress(com.OperationNotDefinedError): assert con.execute(expr.typeof()) == UUID_BACKEND_TYPE[backend_name] diff --git a/ibis/backends/tests/test_vectorized_udf.py b/ibis/backends/tests/test_vectorized_udf.py index c1c85326f52e..fa6728acb7f2 100644 --- a/ibis/backends/tests/test_vectorized_udf.py +++ b/ibis/backends/tests/test_vectorized_udf.py @@ -37,7 +37,7 @@ def _wrapper(*args, **kwargs): # elementwise UDF def add_one(s): - assert isinstance(s, pd.Series) + assert isinstance(s, pd.Series), type(s) return s + 1 @@ -362,14 +362,15 @@ def foo3(v, **kwargs): # UDF with kwargs return v + kwargs.get("amount", 1) - result = udf_alltypes.mutate( + expr = udf_alltypes.mutate( v1=foo1(udf_alltypes["double_col"]), v2=foo2(udf_alltypes["double_col"], amount=1), v3=foo2(udf_alltypes["double_col"], amount=2), v4=foo3(udf_alltypes["double_col"]), v5=foo3(udf_alltypes["double_col"], amount=2), v6=foo3(udf_alltypes["double_col"], amount=3), - ).execute() + ) + result = expr.execute() expected = udf_df.assign( v1=udf_df["double_col"] + 1, diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index d12960f729bc..e46827a641c3 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -18,7 +18,6 @@ ImpalaHiveServer2Error, MySQLOperationalError, Py4JJavaError, - PySparkAnalysisException, SnowflakeProgrammingError, ) from ibis.legacy.udf.vectorized import analytic, reduction @@ -158,7 +157,6 @@ def calc_zscore(s): lambda t: t.id.rank(method="min") / t.id.transform(len), id="cume_dist", marks=[ - pytest.mark.notimpl(["pyspark"], raises=com.UnsupportedOperationError), pytest.mark.notyet(["clickhouse"], raises=com.OperationNotDefinedError), pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl(["dask"], raises=NotImplementedError), @@ -681,11 +679,6 @@ def test_simple_ungrouped_unbound_following_window( raises=NotImplementedError, reason="support scalar sorting keys are not yet implemented", ) -@pytest.mark.broken( - ["pyspark"], - raises=PySparkAnalysisException, - reason="pyspark tries to locate None column", -) @pytest.mark.never( ["mssql"], raises=Exception, reason="order by constant is not supported" ) @@ -752,16 +745,16 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): pytest.mark.notimpl( ["pandas", "dask"], raises=com.OperationNotDefinedError ), - pytest.mark.notyet( - ["pyspark"], - raises=PySparkAnalysisException, - reason="pyspark requires CURRENT ROW", - ), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, reason="Feature is not yet implemented: Unrecognized window function: ntile", ), + pytest.mark.notimpl( + ["flink"], + raises=Py4JJavaError, + reason="CalciteContextException: Argument to function 'NTILE' must be a literal", + ), ], ), param( @@ -854,11 +847,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): strict=False, # sometimes it passes ), pytest.mark.broken(["oracle"], 
raises=AssertionError), - pytest.mark.notimpl( - ["pyspark"], - raises=PySparkAnalysisException, - reason="pyspark requires ORDER BY", - ), pytest.mark.notimpl( ["flink"], raises=com.UnsupportedOperationError, @@ -906,11 +894,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): strict=False, # sometimes it passes ), pytest.mark.broken(["oracle"], raises=AssertionError), - pytest.mark.notimpl( - ["pyspark"], - raises=PySparkAnalysisException, - reason="pyspark requires ORDER BY", - ), pytest.mark.notimpl( ["flink"], raises=com.UnsupportedOperationError, @@ -1100,7 +1083,6 @@ def gb_fn(df): @pytest.mark.notimpl(["clickhouse", "polars"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["dask"], raises=AttributeError) -@pytest.mark.notimpl(["pyspark"], raises=PySparkAnalysisException) @pytest.mark.notyet( ["clickhouse"], reason="clickhouse doesn't implement percent_rank", @@ -1285,9 +1267,6 @@ def test_range_expression_bounds(backend): @pytest.mark.broken( ["mssql"], reason="lack of support for booleans", raises=sa.exc.ProgrammingError ) -@pytest.mark.broken( - ["pyspark"], reason="pyspark requires CURRENT ROW", raises=PySparkAnalysisException -) @pytest.mark.broken( ["risingwave"], raises=sa.exc.InternalError, @@ -1323,9 +1302,6 @@ def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df): ) @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.notyet(["flink"], raises=com.UnsupportedOperationError) -@pytest.mark.broken( - ["pyspark"], reason="pyspark requires CURRENT ROW", raises=PySparkAnalysisException -) @pytest.mark.broken( ["pandas"], raises=TypeError, diff --git a/ibis/expr/operations/temporal.py b/ibis/expr/operations/temporal.py index ae4d65960b71..5eea8d969a76 100644 --- a/ibis/expr/operations/temporal.py +++ b/ibis/expr/operations/temporal.py @@ -1,7 +1,7 @@ from __future__ import annotations import operator -from typing import Annotated, Union +from typing import Annotated, Optional from public import public @@ -55,7 +55,7 @@ class TimeTruncate(Value): class TimestampBucket(Value): arg: Value[dt.Timestamp] interval: Scalar[dt.Interval] - offset: Union[Scalar[dt.Interval], None] = None + offset: Optional[Scalar[dt.Interval]] = None shape = rlz.shape_like("arg") dtype = dt.timestamp From 03162f27b0ca72411d24425d9337fa809a23d9ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 15 Jan 2024 17:44:41 +0100 Subject: [PATCH 068/161] feat(pyspark): add support for PySpark 3.5 --- .github/workflows/ibis-backends.yml | 27 ++++++++++++++++------- ibis/backends/conftest.py | 23 ++++++++++++++----- ibis/backends/pyspark/converter.py | 21 +++++++++++++----- ibis/backends/pyspark/datatypes.py | 3 +++ ibis/backends/pyspark/tests/test_basic.py | 23 ------------------- ibis/backends/tests/errors.py | 8 +++++++ ibis/backends/tests/test_export.py | 3 ++- ibis/backends/tests/test_generic.py | 4 +++- ibis/backends/tests/test_numeric.py | 3 ++- ibis/backends/tests/test_register.py | 6 +++-- poetry.lock | 2 +- pyproject.toml | 4 +++- 12 files changed, 79 insertions(+), 48 deletions(-) diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index c0ea4ec5f909..ad3ec014a1dc 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -534,15 +534,26 @@ jobs: run: docker compose logs test_pyspark: - name: PySpark ${{ matrix.os }} python-${{ matrix.python-version }} - runs-on: ${{ matrix.os }} + name: PySpark ${{ matrix.pyspark-version }} 
ubuntu-latest python-${{ matrix.python-version }} + runs-on: ubuntu-latest strategy: fail-fast: false matrix: - os: - - ubuntu-latest - python-version: - - "3.10" + include: + - name: pyspark-3.3 + python-version: "3.9" + pyspark-version: 3.3 + deps: + - "pyspark@3.3" + - "'pandas@<2'" + - "'numpy@<1.24'" + - name: pyspark-3.5 + python-version: "3.11" + pyspark-version: 3.5 + deps: + - "pyspark@3.5" + - "'pandas@>2'" + - "'numpy@>1.24'" steps: - name: checkout uses: actions/checkout@v4 @@ -572,8 +583,8 @@ jobs: # it requires a version of pandas that pyspark is not compatible with run: poetry remove lonboard - - name: install maximum versions of pandas and numpy - run: poetry add --lock 'pandas@<2' 'numpy<1.24' + - name: install exact versions of pyspark, pandas and numpy + run: poetry add --lock ${{ join(matrix.deps, ' ') }} - name: checkout the lock file run: git checkout poetry.lock diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index afa2479a44a0..ad4b8712de8a 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -4,6 +4,7 @@ import importlib import importlib.metadata import itertools +import operator from functools import cache from pathlib import Path from typing import TYPE_CHECKING, Any @@ -24,9 +25,25 @@ if TYPE_CHECKING: from collections.abc import Iterable - from ibis.backends.tests.base import BackendTest + +def compare_versions(module_name, given_version, op): + try: + current_version = importlib.metadata.version(module_name) + return op(vparse(current_version), vparse(given_version)) + except importlib.metadata.PackageNotFoundError: + return False + + +def is_newer_than(module_name, given_version): + return compare_versions(module_name, given_version, operator.gt) + + +def is_older_than(module_name, given_version): + return compare_versions(module_name, given_version, operator.lt) + + TEST_TABLES = { "functional_alltypes": ibis.schema( { @@ -333,10 +350,6 @@ def pytest_collection_modifyitems(session, config, items): ( item, [ - pytest.mark.xfail( - vparse(pd.__version__) >= vparse("2"), - reason="PySpark doesn't support pandas>=2", - ), pytest.mark.skipif( pyspark is not None and vparse(pyspark.__version__) < vparse("3.3.3") diff --git a/ibis/backends/pyspark/converter.py b/ibis/backends/pyspark/converter.py index 610ad9a04637..3258a1c1cb50 100644 --- a/ibis/backends/pyspark/converter.py +++ b/ibis/backends/pyspark/converter.py @@ -22,10 +22,21 @@ def convert(timedelta): @classmethod def convert_Timestamp_element(cls, dtype): - def converter(value, dtype=dtype): - if (tz := dtype.timezone) is not None: - return value.astimezone(normalize_timezone(tz)) - - return value.astimezone(normalize_timezone("UTC")).replace(tzinfo=None) + if dtype.timezone is None: + tz = normalize_timezone("UTC") + + def converter(value): + try: + return value.astimezone(tz).replace(tzinfo=None) + except TypeError: + return value.tz_localize(tz).replace(tzinfo=None) + else: + tz = normalize_timezone(dtype.timezone) + + def converter(value): + try: + return value.astimezone(tz) + except TypeError: + return value.tz_localize(tz) return converter diff --git a/ibis/backends/pyspark/datatypes.py b/ibis/backends/pyspark/datatypes.py index d45efc9dc2ed..ca2d0cd7d76b 100644 --- a/ibis/backends/pyspark/datatypes.py +++ b/ibis/backends/pyspark/datatypes.py @@ -12,6 +12,7 @@ # DayTimeIntervalType introduced in Spark 3.2 (at least) but didn't show up in # PySpark until version 3.3 PYSPARK_33 = vparse(pyspark.__version__) >= vparse("3.3") +PYSPARK_35 = 
vparse(pyspark.__version__) >= vparse("3.5") _from_pyspark_dtypes = { @@ -70,6 +71,8 @@ def to_ibis(cls, typ, nullable=True): return dt.Interval(unit, nullable=nullable) else: raise com.IbisTypeError(f"{typ!r} couldn't be converted to Interval") + elif PYSPARK_35 and isinstance(typ, pt.TimestampNTZType): + return dt.Timestamp(nullable=nullable) elif isinstance(typ, pt.UserDefinedType): return cls.to_ibis(typ.sqlType(), nullable=nullable) else: diff --git a/ibis/backends/pyspark/tests/test_basic.py b/ibis/backends/pyspark/tests/test_basic.py index f0e99a7c3bea..ff1631bf28e8 100644 --- a/ibis/backends/pyspark/tests/test_basic.py +++ b/ibis/backends/pyspark/tests/test_basic.py @@ -7,7 +7,6 @@ import ibis from ibis.common.exceptions import IbisTypeError -from ibis.expr import datatypes as dt pyspark = pytest.importorskip("pyspark") @@ -116,28 +115,6 @@ def test_alias_after_select(t, df): tm.assert_series_equal(result["id"], result["id2"], check_names=False) -def test_interval_columns(con): - table = con.table("interval_table") - assert table.schema() == ibis.schema( - pairs=[ - ("interval_day", dt.Interval("D")), - ("interval_hour", dt.Interval("h")), - ("interval_minute", dt.Interval("m")), - ("interval_second", dt.Interval("s")), - ] - ) - - expected = pd.DataFrame( - { - "interval_day": [pd.Timedelta("10d")], - "interval_hour": [pd.Timedelta("10h")], - "interval_minute": [pd.Timedelta("10m")], - "interval_second": [pd.Timedelta("10s")], - } - ) - tm.assert_frame_equal(table.execute(), expected) - - def test_interval_columns_invalid(con): msg = r"DayTimeIntervalType\(0, 1\) couldn't be converted to Interval" with pytest.raises(IbisTypeError, match=msg): diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py index 798f65c54c1d..87b7f5cdf6f0 100644 --- a/ibis/backends/tests/errors.py +++ b/ibis/backends/tests/errors.py @@ -38,6 +38,14 @@ PySparkIllegalArgumentException ) = PySparkParseException = PySparkPythonException = None +try: + # PySpark 3.5.0 + from pyspark.errors.exceptions.captured import ( + ArithmeticException as PySparkArithmeticException, + ) +except ImportError: + PySparkArithmeticException = None + try: from google.api_core.exceptions import BadRequest as GoogleBadRequest except ImportError: diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index 594fd6717b22..568fe35ecb89 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -15,6 +15,7 @@ DuckDBParserException, MySQLOperationalError, PyDeltaTableError, + PySparkArithmeticException, PySparkParseException, SnowflakeProgrammingError, TrinoUserError, @@ -357,7 +358,7 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): pytest.mark.notyet(["mysql"], raises=MySQLOperationalError), pytest.mark.notyet( ["pyspark"], - raises=PySparkParseException, + raises=(PySparkParseException, PySparkArithmeticException), reason="precision is out of range", ), pytest.mark.notyet(["exasol"], raises=sa.exc.DBAPIError), diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 573982a6675e..40aaa840640a 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -18,6 +18,7 @@ import ibis.expr.datatypes as dt import ibis.selectors as s from ibis import _ +from ibis.backends.conftest import is_older_than from ibis.backends.tests.errors import ( ClickHouseDatabaseError, ExaQueryError, @@ -877,7 +878,8 @@ def test_typeof(con): @pytest.mark.broken(["polars"], 
reason="incorrect answer") @pytest.mark.notyet(["impala"], reason="can't find table in subquery") -@pytest.mark.notimpl(["datafusion", "pyspark", "druid"]) +@pytest.mark.notimpl(["datafusion", "druid"]) +@pytest.mark.notimpl(["pyspark"], condition=is_older_than("pyspark", "3.5.0")) @pytest.mark.notyet(["dask", "mssql"], reason="not supported by the backend") @pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) @pytest.mark.broken( diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 4ad19845e538..e26eac9c13d8 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -25,6 +25,7 @@ MySQLOperationalError, PsycoPg2DivisionByZero, Py4JError, + PySparkArithmeticException, PySparkParseException, SnowflakeProgrammingError, TrinoUserError, @@ -409,7 +410,7 @@ def test_numeric_literal(con, backend, expr, expected_types): pytest.mark.broken( ["pyspark"], reason="Unsupported precision.", - raises=PySparkParseException, + raises=(PySparkParseException, PySparkArithmeticException), ), pytest.mark.broken( ["trino"], reason="Unsupported precision.", raises=TrinoUserError diff --git a/ibis/backends/tests/test_register.py b/ibis/backends/tests/test_register.py index 3aee23a6e6f8..f1ecdd6bdab6 100644 --- a/ibis/backends/tests/test_register.py +++ b/ibis/backends/tests/test_register.py @@ -11,7 +11,7 @@ from pytest import param import ibis -from ibis.backends.conftest import TEST_TABLES +from ibis.backends.conftest import TEST_TABLES, is_older_than if TYPE_CHECKING: from collections.abc import Iterator @@ -50,7 +50,9 @@ def gzip_csv(data_dir, tmp_path): "Diamonds2", id="csv_name", marks=pytest.mark.notyet( - ["pyspark"], reason="pyspark lowercases view names" + ["pyspark"], + reason="pyspark lowercases view names", + condition=is_older_than("pyspark", "3.5.0"), ), ), param( diff --git a/poetry.lock b/poetry.lock index 16f45735ba6b..2bc978b62c51 100644 --- a/poetry.lock +++ b/poetry.lock @@ -7362,4 +7362,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "f48fea54ccdbe62885012b0c4b7d4ea605d83ea59ea67405e28ecd51d066fe53" +content-hash = "09172b6afbe1dba300c2f24464e5955ee9bbd63c6f704384d30ac5f673f3ff93" diff --git a/pyproject.toml b/pyproject.toml index 27c944bc3fc6..250a664da1c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,7 @@ pydata-google-auth = { version = ">=1.4.0,<2", optional = true } pydruid = { version = ">=0.6.5,<1", optional = true, extras = ["sqlalchemy"] } pymysql = { version = ">=1,<2", optional = true } pyodbc = { version = ">=4.0.39,<6", optional = true } -pyspark = { version = ">=3,<3.4", optional = true } # pyspark is heavily broken by numpy >=1.24 and pandas >=2 +pyspark = { version = ">=3,<4", optional = true } # used to support posix regexen in the pandas, dask and sqlite backends regex = { version = ">=2021.7.6", optional = true } shapely = { version = ">=2,<3", optional = true } @@ -262,6 +262,8 @@ norecursedirs = [ filterwarnings = [ # fail on any warnings that are not explicitly matched below "error", + # pyspark uses a deprecated pandas API + "ignore:is_datetime64tz_dtype is deprecated and will be removed in a future version:DeprecationWarning", # pyspark and impala leave sockets open "ignore:Exception ignored in:", # dask From eb95cf36a76af7a835afd611bd4396ec177c02c3 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 16 Jan 2024 06:40:37 -0500 Subject: [PATCH 069/161] 
 refactor(pyspark): remove sqlalchemy dependency from pyspark

---
 ibis/backends/pyspark/__init__.py | 24 ++++++++++++++++--------
 poetry.lock                       |  6 +++---
 pyproject.toml                    |  2 +-
 3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py
index 0a9182061d03..5ea17b2d3ae8 100644
--- a/ibis/backends/pyspark/__init__.py
+++ b/ibis/backends/pyspark/__init__.py
@@ -7,7 +7,6 @@
 from typing import TYPE_CHECKING, Any

 import pyspark
-import sqlalchemy as sa
 import sqlglot as sg
 import sqlglot.expressions as sge
 from pyspark import SparkConf
@@ -104,15 +103,24 @@ class Options(ibis.config.Config):

     def _from_url(self, url: str, **kwargs) -> Backend:
         """Construct a PySpark backend from a URL `url`."""
-        url = sa.engine.make_url(url)
+        from urllib.parse import parse_qs, urlparse

-        conf = SparkConf().setAll(url.query.items())
+        url = urlparse(url)
+        query_params = parse_qs(url.query)
+        params = query_params.copy()

-        if database := url.database:
-            conf = conf.set(
-                "spark.sql.warehouse.dir",
-                str(Path(database).absolute()),
-            )
+        for name, value in query_params.items():
+            if len(value) > 1:
+                params[name] = value
+            elif len(value) == 1:
+                params[name] = value[0]
+            else:
+                raise com.IbisError(f"Invalid URL parameter: {name}")
+
+        conf = SparkConf().setAll(params.items())
+
+        if database := url.path[1:]:
+            conf = conf.set("spark.sql.warehouse.dir", str(Path(database).absolute()))

         builder = SparkSession.builder.config(conf=conf)
         session = builder.getOrCreate()
diff --git a/poetry.lock b/poetry.lock
index 2bc978b62c51..6855fa8b33c8 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -7352,8 +7352,8 @@ oracle = ["oracledb", "packaging", "sqlalchemy", "sqlalchemy-views"]
 pandas = ["regex"]
 polars = ["packaging", "polars"]
 postgres = ["psycopg2"]
-pyspark = ["packaging", "pyspark", "sqlalchemy"]
-risingwave = ["psycopg2", "sqlalchemy", "sqlalchemy-risingwave", "sqlalchemy-views"]
+risingwave = ["psycopg2"]
+pyspark = ["packaging", "pyspark"]
 snowflake = ["packaging", "snowflake-connector-python"]
 sqlite = ["regex", "sqlalchemy", "sqlalchemy-views"]
 trino = ["trino"]
@@ -7362,4 +7362,4 @@ visualization = ["graphviz"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "09172b6afbe1dba300c2f24464e5955ee9bbd63c6f704384d30ac5f673f3ff93"
+content-hash = "659771c151e098f1a48db403393d3072ad1f11935ed023415ffa2260c2e7e914"
diff --git a/pyproject.toml b/pyproject.toml
index 250a664da1c8..04f98497a8f5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -195,7 +195,7 @@ pandas = ["regex"]
 polars = ["polars", "packaging"]
 risingwave = ["psycopg2"]
 postgres = ["psycopg2"]
-pyspark = ["pyspark", "sqlalchemy", "packaging"]
+pyspark = ["pyspark", "packaging"]
 snowflake = ["snowflake-connector-python", "packaging"]
 sqlite = ["regex", "sqlalchemy", "sqlalchemy-views"]
 trino = ["trino"]

From 89e7ef6e6002569919b3b63e8ca19bba618e399b Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Tue, 16 Jan 2024 06:52:34 -0500
Subject: [PATCH 070/161] chore(deps): bump pyspark to 3.5 in poetry lock file

---
 poetry.lock          | 19 ++++++++++---------
 requirements-dev.txt |  4 ++--
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 6855fa8b33c8..5ab1bd3971a6 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -4415,13 +4415,13 @@ files = [

 [[package]]
 name = "py4j"
-version = "0.10.9.5"
+version = "0.10.9.7"
 description = "Enables Python programs to dynamically access arbitrary Java objects"
 optional = true
 python-versions = "*"
 files = [
-    {file = "py4j-0.10.9.5-py2.py3-none-any.whl", hash = "sha256:52d171a6a2b031d8a5d1de6efe451cf4f5baff1a2819aabc3741c8406539ba04"},
-    {file = "py4j-0.10.9.5.tar.gz", hash = "sha256:276a4a3c5a2154df1860ef3303a927460e02e97b047dc0a47c1c3fb8cce34db6"},
+    {file = "py4j-0.10.9.7-py2.py3-none-any.whl", hash = "sha256:85defdfd2b2376eb3abf5ca6474b51ab7e0de341c75a02f46dc9b5976f5a5c1b"},
+    {file = "py4j-0.10.9.7.tar.gz", hash = "sha256:0b6e5315bb3ada5cf62ac651d107bb2ebc02def3dee9d9548e3baac644ea8dbb"},
 ]

 [[package]]
@@ -4976,22 +4976,23 @@ files = [

 [[package]]
 name = "pyspark"
-version = "3.3.4"
+version = "3.5.0"
 description = "Apache Spark Python API"
 optional = true
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "pyspark-3.3.4.tar.gz", hash = "sha256:1f866be47130a522355240949ed50d9812a8f327bd7619f043ffe07fbcf7f7b6"},
+    {file = "pyspark-3.5.0.tar.gz", hash = "sha256:d41a9b76bd2aca370a6100d075c029e22ba44c5940927877e9435a3a9c566558"},
 ]

 [package.dependencies]
-py4j = "0.10.9.5"
+py4j = "0.10.9.7"

 [package.extras]
+connect = ["googleapis-common-protos (>=1.56.4)", "grpcio (>=1.56.0)", "grpcio-status (>=1.56.0)", "numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
 ml = ["numpy (>=1.15)"]
 mllib = ["numpy (>=1.15)"]
-pandas-on-spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"]
-sql = ["pandas (>=1.0.5)", "pyarrow (>=1.0.0)"]
+pandas-on-spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
+sql = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]

 [[package]]
 name = "pystac"
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 648abb407bab..9c9a9b891b2e 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -166,7 +166,7 @@ ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "4.0"
 pure-eval==0.2.2 ; python_version >= "3.9" and python_version < "4.0"
 pure-sasl==0.6.2 ; python_version >= "3.9" and python_version < "4.0"
 py-cpuinfo==9.0.0 ; python_version >= "3.9" and python_version < "4.0"
-py4j==0.10.9.5 ; python_version >= "3.9" and python_version < "4.0"
+py4j==0.10.9.7 ; python_version >= "3.9" and python_version < "4.0"
 pyarrow-hotfix==0.6 ; python_version >= "3.9" and python_version < "4.0"
 pyarrow==15.0.0 ; python_version >= "3.9" and python_version < "4.0"
 pyasn1-modules==0.3.0 ; python_version >= "3.9" and python_version < "4.0"
@@ -189,7 +189,7 @@ pyproj==3.6.1 ; python_version >= "3.9" and python_version < "4.0"
 pyproject-hooks==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
 pyshp==2.3.1 ; python_version >= "3.10" and python_version < "3.13"
 pysocks==1.7.1 ; python_version >= "3.10" and python_version < "3.13"
-pyspark==3.3.4 ; python_version >= "3.9" and python_version < "4.0"
+pyspark==3.5.0 ; python_version >= "3.9" and python_version < "4.0"
 pystac-client==0.7.5 ; python_version >= "3.10" and python_version < "3.13"
 pystac[validation]==1.9.0 ; python_version >= "3.10" and python_version < "3.13"
 pytest-benchmark==4.0.0 ; python_version >= "3.9" and python_version < "4.0"

From d231cf2998a0f52f3c4d361c6948299462dda8df Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 15 Jan 2024 10:16:39 -0500
Subject: [PATCH 071/161] fix(ir): only dereference comparisons not generic
 binary operations

---
 ibis/expr/tests/test_newrels.py | 42 +++++++++++++++++++++++++++++++++
 ibis/expr/types/joins.py        |  6 ++---
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py
index 7ba9b4b3a327..cdd4cd64b049 100644
--- a/ibis/expr/tests/test_newrels.py
+++ b/ibis/expr/tests/test_newrels.py
@@ -1314,3 +1314,45 @@ def test_join_method_docstrings():
         join_method = getattr(joined, method)
         table_method = getattr(t1, method)
         assert join_method.__doc__ == table_method.__doc__
+
+
+def test_join_with_compound_predicate():
+    t1 = ibis.table(name="t", schema={"a": "string", "b": "string"})
+    t2 = t1.view()
+
+    joined = t1.join(
+        t2,
+        [
+            t1.a == t2.a,
+            (t1.a != t2.b) | (t1.b != t2.a),
+            (t1.a != t2.b) ^ (t1.b != t2.a),
+            (t1.a != t2.b) & (t1.b != t2.a),
+            (t1.a + t1.a != t2.b) & (t1.b + t1.b != t2.a),
+        ],
+    )
+    expr = joined[t1]
+    with join_tables(t1, t2) as (r1, r2):
+        expected = ops.JoinChain(
+            first=r1,
+            rest=[
+                ops.JoinLink(
+                    "inner",
+                    r2,
+                    [
+                        r1.a == r2.a,
+                        (r1.a != r2.b) | (r1.b != r2.a),
+                        (r1.a != r2.b) ^ (r1.b != r2.a),
+                        # these are flattened
+                        r1.a != r2.b,
+                        r1.b != r2.a,
+                        r1.a + r1.a != r2.b,
+                        r1.b + r1.b != r2.a,
+                    ],
+                ),
+            ],
+            values={
+                "a": r1.a,
+                "b": r1.b,
+            },
+        )
+        assert expr.op() == expected
diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py
index 957d16f4253f..92d85f1918ab 100644
--- a/ibis/expr/types/joins.py
+++ b/ibis/expr/types/joins.py
@@ -81,15 +81,15 @@ def dereference_sides(left, right, deref_left, deref_right):
     return left, right


-def dereference_binop(pred, deref_left, deref_right):
+def dereference_comparison_op(pred, deref_left, deref_right):
     left, right = dereference_sides(pred.left, pred.right, deref_left, deref_right)
     return pred.copy(left=left, right=right)


 def dereference_value(pred, deref_left, deref_right):
     deref_both = {**deref_left, **deref_right}
-    if isinstance(pred, ops.Binary) and pred.left.relations == pred.right.relations:
-        return dereference_binop(pred, deref_left, deref_right)
+    if isinstance(pred, ops.Comparison) and pred.left.relations == pred.right.relations:
+        return dereference_comparison_op(pred, deref_left, deref_right)
     else:
         return pred.replace(deref_both, filter=ops.Value)


From 6c551489e59579ed5e7d6bd77c25a2f593a3479b Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Tue, 16 Jan 2024 13:59:30 -0500
Subject: [PATCH 072/161] chore: rename to `dereference_comparison`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Krisztián Szűcs

---
 ibis/expr/types/joins.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py
index 92d85f1918ab..000a2ecdfb2c 100644
--- a/ibis/expr/types/joins.py
+++ b/ibis/expr/types/joins.py
@@ -81,7 +81,7 @@ def dereference_sides(left, right, deref_left, deref_right):
     return left, right


-def dereference_comparison_op(pred, deref_left, deref_right):
+def dereference_comparison(pred, deref_left, deref_right):
     left, right = dereference_sides(pred.left, pred.right, deref_left, deref_right)
     return pred.copy(left=left, right=right)

@@ -89,7 +89,7 @@ def dereference_comparison_op(pred, deref_left, deref_right):
 def dereference_value(pred, deref_left, deref_right):
     deref_both = {**deref_left, **deref_right}
     if isinstance(pred, ops.Comparison) and pred.left.relations == pred.right.relations:
-        return dereference_comparison_op(pred, deref_left, deref_right)
+        return dereference_comparison(pred, deref_left, deref_right)
     else:
         return pred.replace(deref_both, filter=ops.Value)


From da8b3ef499cd943cee050b57fc80f90f26add06d Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Wed, 17 Jan 2024 10:54:32 -0500
Subject: [PATCH 073/161] chore(deps): relock

---
 requirements-dev.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index 9c9a9b891b2e..83d72c424fce 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -48,7 +48,7 @@ debugpy==1.8.0 ; python_version >= "3.10" and python_version < "3.13"
 decorator==5.1.1 ; python_version >= "3.9" and python_version < "4.0"
 deltalake==0.15.1 ; python_version >= "3.9" and python_version < "4.0"
 distlib==0.3.8 ; python_version >= "3.9" and python_version < "4.0"
-distributed==2024.1.1 ; python_version >= "3.10" and python_version < "3.13"
+distributed==2024.1.0 ; python_version >= "3.10" and python_version < "3.13"
 duckdb==0.9.2 ; python_version >= "3.9" and python_version < "4.0"
 dulwich==0.21.7 ; python_version >= "3.9" and python_version < "4.0"
 dunamai==1.19.0 ; python_version >= "3.9" and python_version < "4.0"
@@ -230,7 +230,7 @@ setuptools==69.0.3 ; python_version >= "3.9" and python_version < "4.0"
 shapely==2.0.2 ; python_version >= "3.9" and python_version < "4.0"
 shellingham==1.5.4 ; python_version >= "3.9" and python_version < "4.0"
 six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
-snowflake-connector-python==3.7.0 ; python_version >= "3.9" and python_version < "4.0"
+snowflake-connector-python==3.6.0 ; python_version >= "3.9" and python_version < "4.0"
 sortedcontainers==2.4.0 ; python_version >= "3.9" and python_version < "4.0"
 soupsieve==2.5 ; python_version >= "3.10" and python_version < "3.13"
 sphobjinv==2.3.1 ; python_version >= "3.10" and python_version < "3.13"
@@ -255,8 +255,8 @@ tornado==6.4 ; python_version >= "3.10" and python_version < "3.13"
 tqdm==4.66.1 ; python_version >= "3.9" and python_version < "4.0"
 traitlets==5.14.1 ; python_version >= "3.9" and python_version < "4.0"
 traittypes==0.2.1 ; python_version >= "3.10" and python_version < "3.13"
-trino[sqlalchemy]==0.327.0 ; python_version >= "3.9" and python_version < "4.0"
-trove-classifiers==2024.1.31 ; python_version >= "3.9" and python_version < "4.0"
+trino==0.327.0 ; python_version >= "3.9" and python_version < "4.0"
+trove-classifiers==2024.1.8 ; python_version >= "3.9" and python_version < "4.0"
 typing-extensions==4.9.0 ; python_version >= "3.9" and python_version < "4.0"
 tzdata==2023.4 ; python_version >= "3.9" and python_version < "4.0"
 tzlocal==5.2 ; python_version >= "3.9" and python_version < "4.0"

From e09e7338aee1675e4ba8c401863a5d5b7fb1ec50 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Wed, 17 Jan 2024 10:58:48 -0500
Subject: [PATCH 074/161] test(generic): clean up try_cast to null test

---
 ibis/backends/tests/test_generic.py | 37 +++++------------------------
 1 file changed, 6 insertions(+), 31 deletions(-)

diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py
index 40aaa840640a..4b7ab1330493 100644
--- a/ibis/backends/tests/test_generic.py
+++ b/ibis/backends/tests/test_generic.py
@@ -1406,43 +1406,20 @@ def test_try_cast(con, from_val, to_type, expected):

 @pytest.mark.notimpl(
     [
-        "pandas",
-        "dask",
-        "druid",
-        "impala",
-        "mssql",
-        "oracle",
-        "snowflake",
-        "sqlite",
-        "exasol",
-    ]
-)
-@pytest.mark.notyet(["flink"], reason="casts to nan")
-@pytest.mark.notyet(["datafusion"])
-@pytest.mark.notimpl(["postgres"], raises=PsycoPg2InvalidTextRepresentation)
-@pytest.mark.notyet(["mysql"], reason="returns 0")
-def test_try_cast_returns_null(con):
-    expr = ibis.literal("a").try_cast("int")
-    result = con.execute(expr)
-    assert pd.isna(result)
-
-
-@pytest.mark.notimpl(
-    [
-        "pandas",
-        "dask",
         "bigquery",
+        "dask",
         "datafusion",
         "druid",
+        "exasol",
         "impala",
         "mssql",
         "mysql",
         "oracle",
+        "pandas",
         "postgres",
         "risingwave",
         "snowflake",
         "sqlite",
-        "exasol",
     ]
 )
 @pytest.mark.parametrize(
     ("from_val", "to_type"),
     [
         param(
             datetime.datetime(2023, 1, 1),
             "int",
             marks=[
-                pytest.mark.never(
-                    ["clickhouse", "flink"], reason="casts to 1672531200"
-                ),
-                pytest.mark.notyet(["trino"], raises=sa.exc.ProgrammingError),
+                pytest.mark.never(["clickhouse"], reason="casts to 1672531200"),
+                pytest.mark.notyet(["trino"], raises=TrinoUserError),
                 pytest.mark.broken(["polars"], reason="casts to 1672531200000000000"),
             ],
         ),
     ],
     ids=str,
 )
-def test_try_cast_expected_null(con, from_val, to_type):
+def test_try_cast_null(con, from_val, to_type):
     assert pd.isna(con.execute(ibis.literal(from_val).try_cast(to_type)))

From 5c96a9f49944c92af9d88698f2f6517af05db01e Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Wed, 17 Jan 2024 11:20:01 -0500
Subject: [PATCH 075/161] fix(polars): use newer `drop` API to avoid
 deprecation warning

---
 ibis/backends/polars/compiler.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py
index f9acc6d01664..95a9749c67ab 100644
--- a/ibis/backends/polars/compiler.py
+++ b/ibis/backends/polars/compiler.py
@@ -313,7 +313,11 @@ def join(op, **kw):
         left, right = right, left

     joined = left.join(right, on=on, how=how)
-    joined = joined.drop(columns=on)
+
+    try:
+        joined = joined.drop(*on)
+    except TypeError:
+        joined = joined.drop(columns=on)
     return joined


From 4edd78b9c3ea8bbf31625161e33d7ef04e33ac10 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Wed, 17 Jan 2024 11:52:03 -0500
Subject: [PATCH 076/161] fix(polars): use newer `drop` API in asof join
 implementation

---
 ibis/backends/polars/compiler.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py
index 95a9749c67ab..f636c584a47e 100644
--- a/ibis/backends/polars/compiler.py
+++ b/ibis/backends/polars/compiler.py
@@ -351,7 +351,10 @@ def asof_join(op, **kw):
     assert len(on) == 1

     joined = left.join_asof(right, on=on[0], by=by, strategy=direction)
-    joined = joined.drop(columns=on + by)
+    try:
+        joined = joined.drop(*(on + by))
+    except TypeError:
+        joined = joined.drop(columns=on + by)
     return joined


From e0ab99bfaa27caf9bdb9193a268475d06ea7aedc Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Tue, 16 Jan 2024 13:21:14 -0500
Subject: [PATCH 077/161]
.../test_sql/test_isin_bug/druid/out.sql | 9 + ibis/backends/tests/test_aggregation.py | 59 ++--- ibis/backends/tests/test_api.py | 5 +- ibis/backends/tests/test_asof_join.py | 4 +- ibis/backends/tests/test_binary.py | 2 +- ibis/backends/tests/test_client.py | 9 +- .../tests/test_dataframe_interchange.py | 3 +- ibis/backends/tests/test_dot_sql.py | 5 +- ibis/backends/tests/test_export.py | 18 +- ibis/backends/tests/test_generic.py | 40 +-- ibis/backends/tests/test_join.py | 7 +- ibis/backends/tests/test_numeric.py | 108 +++----- ibis/backends/tests/test_set_ops.py | 14 +- ibis/backends/tests/test_sql.py | 2 +- ibis/backends/tests/test_string.py | 71 ++---- ibis/backends/tests/test_temporal.py | 146 ++--------- ibis/backends/tests/test_uuid.py | 16 +- ibis/backends/tests/test_window.py | 20 +- ibis/formats/pandas.py | 2 + poetry.lock | 5 +- pyproject.toml | 4 +- requirements-dev.txt | 4 +- 35 files changed, 631 insertions(+), 654 deletions(-) delete mode 100644 ibis/backends/druid/datatypes.py delete mode 100644 ibis/backends/druid/registry.py create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/druid/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/druid/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/druid/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/druid/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/druid/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_isin_bug/druid/out.sql diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index ad3ec014a1dc..d3ddc2b3c4df 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -149,12 +149,12 @@ jobs: - trino services: - trino - # - name: druid - # title: Druid - # extras: - # - druid - # services: - # - druid + - name: druid + title: Druid + extras: + - druid + services: + - druid # - name: oracle # title: Oracle # serial: true @@ -262,14 +262,14 @@ jobs: - trino extras: - trino - # - os: windows-latest - # backend: - # name: druid - # title: Druid - # extras: - # - druid - # services: - # - druid + - os: windows-latest + backend: + name: druid + title: Druid + extras: + - druid + services: + - druid # - os: windows-latest # backend: # name: oracle diff --git a/docker/druid/environment b/docker/druid/environment index f754bfd0211a..29496ad75635 100644 --- a/docker/druid/environment +++ b/docker/druid/environment @@ -31,7 +31,7 @@ druid_extensions_loadList=["postgresql-metadata-storage", "druid-multi-stage-que druid_zk_service_host=zookeeper druid_worker_capacity=6 -druid_generic_useDefaultValueForNull=true +druid_generic_useDefaultValueForNull=false druid_metadata_storage_host= druid_metadata_storage_type=postgresql diff --git a/ibis/backends/druid/__init__.py b/ibis/backends/druid/__init__.py index f3544267237e..77024e3023e4 100644 --- a/ibis/backends/druid/__init__.py +++ b/ibis/backends/druid/__init__.py @@ -4,101 +4,104 @@ import contextlib import json -import warnings from typing import TYPE_CHECKING, Any +from urllib.parse import parse_qs, urlparse -import sqlalchemy as sa +import pydruid +import sqlglot as sg +import ibis.common.exceptions as com import ibis.expr.datatypes as dt -from ibis.backends.base.sql.alchemy import BaseAlchemyBackend +import ibis.expr.schema as sch +from ibis.backends.base.sqlglot import 
SQLGlotBackend +from ibis.backends.base.sqlglot.compiler import STAR +from ibis.backends.base.sqlglot.datatypes import DruidType from ibis.backends.druid.compiler import DruidCompiler -from ibis.backends.druid.datatypes import ( - DruidBinary, - DruidDateTime, - DruidString, - DruidType, -) if TYPE_CHECKING: - from collections.abc import Iterable + from collections.abc import Iterable, Mapping + import pandas as pd + import pyarrow as pa -class Backend(BaseAlchemyBackend): + import ibis.expr.types as ir + + +class Backend(SQLGlotBackend): name = "druid" - compiler = DruidCompiler + compiler = DruidCompiler() supports_create_or_replace = False + supports_in_memory_tables = True @property - def current_database(self) -> str: - # https://druid.apache.org/docs/latest/querying/sql-metadata-tables.html#schemata-table - return "druid" + def version(self) -> str: + with self._safe_raw_sql("SELECT version()") as result: + [(version,)] = result.fetchall() + return version - def do_connect( - self, - host: str = "localhost", - port: int = 8082, - database: str | None = "druid/v2/sql", - **_: Any, - ) -> None: - """Create an Ibis client using the passed connection parameters. + def _from_url(self, url: str, **kwargs): + """Connect to a backend using a URL `url`. Parameters ---------- - host - Hostname - port - Port - database - Database to connect to + url + URL with which to connect to a backend. + kwargs + Additional keyword arguments + + Returns + ------- + BaseBackend + A backend instance """ - url = sa.engine.url.make_url(f"druid://{host}:{port}/{database}?header=true") - - self.database_name = "default" # not sure what should go here - engine = sa.create_engine(url, poolclass=sa.pool.StaticPool) - - super().do_connect(engine) - - # workaround a broken pydruid `has_table` implementation - engine.dialect.has_table = self._has_table + url = urlparse(url) + query_params = parse_qs(url.query) + kwargs = { + "user": url.username, + "password": url.password, + "host": url.hostname, + "path": url.path, + "port": url.port, + } | kwargs + + for name, value in query_params.items(): + if len(value) > 1: + kwargs[name] = value + elif len(value) == 1: + kwargs[name] = value[0] + else: + raise com.IbisError(f"Invalid URL parameter: {name}") - # don't double percent signs - engine.dialect.identifier_preparer._double_percents = False + self._convert_kwargs(kwargs) - @staticmethod - def _new_sa_metadata(): - meta = sa.MetaData() + return self.connect(**kwargs) - @sa.event.listens_for(meta, "column_reflect") - def column_reflect(inspector, table, column_info): - if isinstance(typ := column_info["type"], sa.DateTime): - column_info["type"] = DruidDateTime() - elif isinstance(typ, (sa.LargeBinary, sa.BINARY, sa.VARBINARY)): - column_info["type"] = DruidBinary() - elif isinstance(typ, sa.String): - column_info["type"] = DruidString() + @property + def current_database(self) -> str: + # https://druid.apache.org/docs/latest/querying/sql-metadata-tables.html#schemata-table + return "druid" - return meta + def do_connect(self, **kwargs: Any) -> None: + """Create an Ibis client using the passed connection parameters.""" + header = kwargs.pop("header", True) + self.con = pydruid.db.connect(**kwargs, header=header) + self._temp_views = set() @contextlib.contextmanager def _safe_raw_sql(self, query, *args, **kwargs): - query = query.compile( - dialect=self.con.dialect, compile_kwargs=dict(literal_binds=True) - ) + with contextlib.suppress(AttributeError): + query = query.sql(dialect=self.compiler.dialect) - with 
warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - message="Dialect druid:rest will not make use of SQL compilation caching", - category=sa.exc.SAWarning, - ) - with self.begin() as con: - yield con.execute(query, *args, **kwargs) + with contextlib.closing(self.con.cursor()) as cur: + cur.execute(query, *args, **kwargs) + yield cur def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]: - result = self._scalar_query(f"EXPLAIN PLAN FOR {query}") + with self._safe_raw_sql(f"EXPLAIN PLAN FOR {query}") as result: + [(row, *_)] = result.fetchall() - (plan,) = json.loads(result) + (plan,) = json.loads(row) for column in plan["signature"]: name, typ = column["name"], column["type"] if name == "__time": @@ -107,33 +110,71 @@ def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]: dtype = DruidType.from_string(typ) yield name, dtype - def _get_temp_view_definition( - self, name: str, definition: sa.sql.compiler.Compiled - ) -> str: - raise NotImplementedError() - - def _has_table(self, connection, table_name: str, schema) -> bool: - t = sa.table( - "TABLES", sa.column("TABLE_NAME", sa.TEXT), schema="INFORMATION_SCHEMA" + def get_schema( + self, table_name: str, schema: str | None = None, database: str | None = None + ) -> sch.Schema: + name_type_pairs = self._metadata( + sg.select(STAR) + .from_(sg.table(table_name, db=schema, catalog=database)) + .sql(self.compiler.dialect) ) - query = sa.select( - sa.func.sum(sa.cast(t.c.TABLE_NAME == table_name, sa.INTEGER)) - ).compile(dialect=self.con.dialect) - - return bool(connection.execute(query).scalar()) - - def _get_sqla_table( - self, name: str, autoload: bool = True, **kwargs: Any - ) -> sa.Table: - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - message="|".join( # noqa: FLY002 - ( - "Did not recognize type", - "Dialect druid:rest will not make use of SQL compilation caching", - ) - ), - category=sa.exc.SAWarning, + return sch.Schema.from_tuples(name_type_pairs) + + def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: + import pandas as pd + + from ibis.formats.pandas import PandasData + + try: + df = pd.DataFrame.from_records( + cursor, columns=schema.names, coerce_float=True ) - return super()._get_sqla_table(name, autoload=autoload, **kwargs) + except Exception: + # clean up the cursor if we fail to create the DataFrame + cursor.close() + raise + df = PandasData.convert_table(df, schema) + return df + + def create_table( + self, + name: str, + obj: pd.DataFrame | pa.Table | ir.Table | None = None, + *, + schema: sch.Schema | None = None, + database: str | None = None, + temp: bool = False, + overwrite: bool = False, + ) -> ir.Table: + raise NotImplementedError() + + def list_tables( + self, like: str | None = None, database: str | None = None + ) -> list[str]: + t = sg.table("TABLES", db="INFORMATION_SCHEMA", quoted=True) + c = self.compiler + query = sg.select(sg.column("TABLE_NAME", quoted=True)).from_(t).sql(c.dialect) + + with self._safe_raw_sql(query) as result: + tables = result.fetchall() + return self._filter_with_like([table.TABLE_NAME for table in tables], like=like) + + def _register_in_memory_tables(self, expr): + """No-op. 
Table are inlined, for better or worse.""" + + def _cursor_batches( + self, + expr: ir.Expr, + params: Mapping[ir.Scalar, Any] | None = None, + limit: int | str | None = None, + chunk_size: int = 1 << 20, + ) -> Iterable[list]: + self._run_pre_execute_hooks(expr) + + dtypes = expr.as_table().schema().values() + + with self._safe_raw_sql( + self.compile(expr, limit=limit, params=params) + ) as cursor: + while batch := cursor.fetchmany(chunk_size): + yield (tuple(map(dt.normalize, dtypes, row)) for row in batch) diff --git a/ibis/backends/druid/compiler.py b/ibis/backends/druid/compiler.py index 6c766af97111..43fd1c1559fd 100644 --- a/ibis/backends/druid/compiler.py +++ b/ibis/backends/druid/compiler.py @@ -1,33 +1,232 @@ from __future__ import annotations -import contextlib +from functools import singledispatchmethod -import sqlalchemy as sa +import sqlglot as sg +import sqlglot.expressions as sge +import toolz +from sqlglot import exp +from sqlglot.dialects import Postgres +from sqlglot.dialects.dialect import rename_func -import ibis.backends.druid.datatypes as ddt -from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator -from ibis.backends.druid.registry import operation_registry +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis.backends.base.sqlglot.compiler import NULL, SQLGlotCompiler +from ibis.backends.base.sqlglot.datatypes import DruidType from ibis.expr.rewrites import rewrite_sample -class DruidExprTranslator(AlchemyExprTranslator): - _registry = operation_registry.copy() - _rewrites = AlchemyExprTranslator._rewrites.copy() - _dialect_name = "druid" +# Is postgres the best dialect to inherit from? +class Druid(Postgres): + """The druid dialect.""" - type_mapper = ddt.DruidType + class Generator(Postgres.Generator): + TRANSFORMS = Postgres.Generator.TRANSFORMS.copy() | { + exp.ApproxDistinct: rename_func("approx_count_distinct"), + exp.Pow: rename_func("power"), + } - def translate(self, op): - result = super().translate(op) - with contextlib.suppress(AttributeError): - result = result.scalar_subquery() - return sa.type_coerce(result, self.type_mapper.from_ibis(op.dtype)) +class DruidCompiler(SQLGlotCompiler): + __slots__ = () -rewrites = DruidExprTranslator.rewrites + dialect = "druid" + type_mapper = DruidType + quoted = True + rewrites = (rewrite_sample, *SQLGlotCompiler.rewrites) + def _aggregate(self, funcname: str, *args, where): + expr = self.f[funcname](*args) + if where is not None: + return sg.exp.Filter(this=expr, expression=sg.exp.Where(this=where)) + return expr -class DruidCompiler(AlchemyCompiler): - translator_class = DruidExprTranslator - null_limit = sa.literal_column("ALL") - rewrites = AlchemyCompiler.rewrites | rewrite_sample + @singledispatchmethod + def visit_node(self, op, **kw): + return super().visit_node(op, **kw) + + @visit_node.register(ops.InMemoryTable) + def visit_InMemoryTable(self, op, *, name, schema, data): + # the performance of this is rather terrible + tuples = data.to_frame().itertuples(index=False) + quoted = self.quoted + columns = [sg.column(col, quoted=quoted) for col in schema.names] + expr = sge.Values( + expressions=[ + sge.Tuple(expressions=tuple(map(sge.convert, row))) for row in tuples + ], + alias=sge.TableAlias( + this=sg.to_identifier(name, quoted=quoted), + columns=columns, + ), + ) + return sg.select(*columns).from_(expr) + + @visit_node.register(ops.StringJoin) + def visit_StringJoin(self, op, *, arg, sep): + return 
self.f.concat(*toolz.interpose(sep, arg)) + + @visit_node.register(ops.Pi) + def visit_Pi(self, op): + return self.f.acos(-1) + + @visit_node.register(ops.Sign) + def visit_Sign(self, op, *, arg): + return self.if_(arg.eq(0), 0, self.if_(arg > 0, 1, -1)) + + @visit_node.register(ops.GroupConcat) + def visit_GroupConcat(self, op, *, arg, sep, where): + return self.agg.string_agg(arg, sep, 1 << 20, where=where) + + @visit_node.register(ops.StartsWith) + def visit_StartsWith(self, op, *, arg, start): + return self.f.left(arg, self.f.length(start)).eq(start) + + @visit_node.register(ops.EndsWith) + def visit_EndsWith(self, op, *, arg, end): + return self.f.right(arg, self.f.length(end)).eq(end) + + @visit_node.register(ops.Capitalize) + def visit_Capitalize(self, op, *, arg): + return self.if_( + self.f.length(arg) < 2, + self.f.upper(arg), + self.f.concat( + self.f.upper(self.f.substr(arg, 1, 1)), + self.f.lower(self.f.substr(arg, 2)), + ), + ) + + @visit_node.register(ops.RegexSearch) + def visit_RegexSearch(self, op, *, arg, pattern): + return self.f.anon.regexp_like(arg, pattern) + + @visit_node.register(ops.StringSQLILike) + def visit_StringSQLILike(self, op, *, arg, pattern, escape): + if escape is not None: + raise NotImplementedError("non-None escape not supported") + return self.f.upper(arg).like(self.f.upper(pattern)) + + @visit_node.register(ops.Literal) + def visit_Literal(self, op, *, value, dtype): + if value is None: + return NULL + return super().visit_Literal(op, value=value, dtype=dtype) + + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_uuid(): + return sge.convert(str(value)) + + return None + + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + from_ = op.arg.dtype + if from_.is_integer() and to.is_timestamp(): + # seconds since UNIX epoch + return self.f.millis_to_timestamp(arg * 1_000) + elif from_.is_string() and to.is_timestamp(): + return self.f.time_parse(arg) + return super().visit_Cast(op, arg=arg, to=to) + + @visit_node.register(ops.TimestampFromYMDHMS) + def visit_TimestampFromYMDHMS( + self, op, *, year, month, day, hours, minutes, seconds + ): + return self.f.time_parse( + self.f.concat( + self.f.lpad(self.cast(year, dt.string), 4, "0"), + "-", + self.f.lpad(self.cast(month, dt.string), 2, "0"), + "-", + self.f.lpad(self.cast(day, dt.string), 2, "0"), + "T", + self.f.lpad(self.cast(hours, dt.string), 2, "0"), + ":", + self.f.lpad(self.cast(minutes, dt.string), 2, "0"), + ":", + self.f.lpad(self.cast(seconds, dt.string), 2, "0"), + "Z", + ) + ) + + @visit_node.register(ops.ApproxMedian) + @visit_node.register(ops.Arbitrary) + @visit_node.register(ops.ArgMax) + @visit_node.register(ops.ArgMin) + @visit_node.register(ops.ArrayCollect) + @visit_node.register(ops.ArrayDistinct) + @visit_node.register(ops.ArrayFilter) + @visit_node.register(ops.ArrayFlatten) + @visit_node.register(ops.ArrayIntersect) + @visit_node.register(ops.ArrayMap) + @visit_node.register(ops.ArraySort) + @visit_node.register(ops.ArrayUnion) + @visit_node.register(ops.ArrayZip) + @visit_node.register(ops.CountDistinctStar) + @visit_node.register(ops.Covariance) + @visit_node.register(ops.DateDelta) + @visit_node.register(ops.DayOfWeekIndex) + @visit_node.register(ops.DayOfWeekName) + @visit_node.register(ops.First) + @visit_node.register(ops.IntervalFromInteger) + @visit_node.register(ops.IsNan) + @visit_node.register(ops.IsInf) + @visit_node.register(ops.Last) + @visit_node.register(ops.Levenshtein) + @visit_node.register(ops.Median) + 
@visit_node.register(ops.MultiQuantile) + @visit_node.register(ops.Quantile) + @visit_node.register(ops.RegexReplace) + @visit_node.register(ops.RegexSplit) + @visit_node.register(ops.RowID) + @visit_node.register(ops.StandardDev) + @visit_node.register(ops.Strftime) + @visit_node.register(ops.StringAscii) + @visit_node.register(ops.StringSplit) + @visit_node.register(ops.StringToTimestamp) + @visit_node.register(ops.TimeDelta) + @visit_node.register(ops.TimestampBucket) + @visit_node.register(ops.TimestampDelta) + @visit_node.register(ops.TimestampNow) + @visit_node.register(ops.Translate) + @visit_node.register(ops.TypeOf) + @visit_node.register(ops.Unnest) + @visit_node.register(ops.Variance) + def visit_Undefined(self, op, **_): + raise com.OperationNotDefinedError(type(op).__name__) + + +_SIMPLE_OPS = { + ops.BitAnd: "bit_and", + ops.BitOr: "bit_or", + ops.BitXor: "bit_xor", + ops.BitwiseAnd: "bitwise_and", + ops.BitwiseNot: "bitwise_complement", + ops.BitwiseOr: "bitwise_or", + ops.BitwiseXor: "bitwise_xor", + ops.BitwiseLeftShift: "bitwise_shift_left", + ops.BitwiseRightShift: "bitwise_shift_right", + ops.Modulus: "mod", + ops.Power: "power", + ops.Log10: "log10", + ops.ApproxCountDistinct: "approx_count_distinct", + ops.StringContains: "contains_string", +} + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @DruidCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) + + else: + + @DruidCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) + + setattr(DruidCompiler, f"visit_{_op.__name__}", _fmt) diff --git a/ibis/backends/druid/datatypes.py b/ibis/backends/druid/datatypes.py deleted file mode 100644 index c60b5505357e..000000000000 --- a/ibis/backends/druid/datatypes.py +++ /dev/null @@ -1,89 +0,0 @@ -from __future__ import annotations - -import sqlalchemy as sa -import sqlalchemy.types as sat -from dateutil.parser import parse as timestamp_parse -from sqlalchemy.ext.compiler import compiles - -import ibis.expr.datatypes as dt -from ibis.backends.base.sql.alchemy.datatypes import AlchemyType -from ibis.backends.base.sqlglot.datatypes import DruidType as SqlglotDruidType - - -class DruidDateTime(sat.TypeDecorator): - impl = sa.TIMESTAMP - - cache_ok = True - - def process_result_value(self, value, dialect): - return None if value is None else timestamp_parse(value) - - -class DruidBinary(sa.LargeBinary): - def result_processor(self, dialect, coltype): - def process(value): - return None if value is None else value.encode("utf-8") - - return process - - -class DruidString(sat.TypeDecorator): - impl = sa.String - - cache_ok = True - - def process_result_value(self, value, dialect): - return value - - -@compiles(sa.BIGINT, "druid") -@compiles(sa.BigInteger, "druid") -def _bigint(element, compiler, **kw): - return "BIGINT" - - -@compiles(sa.INTEGER, "druid") -@compiles(sa.Integer, "druid") -def _integer(element, compiler, **kw): - return "INTEGER" - - -@compiles(sa.SMALLINT, "druid") -@compiles(sa.SmallInteger, "druid") -def _smallint(element, compiler, **kw): - return "SMALLINT" - - -@compiles(DruidString, "druid") -def _string(element, compiler, **kw): - return "VARCHAR" - - -class DruidType(AlchemyType): - dialect = "hive" - - @classmethod - def to_ibis(cls, typ, nullable=True): - if isinstance(typ, DruidDateTime): - return 
dt.Timestamp(nullable=nullable) - elif isinstance(typ, DruidBinary): - return dt.Binary(nullable=nullable) - elif isinstance(typ, DruidString): - return dt.String(nullable=nullable) - else: - return super().to_ibis(typ, nullable=nullable) - - @classmethod - def from_ibis(cls, dtype): - if dtype.is_timestamp(): - return DruidDateTime() - elif dtype.is_binary(): - return DruidBinary() - elif dtype.is_string(): - return DruidString() - else: - return super().from_ibis(dtype) - - @classmethod - def from_string(cls, type_string, nullable=True): - return SqlglotDruidType.from_string(type_string, nullable=nullable) diff --git a/ibis/backends/druid/registry.py b/ibis/backends/druid/registry.py deleted file mode 100644 index 64dd5e4dc2c5..000000000000 --- a/ibis/backends/druid/registry.py +++ /dev/null @@ -1,59 +0,0 @@ -from __future__ import annotations - -import sqlalchemy as sa -import toolz - -import ibis.expr.operations as ops -from ibis.backends.base.sql.alchemy import ( - fixed_arity, - sqlalchemy_operation_registry, - sqlalchemy_window_functions_registry, - unary, -) - -operation_registry = sqlalchemy_operation_registry.copy() - -operation_registry.update(sqlalchemy_window_functions_registry) - - -def _sign(t, op): - arg = op.arg - cond1 = ops.IfElse(ops.Greater(arg, 0), 1, -1) - cond2 = ops.IfElse(ops.Equals(arg, 0), 0, cond1) - return t.translate(cond2) - - -def _join(t, op): - sep = t.translate(op.sep) - values = list(map(t.translate, op.arg)) - return sa.func.concat(*toolz.interpose(sep, values)) - - -operation_registry.update( - { - ops.BitwiseAnd: fixed_arity(sa.func.bitwise_and, 2), - ops.BitwiseNot: unary(sa.func.bitwise_complement), - ops.BitwiseOr: fixed_arity(sa.func.bitwise_or, 2), - ops.BitwiseXor: fixed_arity(sa.func.bitwise_xor, 2), - ops.BitwiseLeftShift: fixed_arity(sa.func.bitwise_shift_left, 2), - ops.BitwiseRightShift: fixed_arity(sa.func.bitwise_shift_right, 2), - ops.Pi: fixed_arity(lambda: sa.func.acos(-1), 0), - ops.Modulus: fixed_arity(sa.func.mod, 2), - ops.Power: fixed_arity(sa.func.power, 2), - ops.Log10: fixed_arity(sa.func.log10, 1), - ops.Sign: _sign, - ops.StringJoin: _join, - ops.RegexSearch: fixed_arity(sa.func.regexp_like, 2), - } -) - -_invalid_operations = { - # ibis.expr.operations.generic - ops.RandomScalar, - # ibis.expr.operations.strings - ops.StringAscii, -} - -operation_registry = { - k: v for k, v in operation_registry.items() if k not in _invalid_operations -} diff --git a/ibis/backends/druid/tests/conftest.py b/ibis/backends/druid/tests/conftest.py index b95227a9aa61..c3c216a79a88 100644 --- a/ibis/backends/druid/tests/conftest.py +++ b/ibis/backends/druid/tests/conftest.py @@ -18,6 +18,8 @@ from collections.abc import Iterable from pathlib import Path + import ibis.expr.types as ir + DRUID_URL = os.environ.get( "DRUID_URL", "druid://localhost:8082/druid/v2/sql?header=true" ) @@ -101,7 +103,16 @@ class TestConf(ServiceBackendTest): supports_json = False # it does, but we haven't implemented it rounding_method = "half_to_even" service_name = "druid-middlemanager" - deps = ("pydruid.db.sqlalchemy",) + deps = ("pydruid.db",) + + @property + def functional_alltypes(self) -> ir.Table: + t = self.connection.table( + self.default_identifier_case_fn("functional_alltypes") + ) + # The parquet loading for booleans appears to be broken in Druid, so + # I'm using this as a workaround to make the data match what's on disk. 
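+        # 1 - id % 2 alternates between 1 and 0 with the parity of `id`, mimicking the boolean column.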
+ return t.mutate(bool_col=1 - t.id % 2) @property def test_files(self) -> Iterable[Path]: diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py index 87b7f5cdf6f0..a21b80556acb 100644 --- a/ibis/backends/tests/errors.py +++ b/ibis/backends/tests/errors.py @@ -107,3 +107,8 @@ from pymysql.err import ProgrammingError as MySQLProgrammingError except ImportError: MySQLNotSupportedError = MySQLProgrammingError = MySQLOperationalError = None + +try: + from pydruid.db.exceptions import ProgrammingError as PyDruidProgrammingError +except ImportError: + PyDruidProgrammingError = None diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/druid/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/druid/out.sql new file mode 100644 index 000000000000..e69e89fc3d4b --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/druid/out.sql @@ -0,0 +1,7 @@ +SELECT + "t0"."id", + 1 - ( + MOD("t0"."id", 2) + ) AS "bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/druid/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/druid/out.sql new file mode 100644 index 000000000000..e69e89fc3d4b --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/druid/out.sql @@ -0,0 +1,7 @@ +SELECT + "t0"."id", + 1 - ( + MOD("t0"."id", 2) + ) AS "bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/druid/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/druid/out.sql new file mode 100644 index 000000000000..6195869f606c --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/druid/out.sql @@ -0,0 +1,22 @@ +SELECT + SUM("t1"."bigint_col") AS "Sum(bigint_col)" +FROM ( + SELECT + "t0"."__time", + "t0"."id", + 1 - ( + MOD("t0"."id", 2) + ) AS "bool_col", + "t0"."tinyint_col", + "t0"."smallint_col", + "t0"."int_col", + "t0"."bigint_col", + "t0"."float_col", + "t0"."double_col", + "t0"."date_string_col", + "t0"."string_col", + "t0"."timestamp_col", + "t0"."year", + "t0"."month" + FROM "functional_alltypes" AS "t0" +) AS "t1" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/druid/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/druid/out.sql new file mode 100644 index 000000000000..38962733a72a --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/druid/out.sql @@ -0,0 +1,12 @@ +SELECT + * +FROM ( + SELECT + "t0"."id", + 1 - ( + MOD("t0"."id", 2) + ) AS "bool_col" + FROM "functional_alltypes" AS "t0" + LIMIT 10 +) AS "t2" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/druid/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/druid/out.sql new file mode 100644 index 000000000000..d3969647c9ea --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/druid/out.sql @@ -0,0 +1,22 @@ +SELECT + CASE "t0"."continent" + WHEN 'NA' + THEN 'North America' + WHEN 'SA' + THEN 'South America' + WHEN 'EU' + THEN 'Europe' + WHEN 'AF' + THEN 'Africa' + WHEN 'AS' + THEN 'Asia' + WHEN 'OC' + THEN 'Oceania' + WHEN 'AN' + THEN 
'Antarctica' + ELSE 'Unknown continent' + END AS "cont", + SUM("t0"."population") AS "total_pop" +FROM "countries" AS "t0" +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/druid/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/druid/out.sql new file mode 100644 index 000000000000..c1611d8cecc3 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/druid/out.sql @@ -0,0 +1,9 @@ +SELECT + "t0"."x" IN ( + SELECT + "t0"."x" + FROM "t" AS "t0" + WHERE + "t0"."x" > 2 + ) AS "InSubquery(x)" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index e1388ae541cd..34dec9149fbc 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -21,6 +21,7 @@ MySQLNotSupportedError, PolarsInvalidOperationError, Py4JError, + PyDruidProgrammingError, PySparkAnalysisException, SnowflakeProgrammingError, TrinoUserError, @@ -105,10 +106,8 @@ def mean_udf(s): id="timestamp_max", marks=pytest.mark.broken( ["druid"], - raises=sa.exc.ProgrammingError, - reason=( - "Query not supported. Possible error: Max aggregation is not supported for 'STRING' type SQL" - ), + raises=PyDruidProgrammingError, + reason="Max aggregation is not supported for 'STRING' type SQL", ), ), ] @@ -480,49 +479,25 @@ def mean_and_std(v): lambda t, where: t.double_col.std(how="sample", where=where), lambda t, where: t.double_col[where].std(ddof=1), id="std", - marks=[ - pytest.mark.notimpl( - ["druid"], - raises=sa.exc.ProgrammingError, - reason="No match found for function signature stddev_samp()", - ), - ], + marks=[pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError)], ), param( lambda t, where: t.double_col.var(how="sample", where=where), lambda t, where: t.double_col[where].var(ddof=1), id="var", - marks=[ - pytest.mark.notimpl( - ["druid"], - raises=sa.exc.ProgrammingError, - reason="No match found for function signature var_samp()", - ), - ], + marks=[pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError)], ), param( lambda t, where: t.double_col.std(how="pop", where=where), lambda t, where: t.double_col[where].std(ddof=0), id="std_pop", - marks=[ - pytest.mark.notimpl( - ["druid"], - raises=sa.exc.ProgrammingError, - reason="No match found for function signature stddev_pop()", - ), - ], + marks=[pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError)], ), param( lambda t, where: t.double_col.var(how="pop", where=where), lambda t, where: t.double_col[where].var(ddof=0), id="var_pop", - marks=[ - pytest.mark.notimpl( - ["druid"], - raises=sa.exc.ProgrammingError, - reason="No match found for function signature var_pop()", - ), - ], + marks=[pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError)], ), param( lambda t, where: t.string_col.approx_nunique(where=where), @@ -1360,11 +1335,6 @@ def test_date_quantile(alltypes, func): @pytest.mark.notimpl( ["datafusion", "polars", "mssql"], raises=com.OperationNotDefinedError ) -@pytest.mark.notimpl( - ["druid"], - raises=sa.exc.ProgrammingError, - reason="No match found for function signature group_concat(, )", -) @pytest.mark.notyet( ["oracle"], raises=sa.exc.DatabaseError, @@ -1405,6 +1375,9 @@ def test_group_concat( backend.assert_frame_equal(result.fillna(pd.NA), expected.fillna(pd.NA)) +@pytest.mark.broken( + ["druid"], raises=PyDruidProgrammingError, reason="Java NullPointerException" +) @pytest.mark.notimpl( ["dask"], 
raises=NotImplementedError, @@ -1438,12 +1411,7 @@ def test_topk_op(alltypes, df): ], ) @pytest.mark.broken( - ["druid"], - raises=sa.exc.ProgrammingError, - reason=( - "(pydruid.db.exceptions.ProgrammingError) Plan validation failed " - "(org.apache.calcite.tools.ValidationException): java.lang.NullPointerException" - ), + ["druid"], raises=PyDruidProgrammingError, reason="Java NullPointerException" ) @pytest.mark.notimpl( ["dask"], @@ -1634,7 +1602,7 @@ def test_grouped_case(backend, con): ) @pytest.mark.notyet(["impala", "flink"], raises=com.UnsupportedOperationError) @pytest.mark.notyet(["clickhouse"], raises=ClickHouseDatabaseError) -@pytest.mark.notyet(["druid"], raises=sa.exc.ProgrammingError) +@pytest.mark.notyet(["druid"], raises=PyDruidProgrammingError) @pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) @pytest.mark.notyet(["trino"], raises=TrinoUserError) @pytest.mark.notyet(["mysql"], raises=MySQLNotSupportedError) @@ -1670,6 +1638,9 @@ def test_value_counts_on_expr(backend, alltypes, df): backend.assert_frame_equal(result, expected) +@pytest.mark.broken( + ["druid"], raises=PyDruidProgrammingError, reason="NullPointerException" +) def test_group_by_expr(backend, con): expr = ( ibis.memtable( diff --git a/ibis/backends/tests/test_api.py b/ibis/backends/tests/test_api.py index 903bbff3ff39..5687eccd64e4 100644 --- a/ibis/backends/tests/test_api.py +++ b/ibis/backends/tests/test_api.py @@ -5,6 +5,7 @@ import ibis.expr.types as ir from ibis.backends.conftest import TEST_TABLES +from ibis.backends.tests.errors import PyDruidProgrammingError def test_backend_name(backend): @@ -12,9 +13,7 @@ def test_backend_name(backend): assert backend.api.name == backend.name() -@pytest.mark.notimpl( - ["druid"], raises=TypeError, reason="'NoneType' object is not iterable" -) +@pytest.mark.notyet(["druid"], raises=PyDruidProgrammingError) def test_version(backend): assert isinstance(backend.api.version, str) diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index 2665031af72f..6846bf96b4d0 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -81,7 +81,7 @@ def time_keyed_right(time_keyed_df2): ("direction", "op"), [("backward", operator.ge), ("forward", operator.le)] ) @pytest.mark.notyet( - ["datafusion", "snowflake", "trino", "postgres", "mysql", "pyspark"] + ["datafusion", "snowflake", "trino", "postgres", "mysql", "pyspark", "druid"] ) def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): on = op(time_left["time"], time_right["time"]) @@ -107,7 +107,7 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op ["clickhouse"], raises=AssertionError, reason="`time` is truncated to seconds" ) @pytest.mark.notyet( - ["datafusion", "snowflake", "trino", "postgres", "mysql", "pyspark"] + ["datafusion", "snowflake", "trino", "postgres", "mysql", "pyspark", "druid"] ) def test_keyed_asof_join_with_tolerance( con, diff --git a/ibis/backends/tests/test_binary.py b/ibis/backends/tests/test_binary.py index 9dc8c8e3ffff..c3dfe9965424 100644 --- a/ibis/backends/tests/test_binary.py +++ b/ibis/backends/tests/test_binary.py @@ -22,7 +22,7 @@ @pytest.mark.notimpl( - ["clickhouse", "impala"], + ["clickhouse", "impala", "druid"], "Unsupported type: Binary(nullable=True)", raises=NotImplementedError, ) diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index ab0581a4d736..e4016c762910 100644 --- 
a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -27,7 +27,7 @@ import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis.backends.conftest import ALL_BACKENDS -from ibis.backends.tests.errors import Py4JJavaError +from ibis.backends.tests.errors import Py4JJavaError, PyDruidProgrammingError from ibis.util import gen_name, guid if TYPE_CHECKING: @@ -454,11 +454,6 @@ def test_insert_no_overwrite_from_dataframe( ) -@pytest.mark.notyet( - ["trino"], - reason="Connector doesn't support deletion (required for overwrite=True)", - raises=sa.exc.ProgrammingError, -) def test_insert_overwrite_from_dataframe( alchemy_backend, alchemy_con, @@ -1455,7 +1450,7 @@ def gen_test_name(con: BaseBackend) -> str: reason="overwriting not implemented in ibis for this backend", ) @mark.broken( - ["druid"], raises=sa.exc.ProgrammingError, reason="generated SQL fails to parse" + ["druid"], raises=PyDruidProgrammingError, reason="generated SQL fails to parse" ) @mark.notimpl(["impala"], reason="impala doesn't support memtable") @mark.notimpl(["pyspark"]) diff --git a/ibis/backends/tests/test_dataframe_interchange.py b/ibis/backends/tests/test_dataframe_interchange.py index addf958d4226..be6886b9ae20 100644 --- a/ibis/backends/tests/test_dataframe_interchange.py +++ b/ibis/backends/tests/test_dataframe_interchange.py @@ -9,7 +9,6 @@ ) -@pytest.mark.notimpl(["druid"]) def test_dataframe_interchange_no_execute(con, alltypes, mocker): t = alltypes.select("int_col", "double_col", "string_col") pa_df = t.to_pyarrow().__dataframe__() @@ -70,7 +69,7 @@ def test_dataframe_interchange_dataframe_methods_execute(con, alltypes, mocker): assert to_pyarrow.call_count == 1 -@pytest.mark.notimpl(["druid", "flink"]) +@pytest.mark.notimpl(["flink"]) def test_dataframe_interchange_column_methods_execute(con, alltypes, mocker): t = alltypes.select("int_col", "double_col", "string_col") pa_df = t.to_pyarrow().__dataframe__() diff --git a/ibis/backends/tests/test_dot_sql.py b/ibis/backends/tests/test_dot_sql.py index 67c25d63cd49..b7f2c5dd5487 100644 --- a/ibis/backends/tests/test_dot_sql.py +++ b/ibis/backends/tests/test_dot_sql.py @@ -255,7 +255,10 @@ def test_table_dot_sql_transpile(backend, alltypes, dialect, df): *no_sqlglot_dialect, ], ) -@pytest.mark.notyet(["druid"], raises=ValueError) +@pytest.mark.notyet(["polars"], raises=PolarsComputeError) +@pytest.mark.notyet( + ["druid"], raises=AttributeError, reason="druid doesn't respect column names" +) @pytest.mark.notyet(["snowflake", "bigquery"]) @pytest.mark.notyet( ["oracle"], strict=False, reason="only works with backends that quote everything" diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index 568fe35ecb89..b1ec4c4bff28 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -15,6 +15,7 @@ DuckDBParserException, MySQLOperationalError, PyDeltaTableError, + PyDruidProgrammingError, PySparkArithmeticException, PySparkParseException, SnowflakeProgrammingError, @@ -335,7 +336,7 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): pa.Decimal128Type, id="decimal128", marks=[ - pytest.mark.notyet(["druid"], raises=sa.exc.ProgrammingError), + pytest.mark.notyet(["flink"], raises=NotImplementedError), pytest.mark.notyet(["exasol"], raises=sa.exc.DBAPIError), pytest.mark.notyet( ["risingwave"], @@ -351,7 +352,7 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): marks=[ pytest.mark.notyet(["impala"], 
reason="precision not supported"), pytest.mark.notyet(["duckdb"], reason="precision is out of range"), - pytest.mark.notyet(["druid", "mssql"], raises=sa.exc.ProgrammingError), + pytest.mark.notyet(["mssql"], raises=sa.exc.ProgrammingError), pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError), pytest.mark.notyet(["trino"], raises=TrinoUserError), pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError), @@ -396,17 +397,13 @@ def test_to_pyarrow_decimal(backend, dtype, pyarrow_dtype): "dask", "trino", "exasol", + "druid", ], raises=NotImplementedError, reason="read_delta not yet implemented", ) @pytest.mark.notyet(["clickhouse"], raises=Exception) @pytest.mark.notyet(["mssql", "pandas"], raises=PyDeltaTableError) -@pytest.mark.notyet( - ["druid"], - raises=pa.lib.ArrowTypeError, - reason="arrow type conversion fails in `to_delta` call", -) def test_roundtrip_delta(backend, con, alltypes, tmp_path, monkeypatch): if con.name == "pyspark": pytest.importorskip("delta") @@ -471,6 +468,11 @@ def test_to_torch(alltypes): @pytest.mark.notimpl(["flink"]) +@pytest.mark.notyet( + ["druid"], + raises=PyDruidProgrammingError, + reason="backend doesn't support an empty VALUES construct", +) def test_empty_memtable(backend, con): expected = pd.DataFrame({"a": []}) table = ibis.memtable(expected) @@ -486,7 +488,7 @@ def test_to_pandas_batches_empty_table(backend, con): assert sum(map(len, t.to_pandas_batches())) == n -@pytest.mark.notimpl(["druid"]) +@pytest.mark.notimpl(["flink"]) @pytest.mark.parametrize( "n", [ diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 4b7ab1330493..f3bde9dafbe4 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -26,6 +26,7 @@ ImpalaHiveServer2Error, Py4JJavaError, MySQLProgrammingError, + PyDruidProgrammingError, SnowflakeProgrammingError, TrinoUserError, PsycoPg2InvalidTextRepresentation @@ -423,13 +424,9 @@ def test_table_fillna_invalid(alltypes): param( {"double_col": -1, "string_col": "missing"}, id="double-int-str", - marks=[pytest.mark.notimpl(["druid", "oracle"])], - ), - param( - {"double_col": -1.5, "string_col": "missing"}, - id="double-str", - marks=[pytest.mark.notimpl(["druid"])], + marks=[pytest.mark.notimpl(["oracle"])], ), + param({"double_col": -1.5, "string_col": "missing"}, id="double-str"), ], ) def test_table_fillna_mapping(backend, alltypes, replacements): @@ -446,7 +443,7 @@ def test_table_fillna_mapping(backend, alltypes, replacements): backend.assert_frame_equal(result, expected, check_dtype=False) -@pytest.mark.notimpl(["druid", "oracle"]) +@pytest.mark.notimpl(["oracle"]) def test_table_fillna_scalar(backend, alltypes): table = alltypes.mutate( int_col=alltypes.int_col.nullif(1), @@ -569,7 +566,7 @@ def test_order_by_random(alltypes): @pytest.mark.notyet( ["druid"], - raises=sa.exc.ProgrammingError, + raises=PyDruidProgrammingError, reason="Druid only supports trivial unions", ) @pytest.mark.notyet( @@ -682,14 +679,7 @@ def test_logical_negation_literal(con, expr, expected, op): assert con.execute(op(ibis.literal(expr)).name("tmp")) == expected -@pytest.mark.parametrize( - "op", - [ - toolz.identity, - invert, - neg, - ], -) +@pytest.mark.parametrize("op", [toolz.identity, invert, neg]) def test_logical_negation_column(backend, alltypes, df, op): result = op(alltypes["bool_col"]).name("tmp").execute() expected = op(df["bool_col"]) @@ -959,14 +949,6 @@ def test_memtable_bool_column(backend, con): assert Counter(con.execute(t.a)) == Counter(data) 
-@pytest.mark.broken( - ["druid"], - raises=( - TypeError, # pandas >=2.1.0 - AssertionError, # pandas <2.1.0 - ), - reason="result contains empty strings instead of None", -) def test_memtable_construct(backend, con, monkeypatch): pa = pytest.importorskip("pyarrow") monkeypatch.setattr(ibis.options, "default_backend", con) @@ -1366,7 +1348,6 @@ def hash_256(col): "pandas", "dask", "bigquery", - "druid", "impala", "mssql", "oracle", @@ -1389,6 +1370,7 @@ def hash_256(col): 1672531200, marks=[ pytest.mark.notyet(["duckdb"], reason="casts to None"), + pytest.mark.notyet(["druid"], reason="returns milliseconds"), pytest.mark.notyet(["trino"], raises=TrinoUserError), pytest.mark.broken(["polars"], reason="casts to 1672531200000000000"), pytest.mark.broken(["datafusion"], reason="casts to 1672531200000000"), @@ -1479,7 +1461,6 @@ def test_try_cast_table(backend, con): "dask", "bigquery", "datafusion", - "druid", "impala", "mssql", "mysql", @@ -1491,14 +1472,15 @@ def test_try_cast_table(backend, con): "exasol", ] ) +@pytest.mark.notimpl(["druid"], strict=False) @pytest.mark.parametrize( ("from_val", "to_type", "func"), [ - param("a", "float", lambda x: x is None or np.isnan(x)), + param("a", "float", pd.isna), param( datetime.datetime(2023, 1, 1), "float", - lambda x: x is None or np.isnan(x), + pd.isna, marks=[ pytest.mark.notyet( ["clickhouse", "polars", "flink", "pyspark"], @@ -1886,7 +1868,7 @@ def test_substitute(backend): @pytest.mark.notimpl( ["dask", "pandas", "polars"], raises=NotImplementedError, reason="not a SQL backend" ) -@pytest.mark.notimpl(["druid", "flink"], reason="no sqlglot dialect", raises=ValueError) +@pytest.mark.notimpl(["flink"], reason="no sqlglot dialect", raises=ValueError) @pytest.mark.notimpl(["exasol"], raises=ValueError, reason="unknown dialect") @pytest.mark.notimpl( ["risingwave"], diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index ad0bb2fd7c05..6eed7380d2f5 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -12,6 +12,7 @@ import ibis import ibis.common.exceptions as com import ibis.expr.schema as sch +from ibis.backends.tests.errors import PyDruidProgrammingError def _pandas_semi_join(left, right, on, **_): @@ -299,10 +300,10 @@ def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_valu @pytest.mark.notimpl( - ["druid", "exasol"], - raises=sa.exc.NoSuchTableError, - reason="`win` table isn't loaded", + ["exasol"], raises=sa.exc.NoSuchTableError, reason="`win` table isn't loaded" ) +@pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError) +@pytest.mark.notimpl(["flink"], reason="`win` table isn't loaded") @pytest.mark.parametrize( ("how", "nrows", "gen_right", "keys"), [ diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index e26eac9c13d8..f1118ca466e0 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -25,6 +25,7 @@ MySQLOperationalError, PsycoPg2DivisionByZero, Py4JError, + PyDruidProgrammingError, PySparkArithmeticException, PySparkParseException, SnowflakeProgrammingError, @@ -272,7 +273,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "pyspark": decimal.Decimal("1.1"), "mysql": decimal.Decimal("1"), "mssql": 1.1, - "druid": 1.1, + "druid": decimal.Decimal("1.1"), "datafusion": decimal.Decimal("1.1"), "oracle": 1.1, "flink": decimal.Decimal("1.1"), @@ -327,7 +328,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "clickhouse": 
decimal.Decimal("1.1"), "dask": decimal.Decimal("1.1"), "mssql": 1.1, - "druid": 1.1, + "druid": decimal.Decimal("1.1"), "datafusion": decimal.Decimal("1.1"), "oracle": 1.1, "flink": decimal.Decimal("1.1"), @@ -376,7 +377,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "1.10000000000000003193790845333396190208" ), "mssql": 1.1, - "druid": 1.1, + "druid": decimal.Decimal("1.1"), "oracle": 1.1, }, { @@ -475,7 +476,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "(org.apache.calcite.tools.ValidationException): " "org.apache.calcite.runtime.CalciteContextException: From line 1, column 8 to line 1, " "column 15: Column 'Infinity' not found in any table", - raises=sa.exc.ProgrammingError, + raises=PyDruidProgrammingError, ), pytest.mark.broken(["datafusion"], raises=Exception), pytest.mark.broken( @@ -552,7 +553,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "(org.apache.calcite.tools.ValidationException): " "org.apache.calcite.runtime.CalciteContextException: From line 1, column 9 to line 1, " "column 16: Column 'Infinity' not found in any table", - raises=sa.exc.ProgrammingError, + raises=PyDruidProgrammingError, ), pytest.mark.broken(["datafusion"], raises=Exception), pytest.mark.broken( @@ -639,7 +640,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "(org.apache.calcite.tools.ValidationException): " "org.apache.calcite.runtime.CalciteContextException: From line 1, column 8 to line 1, " "column 10: Column 'NaN' not found in any table", - raises=sa.exc.ProgrammingError, + raises=PyDruidProgrammingError, ), pytest.mark.broken(["datafusion"], raises=Exception), pytest.mark.broken( @@ -692,15 +693,7 @@ def test_decimal_literal(con, backend, expr, expected_types, expected_result): lambda t: t.float_col, id="float-column", marks=[ - pytest.mark.notimpl( - ["exasol"], - raises=com.OperationNotDefinedError, - ), - pytest.mark.notimpl( - ["druid"], - raises=AttributeError, - reason="AttributeError: 'DecimalColumn' object has no attribute 'isinf'", - ), + pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), param( @@ -708,15 +701,7 @@ def test_decimal_literal(con, backend, expr, expected_types, expected_result): lambda t: t.double_col, id="double-column", marks=[ - pytest.mark.notimpl( - ["exasol"], - raises=com.OperationNotDefinedError, - ), - pytest.mark.notimpl( - ["druid"], - raises=AttributeError, - reason="AttributeError: 'DecimalColumn' object has no attribute 'isinf'", - ), + pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), param( @@ -724,25 +709,20 @@ def test_decimal_literal(con, backend, expr, expected_types, expected_result): lambda t: 1.3, id="float-literal", marks=[ - pytest.mark.notimpl( - ["exasol", "druid"], raises=com.OperationNotDefinedError - ) + pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) ], ), param( lambda t: ibis.literal(np.nan), lambda t: np.nan, id="nan-literal", - marks=[pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError)], ), param( lambda t: ibis.literal(np.inf), lambda t: np.inf, id="inf-literal", marks=[ - pytest.mark.notimpl( - ["exasol", "druid"], raises=com.OperationNotDefinedError - ) + pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) ], ), param( @@ -750,9 +730,7 @@ def test_decimal_literal(con, backend, expr, expected_types, expected_result): lambda t: -np.inf, id="-inf-literal", marks=[ - pytest.mark.notimpl( - ["exasol", "druid"], raises=com.OperationNotDefinedError - ) + 
pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) ], ), ], @@ -763,26 +741,19 @@ def test_decimal_literal(con, backend, expr, expected_types, expected_result): param( operator.methodcaller("isnan"), np.isnan, - marks=[ - pytest.mark.notimpl( - ["exasol"], - raises=com.OperationNotDefinedError, - ), - ], + marks=pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), id="isnan", ), param( operator.methodcaller("isinf"), np.isinf, id="isinf", - marks=[ - pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) - ], + marks=pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ), ], ) @pytest.mark.notimpl( - ["sqlite", "mssql", "oracle", "flink"], raises=com.OperationNotDefinedError + ["sqlite", "mssql", "oracle", "flink", "druid"], raises=com.OperationNotDefinedError ) @pytest.mark.notimpl(["mysql"], raises=(MySQLOperationalError, NotImplementedError)) def test_isnan_isinf( @@ -878,12 +849,12 @@ def test_isnan_isinf( ["exasol"], raises=com.OperationNotDefinedError, ), - pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, reason="function log10(numeric, numeric) does not exist", ), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -900,12 +871,12 @@ def test_isnan_isinf( ["exasol"], raises=com.OperationNotDefinedError, ), - pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, reason="function log10(numeric, numeric) does not exist", ), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -1068,6 +1039,7 @@ def test_simple_math_functions_columns( raises=sa.exc.InternalError, reason="function log10(numeric, numeric) does not exist", ), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], id="log2", ), @@ -1113,11 +1085,6 @@ def test_simple_math_functions_columns( ), ], ) -@pytest.mark.notimpl( - ["druid"], - raises=TypeError, - reason="loop of ufunc does not support argument 0 of type float which has no callable log2 method", -) def test_complex_math_functions_columns( backend, con, alltypes, df, expr_fn, expected_fn ): @@ -1136,10 +1103,10 @@ def test_complex_math_functions_columns( id="round", marks=[ pytest.mark.notimpl(["mssql"], raises=AssertionError), - pytest.mark.notimpl( + pytest.mark.broken( ["druid"], - raises=TypeError, - reason="loop of ufunc does not support argument 0 of type float which has no callable rint method", + raises=AssertionError, + reason="rounding works but behavior differs from pandas", ), ], ), @@ -1147,13 +1114,6 @@ def test_complex_math_functions_columns( lambda be, t: t.double_col.add(0.05).round(3), lambda be, t: be.round(t.double_col + 0.05, 3), id="round-with-param", - marks=[ - pytest.mark.notimpl( - ["druid"], - raises=TypeError, - reason="loop of ufunc does not support argument 0 of type float which has no callable rint method", - ), - ], ), param( lambda be, t: ibis.least(t.bigint_col, t.int_col), @@ -1335,16 +1295,8 @@ def test_floating_mod(backend, alltypes, df): ), ], ), - param( - "float_col", - 0, - marks=pytest.mark.notimpl(["druid"], raises=ZeroDivisionError), - ), - param( - "double_col", - 0, - marks=pytest.mark.notimpl(["druid"], raises=ZeroDivisionError), - ), + param("float_col", 0), + param("double_col", 0), param( "tinyint_col", 0.0, @@ -1401,15 +1353,13 @@ def test_floating_mod(backend, alltypes, df): "float_col", 0.0, marks=[ - 
pytest.mark.notimpl(["druid"], raises=ZeroDivisionError), - pytest.mark.never(["impala"], reason="doesn't allow divide by zero"), + pytest.mark.never(["impala"], reason="doesn't allow divide by zero") ], ), param( "double_col", 0.0, marks=[ - pytest.mark.notimpl(["druid"], raises=ZeroDivisionError), pytest.mark.never(["impala"], reason="doesn't allow divide by zero"), ], ), @@ -1478,15 +1428,16 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "trino", "postgres", "mysql", + "druid", ], reason="Not SQLAlchemy backends", ) -@pytest.mark.notimpl(["druid", "exasol"], raises=KeyError) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, reason="Feature is not yet implemented: unsupported data type: NUMERIC(5)", ) +@pytest.mark.notimpl(["exasol"], raises=KeyError) def test_sa_default_numeric_precision_and_scale( con, backend, default_precisions, default_scales, temp_table ): @@ -1520,14 +1471,13 @@ def test_sa_default_numeric_precision_and_scale( assert_equal(schema, expected) -@pytest.mark.notimpl( - ["dask", "pandas", "polars", "druid"], raises=com.OperationNotDefinedError -) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, reason="function random() does not exist", ) +@pytest.mark.notimpl(["dask", "pandas", "polars"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError) def test_random(con): expr = ibis.random() result = con.execute(expr) @@ -1597,7 +1547,7 @@ def test_clip(backend, alltypes, df, ibis_func, pandas_func): @pytest.mark.notimpl(["polars", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["druid"], - raises=sa.exc.ProgrammingError, + raises=PyDruidProgrammingError, reason="SQL query requires 'MIN' operator that is not supported.", ) @pytest.mark.never( diff --git a/ibis/backends/tests/test_set_ops.py b/ibis/backends/tests/test_set_ops.py index c1931e205175..d489ac1a0884 100644 --- a/ibis/backends/tests/test_set_ops.py +++ b/ibis/backends/tests/test_set_ops.py @@ -4,13 +4,13 @@ import pandas as pd import pytest -import sqlalchemy as sa from pytest import param import ibis import ibis.common.exceptions as com import ibis.expr.types as ir from ibis import _ +from ibis.backends.tests.errors import PyDruidProgrammingError @pytest.fixture @@ -35,7 +35,7 @@ def union_subsets(alltypes, df): @pytest.mark.parametrize("distinct", [False, True], ids=["all", "distinct"]) -@pytest.mark.broken(["druid"], raises=sa.exc.ProgrammingError) +@pytest.mark.broken(["druid"], raises=PyDruidProgrammingError) def test_union(backend, union_subsets, distinct): (a, b, c), (da, db, dc) = union_subsets @@ -49,7 +49,7 @@ def test_union(backend, union_subsets, distinct): backend.assert_frame_equal(result, expected) -@pytest.mark.broken(["druid"], raises=sa.exc.ProgrammingError) +@pytest.mark.broken(["druid"], raises=PyDruidProgrammingError) def test_union_mixed_distinct(backend, union_subsets): (a, b, c), (da, db, dc) = union_subsets @@ -93,7 +93,7 @@ def test_union_mixed_distinct(backend, union_subsets): ], ) @pytest.mark.notimpl(["polars"]) -@pytest.mark.broken(["druid"], raises=sa.exc.ProgrammingError) +@pytest.mark.broken(["druid"], raises=PyDruidProgrammingError) def test_intersect(backend, alltypes, df, distinct): a = alltypes.filter((_.id >= 5200) & (_.id <= 5210)) b = alltypes.filter((_.id >= 5205) & (_.id <= 5215)) @@ -147,7 +147,7 @@ def test_intersect(backend, alltypes, df, distinct): ], ) @pytest.mark.notimpl(["polars"]) -@pytest.mark.broken(["druid"], 
raises=sa.exc.ProgrammingError) +@pytest.mark.broken(["druid"], raises=PyDruidProgrammingError) def test_difference(backend, alltypes, df, distinct): a = alltypes.filter((_.id >= 5200) & (_.id <= 5210)) b = alltypes.filter((_.id >= 5205) & (_.id <= 5215)) @@ -187,7 +187,7 @@ def test_table_set_operations_api(alltypes, method): "distinct", [ param( - True, marks=pytest.mark.broken(["druid"], raises=sa.exc.ProgrammingError) + True, marks=pytest.mark.broken(["druid"], raises=PyDruidProgrammingError) ), False, ], @@ -238,7 +238,7 @@ def test_top_level_union(backend, con, alltypes, distinct): ids=["intersect", "difference"], ) @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) -@pytest.mark.broken(["druid"], raises=sa.exc.ProgrammingError) +@pytest.mark.broken(["druid"], raises=PyDruidProgrammingError) def test_top_level_intersect_difference( backend, con, alltypes, distinct, opname, expected ): diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 4740a0b896ac..b3033c312359 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -11,7 +11,7 @@ sa = pytest.importorskip("sqlalchemy") sg = pytest.importorskip("sqlglot") -pytestmark = pytest.mark.notimpl(["druid", "flink", "exasol", "risingwave"]) +pytestmark = pytest.mark.notimpl(["flink", "exasol", "risingwave"]) simple_literal = param(ibis.literal(1), id="simple_literal") array_literal = param( diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index bd497b1a31f0..f8b2dd3cd365 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -11,7 +11,7 @@ import ibis import ibis.common.exceptions as com import ibis.expr.datatypes as dt -from ibis.backends.tests.errors import ClickHouseDatabaseError +from ibis.backends.tests.errors import ClickHouseDatabaseError, PyDruidProgrammingError from ibis.common.annotations import ValidationError @@ -202,8 +202,7 @@ def uses_java_re(t): id="rlike", marks=[ pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], - raises=com.OperationNotDefinedError, + ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError ), ], ), @@ -213,8 +212,7 @@ def uses_java_re(t): id="re_search_substring", marks=[ pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], - raises=com.OperationNotDefinedError, + ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError ), ], ), @@ -224,8 +222,7 @@ def uses_java_re(t): id="re_search", marks=[ pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], - raises=com.OperationNotDefinedError, + ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError ), ], ), @@ -253,8 +250,7 @@ def uses_java_re(t): id="re_extract", marks=[ pytest.mark.notimpl( - ["mssql", "druid", "oracle", "exasol"], - raises=com.OperationNotDefinedError, + ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -269,8 +265,7 @@ def uses_java_re(t): id="re_extract_group", marks=[ pytest.mark.notimpl( - ["mssql", "druid", "oracle", "exasol"], - raises=com.OperationNotDefinedError, + ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -287,8 +282,10 @@ def uses_java_re(t): id="re_extract_posix", marks=[ pytest.mark.notimpl( - ["mssql", "druid", "oracle", "exasol"], - raises=com.OperationNotDefinedError, + ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError + ), + pytest.mark.notimpl( + ["druid"], reason="No posix support", 
raises=AssertionError ), pytest.mark.notimpl( ["risingwave"], @@ -303,8 +300,7 @@ def uses_java_re(t): id="re_extract_whole_group", marks=[ pytest.mark.notimpl( - ["mssql", "druid", "oracle", "exasol"], - raises=com.OperationNotDefinedError, + ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -321,8 +317,7 @@ def uses_java_re(t): id="re_extract_group_1", marks=[ pytest.mark.notimpl( - ["mssql", "druid", "oracle", "exasol"], - raises=com.OperationNotDefinedError, + ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -339,8 +334,7 @@ def uses_java_re(t): id="re_extract_group_2", marks=[ pytest.mark.notimpl( - ["mssql", "druid", "oracle", "exasol"], - raises=com.OperationNotDefinedError, + ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -357,8 +351,7 @@ def uses_java_re(t): id="re_extract_group_3", marks=[ pytest.mark.notimpl( - ["mssql", "druid", "oracle", "exasol"], - raises=com.OperationNotDefinedError, + ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -373,8 +366,7 @@ def uses_java_re(t): id="re_extract_group_at_beginning", marks=[ pytest.mark.notimpl( - ["mssql", "druid", "oracle", "exasol"], - raises=com.OperationNotDefinedError, + ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -389,8 +381,7 @@ def uses_java_re(t): id="re_extract_group_at_end", marks=[ pytest.mark.notimpl( - ["mssql", "druid", "oracle", "exasol"], - raises=com.OperationNotDefinedError, + ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -588,11 +579,8 @@ def uses_java_re(t): id="startswith", # pyspark doesn't support `cases` yet marks=[ - pytest.mark.notimpl( - ["dask"], - raises=com.OperationNotDefinedError, - ), - pytest.mark.broken(["druid", "mssql"], raises=sa.exc.ProgrammingError), + pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError), + pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError), ], ), param( @@ -604,10 +592,9 @@ def uses_java_re(t): # pyspark doesn't support `cases` yet marks=[ pytest.mark.notimpl( - ["dask", "datafusion"], - raises=com.OperationNotDefinedError, + ["dask", "datafusion"], raises=com.OperationNotDefinedError ), - pytest.mark.broken(["druid", "mssql"], raises=sa.exc.ProgrammingError), + pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError), ], ), param( @@ -615,10 +602,7 @@ def uses_java_re(t): lambda t: t.date_string_col.str.startswith("2010-01"), id="startswith-simple", marks=[ - pytest.mark.notimpl( - ["dask"], - raises=com.OperationNotDefinedError, - ), + pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError), pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError), ], ), @@ -691,7 +675,7 @@ def uses_java_re(t): lambda t: t.date_string_col.str[-2], id="negative-index", marks=[ - pytest.mark.broken(["druid"], raises=sa.exc.ProgrammingError), + pytest.mark.broken(["druid"], raises=PyDruidProgrammingError), pytest.mark.broken(["impala", "flink"], raises=AssertionError), ], ), @@ -713,7 +697,7 @@ def uses_java_re(t): reason="'Series' object has no attribute 'items'", raises=AttributeError, ), - pytest.mark.broken(["druid"], raises=sa.exc.ProgrammingError), + pytest.mark.broken(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -734,7 +718,7 @@ def uses_java_re(t): 
reason="'Series' object has no attribute 'items'", raises=AttributeError, ), - pytest.mark.broken(["druid"], raises=sa.exc.ProgrammingError), + pytest.mark.broken(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -756,7 +740,7 @@ def uses_java_re(t): reason="'Series' object has no attribute 'items'", raises=AttributeError, ), - pytest.mark.broken(["druid"], raises=sa.exc.ProgrammingError), + pytest.mark.broken(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -779,7 +763,7 @@ def uses_java_re(t): reason="'Series' object has no attribute 'items'", raises=AttributeError, ), - pytest.mark.broken(["druid"], raises=sa.exc.ProgrammingError), + pytest.mark.broken(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -974,8 +958,7 @@ def test_capitalize(con): @pytest.mark.notimpl( - ["dask", "pandas", "polars", "druid", "oracle", "flink"], - raises=com.OperationNotDefinedError, + ["dask", "pandas", "polars", "oracle", "flink"], raises=com.OperationNotDefinedError ) @pytest.mark.notyet( ["impala", "mssql", "sqlite", "exasol"], diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index d627926a7a3e..f55e6764d452 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -30,6 +30,7 @@ PolarsComputeError, PolarsPanicException, Py4JJavaError, + PyDruidProgrammingError, SnowflakeProgrammingError, TrinoUserError, ) @@ -116,79 +117,37 @@ def test_timestamp_extract(backend, alltypes, df, attr): methodcaller("year"), 2015, id="year", - marks=[ - pytest.mark.broken( - ["druid"], - raises=sa.exc.CompileError, - reason='No literal value renderer is available for literal value "datetime.datetime(2015, 9, 1, 14, 48, 5, 359000)" with datatype DATETIME', - ), - pytest.mark.notimpl(["exasol"], raises=ExaQueryError), - ], + marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], ), param( methodcaller("month"), 9, id="month", - marks=[ - pytest.mark.notimpl( - ["druid"], - raises=sa.exc.CompileError, - reason='No literal value renderer is available for literal value "datetime.datetime(2015, 9, 1, 14, 48, 5, 359000)" with datatype DATETIME', - ), - pytest.mark.notimpl(["exasol"], raises=ExaQueryError), - ], + marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], ), param( methodcaller("day"), 1, id="day", - marks=[ - pytest.mark.notimpl( - ["druid"], - raises=sa.exc.CompileError, - reason='No literal value renderer is available for literal value "datetime.datetime(2015, 9, 1, 14, 48, 5, 359000)" with datatype DATETIME', - ), - pytest.mark.notimpl(["exasol"], raises=ExaQueryError), - ], + marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], ), param( methodcaller("hour"), 14, id="hour", - marks=[ - pytest.mark.notimpl( - ["druid"], - raises=sa.exc.CompileError, - reason='No literal value renderer is available for literal value "datetime.datetime(2015, 9, 1, 14, 48, 5, 359000)" with datatype DATETIME', - ), - pytest.mark.notimpl(["exasol"], raises=ExaQueryError), - ], + marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], ), param( methodcaller("minute"), 48, id="minute", - marks=[ - pytest.mark.notimpl( - ["druid"], - raises=sa.exc.CompileError, - reason='No literal value renderer is available for literal value "datetime.datetime(2015, 9, 1, 14, 48, 5, 359000)" with datatype DATETIME', - ), - pytest.mark.notimpl(["exasol"], raises=ExaQueryError), - ], + marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], ), param( methodcaller("second"), 5, id="second", - marks=[ - 
pytest.mark.notimpl( - ["druid"], - raises=sa.exc.CompileError, - reason='No literal value renderer is available for literal value "datetime.datetime(2015, 9, 1, 14, 48, 5, 359000)" with datatype DATETIME', - ), - pytest.mark.notimpl(["exasol"], raises=ExaQueryError), - ], + marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], ), param( methodcaller("millisecond"), @@ -1918,7 +1877,7 @@ def test_now_from_projection(alltypes): @pytest.mark.notimpl(["pandas", "dask"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( - ["druid"], raises=sa.exc.ProgrammingError, reason="SQL parse failed" + ["druid"], raises=PyDruidProgrammingError, reason="SQL parse failed" ) @pytest.mark.notimpl( ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00936 missing expression" @@ -1957,16 +1916,6 @@ def test_date_literal(con, backend): @pytest.mark.notimpl( ["pandas", "dask", "pyspark", "mysql"], raises=com.OperationNotDefinedError ) -@pytest.mark.notimpl( - ["druid"], - raises=sa.exc.ProgrammingError, - reason=( - "(pydruid.db.exceptions.ProgrammingError) Plan validation failed " - "(org.apache.calcite.tools.ValidationException): org.apache.calcite.runtime.CalciteContextException: " - "From line 1, column 8 to line 1, column 44: No match found for function signature " - "make_timestamp(, , , , , )" - ), -) @pytest.mark.notimpl( ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00904: MAKE TIMESTAMP invalid" ) @@ -2029,7 +1978,7 @@ def test_timestamp_literal(con, backend): ) @pytest.mark.notimpl( ["druid"], - raises=sa.exc.ProgrammingError, + raises=PyDruidProgrammingError, reason=( "No match found for function signature make_timestamp(, , " ", , , )" @@ -2067,9 +2016,7 @@ def test_timestamp_with_timezone_literal(con, timezone, expected): ) @pytest.mark.notyet(["clickhouse", "impala"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) -@pytest.mark.broken( - ["druid"], raises=sa.exc.ProgrammingError, reason="SQL parse failed" -) +@pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.notimpl( ["risingwave"], @@ -2095,7 +2042,7 @@ def test_time_literal(con, backend): ) @pytest.mark.notyet( ["druid"], - raises=sa.exc.CompileError, + raises=PyDruidProgrammingError, reason="druid sqlalchemy dialect fails to compile datetime types", ) @pytest.mark.broken( @@ -2162,11 +2109,7 @@ def test_extract_time_from_timestamp(con, microsecond): "invalid type [CAST(INTERVAL_LITERAL('second', '1') AS VARIANT)] for parameter 'TO_VARIANT'", raises=SnowflakeProgrammingError, ) -@pytest.mark.broken( - ["druid"], - 'No literal value renderer is available for literal value "1" with datatype DATETIME', - raises=sa.exc.CompileError, -) +@pytest.mark.notyet(["druid"], raises=PyDruidProgrammingError) @pytest.mark.broken( ["impala"], "AnalysisException: Syntax error in line 1: SELECT typeof(INTERVAL 1 SECOND) AS `TypeOf(1)` " @@ -2271,15 +2214,6 @@ def test_timestamp_column_from_ymdhms(backend, con, alltypes, df): backend.assert_series_equal(golden, result.timestamp_col) -@pytest.mark.notimpl( - ["druid"], - raises=sa.exc.ProgrammingError, - reason=( - "(pydruid.db.exceptions.ProgrammingError) Plan validation failed " - "(org.apache.calcite.tools.ValidationException): " - "java.lang.UnsupportedOperationException: class org.apache.calcite.sql.SqlIdentifier: LONG" - ), -) @pytest.mark.notimpl( ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-01861 literal does not match" ) @@ -2291,12 
+2225,7 @@ def test_date_scalar_from_iso(con): assert result.strftime("%Y-%m-%d") == "2022-02-24" -@pytest.mark.notimpl(["mssql", "druid"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl( - ["druid"], - raises=sa.exc.ProgrammingError, - reason="java.lang.UnsupportedOperationException: class org.apache.calcite.sql.SqlIdentifier: STRING", -) +@pytest.mark.notimpl(["mssql"], raises=com.OperationNotDefinedError) @pytest.mark.notyet( ["oracle"], raises=sa.exc.DatabaseError, @@ -2339,11 +2268,6 @@ def test_timestamp_extract_milliseconds_with_big_value(con): raises=sa.exc.DatabaseError, reason="ORA-00932", ) -@pytest.mark.broken( - ["druid"], - raises=sa.exc.ProgrammingError, - reason="No match found for function signature to_timestamp()", -) @pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) def test_integer_cast_to_timestamp_column(backend, alltypes, df): expr = alltypes.int_col.cast("timestamp") @@ -2352,11 +2276,6 @@ def test_integer_cast_to_timestamp_column(backend, alltypes, df): backend.assert_series_equal(result, expected.astype(result.dtype)) -@pytest.mark.notimpl( - ["druid"], - raises=sa.exc.ProgrammingError, - reason="No match found for function signature to_timestamp()", -) @pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) def test_integer_cast_to_timestamp_scalar(alltypes, df): @@ -2369,11 +2288,7 @@ def test_integer_cast_to_timestamp_scalar(alltypes, df): @pytest.mark.broken( ["clickhouse"], raises=AssertionError, reason="clickhouse truncates the result" ) -@pytest.mark.notimpl( - ["druid"], - reason='No literal value renderer is available for literal value "datetime.datetime(2419, 10, 11, 10, 10, 25)" with datatype DATETIME', - raises=sa.exc.CompileError, -) +@pytest.mark.broken(["druid"], reason="timezone doesn't match", raises=AssertionError) @pytest.mark.notyet( ["pyspark"], reason="PySpark doesn't handle big timestamps", @@ -2408,11 +2323,7 @@ def build_date_col(t): @pytest.mark.notimpl(["mssql"], raises=com.OperationNotDefinedError) -@pytest.mark.broken( - ["druid"], - raises=sa.exc.CompileError, - reason='No literal value renderer is available for literal value "datetime.date(2010, 11, 1)" with datatype DATE', -) +@pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError) @pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.parametrize( ("left_fn", "right_fn"), @@ -2454,11 +2365,6 @@ def test_timestamp_date_comparison(backend, alltypes, df, left_fn, right_fn): reason="returns incorrect results", raises=AssertionError, ) -@pytest.mark.notimpl( - ["druid"], - raises=sa.exc.CompileError, - reason='No literal value renderer is available for literal value "datetime.datetime(4567, 1, 1, 0, 0)" with datatype DATETIME', -) @pytest.mark.notimpl(["pyspark"], raises=pd.errors.OutOfBoundsDatetime) @pytest.mark.notimpl( ["polars"], @@ -2501,6 +2407,11 @@ def test_large_timestamp(con): reason="assert Timestamp('2023-01-07 13:20:05.561000') == Timestamp('2023-01-07 13:20:05.561021')", raises=AssertionError, ), + pytest.mark.notyet( + ["druid"], + reason="time_parse truncates to milliseconds", + raises=AssertionError, + ), ], ), param( @@ -2514,6 +2425,11 @@ def test_large_timestamp(con): reason="drivers appear to truncate nanos", raises=AssertionError, ), + pytest.mark.broken( + ["druid"], + reason="ibis normalization truncates nanos", + raises=AssertionError, + ), pytest.mark.notyet( ["postgres", "sqlite"], reason="doesn't support nanoseconds", @@ -2551,14 +2467,6 @@ def 
test_large_timestamp(con): ), ], ) -@pytest.mark.broken( - ["druid"], - raises=sa.exc.ProgrammingError, - reason=( - "java.lang.UnsupportedOperationException: class " - "org.apache.calcite.sql.SqlIdentifier: LONG" - ), -) @pytest.mark.notimpl( ["oracle"], raises=sa.exc.DatabaseError, @@ -2860,7 +2768,9 @@ def test_time_literal_sql(dialect, snapshot, micros): snapshot.assert_match(sql, "out.sql") -@pytest.mark.notimpl(["druid"], raises=sa.exc.CompileError, reason="no date support") +@pytest.mark.notimpl( + ["druid"], raises=PyDruidProgrammingError, reason="no date support" +) @pytest.mark.parametrize( "value", [ diff --git a/ibis/backends/tests/test_uuid.py b/ibis/backends/tests/test_uuid.py index ffef15992821..d577ef903a37 100644 --- a/ibis/backends/tests/test_uuid.py +++ b/ibis/backends/tests/test_uuid.py @@ -28,21 +28,7 @@ "clickhouse": "Nullable(UUID)", } -pytestmark = pytest.mark.notimpl( - ["druid"], - raises=sqlalchemy.exc.CompileError, - reason=( - "No literal value renderer is available for literal value " - "\"UUID('08f48812-7948-4718-96c7-27fa6a398db6')\" with datatype NULL" - ), -) - -@pytest.mark.xfail_version( - duckdb=["duckdb<0.7.0"], - reason='(duckdb.NotImplementedException) Not implemented Error: Unsupported type: "UUID"', - raises=sqlalchemy.exc.NotSupportedError, -) @pytest.mark.notimpl( ["impala", "datafusion", "polars"], raises=NotImplementedError ) @@ -51,6 +37,8 @@ raises=sqlalchemy.exc.InternalError, reason="Feature is not yet implemented: unsupported data type: UUID", ) +@pytest.mark.notimpl(["impala", "polars"], raises=NotImplementedError) +@pytest.mark.notimpl(["datafusion"], raises=Exception) def test_uuid_literal(con, backend): backend_name = backend.name() diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index e46827a641c3..ea9667698b74 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -18,18 +18,24 @@ ImpalaHiveServer2Error, MySQLOperationalError, Py4JJavaError, + PyDruidProgrammingError, SnowflakeProgrammingError, ) from ibis.legacy.udf.vectorized import analytic, reduction -pytestmark = pytest.mark.notimpl( - ["druid", "exasol"], - raises=( - sa.exc.ProgrammingError, - sa.exc.NoSuchTableError, - com.OperationNotDefinedError, +pytestmark = [ + pytest.mark.notimpl( + ["exasol"], + raises=( + sa.exc.ProgrammingError, + sa.exc.NoSuchTableError, + com.OperationNotDefinedError, + ), ), -) + pytest.mark.notimpl( + ["druid"], raises=(com.OperationNotDefinedError, PyDruidProgrammingError) + ), +] # adapted from https://gist.github.com/xmnlab/2c1f93df1a6c6bde4e32c8579117e9cc diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py index 0522e965c1d8..280fbaec04f1 100644 --- a/ibis/formats/pandas.py +++ b/ibis/formats/pandas.py @@ -234,6 +234,8 @@ def try_date(v): if isinstance(v, datetime.datetime): return v.date() elif isinstance(v, str): + if v.endswith("Z"): + return datetime.datetime.fromisoformat(v[:-1]).date() return datetime.date.fromisoformat(v) else: return v diff --git a/poetry.lock b/poetry.lock index 5ab1bd3971a6..cdcf1d01b3a5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4671,7 +4671,6 @@ files = [ [package.dependencies] requests = "*" -sqlalchemy = {version = "*", optional = true, markers = "extra == \"sqlalchemy\""} [package.extras] async = ["tornado"] @@ -7340,7 +7339,7 @@ dask = ["dask", "regex"] datafusion = ["datafusion"] decompiler = ["black"] deltalake = ["deltalake"] -druid = ["pydruid", "sqlalchemy"] +druid = ["pydruid"] duckdb = ["duckdb"] examples = ["pins"] 
exasol = ["sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views"] @@ -7363,4 +7362,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "659771c151e098f1a48db403393d3072ad1f11935ed023415ffa2260c2e7e914" +content-hash = "a2edd5b6e62e78267c3e2339efde3600b637e86ae05c0deec081798be5d1d34e" diff --git a/pyproject.toml b/pyproject.toml index 04f98497a8f5..abfdeb7fa3f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,7 +76,7 @@ pins = { version = ">=0.8.3,<1", extras = ["gcs"], optional = true } polars = { version = ">=0.19.3,<1", optional = true } psycopg2 = { version = ">=2.8.4,<3", optional = true } pydata-google-auth = { version = ">=1.4.0,<2", optional = true } -pydruid = { version = ">=0.6.5,<1", optional = true, extras = ["sqlalchemy"] } +pydruid = { version = ">=0.6.5,<1", optional = true } pymysql = { version = ">=1,<2", optional = true } pyodbc = { version = ">=4.0.39,<6", optional = true } pyspark = { version = ">=3,<4", optional = true } @@ -182,7 +182,7 @@ bigquery = [ clickhouse = ["clickhouse-connect"] dask = ["dask", "regex"] datafusion = ["datafusion"] -druid = ["pydruid", "sqlalchemy"] +druid = ["pydruid"] duckdb = ["duckdb"] exasol = ["sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views"] flink = [] diff --git a/requirements-dev.txt b/requirements-dev.txt index 83d72c424fce..b4ad86ee9fb2 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -175,8 +175,8 @@ pycparser==2.21 ; python_version >= "3.9" and python_version < "4.0" pydantic-core==2.16.1 ; python_version >= "3.10" and python_version < "3.13" pydantic==2.6.0 ; python_version >= "3.10" and python_version < "3.13" pydata-google-auth==1.8.2 ; python_version >= "3.9" and python_version < "4.0" -pydeps==1.12.18 ; python_version >= "3.9" and python_version < "4.0" -pydruid[sqlalchemy]==0.6.6 ; python_version >= "3.9" and python_version < "4.0" +pydeps==1.12.17 ; python_version >= "3.9" and python_version < "4.0" +pydruid==0.6.6 ; python_version >= "3.9" and python_version < "4.0" pyexasol==0.25.2 ; python_version >= "3.9" and python_version < "4.0" pygments==2.17.2 ; python_version >= "3.9" and python_version < "4.0" pyinstrument==4.6.2 ; python_version >= "3.9" and python_version < "4.0" From d0378bb7e844e66a0d4180c2fea96817b6001f4e Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sun, 14 Jan 2024 05:43:42 -0500 Subject: [PATCH 078/161] refactor(impala): port to sqlglot BREAKING CHANGE: Impala UDFs no longer require explicit registration. Remove any calls to `Function.register`. If you were passing `database` to `Function.register`, pass that to `scalar_function` or `aggregate_function` as appropriate. 
--- .github/workflows/ibis-backends.yml | 44 +- ibis/backends/base/sql/__init__.py | 1 - ibis/backends/base/sql/ddl.py | 6 +- ibis/backends/base/sql/registry/main.py | 2 - ibis/backends/base/sqlglot/compiler.py | 18 +- ibis/backends/base/sqlglot/datatypes.py | 8 + ibis/backends/base/sqlglot/rewrites.py | 6 +- ibis/backends/datafusion/compiler.py | 3 + ibis/backends/impala/__init__.py | 349 +++++++------ ibis/backends/impala/client.py | 22 +- ibis/backends/impala/compat.py | 24 - ibis/backends/impala/compiler.py | 461 ++++++++++++++++-- ibis/backends/impala/tests/conftest.py | 40 +- .../test_analytic_exprs/first/out.sql | 4 +- .../test_analytic_exprs/lag_arg/out.sql | 4 +- .../test_analytic_exprs/lag_default/out.sql | 4 +- .../lag_explicit_default/out.sql | 4 +- .../test_analytic_exprs/last/out.sql | 4 +- .../test_analytic_exprs/lead_arg/out.sql | 4 +- .../test_analytic_exprs/lead_default/out.sql | 4 +- .../lead_explicit_default/out.sql | 4 +- .../test_analytic_exprs/ntile/out.sql | 4 +- .../test_analytic_exprs/percent_rank/out.sql | 4 +- .../test_bucket_assign_labels/out.sql | 53 +- .../close_extreme_false/out.sql | 29 +- .../close_extreme_false_closed_right/out.sql | 29 +- .../out.sql | 35 +- .../test_bucket_to_case/closed_right/out.sql | 29 +- .../out.sql | 32 +- .../out.sql | 14 +- .../test_bucket_to_case/default/out.sql | 29 +- .../include_over_include_under0/out.sql | 14 +- .../include_over_include_under1/out.sql | 14 +- .../include_over_include_under2/out.sql | 14 +- .../test_bucket_to_case/include_under/out.sql | 32 +- .../include_under_include_over/out.sql | 35 +- .../fillna_l_extendedprice/out.sql | 4 +- .../fillna_l_extendedprice_double/out.sql | 4 +- .../fillna_l_quantity/out.sql | 4 +- .../test_case_exprs/test_identical_to/out.sql | 5 +- .../test_identical_to_special_case/out.sql | 3 +- .../test_ifelse_use_if/out.sql | 4 +- .../test_case_exprs/test_isnull_1_0/out1.sql | 4 +- .../test_case_exprs/test_isnull_1_0/out2.sql | 4 +- .../test_nullif_ifnull/nullif_boolean/out.sql | 4 +- .../test_nullif_ifnull/nullif_input/out.sql | 4 +- .../nullif_negate_boolean/out.sql | 4 +- .../test_case_exprs/test_search_case/out.sql | 14 +- .../test_case_exprs/test_simple_case/out.sql | 8 +- .../coalesce_columns/out.sql | 4 +- .../coalesce_scalar/out.sql | 4 +- .../greatest_columns/out.sql | 4 +- .../greatest_scalar/out.sql | 4 +- .../least_columns/out.sql | 4 +- .../least_scalar/out.sql | 4 +- .../test_avro_other_formats/out.sql | 5 +- .../test_create_external_table_as/out.sql | 5 +- .../test_create_table_parquet/out.sql | 20 +- .../test_no_overwrite/out.sql | 20 +- .../test_select_basics/out1.sql | 5 +- .../test_select_basics/out2.sql | 5 +- .../test_filter_with_analytic/out.sql | 32 +- .../test_named_from_filter_group_by/abc.sql | 21 +- .../test_named_from_filter_group_by/foo.sql | 21 +- .../test_exprs/test_nunique_where/out.sql | 5 +- .../test_where_with_timestamp/out.sql | 9 +- .../test_field_in_literals/isin/out.sql | 4 +- .../test_field_in_literals/notin/out.sql | 6 +- .../test_isin_notin_in_select/isin/out.sql | 18 +- .../test_isin_notin_in_select/notin/out.sql | 20 +- .../test_literal_in_fields/isin/out.sql | 4 +- .../test_literal_in_fields/notin/out.sql | 6 +- .../out.sql | 9 +- .../test_sql/test_is_parens/isnull/out.sql | 9 +- .../test_sql/test_is_parens/notnull/out.sql | 9 +- .../test_is_parens_identical_to/out.sql | 9 +- .../test_sql/test_join_aliasing/out.sql | 124 +++-- .../test_sql/test_join_key_name/out.sql | 150 ++++-- .../test_sql/test_join_key_name2/out.sql | 82 +++- 
.../cross_join/out.sql | 10 +- .../inner_join/out.sql | 11 +- .../left_join/out.sql | 11 +- .../outer_join/out.sql | 11 +- .../out.sql | 18 +- .../out.sql | 19 +- .../test_sql/test_limit_cte_extract/out.sql | 33 +- .../out.sql | 7 +- .../test_sql/test_multiple_filters/out.sql | 26 +- .../test_sql/test_multiple_filters2/out.sql | 28 +- .../test_sql/test_nested_join_base/out.sql | 41 +- .../test_nested_join_multiple_ctes/out.sql | 89 ++-- .../test_nested_joins_single_cte/out.sql | 63 ++- .../test_string_builtins/test_find/out.sql | 4 +- .../test_string_builtins/ascii_str/out.sql | 4 +- .../test_string_builtins/capitalize/out.sql | 4 +- .../test_string_builtins/extract_host/out.sql | 4 +- .../test_string_builtins/find/out.sql | 4 +- .../find_in_set_multiple/out.sql | 4 +- .../find_in_set_single/out.sql | 4 +- .../find_with_offset/out.sql | 4 +- .../test_string_builtins/length/out.sql | 4 +- .../test_string_builtins/like/out.sql | 4 +- .../like_multiple/out.sql | 4 +- .../test_string_builtins/lower/out.sql | 4 +- .../test_string_builtins/lpad_char/out.sql | 4 +- .../test_string_builtins/lpad_default/out.sql | 4 +- .../test_string_builtins/lstrip/out.sql | 4 +- .../test_string_builtins/re_extract/out.sql | 4 +- .../test_string_builtins/re_replace/out.sql | 4 +- .../test_string_builtins/re_search/out.sql | 4 +- .../test_string_builtins/repeat/out.sql | 4 +- .../test_string_builtins/reverse/out.sql | 4 +- .../test_string_builtins/rlike/out.sql | 4 +- .../test_string_builtins/rpad_char/out.sql | 4 +- .../test_string_builtins/rpad_default/out.sql | 4 +- .../test_string_builtins/rstrip/out.sql | 4 +- .../test_string_builtins/strip/out.sql | 4 +- .../test_string_builtins/strright/out.sql | 6 +- .../test_string_builtins/substr_0_3/out.sql | 10 +- .../test_string_builtins/substr_2/out.sql | 10 +- .../test_string_builtins/translate/out.sql | 4 +- .../test_string_builtins/upper/out.sql | 4 +- .../test_string_join/out.sql | 3 +- .../test_udf/test_sql_generation/out.sql | 3 +- .../out.sql | 3 +- .../test_unary_builtins/test_hash/out.sql | 4 +- .../test_numeric/log_with_base/out.sql | 4 +- .../test_numeric/round_expr/out.sql | 4 +- .../test_numeric/round_no_args/out.sql | 4 +- .../test_numeric/round_two/out.sql | 4 +- .../test_numeric/round_zero/out.sql | 4 +- .../test_numeric/sign_double/out.sql | 4 +- .../test_numeric/sign_float/out.sql | 4 +- .../test_numeric/sign_tinyint/out.sql | 4 +- .../double_col-abs/out.sql | 4 +- .../double_col-approx_median/out.sql | 4 +- .../double_col-approx_nunique/out.sql | 4 +- .../double_col-ceil/out.sql | 4 +- .../double_col-exp/out.sql | 4 +- .../double_col-floor/out.sql | 4 +- .../double_col-ln/out.sql | 4 +- .../double_col-log/out.sql | 4 +- .../double_col-log10/out.sql | 4 +- .../double_col-log2/out.sql | 4 +- .../double_col-nullif_zero/out.sql | 4 +- .../double_col-sqrt/out.sql | 4 +- .../double_col-zero_ifnull/out.sql | 4 +- .../int_col-abs/out.sql | 4 +- .../int_col-approx_median/out.sql | 4 +- .../int_col-approx_nunique/out.sql | 4 +- .../int_col-ceil/out.sql | 4 +- .../int_col-exp/out.sql | 4 +- .../int_col-floor/out.sql | 4 +- .../int_col-ln/out.sql | 4 +- .../int_col-log/out.sql | 4 +- .../int_col-log10/out.sql | 4 +- .../int_col-log2/out.sql | 4 +- .../int_col-nullif_zero/out.sql | 4 +- .../int_col-sqrt/out.sql | 4 +- .../int_col-zero_ifnull/out.sql | 4 +- .../test_reduction_where/avg/out.sql | 4 +- .../test_reduction_where/count/out.sql | 4 +- .../test_reduction_where/max/out.sql | 4 +- .../test_reduction_where/min/out.sql | 4 +- 
.../test_reduction_where/stddev_pop/out.sql | 4 +- .../test_reduction_where/stddev_samp/out.sql | 4 +- .../test_reduction_where/sum/out.sql | 4 +- .../test_reduction_where/var_pop/out.sql | 4 +- .../test_reduction_where/var_samp/out.sql | 4 +- .../test_value_exprs/test_any_all/all/out.sql | 4 +- .../test_value_exprs/test_any_all/any/out.sql | 4 +- .../test_any_all/not_all/out.sql | 6 +- .../test_any_all/not_any/out.sql | 6 +- .../test_value_exprs/test_between/out.sql | 4 +- .../test_binary_infix_operators/add/out.sql | 4 +- .../test_binary_infix_operators/and/out.sql | 6 +- .../test_binary_infix_operators/div/out.sql | 4 +- .../test_binary_infix_operators/eq/out.sql | 4 +- .../test_binary_infix_operators/ge/out.sql | 4 +- .../test_binary_infix_operators/gt/out.sql | 4 +- .../test_binary_infix_operators/le/out.sql | 4 +- .../test_binary_infix_operators/lt/out.sql | 4 +- .../test_binary_infix_operators/mul/out.sql | 4 +- .../test_binary_infix_operators/ne/out.sql | 4 +- .../test_binary_infix_operators/or/out.sql | 6 +- .../test_binary_infix_operators/pow/out.sql | 4 +- .../test_binary_infix_operators/sub/out.sql | 4 +- .../test_binary_infix_operators/xor/out.sql | 8 +- .../function_call/out.sql | 4 +- .../negation/out.sql | 6 +- .../parens_left/out.sql | 6 +- .../test_casts/a-int16/out.sql | 4 +- .../test_casts/a-int32/out.sql | 4 +- .../test_casts/a-int64/out.sql | 4 +- .../test_casts/a-string/out.sql | 4 +- .../test_casts/d-int8/out.sql | 4 +- .../test_casts/g-double/out.sql | 4 +- .../test_casts/g-timestamp/out.sql | 4 +- .../test_column_ref_table_aliases/out.sql | 1 - .../out1.sql | 15 + .../out2.sql | 15 + .../test_decimal_casts/column/out.sql | 4 +- .../test_decimal_casts/literal/out.sql | 3 +- .../compound_isnull/out.sql | 6 +- .../test_isnull_notnull/isnull/out.sql | 4 +- .../test_isnull_notnull/notnull/out.sql | 4 +- .../embedded_double_quote/out.sql | 3 +- .../embedded_single_quote/out.sql | 3 +- .../test_literals/false/out.sql | 3 +- .../test_literals/float/out.sql | 3 +- .../test_literals/int/out.sql | 3 +- .../test_literals/simple/out.sql | 3 +- .../test_literals/true/out.sql | 3 +- .../test_misc_conditionals/out.sql | 4 +- .../test_named_expressions/cast/out.sql | 4 +- .../compound_expr/out.sql | 6 +- .../test_named_expressions/spaces/out.sql | 4 +- .../test_value_exprs/test_negate/a/out.sql | 6 +- .../test_value_exprs/test_negate/f/out.sql | 6 +- .../test_value_exprs/test_negate/h/out.sql | 6 +- .../test_value_exprs/test_sql_extract/out.sql | 8 +- .../full_name/out.sql | 3 +- .../test_timestamp_day_of_week/index/out.sql | 3 +- .../test_timestamp_deltas/days/out1.sql | 4 +- .../test_timestamp_deltas/days/out2.sql | 4 +- .../test_timestamp_deltas/hours/out1.sql | 4 +- .../test_timestamp_deltas/hours/out2.sql | 4 +- .../test_timestamp_deltas/minutes/out1.sql | 4 +- .../test_timestamp_deltas/minutes/out2.sql | 4 +- .../test_timestamp_deltas/months/out1.sql | 4 +- .../test_timestamp_deltas/months/out2.sql | 4 +- .../test_timestamp_deltas/seconds/out1.sql | 4 +- .../test_timestamp_deltas/seconds/out2.sql | 4 +- .../test_timestamp_deltas/weeks/out1.sql | 4 +- .../test_timestamp_deltas/weeks/out2.sql | 4 +- .../test_timestamp_deltas/years/out1.sql | 4 +- .../test_timestamp_deltas/years/out2.sql | 4 +- .../test_timestamp_extract_field/day/out.sql | 4 +- .../test_timestamp_extract_field/hour/out.sql | 4 +- .../microsecond/out.sql | 4 +- .../millisecond/out.sql | 4 +- .../minute/out.sql | 4 +- .../month/out.sql | 4 +- .../second/out.sql | 4 +- 
.../test_timestamp_extract_field/year/out.sql | 4 +- .../default/out.sql | 4 +- .../test_timestamp_from_integer/ms/out.sql | 4 +- .../test_timestamp_from_integer/us/out.sql | 4 +- .../pd_timestamp/out.sql | 3 +- .../pydatetime/out.sql | 3 +- .../timestamp_function/out.sql | 3 +- .../test_timestamp_now/out.sql | 3 +- .../test_add_default_order_by/out.sql | 25 +- .../test_aggregate_in_projection/out.sql | 16 +- .../test_cumulative_functions/max/out1.sql | 5 +- .../test_cumulative_functions/max/out2.sql | 5 +- .../test_cumulative_functions/mean/out1.sql | 5 +- .../test_cumulative_functions/mean/out2.sql | 5 +- .../test_cumulative_functions/min/out1.sql | 5 +- .../test_cumulative_functions/min/out2.sql | 5 +- .../test_cumulative_functions/sum/out1.sql | 5 +- .../test_cumulative_functions/sum/out2.sql | 5 +- .../test_window/test_multiple_windows/out.sql | 7 +- .../test_nested_analytic_function/out.sql | 5 +- .../test_window/test_order_by_desc/out1.sql | 6 +- .../test_window/test_order_by_desc/out2.sql | 7 +- .../test_propagate_nested_windows/out.sql | 7 +- .../test_window/test_rank_functions/out.sql | 8 +- .../out1.sql | 16 +- .../out2.sql | 17 +- .../out.sql | 18 +- .../cumulative/out.sql | 5 +- .../test_window_frame_specs/foll_0/out.sql | 5 +- .../test_window_frame_specs/foll_10_5/out.sql | 5 +- .../test_window_frame_specs/foll_2/out.sql | 5 +- .../foll_2_prec_0/out.sql | 5 +- .../test_window_frame_specs/foll_5_10/out.sql | 5 +- .../test_window_frame_specs/prec_0/out.sql | 5 +- .../test_window_frame_specs/prec_5/out.sql | 5 +- .../prec_5_foll_0/out.sql | 5 +- .../prec_5_foll_2/out.sql | 5 +- .../trailing_10/out.sql | 5 +- .../impala/tests/test_bucket_histogram.py | 2 +- ibis/backends/impala/tests/test_case_exprs.py | 4 +- ibis/backends/impala/tests/test_client.py | 2 +- ibis/backends/impala/tests/test_ddl.py | 7 +- .../impala/tests/test_ddl_compilation.py | 24 +- ibis/backends/impala/tests/test_exprs.py | 16 +- ibis/backends/impala/tests/test_in_not_in.py | 3 +- .../backends/impala/tests/test_parquet_ddl.py | 4 +- ibis/backends/impala/tests/test_partition.py | 2 +- ibis/backends/impala/tests/test_sql.py | 26 +- ibis/backends/impala/tests/test_udf.py | 53 +- .../backends/impala/tests/test_value_exprs.py | 47 +- ibis/backends/impala/tests/test_window.py | 17 +- ibis/backends/impala/udf.py | 217 +++------ .../test_default_limit/impala/out.sql | 5 + .../test_disable_query_limit/impala/out.sql | 5 + .../impala/out.sql | 3 + .../test_respect_set_limit/impala/out.sql | 10 + .../test_group_by_has_index/impala/out.sql | 6 +- .../test_sql/test_isin_bug/impala/out.sql | 18 +- ibis/backends/tests/test_aggregation.py | 11 +- ibis/backends/tests/test_array.py | 35 +- ibis/backends/tests/test_asof_join.py | 22 +- ibis/backends/tests/test_dot_sql.py | 9 +- ibis/backends/tests/test_generic.py | 51 +- ibis/backends/tests/test_numeric.py | 66 +-- ibis/backends/tests/test_string.py | 7 +- ibis/backends/tests/test_temporal.py | 40 +- ibis/backends/tests/test_uuid.py | 15 +- ibis/backends/tests/test_window.py | 8 +- ibis/expr/operations/udf.py | 1 + poetry.lock | 4 +- pyproject.toml | 2 +- 315 files changed, 2752 insertions(+), 1341 deletions(-) delete mode 100644 ibis/backends/impala/compat.py delete mode 100644 ibis/backends/impala/tests/snapshots/test_value_exprs/test_column_ref_table_aliases/out.sql create mode 100644 ibis/backends/impala/tests/snapshots/test_value_exprs/test_correlated_predicate_subquery/out1.sql create mode 100644 
ibis/backends/impala/tests/snapshots/test_value_exprs/test_correlated_predicate_subquery/out2.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/impala/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/impala/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/impala/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/impala/out.sql diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index d3ddc2b3c4df..a78d1ae83a68 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -123,16 +123,16 @@ jobs: - postgres sys-deps: - libgeos-dev - # - name: impala - # title: Impala - # extras: - # - impala - # services: - # - impala - # - kudu - # sys-deps: - # - cmake - # - ninja-build + - name: impala + title: Impala + extras: + - impala + services: + - impala + - kudu + sys-deps: + - cmake + - ninja-build # - name: mssql # title: MS SQL Server # extras: @@ -230,18 +230,18 @@ jobs: - postgres sys-deps: - libgeos-dev - # - os: windows-latest - # backend: - # name: impala - # title: Impala - # extras: - # - impala - # services: - # - impala - # - kudu - # sys-deps: - # - cmake - # - ninja-build + - os: windows-latest + backend: + name: impala + title: Impala + extras: + - impala + services: + - impala + - kudu + sys-deps: + - cmake + - ninja-build # - os: windows-latest # backend: # name: mssql diff --git a/ibis/backends/base/sql/__init__.py b/ibis/backends/base/sql/__init__.py index 1fd6528170c4..618fa85678fa 100644 --- a/ibis/backends/base/sql/__init__.py +++ b/ibis/backends/base/sql/__init__.py @@ -22,7 +22,6 @@ import pandas as pd import pyarrow as pa -raise RuntimeError("Temporarily make the SQL backends dysfunctional") __all__ = ["BaseSQLBackend"] diff --git a/ibis/backends/base/sql/ddl.py b/ibis/backends/base/sql/ddl.py index f608c8ffd811..0f88a93a1df6 100644 --- a/ibis/backends/base/sql/ddl.py +++ b/ibis/backends/base/sql/ddl.py @@ -193,7 +193,7 @@ def _pieces(self): yield self._storage() yield self._location() yield "AS" - yield self.select.compile() + yield self.select def _partitioned_by(self): if self.partition is not None: @@ -212,7 +212,7 @@ def __init__(self, table_name, select, database=None, can_exist=False): @property def _pieces(self): yield "AS" - yield self.select.compile() + yield self.select @property def _prefix(self): @@ -352,7 +352,7 @@ def compile(self): else: partition = "" - select_query = self.select.compile() + select_query = self.select scoped_name = self._get_scoped_name(self.table_name, self.database) return f"{cmd} {scoped_name}{partition}\n{select_query}" diff --git a/ibis/backends/base/sql/registry/main.py b/ibis/backends/base/sql/registry/main.py index 91bf90f29707..96d74fa83013 100644 --- a/ibis/backends/base/sql/registry/main.py +++ b/ibis/backends/base/sql/registry/main.py @@ -355,10 +355,8 @@ def _floor(t, op): ops.IfElse: fixed_arity("if", 3), ops.Between: between, ops.InValues: binary_infix.in_values, - ops.InSubquery: binary_infix.in_column, ops.SimpleCase: case.simple_case, ops.SearchedCase: case.searched_case, - ops.Field: table_column, ops.DateAdd: timestamp.timestamp_op("date_add"), ops.DateSub: timestamp.timestamp_op("date_sub"), ops.DateDiff: timestamp.timestamp_op("datediff"), diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index 
6cf6b4e75639..1cc732ac89a0 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -822,6 +822,14 @@ def visit_Window(self, op, *, how, func, start, end, group_by, order_by): end_value = end.get("value", "UNBOUNDED") end_side = end.get("side", "FOLLOWING") + if getattr(start_value, "this", None) == "0": + start_value = "CURRENT ROW" + start_side = None + + if getattr(end_value, "this", None) == "0": + end_value = "CURRENT ROW" + end_side = None + spec = sge.WindowSpec( kind=how.upper(), start=start_value, @@ -1004,10 +1012,12 @@ def visit_JoinLink(self, op, *, how, table, predicates): "cross": "cross", "outer": "outer", } - assert predicates - return sge.Join( - this=table, side=sides[how], kind=kinds[how], on=sg.and_(*predicates) - ) + assert ( + predicates or how == "cross" + ), "expected non-empty predicates when not a cross join" + + on = sg.and_(*predicates) if predicates else None + return sge.Join(this=table, side=sides[how], kind=kinds[how], on=on) @staticmethod def _gen_valid_name(name: str) -> str: diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index e342ebcd080e..b1d4c336739e 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -608,7 +608,15 @@ class SQLiteType(SqlglotType): dialect = "sqlite" +class ImpalaType(SqlglotType): + dialect = "impala" + + default_decimal_precision = 9 + default_decimal_scale = 0 + + class PySparkType(SqlglotType): dialect = "spark" + default_decimal_precision = 38 default_decimal_scale = 18 diff --git a/ibis/backends/base/sqlglot/rewrites.py b/ibis/backends/base/sqlglot/rewrites.py index c6b02d23423c..d3084e8f1543 100644 --- a/ibis/backends/base/sqlglot/rewrites.py +++ b/ibis/backends/base/sqlglot/rewrites.py @@ -168,10 +168,8 @@ def rewrite_last_to_last_value(_, x, y): @replace(p.WindowFunction(frame=y @ p.WindowFrame(order_by=()))) -def rewrite_empty_order_by_window(_, y): - import ibis - - return _.copy(frame=y.copy(order_by=(ibis.NA,))) +def rewrite_empty_order_by_window(_, y, **__): + return _.copy(frame=y.copy(order_by=(ops.NULL,))) @replace(p.WindowFunction(p.RowNumber | p.NTile, y)) diff --git a/ibis/backends/datafusion/compiler.py b/ibis/backends/datafusion/compiler.py index 0bd4c005451b..f090819e2976 100644 --- a/ibis/backends/datafusion/compiler.py +++ b/ibis/backends/datafusion/compiler.py @@ -6,6 +6,7 @@ from itertools import starmap import sqlglot as sg +import sqlglot.expressions as sge from sqlglot import exp, transforms from sqlglot.dialects import Postgres from sqlglot.dialects.dialect import rename_func @@ -102,6 +103,8 @@ def visit_NonNullLiteral(self, op, *, value, dtype): return self.f.date_trunc("day", value.isoformat()) elif dtype.is_binary(): return sg.exp.HexString(this=value.hex()) + elif dtype.is_uuid(): + return sge.convert(str(value)) else: return None diff --git a/ibis/backends/impala/__init__.py b/ibis/backends/impala/__init__.py index a4cbb777dfcc..a2142846aa57 100644 --- a/ibis/backends/impala/__init__.py +++ b/ibis/backends/impala/__init__.py @@ -3,23 +3,24 @@ from __future__ import annotations import contextlib -import io import operator -import re +import os from functools import cached_property from typing import TYPE_CHECKING, Any, Literal +from urllib.parse import parse_qs, urlparse +import impala.dbapi as impyla import pandas as pd import sqlglot as sg +import sqlglot.expressions as sge +from impala.error import Error as ImpylaError import ibis.common.exceptions as com 
import ibis.config import ibis.expr.datatypes as dt -import ibis.expr.rules as rlz import ibis.expr.schema as sch import ibis.expr.types as ir from ibis import util -from ibis.backends.base.sql import BaseSQLBackend from ibis.backends.base.sql.ddl import ( CTAS, CreateDatabase, @@ -30,12 +31,10 @@ DropView, RenameTable, TruncateTable, - fully_qualified_re, - is_fully_qualified, ) +from ibis.backends.base.sqlglot import SQLGlotBackend from ibis.backends.impala import ddl, udf from ibis.backends.impala.client import ImpalaTable -from ibis.backends.impala.compat import ImpylaError, impyla from ibis.backends.impala.compiler import ImpalaCompiler from ibis.backends.impala.udf import ( aggregate_function, @@ -44,14 +43,14 @@ wrap_udf, ) from ibis.config import options -from ibis.formats.pandas import PandasData if TYPE_CHECKING: - from collections.abc import Mapping + from collections.abc import Iterator, Mapping from pathlib import Path import pyarrow as pa + import ibis.expr.operations as ops from ibis.backends.base.sql.compiler import DDL, DML @@ -64,48 +63,11 @@ ) -def _split_signature(x): - name, rest = x.split("(", 1) - return name, rest[:-1] - - -_arg_type = re.compile(r"(.*)\.\.\.|([^\.]*)") - - -class _type_parser: - NORMAL, IN_PAREN = 0, 1 - - def __init__(self, value): - self.value = value - self.state = self.NORMAL - self.buf = io.StringIO() - self.types = [] - for c in value: - self._step(c) - self._push() - - def _push(self): - val = self.buf.getvalue().strip() - if val: - self.types.append(val) - self.buf = io.StringIO() - - def _step(self, c): - if self.state == self.NORMAL: - if c == "(": - self.state = self.IN_PAREN - elif c == ",": - self._push() - return - elif self.state == self.IN_PAREN: - if c == ")": - self.state = self.NORMAL - self.buf.write(c) - - -class Backend(BaseSQLBackend): +class Backend(SQLGlotBackend): name = "impala" - compiler = ImpalaCompiler + compiler = ImpalaCompiler() + + supports_in_memory_tables = True _sqlglot_dialect = "hive" # not 100% accurate, but very close @@ -123,6 +85,50 @@ class Options(ibis.config.Config): temp_db: str = "__ibis_tmp" temp_path: str = "/tmp/__ibis" + def _from_url(self, url: str, **kwargs: Any) -> Backend: + """Connect to a backend using a URL `url`. + + Parameters + ---------- + url + URL with which to connect to a backend. + kwargs + Additional keyword arguments passed to the `connect` method. + + Returns + ------- + BaseBackend + A backend instance + """ + url = urlparse(url) + + for name in ("username", "hostname", "port", "password"): + if value := ( + getattr(url, name, None) + or os.environ.get(f"{self.name.upper()}_{name.upper()}") + ): + kwargs[name] = value + + with contextlib.suppress(KeyError): + kwargs["host"] = kwargs.pop("hostname") + + (database,) = url.path[1:].split("/", 1) + if database: + kwargs["database"] = database + + query_params = parse_qs(url.query) + + for name, value in query_params.items(): + if len(value) > 1: + kwargs[name] = value + elif len(value) == 1: + kwargs[name] = value[0] + else: + raise com.IbisError(f"Invalid URL parameter: {name}") + + self._convert_kwargs(kwargs) + return self.connect(**kwargs) + def do_connect( self, host: str = "localhost", @@ -135,7 +141,6 @@ def do_connect( password: str | None = None, auth_mechanism: Literal["NOSASL", "PLAIN", "GSSAPI", "LDAP"] = "NOSASL", kerberos_service_name: str = "impala", - pool_size: int = 8, **params: Any, ): """Create an Impala `Backend` for use with Ibis. 
@@ -169,8 +174,6 @@ def do_connect( | `'GSSAPI'` | Kerberos-secured clusters | kerberos_service_name Specify a particular `impalad` service principal. - pool_size - Size of the connection pool. Typically this is not necessary to configure. params Any additional parameters necessary to open a connection to Impala. Please refer to impyla documentation for the full list of @@ -226,16 +229,10 @@ def list_tables(self, like=None, database=None): statement = "SHOW TABLES" if database is not None: statement += f" IN {database}" - if like: - if match := fully_qualified_re.match(like): - database, quoted, unquoted = match.groups() - like = quoted or unquoted - return self.list_tables(like=like, database=database) - statement += f" LIKE '{like}'" with self._safe_raw_sql(statement) as cursor: tables = fetchall(cursor) - return self._filter_with_like(tables.name.tolist()) + return self._filter_with_like(tables.name.tolist(), like=like) def raw_sql(self, query: str): cursor = self.con.cursor() @@ -259,16 +256,21 @@ def raw_sql(self, query: str): return cursor - def fetch_from_cursor(self, cursor, schema): + def _fetch_from_cursor(self, cursor, schema): + from ibis.formats.pandas import PandasData + results = fetchall(cursor) - if schema: - return PandasData.convert_table(results, schema) - return results + return PandasData.convert_table(results, schema) @contextlib.contextmanager def _safe_raw_sql(self, query: str | DDL | DML): if not isinstance(query, str): - query = query.compile() + try: + query = query.sql(dialect=self.compiler.dialect) + except AttributeError: + query = query.compile() + + assert isinstance(query, str), type(query) with contextlib.closing(self.raw_sql(query)) as cur: yield cur @@ -277,18 +279,15 @@ def _safe_exec_sql(self, *args, **kwargs): pass def _fully_qualified_name(self, name, database): - if is_fully_qualified(name): - return name - database = database or self.current_database - return sg.table(name, db=database, quoted=True).sql( - dialect=getattr(self, "_sqlglot_dialect", self.name) + return sg.table(name, db=database, quoted=self.compiler.quoted).sql( + self.compiler.dialect ) @property def current_database(self) -> str: with self._safe_raw_sql("SELECT CURRENT_DATABASE()") as cur: - (db,) = cur.fetchone() + [(db,)] = cur.fetchall() return db def create_database(self, name, path=None, force=False): @@ -353,13 +352,17 @@ def drop_database(self, name, force=False): statement = DropDatabase(name, must_exist=not force) self._safe_exec_sql(statement) - def get_schema(self, table_name: str, database: str | None = None) -> sch.Schema: + def get_schema( + self, table_name: str, schema: str | None = None, database: str | None = None + ) -> sch.Schema: """Return a Schema object for the indicated table and database. Parameters ---------- table_name Table name + schema + Schema name. Unused in the impala backend. 
database Database name @@ -368,13 +371,44 @@ def get_schema(self, table_name: str, database: str | None = None) -> sch.Schema Schema Ibis schema """ - qualified_name = self._fully_qualified_name(table_name, database) - query = f"DESCRIBE {qualified_name}" + query = sge.Describe( + this=sg.table( + table_name, db=schema, catalog=database, quoted=self.compiler.quoted + ) + ) with self._safe_raw_sql(query) as cur: meta = fetchall(cur) - ibis_types = meta.type.str.lower().map(udf.parse_type) - return sch.Schema(dict(zip(meta.name, ibis_types))) + return sch.Schema.from_tuples( + zip(meta["name"], meta["type"].map(self.compiler.type_mapper.from_string)) + ) + + def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: + """Return a Schema object for the indicated table and database. + + Parameters + ---------- + query + Query to execute against Impala + + Returns + ------- + Iterator[tuple[str, dt.DataType]] + Iterator of column name and Ibis type pairs + """ + tmpview = util.gen_name("impala_tmpview") + query = f"CREATE VIEW IF NOT EXISTS {tmpview} AS {query}" + + with self._safe_raw_sql(query) as cur: + try: + cur.execute(f"DESCRIBE {tmpview}") + meta = fetchall(cur) + finally: + cur.execute(f"DROP VIEW IF EXISTS {tmpview}") + + return zip( + meta["name"], meta["type"].map(self.compiler.type_mapper.from_string) + ) @property def client_options(self): @@ -401,8 +435,7 @@ def create_view( database: str | None = None, overwrite: bool = False, ) -> ir.Table: - ast = self.compiler.to_ast(obj) - select = ast.queries[0] + select = self.compile(obj) statement = CreateView(name, select, database=database, can_exist=overwrite) self._safe_exec_sql(statement) return self.table(name, database=database) @@ -475,8 +508,9 @@ def create_table( if isinstance(obj, pd.DataFrame): raise NotImplementedError("Pandas DataFrames not yet supported") - ast = self.compiler.to_ast(obj) - select = ast.queries[0] + self._run_pre_execute_hooks(obj) + + select = self.compile(obj) if overwrite: self.drop_table(name, force=True) @@ -485,7 +519,7 @@ def create_table( CTAS( name, select, - database=database, + database=database or self.current_database, format=format, external=True if location is not None else external, partition=partition, @@ -499,14 +533,14 @@ def create_table( CreateTableWithSchema( name, schema if schema is not None else obj.schema(), - database=database, + database=database or self.current_database, format=format, external=external, path=location, partition=partition, ) ) - return self.table(name, database=database) + return self.table(name, database=database or self.current_database) def avro_file( self, directory, avro_schema, name=None, database=None, external=True @@ -600,13 +634,13 @@ def delimited_file( def parquet_file( self, - directory, - schema=None, - name=None, - database=None, - external=True, - like_file=None, - like_table=None, + directory: str | Path, + schema: sch.Schema | None = None, + name: str | None = None, + database: str | None = None, + external: bool = True, + like_file: str | Path | None = None, + like_table: str | None = None, ): """Create an Ibis table from the passed directory of Parquet files. @@ -633,8 +667,8 @@ def parquet_file( Database to create the (possibly temporary) table in external If a table is external, the referenced data will not be deleted - when the table is dropped in Impala. Otherwise (external=False) - Impala takes ownership of the Parquet file. + when the table is dropped in Impala. Otherwise Impala takes + ownership of the Parquet file. 
Returns ------- @@ -710,6 +744,8 @@ def insert( Completely overwrite contents >>> con.insert(table, table_expr, overwrite=True) # quartodoc: +SKIP # doctest: +SKIP """ + if isinstance(obj, ir.Table): + self._run_pre_execute_hooks(obj) table = self.table(table_name, database=database) return table.insert( obj=obj, @@ -800,12 +836,6 @@ def cache_table(self, table_name, *, database=None, pool="default"): statement = ddl.CacheTable(table_name, database=database, pool=pool) self._safe_exec_sql(statement) - def _get_schema_using_query(self, query): - with self._safe_raw_sql(f"SELECT * FROM ({query}) t0 LIMIT 0") as cur: - ibis_fields = self._adapt_types(cur.description) - - return sch.Schema(ibis_fields) - def create_function(self, func, name=None, database=None): """Create a function within Impala. @@ -931,7 +961,7 @@ def list_udfs(self, database=None, like=None): database = self.current_database statement = ddl.ListFunction(database, like=like, aggregate=False) with self._safe_raw_sql(statement) as cur: - return self._get_udfs(cur, udf.ImpalaUDF) + return self._get_udfs(cur) def list_udas(self, database=None, like=None): """Lists all UDAFs associated with a given database.""" @@ -939,40 +969,29 @@ def list_udas(self, database=None, like=None): database = self.current_database statement = ddl.ListFunction(database, like=like, aggregate=True) with self._safe_raw_sql(statement) as cur: - return self._get_udfs(cur, udf.ImpalaUDA) - - def _get_udfs(self, cur, klass): - def _to_type(x): - ibis_type = udf._impala_type_to_ibis(x.lower()) - return dt.dtype(ibis_type) + return self._get_udfs(cur) + def _get_udfs(self, cur): rows = fetchall(cur) - if not rows.empty: - result = [] - for _, row in rows.iterrows(): - out_type = row["return type"] - sig = row["signature"] - name, types = _split_signature(sig) - types = _type_parser(types).types - - inputs = [] - for arg in types: - argm = _arg_type.match(arg) - var, simple = argm.groups() - if simple: - t = _to_type(simple) - inputs.append(t) - else: - t = _to_type(var) - inputs = rlz.listof(t) - break - - output = udf._impala_type_to_ibis(out_type.lower()) - result.append(klass(inputs, output, name=name)) - return result - else: + + if rows.empty: return [] + current_database = self.current_database + type_mapper = self.compiler.type_mapper + result = [] + for return_type, signature, *_ in rows.itertuples(index=False): + anon = sg.parse_one(signature) + name = anon.this + inputs = [ + type_mapper.from_string(expr.this.this) for expr in anon.expressions + ] + + output = type_mapper.from_string(return_type) + + result.append((current_database, name, tuple(inputs), output)) + return result + def exists_udf(self, name: str, database: str | None = None) -> bool: """Checks if a given UDF exists within a specified database.""" return bool(self.list_udfs(database=database, like=name)) @@ -1106,27 +1125,12 @@ def column_stats(self, name, database=None): def _exec_statement(self, stmt): with self._safe_raw_sql(stmt) as cur: - return self.fetch_from_cursor(cur, schema=None) + return fetchall(cur) def _table_command(self, cmd, name, database=None): qualified_name = self._fully_qualified_name(name, database) return f"{cmd} {qualified_name}" - def _adapt_types(self, descr): - names = [] - adapted_types = [] - for col in descr: - names.append(col[0]) - impala_typename = col[1] - typename = udf._impala_to_ibis_type[impala_typename.lower()] - - if typename == "decimal": - precision, scale = col[4:6] - adapted_types.append(dt.Decimal(precision, scale)) - else: - 
adapted_types.append(typename) - return dict(zip(names, adapted_types)) - def to_pyarrow( self, expr: ir.Expr, @@ -1139,6 +1143,8 @@ def to_pyarrow( from ibis.formats.pyarrow import PyArrowData + self._run_pre_execute_hooks(expr) + table_expr = expr.as_table() output = pa.Table.from_pandas( self.execute(table_expr, params=params, limit=limit, **kwargs), @@ -1157,6 +1163,8 @@ def to_pyarrow_batches( **kwargs: Any, ) -> pa.ipc.RecordBatchReader: pa = self._import_pyarrow() + self._run_pre_execute_hooks(expr) + pa_table = self.to_pyarrow( expr.as_table(), params=params, limit=limit, **kwargs ) @@ -1177,16 +1185,7 @@ def explain( str Query plan """ - if isinstance(expr, ir.Expr): - context = self.compiler.make_context(params=params) - query_ast = self.compiler.to_ast(expr, context) - if len(query_ast.queries) > 1: - raise Exception("Multi-query expression") - - query = query_ast.queries[0].compile() - else: - query = expr - + query = self.compile(expr, params=params) statement = f"EXPLAIN {query}" with self._safe_raw_sql(statement) as cur: @@ -1194,6 +1193,46 @@ def explain( return "\n".join(["Query:", util.indent(query, 2), "", *results.iloc[:, 0]]) + def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: + schema = op.schema + if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]: + raise com.IbisTypeError( + "Impala cannot yet reliably handle `null` typed columns; " + f"got null typed columns: {null_columns}" + ) + + # only register if we haven't already done so + if (name := op.name) not in self.list_tables(): + type_mapper = self.compiler.type_mapper + quoted = self.compiler.quoted + column_defs = [ + sg.exp.ColumnDef( + this=sg.to_identifier(colname, quoted=quoted), + kind=type_mapper.from_ibis(typ), + # we don't support `NOT NULL` constraints in trino because + # because each trino connector differs in whether it + # supports nullability constraints, and whether the + # connector supports it isn't visible to ibis via a + # metadata query + ) + for colname, typ in schema.items() + ] + + create_stmt = sg.exp.Create( + kind="TABLE", + this=sg.exp.Schema( + this=sg.to_identifier(name, quoted=quoted), expressions=column_defs + ), + ).sql(self.name, pretty=True) + + data = op.data.to_frame().itertuples(index=False) + specs = ", ".join("?" 
* len(schema)) + table = sg.table(name, quoted=quoted).sql(self.name) + insert_stmt = f"INSERT INTO {table} VALUES ({specs})" + with self._safe_raw_sql(create_stmt) as cur: + for row in data: + cur.execute(insert_stmt, row) + def fetchall(cur): batches = cur.fetchcolumnar() diff --git a/ibis/backends/impala/client.py b/ibis/backends/impala/client.py index b7c49b5fca57..cac294bd5cba 100644 --- a/ibis/backends/impala/client.py +++ b/ibis/backends/impala/client.py @@ -28,34 +28,36 @@ def _client(self): @property def _database(self) -> str: - return self.op().namespace + return self.op().namespace.database def compute_stats(self, incremental=False): """Invoke Impala COMPUTE STATS command on the table.""" - return self._client.compute_stats(self._qualified_name, incremental=incremental) + return self._client.compute_stats( + self.op().name, database=self._database, incremental=incremental + ) def invalidate_metadata(self): - self._client.invalidate_metadata(self._qualified_name) + self._client.invalidate_metadata(self.op().name, database=self._database) def refresh(self): - self._client.refresh(self._qualified_name) + self._client.refresh(self.op().name, database=self._database) def metadata(self): """Return results of `DESCRIBE FORMATTED` statement.""" - return self._client.describe_formatted(self._qualified_name) + return self._client.describe_formatted(self.op().name, database=self._database) describe_formatted = metadata def files(self): """Return results of SHOW FILES statement.""" - return self._client.show_files(self._qualified_name) + return self._client.show_files(self.op().name, database=self._database) def drop(self): """Drop the table from the database.""" - self._client.drop_table_or_view(self._qualified_name) + self._client.drop_table_or_view(self.op().name, database=self._database) def truncate(self): - self._client.truncate_table(self._qualified_name) + self._client.truncate_table(self.op().name, database=self._database) def insert( self, @@ -121,11 +123,9 @@ def insert( else: partition_schema = None - ast = self._client.compiler.to_ast(expr) - select = ast.queries[0] statement = InsertSelect( self._qualified_name, - select, + self._client.compile(expr), partition=partition, partition_schema=partition_schema, overwrite=overwrite, diff --git a/ibis/backends/impala/compat.py b/ibis/backends/impala/compat.py deleted file mode 100644 index 49bcc0991f3b..000000000000 --- a/ibis/backends/impala/compat.py +++ /dev/null @@ -1,24 +0,0 @@ -from __future__ import annotations - -# Copyright 2015 Cloudera Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import impala.dbapi as impyla -from impala.error import Error as ImpylaError -from impala.error import HiveServer2Error as HS2Error - -__all__ = ( - "impyla", - "ImpylaError", - "HS2Error", -) diff --git a/ibis/backends/impala/compiler.py b/ibis/backends/impala/compiler.py index 54fb8f7f3437..2f3386ece696 100644 --- a/ibis/backends/impala/compiler.py +++ b/ibis/backends/impala/compiler.py @@ -1,66 +1,433 @@ from __future__ import annotations +import contextlib +from functools import singledispatchmethod + +import sqlglot as sg +import sqlglot.expressions as sge +from sqlglot.dialects import Hive +from sqlglot.dialects.dialect import rename_func + +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt import ibis.expr.operations as ops -from ibis.backends.base.sql.compiler import Compiler, ExprTranslator, TableSetFormatter -from ibis.backends.base.sql.registry import binary_infix_ops, operation_registry, unary +from ibis import util +from ibis.backends.base.sqlglot.compiler import NULL, STAR, SQLGlotCompiler +from ibis.backends.base.sqlglot.datatypes import ImpalaType +from ibis.backends.base.sqlglot.rewrites import ( + rewrite_empty_order_by_window, + rewrite_first_to_first_value, + rewrite_last_to_last_value, +) from ibis.expr.rewrites import rewrite_sample -class ImpalaTableSetFormatter(TableSetFormatter): - def _get_join_type(self, op): - jname = self._join_names[type(op)] +def _interval(self, e): + """Work around Impala's inability to handle string literals in INTERVAL syntax.""" + arg = e.args["this"].this + with contextlib.suppress(AttributeError): + arg = arg.sql(self.dialect) + res = f"INTERVAL {arg} {e.args['unit']}" + return res - # Impala requires this - if not op.predicates: - jname = self._join_names[ops.CrossJoin] - return jname +class Impala(Hive): + class Generator(Hive.Generator): + TRANSFORMS = Hive.Generator.TRANSFORMS.copy() | { + sge.ApproxDistinct: rename_func("ndv"), + sge.IsNan: rename_func("is_nan"), + sge.IsInf: rename_func("is_inf"), + sge.DayOfWeek: rename_func("dayofweek"), + sge.Interval: _interval, + } - def _format_in_memory_table(self, op): - if op.data: - return super()._format_in_memory_table(op) - schema = op.schema - names = schema.names - types = schema.types - rows = [ - f"{self._translate(ops.Cast(ops.Literal(None, dtype=dtype), to=dtype))} AS {name}" - for name, dtype in zip(map(self._quote_identifier, names), types) - ] - return f"(SELECT * FROM (SELECT {', '.join(rows)}) AS _ LIMIT 0)" +class ImpalaCompiler(SQLGlotCompiler): + __slots__ = () - -class ImpalaExprTranslator(ExprTranslator): - _registry = {**operation_registry, **binary_infix_ops, ops.Hash: unary("fnv_hash")} - _forbids_frame_clause = ( - *ExprTranslator._forbids_frame_clause, - ops.Lag, - ops.Lead, - ops.FirstValue, - ops.LastValue, - ) - _unsupported_reductions = ( - ops.ApproxMedian, - ops.ApproxCountDistinct, - ops.GroupConcat, + dialect = "impala" + type_mapper = ImpalaType + rewrites = ( + rewrite_sample, + rewrite_first_to_first_value, + rewrite_last_to_last_value, + rewrite_empty_order_by_window, + *SQLGlotCompiler.rewrites, ) - _dialect_name = "hive" - _quote_identifiers = True + quoted = True + + def _aggregate(self, funcname: str, *args, where): + if where is not None: + args = tuple(self.if_(where, arg, NULL) for arg in args) + + return self.f[funcname](*args, dialect=self.dialect) + + @staticmethod + def _minimize_spec(start, end, spec): + # start is None means unbounded preceding + if start is None: + # end is None: unbounded following + # end == 0 => 
current row + # these are treated the same because for the functions where these + # are not allowed they end up behaving the same + # + # I think we're not covering some cases here: + # These will be treated the same, even though they're not + # - window(order_by=x, rows=(None, None)) # should be equivalent to `over ()` + # - window(order_by=x, rows=(None, 0)) # equivalent to a cumulative aggregation + # + # TODO(cpcloud): we need to clean up the semantics of unbounded + # following vs current row at the API level. + # + if end is None or ( + isinstance(getattr(end, "value", None), ops.Literal) + and end.value.value == 0 + and end.following + ): + return None + return spec + + @singledispatchmethod + def visit_node(self, op, **kw): + return super().visit_node(op, **kw) + + @visit_node.register(ops.Literal) + def visit_Literal(self, op, *, value, dtype): + if value is None and dtype.is_binary(): + return NULL + return super().visit_Literal(op, value=value, dtype=dtype) + + @visit_node.register(ops.CountStar) + def visit_CountStar(self, op, *, arg, where): + if where is not None: + return self.f.sum(self.cast(where, op.dtype)) + return self.f.count(STAR) + + @visit_node.register(ops.CountDistinctStar) + def visit_CountDistinctStar(self, op, *, arg, where): + expressions = ( + sg.column(name, table=arg.alias_or_name, quoted=self.quoted) + for name in op.arg.schema.keys() + ) + if where is not None: + expressions = (self.if_(where, expr, NULL) for expr in expressions) + return self.f.count(sge.Distinct(expressions=list(expressions))) + + @visit_node.register(ops.CountDistinct) + def visit_CountDistinct(self, op, *, arg, where): + if where is not None: + arg = self.if_(where, arg, NULL) + return self.f.count(sge.Distinct(expressions=[arg])) + + @visit_node.register(ops.Xor) + def visit_Xor(self, op, *, left, right): + return sg.and_(sg.or_(left, right), sg.not_(sg.and_(left, right))) + + @visit_node.register(ops.RandomScalar) + def visit_RandomScalar(self, op): + return self.f.rand(self.f.utc_to_unix_micros(self.f.utc_timestamp())) + + @visit_node.register(ops.DayOfWeekIndex) + def visit_DayOfWeekIndex(self, op, *, arg): + return self.f.pmod(self.f.dayofweek(arg) - 2, 7) + + @visit_node.register(ops.ExtractMillisecond) + def viist_ExtractMillisecond(self, op, *, arg): + return self.f.extract(self.v.millisecond, arg) % 1_000 + + @visit_node.register(ops.ExtractMicrosecond) + def visit_ExtractMicrosecond(self, op, *, arg): + return self.f.extract(self.v.microsecond, arg) % 1_000_000 + + @visit_node.register(ops.Degrees) + def visit_Degrees(self, op, *, arg): + return 180.0 * arg / self.f.pi() + + @visit_node.register(ops.Radians) + def visit_Radians(self, op, *, arg): + return self.f.pi() * arg / 180.0 + + @visit_node.register(ops.HashBytes) + def visit_HashBytes(self, op, *, arg, how): + if how not in ("md5", "sha1", "sha256", "sha512"): + raise com.UnsupportedOperationError(how) + return self.f[how](arg) + + @visit_node.register(ops.Log) + def visit_Log(self, op, *, arg, base): + if base is None: + return self.f.ln(arg) + return self.f.log(base, arg, dialect=self.dialect) + + @visit_node.register(ops.DateFromYMD) + def visit_DateFromYMD(self, op, *, year, month, day): + return self.cast( + self.f.concat( + self.f.lpad(self.cast(year, dt.string), 4, "0"), + "-", + self.f.lpad(self.cast(month, dt.string), 2, "0"), + "-", + self.f.lpad(self.cast(day, dt.string), 2, "0"), + ), + dt.date, + ) + + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_date() or dtype.is_timestamp(): + # 
hack to return a string literal because impala doesn't support a + # wide range of properly-typed date values + # + # the date implementation is very unpolished: some proper dates are + # supported, but only within a certain range, and the + # implementation wraps on over- and underflow + return sge.convert(value.isoformat()) + elif dtype.is_string(): + value = ( + value + # Escape \ first so we don't double escape other characters. + .replace("\\", "\\\\") + # ASCII escape sequences that are recognized in Python: + # https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals + .replace("\a", "\\a") # Bell + .replace("\b", "\\b") # Backspace + .replace("\f", "\\f") # Formfeed + .replace("\n", "\\n") # Newline / Linefeed + .replace("\r", "\\r") # Carriage return + .replace("\t", "\\t") # Tab + .replace("\v", "\\v") # Vertical tab + ) + return sge.convert(value) + elif dtype.is_decimal() and not value.is_finite(): + raise com.UnsupportedOperationError( + f"Non-finite decimal literal values are not supported by Impala; got: {value}" + ) + elif dtype.is_array() or dtype.is_map() or dtype.is_struct(): + raise com.UnsupportedBackendType( + f"Impala does not support {dtype.name.lower()} literals" + ) + elif dtype.is_uuid(): + return sge.convert(str(value)) + return None + + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + from_ = op.arg.dtype + if from_.is_integer() and to.is_interval(): + return sge.Interval(this=sge.convert(arg), unit=to.unit.singular.upper()) + elif from_.is_temporal() and to.is_integer(): + return 1_000_000 * self.f.unix_timestamp(arg) + return super().visit_Cast(op, arg=arg, to=to) + + @visit_node.register(ops.StartsWith) + def visit_StartsWith(self, op, *, arg, start): + return arg.like(self.f.concat(start, "%")) + + @visit_node.register(ops.EndsWith) + def visit_EndsWith(self, op, *, arg, end): + return arg.like(self.f.concat("%", end)) + + @visit_node.register(ops.FindInSet) + def visit_FindInSet(self, op, *, needle, values): + return self.f.find_in_set(needle, self.f.concat_ws(",", *values)) + + @visit_node.register(ops.ExtractProtocol) + @visit_node.register(ops.ExtractAuthority) + @visit_node.register(ops.ExtractUserInfo) + @visit_node.register(ops.ExtractHost) + @visit_node.register(ops.ExtractFile) + @visit_node.register(ops.ExtractPath) + def visit_ExtractUrlField(self, op, *, arg): + return self.f.parse_url(arg, type(op).__name__[len("Extract") :].upper()) + + @visit_node.register(ops.ExtractQuery) + def visit_ExtractQuery(self, op, *, arg, key): + return self.f.parse_url(*filter(None, (arg, "QUERY", key))) + + @visit_node.register(ops.ExtractFragment) + def visit_ExtractFragment(self, op, *, arg): + return self.f.parse_url(arg, "REF") + + @visit_node.register(ops.StringFind) + def visit_StringFind(self, op, *, arg, substr, start, end): + if start is not None: + return self.f.locate(substr, arg, start + 1) + return self.f.locate(substr, arg) + + @visit_node.register(ops.StringContains) + def visit_StringContains(self, op, *, haystack, needle): + return self.f.locate(needle, haystack) > 0 + + @visit_node.register(ops.TimestampDiff) + def visit_TimestampDiff(self, op, *, left, right): + return self.f.unix_timestamp(left) - self.f.unix_timestamp(right) + + @visit_node.register(ops.Strftime) + def visit_Strftime(self, op, *, arg, format_str): + if not isinstance(op.format_str, ops.Literal): + raise com.UnsupportedOperationError( + "strftime format string must be a literal; " + f"got: {type(op.format_str).__name__}" + ) + 
+        format_str = sg.time.format_time(
+            op.format_str.value, {v: k for k, v in Impala.TIME_MAPPING.items()}
+        )
+        return self.f.from_unixtime(
+            self.f.unix_timestamp(self.cast(arg, dt.string)), format_str
+        )
+
+    @visit_node.register(ops.ExtractWeekOfYear)
+    def visit_ExtractWeekOfYear(self, op, *, arg):
+        return self.f.anon.weekofyear(arg)
+
+    @visit_node.register(ops.TimestampTruncate)
+    def visit_TimestampTruncate(self, op, *, arg, unit):
+        units = {
+            "Y": "YEAR",
+            "M": "MONTH",
+            "W": "WEEK",
+            "D": "DAY",
+            "h": "HOUR",
+            "m": "MINUTE",
+            "s": "SECOND",
+            "ms": "MILLISECONDS",
+            "us": "MICROSECONDS",
+        }
+        if unit.short == "Q":
+            return self.f.trunc(arg, "Q")
+        if (impala_unit := units.get(unit.short)) is None:
+            raise com.UnsupportedOperationError(
+                f"{unit!r} unit is not supported in timestamp/date truncate"
+            )
+        return self.f.date_trunc(impala_unit, arg)
+
+    @visit_node.register(ops.DateTruncate)
+    def visit_DateTruncate(self, op, *, arg, unit):
+        if unit.short == "Q":
+            return self.f.trunc(arg, "Q")
+        return self.f.date_trunc(unit.name.upper(), arg)
+
+    @visit_node.register(ops.TimestampFromUNIX)
+    def visit_TimestampFromUNIX(self, op, *, arg, unit):
+        arg = self.cast(util.convert_unit(arg, unit.short, "s"), dt.int32)
+        return self.cast(self.f.from_unixtime(arg, "yyyy-MM-dd HH:mm:ss"), dt.timestamp)
+
+    @visit_node.register(ops.DateAdd)
+    def visit_DateAdd(self, op, *, left, right):
+        return self.cast(
+            super().visit_DateAdd(op, left=self.cast(left, dt.date), right=right),
+            dt.date,
+        )
+
+    @visit_node.register(ops.TimestampAdd)
+    def visit_TimestampAdd(self, op, *, left, right):
+        if not isinstance(right, sge.Interval):
+            raise com.UnsupportedOperationError(
+                "right operand to timestamp add operation must be a literal"
+            )
+
+        return self.cast(
+            super().visit_TimestampAdd(
+                op, left=self.cast(left, dt.timestamp), right=right
+            ),
+            dt.timestamp,
+        )
+
+    @visit_node.register(ops.DateDiff)
+    def visit_DateDiff(self, op, *, left, right):
+        return self.f.anon.datediff(left, right)
+
+    @visit_node.register(ops.Date)
+    def visit_Date(self, op, *, arg):
+        return self.cast(self.f.to_date(arg), dt.date)
+
+    @visit_node.register(ops.RegexReplace)
+    def visit_RegexReplace(self, op, *, arg, pattern, replacement):
+        return self.f.regexp_replace(arg, pattern, replacement, dialect=self.dialect)
+
+    @visit_node.register(ops.Round)
+    def visit_Round(self, op, *, arg, digits):
+        rounded = self.f.round(*filter(None, (arg, digits)))
+
+        dtype = op.dtype
+        if dtype.is_integer():
+            return self.cast(rounded, dtype)
+        return rounded
+
+    @visit_node.register(ops.Sign)
+    def visit_Sign(self, op, *, arg):
+        sign = self.f.sign(arg)
+        dtype = op.dtype
+        if not dtype.is_float32():
+            return self.cast(sign, dtype)
+        return sign
+
+    @visit_node.register(ops.Arbitrary)
+    @visit_node.register(ops.ArgMax)
+    @visit_node.register(ops.ArgMin)
+    @visit_node.register(ops.ArrayCollect)
+    @visit_node.register(ops.ArrayColumn)
+    @visit_node.register(ops.Covariance)
+    @visit_node.register(ops.DateDelta)
+    @visit_node.register(ops.ExtractDayOfYear)
+    @visit_node.register(ops.First)
+    @visit_node.register(ops.Last)
+    @visit_node.register(ops.Levenshtein)
+    @visit_node.register(ops.Map)
+    @visit_node.register(ops.Median)
+    @visit_node.register(ops.MultiQuantile)
+    @visit_node.register(ops.NthValue)
+    @visit_node.register(ops.Quantile)
+    @visit_node.register(ops.RegexSplit)
+    @visit_node.register(ops.RowID)
+    @visit_node.register(ops.StringSplit)
+    @visit_node.register(ops.StructColumn)
+    @visit_node.register(ops.Time)
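For readers skimming the visit_TimestampTruncate hunk above: quarters go through TRUNC(arg, 'Q'), the other supported units through DATE_TRUNC, and unknown units raise rather than emit wrong SQL. A rough standalone sketch of that dispatch (hypothetical string-building helper, not the compiler's sqlglot code path):

IMPALA_TRUNC_UNITS = {
    "Y": "YEAR", "M": "MONTH", "W": "WEEK", "D": "DAY",
    "h": "HOUR", "m": "MINUTE", "s": "SECOND",
    "ms": "MILLISECONDS", "us": "MICROSECONDS",
}

def truncate_sql(column: str, unit: str) -> str:
    if unit == "Q":
        return f"TRUNC({column}, 'Q')"  # quarters use TRUNC, not DATE_TRUNC
    if (impala_unit := IMPALA_TRUNC_UNITS.get(unit)) is None:
        raise ValueError(f"{unit!r} unit is not supported in timestamp/date truncate")
    return f"DATE_TRUNC('{impala_unit}', {column})"

print(truncate_sql("`t0`.`timestamp_col`", "W"))  # DATE_TRUNC('WEEK', `t0`.`timestamp_col`)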
+    @visit_node.register(ops.TimeDelta)
+    @visit_node.register(ops.TimestampBucket)
+    @visit_node.register(ops.TimestampDelta)
+    @visit_node.register(ops.Unnest)
+    def visit_Undefined(self, op, **_):
+        raise com.OperationNotDefinedError(type(op).__name__)
+
+_SIMPLE_OPS = {
+    ops.All: "min",
+    ops.Any: "max",
+    ops.ApproxMedian: "appx_median",
+    ops.BaseConvert: "conv",
+    ops.BitwiseAnd: "bitand",
+    ops.BitwiseLeftShift: "shiftleft",
+    ops.BitwiseNot: "bitnot",
+    ops.BitwiseOr: "bitor",
+    ops.BitwiseRightShift: "shiftright",
+    ops.BitwiseXor: "bitxor",
+    ops.Cot: "cot",
+    ops.DayOfWeekName: "dayname",
+    ops.ExtractEpochSeconds: "unix_timestamp",
+    ops.Hash: "fnv_hash",
+    ops.LStrip: "ltrim",
+    ops.Ln: "ln",
+    ops.Log10: "log10",
+    ops.Log2: "log2",
+    ops.RStrip: "rtrim",
+    ops.Strip: "trim",
+    ops.TypeOf: "typeof",
+}
-rewrites = ImpalaExprTranslator.rewrites
+for _op, _name in _SIMPLE_OPS.items():
+    assert isinstance(type(_op), type), type(_op)
+    if issubclass(_op, ops.Reduction):
+        @ImpalaCompiler.visit_node.register(_op)
+        def _fmt(self, op, *, _name: str = _name, where, **kw):
+            return self.agg[_name](*kw.values(), where=where)
-@rewrites(ops.FloorDivide)
-def _floor_divide(op):
-    return ops.Floor(ops.Divide(op.left, op.right))
+    else:
+        @ImpalaCompiler.visit_node.register(_op)
+        def _fmt(self, op, *, _name: str = _name, **kw):
+            return self.f[_name](*kw.values())
-class ImpalaCompiler(Compiler):
-    translator_class = ImpalaExprTranslator
-    table_set_formatter_class = ImpalaTableSetFormatter
-    rewrites = Compiler.rewrites | rewrite_sample
+    setattr(ImpalaCompiler, f"visit_{_op.__name__}", _fmt)
-    # impala supports this but sqlglot fails to parse the aliasing in
-    # (VALUES (a AS b))
-    support_values_syntax_in_select = False
+del _op, _name, _fmt
diff --git a/ibis/backends/impala/tests/conftest.py b/ibis/backends/impala/tests/conftest.py
index a41302f7781c..8cb150ea1988 100644
--- a/ibis/backends/impala/tests/conftest.py
+++ b/ibis/backends/impala/tests/conftest.py
@@ -13,7 +13,6 @@
 import ibis.expr.types as ir
 from ibis import options, util
 from ibis.backends.conftest import TEST_TABLES
-from ibis.backends.impala.compiler import ImpalaCompiler, ImpalaExprTranslator
 from ibis.backends.tests.base import BackendTest
 from ibis.tests.expr.mocks import MockBackend
@@ -74,27 +73,29 @@ def preload(self):
     def _load_data(self, **_: Any) -> None:
         """Load test data into a backend."""
         con = self.connection
-        con.raw_sql("CREATE DATABASE IF NOT EXISTS ibis_testing")
-        con.raw_sql("USE ibis_testing")
+        database = "ibis_testing"
+
+        con.create_database(database, force=True)
+        con.raw_sql(f"USE {database}")
         (parquet,) = self.test_files
         # container path to data
         prefix = "/user/hive/warehouse/impala/parquet"
         for dir in parquet.joinpath("parquet").glob("*"):
-            con.raw_sql(f"DROP TABLE IF EXISTS ibis_testing.{dir.name}")
+            con.drop_table(dir.name, database=database, force=True)
             location = f"{prefix}/{dir.name}"
             first_file = next(
                 itertools.chain(dir.rglob("*.parq"), dir.rglob("*.parquet"))
            )
-            create_query = f"""
-            CREATE EXTERNAL TABLE IF NOT EXISTS ibis_testing.{dir.name}
-            LIKE PARQUET '{location}/{first_file.name}'
-            LOCATION '{location}'
-            """
-            con.raw_sql(create_query)
+            con.parquet_file(
+                location,
+                name=dir.name,
+                database=database,
+                like_file=f"{location}/{first_file.name}",
+            )
         con.drop_table("win", database="ibis_testing", force=True)
         con.create_table(
@@ -123,12 +124,9 @@ def connect(*, tmpdir, worker_id, **kw):
         env = IbisTestEnv()
         return ibis.impala.connect(host=env.impala_host,
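One detail worth calling out in the _SIMPLE_OPS registration loop in the compiler hunk above: the `_name: str = _name` keyword default binds the current loop value at function-definition time. Without it, every generated visitor would close over the same variable and resolve to the last name in the dict. A tiny illustration of that late-binding pitfall (illustrative only, not part of the patch):

# Closures capture variables, not values: both of these see the final "log10".
funcs_wrong = [lambda: name for name in ("ln", "log10")]
# A default argument is evaluated immediately, freezing the value per function.
funcs_right = [lambda name=name: name for name in ("ln", "log10")]

assert [f() for f in funcs_wrong] == ["log10", "log10"]
assert [f() for f in funcs_right] == ["ln", "log10"]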
port=env.impala_port, **kw) - def _get_original_column_names(self, tablename: str) -> list[str]: - return list(TEST_TABLES[tablename].names) - def _get_renamed_table(self, tablename: str) -> ir.Table: t = self.connection.table(tablename) - original_names = self._get_original_column_names(tablename) + original_names = TEST_TABLES[tablename].names return t.rename(dict(zip(original_names, t.columns))) @property @@ -155,8 +153,9 @@ def impala_port(self): @property def tmp_dir(self): + leaf = util.gen_name("impala_test_tmp_dir") options.impala.temp_path = tmp_dir = os.environ.get( - "IBIS_TEST_TMP_DIR", f"/tmp/__ibis_test_{util.guid()}" + "IBIS_TEST_TMP_DIR", f"/tmp/{leaf}" ) return tmp_dir @@ -212,7 +211,7 @@ def alltypes_df(alltypes): @pytest.fixture def temp_parquet_table_schema(): - return ibis.schema([("id", "int32"), ("name", "string"), ("files", "int32")]) + return ibis.schema(dict(id="int32", name="string", files="int32")) @pytest.fixture @@ -242,7 +241,7 @@ def kudu_table(con, test_data_db): f"""\ CREATE TABLE {test_data_db}.{name} ( a STRING, - PRIMARY KEY(a) + PRIMARY KEY (a) ) PARTITION BY HASH PARTITIONS 2 STORED AS KUDU @@ -257,8 +256,5 @@ def kudu_table(con, test_data_db): con.drop_table(name, database=test_data_db) -def translate(expr, context=None, named=False): - if context is None: - context = ImpalaCompiler.make_context() - translator = ImpalaExprTranslator(expr.op(), context=context, named=named) - return translator.get_result() +def translate(expr): + return ibis.to_sql(expr, dialect="impala") diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql index f0e93516a778..c6b1bb3235f9 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql @@ -1 +1,3 @@ -first_value(`double_col`) OVER (ORDER BY `id` ASC) \ No newline at end of file +SELECT + FIRST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC NULLS LAST) AS `First(double_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_arg/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_arg/out.sql index 7e778d96083a..864b3a2efb6c 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_arg/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_arg/out.sql @@ -1 +1,3 @@ -lag(`string_col`, 2) \ No newline at end of file +SELECT + LAG(`t0`.`string_col`, 2) OVER (ORDER BY NULL ASC NULLS LAST) AS `Lag(string_col, 2)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_default/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_default/out.sql index cb1f9bf529a7..9326b3b54e4c 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_default/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_default/out.sql @@ -1 +1,3 @@ -lag(`string_col`) \ No newline at end of file +SELECT + LAG(`t0`.`string_col`) OVER (ORDER BY NULL ASC NULLS LAST) AS `Lag(string_col)` +FROM 
`functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_explicit_default/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_explicit_default/out.sql index 7a389e6a3e54..1a917ebf03ae 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_explicit_default/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_explicit_default/out.sql @@ -1 +1,3 @@ -lag(`string_col`, 1, 0) \ No newline at end of file +SELECT + LAG(`t0`.`string_col`, 1, 0) OVER (ORDER BY NULL ASC NULLS LAST) AS `Lag(string_col, 0)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql index 58c72df6a70b..4409732d6cf7 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql @@ -1 +1,3 @@ -last_value(`double_col`) OVER (ORDER BY `id` ASC) \ No newline at end of file +SELECT + LAST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC NULLS LAST) AS `Last(double_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_arg/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_arg/out.sql index 7427b71c5cac..0ff670b2a562 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_arg/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_arg/out.sql @@ -1 +1,3 @@ -lead(`string_col`, 2) \ No newline at end of file +SELECT + LEAD(`t0`.`string_col`, 2) OVER (ORDER BY NULL ASC NULLS LAST) AS `Lead(string_col, 2)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_default/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_default/out.sql index 72daf9f7d9d1..f2f42db96f81 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_default/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_default/out.sql @@ -1 +1,3 @@ -lead(`string_col`) \ No newline at end of file +SELECT + LEAD(`t0`.`string_col`) OVER (ORDER BY NULL ASC NULLS LAST) AS `Lead(string_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_explicit_default/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_explicit_default/out.sql index 41cd162464d6..4051eb507852 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_explicit_default/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_explicit_default/out.sql @@ -1 +1,3 @@ -lead(`string_col`, 1, 0) \ No newline at end of file +SELECT + LEAD(`t0`.`string_col`, 1, 0) OVER (ORDER BY NULL ASC NULLS LAST) AS `Lead(string_col, 
0)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/ntile/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/ntile/out.sql index e0b6514cbabc..b05add74ff76 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/ntile/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/ntile/out.sql @@ -1 +1,3 @@ -(ntile(3) OVER (ORDER BY `double_col` ASC) - 1) \ No newline at end of file +SELECT + NTILE(3) OVER (ORDER BY `t0`.`double_col` ASC NULLS LAST) - 1 AS `NTile(3)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/percent_rank/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/percent_rank/out.sql index 5907b2c45b84..adfc11bd37a6 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/percent_rank/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/percent_rank/out.sql @@ -1 +1,3 @@ -percent_rank() OVER (ORDER BY `double_col` ASC) \ No newline at end of file +SELECT + PERCENT_RANK() OVER (ORDER BY `t0`.`double_col` ASC NULLS LAST) AS `PercentRank()` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_assign_labels/out.sql b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_assign_labels/out.sql index 8f4cd9c30f66..32330b60fdc9 100644 --- a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_assign_labels/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_assign_labels/out.sql @@ -1,20 +1,43 @@ SELECT - CASE t0.`tier` - WHEN 0 THEN 'Under 0' - WHEN 1 THEN '0 to 10' - WHEN 2 THEN '10 to 25' - WHEN 3 THEN '25 to 50' + CASE `t1`.`tier` + WHEN 0 + THEN 'Under 0' + WHEN 1 + THEN '0 to 10' + WHEN 2 + THEN '10 to 25' + WHEN 3 + THEN '25 to 50' ELSE 'error' - END AS `tier2`, t0.`CountStar(alltypes)` + END AS `tier2`, + `t1`.`CountStar()` FROM ( SELECT CASE - WHEN t1.`f` < 0 THEN 0 - WHEN (0 <= t1.`f`) AND (t1.`f` < 10) THEN 1 - WHEN (10 <= t1.`f`) AND (t1.`f` < 25) THEN 2 - WHEN (25 <= t1.`f`) AND (t1.`f` <= 50) THEN 3 - ELSE CAST(NULL AS tinyint) - END AS `tier`, count(1) AS `CountStar(alltypes)` - FROM `alltypes` t1 - GROUP BY 1 -) t0 \ No newline at end of file + WHEN `t0`.`f` < 0 + THEN 0 + WHEN ( + 0 <= `t0`.`f` + ) AND ( + `t0`.`f` < 10 + ) + THEN 1 + WHEN ( + 10 <= `t0`.`f` + ) AND ( + `t0`.`f` < 25 + ) + THEN 2 + WHEN ( + 25 <= `t0`.`f` + ) AND ( + `t0`.`f` <= 50 + ) + THEN 3 + ELSE CAST(NULL AS TINYINT) + END AS `tier`, + COUNT(*) AS `CountStar()` + FROM `alltypes` AS `t0` + GROUP BY + 1 +) AS `t1` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/close_extreme_false/out.sql b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/close_extreme_false/out.sql index 7c9a2f700ff9..461a8aa9f242 100644 --- a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/close_extreme_false/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/close_extreme_false/out.sql @@ -1,6 +1,23 @@ -CASE - WHEN (0 <= `f`) AND (`f` < 10) THEN 0 - WHEN 
(10 <= `f`) AND (`f` < 25) THEN 1 - WHEN (25 <= `f`) AND (`f` < 50) THEN 2 - ELSE CAST(NULL AS tinyint) -END \ No newline at end of file +SELECT + CASE + WHEN ( + 0 <= `t0`.`f` + ) AND ( + `t0`.`f` < 10 + ) + THEN 0 + WHEN ( + 10 <= `t0`.`f` + ) AND ( + `t0`.`f` < 25 + ) + THEN 1 + WHEN ( + 25 <= `t0`.`f` + ) AND ( + `t0`.`f` < 50 + ) + THEN 2 + ELSE CAST(NULL AS TINYINT) + END AS `Bucket(f)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/close_extreme_false_closed_right/out.sql b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/close_extreme_false_closed_right/out.sql index 67290ddc413b..06537d8a9463 100644 --- a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/close_extreme_false_closed_right/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/close_extreme_false_closed_right/out.sql @@ -1,6 +1,23 @@ -CASE - WHEN (0 < `f`) AND (`f` <= 10) THEN 0 - WHEN (10 < `f`) AND (`f` <= 25) THEN 1 - WHEN (25 < `f`) AND (`f` <= 50) THEN 2 - ELSE CAST(NULL AS tinyint) -END \ No newline at end of file +SELECT + CASE + WHEN ( + 0 < `t0`.`f` + ) AND ( + `t0`.`f` <= 10 + ) + THEN 0 + WHEN ( + 10 < `t0`.`f` + ) AND ( + `t0`.`f` <= 25 + ) + THEN 1 + WHEN ( + 25 < `t0`.`f` + ) AND ( + `t0`.`f` <= 50 + ) + THEN 2 + ELSE CAST(NULL AS TINYINT) + END AS `Bucket(f)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/close_extreme_false_include_under_include_over/out.sql b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/close_extreme_false_include_under_include_over/out.sql index b95638ae45d6..b4aa949c85a5 100644 --- a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/close_extreme_false_include_under_include_over/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/close_extreme_false_include_under_include_over/out.sql @@ -1,8 +1,27 @@ -CASE - WHEN `f` < 0 THEN 0 - WHEN (0 <= `f`) AND (`f` < 10) THEN 1 - WHEN (10 <= `f`) AND (`f` < 25) THEN 2 - WHEN (25 <= `f`) AND (`f` < 50) THEN 3 - WHEN 50 <= `f` THEN 4 - ELSE CAST(NULL AS tinyint) -END \ No newline at end of file +SELECT + CASE + WHEN `t0`.`f` < 0 + THEN 0 + WHEN ( + 0 <= `t0`.`f` + ) AND ( + `t0`.`f` < 10 + ) + THEN 1 + WHEN ( + 10 <= `t0`.`f` + ) AND ( + `t0`.`f` < 25 + ) + THEN 2 + WHEN ( + 25 <= `t0`.`f` + ) AND ( + `t0`.`f` < 50 + ) + THEN 3 + WHEN 50 <= `t0`.`f` + THEN 4 + ELSE CAST(NULL AS TINYINT) + END AS `Bucket(f)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/closed_right/out.sql b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/closed_right/out.sql index 78811483e16c..cf3f70f2d673 100644 --- a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/closed_right/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/closed_right/out.sql @@ -1,6 +1,23 @@ -CASE - WHEN (0 <= `f`) AND (`f` <= 10) THEN 0 - WHEN (10 < `f`) AND (`f` <= 25) THEN 1 - WHEN (25 < `f`) AND (`f` <= 50) THEN 2 - ELSE CAST(NULL AS tinyint) -END \ No newline at end of file +SELECT + CASE + WHEN ( + 0 <= `t0`.`f` + ) AND ( + `t0`.`f` <= 10 + ) + THEN 0 + WHEN ( + 10 < `t0`.`f` + ) AND ( + `t0`.`f` <= 25 + ) 
+ THEN 1 + WHEN ( + 25 < `t0`.`f` + ) AND ( + `t0`.`f` <= 50 + ) + THEN 2 + ELSE CAST(NULL AS TINYINT) + END AS `Bucket(f)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/closed_right_close_extreme_false_include_under/out.sql b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/closed_right_close_extreme_false_include_under/out.sql index 1d254824f5c4..d59ea7900f17 100644 --- a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/closed_right_close_extreme_false_include_under/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/closed_right_close_extreme_false_include_under/out.sql @@ -1,7 +1,25 @@ -CASE - WHEN `f` <= 0 THEN 0 - WHEN (0 < `f`) AND (`f` <= 10) THEN 1 - WHEN (10 < `f`) AND (`f` <= 25) THEN 2 - WHEN (25 < `f`) AND (`f` <= 50) THEN 3 - ELSE CAST(NULL AS tinyint) -END \ No newline at end of file +SELECT + CASE + WHEN `t0`.`f` <= 0 + THEN 0 + WHEN ( + 0 < `t0`.`f` + ) AND ( + `t0`.`f` <= 10 + ) + THEN 1 + WHEN ( + 10 < `t0`.`f` + ) AND ( + `t0`.`f` <= 25 + ) + THEN 2 + WHEN ( + 25 < `t0`.`f` + ) AND ( + `t0`.`f` <= 50 + ) + THEN 3 + ELSE CAST(NULL AS TINYINT) + END AS `Bucket(f)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/closed_right_include_over_include_under/out.sql b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/closed_right_include_over_include_under/out.sql index 1649d97034ea..b0f5158c750d 100644 --- a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/closed_right_include_over_include_under/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/closed_right_include_over_include_under/out.sql @@ -1,5 +1,9 @@ -CASE - WHEN `f` <= 10 THEN 0 - WHEN 10 < `f` THEN 1 - ELSE CAST(NULL AS tinyint) -END \ No newline at end of file +SELECT + CASE + WHEN `t0`.`f` <= 10 + THEN 0 + WHEN 10 < `t0`.`f` + THEN 1 + ELSE CAST(NULL AS TINYINT) + END AS `Bucket(f)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/default/out.sql b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/default/out.sql index ca91f0ba81e1..c9ab2234f883 100644 --- a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/default/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/default/out.sql @@ -1,6 +1,23 @@ -CASE - WHEN (0 <= `f`) AND (`f` < 10) THEN 0 - WHEN (10 <= `f`) AND (`f` < 25) THEN 1 - WHEN (25 <= `f`) AND (`f` <= 50) THEN 2 - ELSE CAST(NULL AS tinyint) -END \ No newline at end of file +SELECT + CASE + WHEN ( + 0 <= `t0`.`f` + ) AND ( + `t0`.`f` < 10 + ) + THEN 0 + WHEN ( + 10 <= `t0`.`f` + ) AND ( + `t0`.`f` < 25 + ) + THEN 1 + WHEN ( + 25 <= `t0`.`f` + ) AND ( + `t0`.`f` <= 50 + ) + THEN 2 + ELSE CAST(NULL AS TINYINT) + END AS `Bucket(f)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_over_include_under0/out.sql b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_over_include_under0/out.sql index 1901aa775e98..e6e63f334c08 100644 --- 
a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_over_include_under0/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_over_include_under0/out.sql @@ -1,5 +1,9 @@ -CASE - WHEN `f` < 10 THEN 0 - WHEN 10 <= `f` THEN 1 - ELSE CAST(NULL AS tinyint) -END \ No newline at end of file +SELECT + CASE + WHEN `t0`.`f` < 10 + THEN 0 + WHEN 10 <= `t0`.`f` + THEN 1 + ELSE CAST(NULL AS TINYINT) + END AS `Bucket(f)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_over_include_under1/out.sql b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_over_include_under1/out.sql index 0dae0d0438ab..104dcc83014a 100644 --- a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_over_include_under1/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_over_include_under1/out.sql @@ -1,5 +1,9 @@ -CAST(CASE - WHEN `f` < 10 THEN 0 - WHEN 10 <= `f` THEN 1 - ELSE CAST(NULL AS tinyint) -END AS int) \ No newline at end of file +SELECT + CAST(CASE + WHEN `t0`.`f` < 10 + THEN 0 + WHEN 10 <= `t0`.`f` + THEN 1 + ELSE CAST(NULL AS TINYINT) + END AS INT) AS `Cast(Bucket(f), int32)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_over_include_under2/out.sql b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_over_include_under2/out.sql index 5b95c1b7cdca..f4661b51024a 100644 --- a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_over_include_under2/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_over_include_under2/out.sql @@ -1,5 +1,9 @@ -CAST(CASE - WHEN `f` < 10 THEN 0 - WHEN 10 <= `f` THEN 1 - ELSE CAST(NULL AS tinyint) -END AS double) \ No newline at end of file +SELECT + CAST(CASE + WHEN `t0`.`f` < 10 + THEN 0 + WHEN 10 <= `t0`.`f` + THEN 1 + ELSE CAST(NULL AS TINYINT) + END AS DOUBLE) AS `Cast(Bucket(f), float64)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_under/out.sql b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_under/out.sql index 30b0a9823016..39a83640fe1b 100644 --- a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_under/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_under/out.sql @@ -1,7 +1,25 @@ -CASE - WHEN `f` < 0 THEN 0 - WHEN (0 <= `f`) AND (`f` < 10) THEN 1 - WHEN (10 <= `f`) AND (`f` < 25) THEN 2 - WHEN (25 <= `f`) AND (`f` <= 50) THEN 3 - ELSE CAST(NULL AS tinyint) -END \ No newline at end of file +SELECT + CASE + WHEN `t0`.`f` < 0 + THEN 0 + WHEN ( + 0 <= `t0`.`f` + ) AND ( + `t0`.`f` < 10 + ) + THEN 1 + WHEN ( + 10 <= `t0`.`f` + ) AND ( + `t0`.`f` < 25 + ) + THEN 2 + WHEN ( + 25 <= `t0`.`f` + ) AND ( + `t0`.`f` <= 50 + ) + THEN 3 + ELSE CAST(NULL AS TINYINT) + END AS `Bucket(f)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_under_include_over/out.sql 
b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_under_include_over/out.sql index 79a328c92a52..0dda250e0ef1 100644 --- a/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_under_include_over/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_bucket_histogram/test_bucket_to_case/include_under_include_over/out.sql @@ -1,8 +1,27 @@ -CASE - WHEN `f` < 0 THEN 0 - WHEN (0 <= `f`) AND (`f` < 10) THEN 1 - WHEN (10 <= `f`) AND (`f` < 25) THEN 2 - WHEN (25 <= `f`) AND (`f` <= 50) THEN 3 - WHEN 50 < `f` THEN 4 - ELSE CAST(NULL AS tinyint) -END \ No newline at end of file +SELECT + CASE + WHEN `t0`.`f` < 0 + THEN 0 + WHEN ( + 0 <= `t0`.`f` + ) AND ( + `t0`.`f` < 10 + ) + THEN 1 + WHEN ( + 10 <= `t0`.`f` + ) AND ( + `t0`.`f` < 25 + ) + THEN 2 + WHEN ( + 25 <= `t0`.`f` + ) AND ( + `t0`.`f` <= 50 + ) + THEN 3 + WHEN 50 < `t0`.`f` + THEN 4 + ELSE CAST(NULL AS TINYINT) + END AS `Bucket(f)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_decimal_fillna_cast_arg/fillna_l_extendedprice/out.sql b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_decimal_fillna_cast_arg/fillna_l_extendedprice/out.sql index 18d4d73cf05a..4890dcc7d096 100644 --- a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_decimal_fillna_cast_arg/fillna_l_extendedprice/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_decimal_fillna_cast_arg/fillna_l_extendedprice/out.sql @@ -1 +1,3 @@ -coalesce(`l_extendedprice`, 0) \ No newline at end of file +SELECT + COALESCE(`t0`.`l_extendedprice`, 0) AS `Coalesce()` +FROM `tpch_lineitem` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_decimal_fillna_cast_arg/fillna_l_extendedprice_double/out.sql b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_decimal_fillna_cast_arg/fillna_l_extendedprice_double/out.sql index bb6d9e6dc505..59bd466502bc 100644 --- a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_decimal_fillna_cast_arg/fillna_l_extendedprice_double/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_decimal_fillna_cast_arg/fillna_l_extendedprice_double/out.sql @@ -1 +1,3 @@ -coalesce(`l_extendedprice`, 0.0) \ No newline at end of file +SELECT + COALESCE(`t0`.`l_extendedprice`, 0.0) AS `Coalesce()` +FROM `tpch_lineitem` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_decimal_fillna_cast_arg/fillna_l_quantity/out.sql b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_decimal_fillna_cast_arg/fillna_l_quantity/out.sql index 38e98e3285da..3fa2c6486406 100644 --- a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_decimal_fillna_cast_arg/fillna_l_quantity/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_decimal_fillna_cast_arg/fillna_l_quantity/out.sql @@ -1 +1,3 @@ -coalesce(`l_quantity`, 0) \ No newline at end of file +SELECT + COALESCE(`t0`.`l_quantity`, 0) AS `Coalesce()` +FROM `tpch_lineitem` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_identical_to/out.sql b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_identical_to/out.sql index 0565bc3ad7b2..4ec0618d6bde 100644 --- a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_identical_to/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_identical_to/out.sql @@ -1,2 
+1,3 @@ -SELECT t0.`tinyint_col` IS NOT DISTINCT FROM t0.`double_col` AS `tmp` -FROM `functional_alltypes` t0 \ No newline at end of file +SELECT + `t0`.`tinyint_col` IS NOT DISTINCT FROM `t0`.`double_col` AS `tmp` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_identical_to_special_case/out.sql b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_identical_to_special_case/out.sql index cc71612458b0..48da5cb117bb 100644 --- a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_identical_to_special_case/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_identical_to_special_case/out.sql @@ -1 +1,2 @@ -SELECT TRUE AS `tmp` \ No newline at end of file +SELECT + CAST(NULL AS BIGINT) IS NOT DISTINCT FROM CAST(NULL AS BIGINT) AS `tmp` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_ifelse_use_if/out.sql b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_ifelse_use_if/out.sql index d82567438333..27e2ebb44fab 100644 --- a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_ifelse_use_if/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_ifelse_use_if/out.sql @@ -1 +1,3 @@ -if(`f` > 0, `e`, `a`) \ No newline at end of file +SELECT + IF(`t0`.`f` > 0, `t0`.`e`, `t0`.`a`) AS `IfElse(Greater(f, 0), e, a)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_isnull_1_0/out1.sql b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_isnull_1_0/out1.sql index 25cdb744423b..9cb074f3c97d 100644 --- a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_isnull_1_0/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_isnull_1_0/out1.sql @@ -1 +1,3 @@ -if(`g` IS NULL, 1, 0) \ No newline at end of file +SELECT + IF(`t0`.`g` IS NULL, 1, 0) AS `IfElse(IsNull(g), 1, 0)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_isnull_1_0/out2.sql b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_isnull_1_0/out2.sql index eeff1bc1fe6f..03c9edf98286 100644 --- a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_isnull_1_0/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_isnull_1_0/out2.sql @@ -1 +1,3 @@ -sum(if(`g` IS NULL, 1, 0)) \ No newline at end of file +SELECT + SUM(IF(`t0`.`g` IS NULL, 1, 0)) AS `Sum(IfElse(IsNull(g), 1, 0))` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_nullif_ifnull/nullif_boolean/out.sql b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_nullif_ifnull/nullif_boolean/out.sql index 540ae2672d19..fefaae99655a 100644 --- a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_nullif_ifnull/nullif_boolean/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_nullif_ifnull/nullif_boolean/out.sql @@ -1 +1,3 @@ -nullif(`l_quantity` = 0, `l_quantity` = 0) \ No newline at end of file +SELECT + NULLIF(`t0`.`l_quantity` = 0, `t0`.`l_quantity` = 0) AS `NullIf(Equals(l_quantity, 0), Equals(l_quantity, 0))` +FROM `tpch_lineitem` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_nullif_ifnull/nullif_input/out.sql b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_nullif_ifnull/nullif_input/out.sql index 
54e0e8065bb4..dadef905f39d 100644 --- a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_nullif_ifnull/nullif_input/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_nullif_ifnull/nullif_input/out.sql @@ -1 +1,3 @@ -nullif(`l_quantity`, `l_quantity`) \ No newline at end of file +SELECT + NULLIF(`t0`.`l_quantity`, `t0`.`l_quantity`) AS `NullIf(l_quantity, l_quantity)` +FROM `tpch_lineitem` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_nullif_ifnull/nullif_negate_boolean/out.sql b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_nullif_ifnull/nullif_negate_boolean/out.sql index a79bfc504605..5a986e7ddb3e 100644 --- a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_nullif_ifnull/nullif_negate_boolean/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_nullif_ifnull/nullif_negate_boolean/out.sql @@ -1 +1,3 @@ -nullif(`l_quantity` != 0, `l_quantity` = 0) \ No newline at end of file +SELECT + NULLIF(`t0`.`l_quantity` <> 0, `t0`.`l_quantity` = 0) AS `NullIf(NotEquals(l_quantity, 0), Equals(l_quantity, 0))` +FROM `tpch_lineitem` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_search_case/out.sql b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_search_case/out.sql index f4ee943f9d48..086867ecdd77 100644 --- a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_search_case/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_search_case/out.sql @@ -1,5 +1,9 @@ -CASE - WHEN `f` > 0 THEN `d` * 2 - WHEN `c` < 0 THEN `a` * 2 - ELSE CAST(NULL AS bigint) -END \ No newline at end of file +SELECT + CASE + WHEN `t0`.`f` > 0 + THEN `t0`.`d` * 2 + WHEN `t0`.`c` < 0 + THEN `t0`.`a` * 2 + ELSE CAST(NULL AS BIGINT) + END AS `SearchedCase(Cast(None, int64))` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_simple_case/out.sql b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_simple_case/out.sql index 56a1c8ac9b31..bb9300299c72 100644 --- a/ibis/backends/impala/tests/snapshots/test_case_exprs/test_simple_case/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_case_exprs/test_simple_case/out.sql @@ -1,5 +1,3 @@ -CASE `g` - WHEN 'foo' THEN 'bar' - WHEN 'baz' THEN 'qux' - ELSE 'default' -END \ No newline at end of file +SELECT + CASE `t0`.`g` WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS `SimpleCase(g, 'default')` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/coalesce_columns/out.sql b/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/coalesce_columns/out.sql index 11470952abab..37a2675f2b39 100644 --- a/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/coalesce_columns/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/coalesce_columns/out.sql @@ -1 +1,3 @@ -coalesce(`int_col`, `bigint_col`) \ No newline at end of file +SELECT + COALESCE(`t0`.`int_col`, `t0`.`bigint_col`) AS `Coalesce()` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/coalesce_scalar/out.sql 
b/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/coalesce_scalar/out.sql index 7d545f322a6f..d0f093f98b7a 100644 --- a/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/coalesce_scalar/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/coalesce_scalar/out.sql @@ -1 +1,3 @@ -coalesce(`string_col`, 'foo') \ No newline at end of file +SELECT + COALESCE(`t0`.`string_col`, 'foo') AS `Coalesce()` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/greatest_columns/out.sql b/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/greatest_columns/out.sql index 82e0083716de..d667c8a69296 100644 --- a/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/greatest_columns/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/greatest_columns/out.sql @@ -1 +1,3 @@ -greatest(`int_col`, `bigint_col`) \ No newline at end of file +SELECT + GREATEST(`t0`.`int_col`, `t0`.`bigint_col`) AS `Greatest()` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/greatest_scalar/out.sql b/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/greatest_scalar/out.sql index 98df465a098c..693db916250b 100644 --- a/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/greatest_scalar/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/greatest_scalar/out.sql @@ -1 +1,3 @@ -greatest(`string_col`, 'foo') \ No newline at end of file +SELECT + GREATEST(`t0`.`string_col`, 'foo') AS `Greatest()` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/least_columns/out.sql b/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/least_columns/out.sql index 4458c9aee386..90f4b2d35f1c 100644 --- a/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/least_columns/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/least_columns/out.sql @@ -1 +1,3 @@ -least(`int_col`, `bigint_col`) \ No newline at end of file +SELECT + LEAST(`t0`.`int_col`, `t0`.`bigint_col`) AS `Least()` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/least_scalar/out.sql b/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/least_scalar/out.sql index 563206d70430..8260947a866e 100644 --- a/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/least_scalar/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_coalesce_greater_least/test_varargs_functions/least_scalar/out.sql @@ -1 +1,3 @@ -least(`string_col`, 'foo') \ No newline at end of file +SELECT + LEAST(`t0`.`string_col`, 'foo') AS `Least()` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_avro_other_formats/out.sql 
b/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_avro_other_formats/out.sql index 0609f60905c4..da51b1c54307 100644 --- a/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_avro_other_formats/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_avro_other_formats/out.sql @@ -1,5 +1,6 @@ CREATE TABLE IF NOT EXISTS `tname` STORED AS AVRO AS -SELECT t0.* -FROM `functional_alltypes` t0 \ No newline at end of file +SELECT + * +FROM `functional_alltypes` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_create_external_table_as/out.sql b/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_create_external_table_as/out.sql index ce648de38dd2..273b2045aa34 100644 --- a/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_create_external_table_as/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_create_external_table_as/out.sql @@ -2,5 +2,6 @@ CREATE EXTERNAL TABLE `foo`.`another_table` STORED AS PARQUET LOCATION '/path/to/table' AS -SELECT t0.* -FROM `test1` t0 \ No newline at end of file +SELECT + * +FROM `test1` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_create_table_parquet/out.sql b/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_create_table_parquet/out.sql index 9eb1ba2b3ea6..c786bd23a9a3 100644 --- a/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_create_table_parquet/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_create_table_parquet/out.sql @@ -1,6 +1,20 @@ CREATE TABLE `bar`.`some_table` STORED AS PARQUET AS -SELECT t0.* -FROM `functional_alltypes` t0 -WHERE t0.`bigint_col` > 0 \ No newline at end of file +SELECT + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month` +FROM `functional_alltypes` AS `t0` +WHERE + `t0`.`bigint_col` > 0 \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_no_overwrite/out.sql b/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_no_overwrite/out.sql index 22dbd3b0ec32..21655b333d8f 100644 --- a/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_no_overwrite/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_no_overwrite/out.sql @@ -1,6 +1,20 @@ CREATE TABLE IF NOT EXISTS `tname` STORED AS PARQUET AS -SELECT t0.* -FROM `functional_alltypes` t0 -WHERE t0.`bigint_col` > 0 \ No newline at end of file +SELECT + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month` +FROM `functional_alltypes` AS `t0` +WHERE + `t0`.`bigint_col` > 0 \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_select_basics/out1.sql b/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_select_basics/out1.sql index 38de0f178cfd..7a42f5244dd1 100644 --- a/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_select_basics/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_select_basics/out1.sql @@ -1,4 +1,5 @@ INSERT INTO 
`foo`.`testing123456` -SELECT t0.* -FROM `functional_alltypes` t0 +SELECT + * +FROM `functional_alltypes` AS `t0` LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_select_basics/out2.sql b/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_select_basics/out2.sql index 32eb8fd6e9e9..6dcec22f28c0 100644 --- a/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_select_basics/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_ddl_compilation/test_select_basics/out2.sql @@ -1,4 +1,5 @@ INSERT OVERWRITE `foo`.`testing123456` -SELECT t0.* -FROM `functional_alltypes` t0 +SELECT + * +FROM `functional_alltypes` AS `t0` LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql b/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql index 22bce18c3ec9..3174e14dfa1e 100644 --- a/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql @@ -1,18 +1,16 @@ -WITH t0 AS ( - SELECT t4.`col`, NULL AS `filter` - FROM `x` t4 -), -t1 AS ( - SELECT t0.* - FROM t0 - WHERE t0.`filter` IS NULL -), -t2 AS ( - SELECT t1.`col`, t1.`filter` - FROM t1 -) -SELECT t3.`col`, t3.`analytic` +SELECT + `t2`.`col`, + `t2`.`analytic` FROM ( - SELECT t2.`col`, count(1) OVER () AS `analytic` - FROM t2 -) t3 \ No newline at end of file + SELECT + `t1`.`col`, + COUNT(*) OVER (ORDER BY NULL ASC NULLS LAST) AS `analytic` + FROM ( + SELECT + `t0`.`col`, + NULL AS `filter` + FROM `x` AS `t0` + WHERE + NULL IS NULL + ) AS `t1` +) AS `t2` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_exprs/test_named_from_filter_group_by/abc.sql b/ibis/backends/impala/tests/snapshots/test_exprs/test_named_from_filter_group_by/abc.sql index c283f69194a7..084d6ae3fcc9 100644 --- a/ibis/backends/impala/tests/snapshots/test_exprs/test_named_from_filter_group_by/abc.sql +++ b/ibis/backends/impala/tests/snapshots/test_exprs/test_named_from_filter_group_by/abc.sql @@ -1,4 +1,17 @@ -SELECT t0.`key`, sum(((t0.`value` + 1) + 2) + 3) AS `abc` -FROM `t0` t0 -WHERE t0.`value` = 42 -GROUP BY 1 \ No newline at end of file +SELECT + `t1`.`key`, + SUM(( + ( + `t1`.`value` + 1 + ) + 2 + ) + 3) AS `abc` +FROM ( + SELECT + `t0`.`key`, + `t0`.`value` + FROM `t0` AS `t0` + WHERE + `t0`.`value` = 42 +) AS `t1` +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_exprs/test_named_from_filter_group_by/foo.sql b/ibis/backends/impala/tests/snapshots/test_exprs/test_named_from_filter_group_by/foo.sql index 7dbaca888a9f..e49a2c885636 100644 --- a/ibis/backends/impala/tests/snapshots/test_exprs/test_named_from_filter_group_by/foo.sql +++ b/ibis/backends/impala/tests/snapshots/test_exprs/test_named_from_filter_group_by/foo.sql @@ -1,4 +1,17 @@ -SELECT t0.`key`, sum(((t0.`value` + 1) + 2) + 3) AS `foo` -FROM `t0` t0 -WHERE t0.`value` = 42 -GROUP BY 1 \ No newline at end of file +SELECT + `t1`.`key`, + SUM(( + ( + `t1`.`value` + 1 + ) + 2 + ) + 3) AS `foo` +FROM ( + SELECT + `t0`.`key`, + `t0`.`value` + FROM `t0` AS `t0` + WHERE + `t0`.`value` = 42 +) AS `t1` +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_exprs/test_nunique_where/out.sql b/ibis/backends/impala/tests/snapshots/test_exprs/test_nunique_where/out.sql index df7409ce4a9b..b9962ec6e33c 100644 --- 
a/ibis/backends/impala/tests/snapshots/test_exprs/test_nunique_where/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_exprs/test_nunique_where/out.sql @@ -1,2 +1,3 @@ -SELECT count(DISTINCT if(t0.`value` >= 1.0, t0.`key`, NULL)) AS `CountDistinct(key, GreaterEqual(value, 1.0))` -FROM `t0` t0 \ No newline at end of file +SELECT + COUNT(DISTINCT IF(`t0`.`value` >= 1.0, `t0`.`key`, NULL)) AS `CountDistinct(key, GreaterEqual(value, 1.0))` +FROM `t0` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_exprs/test_where_with_timestamp/out.sql b/ibis/backends/impala/tests/snapshots/test_exprs/test_where_with_timestamp/out.sql index b812ca961326..9265a674c3c3 100644 --- a/ibis/backends/impala/tests/snapshots/test_exprs/test_where_with_timestamp/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_exprs/test_where_with_timestamp/out.sql @@ -1,3 +1,6 @@ -SELECT t0.`uuid`, min(if(t0.`search_level` = 1, t0.`ts`, NULL)) AS `min_date` -FROM `t` t0 -GROUP BY 1 \ No newline at end of file +SELECT + `t0`.`uuid`, + MIN(IF(`t0`.`search_level` = 1, `t0`.`ts`, NULL)) AS `min_date` +FROM `t` AS `t0` +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_in_not_in/test_field_in_literals/isin/out.sql b/ibis/backends/impala/tests/snapshots/test_in_not_in/test_field_in_literals/isin/out.sql index ac7a1c815743..d0c44759597f 100644 --- a/ibis/backends/impala/tests/snapshots/test_in_not_in/test_field_in_literals/isin/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_in_not_in/test_field_in_literals/isin/out.sql @@ -1 +1,3 @@ -`g` IN ('foo', 'bar', 'baz') \ No newline at end of file +SELECT + `t0`.`g` IN ('foo', 'bar', 'baz') AS `InValues(g)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_in_not_in/test_field_in_literals/notin/out.sql b/ibis/backends/impala/tests/snapshots/test_in_not_in/test_field_in_literals/notin/out.sql index 38443dc2a049..1962710dd06e 100644 --- a/ibis/backends/impala/tests/snapshots/test_in_not_in/test_field_in_literals/notin/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_in_not_in/test_field_in_literals/notin/out.sql @@ -1 +1,5 @@ -NOT `g` IN ('foo', 'bar', 'baz') \ No newline at end of file +SELECT + NOT ( + `t0`.`g` IN ('foo', 'bar', 'baz') + ) AS `Not(InValues(g))` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_in_not_in/test_isin_notin_in_select/isin/out.sql b/ibis/backends/impala/tests/snapshots/test_in_not_in/test_isin_notin_in_select/isin/out.sql index c407478b5bc8..4a7728cdd59e 100644 --- a/ibis/backends/impala/tests/snapshots/test_in_not_in/test_isin_notin_in_select/isin/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_in_not_in/test_isin_notin_in_select/isin/out.sql @@ -1,3 +1,15 @@ -SELECT t0.* -FROM `alltypes` t0 -WHERE t0.`g` IN ('foo', 'bar') \ No newline at end of file +SELECT + `t0`.`a`, + `t0`.`b`, + `t0`.`c`, + `t0`.`d`, + `t0`.`e`, + `t0`.`f`, + `t0`.`g`, + `t0`.`h`, + `t0`.`i`, + `t0`.`j`, + `t0`.`k` +FROM `alltypes` AS `t0` +WHERE + `t0`.`g` IN ('foo', 'bar') \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_in_not_in/test_isin_notin_in_select/notin/out.sql b/ibis/backends/impala/tests/snapshots/test_in_not_in/test_isin_notin_in_select/notin/out.sql index 64e7f906868c..164b3a38338e 100644 --- a/ibis/backends/impala/tests/snapshots/test_in_not_in/test_isin_notin_in_select/notin/out.sql +++ 
b/ibis/backends/impala/tests/snapshots/test_in_not_in/test_isin_notin_in_select/notin/out.sql @@ -1,3 +1,17 @@ -SELECT t0.* -FROM `alltypes` t0 -WHERE NOT t0.`g` IN ('foo', 'bar') \ No newline at end of file +SELECT + `t0`.`a`, + `t0`.`b`, + `t0`.`c`, + `t0`.`d`, + `t0`.`e`, + `t0`.`f`, + `t0`.`g`, + `t0`.`h`, + `t0`.`i`, + `t0`.`j`, + `t0`.`k` +FROM `alltypes` AS `t0` +WHERE + NOT ( + `t0`.`g` IN ('foo', 'bar') + ) \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_in_not_in/test_literal_in_fields/isin/out.sql b/ibis/backends/impala/tests/snapshots/test_in_not_in/test_literal_in_fields/isin/out.sql index 24cb73205760..5e82e8ab1e24 100644 --- a/ibis/backends/impala/tests/snapshots/test_in_not_in/test_literal_in_fields/isin/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_in_not_in/test_literal_in_fields/isin/out.sql @@ -1 +1,3 @@ -2 IN (`a`, `b`, `c`) \ No newline at end of file +SELECT + 2 IN (`t0`.`a`, `t0`.`b`, `t0`.`c`) AS `InValues(2)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_in_not_in/test_literal_in_fields/notin/out.sql b/ibis/backends/impala/tests/snapshots/test_in_not_in/test_literal_in_fields/notin/out.sql index ca2178c7fcd7..5eb7de11190a 100644 --- a/ibis/backends/impala/tests/snapshots/test_in_not_in/test_literal_in_fields/notin/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_in_not_in/test_literal_in_fields/notin/out.sql @@ -1 +1,5 @@ -NOT 2 IN (`a`, `b`, `c`) \ No newline at end of file +SELECT + NOT ( + 2 IN (`t0`.`a`, `t0`.`b`, `t0`.`c`) + ) AS `Not(InValues(2))` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql index a0de577d9d19..28c6528bec63 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql @@ -1,3 +1,6 @@ -SELECT t0.*, - sum(t0.`two`) OVER (PARTITION BY t0.`three` ORDER BY t0.`one` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `four` -FROM `my_data` t0 \ No newline at end of file +SELECT + `t0`.`one`, + `t0`.`two`, + `t0`.`three`, + SUM(`t0`.`two`) OVER (PARTITION BY `t0`.`three` ORDER BY `t0`.`one` ASC NULLS LAST) AS `four` +FROM `my_data` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens/isnull/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens/isnull/out.sql index 6dcf3cf96553..034d6a342694 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens/isnull/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens/isnull/out.sql @@ -1,3 +1,6 @@ -SELECT t0.* -FROM `table` t0 -WHERE (t0.`a` IS NULL) = (t0.`b` IS NULL) \ No newline at end of file +SELECT + `t0`.`a`, + `t0`.`b` +FROM `table` AS `t0` +WHERE + `t0`.`a` IS NULL = `t0`.`b` IS NULL \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens/notnull/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens/notnull/out.sql index 7ab93d396b71..e34061596c65 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens/notnull/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens/notnull/out.sql @@ -1,3 +1,6 @@ -SELECT t0.* -FROM 
`table` t0 -WHERE (t0.`a` IS NOT NULL) = (t0.`b` IS NOT NULL) \ No newline at end of file +SELECT + `t0`.`a`, + `t0`.`b` +FROM `table` AS `t0` +WHERE + NOT `t0`.`a` IS NULL = NOT `t0`.`b` IS NULL \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens_identical_to/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens_identical_to/out.sql index a596fec7d727..9080afdaec50 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens_identical_to/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens_identical_to/out.sql @@ -1,3 +1,6 @@ -SELECT t0.* -FROM `table` t0 -WHERE (t0.`a` IS NOT DISTINCT FROM NULL) = (t0.`b` IS NOT DISTINCT FROM NULL) \ No newline at end of file +SELECT + `t0`.`a`, + `t0`.`b` +FROM `table` AS `t0` +WHERE + `t0`.`a` IS NOT DISTINCT FROM NULL = `t0`.`b` IS NOT DISTINCT FROM NULL \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_join_aliasing/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_join_aliasing/out.sql index 125984a40369..e74367136685 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_join_aliasing/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_join_aliasing/out.sql @@ -1,39 +1,87 @@ -WITH t0 AS ( - SELECT t7.*, t7.`a` + 20 AS `d` - FROM `test_table` t7 -), -t1 AS ( - SELECT t0.`d`, t0.`c` - FROM t0 -), -t2 AS ( - SELECT t1.`d`, CAST(t1.`d` / 15 AS bigint) AS `idx`, t1.`c`, - count(1) AS `row_count` - FROM t1 - GROUP BY 1, 2, 3 -), -t3 AS ( - SELECT t2.`d`, sum(t2.`row_count`) AS `total` - FROM t2 - GROUP BY 1 -), -t4 AS ( - SELECT t2.*, t3.`total` - FROM t2 - INNER JOIN t3 - ON t2.`d` = t3.`d` -), -t5 AS ( - SELECT t4.* - FROM t4 - WHERE t4.`row_count` < (t4.`total` / 2) -) -SELECT t6.*, t5.`total` +SELECT + `t6`.`d`, + `t6`.`b`, + `t6`.`count`, + `t6`.`unique`, + `t13`.`total` FROM ( - SELECT t0.`d`, t0.`b`, count(1) AS `count`, - count(DISTINCT t0.`c`) AS `unique` - FROM t0 - GROUP BY 1, 2 -) t6 - INNER JOIN t5 - ON t6.`d` = t5.`d` \ No newline at end of file + SELECT + `t1`.`d`, + `t1`.`b`, + COUNT(*) AS `count`, + COUNT(DISTINCT `t1`.`c`) AS `unique` + FROM ( + SELECT + `t0`.`a`, + `t0`.`b`, + `t0`.`c`, + `t0`.`a` + 20 AS `d` + FROM `test_table` AS `t0` + ) AS `t1` + GROUP BY + 1, + 2 +) AS `t6` +INNER JOIN ( + SELECT + `t11`.`d`, + `t11`.`idx`, + `t11`.`c`, + `t11`.`row_count`, + `t11`.`total` + FROM ( + SELECT + `t7`.`d`, + `t7`.`idx`, + `t7`.`c`, + `t7`.`row_count`, + `t9`.`total` + FROM ( + SELECT + `t2`.`d`, + CAST(`t2`.`d` / 15 AS BIGINT) AS `idx`, + `t2`.`c`, + COUNT(*) AS `row_count` + FROM ( + SELECT + `t0`.`a` + 20 AS `d`, + `t0`.`c` + FROM `test_table` AS `t0` + ) AS `t2` + GROUP BY + 1, + 2, + 3 + ) AS `t7` + INNER JOIN ( + SELECT + `t5`.`d`, + SUM(`t5`.`row_count`) AS `total` + FROM ( + SELECT + `t2`.`d`, + CAST(`t2`.`d` / 15 AS BIGINT) AS `idx`, + `t2`.`c`, + COUNT(*) AS `row_count` + FROM ( + SELECT + `t0`.`a` + 20 AS `d`, + `t0`.`c` + FROM `test_table` AS `t0` + ) AS `t2` + GROUP BY + 1, + 2, + 3 + ) AS `t5` + GROUP BY + 1 + ) AS `t9` + ON `t7`.`d` = `t9`.`d` + ) AS `t11` + WHERE + `t11`.`row_count` < ( + `t11`.`total` / 2 + ) +) AS `t13` + ON `t6`.`d` = `t13`.`d` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name/out.sql index 4bb710c4adae..865f638ff542 100644 --- 
a/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name/out.sql @@ -1,32 +1,118 @@ -WITH t0 AS ( - SELECT t5.*, t3.`r_name` AS `region`, t6.`o_totalprice`, - CAST(t6.`o_orderdate` AS timestamp) AS `odate` - FROM `tpch_region` t3 - INNER JOIN `tpch_nation` t4 - ON t3.`r_regionkey` = t4.`n_regionkey` - INNER JOIN `tpch_customer` t5 - ON t5.`c_nationkey` = t4.`n_nationkey` - INNER JOIN `tpch_orders` t6 - ON t6.`o_custkey` = t5.`c_custkey` -), -t1 AS ( - SELECT extract(t0.`odate`, 'year') AS `year`, count(1) AS `CountStar()` - FROM t0 - WHERE t0.`o_totalprice` > ( - SELECT avg(t3.`o_totalprice`) AS `Mean(o_totalprice)` - FROM t0 t3 - WHERE t3.`region` = t0.`region` - ) - GROUP BY 1 -), -t2 AS ( - SELECT extract(t0.`odate`, 'year') AS `year`, count(1) AS `CountStar()` - FROM t0 - GROUP BY 1 -) -SELECT t2.`year`, t2.`CountStar()` AS `pre_count`, - t1.`CountStar()` AS `post_count`, - t1.`CountStar()` / CAST(t2.`CountStar()` AS double) AS `fraction` -FROM t2 - INNER JOIN t1 - ON t2.`year` = t1.`year` \ No newline at end of file +SELECT + `t14`.`year`, + `t14`.`CountStar()` AS `pre_count`, + `t19`.`CountStar()` AS `post_count`, + `t19`.`CountStar()` / CAST(`t14`.`CountStar()` AS DOUBLE) AS `fraction` +FROM ( + SELECT + EXTRACT(year FROM `t11`.`odate`) AS `year`, + COUNT(*) AS `CountStar()` + FROM ( + SELECT + `t6`.`c_custkey`, + `t6`.`c_name`, + `t6`.`c_address`, + `t6`.`c_nationkey`, + `t6`.`c_phone`, + `t6`.`c_acctbal`, + `t6`.`c_mktsegment`, + `t6`.`c_comment`, + `t4`.`r_name` AS `region`, + `t7`.`o_totalprice`, + CAST(`t7`.`o_orderdate` AS TIMESTAMP) AS `odate` + FROM `tpch_region` AS `t4` + INNER JOIN `tpch_nation` AS `t5` + ON `t4`.`r_regionkey` = `t5`.`n_regionkey` + INNER JOIN `tpch_customer` AS `t6` + ON `t6`.`c_nationkey` = `t5`.`n_nationkey` + INNER JOIN `tpch_orders` AS `t7` + ON `t7`.`o_custkey` = `t6`.`c_custkey` + ) AS `t11` + GROUP BY + 1 +) AS `t14` +INNER JOIN ( + SELECT + EXTRACT(year FROM `t17`.`odate`) AS `year`, + COUNT(*) AS `CountStar()` + FROM ( + SELECT + `t11`.`c_custkey`, + `t11`.`c_name`, + `t11`.`c_address`, + `t11`.`c_nationkey`, + `t11`.`c_phone`, + `t11`.`c_acctbal`, + `t11`.`c_mktsegment`, + `t11`.`c_comment`, + `t11`.`region`, + `t11`.`o_totalprice`, + `t11`.`odate` + FROM ( + SELECT + `t6`.`c_custkey`, + `t6`.`c_name`, + `t6`.`c_address`, + `t6`.`c_nationkey`, + `t6`.`c_phone`, + `t6`.`c_acctbal`, + `t6`.`c_mktsegment`, + `t6`.`c_comment`, + `t4`.`r_name` AS `region`, + `t7`.`o_totalprice`, + CAST(`t7`.`o_orderdate` AS TIMESTAMP) AS `odate` + FROM `tpch_region` AS `t4` + INNER JOIN `tpch_nation` AS `t5` + ON `t4`.`r_regionkey` = `t5`.`n_regionkey` + INNER JOIN `tpch_customer` AS `t6` + ON `t6`.`c_nationkey` = `t5`.`n_nationkey` + INNER JOIN `tpch_orders` AS `t7` + ON `t7`.`o_custkey` = `t6`.`c_custkey` + ) AS `t11` + WHERE + `t11`.`o_totalprice` > ( + SELECT + AVG(`t15`.`o_totalprice`) AS `Mean(o_totalprice)` + FROM ( + SELECT + `t12`.`c_custkey`, + `t12`.`c_name`, + `t12`.`c_address`, + `t12`.`c_nationkey`, + `t12`.`c_phone`, + `t12`.`c_acctbal`, + `t12`.`c_mktsegment`, + `t12`.`c_comment`, + `t12`.`region`, + `t12`.`o_totalprice`, + `t12`.`odate` + FROM ( + SELECT + `t6`.`c_custkey`, + `t6`.`c_name`, + `t6`.`c_address`, + `t6`.`c_nationkey`, + `t6`.`c_phone`, + `t6`.`c_acctbal`, + `t6`.`c_mktsegment`, + `t6`.`c_comment`, + `t4`.`r_name` AS `region`, + `t7`.`o_totalprice`, + CAST(`t7`.`o_orderdate` AS TIMESTAMP) AS `odate` + FROM `tpch_region` AS `t4` + INNER JOIN 
`tpch_nation` AS `t5` + ON `t4`.`r_regionkey` = `t5`.`n_regionkey` + INNER JOIN `tpch_customer` AS `t6` + ON `t6`.`c_nationkey` = `t5`.`n_nationkey` + INNER JOIN `tpch_orders` AS `t7` + ON `t7`.`o_custkey` = `t6`.`c_custkey` + ) AS `t12` + WHERE + `t12`.`region` = `t11`.`region` + ) AS `t15` + ) + ) AS `t17` + GROUP BY + 1 +) AS `t19` + ON `t14`.`year` = `t19`.`year` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name2/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name2/out.sql index 7063b0417815..5d6f63cb5b69 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name2/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name2/out.sql @@ -1,21 +1,61 @@ -WITH t0 AS ( - SELECT t5.*, t3.`r_name` AS `region`, t6.`o_totalprice`, - CAST(t6.`o_orderdate` AS timestamp) AS `odate` - FROM `tpch_region` t3 - INNER JOIN `tpch_nation` t4 - ON t3.`r_regionkey` = t4.`n_regionkey` - INNER JOIN `tpch_customer` t5 - ON t5.`c_nationkey` = t4.`n_nationkey` - INNER JOIN `tpch_orders` t6 - ON t6.`o_custkey` = t5.`c_custkey` -), -t1 AS ( - SELECT extract(t0.`odate`, 'year') AS `year`, count(1) AS `CountStar()` - FROM t0 - GROUP BY 1 -) -SELECT t1.`year`, t1.`CountStar()` AS `pre_count`, - t2.`CountStar()` AS `post_count` -FROM t1 - INNER JOIN t1 t2 - ON t1.`year` = t2.`year` \ No newline at end of file +SELECT + `t13`.`year`, + `t13`.`CountStar()` AS `pre_count`, + `t15`.`CountStar()` AS `post_count` +FROM ( + SELECT + EXTRACT(year FROM `t11`.`odate`) AS `year`, + COUNT(*) AS `CountStar()` + FROM ( + SELECT + `t6`.`c_custkey`, + `t6`.`c_name`, + `t6`.`c_address`, + `t6`.`c_nationkey`, + `t6`.`c_phone`, + `t6`.`c_acctbal`, + `t6`.`c_mktsegment`, + `t6`.`c_comment`, + `t4`.`r_name` AS `region`, + `t7`.`o_totalprice`, + CAST(`t7`.`o_orderdate` AS TIMESTAMP) AS `odate` + FROM `tpch_region` AS `t4` + INNER JOIN `tpch_nation` AS `t5` + ON `t4`.`r_regionkey` = `t5`.`n_regionkey` + INNER JOIN `tpch_customer` AS `t6` + ON `t6`.`c_nationkey` = `t5`.`n_nationkey` + INNER JOIN `tpch_orders` AS `t7` + ON `t7`.`o_custkey` = `t6`.`c_custkey` + ) AS `t11` + GROUP BY + 1 +) AS `t13` +INNER JOIN ( + SELECT + EXTRACT(year FROM `t11`.`odate`) AS `year`, + COUNT(*) AS `CountStar()` + FROM ( + SELECT + `t6`.`c_custkey`, + `t6`.`c_name`, + `t6`.`c_address`, + `t6`.`c_nationkey`, + `t6`.`c_phone`, + `t6`.`c_acctbal`, + `t6`.`c_mktsegment`, + `t6`.`c_comment`, + `t4`.`r_name` AS `region`, + `t7`.`o_totalprice`, + CAST(`t7`.`o_orderdate` AS TIMESTAMP) AS `odate` + FROM `tpch_region` AS `t4` + INNER JOIN `tpch_nation` AS `t5` + ON `t4`.`r_regionkey` = `t5`.`n_regionkey` + INNER JOIN `tpch_customer` AS `t6` + ON `t6`.`c_nationkey` = `t5`.`n_nationkey` + INNER JOIN `tpch_orders` AS `t7` + ON `t7`.`o_custkey` = `t6`.`c_custkey` + ) AS `t11` + GROUP BY + 1 +) AS `t15` + ON `t13`.`year` = `t15`.`year` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/cross_join/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/cross_join/out.sql index 2c27c805038b..51bc5cb4ec76 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/cross_join/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/cross_join/out.sql @@ -1,3 +1,7 @@ -SELECT t0.* -FROM `star1` t0 - CROSS JOIN `star2` t1 \ No newline at end of file +SELECT + `t2`.`c`, + `t2`.`f`, + 
`t2`.`foo_id`, + `t2`.`bar_id` +FROM `star1` AS `t2` +CROSS JOIN `star2` AS `t3` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/inner_join/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/inner_join/out.sql index 2c27c805038b..3a2417b3c9d2 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/inner_join/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/inner_join/out.sql @@ -1,3 +1,8 @@ -SELECT t0.* -FROM `star1` t0 - CROSS JOIN `star2` t1 \ No newline at end of file +SELECT + `t2`.`c`, + `t2`.`f`, + `t2`.`foo_id`, + `t2`.`bar_id` +FROM `star1` AS `t2` +INNER JOIN `star2` AS `t3` + ON TRUE \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/left_join/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/left_join/out.sql index 2c27c805038b..4fca9e6c563c 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/left_join/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/left_join/out.sql @@ -1,3 +1,8 @@ -SELECT t0.* -FROM `star1` t0 - CROSS JOIN `star2` t1 \ No newline at end of file +SELECT + `t2`.`c`, + `t2`.`f`, + `t2`.`foo_id`, + `t2`.`bar_id` +FROM `star1` AS `t2` +LEFT OUTER JOIN `star2` AS `t3` + ON TRUE \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/outer_join/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/outer_join/out.sql index 2c27c805038b..51f3450f16ad 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/outer_join/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_join_no_predicates_for_impala/outer_join/out.sql @@ -1,3 +1,8 @@ -SELECT t0.* -FROM `star1` t0 - CROSS JOIN `star2` t1 \ No newline at end of file +SELECT + `t2`.`c`, + `t2`.`f`, + `t2`.`foo_id`, + `t2`.`bar_id` +FROM `star1` AS `t2` +FULL OUTER JOIN `star2` AS `t3` + ON TRUE \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_join_with_nested_or_condition/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_join_with_nested_or_condition/out.sql index 2b2706c2b4bc..dda0c97b4843 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_join_with_nested_or_condition/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_join_with_nested_or_condition/out.sql @@ -1,5 +1,13 @@ -SELECT t0.* -FROM `t` t0 - INNER JOIN `t` t1 - ON (t0.`a` = t1.`a`) AND - ((t0.`a` != t1.`b`) OR (t0.`b` != t1.`a`)) \ No newline at end of file +SELECT + `t1`.`a`, + `t1`.`b` +FROM `t` AS `t1` +INNER JOIN `t` AS `t3` + ON `t1`.`a` = `t3`.`a` + AND ( + ( + `t1`.`a` <> `t3`.`b` + ) OR ( + `t1`.`b` <> `t3`.`a` + ) + ) \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_join_with_nested_xor_condition/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_join_with_nested_xor_condition/out.sql index 42dfe3e6a41b..22c41b392f86 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_join_with_nested_xor_condition/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_join_with_nested_xor_condition/out.sql @@ -1,5 +1,14 @@ -SELECT t0.* -FROM `t` t0 - INNER JOIN `t` t1 - ON 
(t0.`a` = t1.`a`) AND - (((t0.`a` != t1.`b`) OR (t0.`b` != t1.`a`)) AND NOT ((t0.`a` != t1.`b`) AND (t0.`b` != t1.`a`))) \ No newline at end of file +SELECT + `t1`.`a`, + `t1`.`b` +FROM `t` AS `t1` +INNER JOIN `t` AS `t3` + ON `t1`.`a` = `t3`.`a` + AND ( + ( + `t1`.`a` <> `t3`.`b` OR `t1`.`b` <> `t3`.`a` + ) + AND NOT ( + `t1`.`a` <> `t3`.`b` AND `t1`.`b` <> `t3`.`a` + ) + ) \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_limit_cte_extract/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_limit_cte_extract/out.sql index 2f11e9a2ac66..b408d5a00767 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_limit_cte_extract/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_limit_cte_extract/out.sql @@ -1,8 +1,27 @@ -WITH t0 AS ( - SELECT t2.* - FROM `functional_alltypes` t2 +SELECT + `t2`.`id`, + `t2`.`bool_col`, + `t2`.`tinyint_col`, + `t2`.`smallint_col`, + `t2`.`int_col`, + `t2`.`bigint_col`, + `t2`.`float_col`, + `t2`.`double_col`, + `t2`.`date_string_col`, + `t2`.`string_col`, + `t2`.`timestamp_col`, + `t2`.`year`, + `t2`.`month` +FROM ( + SELECT + * + FROM `functional_alltypes` AS `t0` LIMIT 100 -) -SELECT t0.* -FROM t0 - CROSS JOIN t0 t1 \ No newline at end of file +) AS `t2` +INNER JOIN ( + SELECT + * + FROM `functional_alltypes` AS `t0` + LIMIT 100 +) AS `t4` + ON TRUE \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_logically_negate_complex_boolean_expr/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_logically_negate_complex_boolean_expr/out.sql index 06566cbf9c18..8d93c3148789 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_logically_negate_complex_boolean_expr/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_logically_negate_complex_boolean_expr/out.sql @@ -1,2 +1,5 @@ -SELECT NOT (t0.`a` IN ('foo') AND (t0.`c` IS NOT NULL)) AS `tmp` -FROM `t` t0 \ No newline at end of file +SELECT + NOT ( + `t0`.`a` IN ('foo') AND NOT `t0`.`c` IS NULL + ) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_multiple_filters/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_multiple_filters/out.sql index 4ebc521ad039..54573e65ff5c 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_multiple_filters/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_multiple_filters/out.sql @@ -1,8 +1,18 @@ -SELECT t0.* -FROM `t0` t0 -WHERE (t0.`a` < 100) AND - (t0.`a` = ( - SELECT max(t0.`a`) AS `Max(a)` - FROM `t0` t0 - WHERE t0.`a` < 100 -)) \ No newline at end of file +SELECT + `t0`.`a`, + `t0`.`b` +FROM `t0` AS `t0` +WHERE + `t0`.`a` < 100 + AND `t0`.`a` = ( + SELECT + MAX(`t1`.`a`) AS `Max(a)` + FROM ( + SELECT + `t0`.`a`, + `t0`.`b` + FROM `t0` AS `t0` + WHERE + `t0`.`a` < 100 + ) AS `t1` + ) \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_multiple_filters2/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_multiple_filters2/out.sql index f2a3ae74e8c7..ca8b8101a9c6 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_multiple_filters2/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_multiple_filters2/out.sql @@ -1,9 +1,19 @@ -SELECT t0.* -FROM `t0` t0 -WHERE (t0.`a` < 100) AND - (t0.`a` = ( - SELECT max(t0.`a`) AS `Max(a)` - FROM `t0` t0 - WHERE t0.`a` < 100 -)) AND - (t0.`b` = 'a') \ No newline at end of file +SELECT + `t0`.`a`, + `t0`.`b` +FROM `t0` AS `t0` +WHERE 
+ `t0`.`a` < 100 + AND `t0`.`a` = ( + SELECT + MAX(`t1`.`a`) AS `Max(a)` + FROM ( + SELECT + `t0`.`a`, + `t0`.`b` + FROM `t0` AS `t0` + WHERE + `t0`.`a` < 100 + ) AS `t1` + ) + AND `t0`.`b` = 'a' \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_base/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_base/out.sql index 08a1b2731c0e..0afadafb7be9 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_base/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_base/out.sql @@ -1,14 +1,27 @@ -WITH t0 AS ( - SELECT t2.`uuid`, count(1) AS `CountStar(t)` - FROM `t` t2 - GROUP BY 1 -), -t1 AS ( - SELECT t0.`uuid`, max(t0.`CountStar(t)`) AS `max_count` - FROM t0 - GROUP BY 1 -) -SELECT t0.* -FROM t1 - LEFT OUTER JOIN t0 - ON t1.`uuid` = t0.`uuid` \ No newline at end of file +SELECT + `t4`.`uuid`, + `t2`.`CountStar()` +FROM ( + SELECT + `t1`.`uuid`, + MAX(`t1`.`CountStar()`) AS `max_count` + FROM ( + SELECT + `t0`.`uuid`, + COUNT(*) AS `CountStar()` + FROM `t` AS `t0` + GROUP BY + 1 + ) AS `t1` + GROUP BY + 1 +) AS `t4` +LEFT OUTER JOIN ( + SELECT + `t0`.`uuid`, + COUNT(*) AS `CountStar()` + FROM `t` AS `t0` + GROUP BY + 1 +) AS `t2` + ON `t4`.`uuid` = `t2`.`uuid` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_multiple_ctes/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_multiple_ctes/out.sql index 74bb1229da66..5a4bca2f68ab 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_multiple_ctes/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_multiple_ctes/out.sql @@ -1,30 +1,61 @@ -WITH t0 AS ( - SELECT t2.* - FROM t2 - WHERE (t2.`userid` = 118205) AND - (extract(t2.`datetime`, 'year') > 2001) AND - (t2.`userid` = 118205) AND - (extract(t2.`datetime`, 'year') < 2009) -), -t1 AS ( - SELECT t3.`userid`, t3.`movieid`, t3.`rating`, - CAST(t3.`timestamp` AS timestamp) AS `datetime` - FROM `ratings` t3 -), -t2 AS ( - SELECT t1.*, t3.`title` - FROM t1 - INNER JOIN `movies` t3 - ON t1.`movieid` = t3.`movieid` -) -SELECT t2.* -FROM t2 -WHERE (t2.`userid` = 118205) AND - (extract(t2.`datetime`, 'year') > 2001) AND - (t2.`movieid` IN ( - SELECT t3.`movieid` +SELECT + `t7`.`userid`, + `t7`.`movieid`, + `t7`.`rating`, + `t7`.`datetime`, + `t7`.`title` +FROM ( + SELECT + `t6`.`userid`, + `t6`.`movieid`, + `t6`.`rating`, + `t6`.`datetime`, + `t6`.`title` FROM ( - SELECT t0.`movieid` - FROM t0 - ) t3 -)) \ No newline at end of file + SELECT + `t4`.`userid`, + `t4`.`movieid`, + `t4`.`rating`, + `t4`.`datetime`, + `t2`.`title` + FROM ( + SELECT + `t0`.`userid`, + `t0`.`movieid`, + `t0`.`rating`, + CAST(`t0`.`timestamp` AS TIMESTAMP) AS `datetime` + FROM `ratings` AS `t0` + ) AS `t4` + INNER JOIN `movies` AS `t2` + ON `t4`.`movieid` = `t2`.`movieid` + ) AS `t6` + WHERE + `t6`.`userid` = 118205 AND EXTRACT(year FROM `t6`.`datetime`) > 2001 +) AS `t7` +WHERE + `t7`.`movieid` IN ( + SELECT + `t6`.`movieid` + FROM ( + SELECT + `t4`.`userid`, + `t4`.`movieid`, + `t4`.`rating`, + `t4`.`datetime`, + `t2`.`title` + FROM ( + SELECT + `t0`.`userid`, + `t0`.`movieid`, + `t0`.`rating`, + CAST(`t0`.`timestamp` AS TIMESTAMP) AS `datetime` + FROM `ratings` AS `t0` + ) AS `t4` + INNER JOIN `movies` AS `t2` + ON `t4`.`movieid` = `t2`.`movieid` + ) AS `t6` + WHERE + `t6`.`userid` = 118205 + AND EXTRACT(year FROM `t6`.`datetime`) > 2001 + AND EXTRACT(year FROM `t6`.`datetime`) < 2009 + ) \ 
No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_nested_joins_single_cte/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_nested_joins_single_cte/out.sql index fb84b7aed87c..95e2f8ad4d5b 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_nested_joins_single_cte/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_nested_joins_single_cte/out.sql @@ -1,26 +1,37 @@ -WITH t0 AS ( - SELECT t4.`uuid`, count(1) AS `CountStar(t)` - FROM `t` t4 - GROUP BY 1 -), -t1 AS ( - SELECT t0.`uuid`, max(t0.`CountStar(t)`) AS `max_count` - FROM t0 - GROUP BY 1 -), -t2 AS ( - SELECT t4.`uuid`, max(t4.`ts`) AS `last_visit` - FROM `t` t4 - GROUP BY 1 -), -t3 AS ( - SELECT t0.* - FROM t1 - LEFT OUTER JOIN t0 - ON (t1.`uuid` = t0.`uuid`) AND - (t1.`max_count` = t0.`CountStar(t)`) -) -SELECT t3.*, t2.`last_visit` -FROM t3 - LEFT OUTER JOIN t2 - ON t3.`uuid` = t2.`uuid` \ No newline at end of file +SELECT + `t6`.`uuid`, + `t2`.`CountStar()`, + `t4`.`last_visit` +FROM ( + SELECT + `t1`.`uuid`, + MAX(`t1`.`CountStar()`) AS `max_count` + FROM ( + SELECT + `t0`.`uuid`, + COUNT(*) AS `CountStar()` + FROM `t` AS `t0` + GROUP BY + 1 + ) AS `t1` + GROUP BY + 1 +) AS `t6` +LEFT OUTER JOIN ( + SELECT + `t0`.`uuid`, + COUNT(*) AS `CountStar()` + FROM `t` AS `t0` + GROUP BY + 1 +) AS `t2` + ON `t6`.`uuid` = `t2`.`uuid` AND `t6`.`max_count` = `t2`.`CountStar()` +LEFT OUTER JOIN ( + SELECT + `t0`.`uuid`, + MAX(`t0`.`ts`) AS `last_visit` + FROM `t` AS `t0` + GROUP BY + 1 +) AS `t4` + ON `t6`.`uuid` = `t4`.`uuid` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_find/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_find/out.sql index 931850b2c67b..1ea36d138126 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_find/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_find/out.sql @@ -1 +1,3 @@ -locate('a', `string_col`, `tinyint_col` + 1) - 1 \ No newline at end of file +SELECT + LOCATE('a', `t0`.`string_col`, `t0`.`tinyint_col` + 1) - 1 AS `StringFind(string_col, 'a', tinyint_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/ascii_str/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/ascii_str/out.sql index 7ef218478bd7..325872e8165c 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/ascii_str/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/ascii_str/out.sql @@ -1 +1,3 @@ -ascii(`string_col`) \ No newline at end of file +SELECT + ASCII(`t0`.`string_col`) AS `StringAscii(string_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/capitalize/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/capitalize/out.sql index c57891147a73..d23a9119ada0 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/capitalize/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/capitalize/out.sql @@ -1 +1,3 @@ -initcap(`string_col`) \ No newline at end of file +SELECT + INITCAP(`t0`.`string_col`) AS `Capitalize(string_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git 
a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/extract_host/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/extract_host/out.sql index a863c53f5ade..6146d224587b 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/extract_host/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/extract_host/out.sql @@ -1 +1,3 @@ -parse_url(`string_col`, 'HOST') \ No newline at end of file +SELECT + PARSE_URL(`t0`.`string_col`, 'HOST') AS `ExtractHost(string_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find/out.sql index 0db084cf43ce..ed76d18fd686 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find/out.sql @@ -1 +1,3 @@ -locate('a', `string_col`) - 1 \ No newline at end of file +SELECT + LOCATE('a', `t0`.`string_col`) - 1 AS `StringFind(string_col, 'a')` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find_in_set_multiple/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find_in_set_multiple/out.sql index 2904c469455c..614cf9e0c23c 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find_in_set_multiple/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find_in_set_multiple/out.sql @@ -1 +1,3 @@ -find_in_set(`string_col`, 'a,b') - 1 \ No newline at end of file +SELECT + FIND_IN_SET(`t0`.`string_col`, CONCAT_WS(',', 'a', 'b')) - 1 AS `FindInSet(string_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find_in_set_single/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find_in_set_single/out.sql index 8f691abdb529..1affe459d9a1 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find_in_set_single/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find_in_set_single/out.sql @@ -1 +1,3 @@ -find_in_set(`string_col`, 'a') - 1 \ No newline at end of file +SELECT + FIND_IN_SET(`t0`.`string_col`, CONCAT_WS(',', 'a')) - 1 AS `FindInSet(string_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find_with_offset/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find_with_offset/out.sql index 717a9cb020a2..93e63cc0d06b 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find_with_offset/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/find_with_offset/out.sql @@ -1 +1,3 @@ -locate('a', `string_col`, 3) - 1 \ No newline at end of file +SELECT + LOCATE('a', `t0`.`string_col`, 2 + 1) - 1 AS `StringFind(string_col, 'a', 2)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git 
a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/length/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/length/out.sql index 1e0744be0de5..8d6c4bdca3ea 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/length/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/length/out.sql @@ -1 +1,3 @@ -length(`string_col`) \ No newline at end of file +SELECT + LENGTH(`t0`.`string_col`) AS `StringLength(string_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/like/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/like/out.sql index 39c5029114fe..b3c468b064bc 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/like/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/like/out.sql @@ -1 +1,3 @@ -`string_col` LIKE 'foo%' \ No newline at end of file +SELECT + `t0`.`string_col` LIKE 'foo%' AS `StringSQLLike(string_col, 'foo%')` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/like_multiple/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/like_multiple/out.sql index 13127122bbb3..b599e8873296 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/like_multiple/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/like_multiple/out.sql @@ -1 +1,3 @@ -`string_col` LIKE 'foo%' OR `string_col` LIKE '%bar' \ No newline at end of file +SELECT + `t0`.`string_col` LIKE 'foo%' OR `t0`.`string_col` LIKE '%bar' AS `Or(StringSQLLike(string_col, 'foo%'), StringSQLLike(string_col, '%bar'))` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lower/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lower/out.sql index 0edad56fc694..4cfba200403e 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lower/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lower/out.sql @@ -1 +1,3 @@ -lower(`string_col`) \ No newline at end of file +SELECT + LOWER(`t0`.`string_col`) AS `Lowercase(string_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lpad_char/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lpad_char/out.sql index aeb12d288947..83db8a156a2f 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lpad_char/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lpad_char/out.sql @@ -1 +1,3 @@ -lpad(`string_col`, 1, 'a') \ No newline at end of file +SELECT + LPAD(`t0`.`string_col`, 1, 'a') AS `LPad(string_col, 1, 'a')` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lpad_default/out.sql 
b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lpad_default/out.sql index 5fd117829c59..8776196103c9 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lpad_default/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lpad_default/out.sql @@ -1 +1,3 @@ -lpad(`string_col`, 25, ' ') \ No newline at end of file +SELECT + LPAD(`t0`.`string_col`, 25, ' ') AS `LPad(string_col, 25, ' ')` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lstrip/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lstrip/out.sql index c4aa27fec27a..f4ce746edd1d 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lstrip/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lstrip/out.sql @@ -1 +1,3 @@ -ltrim(`string_col`) \ No newline at end of file +SELECT + LTRIM(`t0`.`string_col`) AS `LStrip(string_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/re_extract/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/re_extract/out.sql index a0c1030f8b35..76df41ef3249 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/re_extract/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/re_extract/out.sql @@ -1 +1,3 @@ -regexp_extract(`string_col`, '[\\d]+', 0) \ No newline at end of file +SELECT + REGEXP_EXTRACT(`t0`.`string_col`, '[\\d]+', 0) AS `RegexExtract(string_col, '[\\d]+', 0)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/re_replace/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/re_replace/out.sql index bcf6bf242b75..194115208582 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/re_replace/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/re_replace/out.sql @@ -1 +1,3 @@ -regexp_replace(`string_col`, '[\\d]+', 'aaa') \ No newline at end of file +SELECT + REGEXP_REPLACE(`t0`.`string_col`, '[\\d]+', 'aaa') AS `RegexReplace(string_col, '[\\d]+', 'aaa')` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/re_search/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/re_search/out.sql index bd5b241c04f4..b8f2e9ad9c4a 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/re_search/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/re_search/out.sql @@ -1 +1,3 @@ -regexp_like(`string_col`, '[\\d]+') \ No newline at end of file +SELECT + `t0`.`string_col` RLIKE '[\\d]+' AS `RegexSearch(string_col, '[\\d]+')` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/repeat/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/repeat/out.sql index 659b7785e0bc..8701c0d8b670 100644 --- 
a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/repeat/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/repeat/out.sql @@ -1 +1,3 @@ -repeat(`string_col`, 2) \ No newline at end of file +SELECT + REPEAT(`t0`.`string_col`, 2) AS `Repeat(string_col, 2)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/reverse/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/reverse/out.sql index 8a2d1509e518..4fa987fde0e7 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/reverse/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/reverse/out.sql @@ -1 +1,3 @@ -reverse(`string_col`) \ No newline at end of file +SELECT + REVERSE(`t0`.`string_col`) AS `Reverse(string_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rlike/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rlike/out.sql index bd5b241c04f4..b8f2e9ad9c4a 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rlike/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rlike/out.sql @@ -1 +1,3 @@ -regexp_like(`string_col`, '[\\d]+') \ No newline at end of file +SELECT + `t0`.`string_col` RLIKE '[\\d]+' AS `RegexSearch(string_col, '[\\d]+')` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rpad_char/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rpad_char/out.sql index 6799fb1000f4..23314bf1cafa 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rpad_char/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rpad_char/out.sql @@ -1 +1,3 @@ -rpad(`string_col`, 1, 'a') \ No newline at end of file +SELECT + RPAD(`t0`.`string_col`, 1, 'a') AS `RPad(string_col, 1, 'a')` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rpad_default/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rpad_default/out.sql index 3353a8f7c1ee..c2f18f32a5ce 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rpad_default/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rpad_default/out.sql @@ -1 +1,3 @@ -rpad(`string_col`, 25, ' ') \ No newline at end of file +SELECT + RPAD(`t0`.`string_col`, 25, ' ') AS `RPad(string_col, 25, ' ')` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rstrip/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rstrip/out.sql index 7cb8c95a18f1..825a36325c6c 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rstrip/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rstrip/out.sql @@ -1 +1,3 @@ -rtrim(`string_col`) \ No newline at end of file +SELECT + 
RTRIM(`t0`.`string_col`) AS `RStrip(string_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strip/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strip/out.sql index 3b74aded9106..a3e2c1476733 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strip/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strip/out.sql @@ -1 +1,3 @@ -trim(`string_col`) \ No newline at end of file +SELECT + TRIM(`t0`.`string_col`) AS `Strip(string_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strright/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strright/out.sql index 8bb8e51bc07b..72f21e0b1f06 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strright/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strright/out.sql @@ -1 +1,5 @@ -strright(`string_col`, 4) \ No newline at end of file +SELECT + SUBSTRING(`t0`.`string_col`, LENGTH(`t0`.`string_col`) - ( + 4 - 1 + )) AS `StrRight(string_col, 4)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/substr_0_3/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/substr_0_3/out.sql index ddc149a06a5a..1ecb1c5ff111 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/substr_0_3/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/substr_0_3/out.sql @@ -1 +1,9 @@ -substr(`string_col`, 0 + 1, 3) \ No newline at end of file +SELECT + IF( + ( + 0 + 1 + ) >= 1, + SUBSTRING(`t0`.`string_col`, 0 + 1, 3), + SUBSTRING(`t0`.`string_col`, 0 + 1 + LENGTH(`t0`.`string_col`), 3) + ) AS `Substring(string_col, 0, 3)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/substr_2/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/substr_2/out.sql index 359297f9a27a..08c2cd1ee1eb 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/substr_2/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/substr_2/out.sql @@ -1 +1,9 @@ -substr(`string_col`, 2 + 1) \ No newline at end of file +SELECT + IF( + ( + 2 + 1 + ) >= 1, + SUBSTRING(`t0`.`string_col`, 2 + 1), + SUBSTRING(`t0`.`string_col`, 2 + 1 + LENGTH(`t0`.`string_col`)) + ) AS `Substring(string_col, 2)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/translate/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/translate/out.sql index 684f47fe7375..56e8f09fe492 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/translate/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/translate/out.sql @@ -1 +1,3 @@ -translate(`string_col`, 'a', 'b') \ No newline at end of file +SELECT + TRANSLATE(`t0`.`string_col`, 'a', 'b') 
AS `Translate(string_col, 'a', 'b')` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/upper/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/upper/out.sql index 946d3f8bdeff..51f7c7961559 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/upper/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/upper/out.sql @@ -1 +1,3 @@ -upper(`string_col`) \ No newline at end of file +SELECT + UPPER(`t0`.`string_col`) AS `Uppercase(string_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_join/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_join/out.sql index 3bf4c101fd3a..495885e4d146 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_join/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_join/out.sql @@ -1 +1,2 @@ -concat_ws(',', 'a', 'b') \ No newline at end of file +SELECT + CONCAT_WS(',', 'a', 'b') AS `StringJoin(',')` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_udf/test_sql_generation/out.sql b/ibis/backends/impala/tests/snapshots/test_udf/test_sql_generation/out.sql index 30b16fc26974..6ae4e58e0380 100644 --- a/ibis/backends/impala/tests/snapshots/test_udf/test_sql_generation/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_udf/test_sql_generation/out.sql @@ -1 +1,2 @@ -SELECT udf_testing.identity('hello world') AS `UDF_Tester('hello world')` \ No newline at end of file +SELECT + UDF_TESTING.IDENTITY('hello world') AS `identity('hello world')` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_udf/test_sql_generation_from_infoclass/out.sql b/ibis/backends/impala/tests/snapshots/test_udf/test_sql_generation_from_infoclass/out.sql index 16e8b29a90ef..7331927594aa 100644 --- a/ibis/backends/impala/tests/snapshots/test_udf/test_sql_generation_from_infoclass/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_udf/test_sql_generation_from_infoclass/out.sql @@ -1 +1,2 @@ -SELECT udf_testing.info_test('hello world') AS `tmp` \ No newline at end of file +SELECT + UDF_TESTING.INFO_TEST('hello world') AS `tmp` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_hash/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_hash/out.sql index 994e8535524e..0e1a518d861a 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_hash/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_hash/out.sql @@ -1 +1,3 @@ -fnv_hash(`int_col`) \ No newline at end of file +SELECT + FNV_HASH(`t0`.`int_col`) AS `Hash(int_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/log_with_base/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/log_with_base/out.sql index 0ddbc401afb6..2238b9b6689a 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/log_with_base/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/log_with_base/out.sql @@ -1 +1,3 @@ -log(5, `double_col`) \ No newline at end of file +SELECT + LOG(5, `t0`.`double_col`) AS 
`Log(double_col, 5)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_expr/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_expr/out.sql index 89bf6b7e1e7f..28e045098b09 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_expr/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_expr/out.sql @@ -1 +1,3 @@ -round(`double_col`, `tinyint_col`) \ No newline at end of file +SELECT + ROUND(`t0`.`double_col`, `t0`.`tinyint_col`) AS `Round(double_col, tinyint_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_no_args/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_no_args/out.sql index d5f8e2bbfbda..8acebb77ef31 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_no_args/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_no_args/out.sql @@ -1 +1,3 @@ -cast(round(`double_col`) AS BIGINT) \ No newline at end of file +SELECT + CAST(ROUND(`t0`.`double_col`) AS BIGINT) AS `Round(double_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_two/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_two/out.sql index 8a47c0984f28..61400c7e45a3 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_two/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_two/out.sql @@ -1 +1,3 @@ -round(`double_col`, 2) \ No newline at end of file +SELECT + ROUND(`t0`.`double_col`, 2) AS `Round(double_col, 2)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_zero/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_zero/out.sql index 7fb9eaf37aaf..5454c320029e 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_zero/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/round_zero/out.sql @@ -1 +1,3 @@ -round(`double_col`, 0) \ No newline at end of file +SELECT + ROUND(`t0`.`double_col`, 0) AS `Round(double_col, 0)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/sign_double/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/sign_double/out.sql index 72a6377e9981..1ac9e131c659 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/sign_double/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/sign_double/out.sql @@ -1 +1,3 @@ -CAST(sign(`double_col`) AS double) \ No newline at end of file +SELECT + CAST(SIGN(`t0`.`double_col`) AS DOUBLE) AS `Sign(double_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/sign_float/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/sign_float/out.sql index f3e012c490c0..32291b80199a 100644 --- 
a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/sign_float/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/sign_float/out.sql @@ -1 +1,3 @@ -sign(`float_col`) \ No newline at end of file +SELECT + SIGN(`t0`.`float_col`) AS `Sign(float_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/sign_tinyint/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/sign_tinyint/out.sql index 286c57270265..aeab3363194a 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/sign_tinyint/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric/sign_tinyint/out.sql @@ -1 +1,3 @@ -CAST(sign(`tinyint_col`) AS tinyint) \ No newline at end of file +SELECT + CAST(SIGN(`t0`.`tinyint_col`) AS TINYINT) AS `Sign(tinyint_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-abs/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-abs/out.sql index 1e825cc95736..6a5fbd114957 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-abs/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-abs/out.sql @@ -1 +1,3 @@ -abs(`double_col`) \ No newline at end of file +SELECT + ABS(`t0`.`double_col`) AS `Abs(double_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-approx_median/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-approx_median/out.sql index 7c6259d1f207..1b8952091788 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-approx_median/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-approx_median/out.sql @@ -1 +1,3 @@ -appx_median(`double_col`) \ No newline at end of file +SELECT + APPX_MEDIAN(`t0`.`double_col`) AS `ApproxMedian(double_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-approx_nunique/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-approx_nunique/out.sql index bbd86f78c7b2..8b6a9a30cd5e 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-approx_nunique/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-approx_nunique/out.sql @@ -1 +1,3 @@ -ndv(`double_col`) \ No newline at end of file +SELECT + NDV(`t0`.`double_col`) AS `ApproxCountDistinct(double_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-ceil/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-ceil/out.sql index 34b44958f5b2..e1d4ccc8bfc8 100644 --- 
a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-ceil/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-ceil/out.sql @@ -1 +1,3 @@ -cast(ceil(`double_col`) AS BIGINT) \ No newline at end of file +SELECT + CAST(CEIL(`t0`.`double_col`) AS BIGINT) AS `Ceil(double_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-exp/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-exp/out.sql index 3fcad3b1277a..901825a56673 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-exp/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-exp/out.sql @@ -1 +1,3 @@ -exp(`double_col`) \ No newline at end of file +SELECT + EXP(`t0`.`double_col`) AS `Exp(double_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-floor/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-floor/out.sql index 5c9398c38a57..ce08c79223ee 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-floor/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-floor/out.sql @@ -1 +1,3 @@ -cast(floor(`double_col`) AS BIGINT) \ No newline at end of file +SELECT + CAST(FLOOR(`t0`.`double_col`) AS BIGINT) AS `Floor(double_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-ln/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-ln/out.sql index d877e8555ef2..054c37c01e12 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-ln/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-ln/out.sql @@ -1 +1,3 @@ -ln(`double_col`) \ No newline at end of file +SELECT + LN(`t0`.`double_col`) AS `Ln(double_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-log/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-log/out.sql index d877e8555ef2..3ab583ef1b1e 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-log/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-log/out.sql @@ -1 +1,3 @@ -ln(`double_col`) \ No newline at end of file +SELECT + LN(`t0`.`double_col`) AS `Log(double_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-log10/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-log10/out.sql index 21221b5e1e77..3855ea62162b 100644 --- 
a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-log10/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-log10/out.sql @@ -1 +1,3 @@ -log10(`double_col`) \ No newline at end of file +SELECT + LOG10(`t0`.`double_col`) AS `Log10(double_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-log2/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-log2/out.sql index 4bc576e89d92..95fa9de02cd7 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-log2/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-log2/out.sql @@ -1 +1,3 @@ -log2(`double_col`) \ No newline at end of file +SELECT + LOG2(`t0`.`double_col`) AS `Log2(double_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-nullif_zero/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-nullif_zero/out.sql index 692f32c5426b..9ec19055a585 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-nullif_zero/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-nullif_zero/out.sql @@ -1 +1,3 @@ -nullif(`double_col`, 0) \ No newline at end of file +SELECT + NULLIF(`t0`.`double_col`, 0) AS `NullIf(double_col, 0)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-sqrt/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-sqrt/out.sql index 014bda7b6ce8..a97c5b3d9c41 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-sqrt/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-sqrt/out.sql @@ -1 +1,3 @@ -sqrt(`double_col`) \ No newline at end of file +SELECT + SQRT(`t0`.`double_col`) AS `Sqrt(double_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-zero_ifnull/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-zero_ifnull/out.sql index b13fcb27c9c0..50332619efc8 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-zero_ifnull/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/double_col-zero_ifnull/out.sql @@ -1 +1,3 @@ -coalesce(`double_col`, 0) \ No newline at end of file +SELECT + COALESCE(`t0`.`double_col`, 0) AS `Coalesce()` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-abs/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-abs/out.sql index 981659054851..47bbebc076ad 100644 --- 
a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-abs/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-abs/out.sql @@ -1 +1,3 @@ -abs(`int_col`) \ No newline at end of file +SELECT + ABS(`t0`.`int_col`) AS `Abs(int_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-approx_median/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-approx_median/out.sql index 78a2961d5bdc..f4aa84e0340d 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-approx_median/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-approx_median/out.sql @@ -1 +1,3 @@ -appx_median(`int_col`) \ No newline at end of file +SELECT + APPX_MEDIAN(`t0`.`int_col`) AS `ApproxMedian(int_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-approx_nunique/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-approx_nunique/out.sql index ae7d5abebd59..61bf77ff6b36 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-approx_nunique/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-approx_nunique/out.sql @@ -1 +1,3 @@ -ndv(`int_col`) \ No newline at end of file +SELECT + NDV(`t0`.`int_col`) AS `ApproxCountDistinct(int_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-ceil/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-ceil/out.sql index ad631e5f0301..9719624e9899 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-ceil/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-ceil/out.sql @@ -1 +1,3 @@ -cast(ceil(`int_col`) AS BIGINT) \ No newline at end of file +SELECT + CAST(CEIL(`t0`.`int_col`) AS BIGINT) AS `Ceil(int_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-exp/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-exp/out.sql index 3dfb7771b2b0..8fc65dc0bcf7 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-exp/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-exp/out.sql @@ -1 +1,3 @@ -exp(`int_col`) \ No newline at end of file +SELECT + EXP(`t0`.`int_col`) AS `Exp(int_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-floor/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-floor/out.sql index 2c1c3b987be9..c1176e87d5ca 100644 --- 
a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-floor/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-floor/out.sql @@ -1 +1,3 @@ -cast(floor(`int_col`) AS BIGINT) \ No newline at end of file +SELECT + CAST(FLOOR(`t0`.`int_col`) AS BIGINT) AS `Floor(int_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-ln/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-ln/out.sql index b8f0063ba5cc..0d28cf45d8d0 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-ln/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-ln/out.sql @@ -1 +1,3 @@ -ln(`int_col`) \ No newline at end of file +SELECT + LN(`t0`.`int_col`) AS `Ln(int_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-log/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-log/out.sql index b8f0063ba5cc..445d79468855 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-log/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-log/out.sql @@ -1 +1,3 @@ -ln(`int_col`) \ No newline at end of file +SELECT + LN(`t0`.`int_col`) AS `Log(int_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-log10/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-log10/out.sql index 523571bfbda5..f70ec7877869 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-log10/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-log10/out.sql @@ -1 +1,3 @@ -log10(`int_col`) \ No newline at end of file +SELECT + LOG10(`t0`.`int_col`) AS `Log10(int_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-log2/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-log2/out.sql index 802aca35efb1..85c0ab5865db 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-log2/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-log2/out.sql @@ -1 +1,3 @@ -log2(`int_col`) \ No newline at end of file +SELECT + LOG2(`t0`.`int_col`) AS `Log2(int_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-nullif_zero/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-nullif_zero/out.sql index b5728635f362..189059ca5eec 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-nullif_zero/out.sql +++ 
b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-nullif_zero/out.sql @@ -1 +1,3 @@ -nullif(`int_col`, 0) \ No newline at end of file +SELECT + NULLIF(`t0`.`int_col`, 0) AS `NullIf(int_col, 0)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-sqrt/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-sqrt/out.sql index f16508299bb0..66207d0c3bfb 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-sqrt/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-sqrt/out.sql @@ -1 +1,3 @@ -sqrt(`int_col`) \ No newline at end of file +SELECT + SQRT(`t0`.`int_col`) AS `Sqrt(int_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-zero_ifnull/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-zero_ifnull/out.sql index fe70093ef8b6..98cd6d13641f 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-zero_ifnull/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_numeric_unary_builtins/int_col-zero_ifnull/out.sql @@ -1 +1,3 @@ -coalesce(`int_col`, 0) \ No newline at end of file +SELECT + COALESCE(`t0`.`int_col`, 0) AS `Coalesce()` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/avg/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/avg/out.sql index e8a6fda0c3c3..a77ec0333df1 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/avg/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/avg/out.sql @@ -1 +1,3 @@ -avg(if(`bigint_col` < 70, `double_col`, NULL)) \ No newline at end of file +SELECT + AVG(IF(`t0`.`bigint_col` < 70, `t0`.`double_col`, NULL)) AS `Mean(double_col, Less(bigint_col, 70))` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/count/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/count/out.sql index d16ebdb59378..f1c9820372f8 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/count/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/count/out.sql @@ -1 +1,3 @@ -count(if(`bigint_col` < 70, `double_col`, NULL)) \ No newline at end of file +SELECT + COUNT(IF(`t0`.`bigint_col` < 70, `t0`.`double_col`, NULL)) AS `Count(double_col, Less(bigint_col, 70))` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/max/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/max/out.sql index 1177c7248f3a..201339f34a34 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/max/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/max/out.sql @@ -1 +1,3 @@ -max(if(`bigint_col` < 70, `double_col`, 
NULL)) \ No newline at end of file +SELECT + MAX(IF(`t0`.`bigint_col` < 70, `t0`.`double_col`, NULL)) AS `Max(double_col, Less(bigint_col, 70))` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/min/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/min/out.sql index ef78e51fd300..3d7b4c2b6338 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/min/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/min/out.sql @@ -1 +1,3 @@ -min(if(`bigint_col` < 70, `double_col`, NULL)) \ No newline at end of file +SELECT + MIN(IF(`t0`.`bigint_col` < 70, `t0`.`double_col`, NULL)) AS `Min(double_col, Less(bigint_col, 70))` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/stddev_pop/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/stddev_pop/out.sql index db9bd6952bbe..904bd3cedb82 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/stddev_pop/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/stddev_pop/out.sql @@ -1 +1,3 @@ -stddev_pop(if(`bigint_col` < 70, `double_col`, NULL)) \ No newline at end of file +SELECT + STDDEV_POP(IF(`t0`.`bigint_col` < 70, `t0`.`double_col`, NULL)) AS `StandardDev(double_col, Less(bigint_col, 70))` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/stddev_samp/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/stddev_samp/out.sql index a99c13b7aa32..5441b3e8a148 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/stddev_samp/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/stddev_samp/out.sql @@ -1 +1,3 @@ -stddev_samp(if(`bigint_col` < 70, `double_col`, NULL)) \ No newline at end of file +SELECT + STDDEV_SAMP(IF(`t0`.`bigint_col` < 70, `t0`.`double_col`, NULL)) AS `StandardDev(double_col, Less(bigint_col, 70))` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/sum/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/sum/out.sql index fe67276335ab..2e5f42ac441b 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/sum/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/sum/out.sql @@ -1 +1,3 @@ -sum(if(`bigint_col` < 70, `double_col`, NULL)) \ No newline at end of file +SELECT + SUM(IF(`t0`.`bigint_col` < 70, `t0`.`double_col`, NULL)) AS `Sum(double_col, Less(bigint_col, 70))` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/var_pop/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/var_pop/out.sql index 1c4cc2673fca..6f8f349f6dea 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/var_pop/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/var_pop/out.sql @@ -1 +1,3 @@ 
-var_pop(if(`bigint_col` < 70, `double_col`, NULL)) \ No newline at end of file +SELECT + VARIANCE_POP(IF(`t0`.`bigint_col` < 70, `t0`.`double_col`, NULL)) AS `Variance(double_col, Less(bigint_col, 70))` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/var_samp/out.sql b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/var_samp/out.sql index eb78e2ad5fcc..687ec674190a 100644 --- a/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/var_samp/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_unary_builtins/test_reduction_where/var_samp/out.sql @@ -1 +1,3 @@ -var_samp(if(`bigint_col` < 70, `double_col`, NULL)) \ No newline at end of file +SELECT + VARIANCE(IF(`t0`.`bigint_col` < 70, `t0`.`double_col`, NULL)) AS `Variance(double_col, Less(bigint_col, 70))` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/all/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/all/out.sql index 5ce0bf8a7908..b3c01f218e9c 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/all/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/all/out.sql @@ -1 +1,3 @@ -min(`f` = 0) \ No newline at end of file +SELECT + MIN(`t0`.`f` = 0) AS `All(Equals(f, 0))` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/any/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/any/out.sql index 99666cb4f85e..3603a3775525 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/any/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/any/out.sql @@ -1 +1,3 @@ -max(`f` = 0) \ No newline at end of file +SELECT + MAX(`t0`.`f` = 0) AS `Any(Equals(f, 0))` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/not_all/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/not_all/out.sql index 6a728d740a87..a8dddedc924a 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/not_all/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/not_all/out.sql @@ -1 +1,5 @@ -NOT min(`f` = 0) \ No newline at end of file +SELECT + NOT ( + MIN(`t0`.`f` = 0) + ) AS `Not(All(Equals(f, 0)))` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/not_any/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/not_any/out.sql index acf6c77d626b..f34b0119dd6a 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/not_any/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_any_all/not_any/out.sql @@ -1 +1,5 @@ -NOT max(`f` = 0) \ No newline at end of file +SELECT + NOT ( + MAX(`t0`.`f` = 0) + ) AS `Not(Any(Equals(f, 0)))` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_between/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_between/out.sql index ad73d2ff66b9..074d1cf7504a 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_between/out.sql +++ 
b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_between/out.sql @@ -1 +1,3 @@ -`f` BETWEEN 0 AND 1 \ No newline at end of file +SELECT + `t0`.`f` BETWEEN 0 AND 1 AS `Between(f, 0, 1)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/add/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/add/out.sql index de42587aa531..c75bfca19b2e 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/add/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/add/out.sql @@ -1 +1,3 @@ -`a` + `b` \ No newline at end of file +SELECT + `t0`.`a` + `t0`.`b` AS `Add(a, b)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/and/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/and/out.sql index e7a19d915a4d..62490957d853 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/and/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/and/out.sql @@ -1 +1,5 @@ -`h` AND (`a` > 0) \ No newline at end of file +SELECT + `t0`.`h` AND ( + `t0`.`a` > 0 + ) AS `And(h, Greater(a, 0))` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/div/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/div/out.sql index 738f82062003..ff486466e25d 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/div/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/div/out.sql @@ -1 +1,3 @@ -`a` / `b` \ No newline at end of file +SELECT + `t0`.`a` / `t0`.`b` AS `Divide(a, b)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/eq/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/eq/out.sql index f63b650bb512..7cd246375633 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/eq/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/eq/out.sql @@ -1 +1,3 @@ -`a` = `b` \ No newline at end of file +SELECT + `t0`.`a` = `t0`.`b` AS `Equals(a, b)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/ge/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/ge/out.sql index d082c8a0f7b1..2886b762bb3c 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/ge/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/ge/out.sql @@ -1 +1,3 @@ -`a` >= `b` \ No newline at end of file +SELECT + `t0`.`a` >= `t0`.`b` AS `GreaterEqual(a, b)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/gt/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/gt/out.sql index 5dbacd9aeacb..2de270e4e888 100644 --- 
a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/gt/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/gt/out.sql @@ -1 +1,3 @@ -`a` > `b` \ No newline at end of file +SELECT + `t0`.`a` > `t0`.`b` AS `Greater(a, b)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/le/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/le/out.sql index 3af3232deda5..539f4c941b99 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/le/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/le/out.sql @@ -1 +1,3 @@ -`a` <= `b` \ No newline at end of file +SELECT + `t0`.`a` <= `t0`.`b` AS `LessEqual(a, b)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/lt/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/lt/out.sql index cd59403991fd..d6a149a69a6d 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/lt/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/lt/out.sql @@ -1 +1,3 @@ -`a` < `b` \ No newline at end of file +SELECT + `t0`.`a` < `t0`.`b` AS `Less(a, b)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/mul/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/mul/out.sql index 377ffe5d81a9..0082b5406b2a 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/mul/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/mul/out.sql @@ -1 +1,3 @@ -`a` * `b` \ No newline at end of file +SELECT + `t0`.`a` * `t0`.`b` AS `Multiply(a, b)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/ne/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/ne/out.sql index 4859254caf16..43510c0f6f20 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/ne/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/ne/out.sql @@ -1 +1,3 @@ -`a` != `b` \ No newline at end of file +SELECT + `t0`.`a` <> `t0`.`b` AS `NotEquals(a, b)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/or/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/or/out.sql index 4ee3468ac338..f0934f0d1e85 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/or/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/or/out.sql @@ -1 +1,5 @@ -`h` OR (`a` > 0) \ No newline at end of file +SELECT + `t0`.`h` OR ( + `t0`.`a` > 0 + ) AS `Or(h, Greater(a, 0))` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/pow/out.sql 
b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/pow/out.sql index e03a233264b7..e6692989f4ed 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/pow/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/pow/out.sql @@ -1 +1,3 @@ -pow(`a`, `b`) \ No newline at end of file +SELECT + POWER(`t0`.`a`, `t0`.`b`) AS `Power(a, b)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/sub/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/sub/out.sql index eef355b72f37..61ca4f714055 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/sub/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/sub/out.sql @@ -1 +1,3 @@ -`a` - `b` \ No newline at end of file +SELECT + `t0`.`a` - `t0`.`b` AS `Subtract(a, b)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/xor/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/xor/out.sql index be15c73a65d9..3d40e7082127 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/xor/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_operators/xor/out.sql @@ -1 +1,7 @@ -(`h` OR (`a` > 0)) AND NOT (`h` AND (`a` > 0)) \ No newline at end of file +SELECT + ( + `t0`.`h` OR `t0`.`a` > 0 + ) AND NOT ( + `t0`.`h` AND `t0`.`a` > 0 + ) AS `Xor(h, Greater(a, 0))` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_parenthesization/function_call/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_parenthesization/function_call/out.sql index 01d8d54ab171..0a0acb3d3916 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_parenthesization/function_call/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_parenthesization/function_call/out.sql @@ -1 +1,3 @@ -ln(`a`) + `c` \ No newline at end of file +SELECT + LN(`t0`.`a`) + `t0`.`c` AS `Add(Log(a), c)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_parenthesization/negation/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_parenthesization/negation/out.sql index d3ddcdaf596c..57843a872be2 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_parenthesization/negation/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_parenthesization/negation/out.sql @@ -1 +1,5 @@ -`b` + (-(`a` + `c`)) \ No newline at end of file +SELECT + `t0`.`b` + -( + `t0`.`a` + `t0`.`c` + ) AS `Add(b, Negate(Add(a, c)))` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_parenthesization/parens_left/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_parenthesization/parens_left/out.sql index 6ae6e64d1587..5129499fb406 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_parenthesization/parens_left/out.sql +++ 
b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_binary_infix_parenthesization/parens_left/out.sql @@ -1 +1,5 @@ -(`a` + `b`) + `c` \ No newline at end of file +SELECT + ( + `t0`.`a` + `t0`.`b` + ) + `t0`.`c` AS `Add(Add(a, b), c)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-int16/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-int16/out.sql index e8acf4f526ac..704a46db2ab0 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-int16/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-int16/out.sql @@ -1 +1,3 @@ -CAST(`a` AS smallint) \ No newline at end of file +SELECT + CAST(`t0`.`a` AS SMALLINT) AS `Cast(a, int16)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-int32/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-int32/out.sql index 009f91e4dff7..3600cc467680 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-int32/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-int32/out.sql @@ -1 +1,3 @@ -CAST(`a` AS int) \ No newline at end of file +SELECT + CAST(`t0`.`a` AS INT) AS `Cast(a, int32)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-int64/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-int64/out.sql index f8908356c41a..754de6351cb3 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-int64/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-int64/out.sql @@ -1 +1,3 @@ -CAST(`a` AS bigint) \ No newline at end of file +SELECT + CAST(`t0`.`a` AS BIGINT) AS `Cast(a, int64)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-string/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-string/out.sql index 532de8e0c518..5a81bd7a8fb5 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-string/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/a-string/out.sql @@ -1 +1,3 @@ -CAST(`a` AS string) \ No newline at end of file +SELECT + CAST(`t0`.`a` AS STRING) AS `Cast(a, string)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/d-int8/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/d-int8/out.sql index 05775da9ff65..3c9cacaea43d 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/d-int8/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/d-int8/out.sql @@ -1 +1,3 @@ -CAST(`d` AS tinyint) \ No newline at end of file +SELECT + CAST(`t0`.`d` AS TINYINT) AS `Cast(d, int8)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/g-double/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/g-double/out.sql index 16885af75ca0..bcc0c2a4bf5f 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/g-double/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/g-double/out.sql @@ -1 +1,3 @@ -CAST(`g` AS double) \ No newline at 
end of file +SELECT + CAST(`t0`.`g` AS DOUBLE) AS `Cast(g, float64)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/g-timestamp/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/g-timestamp/out.sql index 2eac75bda518..e3b96a62dfa1 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/g-timestamp/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_casts/g-timestamp/out.sql @@ -1 +1,3 @@ -CAST(`g` AS timestamp) \ No newline at end of file +SELECT + CAST(`t0`.`g` AS TIMESTAMP) AS `Cast(g, timestamp)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_column_ref_table_aliases/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_column_ref_table_aliases/out.sql deleted file mode 100644 index 720bb988d4ce..000000000000 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_column_ref_table_aliases/out.sql +++ /dev/null @@ -1 +0,0 @@ -t0.`value1` - t1.`value and2` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_correlated_predicate_subquery/out1.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_correlated_predicate_subquery/out1.sql new file mode 100644 index 000000000000..037d0612d48b --- /dev/null +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_correlated_predicate_subquery/out1.sql @@ -0,0 +1,15 @@ +SELECT + `t0`.`a`, + `t0`.`b`, + `t0`.`c`, + `t0`.`d`, + `t0`.`e`, + `t0`.`f`, + `t0`.`g`, + `t0`.`h`, + `t0`.`i`, + `t0`.`j`, + `t0`.`k` +FROM `alltypes` AS `t0` +WHERE + `t0`.`g` = `t1`.`g` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_correlated_predicate_subquery/out2.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_correlated_predicate_subquery/out2.sql new file mode 100644 index 000000000000..1f4705a4b3f4 --- /dev/null +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_correlated_predicate_subquery/out2.sql @@ -0,0 +1,15 @@ +SELECT + `t1`.`a`, + `t1`.`b`, + `t1`.`c`, + `t1`.`d`, + `t1`.`e`, + `t1`.`f`, + `t1`.`g`, + `t1`.`h`, + `t1`.`i`, + `t1`.`j`, + `t1`.`k` +FROM `alltypes` AS `t1` +WHERE + `t0`.`g` = `t1`.`g` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_decimal_casts/column/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_decimal_casts/column/out.sql index 8bd09e13e15a..d64f9372d0d0 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_decimal_casts/column/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_decimal_casts/column/out.sql @@ -1 +1,3 @@ -CAST(`f` AS decimal(12, 2)) \ No newline at end of file +SELECT + CAST(`t0`.`f` AS DECIMAL(12, 2)) AS `Cast(f, decimal(12, 2))` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_decimal_casts/literal/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_decimal_casts/literal/out.sql index d1714725990c..2cf3990f2dff 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_decimal_casts/literal/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_decimal_casts/literal/out.sql @@ -1 +1,2 @@ -CAST('9.9999999' AS decimal(38, 5)) \ No newline at end of file +SELECT + CAST('9.9999999' AS DECIMAL(38, 
5)) AS `Cast('9.9999999', decimal(38, 5))` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/compound_isnull/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/compound_isnull/out.sql index 7b86eb84d3ca..7448f99b030a 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/compound_isnull/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/compound_isnull/out.sql @@ -1 +1,5 @@ -`a` + `b` IS NULL \ No newline at end of file +SELECT + ( + `t0`.`a` + `t0`.`b` + ) IS NULL AS `IsNull(Add(a, b))` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/isnull/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/isnull/out.sql index 1cc4e78d323f..95a4553c56f8 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/isnull/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/isnull/out.sql @@ -1 +1,3 @@ -`g` IS NULL \ No newline at end of file +SELECT + `t0`.`g` IS NULL AS `IsNull(g)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/notnull/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/notnull/out.sql index cf5142f9ff8d..e73b0f80ce14 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/notnull/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/notnull/out.sql @@ -1 +1,3 @@ -`a` IS NOT NULL \ No newline at end of file +SELECT + NOT `t0`.`a` IS NULL AS `NotNull(a)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/embedded_double_quote/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/embedded_double_quote/out.sql index 7ec9ac6b2e01..3925c3655429 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/embedded_double_quote/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/embedded_double_quote/out.sql @@ -1 +1,2 @@ -'An "escape"' \ No newline at end of file +SELECT + 'An "escape"' AS `'An "escape"'` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/embedded_single_quote/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/embedded_single_quote/out.sql index de1ddacc9239..47f1d01c628c 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/embedded_single_quote/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/embedded_single_quote/out.sql @@ -1 +1,2 @@ -'I can\'t' \ No newline at end of file +SELECT + 'I can\'t' AS `"I can't"` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/false/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/false/out.sql index 5a03bd3f56c5..d651d2b7b227 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/false/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/false/out.sql @@ -1 +1,2 @@ -FALSE \ No newline at end of file +SELECT + FALSE AS `False` \ No newline at end of file diff --git 
a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/float/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/float/out.sql index 400122e60f59..961eaa4da513 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/float/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/float/out.sql @@ -1 +1,2 @@ -1.5 \ No newline at end of file +SELECT + 1.5 AS `1.5` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/int/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/int/out.sql index 7813681f5b41..f9e238629c44 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/int/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/int/out.sql @@ -1 +1,2 @@ -5 \ No newline at end of file +SELECT + 5 AS `5` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/simple/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/simple/out.sql index 271237e4c2bc..56d5f0cd08ab 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/simple/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/simple/out.sql @@ -1 +1,2 @@ -'simple' \ No newline at end of file +SELECT + 'simple' AS `'simple'` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/true/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/true/out.sql index 51cb8b0f7c29..0872d8303204 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/true/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_literals/true/out.sql @@ -1 +1,2 @@ -TRUE \ No newline at end of file +SELECT + TRUE AS `True` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_misc_conditionals/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_misc_conditionals/out.sql index 85bacbbef964..d5c70b443e78 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_misc_conditionals/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_misc_conditionals/out.sql @@ -1 +1,3 @@ -nullif(`a`, 0) \ No newline at end of file +SELECT + NULLIF(`t0`.`a`, 0) AS `NullIf(a, 0)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_named_expressions/cast/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_named_expressions/cast/out.sql index ce4aafe52ec2..0a301bc6cdcd 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_named_expressions/cast/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_named_expressions/cast/out.sql @@ -1 +1,3 @@ -CAST(`g` AS double) AS `g_dub` \ No newline at end of file +SELECT + CAST(`t0`.`g` AS DOUBLE) AS `g_dub` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_named_expressions/compound_expr/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_named_expressions/compound_expr/out.sql index 2778f0de3244..8fb6371ac2b7 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_named_expressions/compound_expr/out.sql +++ 
b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_named_expressions/compound_expr/out.sql @@ -1 +1,5 @@ -(`a` - `b`) * `a` AS `expr` \ No newline at end of file +SELECT + ( + `t0`.`a` - `t0`.`b` + ) * `t0`.`a` AS `expr` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_named_expressions/spaces/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_named_expressions/spaces/out.sql index 2b69e2760caf..dcdbc154e1a0 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_named_expressions/spaces/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_named_expressions/spaces/out.sql @@ -1 +1,3 @@ -`g` AS `has a space` \ No newline at end of file +SELECT + `t0`.`g` AS `has a space` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_negate/a/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_negate/a/out.sql index ddbd6aba9757..6d8d3bffa41b 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_negate/a/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_negate/a/out.sql @@ -1 +1,5 @@ --`a` \ No newline at end of file +SELECT + -( + `t0`.`a` + ) AS `Negate(a)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_negate/f/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_negate/f/out.sql index fc80397e5fb8..9816ef075d8b 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_negate/f/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_negate/f/out.sql @@ -1 +1,5 @@ --`f` \ No newline at end of file +SELECT + -( + `t0`.`f` + ) AS `Negate(f)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_negate/h/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_negate/h/out.sql index 09b4c8e4209b..522c778fade2 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_negate/h/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_negate/h/out.sql @@ -1 +1,5 @@ -NOT `h` \ No newline at end of file +SELECT + NOT ( + `t0`.`h` + ) AS `Not(h)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_sql_extract/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_sql_extract/out.sql index 6615fd983818..618635097742 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_sql_extract/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_sql_extract/out.sql @@ -1,3 +1,5 @@ -SELECT extract(t0.`i`, 'year') AS `year`, extract(t0.`i`, 'month') AS `month`, - extract(t0.`i`, 'day') AS `day` -FROM `alltypes` t0 \ No newline at end of file +SELECT + EXTRACT(year FROM `t0`.`i`) AS `year`, + EXTRACT(month FROM `t0`.`i`) AS `month`, + EXTRACT(day FROM `t0`.`i`) AS `day` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_day_of_week/full_name/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_day_of_week/full_name/out.sql index 9c4297e0fb82..a984be382ea8 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_day_of_week/full_name/out.sql +++ 
b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_day_of_week/full_name/out.sql @@ -1 +1,2 @@ -dayname('2015-09-01T01:00:23') \ No newline at end of file +SELECT + DAYNAME('2015-09-01T01:00:23') AS `DayOfWeekName(datetime.datetime(2015, 9, 1, 1, 0, 23))` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_day_of_week/index/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_day_of_week/index/out.sql index fc166ccac8c2..9a26667a72fa 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_day_of_week/index/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_day_of_week/index/out.sql @@ -1 +1,2 @@ -pmod(dayofweek('2015-09-01T01:00:23') - 2, 7) \ No newline at end of file +SELECT + PMOD(DAY_OF_WEEK('2015-09-01T01:00:23') - 2, 7) AS `DayOfWeekIndex(datetime.datetime(2015, 9, 1, 1, 0, 23))` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/days/out1.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/days/out1.sql index 9c6a6f3f3f4e..7237d7b9004d 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/days/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/days/out1.sql @@ -1 +1,3 @@ -date_add(cast(`i` as timestamp), INTERVAL 5 DAY) \ No newline at end of file +SELECT + CAST(CAST(`t0`.`i` AS TIMESTAMP) + INTERVAL '5' DAY AS TIMESTAMP) AS `TimestampAdd(i, 5D)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/days/out2.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/days/out2.sql index c180d3bf65cd..eb169d991d79 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/days/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/days/out2.sql @@ -1 +1,3 @@ -date_sub(cast(`i` as timestamp), INTERVAL 5 DAY) \ No newline at end of file +SELECT + `t0`.`i` - INTERVAL '5' DAY AS `TimestampSub(i, 5D)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/hours/out1.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/hours/out1.sql index 0fcb7a0d5199..328d2ef5a820 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/hours/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/hours/out1.sql @@ -1 +1,3 @@ -date_add(cast(`i` as timestamp), INTERVAL 5 HOUR) \ No newline at end of file +SELECT + CAST(CAST(`t0`.`i` AS TIMESTAMP) + INTERVAL '5' HOUR AS TIMESTAMP) AS `TimestampAdd(i, 5h)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/hours/out2.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/hours/out2.sql index 7402f369a533..86951b1a52ab 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/hours/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/hours/out2.sql @@ -1 +1,3 @@ -date_sub(cast(`i` as timestamp), INTERVAL 5 HOUR) \ No newline at end of file +SELECT + `t0`.`i` - INTERVAL '5' HOUR AS 
`TimestampSub(i, 5h)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/minutes/out1.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/minutes/out1.sql index b7096d5ba4be..a97e1fda3fb7 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/minutes/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/minutes/out1.sql @@ -1 +1,3 @@ -date_add(cast(`i` as timestamp), INTERVAL 5 MINUTE) \ No newline at end of file +SELECT + CAST(CAST(`t0`.`i` AS TIMESTAMP) + INTERVAL '5' MINUTE AS TIMESTAMP) AS `TimestampAdd(i, 5m)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/minutes/out2.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/minutes/out2.sql index e7a3872edc48..9646ba677e2f 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/minutes/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/minutes/out2.sql @@ -1 +1,3 @@ -date_sub(cast(`i` as timestamp), INTERVAL 5 MINUTE) \ No newline at end of file +SELECT + `t0`.`i` - INTERVAL '5' MINUTE AS `TimestampSub(i, 5m)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/months/out1.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/months/out1.sql index 32421590426e..2f2d23da1686 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/months/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/months/out1.sql @@ -1 +1,3 @@ -date_add(cast(`i` as timestamp), INTERVAL 5 MONTH) \ No newline at end of file +SELECT + CAST(CAST(`t0`.`i` AS TIMESTAMP) + INTERVAL '5' MONTH AS TIMESTAMP) AS `TimestampAdd(i, 5M)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/months/out2.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/months/out2.sql index a71b731d050e..fc1911b370cb 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/months/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/months/out2.sql @@ -1 +1,3 @@ -date_sub(cast(`i` as timestamp), INTERVAL 5 MONTH) \ No newline at end of file +SELECT + `t0`.`i` - INTERVAL '5' MONTH AS `TimestampSub(i, 5M)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/seconds/out1.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/seconds/out1.sql index f15d5d5ece75..9fb8b0686200 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/seconds/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/seconds/out1.sql @@ -1 +1,3 @@ -date_add(cast(`i` as timestamp), INTERVAL 5 SECOND) \ No newline at end of file +SELECT + CAST(CAST(`t0`.`i` AS TIMESTAMP) + INTERVAL '5' SECOND AS TIMESTAMP) AS `TimestampAdd(i, 5s)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/seconds/out2.sql 
b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/seconds/out2.sql index b3cc2bded61a..8f89c57c0e98 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/seconds/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/seconds/out2.sql @@ -1 +1,3 @@ -date_sub(cast(`i` as timestamp), INTERVAL 5 SECOND) \ No newline at end of file +SELECT + `t0`.`i` - INTERVAL '5' SECOND AS `TimestampSub(i, 5s)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/weeks/out1.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/weeks/out1.sql index 676ba865764e..2ee339b0a84e 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/weeks/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/weeks/out1.sql @@ -1 +1,3 @@ -date_add(cast(`i` as timestamp), INTERVAL 5 WEEK) \ No newline at end of file +SELECT + CAST(CAST(`t0`.`i` AS TIMESTAMP) + INTERVAL '5' WEEK AS TIMESTAMP) AS `TimestampAdd(i, 5W)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/weeks/out2.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/weeks/out2.sql index c494e3d406eb..92b9cfeda5ba 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/weeks/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/weeks/out2.sql @@ -1 +1,3 @@ -date_sub(cast(`i` as timestamp), INTERVAL 5 WEEK) \ No newline at end of file +SELECT + `t0`.`i` - INTERVAL '5' WEEK AS `TimestampSub(i, 5W)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/years/out1.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/years/out1.sql index b79ad3f637f1..b0bbfd8bd6ca 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/years/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/years/out1.sql @@ -1 +1,3 @@ -date_add(cast(`i` as timestamp), INTERVAL 5 YEAR) \ No newline at end of file +SELECT + CAST(CAST(`t0`.`i` AS TIMESTAMP) + INTERVAL '5' YEAR AS TIMESTAMP) AS `TimestampAdd(i, 5Y)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/years/out2.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/years/out2.sql index 8096bbacd63f..0778cd8b068e 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/years/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_deltas/years/out2.sql @@ -1 +1,3 @@ -date_sub(cast(`i` as timestamp), INTERVAL 5 YEAR) \ No newline at end of file +SELECT + `t0`.`i` - INTERVAL '5' YEAR AS `TimestampSub(i, 5Y)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/day/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/day/out.sql index bb6eb1e9743e..7d48d45ef57d 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/day/out.sql +++ 
b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/day/out.sql @@ -1 +1,3 @@ -extract(`i`, 'day') \ No newline at end of file +SELECT + EXTRACT(day FROM `t0`.`i`) AS `ExtractDay(i)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/hour/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/hour/out.sql index aee2ae57deb3..7b40771226a7 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/hour/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/hour/out.sql @@ -1 +1,3 @@ -extract(`i`, 'hour') \ No newline at end of file +SELECT + EXTRACT(hour FROM `t0`.`i`) AS `ExtractHour(i)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/microsecond/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/microsecond/out.sql index cfbd2bf17deb..581fbe79370d 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/microsecond/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/microsecond/out.sql @@ -1 +1,3 @@ -extract(`i`, 'microsecond') % 1000000 \ No newline at end of file +SELECT + EXTRACT(microsecond FROM `t0`.`i`) % 1000000 AS `ExtractMicrosecond(i)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/millisecond/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/millisecond/out.sql index ccc6e55c3369..8ec3fd789ebd 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/millisecond/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/millisecond/out.sql @@ -1 +1,3 @@ -extract(`i`, 'millisecond') % 1000 \ No newline at end of file +SELECT + EXTRACT(millisecond FROM `t0`.`i`) % 1000 AS `ExtractMillisecond(i)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/minute/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/minute/out.sql index f02e4f778cc4..ad23d825b136 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/minute/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/minute/out.sql @@ -1 +1,3 @@ -extract(`i`, 'minute') \ No newline at end of file +SELECT + EXTRACT(minute FROM `t0`.`i`) AS `ExtractMinute(i)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/month/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/month/out.sql index 057f3677f3ce..0464f32264b0 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/month/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/month/out.sql @@ -1 +1,3 @@ -extract(`i`, 'month') \ No newline at end of file +SELECT + EXTRACT(month FROM `t0`.`i`) AS `ExtractMonth(i)` +FROM `alltypes` AS `t0` \ No newline at end of file 
diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/second/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/second/out.sql index c2cfcf566b93..415228f0ac53 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/second/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/second/out.sql @@ -1 +1,3 @@ -extract(`i`, 'second') \ No newline at end of file +SELECT + EXTRACT(second FROM `t0`.`i`) AS `ExtractSecond(i)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/year/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/year/out.sql index c9e4e97df7b7..559c7e58f189 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/year/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_extract_field/year/out.sql @@ -1 +1,3 @@ -extract(`i`, 'year') \ No newline at end of file +SELECT + EXTRACT(year FROM `t0`.`i`) AS `ExtractYear(i)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/default/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/default/out.sql index e400a829e547..fe7e222fd8c7 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/default/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/default/out.sql @@ -1 +1,3 @@ -CAST(from_unixtime(`c`, "yyyy-MM-dd HH:mm:ss") AS timestamp) \ No newline at end of file +SELECT + CAST(FROM_UNIXTIME(CAST(`t0`.`c` AS INT)) AS TIMESTAMP) AS `TimestampFromUNIX(c)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/ms/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/ms/out.sql index 41cc7d327cb1..c03300822962 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/ms/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/ms/out.sql @@ -1 +1,3 @@ -CAST(from_unixtime(CAST(cast(floor(`c` / 1000) AS BIGINT) AS int), "yyyy-MM-dd HH:mm:ss") AS timestamp) \ No newline at end of file +SELECT + CAST(FROM_UNIXTIME(CAST(CAST(`t0`.`c` / 1000 AS INT) AS INT)) AS TIMESTAMP) AS `TimestampFromUNIX(c)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/us/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/us/out.sql index bcff6c32c5a6..50128d971354 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/us/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/us/out.sql @@ -1 +1,3 @@ -CAST(from_unixtime(CAST(cast(floor(`c` / 1000000) AS BIGINT) AS int), "yyyy-MM-dd HH:mm:ss") AS timestamp) \ No newline at end of file +SELECT + CAST(FROM_UNIXTIME(CAST(CAST(`t0`.`c` / 1000000 AS INT) AS INT)) AS TIMESTAMP) AS `TimestampFromUNIX(c)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git 
a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_literals/pd_timestamp/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_literals/pd_timestamp/out.sql index 40caefedf273..f8d8c3ecd268 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_literals/pd_timestamp/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_literals/pd_timestamp/out.sql @@ -1 +1,2 @@ -'2015-01-01T12:34:56' \ No newline at end of file +SELECT + '2015-01-01T12:34:56' AS `datetime.datetime(2015, 1, 1, 12, 34, 56)` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_literals/pydatetime/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_literals/pydatetime/out.sql index 40caefedf273..f8d8c3ecd268 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_literals/pydatetime/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_literals/pydatetime/out.sql @@ -1 +1,2 @@ -'2015-01-01T12:34:56' \ No newline at end of file +SELECT + '2015-01-01T12:34:56' AS `datetime.datetime(2015, 1, 1, 12, 34, 56)` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_literals/timestamp_function/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_literals/timestamp_function/out.sql index 40caefedf273..f8d8c3ecd268 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_literals/timestamp_function/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_literals/timestamp_function/out.sql @@ -1 +1,2 @@ -'2015-01-01T12:34:56' \ No newline at end of file +SELECT + '2015-01-01T12:34:56' AS `datetime.datetime(2015, 1, 1, 12, 34, 56)` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_now/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_now/out.sql index 53945a84effc..4e6e8f3efaa7 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_now/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_now/out.sql @@ -1 +1,2 @@ -now() \ No newline at end of file +SELECT + CURRENT_TIMESTAMP() AS `TimestampNow()` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql index 5ca27037145c..7aca02887dac 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql @@ -1,7 +1,18 @@ -SELECT t0.*, - lag(t0.`f`) OVER (PARTITION BY t0.`g` ORDER BY t0.`f` ASC) AS `lag`, - lead(t0.`f`) OVER (PARTITION BY t0.`g` ORDER BY t0.`f` ASC) - t0.`f` AS `fwd_diff`, - first_value(t0.`f`) OVER (PARTITION BY t0.`g` ORDER BY t0.`f` ASC) AS `first`, - last_value(t0.`f`) OVER (PARTITION BY t0.`g` ORDER BY t0.`f` ASC) AS `last`, - lag(t0.`f`) OVER (PARTITION BY t0.`g` ORDER BY t0.`d` ASC) AS `lag2` -FROM `alltypes` t0 \ No newline at end of file +SELECT + `t0`.`a`, + `t0`.`b`, + `t0`.`c`, + `t0`.`d`, + `t0`.`e`, + `t0`.`f`, + `t0`.`g`, + `t0`.`h`, + `t0`.`i`, + `t0`.`j`, + `t0`.`k`, + LAG(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC NULLS LAST) AS `lag`, + LEAD(`t0`.`f`) OVER (PARTITION BY 
`t0`.`g` ORDER BY NULL ASC NULLS LAST) - `t0`.`f` AS `fwd_diff`, + FIRST_VALUE(`t0`.`f`) OVER (PARTITION BY `t0`.`g`) AS `first`, + LAST_VALUE(`t0`.`f`) OVER (PARTITION BY `t0`.`g`) AS `last`, + LAG(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`d` ASC NULLS LAST) AS `lag2` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql index eac69a8de4eb..f1f482ce6b36 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql @@ -1,2 +1,14 @@ -SELECT t0.*, t0.`f` / sum(t0.`f`) OVER () AS `normed_f` -FROM `alltypes` t0 \ No newline at end of file +SELECT + `t0`.`a`, + `t0`.`b`, + `t0`.`c`, + `t0`.`d`, + `t0`.`e`, + `t0`.`f`, + `t0`.`g`, + `t0`.`h`, + `t0`.`i`, + `t0`.`j`, + `t0`.`k`, + `t0`.`f` / SUM(`t0`.`f`) OVER (ORDER BY NULL ASC NULLS LAST) AS `normed_f` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql index e4d6a93a43cb..94b0b10ea0c5 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql @@ -1,2 +1,3 @@ -SELECT max(t0.`f`) OVER (ORDER BY t0.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + MAX(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql index e4d6a93a43cb..94b0b10ea0c5 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql @@ -1,2 +1,3 @@ -SELECT max(t0.`f`) OVER (ORDER BY t0.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + MAX(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql index 8df3c5d8d98f..183b589665ac 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql @@ -1,2 +1,3 @@ -SELECT avg(t0.`f`) OVER (ORDER BY t0.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + AVG(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql index 8df3c5d8d98f..183b589665ac 100644 --- 
a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql @@ -1,2 +1,3 @@ -SELECT avg(t0.`f`) OVER (ORDER BY t0.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + AVG(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql index debaa216f1c1..8fe0142ae10e 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql @@ -1,2 +1,3 @@ -SELECT min(t0.`f`) OVER (ORDER BY t0.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + MIN(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql index debaa216f1c1..8fe0142ae10e 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql @@ -1,2 +1,3 @@ -SELECT min(t0.`f`) OVER (ORDER BY t0.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + MIN(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql index 4ac5a6cc4bd8..ab0a805400b4 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql @@ -1,2 +1,3 @@ -SELECT sum(t0.`f`) OVER (ORDER BY t0.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + SUM(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql index 4ac5a6cc4bd8..ab0a805400b4 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql @@ -1,2 +1,3 @@ -SELECT sum(t0.`f`) OVER (ORDER BY t0.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + SUM(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql index a0e24423bb33..ba464a7151de 100644 --- 
a/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql @@ -1,3 +1,4 @@ -SELECT t0.`g`, - sum(t0.`f`) OVER (PARTITION BY t0.`g`) - sum(t0.`f`) OVER () AS `result` -FROM `alltypes` t0 \ No newline at end of file +SELECT + `t0`.`g`, + SUM(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC NULLS LAST) - SUM(`t0`.`f`) OVER (ORDER BY NULL ASC NULLS LAST) AS `result` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_nested_analytic_function/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_nested_analytic_function/out.sql index 535ce547cfcf..706a705bf519 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_nested_analytic_function/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_nested_analytic_function/out.sql @@ -1,2 +1,3 @@ -SELECT lag(t0.`f` - lag(t0.`f`) OVER (ORDER BY t0.`f` ASC)) OVER (ORDER BY t0.`f` ASC) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + LAG(`t0`.`f` - LAG(`t0`.`f`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST)) OVER (ORDER BY `t0`.`f` ASC NULLS LAST) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out1.sql index 4c951c7cca15..5692444e6ef9 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out1.sql @@ -1,2 +1,4 @@ -SELECT t0.`f`, (row_number() OVER (ORDER BY t0.`f` DESC) - 1) AS `revrank` -FROM `alltypes` t0 \ No newline at end of file +SELECT + `t0`.`f`, + ROW_NUMBER() OVER (ORDER BY `t0`.`f` DESC) - 1 AS `revrank` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql index 9cc34ab06aa9..fd79e6972531 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql @@ -1,3 +1,4 @@ -SELECT lag(t0.`d`) OVER (PARTITION BY t0.`g` ORDER BY t0.`f` DESC) AS `foo`, - max(t0.`a`) OVER (PARTITION BY t0.`g` ORDER BY t0.`f` DESC) AS `Max(a)` -FROM `alltypes` t0 \ No newline at end of file +SELECT + LAG(`t0`.`d`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` DESC) AS `foo`, + MAX(`t0`.`a`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` DESC) AS `Max(a)` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_propagate_nested_windows/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_propagate_nested_windows/out.sql index f9bfe6018076..96f8ae2f0e6c 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_propagate_nested_windows/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_propagate_nested_windows/out.sql @@ -1,2 +1,5 @@ -SELECT lag(t0.`f` - lag(t0.`f`) OVER (PARTITION BY t0.`g` ORDER BY t0.`f` ASC)) OVER (PARTITION BY t0.`g` ORDER BY t0.`f` ASC) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + LAG( + `t0`.`f` - LAG(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` ASC NULLS LAST) + ) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` ASC NULLS LAST) AS `foo` +FROM 
`alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_rank_functions/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_rank_functions/out.sql index 6e1c60b61afe..92c9d14f20b5 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_rank_functions/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_rank_functions/out.sql @@ -1,3 +1,5 @@ -SELECT t0.`g`, (rank() OVER (ORDER BY t0.`f` ASC) - 1) AS `minr`, - (dense_rank() OVER (ORDER BY t0.`f` ASC) - 1) AS `denser` -FROM `alltypes` t0 \ No newline at end of file +SELECT + `t0`.`g`, + RANK() OVER (ORDER BY `t0`.`f` ASC NULLS LAST) - 1 AS `minr`, + DENSE_RANK() OVER (ORDER BY `t0`.`f` ASC NULLS LAST) - 1 AS `denser` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out1.sql index 7b0d0695d2eb..d4075f13c006 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out1.sql @@ -1,2 +1,14 @@ -SELECT t0.*, (row_number() OVER (PARTITION BY t0.`g`) - 1) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + `t0`.`a`, + `t0`.`b`, + `t0`.`c`, + `t0`.`d`, + `t0`.`e`, + `t0`.`f`, + `t0`.`g`, + `t0`.`h`, + `t0`.`i`, + `t0`.`j`, + `t0`.`k`, + ROW_NUMBER() OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC NULLS LAST) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out2.sql index fab4205d6c77..c423c5e14049 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out2.sql @@ -1,3 +1,14 @@ -SELECT t0.*, - (row_number() OVER (PARTITION BY t0.`g` ORDER BY t0.`f` ASC) - 1) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + `t0`.`a`, + `t0`.`b`, + `t0`.`c`, + `t0`.`d`, + `t0`.`e`, + `t0`.`f`, + `t0`.`g`, + `t0`.`h`, + `t0`.`i`, + `t0`.`j`, + `t0`.`k`, + ROW_NUMBER() OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` ASC NULLS LAST) - 1 AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_row_number_properly_composes_with_arithmetic/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_row_number_properly_composes_with_arithmetic/out.sql index d3e80ddcd836..47864c94d3db 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_row_number_properly_composes_with_arithmetic/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_row_number_properly_composes_with_arithmetic/out.sql @@ -1,2 +1,16 @@ -SELECT t0.*, (row_number() OVER (ORDER BY t0.`f` ASC) - 1) / 2 AS `new` -FROM `alltypes` t0 \ No newline at end of file +SELECT + `t0`.`a`, + `t0`.`b`, + `t0`.`c`, + `t0`.`d`, + `t0`.`e`, + `t0`.`f`, + `t0`.`g`, + `t0`.`h`, + `t0`.`i`, + `t0`.`j`, + `t0`.`k`, + ( + ROW_NUMBER() OVER (ORDER BY `t0`.`f` ASC NULLS LAST) - 1 + ) / 2 AS `new` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git 
a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql index 2df1a73b376b..fb0afc364573 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql @@ -1,2 +1,3 @@ -SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql index 2df1a73b376b..fb0afc364573 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql @@ -1,2 +1,3 @@ -SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_10_5/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_10_5/out.sql index fd7db32b7434..0c5d75598474 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_10_5/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_10_5/out.sql @@ -1,2 +1,3 @@ -SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN 10 PRECEDING AND 5 PRECEDING) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN 10 preceding AND 5 preceding) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2/out.sql index f7699801c4f8..b2c51c37cdd4 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2/out.sql @@ -1,2 +1,3 @@ -SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 2 following) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2_prec_0/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2_prec_0/out.sql index 19cb117aee8d..133948c9fac3 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2_prec_0/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2_prec_0/out.sql @@ -1,2 +1,3 @@ -SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING) AS `foo` -FROM `alltypes` t0 \ No newline at 
end of file +SELECT + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN CURRENT ROW AND 2 following) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_5_10/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_5_10/out.sql index 8512e0762038..66299747e54f 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_5_10/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_5_10/out.sql @@ -1,2 +1,3 @@ -SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN 5 FOLLOWING AND 10 FOLLOWING) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN 5 following AND 10 following) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_0/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_0/out.sql index 29a0f6f63dc8..77b80b90de80 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_0/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_0/out.sql @@ -1,2 +1,3 @@ -SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5/out.sql index 69569d915f04..72fbc24d4f8b 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5/out.sql @@ -1,2 +1,3 @@ -SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN 5 PRECEDING AND UNBOUNDED FOLLOWING) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN 5 preceding AND UNBOUNDED FOLLOWING) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_0/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_0/out.sql index e8649012e560..a7b4b896db68 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_0/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_0/out.sql @@ -1,2 +1,3 @@ -SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN 5 preceding AND CURRENT ROW) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_2/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_2/out.sql index 58bb52bbe936..15f6313e751c 100644 --- 
a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_2/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_2/out.sql @@ -1,2 +1,3 @@ -SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN 5 PRECEDING AND 2 FOLLOWING) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN 5 preceding AND 2 following) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/trailing_10/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/trailing_10/out.sql index 911a39e3a00d..e7baeb238d4d 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/trailing_10/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/trailing_10/out.sql @@ -1,2 +1,3 @@ -SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN 10 PRECEDING AND CURRENT ROW) AS `foo` -FROM `alltypes` t0 \ No newline at end of file +SELECT + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN 10 preceding AND CURRENT ROW) AS `foo` +FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/test_bucket_histogram.py b/ibis/backends/impala/tests/test_bucket_histogram.py index 75ca56224189..4e630f5912bb 100644 --- a/ibis/backends/impala/tests/test_bucket_histogram.py +++ b/ibis/backends/impala/tests/test_bucket_histogram.py @@ -87,4 +87,4 @@ def test_bucket_assign_labels(table, snapshot): ).name("tier2") expr = size[labelled, size[1]] - snapshot.assert_match(ImpalaCompiler.to_sql(expr), "out.sql") + snapshot.assert_match(translate(expr), "out.sql") diff --git a/ibis/backends/impala/tests/test_case_exprs.py b/ibis/backends/impala/tests/test_case_exprs.py index d5263f1e5f51..e559517e8864 100644 --- a/ibis/backends/impala/tests/test_case_exprs.py +++ b/ibis/backends/impala/tests/test_case_exprs.py @@ -95,11 +95,11 @@ def test_decimal_fillna_cast_arg(tpch_lineitem, expr_fn, snapshot): def test_identical_to(mockcon, snapshot): t = mockcon.table("functional_alltypes") expr = t.tinyint_col.identical_to(t.double_col).name("tmp") - result = ImpalaCompiler.to_sql(expr) + result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") def test_identical_to_special_case(snapshot): expr = ibis.NA.cast("int64").identical_to(ibis.NA.cast("int64")).name("tmp") - result = ImpalaCompiler.to_sql(expr) + result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") diff --git a/ibis/backends/impala/tests/test_client.py b/ibis/backends/impala/tests/test_client.py index 4f92f3d06937..e1050a1c9698 100644 --- a/ibis/backends/impala/tests/test_client.py +++ b/ibis/backends/impala/tests/test_client.py @@ -234,4 +234,4 @@ def test_list_databases(con): def test_list_tables(con, test_data_db): assert con.list_tables(database=test_data_db) - assert con.list_tables(like="*nat*", database=test_data_db) + assert con.list_tables(like=".*nat.*", database=test_data_db) diff --git a/ibis/backends/impala/tests/test_ddl.py b/ibis/backends/impala/tests/test_ddl.py index 774a72cdfa3c..822f801ec973 100644 --- a/ibis/backends/impala/tests/test_ddl.py +++ b/ibis/backends/impala/tests/test_ddl.py @@ -14,7 +14,8 @@ from ibis.tests.util import assert_equal pytest.importorskip("impala") -from ibis.backends.impala.compat import HS2Error # noqa: E402 + +from impala.error 
import HiveServer2Error # noqa: E402 @pytest.fixture @@ -71,7 +72,7 @@ def test_create_table_with_location_execute( def test_drop_table_not_exist(con): non_existent_table = f"ibis_table_{util.guid()}" - with pytest.raises(HS2Error): + with pytest.raises(HiveServer2Error): con.drop_table(non_existent_table) con.drop_table(non_existent_table, force=True) @@ -83,7 +84,7 @@ def test_truncate_table(con, alltypes, temp_table): try: con.truncate_table(temp_table) - except HS2Error as e: + except HiveServer2Error as e: if "AnalysisException" in e.args[0]: pytest.skip("TRUNCATE not available in this version of Impala") diff --git a/ibis/backends/impala/tests/test_ddl_compilation.py b/ibis/backends/impala/tests/test_ddl_compilation.py index 953b3cd43398..4b96400e6e8f 100644 --- a/ibis/backends/impala/tests/test_ddl_compilation.py +++ b/ibis/backends/impala/tests/test_ddl_compilation.py @@ -10,7 +10,6 @@ InsertSelect, ) from ibis.backends.impala import ddl -from ibis.backends.impala.compiler import ImpalaCompiler @pytest.fixture @@ -32,8 +31,8 @@ def test_select_basics(t, snapshot): name = "testing123456" expr = t.limit(10) - select, _ = _get_select(expr) + select = str(ibis.to_sql(expr, "impala")) stmt = InsertSelect(name, select, database="foo") result = stmt.compile() snapshot.assert_match(result, "out1.sql") @@ -174,7 +173,7 @@ def expr(t): def test_create_external_table_as(mockcon, snapshot): path = "/path/to/table" - select, _ = _get_select(mockcon.table("test1")) + select = ibis.to_sql(mockcon.table("test1"), "impala") statement = CTAS( "another_table", select, @@ -328,20 +327,7 @@ def test_avro_other_formats(t, snapshot): def _create_table(table_name, expr, database=None, can_exist=False, format="parquet"): - ast = ImpalaCompiler.to_ast(expr) - select = ast.queries[0] - statement = CTAS( - table_name, - select, - database=database, - format=format, - can_exist=can_exist, + select = str(ibis.to_sql(expr, dialect="impala")) + return CTAS( + table_name, select, database=database, format=format, can_exist=can_exist ) - return statement - - -def _get_select(expr, context=None): - ast = ImpalaCompiler.to_ast(expr, context) - select = ast.queries[0] - context = ast.context - return select, context diff --git a/ibis/backends/impala/tests/test_exprs.py b/ibis/backends/impala/tests/test_exprs.py index 01dc5f08ccd9..34aa186bbda7 100644 --- a/ibis/backends/impala/tests/test_exprs.py +++ b/ibis/backends/impala/tests/test_exprs.py @@ -5,12 +5,12 @@ import pandas as pd import pandas.testing as tm import pytest +from impala.error import HiveServer2Error from pytest import param import ibis import ibis.expr.types as ir from ibis import literal as L -from ibis.backends.impala.compiler import ImpalaCompiler from ibis.expr import api @@ -141,7 +141,7 @@ def test_builtins(con, alltypes): def _check_impala_output_types_match(con, table): - query = ImpalaCompiler.to_sql(table) + query = ibis.to_sql(table, dialect="impala") t = con.sql(query) left_schema, right_schema = t.schema(), table.schema() @@ -169,7 +169,7 @@ def _check_impala_output_types_match(con, table): ) def test_int_builtins(con, expr, expected): result = con.execute(expr) - assert result == expected, ImpalaCompiler.to_sql(expr) + assert result == expected, ibis.to_sql(expr, dialect="impala") @pytest.mark.parametrize( @@ -226,7 +226,7 @@ def test_column_types(alltypes_df, col, expected): ) def test_timestamp_builtins(con, expr, expected): result = con.execute(expr) - assert result == expected, ImpalaCompiler.to_sql(expr) + assert result == expected, 
ibis.to_sql(expr, dialect="impala") @pytest.mark.parametrize( @@ -245,7 +245,7 @@ def test_timestamp_builtins(con, expr, expected): ) def test_decimal_builtins(con, expr, expected): result = con.execute(expr) - assert result == expected, ImpalaCompiler.to_sql(expr) + assert result == expected, ibis.to_sql(expr, dialect="impala") def approx_equal(a, b, eps): @@ -586,7 +586,11 @@ def test_tpch_correlated_subquery_failure(con): amount_filter = tpch.amount > conditional_avg expr = tpch[amount_filter].limit(0) - con.explain(expr) + + # impala can't plan this because its correlated subquery implementation is + # broken: it cannot detect the outer reference inside the inner query + with pytest.raises(HiveServer2Error, match="Could not resolve .+ reference"): + con.explain(expr) def test_non_equijoin(con): diff --git a/ibis/backends/impala/tests/test_in_not_in.py b/ibis/backends/impala/tests/test_in_not_in.py index fb76bd919096..ceeb3aebe002 100644 --- a/ibis/backends/impala/tests/test_in_not_in.py +++ b/ibis/backends/impala/tests/test_in_not_in.py @@ -3,7 +3,6 @@ import pytest from ibis import literal as L -from ibis.backends.impala.compiler import ImpalaCompiler from ibis.backends.impala.tests.conftest import translate @@ -35,5 +34,5 @@ def test_isin_notin_in_select(table, method_name, snapshot): values = ["foo", "bar"] method = getattr(table.g, method_name) filtered = table[method(values)] - result = ImpalaCompiler.to_sql(filtered) + result = translate(filtered) snapshot.assert_match(result, "out.sql") diff --git a/ibis/backends/impala/tests/test_parquet_ddl.py b/ibis/backends/impala/tests/test_parquet_ddl.py index fda0c24e8403..86ed5250c02e 100644 --- a/ibis/backends/impala/tests/test_parquet_ddl.py +++ b/ibis/backends/impala/tests/test_parquet_ddl.py @@ -9,7 +9,7 @@ pytest.importorskip("impala") -from ibis.backends.impala.compat import HS2Error # noqa: E402 +from impala.error import HiveServer2Error # noqa: E402 def test_parquet_file_with_name(con, test_data_dir, temp_table): @@ -91,5 +91,5 @@ def test_create_table_persist_fails_if_called_twice(con, temp_table, test_data_d hdfs_path = pjoin(test_data_dir, "impala/parquet/region") con.parquet_file(hdfs_path, like_table="region", name=temp_table) - with pytest.raises(HS2Error): + with pytest.raises(HiveServer2Error): con.parquet_file(hdfs_path, like_table="region", name=temp_table) diff --git a/ibis/backends/impala/tests/test_partition.py b/ibis/backends/impala/tests/test_partition.py index ae096654c677..4549ae4652a2 100644 --- a/ibis/backends/impala/tests/test_partition.py +++ b/ibis/backends/impala/tests/test_partition.py @@ -13,7 +13,7 @@ pytest.importorskip("impala") -from ibis.backends.impala.compat import ImpylaError # noqa: E402 +from impala.error import Error as ImpylaError # noqa: E402 @pytest.fixture diff --git a/ibis/backends/impala/tests/test_sql.py b/ibis/backends/impala/tests/test_sql.py index 53b811273a90..93f0a5f9600e 100644 --- a/ibis/backends/impala/tests/test_sql.py +++ b/ibis/backends/impala/tests/test_sql.py @@ -30,13 +30,13 @@ def test_join_no_predicates_for_impala(con, join_type, snapshot): t2 = con.table("star2") joined = getattr(t1, join_type)(t2)[[t1]] - result = ImpalaCompiler.to_sql(joined) + result = ibis.to_sql(joined, dialect="impala") snapshot.assert_match(result, "out.sql") def test_limit_cte_extract(limit_cte_extract, snapshot): case = limit_cte_extract - result = ImpalaCompiler.to_sql(case) + result = ibis.to_sql(case, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -45,7 +45,7 @@ def 
test_nested_join_base(snapshot): counts = t.group_by("uuid").size() max_counts = counts.group_by("uuid").aggregate(max_count=lambda x: x[1].max()) result = max_counts.left_join(counts, "uuid").select(counts) - compiled_result = ImpalaCompiler.to_sql(result) + compiled_result = ibis.to_sql(result, dialect="impala") snapshot.assert_match(compiled_result, "out.sql") @@ -65,7 +65,7 @@ def test_nested_joins_single_cte(snapshot): result = main_kw.left_join(last_visit, "uuid").select( main_kw, last_visit.last_visit ) - compiled_result = ImpalaCompiler.to_sql(result) + compiled_result = ibis.to_sql(result, dialect="impala") snapshot.assert_match(compiled_result, "out.sql") @@ -87,7 +87,7 @@ def test_nested_join_multiple_ctes(snapshot): # that for now see issue #1295 cond = joined3.movieid.isin(top_user_old_movie_ids.movieid) result = joined3[cond] - compiled_result = ImpalaCompiler.to_sql(result) + compiled_result = ibis.to_sql(result, dialect="impala") snapshot.assert_match(compiled_result, "out.sql") @@ -101,7 +101,7 @@ def f(t): return t.a.isin(["foo"]) & t.c.notnull() expr = (~f(t)).name("tmp") - result = ImpalaCompiler.to_sql(expr) + result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -111,7 +111,7 @@ def test_join_with_nested_or_condition(snapshot): joined = t1.join(t2, [t1.a == t2.a, (t1.a != t2.b) | (t1.b != t2.a)]) expr = joined[t1] - result = ImpalaCompiler.to_sql(expr) + result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -121,7 +121,7 @@ def test_join_with_nested_xor_condition(snapshot): joined = t1.join(t2, [t1.a == t2.a, (t1.a != t2.b) ^ (t1.b != t2.a)]) expr = joined[t1] - result = ImpalaCompiler.to_sql(expr) + result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -131,7 +131,7 @@ def test_is_parens(method, snapshot): func = operator.methodcaller(method) expr = t[func(t.a) == func(t.b)] - result = ImpalaCompiler.to_sql(expr) + result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -139,7 +139,7 @@ def test_is_parens_identical_to(snapshot): t = ibis.table([("a", "string"), ("b", "string")], "table") expr = t[t.a.identical_to(None) == t.b.identical_to(None)] - result = ImpalaCompiler.to_sql(expr) + result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -162,7 +162,7 @@ def test_join_aliasing(snapshot): .view() ) result = agg.join(test5, agg.d == test5.d)[agg, test5.total] - result = ImpalaCompiler.to_sql(result) + result = ibis.to_sql(result, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -170,7 +170,7 @@ def test_multiple_filters(snapshot): t = ibis.table([("a", "int64"), ("b", "string")], name="t0") filt = t[t.a < 100] expr = filt[filt.a == filt.a.max()] - result = ImpalaCompiler.to_sql(expr) + result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -179,7 +179,7 @@ def test_multiple_filters2(snapshot): filt = t[t.a < 100] expr = filt[filt.a == filt.a.max()] expr = expr[expr.b == "a"] - result = ImpalaCompiler.to_sql(expr) + result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") diff --git a/ibis/backends/impala/tests/test_udf.py b/ibis/backends/impala/tests/test_udf.py index ba489522f180..5891df86dc53 100644 --- a/ibis/backends/impala/tests/test_udf.py +++ b/ibis/backends/impala/tests/test_udf.py @@ -97,18 +97,24 @@ def all_cols(i8, i16, i32, i64, d, f, dec, s, b, t): def test_sql_generation(snapshot): - func = 
api.scalar_function(["string"], "string", name="Tester") - func.register("identity", "udf_testing") - + func = api.scalar_function( + ["string"], "string", name="identity", database="udf_testing" + ) result = func("hello world") snapshot.assert_match(ibis.impala.compile(result), "out.sql") def test_sql_generation_from_infoclass(snapshot): - func = api.wrap_udf("test.so", ["string"], "string", "info_test") + func = api.wrap_udf( + "test.so", + ["string"], + "string", + "info_test", + name="info_test", + database="udf_testing", + ) repr(func) - func.register("info_test", "udf_testing") result = func("hello world").name("tmp") snapshot.assert_match(ibis.impala.compile(result), "out.sql") @@ -242,14 +248,12 @@ def test_mult_args(i32, d, s, b, t): def _register_udf(inputs, output, name): - func = api.scalar_function(inputs, output, name=name) - func.register(name, "ibis_testing") + func = api.scalar_function(inputs, output, name=name, database="ibis_testing") return func def _register_uda(inputs, output, name): - func = api.aggregate_function(inputs, output, name=name) - func.register(name, "ibis_testing") + func = api.aggregate_function(inputs, output, name=name, database="ibis_testing") return func @@ -438,8 +442,9 @@ def test_udf_varargs(con, alltypes, udf_ll, test_data_db): name = f"add_numbers_{util.guid()[:4]}" input_sig = rules.varargs(rules.double) - func = api.wrap_udf(udf_ll, input_sig, "double", "AddNumbers", name=name) - func.register(name, test_data_db) + func = api.wrap_udf( + udf_ll, input_sig, "double", "AddNumbers", name=name, database=test_data_db + ) con.create_function(func, database=test_data_db) expr = func(t.double_col, t.double_col) @@ -459,12 +464,10 @@ def test_drop_uda_not_exists(con): def udf_creation_to_op(udf_ll, con, test_data_db, name, symbol, inputs, output): - func = api.wrap_udf(udf_ll, inputs, output, symbol, name) + func = api.wrap_udf(udf_ll, inputs, output, symbol, name, database=test_data_db) con.create_function(func, database=test_data_db) - func.register(name, test_data_db) - assert con.exists_udf(name, test_data_db) return func @@ -492,18 +495,18 @@ def conforming_wrapper(where, inputs, output, prefix, serialize=True, name=None) @pytest.fixture -def wrapped_count_uda(uda_so): +def wrapped_count_uda(uda_so, test_data_db): name = f"user_count_{util.guid()}" - return api.wrap_uda(uda_so, ["int32"], "int64", "CountUpdate", name=name) + return api.wrap_uda( + uda_so, ["int32"], "int64", "CountUpdate", name=name, database=test_data_db + ) def test_count_uda(con, alltypes, test_data_db, wrapped_count_uda): - func = wrapped_count_uda - func.register(func.name, test_data_db) - con.create_function(func, database=test_data_db) + con.create_function(wrapped_count_uda, database=test_data_db) # it works! 
- func(alltypes.int_col).execute() + wrapped_count_uda(alltypes.int_col).execute() def test_list_udas(con, wrapped_count_uda): @@ -512,10 +515,12 @@ def test_list_udas(con, wrapped_count_uda): funcs = con.list_udas() - (f,) = (ff for ff in funcs if func.name == ff.name) - assert f.name == func.name - assert f.inputs == func.inputs - assert f.output == func.output + ((name, inputs, output),) = ( + (name, inputs, output) for _, name, inputs, output in funcs if func.name == name + ) + assert func.name == name + assert func.inputs == inputs + assert func.output == output @pytest.fixture diff --git a/ibis/backends/impala/tests/test_value_exprs.py b/ibis/backends/impala/tests/test_value_exprs.py index a4f75ff4673a..406581936b15 100644 --- a/ibis/backends/impala/tests/test_value_exprs.py +++ b/ibis/backends/impala/tests/test_value_exprs.py @@ -5,8 +5,8 @@ from pytest import param import ibis +import ibis.common.exceptions as com from ibis import literal as L -from ibis.backends.impala.compiler import ImpalaCompiler from ibis.backends.impala.tests.conftest import translate @@ -33,33 +33,14 @@ def test_literals(value, snapshot): snapshot.assert_match(result, "out.sql") -def test_column_ref_table_aliases(snapshot): - context = ImpalaCompiler.make_context() - +def test_column_ref_table_aliases(): table1 = ibis.table([("key1", "string"), ("value1", "double")]) - table2 = ibis.table([("key2", "string"), ("value and2", "double")]) - context.set_ref(table1.op(), "t0") - context.set_ref(table2.op(), "t1") - expr = table1["value1"] - table2["value and2"] - result = translate(expr, context=context) - snapshot.assert_match(result, "out.sql") - - -def test_column_ref_quoting(): - schema = [("has a space", "double")] - table = ibis.table(schema) - translate(table["has a space"], named="`has a space`") - - -def test_identifier_quoting(): - schema = [("date", "double"), ("table", "string")] - table = ibis.table(schema) - translate(table["date"], named="`date`") - translate(table["table"], named="`table`") + with pytest.raises(com.RelationError, match="multiple base table references"): + translate(expr) @pytest.mark.parametrize( @@ -73,7 +54,7 @@ def test_identifier_quoting(): ) def test_named_expressions(table, expr_fn, snapshot): expr = expr_fn(table) - result = translate(expr, named=True) + result = translate(expr) snapshot.assert_match(result, "out.sql") @@ -200,7 +181,7 @@ def test_sql_extract(table, snapshot): table.i.day().name("day"), ] - result = ImpalaCompiler.to_sql(expr) + result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -270,18 +251,12 @@ def test_correlated_predicate_subquery(table, snapshot): t0 = table t1 = t0.view() - expr = t0.g == t1.g + # both are valid constructions + expr1 = t0[t0.g == t1.g] + expr2 = t1[t0.g == t1.g] - ctx = ImpalaCompiler.make_context() - ctx.make_alias(t0.op()) - - # Grab alias from parent context - subctx = ctx.subcontext() - subctx.make_alias(t1.op()) - subctx.make_alias(t0.op()) - - result = translate(expr, context=subctx) - snapshot.assert_match(result, "out.sql") + snapshot.assert_match(translate(expr1), "out1.sql") + snapshot.assert_match(translate(expr2), "out2.sql") @pytest.mark.parametrize( diff --git a/ibis/backends/impala/tests/test_window.py b/ibis/backends/impala/tests/test_window.py index a3c1aca3b1d7..5f0451f16adf 100644 --- a/ibis/backends/impala/tests/test_window.py +++ b/ibis/backends/impala/tests/test_window.py @@ -18,7 +18,7 @@ def alltypes(mockcon): def assert_sql_equal(expr, snapshot, out="out.sql"): - result = 
ImpalaCompiler.to_sql(expr) + result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, out) @@ -71,7 +71,7 @@ def test_window_rows_with_max_lookback(alltypes): w = ibis.trailing_window(mlb, order_by=t.i) expr = t.a.sum().over(w) with pytest.raises(NotImplementedError): - ImpalaCompiler.to_sql(expr) + ibis.to_sql(expr, dialect="impala") @pytest.mark.parametrize("name", ["sum", "min", "max", "mean"]) @@ -148,19 +148,6 @@ def test_row_number_properly_composes_with_arithmetic(alltypes, snapshot): assert_sql_equal(expr, snapshot) -@pytest.mark.parametrize( - ["column", "op"], - [("f", "approx_nunique"), ("f", "approx_median"), ("g", "group_concat")], -) -def test_unsupported_aggregate_functions(alltypes, column, op): - t = alltypes - w = ibis.window(order_by=t.d) - expr = getattr(t[column], op)() - proj = t.select(foo=expr.over(w)) - with pytest.raises(com.TranslationError): - ImpalaCompiler.to_sql(proj) - - def test_propagate_nested_windows(alltypes, snapshot): # GH #469 t = alltypes diff --git a/ibis/backends/impala/udf.py b/ibis/backends/impala/udf.py index 100a41791f30..7fc21d1fb31c 100644 --- a/ibis/backends/impala/udf.py +++ b/ibis/backends/impala/udf.py @@ -14,31 +14,22 @@ from __future__ import annotations import abc -import re +import inspect import ibis.common.exceptions as com import ibis.expr.datatypes as dt import ibis.expr.operations as ops -import ibis.expr.rules as rlz from ibis import util -from ibis.backends.base.sql.registry import fixed_arity, sql_type_names -from ibis.backends.impala.compiler import ImpalaExprTranslator -from ibis.legacy.udf.validate import validate_output_type -__all__ = [ - "add_operation", - "scalar_function", - "aggregate_function", - "wrap_udf", - "wrap_uda", -] +__all__ = ["scalar_function", "aggregate_function", "wrap_udf", "wrap_uda"] -class Function(metaclass=abc.ABCMeta): - def __init__(self, inputs, output, name): +class Function(abc.ABC): + def __init__(self, inputs, output, name, database): self.inputs = tuple(map(dt.dtype, inputs)) self.output = dt.dtype(output) self.name = name or util.guid() + self.database = database self._klass = self._create_operation_class() @abc.abstractmethod @@ -46,38 +37,50 @@ def _create_operation_class(self): pass def __repr__(self): - klass = type(self).__name__ - return f"{klass}({self.name}, {self.inputs!r}, {self.output!r})" + ident = ".".join(filter(None, (self.database, self.name))) + return f"{ident}({self.inputs!r}, {self.output!r})" def __call__(self, *args): - return self._klass(*args).to_expr() + return self._klass(*args) - def register(self, name: str, database: str) -> None: - """Register the given operation. + def _make_fn(self): + def fn(*args, **kwargs): + ... 
- Parameters - ---------- - name - Used in issuing statements to SQL engine - database - Database the relevant operator is registered to - """ - add_operation(self._klass, name, database) + fn.__name__ = self.name + fn.__signature__ = inspect.Signature( + parameters=[ + inspect.Parameter( + f"input{i:d}", + annotation=input, + kind=inspect.Parameter.POSITIONAL_ONLY, + ) + for i, input in enumerate(self.inputs) + ], + return_annotation=self.output, + ) + + return fn class ScalarFunction(Function): def _create_operation_class(self): - fields = {f"_{i}": rlz.ValueOf(dtype) for i, dtype in enumerate(self.inputs)} - fields["dtype"] = self.output - fields["shape"] = rlz.shape_like("args") - return type(f"UDF_{self.name}", (ops.Value,), fields) + return ops.scalar.builtin( + fn=self._make_fn(), + name=self.name, + signature=(self.inputs, self.output), + schema=self.database, + ) class AggregateFunction(Function): def _create_operation_class(self): - fields = {f"_{i}": rlz.ValueOf(dtype) for i, dtype in enumerate(self.inputs)} - fields["dtype"] = self.output - return type(f"UDA_{self.name}", (ops.Reduction,), fields) + return ops.agg.builtin( + fn=self._make_fn(), + name=self.name, + signature=(self.inputs, self.output), + schema=self.database, + ) class ImpalaFunction: @@ -93,28 +96,19 @@ def _check_library(self): if suffix not in [".so", ".ll"]: raise ValueError("Invalid file type. Must be .so or .ll ") - def hash(self): - raise NotImplementedError - class ImpalaUDF(ScalarFunction, ImpalaFunction): """Feel free to customize my __doc__ or wrap in a nicer user API.""" - def __init__(self, inputs, output, so_symbol=None, lib_path=None, name=None): + def __init__( + self, inputs, output, so_symbol=None, lib_path=None, name=None, database=None + ): + from ibis.legacy.udf.validate import validate_output_type + validate_output_type(output) self.so_symbol = so_symbol ImpalaFunction.__init__(self, name=name, lib_path=lib_path) - ScalarFunction.__init__(self, inputs, output, name=self.name) - - def hash(self): - # TODO: revisit this later - # from hashlib import sha1 - # val = self.so_symbol - # for in_type in self.inputs: - # val += in_type.name() - - # return sha1(val).hexdigest() - pass + ScalarFunction.__init__(self, inputs, output, name=self.name, database=database) class ImpalaUDA(AggregateFunction, ImpalaFunction): @@ -129,6 +123,7 @@ def __init__( serialize_fn=None, lib_path=None, name=None, + database=None, ): self.init_fn = init_fn self.update_fn = update_fn @@ -136,10 +131,14 @@ def __init__( self.finalize_fn = finalize_fn self.serialize_fn = serialize_fn + from ibis.legacy.udf.validate import validate_output_type + validate_output_type(output) ImpalaFunction.__init__(self, name=name, lib_path=lib_path) - AggregateFunction.__init__(self, inputs, output, name=self.name) + AggregateFunction.__init__( + self, inputs, output, name=self.name, database=database + ) def _check_library(self): suffix = self.lib_path[-3:] @@ -159,6 +158,7 @@ def wrap_uda( finalize_fn: str | None = None, serialize_fn: str | None = None, name: str | None = None, + database: str | None = None, ): """Creates a callable aggregation function object. @@ -185,10 +185,8 @@ def wrap_uda( UDAs. 
name Used internally to track function - - Returns - ------- - container : UDA object + database + Name of database """ return ImpalaUDA( inputs, @@ -200,10 +198,11 @@ def wrap_uda( serialize_fn=serialize_fn, name=name, lib_path=hdfs_file, + database=database, ) -def wrap_udf(hdfs_file, inputs, output, so_symbol, name=None): +def wrap_udf(hdfs_file, inputs, output, so_symbol, name=None, database=None): """Creates a callable scalar function object. Must be created in Impala to be used. @@ -220,13 +219,17 @@ def wrap_udf(hdfs_file, inputs, output, so_symbol, name=None): C++ function name for relevant UDF name Used internally to track function + database + Name of database """ - func = ImpalaUDF(inputs, output, so_symbol, name=name, lib_path=hdfs_file) + func = ImpalaUDF( + inputs, output, so_symbol, name=name, lib_path=hdfs_file, database=database + ) return func -def scalar_function(inputs, output, name=None): - """Creates an operator class that can be passed to add_operation(). +def scalar_function(inputs, output, name=None, database=None): + """Create an operator class. Parameters ---------- @@ -236,96 +239,24 @@ def scalar_function(inputs, output, name=None): Ibis data type name Used internally to track function + database + Name of database """ - return ScalarFunction(inputs, output, name=name) + return ScalarFunction(inputs, output, name=name, database=database) -def aggregate_function(inputs, output, name=None): - """Creates an operator class that can be passed to add_operation(). +def aggregate_function(inputs, output, name=None, database=None): + """Create an operator class. Parameters ---------- - inputs: list of strings - Ibis data type names - output: string - Ibis data type - name: string, optional + inputs + Ibis data type names + output + Ibis data type + name Used internally to track function + database + Name of database """ - return AggregateFunction(inputs, output, name=name) - - -def add_operation(op, func_name, db): - """Registers the given operation within the Ibis SQL translation toolchain. 
- - Parameters - ---------- - op - operator class - func_name - used in issuing statements to SQL engine - db - database the relevant operator is registered to - """ - full_name = f"{db}.{func_name}" - arity = len(op.__signature__.parameters) - translator = fixed_arity(full_name, arity) - - ImpalaExprTranslator._registry[op] = translator - - -def parse_type(t): - t = t.lower() - if t in _impala_to_ibis_type: - return _impala_to_ibis_type[t] - elif "varchar" in t or "char" in t: - return "string" - elif "decimal" in t: - result = dt.dtype(t) - if result: - return t - else: - return ValueError(t) - else: - raise Exception(t) - - -_VARCHAR_RE = re.compile(r"varchar\((\d+)\)") - - -def _parse_varchar(t): - m = _VARCHAR_RE.match(t) - if m: - return "string" - return None - - -def _impala_type_to_ibis(tval): - if tval in _impala_to_ibis_type: - return _impala_to_ibis_type[tval] - return tval - - -def _ibis_string_to_impala(tval): - if tval in sql_type_names: - return sql_type_names[tval] - result = dt.validate_type(tval) - return repr(result) if result else None - - -_impala_to_ibis_type = { - "boolean": "boolean", - "tinyint": "int8", - "smallint": "int16", - "int": "int32", - "bigint": "int64", - "float": "float32", - "double": "float64", - "string": "string", - "varchar": "string", - "char": "string", - "timestamp": "timestamp", - "decimal": "decimal", - "date": "date", - "void": "null", -} + return AggregateFunction(inputs, output, name=name, database=database) diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/impala/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/impala/out.sql new file mode 100644 index 000000000000..f63de03c314a --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/impala/out.sql @@ -0,0 +1,5 @@ +SELECT + `t0`.`id`, + `t0`.`bool_col` +FROM `functional_alltypes` AS `t0` +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/impala/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/impala/out.sql new file mode 100644 index 000000000000..f63de03c314a --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/impala/out.sql @@ -0,0 +1,5 @@ +SELECT + `t0`.`id`, + `t0`.`bool_col` +FROM `functional_alltypes` AS `t0` +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/impala/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/impala/out.sql new file mode 100644 index 000000000000..d8a9c4090dc1 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/impala/out.sql @@ -0,0 +1,3 @@ +SELECT + SUM(`t0`.`bigint_col`) AS `Sum(bigint_col)` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/impala/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/impala/out.sql new file mode 100644 index 000000000000..d4b1b19815b0 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/impala/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + `t0`.`id`, + `t0`.`bool_col` + FROM `functional_alltypes` AS `t0` + LIMIT 10 +) AS `t2` +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/impala/out.sql 
b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/impala/out.sql index f1c63ad8f7e4..ac006b1d5f25 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/impala/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/impala/out.sql @@ -1,5 +1,5 @@ SELECT - CASE t0.`continent` + CASE `t0`.`continent` WHEN 'NA' THEN 'North America' WHEN 'SA' @@ -16,7 +16,7 @@ SELECT THEN 'Antarctica' ELSE 'Unknown continent' END AS `cont`, - SUM(t0.`population`) AS `total_pop` -FROM `countries` AS t0 + SUM(`t0`.`population`) AS `total_pop` +FROM `countries` AS `t0` GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/impala/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/impala/out.sql index 409cf82c7981..db5ddb124e86 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/impala/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/impala/out.sql @@ -1,13 +1,9 @@ SELECT - t0.`x` IN ( + `t0`.`x` IN ( SELECT - t1.`x` - FROM ( - SELECT - t0.* - FROM `t` AS t0 - WHERE - t0.`x` > 2 - ) AS t1 - ) AS `InColumn(x, x)` -FROM `t` AS t0 \ No newline at end of file + `t0`.`x` + FROM `t` AS `t0` + WHERE + `t0`.`x` > 2 + ) AS `InSubquery(x)` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 34dec9149fbc..ddbf55d77bbf 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -18,6 +18,7 @@ ClickHouseDatabaseError, ExaQueryError, GoogleBadRequest, + ImpalaHiveServer2Error, MySQLNotSupportedError, PolarsInvalidOperationError, Py4JError, @@ -844,7 +845,7 @@ def test_reduction_ops( reason="backend doesn't support count distinct with multiple columns", ) @pytest.mark.notyet( - ["datafusion", "impala"], + ["datafusion"], raises=com.OperationNotDefinedError, reason="no one has attempted implementation yet", ) @@ -914,7 +915,6 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): "bigquery", "dask", "datafusion", - "impala", "mssql", "polars", "sqlite", @@ -924,7 +924,9 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): ], raises=com.OperationNotDefinedError, ), - pytest.mark.notyet(["mysql"], raises=com.UnsupportedBackendType), + pytest.mark.notyet( + ["mysql", "impala"], raises=com.UnsupportedBackendType + ), pytest.mark.notyet( ["snowflake"], reason="backend doesn't implement array of quantiles as input", @@ -1600,7 +1602,8 @@ def test_grouped_case(backend, con): reason="Dask does not windowize this operation correctly", raises=AssertionError, ) -@pytest.mark.notyet(["impala", "flink"], raises=com.UnsupportedOperationError) +@pytest.mark.notyet(["flink"], raises=com.UnsupportedOperationError) +@pytest.mark.notyet(["impala"], raises=ImpalaHiveServer2Error) @pytest.mark.notyet(["clickhouse"], raises=ClickHouseDatabaseError) @pytest.mark.notyet(["druid"], raises=PyDruidProgrammingError) @pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 760334ef7235..75a0d5d21cef 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -20,6 +20,7 @@ from ibis.backends.tests.errors import ( ClickHouseDatabaseError, GoogleBadRequest, + ImpalaHiveServer2Error, MySQLOperationalError, PolarsComputeError, PsycoPg2IndeterminateDatatype, @@ -36,7 +37,15 @@ raises=Exception, ), 
pytest.mark.never(["mysql"], reason="No array support", raises=(com.UnsupportedBackendType, com.OperationNotDefinedError, MySQLOperationalError)), - pytest.mark.notyet(["impala"], reason="No array support", raises=Exception), + pytest.mark.notyet( + ["impala"], + reason="No array support", + raises=( + com.UnsupportedBackendType, + com.OperationNotDefinedError, + ImpalaHiveServer2Error, + ), + ), pytest.mark.notimpl(["druid", "oracle"], raises=Exception), ] @@ -420,7 +429,6 @@ def test_array_slice(backend, start, stop): [ "datafusion", "flink", - "impala", "mssql", "polars", "snowflake", @@ -483,7 +491,6 @@ def test_array_map(con, input, output): "dask", "datafusion", "flink", - "impala", "mssql", "pandas", "polars", @@ -537,7 +544,6 @@ def test_array_filter(con, input, output): @builtin_array @pytest.mark.notimpl(["mssql", "polars"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) -@pytest.mark.never(["impala"], reason="array_types table isn't defined") @pytest.mark.notimpl( ["risingwave"], raises=ValueError, @@ -602,9 +608,7 @@ def test_array_position(backend, con, a, expected_array): @builtin_array -@pytest.mark.notimpl( - ["dask", "impala", "mssql", "polars"], raises=com.OperationNotDefinedError -) +@pytest.mark.notimpl(["dask", "mssql", "polars"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["risingwave"], raises=AssertionError, @@ -639,8 +643,7 @@ def test_array_remove(con, a): @builtin_array @pytest.mark.notimpl( - ["dask", "datafusion", "impala", "mssql", "polars"], - raises=com.OperationNotDefinedError, + ["dask", "datafusion", "mssql", "polars"], raises=com.OperationNotDefinedError ) @pytest.mark.notimpl( ["sqlite"], raises=NotImplementedError, reason="Unsupported type: Array..." 
@@ -693,7 +696,7 @@ def test_array_unique(con, input, expected): @builtin_array @pytest.mark.notimpl( - ["dask", "datafusion", "flink", "impala", "mssql", "polars"], + ["dask", "datafusion", "flink", "mssql", "polars"], raises=com.OperationNotDefinedError, ) @pytest.mark.broken( @@ -714,8 +717,7 @@ def test_array_sort(con): @builtin_array @pytest.mark.notimpl( - ["dask", "datafusion", "impala", "mssql", "polars"], - raises=com.OperationNotDefinedError, + ["dask", "datafusion", "mssql", "polars"], raises=com.OperationNotDefinedError ) @pytest.mark.parametrize( ("a", "b", "expected_array"), @@ -759,7 +761,7 @@ def test_array_union(con, a, b, expected_array): @builtin_array @pytest.mark.notimpl( - ["dask", "datafusion", "impala", "mssql", "pandas", "polars", "flink"], + ["dask", "datafusion", "mssql", "pandas", "polars", "flink"], raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( @@ -827,7 +829,7 @@ def test_unnest_struct(con): @builtin_array @pytest.mark.never( - ["impala", "mssql"], raises=com.OperationNotDefinedError, reason="no array support" + ["mssql"], raises=com.OperationNotDefinedError, reason="no array support" ) @pytest.mark.notimpl( [ @@ -1081,7 +1083,6 @@ def test_unnest_empty_array(con): [ "datafusion", "flink", - "impala", "mssql", "polars", "snowflake", @@ -1107,7 +1108,6 @@ def test_array_map_with_conflicting_names(backend, con): [ "datafusion", "flink", - "impala", "mssql", "polars", "snowflake", @@ -1287,6 +1287,9 @@ def test_timestamp_range_zero_step(con, start, stop, step, tzinfo): assert list(result) == [] +@pytest.mark.notimpl( + ["impala"], raises=AssertionError, reason="backend doesn't support arrays" +) def test_repr_timestamp_array(con, monkeypatch): monkeypatch.setattr(ibis.options, "interactive", True) assert ibis.options.interactive is True diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index 6846bf96b4d0..a0a67a36c687 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -81,7 +81,16 @@ def time_keyed_right(time_keyed_df2): ("direction", "op"), [("backward", operator.ge), ("forward", operator.le)] ) @pytest.mark.notyet( - ["datafusion", "snowflake", "trino", "postgres", "mysql", "pyspark", "druid"] + [ + "datafusion", + "snowflake", + "trino", + "postgres", + "mysql", + "pyspark", + "druid", + "impala", + ] ) def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): on = op(time_left["time"], time_right["time"]) @@ -107,7 +116,16 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op ["clickhouse"], raises=AssertionError, reason="`time` is truncated to seconds" ) @pytest.mark.notyet( - ["datafusion", "snowflake", "trino", "postgres", "mysql", "pyspark", "druid"] + [ + "datafusion", + "snowflake", + "trino", + "postgres", + "mysql", + "pyspark", + "druid", + "impala", + ] ) def test_keyed_asof_join_with_tolerance( con, diff --git a/ibis/backends/tests/test_dot_sql.py b/ibis/backends/tests/test_dot_sql.py index b7f2c5dd5487..90c14a598da0 100644 --- a/ibis/backends/tests/test_dot_sql.py +++ b/ibis/backends/tests/test_dot_sql.py @@ -219,7 +219,10 @@ def test_dot_sql_reuse_alias_with_different_types(backend, alltypes, df): _NO_SQLGLOT_DIALECT = {"pandas", "dask", "druid", "flink", "risingwave"} no_sqlglot_dialect = sorted( - param(backend, marks=pytest.mark.xfail) for backend in _NO_SQLGLOT_DIALECT + # TODO(cpcloud): remove the strict=False hack once backends are ported to + # sqlglot + param(backend, 
marks=pytest.mark.xfail(strict=False)) + for backend in _NO_SQLGLOT_DIALECT ) @@ -283,7 +286,7 @@ def test_con_dot_sql_transpile(backend, con, dialect, df): @dot_sql_notimpl @dot_sql_never -@pytest.mark.notimpl(["druid", "flink", "impala", "polars"]) +@pytest.mark.notimpl(["druid", "flink", "polars"]) @pytest.mark.notyet(["snowflake"], reason="snowflake column names are case insensitive") @pytest.mark.notyet( ["risingwave"], @@ -308,5 +311,5 @@ def test_order_by_no_projection(backend): @dot_sql_never @pytest.mark.notyet(["polars"], raises=PolarsComputeError) def test_dot_sql_limit(con): - expr = con.sql("SELECT 'abc' ts").limit(1) + expr = con.sql("SELECT * FROM (SELECT 'abc' ts) _").limit(1) assert expr.execute().equals(pd.DataFrame({"ts": ["abc"]})) diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index f3bde9dafbe4..617abc539b64 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -1348,7 +1348,6 @@ def hash_256(col): "pandas", "dask", "bigquery", - "impala", "mssql", "oracle", "risingwave", @@ -1369,11 +1368,17 @@ def hash_256(col): "int", 1672531200, marks=[ - pytest.mark.notyet(["duckdb"], reason="casts to None"), - pytest.mark.notyet(["druid"], reason="returns milliseconds"), + pytest.mark.notyet(["duckdb", "impala"], reason="casts to NULL"), pytest.mark.notyet(["trino"], raises=TrinoUserError), - pytest.mark.broken(["polars"], reason="casts to 1672531200000000000"), - pytest.mark.broken(["datafusion"], reason="casts to 1672531200000000"), + pytest.mark.broken( + ["druid"], reason="casts to 1672531200000 (millisecond)" + ), + pytest.mark.broken( + ["polars"], reason="casts to 1672531200000000000 (nanoseconds)" + ), + pytest.mark.broken( + ["datafusion"], reason="casts to 1672531200000000 (microseconds)" + ), pytest.mark.broken(["mysql"], reason="returns 20230101000000"), ], ), @@ -1393,7 +1398,6 @@ def test_try_cast(con, from_val, to_type, expected): "datafusion", "druid", "exasol", - "impala", "mssql", "mysql", "oracle", @@ -1412,7 +1416,9 @@ def test_try_cast(con, from_val, to_type, expected): datetime.datetime(2023, 1, 1), "int", marks=[ - pytest.mark.never(["clickhouse"], reason="casts to 1672531200"), + pytest.mark.never( + ["clickhouse", "pyspark"], reason="casts to 1672531200" + ), pytest.mark.notyet(["trino"], raises=TrinoUserError), pytest.mark.broken(["polars"], reason="casts to 1672531200000000000"), ], @@ -1431,7 +1437,6 @@ def test_try_cast_null(con, from_val, to_type): "bigquery", "datafusion", "druid", - "impala", "mssql", "mysql", "oracle", @@ -1461,7 +1466,6 @@ def test_try_cast_table(backend, con): "dask", "bigquery", "datafusion", - "impala", "mssql", "mysql", "oracle", @@ -1476,7 +1480,7 @@ def test_try_cast_table(backend, con): @pytest.mark.parametrize( ("from_val", "to_type", "func"), [ - param("a", "float", pd.isna), + param("a", "float", pd.isna, id="string-to-float"), param( datetime.datetime(2023, 1, 1), "float", @@ -1488,6 +1492,7 @@ def test_try_cast_table(backend, con): ), pytest.mark.notyet(["trino"], raises=TrinoUserError), ], + id="datetime-to-float", ), ], ) @@ -1551,7 +1556,18 @@ def test_try_cast_func(con, from_val, to_type, func): ################## ### POSITIVE start # no stop - param(slice(3, 0), lambda _: 0, id="[3:0]"), + param( + slice(3, 0), + lambda _: 0, + id="[3:0]", + marks=[ + pytest.mark.never( + ["impala"], + raises=ImpalaHiveServer2Error, + reason="impala doesn't support OFFSET without ORDER BY", + ) + ], + ), param( slice(3, None), lambda t: 
t.count().to_pandas() - 3, @@ -1584,7 +1600,18 @@ def test_try_cast_func(con, from_val, to_type, func): ], ), # positive stop - param(slice(3, 2), lambda _: 0, id="[3:2]"), + param( + slice(3, 2), + lambda _: 0, + id="[3:2]", + marks=[ + pytest.mark.never( + ["impala"], + raises=ImpalaHiveServer2Error, + reason="impala doesn't support OFFSET without ORDER BY", + ) + ], + ), param( slice(3, 4), lambda _: 1, diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index f1118ca466e0..19bf6f15f35e 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -266,8 +266,8 @@ def test_numeric_literal(con, backend, expr, expected_types): "trino": decimal.Decimal("1.1"), "dask": decimal.Decimal("1.1"), "duckdb": decimal.Decimal("1.1"), - "risingwave": 1.1, + "impala": decimal.Decimal("1"), "postgres": decimal.Decimal("1.1"), "pandas": decimal.Decimal("1.1"), "pyspark": decimal.Decimal("1.1"), @@ -282,6 +282,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "bigquery": "NUMERIC", "snowflake": "DECIMAL", "sqlite": "real", + "impala": "DECIMAL(9,0)", "trino": "decimal(18,3)", "duckdb": "DECIMAL(18,3)", "postgres": "numeric", @@ -289,25 +290,12 @@ def test_numeric_literal(con, backend, expr, expected_types): "flink": "DECIMAL(38, 18) NOT NULL", }, marks=[ - pytest.mark.notimpl( - ["exasol"], - raises=ExaQueryError, - ), + pytest.mark.notimpl(["exasol"], raises=ExaQueryError), pytest.mark.notimpl( ["clickhouse"], "Unsupported precision. Supported values: [1 : 76]. Current value: None", raises=NotImplementedError, ), - pytest.mark.broken( - ["impala"], - "impala.error.HiveServer2Error: AnalysisException: Syntax error in line 1:" - "SELECT typeof(Decimal('1.1')) AS `TypeOf(Decimal('1.1'))" - "Encountered: DECIMAL" - "Expected: ALL, CASE, CAST, DEFAULT, DISTINCT, EXISTS, FALSE, IF, " - "INTERVAL, LEFT, NOT, NULL, REPLACE, RIGHT, TRUNCATE, TRUE, IDENTIFIER" - "CAUSED BY: Exception: Syntax error", - raises=ImpalaHiveServer2Error, - ), ], id="default", ), @@ -321,6 +309,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "trino": decimal.Decimal("1.1"), "duckdb": decimal.Decimal("1.100000000"), "risingwave": 1.1, + "impala": decimal.Decimal("1.1"), "postgres": decimal.Decimal("1.1"), "pandas": decimal.Decimal("1.1"), "pyspark": decimal.Decimal("1.1"), @@ -337,6 +326,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "bigquery": "NUMERIC", "clickhouse": "Decimal(38, 9)", "snowflake": "DECIMAL", + "impala": "DECIMAL(38,9)", "sqlite": "real", "trino": "decimal(38,9)", "duckdb": "DECIMAL(38,9)", @@ -344,22 +334,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "risingwave": "numeric", "flink": "DECIMAL(38, 9) NOT NULL", }, - marks=[ - pytest.mark.notimpl( - ["exasol"], - raises=ExaQueryError, - ), - pytest.mark.broken( - ["impala"], - "impala.error.HiveServer2Error: AnalysisException: Syntax error in line 1:" - "SELECT typeof(Decimal('1.1')) AS `TypeOf(Decimal('1.1'))" - "Encountered: DECIMAL" - "Expected: ALL, CASE, CAST, DEFAULT, DISTINCT, EXISTS, FALSE, IF, " - "INTERVAL, LEFT, NOT, NULL, REPLACE, RIGHT, TRUNCATE, TRUE, IDENTIFIER" - "CAUSED BY: Exception: Syntax error", - raises=ImpalaHiveServer2Error, - ), - ], + marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], id="decimal-small", ), param( @@ -393,16 +368,7 @@ def test_numeric_literal(con, backend, expr, expected_types): pytest.mark.notimpl(["exasol"], raises=ExaQueryError), pytest.mark.notimpl(["mysql"], 
raises=MySQLOperationalError), pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError), - pytest.mark.broken( - ["impala"], - "impala.error.HiveServer2Error: AnalysisException: Syntax error in line 1:" - "SELECT typeof(Decimal('1.2')) AS `TypeOf(Decimal('1.2'))" - "Encountered: DECIMAL" - "Expected: ALL, CASE, CAST, DEFAULT, DISTINCT, EXISTS, FALSE, IF, " - "INTERVAL, LEFT, NOT, NULL, REPLACE, RIGHT, TRUNCATE, TRUE, IDENTIFIER" - "CAUSED BY: Exception: Syntax error", - raises=ImpalaHiveServer2Error, - ), + pytest.mark.notyet(["impala"], raises=ImpalaHiveServer2Error), pytest.mark.broken( ["duckdb"], reason="Unsupported precision.", @@ -436,7 +402,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "pandas": decimal.Decimal("Infinity"), "dask": decimal.Decimal("Infinity"), "pyspark": decimal.Decimal("Infinity"), - "impala": float("inf"), "exasol": float("inf"), "duckdb": float("inf"), }, @@ -445,7 +410,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "sqlite": "real", "postgres": "numeric", "risingwave": "numeric", - "impala": "DOUBLE", "duckdb": "FLOAT", }, marks=[ @@ -461,7 +425,9 @@ def test_numeric_literal(con, backend, expr, expected_types): "query_id=20230128_024107_01084_y8zm3)", raises=sa.exc.ProgrammingError, ), - pytest.mark.notyet(["mysql"], raises=com.UnsupportedOperationError), + pytest.mark.notyet( + ["mysql", "impala"], raises=com.UnsupportedOperationError + ), pytest.mark.broken( ["mssql"], "(pymssql._pymssql.ProgrammingError) (207, b\"Invalid column name 'Infinity'." @@ -513,7 +479,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "pandas": decimal.Decimal("-Infinity"), "dask": decimal.Decimal("-Infinity"), "pyspark": decimal.Decimal("-Infinity"), - "impala": float("-inf"), "exasol": float("-inf"), "duckdb": float("-inf"), }, @@ -522,7 +487,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "sqlite": "real", "postgres": "numeric", "risingwave": "numeric", - "impala": "DOUBLE", "duckdb": "FLOAT", }, marks=[ @@ -538,7 +502,9 @@ def test_numeric_literal(con, backend, expr, expected_types): "query_id=20230128_024107_01084_y8zm3)", raises=sa.exc.ProgrammingError, ), - pytest.mark.notyet(["mysql"], raises=com.UnsupportedOperationError), + pytest.mark.notyet( + ["mysql", "impala"], raises=com.UnsupportedOperationError + ), pytest.mark.broken( ["mssql"], "(pymssql._pymssql.ProgrammingError) (207, b\"Invalid column name 'Infinity'." @@ -591,7 +557,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "pandas": decimal.Decimal("NaN"), "dask": decimal.Decimal("NaN"), "pyspark": decimal.Decimal("NaN"), - "impala": float("nan"), "exasol": float("nan"), "duckdb": float("nan"), }, @@ -601,7 +566,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "sqlite": "null", "postgres": "numeric", "risingwave": "numeric", - "impala": "DOUBLE", "duckdb": "FLOAT", }, marks=[ @@ -617,7 +581,9 @@ def test_numeric_literal(con, backend, expr, expected_types): "query_id=20230128_024107_01084_y8zm3)", raises=sa.exc.ProgrammingError, ), - pytest.mark.notyet(["mysql"], raises=com.UnsupportedOperationError), + pytest.mark.notyet( + ["mysql", "impala"], raises=com.UnsupportedOperationError + ), pytest.mark.broken( ["mssql"], "(pymssql._pymssql.ProgrammingError) (207, b\"Invalid column name 'NaN'." 
diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index f8b2dd3cd365..410f7f30045b 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -676,7 +676,7 @@ def uses_java_re(t): id="negative-index", marks=[ pytest.mark.broken(["druid"], raises=PyDruidProgrammingError), - pytest.mark.broken(["impala", "flink"], raises=AssertionError), + pytest.mark.broken(["flink"], raises=AssertionError), ], ), param( @@ -961,13 +961,16 @@ def test_capitalize(con): ["dask", "pandas", "polars", "oracle", "flink"], raises=com.OperationNotDefinedError ) @pytest.mark.notyet( - ["impala", "mssql", "sqlite", "exasol"], + ["mssql", "sqlite", "exasol"], reason="no arrays", raises=com.OperationNotDefinedError, ) @pytest.mark.never( ["mysql"], raises=com.OperationNotDefinedError, reason="no array support" ) +@pytest.mark.notimpl( + ["impala"], raises=com.UnsupportedBackendType, reason="no array support" +) def test_array_string_join(con): s = ibis.array(["a", "b", "c"]) expected = "a,b,c" diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index f55e6764d452..9ed2b1e83855 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -344,7 +344,6 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): param( "W", marks=[ - pytest.mark.notimpl(["impala"], raises=AssertionError), pytest.mark.broken(["sqlite"], raises=AssertionError), pytest.mark.notimpl(["mysql"], raises=com.UnsupportedOperationError), pytest.mark.broken( @@ -403,9 +402,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): param( "s", marks=[ - pytest.mark.notimpl( - ["impala", "sqlite"], raises=com.UnsupportedOperationError - ), + pytest.mark.notimpl(["sqlite"], raises=com.UnsupportedOperationError), pytest.mark.broken( ["polars"], raises=AssertionError, @@ -422,13 +419,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): "ms", marks=[ pytest.mark.notimpl( - [ - "clickhouse", - "impala", - "mysql", - "sqlite", - "datafusion", - ], + ["clickhouse", "mysql", "sqlite", "datafusion"], raises=com.UnsupportedOperationError, ), pytest.mark.broken( @@ -447,14 +438,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): "us", marks=[ pytest.mark.notimpl( - [ - "clickhouse", - "impala", - "mysql", - "sqlite", - "trino", - "datafusion", - ], + ["clickhouse", "mysql", "sqlite", "trino", "datafusion"], raises=com.UnsupportedOperationError, ), pytest.mark.broken( @@ -572,7 +556,6 @@ def test_timestamp_truncate(backend, alltypes, df, unit): param( "W", marks=[ - pytest.mark.broken(["impala"], raises=AssertionError), pytest.mark.notyet(["mysql"], raises=com.UnsupportedOperationError), pytest.mark.never( ["flink"], @@ -900,7 +883,6 @@ def convert_to_offset(x): pytest.mark.notimpl( [ "dask", - "impala", "risingwave", "snowflake", "sqlite", @@ -908,6 +890,7 @@ def convert_to_offset(x): ], raises=com.OperationNotDefinedError, ), + pytest.mark.notimpl(["impala"], raises=com.UnsupportedOperationError), pytest.mark.notimpl(["mysql"], raises=sg.ParseError), pytest.mark.notimpl( ["druid"], @@ -928,12 +911,12 @@ def convert_to_offset(x): "sqlite", "risingwave", "polars", - "impala", "snowflake", "bigquery", ], raises=com.OperationNotDefinedError, ), + pytest.mark.notimpl(["impala"], raises=com.UnsupportedOperationError), pytest.mark.notimpl(["mysql"], raises=sg.ParseError), pytest.mark.notimpl( ["druid"], @@ -1527,11 +1510,7 @@ def test_interval_add_cast_column(backend, alltypes, 
df): reason="Polars does not support columnar argument StringConcat()", ), pytest.mark.notyet(["dask"], raises=com.OperationNotDefinedError), - pytest.mark.broken( - ["impala"], - raises=AttributeError, - reason="'StringConcat' object has no attribute 'value'", - ), + pytest.mark.notyet(["impala"], raises=com.UnsupportedOperationError), pytest.mark.notimpl( ["druid"], raises=AttributeError, @@ -1710,7 +1689,6 @@ def test_integer_to_timestamp(backend, con, unit): "risingwave", "clickhouse", "sqlite", - "impala", "datafusion", "mssql", "druid", @@ -1865,6 +1843,10 @@ def test_now_from_projection(alltypes): DATE_BACKEND_TYPES = { "bigquery": "DATE", "clickhouse": "Date", + "duckdb": "DATE", + "flink": "DATE NOT NULL", + "impala": "DATE", + "postgres": "date", "snowflake": "DATE", "sqlite": "text", "trino": "date", @@ -1882,7 +1864,6 @@ def test_now_from_projection(alltypes): @pytest.mark.notimpl( ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00936 missing expression" ) -@pytest.mark.notyet(["impala"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.notimpl( ["risingwave"], @@ -2167,7 +2148,6 @@ def test_interval_literal(con, backend): @pytest.mark.broken( ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00936: missing expression" ) -@pytest.mark.notyet(["impala"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) @pytest.mark.notimpl( ["risingwave"], diff --git a/ibis/backends/tests/test_uuid.py b/ibis/backends/tests/test_uuid.py index d577ef903a37..1cb3acbf0df8 100644 --- a/ibis/backends/tests/test_uuid.py +++ b/ibis/backends/tests/test_uuid.py @@ -5,32 +5,31 @@ import pytest import sqlalchemy.exc -from packaging.version import parse as vparse import ibis import ibis.common.exceptions as com import ibis.expr.datatypes as dt + RAW_TEST_UUID = "08f48812-7948-4718-96c7-27fa6a398db6" TEST_UUID = uuid.UUID(RAW_TEST_UUID) -SQLALCHEMY2 = vparse(sqlalchemy.__version__) >= vparse("2") - UUID_BACKEND_TYPE = { "bigquery": "STRING", + "clickhouse": "Nullable(UUID)", "duckdb": "UUID", "exasol": "UUID", "flink": "CHAR(36) NOT NULL", - "sqlite": "text", - "snowflake": "VARCHAR", - "trino": "varchar(32)" if SQLALCHEMY2 else "uuid", + "impala": "STRING", "postgres": "uuid", - "clickhouse": "Nullable(UUID)", + "snowflake": "VARCHAR", + "sqlite": "text", + "trino": "uuid", } @pytest.mark.notimpl( - ["impala", "datafusion", "polars"], raises=NotImplementedError + ["datafusion", "polars"], raises=NotImplementedError ) @pytest.mark.notimpl( ["risingwave"], diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index ea9667698b74..4e27ccb16a68 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -1168,10 +1168,10 @@ def test_mutate_window_filter(backend, alltypes): @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) -@pytest.mark.broken( - ["impala"], - reason="the database returns incorrect results", - raises=AssertionError, +@pytest.mark.notimpl( + ["flink"], + raises=Exception, + reason="KeyError: Table with name win doesn't exist.", ) @pytest.mark.notimpl(["dask"], raises=NotImplementedError) @pytest.mark.notimpl( diff --git a/ibis/expr/operations/udf.py b/ibis/expr/operations/udf.py index 7d900df7c813..945f70f429d7 100644 --- a/ibis/expr/operations/udf.py +++ b/ibis/expr/operations/udf.py @@ -417,6 +417,7 @@ def pyarrow(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): ) +@public class 
agg(_UDF): __slots__ = () diff --git a/poetry.lock b/poetry.lock index cdcf1d01b3a5..2bc9f08730ab 100644 --- a/poetry.lock +++ b/poetry.lock @@ -7345,7 +7345,7 @@ examples = ["pins"] exasol = ["sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views"] flink = [] geospatial = ["geopandas", "shapely"] -impala = ["impyla", "sqlalchemy"] +impala = ["impyla"] mssql = ["pyodbc", "sqlalchemy", "sqlalchemy-views"] mysql = ["pymysql"] oracle = ["oracledb", "packaging", "sqlalchemy", "sqlalchemy-views"] @@ -7362,4 +7362,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "a2edd5b6e62e78267c3e2339efde3600b637e86ae05c0deec081798be5d1d34e" +content-hash = "1939863bb76e53c0c8a1575ffe8fd2e035e6768ac21682fe12a9e640ffe3ade1" diff --git a/pyproject.toml b/pyproject.toml index abfdeb7fa3f0..53d58ee328f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -187,7 +187,7 @@ duckdb = ["duckdb"] exasol = ["sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views"] flink = [] geospatial = ["geopandas", "shapely"] -impala = ["impyla", "sqlalchemy"] +impala = ["impyla"] mssql = ["sqlalchemy", "pyodbc", "sqlalchemy-views"] mysql = ["pymysql"] oracle = ["sqlalchemy", "oracledb", "packaging", "sqlalchemy-views"] From 71bb3db9b0aaa02a8d1e24c139fb9dd7d91a275d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 18 Jan 2024 09:26:41 +0100 Subject: [PATCH 079/161] refactor(pandas): simplify pandas helpers --- ibis/backends/pandas/executor.py | 2 +- ibis/backends/pandas/helpers.py | 28 +++++++++++++--------------- ibis/backends/tests/test_window.py | 3 --- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/ibis/backends/pandas/executor.py b/ibis/backends/pandas/executor.py index f9dd69a3c027..815cecdf51f9 100644 --- a/ibis/backends/pandas/executor.py +++ b/ibis/backends/pandas/executor.py @@ -545,7 +545,7 @@ def visit(cls, op: ops.Project, parent, values): df, all_scalars = asframe(values) if all_scalars and len(parent) != len(df): df = pd.concat([df] * len(parent)) - return df + return df.reset_index(drop=True) @classmethod def visit(cls, op: ops.Filter, parent, predicates): diff --git a/ibis/backends/pandas/helpers.py b/ibis/backends/pandas/helpers.py index d8bc9efd54eb..92597be14461 100644 --- a/ibis/backends/pandas/helpers.py +++ b/ibis/backends/pandas/helpers.py @@ -38,7 +38,7 @@ def asframe(values: dict | tuple, concat=True): columns = [asseries(v, size) for v in values] if concat: - df = pd.concat(columns, axis=1, keys=names).reset_index(drop=True) + df = pd.concat(columns, axis=1, keys=names) return df, all_scalars else: return columns, all_scalars @@ -49,31 +49,29 @@ def generic(func: Callable, operands): def rowwise(func: Callable, operands): + """Kernel applied to a row, where all the operands are scalars.""" # dealing with a collection of series objects - df, all_scalars = asframe(operands) - result = df.apply(func, axis=1) # , **kwargs) - return result.iat[0] if all_scalars else result + df, _ = asframe(operands) + return df.apply(func, axis=1) def columnwise(func: Callable, operands): - df, all_scalars = asframe(operands) - result = func(df) - return result.iat[0] if all_scalars else result + """Kernel where all the operands are series objects.""" + df, _ = asframe(operands) + return func(df) def serieswise(func, operands): + """Kernel where the first operand is a series object.""" (key, value), *rest = operands.items() - if isinstance(value, pd.Series): - # dealing with a single series object - return func(**operands) - 
else: - # dealing with a single scalar object - value = pd.Series([value]) - operands = {key: value, **dict(rest)} - return func(**operands).iat[0] + # ensure that the first operand is a series object + value = asseries(value) + operands = {key: value, **dict(rest)} + return func(**operands) def elementwise(func, operands): + """Kernel applied to an element, where all the operands are scalars.""" value = operands.pop(next(iter(operands))) if isinstance(value, pd.Series): # dealing with a single series object diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 4e27ccb16a68..ff4baf80211d 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -1111,9 +1111,6 @@ def test_percent_rank_whole_table_no_order_by(backend, alltypes, df): @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["dask"], raises=NotImplementedError) -@pytest.mark.broken( - ["pandas"], reason="pandas returns incorrect results", raises=AssertionError -) def test_grouped_ordered_window_coalesce(backend, alltypes, df): t = alltypes expr = ( From a78e9ee5082deb9999e369dfa5b36a0d8ab38ae6 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 18 Jan 2024 07:32:22 -0500 Subject: [PATCH 080/161] chore(impala): remove unused imports --- ibis/backends/impala/tests/test_bucket_histogram.py | 1 - ibis/backends/impala/tests/test_case_exprs.py | 1 - ibis/backends/impala/tests/test_sql.py | 1 - ibis/backends/impala/tests/test_window.py | 2 -- 4 files changed, 5 deletions(-) diff --git a/ibis/backends/impala/tests/test_bucket_histogram.py b/ibis/backends/impala/tests/test_bucket_histogram.py index 4e630f5912bb..af1ca0591a14 100644 --- a/ibis/backends/impala/tests/test_bucket_histogram.py +++ b/ibis/backends/impala/tests/test_bucket_histogram.py @@ -2,7 +2,6 @@ import pytest -from ibis.backends.impala.compiler import ImpalaCompiler from ibis.backends.impala.tests.conftest import translate diff --git a/ibis/backends/impala/tests/test_case_exprs.py b/ibis/backends/impala/tests/test_case_exprs.py index e559517e8864..e23a9436c6fb 100644 --- a/ibis/backends/impala/tests/test_case_exprs.py +++ b/ibis/backends/impala/tests/test_case_exprs.py @@ -4,7 +4,6 @@ import ibis import ibis.expr.types as ir -from ibis.backends.impala.compiler import ImpalaCompiler from ibis.backends.impala.tests.conftest import translate diff --git a/ibis/backends/impala/tests/test_sql.py b/ibis/backends/impala/tests/test_sql.py index 93f0a5f9600e..65c125a8f457 100644 --- a/ibis/backends/impala/tests/test_sql.py +++ b/ibis/backends/impala/tests/test_sql.py @@ -5,7 +5,6 @@ import pytest import ibis -from ibis.backends.impala.compiler import ImpalaCompiler from ibis.backends.impala.tests.mocks import MockImpalaConnection diff --git a/ibis/backends/impala/tests/test_window.py b/ibis/backends/impala/tests/test_window.py index 5f0451f16adf..b7fa17fbf788 100644 --- a/ibis/backends/impala/tests/test_window.py +++ b/ibis/backends/impala/tests/test_window.py @@ -4,9 +4,7 @@ from pytest import param import ibis -import ibis.common.exceptions as com from ibis import window -from ibis.backends.impala.compiler import ImpalaCompiler from ibis.tests.util import assert_equal pytest.importorskip("impala") From 56794e60f6fc8472ad3a662822ff800fc4517d3b Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 9 Jan 2024 10:59:24 -0500 Subject: [PATCH 081/161] refactor(bigquery): port to sqlglot --- 
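Note (not part of this patch): "port to sqlglot" here means the BigQuery compiler now builds sqlglot expression trees and post-processes them with `Expression.transform` before rendering SQL, rather than manipulating SQL strings (see `_qualify_memtable` and `_remove_nulls_first_from_invalid_window_orderings` in the diff below). The following is a minimal, illustrative sketch of that transform pattern, assuming only the sqlglot helpers already used throughout this series (`sg.parse_one`, `sg.table`, `Expression.transform`); the `qualify` function, the example query, and the dataset/project names are hypothetical and do not appear in the patch.

    from functools import partial

    import sqlglot as sg
    import sqlglot.expressions as sge


    def qualify(node: sge.Expression, *, dataset: str, project: str) -> sge.Expression:
        # Fully qualify bare table references; leave every other node untouched.
        if isinstance(node, sge.Table) and not node.db and not node.catalog:
            return sg.table(node.name, db=dataset, catalog=project)
        return node


    tree = sg.parse_one("SELECT t.x FROM t", read="bigquery")
    qualified = tree.transform(partial(qualify, dataset="my_dataset", project="my_project"))
    # The FROM clause now refers to project.dataset.table.
    print(qualified.sql(dialect="bigquery"))

The patch follows the same shape: `_to_sqlglot` returns a tree, backend-specific rewrites run as `.transform(...)` passes, and only then is `.sql(dialect=..., pretty=True)` called.
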
.github/workflows/ibis-backends-cloud.yml | 4 +- ibis/backends/base/sqlglot/compiler.py | 70 +- ibis/backends/base/sqlglot/datatypes.py | 119 ++ ibis/backends/bigquery/__init__.py | 249 ++-- ibis/backends/bigquery/client.py | 2 +- ibis/backends/bigquery/compiler.py | 920 +++++++++++++-- ibis/backends/bigquery/converter.py | 20 + ibis/backends/bigquery/custom_udfs.py | 41 - ibis/backends/bigquery/datatypes.py | 114 +- ibis/backends/bigquery/operations.py | 9 - ibis/backends/bigquery/registry.py | 1020 ----------------- ibis/backends/bigquery/rewrites.py | 32 - .../test_cross_project_query/out.sql | 14 +- .../test_multiple_project_queries/out.sql | 8 +- .../test_subquery_scalar_params/out.sql | 29 +- .../bigquery/tests/system/test_client.py | 4 +- .../tests/system/udf/test_udf_execute.py | 87 +- .../test_approx/filter-approx_median/out.sql | 2 +- .../test_approx/filter-approx_nunique/out.sql | 2 +- .../no_filter-approx_median/out.sql | 2 +- .../no_filter-approx_nunique/out.sql | 2 +- .../test_compiler/test_binary/out.sql | 2 +- .../test_bit/filter-bit_and/out.sql | 2 +- .../test_bit/filter-bit_or/out.sql | 2 +- .../test_bit/filter-bit_xor/out.sql | 2 +- .../test_bit/no_filter-bit_and/out.sql | 2 +- .../test_bit/no_filter-bit_or/out.sql | 2 +- .../test_bit/no_filter-bit_xor/out.sql | 2 +- .../test_bool_reducers/mean/out.sql | 2 +- .../test_bool_reducers/sum/out.sql | 2 +- .../test_bool_reducers_where_conj/out.sql | 12 +- .../test_bool_reducers_where_simple/out.sql | 2 +- .../test_compiler/test_bucket/out.sql | 10 +- .../test_cast_float_to_int/out.sql | 2 +- .../test_compile_toplevel/out.sql | 4 +- .../test_compiler/test_cov/pop/out.sql | 2 +- .../test_compiler/test_cov/sample/out.sql | 2 +- .../test_day_of_week/date/index.sql | 2 +- .../test_day_of_week/date/name.sql | 2 +- .../test_day_of_week/datetime/index.sql | 2 +- .../test_day_of_week/datetime/name.sql | 2 +- .../test_day_of_week/string_date/index.sql | 2 +- .../test_day_of_week/string_date/name.sql | 2 +- .../string_timestamp/index.sql | 2 +- .../string_timestamp/name.sql | 2 +- .../test_day_of_week/timestamp/index.sql | 2 +- .../test_day_of_week/timestamp/name.sql | 2 +- .../test_day_of_week/timestamp_date/index.sql | 2 +- .../test_day_of_week/timestamp_date/name.sql | 2 +- .../test_divide_by_zero/floordiv/out.sql | 2 +- .../test_divide_by_zero/truediv/out.sql | 2 +- .../date/out.sql | 2 +- .../time/out.sql | 2 +- .../test_geospatial_azimuth/out.sql | 2 +- .../test_geospatial_binary/contains/out.sql | 2 +- .../test_geospatial_binary/covered_by/out.sql | 2 +- .../test_geospatial_binary/covers/out.sql | 2 +- .../test_geospatial_binary/d_within/out.sql | 2 +- .../test_geospatial_binary/difference/out.sql | 2 +- .../test_geospatial_binary/disjoint/out.sql | 2 +- .../test_geospatial_binary/distance/out.sql | 2 +- .../test_geospatial_binary/geo_equals/out.sql | 2 +- .../intersection/out.sql | 2 +- .../test_geospatial_binary/intersects/out.sql | 2 +- .../max_distance/out.sql | 2 +- .../test_geospatial_binary/touches/out.sql | 2 +- .../test_geospatial_binary/union/out.sql | 2 +- .../test_geospatial_binary/within/out.sql | 2 +- .../test_geospatial_minmax/x_max/out.sql | 2 +- .../test_geospatial_minmax/x_min/out.sql | 2 +- .../test_geospatial_minmax/y_max/out.sql | 2 +- .../test_geospatial_minmax/y_min/out.sql | 2 +- .../test_geospatial_point/out.sql | 2 +- .../test_geospatial_simplify/out.sql | 2 +- .../test_geospatial_unary/aread/out.sql | 2 +- .../test_geospatial_unary/as_binary/out.sql | 2 +- .../test_geospatial_unary/as_text/out.sql | 2 +- 
.../test_geospatial_unary/buffer/out.sql | 2 +- .../test_geospatial_unary/centroid/out.sql | 2 +- .../test_geospatial_unary/end_point/out.sql | 2 +- .../geometry_type/out.sql | 2 +- .../test_geospatial_unary/length/out.sql | 2 +- .../test_geospatial_unary/npoints/out.sql | 2 +- .../test_geospatial_unary/perimeter/out.sql | 2 +- .../test_geospatial_unary/point_n/out.sql | 2 +- .../test_geospatial_unary/start_point/out.sql | 2 +- .../test_geospatial_unary_union/out.sql | 2 +- .../test_geospatial_xy/x/out.sql | 2 +- .../test_geospatial_xy/y/out.sql | 2 +- .../test_compiler/test_hash/binary/out.sql | 2 +- .../test_hashbytes/md5-test-binary/out.sql | 2 +- .../test_hashbytes/md5-test-string/out.sql | 2 +- .../test_hashbytes/sha1-test-binary/out.sql | 2 +- .../test_hashbytes/sha1-test-string/out.sql | 2 +- .../test_hashbytes/sha256-test-binary/out.sql | 2 +- .../test_hashbytes/sha256-test-string/out.sql | 2 +- .../test_hashbytes/sha512-test-binary/out.sql | 2 +- .../test_hashbytes/sha512-test-string/out.sql | 2 +- .../test_compiler/test_identical_to/out.sql | 22 +- .../test_integer_to_timestamp/ms/out.sql | 2 +- .../test_integer_to_timestamp/ns/out.sql | 2 +- .../test_integer_to_timestamp/s/out.sql | 2 +- .../test_integer_to_timestamp/us/out.sql | 2 +- .../datetime/out.sql | 2 +- .../string_time/out.sql | 2 +- .../string_timestamp/out.sql | 2 +- .../time/out.sql | 2 +- .../timestamp/out.sql | 2 +- .../test_literal_year/date/out.sql | 2 +- .../test_literal_year/datetime/out.sql | 2 +- .../test_literal_year/string_date/out.sql | 2 +- .../string_timestamp/out.sql | 2 +- .../test_literal_year/timestamp/out.sql | 2 +- .../test_literal_year/timestamp_date/out.sql | 2 +- .../snapshots/test_compiler/test_now/out.sql | 2 +- .../out.sql | 46 +- .../prec_foll/out.sql | 16 +- .../prec_prec/out.sql | 16 +- .../test_set_operation/difference/out.sql | 12 +- .../test_set_operation/intersect/out.sql | 12 +- .../test_set_operation/union_all/out.sql | 12 +- .../test_set_operation/union_distinct/out.sql | 12 +- .../test_compiler/test_substring/out.sql | 6 +- .../test_temporal_truncate/day-date/out.sql | 2 +- .../day-timestamp/out.sql | 2 +- .../test_temporal_truncate/hour-time/out.sql | 2 +- .../hour-timestamp/out.sql | 2 +- .../micros-time/out.sql | 2 +- .../micros-timestamp/out.sql | 2 +- .../millis-time/out.sql | 2 +- .../millis-timestamp/out.sql | 2 +- .../minute-time/out.sql | 2 +- .../minute-timestamp/out.sql | 2 +- .../test_temporal_truncate/month-date/out.sql | 2 +- .../month-timestamp/out.sql | 2 +- .../quarter-date/out.sql | 2 +- .../quarter-timestamp/out.sql | 2 +- .../second-time/out.sql | 2 +- .../second-timestamp/out.sql | 2 +- .../test_temporal_truncate/week-date/out.sql | 2 +- .../week-timestamp/out.sql | 2 +- .../test_temporal_truncate/year-date/out.sql | 2 +- .../year-timestamp/out.sql | 2 +- .../test_to_timestamp_no_timezone/out.sql | 2 +- .../test_to_timestamp_timezone/out.sql | 2 +- .../test_trailing_range_window/days/out.sql | 16 +- .../test_trailing_range_window/five/out.sql | 16 +- .../test_trailing_range_window/hours/out.sql | 16 +- .../test_trailing_range_window/micros/out.sql | 16 +- .../minutes/out.sql | 16 +- .../test_trailing_range_window/nanos/out.sql | 16 + .../seconds/out.sql | 16 +- .../two_days/out.sql | 16 +- .../test_trailing_range_window/week/out.sql | 19 +- .../test_compiler/test_union/False/out.sql | 36 +- .../test_compiler/test_union/True/out.sql | 36 +- .../test_union_cte/False-False/out.sql | 65 +- .../test_union_cte/False-True/out.sql | 65 +- 
.../test_union_cte/True-False/out.sql | 65 +- .../test_union_cte/True-True/out.sql | 65 +- .../test_unnest/out_one_unnest.sql | 12 +- .../test_unnest/out_two_unnests.sql | 26 +- .../test_window_function/current_foll/out.sql | 16 +- .../test_window_function/prec_current/out.sql | 16 +- .../test_window_function/prec_prec/out.sql | 16 +- .../test_window_unbounded/following/out.sql | 2 +- .../test_window_unbounded/preceding/out.sql | 2 +- .../bigquery/tests/unit/test_compiler.py | 22 +- .../test_bqutil_fn_from_hex/out.sql | 2 +- .../test_farm_fingerprint/out.sql | 2 +- .../test_multiple_calls_redefinition/out.sql | 18 - .../test_udf_determinism/False/out.sql | 10 - .../test_udf_determinism/None/out.sql | 9 - .../test_udf_determinism/True/out.sql | 10 - .../snapshots/test_usage/test_udf_sql/out.sql | 10 - .../bigquery/tests/unit/udf/test_builtin.py | 2 +- .../bigquery/tests/unit/udf/test_usage.py | 114 +- ibis/backends/bigquery/udf/__init__.py | 396 ------- ibis/backends/bigquery/udf/core.py | 14 +- ibis/backends/duckdb/__init__.py | 7 +- .../test_udf/test_sql_generation/out.sql | 2 +- ibis/backends/postgres/__init__.py | 2 +- .../test_default_limit/bigquery/out.sql | 5 + .../test_disable_query_limit/bigquery/out.sql | 5 + .../bigquery/out.sql | 3 + .../test_respect_set_limit/bigquery/out.sql | 10 + .../test_group_by_has_index/bigquery/out.sql | 6 +- .../test_group_by_has_index/pyspark/out.sql | 22 + .../test_sql/test_isin_bug/bigquery/out.sql | 16 +- .../test_sql/test_isin_bug/pyspark/out.sql | 9 + .../test_union_aliasing/bigquery/out.sql | 280 +++-- ibis/backends/tests/test_aggregation.py | 4 +- ibis/backends/tests/test_asof_join.py | 2 + ibis/backends/tests/test_generic.py | 9 +- ibis/backends/tests/test_numeric.py | 7 +- ibis/backends/tests/test_param.py | 29 +- ibis/backends/tests/test_sql.py | 4 +- ibis/backends/tests/test_string.py | 1 - ibis/backends/tests/test_temporal.py | 15 - ibis/backends/tests/test_udf.py | 2 +- ibis/backends/tests/test_window.py | 9 +- ibis/expr/operations/udf.py | 59 +- ibis/expr/rewrites.py | 2 +- 203 files changed, 2054 insertions(+), 2788 deletions(-) create mode 100644 ibis/backends/bigquery/converter.py delete mode 100644 ibis/backends/bigquery/custom_udfs.py delete mode 100644 ibis/backends/bigquery/operations.py delete mode 100644 ibis/backends/bigquery/registry.py delete mode 100644 ibis/backends/bigquery/rewrites.py create mode 100644 ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/nanos/out.sql delete mode 100644 ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_multiple_calls_redefinition/out.sql delete mode 100644 ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_determinism/False/out.sql delete mode 100644 ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_determinism/None/out.sql delete mode 100644 ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_determinism/True/out.sql delete mode 100644 ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_sql/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/bigquery/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/bigquery/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/bigquery/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/bigquery/out.sql create mode 100644 
ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/pyspark/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_isin_bug/pyspark/out.sql diff --git a/.github/workflows/ibis-backends-cloud.yml b/.github/workflows/ibis-backends-cloud.yml index 7ba51ae8bd37..62801325cf29 100644 --- a/.github/workflows/ibis-backends-cloud.yml +++ b/.github/workflows/ibis-backends-cloud.yml @@ -43,8 +43,8 @@ jobs: - "3.9" - "3.11" backend: - # - name: bigquery - # title: BigQuery + - name: bigquery + title: BigQuery - name: snowflake title: Snowflake steps: diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index 1cc732ac89a0..f8e14222c3fe 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -20,6 +20,7 @@ import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis.backends.base.sqlglot.rewrites import Select, Window, sqlize +from ibis.expr.operations.udf import InputType from ibis.expr.rewrites import ( add_one_to_nth_value_input, add_order_by_to_empty_ranking_window_functions, @@ -874,9 +875,20 @@ def visit_RowID(self, op, *, table): # TODO(kszucs): this should be renamed to something UDF related def __sql_name__(self, op: ops.ScalarUDF | ops.AggUDF) -> str: + # for builtin functions use the exact function name, otherwise use the + # generated name to handle the case of redefinition + funcname = ( + op.__func_name__ + if op.__input_type__ == InputType.BUILTIN + else type(op).__name__ + ) + # not actually a table, but easier to quote individual namespace # components this way - return sg.table(op.__func_name__, db=op.__udf_namespace__).sql(self.dialect) + namespace = op.__udf_namespace__ + return sg.table(funcname, db=namespace.schema, catalog=namespace.database).sql( + self.dialect + ) @visit_node.register(ops.ScalarUDF) def visit_ScalarUDF(self, op, **kw): @@ -919,6 +931,23 @@ def _dedup_name( else value.as_(key, quoted=self.quoted) ) + @staticmethod + def _gen_valid_name(name: str) -> str: + """Generate a valid name for a value expression. + + Override this method if the dialect has restrictions on valid + identifiers even when quoted. + + See the BigQuery backend's implementation for an example. 
+ """ + return name + + def _cleanup_names(self, exprs: Mapping[str, sge.Expression]): + """Compose `_gen_valid_name` and `_dedup_name` to clean up names in projections.""" + return starmap( + self._dedup_name, toolz.keymap(self._gen_valid_name, exprs).items() + ) + @visit_node.register(Select) def visit_Select(self, op, *, parent, selections, predicates, sort_keys): # if we've constructed a useless projection return the parent relation @@ -928,9 +957,7 @@ def visit_Select(self, op, *, parent, selections, predicates, sort_keys): result = parent if selections: - result = sg.select(*starmap(self._dedup_name, selections.items())).from_( - result - ) + result = sg.select(*self._cleanup_names(selections)).from_(result) if predicates: result = result.where(*predicates) @@ -942,7 +969,7 @@ def visit_Select(self, op, *, parent, selections, predicates, sort_keys): @visit_node.register(ops.DummyTable) def visit_DummyTable(self, op, *, values): - return sg.select(*starmap(self._dedup_name, values.items())) + return sg.select(*self._cleanup_names(values)) @visit_node.register(ops.UnboundTable) def visit_UnboundTable( @@ -978,7 +1005,7 @@ def visit_SelfReference(self, op, *, parent, identifier): @visit_node.register(ops.JoinChain) def visit_JoinChain(self, op, *, first, rest, values): - result = sg.select(*starmap(self._dedup_name, values.items())).from_(first) + result = sg.select(*self._cleanup_names(values)).from_(first) for link in rest: if isinstance(link, sge.Alias): @@ -1019,15 +1046,9 @@ def visit_JoinLink(self, op, *, how, table, predicates): on = sg.and_(*predicates) if predicates else None return sge.Join(this=table, side=sides[how], kind=kinds[how], on=on) - @staticmethod - def _gen_valid_name(name: str) -> str: - return name - @visit_node.register(ops.Project) def visit_Project(self, op, *, parent, values): - # needs_alias should never be true here in explicitly, but it may get - # passed via a (recursive) call to translate_val - return sg.select(*starmap(self._dedup_name, values.items())).from_(parent) + return sg.select(*self._cleanup_names(values)).from_(parent) @staticmethod def _generate_groups(groups): @@ -1036,12 +1057,7 @@ def _generate_groups(groups): @visit_node.register(ops.Aggregate) def visit_Aggregate(self, op, *, parent, groups, metrics): sel = sg.select( - *starmap( - self._dedup_name, toolz.keymap(self._gen_valid_name, groups).items() - ), - *starmap( - self._dedup_name, toolz.keymap(self._gen_valid_name, metrics).items() - ), + *self._cleanup_names(groups), *self._cleanup_names(metrics) ).from_(parent) if groups: @@ -1190,21 +1206,15 @@ def visit_FillNa(self, op, *, parent, replacements): for name, dtype in op.schema.items() if dtype.nullable } - exprs = [ - ( - sg.alias( - sge.Coalesce( - this=sg.column(col, quoted=self.quoted), - expressions=[sge.convert(alt)], - ), - col, - ) + exprs = { + col: ( + self.f.coalesce(sg.column(col, quoted=self.quoted), sge.convert(alt)) if (alt := mapping.get(col)) is not None else sg.column(col, quoted=self.quoted) ) for col in op.schema.keys() - ] - return sg.select(*exprs).from_(parent) + } + return sg.select(*self._cleanup_names(exprs)).from_(parent) @visit_node.register(ops.View) def visit_View(self, op, *, child, name: str): diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index b1d4c336739e..e09e582f9671 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -620,3 +620,122 @@ class PySparkType(SqlglotType): default_decimal_precision = 38 
default_decimal_scale = 18 + + +class BigQueryType(SqlglotType): + dialect = "bigquery" + + default_decimal_precision = 38 + default_decimal_scale = 9 + + @classmethod + def _from_sqlglot_NUMERIC(cls) -> dt.Decimal: + return dt.Decimal( + cls.default_decimal_precision, + cls.default_decimal_scale, + nullable=cls.default_nullable, + ) + + @classmethod + def _from_sqlglot_BIGNUMERIC(cls) -> dt.Decimal: + return dt.Decimal(76, 38, nullable=cls.default_nullable) + + @classmethod + def _from_sqlglot_DATETIME(cls) -> dt.Decimal: + return dt.Timestamp(timezone=None, nullable=cls.default_nullable) + + @classmethod + def _from_sqlglot_TIMESTAMP(cls) -> dt.Decimal: + return dt.Timestamp(timezone="UTC", nullable=cls.default_nullable) + + @classmethod + def _from_sqlglot_GEOGRAPHY(cls) -> dt.Decimal: + return dt.GeoSpatial( + geotype="geography", srid=4326, nullable=cls.default_nullable + ) + + @classmethod + def _from_sqlglot_TINYINT(cls) -> dt.Int64: + return dt.Int64(nullable=cls.default_nullable) + + _from_sqlglot_UINT = ( + _from_sqlglot_USMALLINT + ) = ( + _from_sqlglot_UTINYINT + ) = _from_sqlglot_INT = _from_sqlglot_SMALLINT = _from_sqlglot_TINYINT + + @classmethod + def _from_sqlglot_UBIGINT(cls) -> dt.Int64: + raise TypeError("Unsigned BIGINT isn't representable in BigQuery INT64") + + @classmethod + def _from_sqlglot_FLOAT(cls) -> dt.Double: + return dt.Float64(nullable=cls.default_nullable) + + @classmethod + def _from_sqlglot_MAP(cls) -> dt.Map: + raise NotImplementedError( + "Cannot convert sqlglot Map type to ibis type: maps are not supported in BigQuery" + ) + + @classmethod + def _from_ibis_Map(cls, dtype: dt.Map) -> sge.DataType: + raise NotImplementedError( + "Cannot convert Ibis Map type to BigQuery type: maps are not supported in BigQuery" + ) + + @classmethod + def _from_ibis_Timestamp(cls, dtype: dt.Timestamp) -> sge.DataType: + if dtype.timezone is None: + return sge.DataType(this=sge.DataType.Type.DATETIME) + elif dtype.timezone == "UTC": + return sge.DataType(this=sge.DataType.Type.TIMESTAMPTZ) + else: + raise TypeError( + "BigQuery does not support timestamps with timezones other than 'UTC'" + ) + + @classmethod + def _from_ibis_Decimal(cls, dtype: dt.Decimal) -> sge.DataType: + precision = dtype.precision + scale = dtype.scale + if (precision, scale) == (76, 38): + return sge.DataType(this=sge.DataType.Type.BIGDECIMAL) + elif (precision, scale) in ((38, 9), (None, None)): + return sge.DataType(this=sge.DataType.Type.DECIMAL) + else: + raise TypeError( + "BigQuery only supports decimal types with precision of 38 and " + f"scale of 9 (NUMERIC) or precision of 76 and scale of 38 (BIGNUMERIC). " + f"Current precision: {dtype.precision}. Current scale: {dtype.scale}" + ) + + @classmethod + def _from_ibis_UInt64(cls, dtype: dt.UInt64) -> sge.DataType: + raise TypeError( + f"Conversion from {dtype} to BigQuery integer type (Int64) is lossy" + ) + + @classmethod + def _from_ibis_UInt32(cls, dtype: dt.UInt32) -> sge.DataType: + return sge.DataType(this=sge.DataType.Type.BIGINT) + + _from_ibis_UInt8 = _from_ibis_UInt16 = _from_ibis_UInt32 + + @classmethod + def _from_ibis_GeoSpatial(cls, dtype: dt.GeoSpatial) -> sge.DataType: + if (dtype.geotype, dtype.srid) == ("geography", 4326): + return sge.DataType(this=sge.DataType.Type.GEOGRAPHY) + else: + raise TypeError( + "BigQuery geography uses points on WGS84 reference ellipsoid." 
+ f"Current geotype: {dtype.geotype}, Current srid: {dtype.srid}" + ) + + +class BigQueryUDFType(BigQueryType): + @classmethod + def _from_ibis_Int64(cls, dtype: dt.Int64) -> sge.DataType: + raise com.UnsupportedBackendType( + "int64 is not a supported input or output type in BigQuery UDFs; use float64 instead" + ) diff --git a/ibis/backends/bigquery/__init__.py b/ibis/backends/bigquery/__init__.py index 6d37cc87bc45..5e0cddf43f9a 100644 --- a/ibis/backends/bigquery/__init__.py +++ b/ibis/backends/bigquery/__init__.py @@ -3,11 +3,10 @@ from __future__ import annotations import concurrent.futures -import contextlib import glob import os import re -from functools import partial +from contextlib import contextmanager from typing import TYPE_CHECKING, Any, Callable, Optional from urllib.parse import parse_qs, urlparse @@ -17,6 +16,7 @@ import pandas as pd import pydata_google_auth import sqlglot as sg +import sqlglot.expressions as sge from pydata_google_auth import cache import ibis @@ -25,7 +25,8 @@ import ibis.expr.types as ir from ibis import util from ibis.backends.base import CanCreateSchema, Database -from ibis.backends.base.sql import BaseSQLBackend +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.base.sqlglot.datatypes import BigQueryType from ibis.backends.bigquery.client import ( BigQueryCursor, bigquery_param, @@ -34,19 +35,17 @@ schema_from_bigquery_table, ) from ibis.backends.bigquery.compiler import BigQueryCompiler -from ibis.backends.bigquery.datatypes import BigQuerySchema, BigQueryType - -with contextlib.suppress(ImportError): - from ibis.backends.bigquery.udf import udf # noqa: F401 +from ibis.backends.bigquery.datatypes import BigQuerySchema +from ibis.backends.bigquery.udf.core import PythonToJavaScriptTranslator if TYPE_CHECKING: - from collections.abc import Iterable, Mapping + from collections.abc import Iterable, Iterator, Mapping from pathlib import Path import pyarrow as pa from google.cloud.bigquery.table import RowIterator - import ibis.expr.schema as sch + import ibis.expr.types as dt SCOPES = ["https://www.googleapis.com/auth/bigquery"] EXTERNAL_DATA_SCOPES = [ @@ -82,36 +81,40 @@ def _create_client_info_gapic(application_name): return ClientInfo(user_agent=_create_user_agent(application_name)) -def _anonymous_unnest_to_explode(node: sg.exp.Expression): - """Convert `ANONYMOUS` `unnest` function calls to `EXPLODE` calls. - - This allows us to generate DuckDB-like `UNNEST` calls and let sqlglot do - the work of transforming those into the correct BigQuery SQL. 
- """ - if isinstance(node, sg.exp.Anonymous) and node.this.lower() == "unnest": - return sg.exp.Explode(this=node.expressions[0]) - return node - - _MEMTABLE_PATTERN = re.compile(r"^_?ibis_(?:pandas|pyarrow)_memtable_[a-z0-9]{26}$") def _qualify_memtable( - node: sg.exp.Expression, *, dataset: str | None, project: str | None -) -> sg.exp.Expression: + node: sge.Expression, *, dataset: str | None, project: str | None +) -> sge.Expression: """Add a BigQuery dataset and project to memtable references.""" - if ( - isinstance(node, sg.exp.Table) - and _MEMTABLE_PATTERN.match(node.name) is not None - ): + if isinstance(node, sge.Table) and _MEMTABLE_PATTERN.match(node.name) is not None: node.args["db"] = dataset node.args["catalog"] = project return node -class Backend(BaseSQLBackend, CanCreateSchema): +def _remove_nulls_first_from_invalid_window_orderings( + node: sge.Expression, +) -> sge.Expression: + if isinstance(node, sge.Window): + order = node.args.get("order") + if order is not None: + for key in order.args["expressions"]: + kargs = key.args + if kargs.get("desc") is True and kargs.get("nulls_first", False): + kargs["nulls_first"] = False + elif kargs.get("desc") is False and not kargs.setdefault( + "nulls_first", True + ): + kargs["nulls_first"] = True + + return node + + +class Backend(SQLGlotBackend, CanCreateSchema): name = "bigquery" - compiler = BigQueryCompiler + compiler = BigQueryCompiler() supports_in_memory_tables = True supports_python_udfs = False @@ -455,20 +458,20 @@ def create_schema( **options: Any, ) -> None: properties = [ - sg.exp.Property(this=sg.to_identifier(name), value=sg.exp.convert(value)) + sge.Property(this=sg.to_identifier(name), value=sge.convert(value)) for name, value in (options or {}).items() ] if collate is not None: properties.append( - sg.exp.CollateProperty(this=sg.exp.convert(collate), default=True) + sge.CollateProperty(this=sge.convert(collate), default=True) ) - stmt = sg.exp.Create( + stmt = sge.Create( kind="SCHEMA", this=sg.table(name, db=database), exists=force, - properties=sg.exp.Properties(expressions=properties), + properties=sge.Properties(expressions=properties), ) self.raw_sql(stmt.sql(self.name)) @@ -481,7 +484,7 @@ def drop_schema( cascade: bool = False, ) -> None: """Drop a BigQuery dataset.""" - stmt = sg.exp.Drop( + stmt = sge.Drop( kind="SCHEMA", this=sg.table(name, db=database), exists=force, @@ -499,7 +502,7 @@ def table( "`database` specifier. Include a `schema` argument." 
) - table = sg.parse_one(name, into=sg.exp.Table, read=self.name) + table = sg.parse_one(name, into=sge.Table, read=self.name) # table.catalog will be the empty string if table.catalog: @@ -520,11 +523,11 @@ def table( schema = table.db if database is not None and schema is None: - database = sg.parse_one(database, into=sg.exp.Table, read=self.name) + database = sg.parse_one(database, into=sge.Table, read=self.name) database.args["quoted"] = False database = database.sql(dialect=self.name) elif database is None and schema is not None: - database = sg.parse_one(schema, into=sg.exp.Table, read=self.name) + database = sg.parse_one(schema, into=sge.Table, read=self.name) database.args["quoted"] = False database = database.sql(dialect=self.name) else: @@ -567,7 +570,7 @@ def _make_session(self) -> tuple[str, str]: dataset_id=query.destination.dataset_id, ) - def _get_schema_using_query(self, query: str) -> sch.Schema: + def _metadata(self, query: str) -> Iterator[tuple[name, dt.DataType]]: self._make_session() job = self.client.query( @@ -575,9 +578,11 @@ def _get_schema_using_query(self, query: str) -> sch.Schema: job_config=bq.QueryJobConfig(dry_run=True, use_query_cache=False), project=self.billing_project, ) - return BigQuerySchema.to_ibis(job.schema) + return ( + (f.name, BigQuerySchema._dtype_from_bigquery_field(f)) for f in job.schema + ) - def _execute(self, stmt, results=True, query_parameters=None): + def _execute(self, stmt, query_parameters=None): self._make_session() job_config = bq.job.QueryJobConfig(query_parameters=query_parameters or []) @@ -587,12 +592,12 @@ def _execute(self, stmt, results=True, query_parameters=None): query.result() # blocks until finished return BigQueryCursor(query) - def compile( + def _to_sqlglot( self, expr: ir.Expr, limit: str | None = None, params: Mapping[ir.Expr, Any] | None = None, - **_, + **kwargs, ) -> Any: """Compile an Ibis expression. @@ -605,6 +610,8 @@ def compile( of values/rows. Overrides any limit already set on the expression. 
params Named unbound parameters + kwargs + Keyword arguments passed to the compiler Returns ------- @@ -614,24 +621,15 @@ def compile( """ self._make_session() self._define_udf_translation_rules(expr) - sql = self.compiler.to_ast_ensure_limit(expr, limit, params=params).compile() - - return ";\n\n".join( - # convert unnest function calls to explode - query.transform(_anonymous_unnest_to_explode) - # add dataset and project to memtable references - .transform( - partial( - _qualify_memtable, - dataset=getattr(self._session_dataset, "dataset_id", None), - project=getattr(self._session_dataset, "project", None), - ) - ) - .sql(dialect=self.name, pretty=True) - for query in sg.parse(sql, read=self.name) - ) + sql = super()._to_sqlglot(expr, limit=limit, params=params, **kwargs) - def raw_sql(self, query: str, results=False, params=None): + return sql.transform( + _qualify_memtable, + dataset=getattr(self._session_dataset, "dataset_id", None), + project=getattr(self._session_dataset, "project", None), + ).transform(_remove_nulls_first_from_invalid_window_orderings) + + def raw_sql(self, query: str, params=None): query_parameters = [ bigquery_param( param.type(), @@ -644,7 +642,7 @@ def raw_sql(self, query: str, results=False, params=None): ) for param, value in (params or {}).items() ] - return self._execute(query, results=results, query_parameters=query_parameters) + return self._execute(query, query_parameters=query_parameters) @property def current_database(self) -> str: @@ -663,6 +661,23 @@ def database(self, name=None): ) return Database(name or self.dataset, self) + def compile( + self, expr: ir.Expr, limit: str | None = None, params=None, **kwargs: Any + ): + """Compile an Ibis expression to a SQL string.""" + query = self._to_sqlglot(expr, limit=limit, params=params, **kwargs) + udf_sources = [] + for udf_node in expr.op().find(ops.ScalarUDF): + compile_func = getattr( + self, f"_compile_{udf_node.__input_type__.name.lower()}_udf" + ) + if sql := compile_func(udf_node): + udf_sources.append(sql.sql(self.name, pretty=True)) + + sql = ";\n".join([*udf_sources, query.sql(dialect=self.name, pretty=True)]) + self._log(sql) + return sql + def execute(self, expr, params=None, limit="default", **kwargs): """Compile and execute the given Ibis expression. @@ -700,11 +715,11 @@ def execute(self, expr, params=None, limit="default", **kwargs): return expr.__pandas_result__(result) def fetch_from_cursor(self, cursor, schema): - from ibis.formats.pandas import PandasData + from ibis.backends.bigquery.converter import BigQueryPandasData arrow_t = self._cursor_to_arrow(cursor) df = arrow_t.to_pandas(timestamp_as_object=True) - return PandasData.convert_table(df, schema) + return BigQueryPandasData.convert_table(df, schema) def _cursor_to_arrow( self, @@ -830,7 +845,7 @@ def list_tables( "Include a `schema` argument." 
) elif database is None and schema is not None: - database = sg.parse_one(schema, into=sg.exp.Table, read=self.name) + database = sg.parse_one(schema, into=sge.Table, read=self.name) database.args["quoted"] = False database = database.sql(dialect=self.name) else: @@ -918,15 +933,13 @@ def create_table( if default_collate is not None: properties.append( - sg.exp.CollateProperty( - this=sg.exp.convert(default_collate), default=True - ) + sge.CollateProperty(this=sge.convert(default_collate), default=True) ) if partition_by is not None: properties.append( - sg.exp.PartitionedByProperty( - this=sg.exp.Tuple( + sge.PartitionedByProperty( + this=sge.Tuple( expressions=list(map(sg.to_identifier, partition_by)) ) ) @@ -934,11 +947,11 @@ def create_table( if cluster_by is not None: properties.append( - sg.exp.Cluster(expressions=list(map(sg.to_identifier, cluster_by))) + sge.Cluster(expressions=list(map(sg.to_identifier, cluster_by))) ) properties.extend( - sg.exp.Property(this=sg.to_identifier(name), value=sg.exp.convert(value)) + sge.Property(this=sg.to_identifier(name), value=sge.convert(value)) for name, value in (options or {}).items() ) @@ -960,7 +973,7 @@ def create_table( dataset = database or self.current_schema try: - table = sg.parse_one(name, into=sg.exp.Table, read="bigquery") + table = sg.parse_one(name, into=sge.Table, read="bigquery") except sg.ParseError: table = sg.table(name, db=dataset, catalog=project_id) else: @@ -971,25 +984,23 @@ def create_table( table.args["catalog"] = project_id column_defs = [ - sg.exp.ColumnDef( + sge.ColumnDef( this=name, kind=BigQueryType.from_ibis(typ), constraints=( None if typ.nullable or typ.is_array() - else [ - sg.exp.ColumnConstraint(kind=sg.exp.NotNullColumnConstraint()) - ] + else [sge.ColumnConstraint(kind=sge.NotNullColumnConstraint())] ), ) for name, typ in (schema or {}).items() ] - stmt = sg.exp.Create( + stmt = sge.Create( kind="TABLE", - this=sg.exp.Schema(this=table, expressions=column_defs or None), + this=sge.Schema(this=table, expressions=column_defs or None), replace=overwrite, - properties=sg.exp.Properties(expressions=properties), + properties=sge.Properties(expressions=properties), expression=None if obj is None else self.compile(obj), ) @@ -1006,7 +1017,7 @@ def drop_table( database: str | None = None, force: bool = False, ) -> None: - stmt = sg.exp.Drop( + stmt = sge.Drop( kind="TABLE", this=sg.table( name, @@ -1026,7 +1037,7 @@ def create_view( database: str | None = None, overwrite: bool = False, ) -> ir.Table: - stmt = sg.exp.Create( + stmt = sge.Create( kind="VIEW", this=sg.table( name, @@ -1048,7 +1059,7 @@ def drop_view( database: str | None = None, force: bool = False, ) -> None: - stmt = sg.exp.Drop( + stmt = sge.Drop( kind="VIEW", this=sg.table( name, @@ -1069,6 +1080,84 @@ def _clean_up_cached_table(self, op): database=self._session_dataset.project, ) + def _get_udf_source(self, udf_node: ops.ScalarUDF): + name = type(udf_node).__name__ + type_mapper = self.compiler.udf_type_mapper + + body = PythonToJavaScriptTranslator(udf_node.__func__).compile() + config = udf_node.__config__ + libraries = config.get("libraries", []) + + signature = [ + sge.ColumnDef( + this=sg.to_identifier(name), + kind=type_mapper.from_ibis(param.annotation.pattern.dtype), + ) + for name, param in udf_node.__signature__.parameters.items() + ] + + lines = ['"""'] + + if config.get("strict", True): + lines.append('"use strict";') + + lines += [ + body, + "", + f"return {udf_node.__func_name__}({', '.join(udf_node.argnames)});", + '"""', + ] + + 
func = sge.Create( + kind="FUNCTION", + this=sge.UserDefinedFunction( + this=sg.to_identifier(name), expressions=signature, wrapped=True + ), + # not exactly what I had in mind, but it works + # + # quoting is too simplistic to handle multiline strings + expression=sge.Var(this="\n".join(lines)), + exists=False, + properties=sge.Properties( + expressions=[ + sge.TemporaryProperty(), + sge.ReturnsProperty(this=type_mapper.from_ibis(udf_node.dtype)), + sge.StabilityProperty( + this="IMMUTABLE" if config.get("determinism") else "VOLATILE" + ), + sge.LanguageProperty(this=sg.to_identifier("js")), + ] + + [ + sge.Property( + this=sg.to_identifier("library"), + value=self.compiler.f.array(*libraries), + ) + ] + * bool(libraries) + ), + ) + + return func + + def _compile_builtin_udf(self, udf_node: ops.ScalarUDF) -> None: + """No op.""" + + def _compile_python_udf(self, udf_node: ops.ScalarUDF) -> None: + return self._get_udf_source(udf_node) + + def _compile_pyarrow_udf(self, udf_node: ops.ScalarUDF) -> None: + raise NotImplementedError("PyArrow UDFs are not supported in BigQuery") + + def _compile_pandas_udf(self, udf_node: ops.ScalarUDF) -> str: + raise NotImplementedError("Pandas UDFs are not supported in BigQuery") + + def _register_udfs(self, expr: ir.Expr) -> None: + """No op because UDFs made with CREATE TEMPORARY FUNCTION must be followed by a query.""" + + @contextmanager + def _safe_raw_sql(self, *args, **kwargs): + yield self.raw_sql(*args, **kwargs) + def compile(expr, params=None, **kwargs): """Compile an expression for BigQuery.""" diff --git a/ibis/backends/bigquery/client.py b/ibis/backends/bigquery/client.py index eadf83384898..d785e99ddfa7 100644 --- a/ibis/backends/bigquery/client.py +++ b/ibis/backends/bigquery/client.py @@ -24,7 +24,7 @@ def schema_from_bigquery_table(table): partition_field = partition_info.field or NATIVE_PARTITION_COL # Only add a new column if it's not already a column in the schema if partition_field not in schema: - schema |= {partition_field: dt.timestamp} + schema |= {partition_field: dt.Timestamp(timezone="UTC")} return schema diff --git a/ibis/backends/bigquery/compiler.py b/ibis/backends/bigquery/compiler.py index c5e101426f5b..9b0bd63fac3c 100644 --- a/ibis/backends/bigquery/compiler.py +++ b/ibis/backends/bigquery/compiler.py @@ -3,145 +3,825 @@ from __future__ import annotations import re -from functools import partial +from functools import singledispatchmethod import sqlglot as sg -import toolz +import sqlglot.expressions as sge -import ibis.common.graph as lin +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt import ibis.expr.operations as ops -import ibis.expr.types as ir -from ibis.backends.base.sql import compiler as sql_compiler -from ibis.backends.bigquery import operations, registry, rewrites - - -class BigQueryUDFDefinition(sql_compiler.DDL): - """Represents definition of a temporary UDF.""" - - def __init__(self, expr, context): - self.expr = expr - self.context = context - - def compile(self): - """Generate UDF string from definition.""" - op = expr.op() if isinstance(expr := self.expr, ir.Expr) else expr - return op.sql - - -class BigQueryUnion(sql_compiler.Union): - """Union of tables.""" - - @classmethod - def keyword(cls, distinct): - """Use distinct UNION if distinct is True.""" - return "UNION DISTINCT" if distinct else "UNION ALL" - - -class BigQueryIntersection(sql_compiler.Intersection): - """Intersection of tables.""" - - @classmethod - def keyword(cls, distinct): - return "INTERSECT DISTINCT" if 
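[Editor's note, not part of the patch] _get_udf_source() above assembles a temporary JavaScript UDF: the Python function body is translated to JS, wrapped in a function definition, and followed by a return of a call to it. Roughly the kind of DDL that produces; the function name, body, and exact formatting/keyword rendering here are illustrative, not the compiler's literal output:

example_udf_ddl = '''
CREATE TEMPORARY FUNCTION my_add(a FLOAT64, b FLOAT64)
RETURNS FLOAT64 NOT DETERMINISTIC
LANGUAGE js
AS """
"use strict";
function my_add(a, b) { return a + b; }
return my_add(a, b);
""";
'''
# The trailing `return my_add(a, b);` mirrors the
# f"return {udf_node.__func_name__}({', '.join(udf_node.argnames)});" line above.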
distinct else "INTERSECT ALL" - - -class BigQueryDifference(sql_compiler.Difference): - """Difference of tables.""" - - @classmethod - def keyword(cls, distinct): - return "EXCEPT DISTINCT" if distinct else "EXCEPT ALL" - - -def find_bigquery_udf(op): - """Filter which includes only UDFs from expression tree.""" - if type(op) in BigQueryExprTranslator._rewrites: - op = BigQueryExprTranslator._rewrites[type(op)](op) - if isinstance(op, operations.BigQueryUDFNode): - result = op - else: - result = None - return lin.proceed, result - +from ibis import util +from ibis.backends.base.sqlglot.compiler import NULL, STAR, SQLGlotCompiler, paren +from ibis.backends.base.sqlglot.datatypes import BigQueryType, BigQueryUDFType +from ibis.backends.base.sqlglot.rewrites import ( + exclude_unsupported_window_frame_from_ops, + exclude_unsupported_window_frame_from_row_number, + rewrite_first_to_first_value, + rewrite_last_to_last_value, +) +from ibis.common.patterns import replace +from ibis.common.temporal import DateUnit, IntervalUnit, TimestampUnit, TimeUnit +from ibis.expr.rewrites import p, rewrite_sample, y _NAME_REGEX = re.compile(r'[^!"$()*,./;?@[\\\]^`{}~\n]+') -class BigQueryExprTranslator(sql_compiler.ExprTranslator): - """Translate expressions to strings.""" - - _registry = registry.OPERATION_REGISTRY - _rewrites = rewrites.REWRITES - - _forbids_frame_clause = ( - *sql_compiler.ExprTranslator._forbids_frame_clause, - ops.Lag, - ops.Lead, +@replace(p.WindowFunction(p.MinRank | p.DenseRank, y @ p.WindowFrame(start=None))) +def exclude_unsupported_window_frame_from_rank(_, y): + return ops.Subtract( + _.copy(frame=y.copy(start=None, end=0, order_by=y.order_by or (ops.NULL,))), 1 ) - _unsupported_reductions = (ops.ApproxMedian, ops.ApproxCountDistinct) - _dialect_name = "bigquery" - - @staticmethod - def _gen_valid_name(name: str) -> str: - name = "_".join(_NAME_REGEX.findall(name)) or "tmp" - return f"`{name}`" - - def name(self, translated: str, name: str): - # replace invalid characters in automatically generated names - valid_name = self._gen_valid_name(name) - if translated == valid_name: - return translated - return f"{translated} AS {valid_name}" - - @classmethod - def compiles(cls, klass): - def decorator(f): - cls._registry[klass] = f - return f - - return decorator - - def _trans_param(self, op): - if op not in self.context.params: - raise KeyError(op) - return f"@{op.name}" +class BigQueryCompiler(SQLGlotCompiler): + dialect = "bigquery" + type_mapper = BigQueryType + udf_type_mapper = BigQueryUDFType + rewrites = ( + rewrite_sample, + rewrite_first_to_first_value, + rewrite_last_to_last_value, + exclude_unsupported_window_frame_from_ops, + exclude_unsupported_window_frame_from_row_number, + exclude_unsupported_window_frame_from_rank, + *SQLGlotCompiler.rewrites, + ) -compiles = BigQueryExprTranslator.compiles - + NAN = sge.Cast( + this=sge.convert("NaN"), to=sge.DataType(this=sge.DataType.Type.DOUBLE) + ) + POS_INF = sge.Cast( + this=sge.convert("Infinity"), to=sge.DataType(this=sge.DataType.Type.DOUBLE) + ) + NEG_INF = sge.Cast( + this=sge.convert("-Infinity"), to=sge.DataType(this=sge.DataType.Type.DOUBLE) + ) -class BigQueryTableSetFormatter(sql_compiler.TableSetFormatter): - def _quote_identifier(self, name): - return sg.to_identifier(name).sql("bigquery") + def _aggregate(self, funcname: str, *args, where): + func = self.f[funcname] + if where is not None: + args = tuple(self.if_(where, arg, NULL) for arg in args) -class BigQueryCompiler(sql_compiler.Compiler): - translator_class = 
BigQueryExprTranslator - table_set_formatter_class = BigQueryTableSetFormatter - union_class = BigQueryUnion - intersect_class = BigQueryIntersection - difference_class = BigQueryDifference + return func(*args, dialect=self.dialect) - support_values_syntax_in_select = False - null_limit = None - cheap_in_memory_tables = True + @staticmethod + def _minimize_spec(start, end, spec): + if ( + start is None + and isinstance(getattr(end, "value", None), ops.Literal) + and end.value.value == 0 + and end.following + ): + return None + return spec + + @singledispatchmethod + def visit_node(self, op, **kw): + return super().visit_node(op, **kw) + + @visit_node.register(ops.GeoXMax) + @visit_node.register(ops.GeoXMin) + @visit_node.register(ops.GeoYMax) + @visit_node.register(ops.GeoYMin) + def visit_BoundingBox(self, op, *, arg): + name = type(op).__name__[len("Geo") :].lower() + return sge.Dot( + this=self.f.st_boundingbox(arg), expression=sg.to_identifier(name) + ) + + @visit_node.register(ops.GeoSimplify) + def visit_GeoSimplify(self, op, *, arg, tolerance, preserve_collapsed): + if ( + not isinstance(op.preserve_collapsed, ops.Literal) + or op.preserve_collapsed.value + ): + raise com.UnsupportedOperationError( + "BigQuery simplify does not support preserving collapsed geometries, " + "pass preserve_collapsed=False" + ) + return self.f.st_simplify(arg, tolerance) + + @visit_node.register(ops.ApproxMedian) + def visit_ApproxMedian(self, op, *, arg, where): + return self.agg.approx_quantiles(arg, 2, where=where)[self.f.offset(1)] + + @visit_node.register(ops.Pi) + def visit_Pi(self, op): + return self.f.acos(-1) + + @visit_node.register(ops.E) + def visit_E(self, op): + return self.f.exp(1) + + @visit_node.register(ops.TimeDelta) + def visit_TimeDelta(self, op, *, left, right, part): + return self.f.time_diff(left, right, part, dialect=self.dialect) + + @visit_node.register(ops.DateDelta) + def visit_DateDelta(self, op, *, left, right, part): + return self.f.date_diff(left, right, part, dialect=self.dialect) + + @visit_node.register(ops.TimestampDelta) + def visit_TimestampDelta(self, op, *, left, right, part): + left_tz = op.left.dtype.timezone + right_tz = op.right.dtype.timezone + + if left_tz is None and right_tz is None: + return self.f.datetime_diff(left, right, part) + elif left_tz is not None and right_tz is not None: + return self.f.timestamp_diff(left, right, part) + + raise com.UnsupportedOperationError( + "timestamp difference with mixed timezone/timezoneless values is not implemented" + ) + + @visit_node.register(ops.GroupConcat) + def visit_GroupConcat(self, op, *, arg, sep, where): + if where is not None: + arg = self.if_(where, arg, NULL) + return self.f.string_agg(arg, sep) + + @visit_node.register(ops.FloorDivide) + def visit_FloorDivide(self, op, *, left, right): + return self.cast(self.f.floor(self.f.ieee_divide(left, right)), op.dtype) + + @visit_node.register(ops.Log2) + def visit_Log2(self, op, *, arg): + return self.f.log(arg, 2, dialect=self.dialect) + + @visit_node.register(ops.Log) + def visit_Log(self, op, *, arg, base): + if base is None: + return self.f.ln(arg) + return self.f.log(arg, base, dialect=self.dialect) + + @visit_node.register(ops.ArrayRepeat) + def visit_ArrayRepeat(self, op, *, arg, times): + start = step = 1 + array_length = self.f.array_length(arg) + stop = self.f.greatest(times, 0) * array_length + i = sg.to_identifier("i") + idx = self.f.coalesce( + self.f.nullif(self.f.mod(i, array_length), 0), array_length + ) + series = self.f.generate_array(start, 
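[Editor's note, not part of the patch] The new compiler dispatches every operation through a single visit_node entry point with per-operation overloads registered via functools.singledispatchmethod. A self-contained, simplified sketch of that pattern (the classes here are stand-ins, not ibis operations):

from functools import singledispatchmethod


class Node: ...
class Add(Node): ...


class MiniCompiler:
    @singledispatchmethod
    def visit_node(self, op, **kw):
        # fallback for unregistered operation types
        raise NotImplementedError(type(op).__name__)

    @visit_node.register(Add)
    def visit_Add(self, op, *, left, right):
        return f"({left} + {right})"


print(MiniCompiler().visit_node(Add(), left="a", right="b"))  # (a + b)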
stop, step) + return self.f.array( + sg.select(arg[self.f.safe_ordinal(idx)]).from_(self._unnest(series, as_=i)) + ) + + @visit_node.register(ops.Capitalize) + def visit_Capitalize(self, op, *, arg): + return self.f.concat( + self.f.upper(self.f.substr(arg, 1, 1)), self.f.lower(self.f.substr(arg, 2)) + ) + + @visit_node.register(ops.NthValue) + def visit_NthValue(self, op, *, arg, nth): + if not isinstance(op.nth, ops.Literal): + raise com.UnsupportedOperationError( + f"BigQuery `nth` must be a literal; got {type(op.nth)}" + ) + return self.f.nth_value(arg, nth) + + @visit_node.register(ops.StrRight) + def visit_StrRight(self, op, *, arg, nchars): + return self.f.substr(arg, -self.f.least(self.f.length(arg), nchars)) + + @visit_node.register(ops.StringJoin) + def visit_StringJoin(self, op, *, arg, sep): + return self.f.array_to_string(self.f.array(*arg), sep) + + @visit_node.register(ops.DayOfWeekIndex) + def visit_DayOfWeekIndex(self, op, *, arg): + return self.f.mod(self.f.extract(self.v.dayofweek, arg) + 5, 7) + + @visit_node.register(ops.DayOfWeekName) + def visit_DayOfWeekName(self, op, *, arg): + return self.f.initcap(sge.Cast(this=arg, to="STRING FORMAT 'DAY'")) + + @visit_node.register(ops.StringToTimestamp) + def visit_StringToTimestamp(self, op, *, arg, format_str): + if (timezone := op.dtype.timezone) is not None: + return self.f.parse_timestamp(format_str, arg, timezone) + return self.f.parse_datetime(format_str, arg) + + @visit_node.register(ops.Floor) + def visit_Floor(self, op, *, arg): + return self.cast(self.f.floor(arg), op.dtype) + + @visit_node.register(ops.ArrayCollect) + def visit_ArrayCollect(self, op, *, arg, where): + if where is not None: + arg = self.if_(where, arg, NULL) + return self.f.array_agg(sge.IgnoreNulls(this=arg)) + + def _neg_idx_to_pos(self, arg, idx): + return self.if_(idx < 0, self.f.array_length(arg) + idx, idx) + + @visit_node.register(ops.ArraySlice) + def visit_ArraySlice(self, op, *, arg, start, stop): + index = sg.to_identifier("bq_arr_slice") + cond = [index >= self._neg_idx_to_pos(arg, start)] + + if stop is not None: + cond.append(index < self._neg_idx_to_pos(arg, stop)) + + el = sg.to_identifier("el") + return self.f.array( + sg.select(el).from_(self._unnest(arg, as_=el, offset=index)).where(*cond) + ) + + @visit_node.register(ops.ArrayIndex) + def visit_ArrayIndex(self, op, *, arg, index): + return arg[self.f.safe_offset(index)] + + @visit_node.register(ops.ArrayContains) + def visit_ArrayContains(self, op, *, arg, other): + name = sg.to_identifier(util.gen_name("bq_arr_contains")) + return sge.Exists( + this=sg.select(sge.convert(1)) + .from_(self._unnest(arg, as_=name)) + .where(name.eq(other)) + ) + + @visit_node.register(ops.StringContains) + def visit_StringContains(self, op, *, haystack, needle): + return self.f.strpos(haystack, needle) > 0 + + @visit_node.register(ops.StringFind) + def visti_StringFind(self, op, *, arg, substr, start, end): + if start is not None: + raise NotImplementedError( + "`start` not implemented for BigQuery string find" + ) + if end is not None: + raise NotImplementedError("`end` not implemented for BigQuery string find") + return self.f.strpos(arg, substr) + + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_string(): + return sge.convert( + str(value) + # Escape \ first so we don't double escape other characters. 
+ .replace("\\", "\\\\") + # ASCII escape sequences that are recognized in Python: + # https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals + .replace("\a", "\\a") # Bell + .replace("\b", "\\b") # Backspace + .replace("\f", "\\f") # Formfeed + .replace("\n", "\\n") # Newline / Linefeed + .replace("\r", "\\r") # Carriage return + .replace("\t", "\\t") # Tab + .replace("\v", "\\v") # Vertical tab + ) + elif dtype.is_inet() or dtype.is_macaddr(): + return sge.convert(str(value)) + elif dtype.is_timestamp(): + funcname = "datetime" if dtype.timezone is None else "timestamp" + return self.f[funcname](value.isoformat()) + elif dtype.is_date(): + return self.f.datefromparts(value.year, value.month, value.day) + elif dtype.is_time(): + return self.f.time(value.hour, value.minute, value.second) + elif dtype.is_binary(): + return sge.Cast( + this=sge.convert(value.hex()), + to=sge.DataType(this=sge.DataType.Type.BINARY), + format=sge.convert("HEX"), + ) + elif dtype.is_interval(): + if dtype.unit == IntervalUnit.NANOSECOND: + raise com.UnsupportedOperationError( + "BigQuery does not support nanosecond intervals" + ) + elif dtype.is_uuid(): + return sge.convert(str(value)) + return None + + @visit_node.register(ops.IntervalFromInteger) + def visit_IntervalFromInteger(self, op, *, arg, unit): + if unit == IntervalUnit.NANOSECOND: + raise com.UnsupportedOperationError( + "BigQuery does not support nanosecond intervals" + ) + return sge.Interval(this=arg, unit=self.v[unit.singular]) + + @visit_node.register(ops.Strftime) + def visit_Strftime(self, op, *, arg, format_str): + arg_dtype = op.arg.dtype + if arg_dtype.is_timestamp(): + if (timezone := arg_dtype.timezone) is None: + return self.f.format_datetime(format_str, arg) + else: + return self.f.format_timestamp(format_str, arg, timezone) + elif arg_dtype.is_date(): + return self.f.format_date(format_str, arg) + else: + assert arg_dtype.is_time(), arg_dtype + return self.f.format_time(format_str, arg) + + @visit_node.register(ops.IntervalMultiply) + def visit_IntervalMultiply(self, op, *, left, right): + unit = self.v[op.left.dtype.resolution.upper()] + return sge.Interval(this=self.f.extract(unit, left) * right, unit=unit) + + @visit_node.register(ops.TimestampFromUNIX) + def visit_TimestampFromUNIX(self, op, *, arg, unit): + unit = op.unit + if unit == TimestampUnit.SECOND: + return self.f.timestamp_seconds(arg) + elif unit == TimestampUnit.MILLISECOND: + return self.f.timestamp_millis(arg) + elif unit == TimestampUnit.MICROSECOND: + return self.f.timestamp_micros(arg) + elif unit == TimestampUnit.NANOSECOND: + return self.f.timestamp_micros( + self.cast(self.f.round(arg / 1_000), dt.int64) + ) + else: + raise com.UnsupportedOperationError(f"Unit not supported: {unit}") + + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + from_ = op.arg.dtype + if from_.is_timestamp() and to.is_integer(): + return self.f.unix_micros(arg) + elif from_.is_integer() and to.is_timestamp(): + return self.f.timestamp_seconds(arg) + elif from_.is_interval() and to.is_integer(): + if from_.unit in { + IntervalUnit.WEEK, + IntervalUnit.QUARTER, + IntervalUnit.NANOSECOND, + }: + raise com.UnsupportedOperationError( + f"BigQuery does not allow extracting date part `{from_.unit}` from intervals" + ) + return self.f.extract(self.v[to.resolution.upper()], arg) + elif from_.is_integer() and to.is_interval(): + return sge.Interval(this=arg, unit=self.v[to.unit.singular]) + elif from_.is_floating() and to.is_integer(): + return 
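[Editor's note, not part of the patch] The chain of .replace() calls above escapes the backslash first so the control-character escapes added afterwards are not double-escaped. The same logic as a small standalone helper, for illustration only:

def escape_string_literal(value: str) -> str:
    """Escape backslash first, then the recognized ASCII control escapes."""
    value = value.replace("\\", "\\\\")
    for char, escaped in [
        ("\a", "\\a"), ("\b", "\\b"), ("\f", "\\f"), ("\n", "\\n"),
        ("\r", "\\r"), ("\t", "\\t"), ("\v", "\\v"),
    ]:
        value = value.replace(char, escaped)
    return value

# escape_string_literal("a\\b\nc") -> 'a\\\\b\\nc'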
self.cast(self.f.trunc(arg), dt.int64) + return super().visit_Cast(op, arg=arg, to=to) + + @visit_node.register(ops.JSONGetItem) + def visit_JSONGetItem(self, op, *, arg, index): + return arg[index] + + @visit_node.register(ops.ExtractEpochSeconds) + def visit_ExtractEpochSeconds(self, op, *, arg): + return self.f.unix_seconds(arg) + + @visit_node.register(ops.ExtractWeekOfYear) + def visit_ExtractWeekOfYear(self, op, *, arg): + return self.f.extract(self.v.isoweek, arg) + + @visit_node.register(ops.ExtractYear) + @visit_node.register(ops.ExtractQuarter) + @visit_node.register(ops.ExtractMonth) + @visit_node.register(ops.ExtractDay) + @visit_node.register(ops.ExtractDayOfYear) + @visit_node.register(ops.ExtractHour) + @visit_node.register(ops.ExtractMinute) + @visit_node.register(ops.ExtractSecond) + @visit_node.register(ops.ExtractMicrosecond) + @visit_node.register(ops.ExtractMillisecond) + def visit_ExtractDateField(self, op, *, arg): + name = type(op).__name__[len("Extract") :].upper() + return self.f.extract(self.v[name], arg) + + @visit_node.register(ops.TimestampTruncate) + def visit_Timestamp(self, op, *, arg, unit): + if unit == IntervalUnit.NANOSECOND: + raise com.UnsupportedOperationError( + f"BigQuery does not support truncating {op.arg.dtype} values to unit {unit!r}" + ) + elif unit == IntervalUnit.WEEK: + unit = "WEEK(MONDAY)" + else: + unit = unit.name + return self.f.timestamp_trunc(arg, self.v[unit], dialect=self.dialect) + + @visit_node.register(ops.DateTruncate) + def visit_DateTruncate(self, op, *, arg, unit): + if unit == DateUnit.WEEK: + unit = "WEEK(MONDAY)" + else: + unit = unit.name + return self.f.date_trunc(arg, self.v[unit], dialect=self.dialect) + + @visit_node.register(ops.TimeTruncate) + def visit_TimeTruncate(self, op, *, arg, unit): + if unit == TimeUnit.NANOSECOND: + raise com.UnsupportedOperationError( + f"BigQuery does not support truncating {op.arg.dtype} values to unit {unit!r}" + ) + else: + unit = unit.name + return self.f.time_trunc(arg, self.v[unit], dialect=self.dialect) + + def _nullifzero(self, step, zero, step_dtype): + if step_dtype.is_interval(): + return self.if_(step.eq(zero), NULL, step) + return self.f.nullif(step, zero) + + def _zero(self, dtype): + if dtype.is_interval(): + return self.f.make_interval() + return sge.convert(0) + + def _sign(self, value, dtype): + if dtype.is_interval(): + zero = self._zero(dtype) + return sge.Case( + ifs=[ + self.if_(value < zero, -1), + self.if_(value.eq(zero), 0), + self.if_(value > zero, 1), + ], + default=NULL, + ) + return self.f.sign(value) + + def _make_range(self, func, start, stop, step, step_dtype): + step_sign = self._sign(step, step_dtype) + delta_sign = self._sign(stop - start, step_dtype) + zero = self._zero(step_dtype) + nullifzero = self._nullifzero(step, zero, step_dtype) + condition = sg.and_(sg.not_(nullifzero.is_(NULL)), step_sign.eq(delta_sign)) + gen_array = func(start, stop, step) + name = sg.to_identifier(util.gen_name("bq_arr_range")) + inner = ( + sg.select(name) + .from_(self._unnest(gen_array, as_=name)) + .where(name.neq(stop)) + ) + return self.if_(condition, self.f.array(inner), self.f.array()) + + @visit_node.register(ops.IntegerRange) + def visit_IntegerRange(self, op, *, start, stop, step): + return self._make_range(self.f.generate_array, start, stop, step, op.step.dtype) + + @visit_node.register(ops.TimestampRange) + def visit_TimestampRange(self, op, *, start, stop, step): + if op.start.dtype.timezone is None or op.stop.dtype.timezone is None: + raise 
com.IbisTypeError( + "Timestamps without timezone values are not supported when generating timestamp ranges" + ) + return self._make_range( + self.f.generate_timestamp_array, start, stop, step, op.step.dtype + ) + + @visit_node.register(ops.First) + def visit_First(self, op, *, arg, where): + if where is not None: + arg = self.if_(where, arg, NULL) + array = self.f.array_agg( + sge.Limit(this=sge.IgnoreNulls(this=arg), expression=sge.convert(1)), + ) + return array[self.f.safe_offset(0)] + + @visit_node.register(ops.Last) + def visit_Last(self, op, *, arg, where): + if where is not None: + arg = self.if_(where, arg, NULL) + array = self.f.array_reverse(self.f.array_agg(sge.IgnoreNulls(this=arg))) + return array[self.f.safe_offset(0)] + + @visit_node.register(ops.Arbitrary) + def _arbitrary(self, op, *, arg, how, where): + if how != "first": + raise com.UnsupportedOperationError( + f"{how!r} value not supported for arbitrary in BigQuery" + ) + + return self.agg.any_value(arg, where=where) + + @visit_node.register(ops.ArrayFilter) + def visit_ArrayFilter(self, op, *, arg, body, param): + return self.f.array( + sg.select(param).from_(self._unnest(arg, as_=param)).where(body) + ) + + @visit_node.register(ops.ArrayMap) + def visit_ArrayMap(self, op, *, arg, body, param): + return self.f.array(sg.select(body).from_(self._unnest(arg, as_=param))) + + @visit_node.register(ops.ArrayZip) + def visit_ArrayZip(self, op, *, arg): + lengths = [self.f.array_length(arr) - 1 for arr in arg] + idx = sg.to_identifier(util.gen_name("bq_arr_idx")) + indices = self._unnest( + self.f.generate_array(0, self.f.greatest(*lengths)), as_=idx + ) + struct_fields = [ + arr[self.f.safe_offset(idx)].as_(name) + for name, arr in zip(op.dtype.value_type.names, arg) + ] + return self.f.array( + sge.Select(kind="STRUCT", expressions=struct_fields).from_(indices) + ) + + @visit_node.register(ops.ArrayPosition) + def visit_ArrayPosition(self, op, *, arg, other): + name = sg.to_identifier(util.gen_name("bq_arr")) + idx = sg.to_identifier(util.gen_name("bq_arr_idx")) + unnest = self._unnest(arg, as_=name, offset=idx) + return self.f.coalesce( + sg.select(idx + 1).from_(unnest).where(name.eq(other)).limit(1).subquery(), + 0, + ) + + def _unnest(self, expression, *, as_, offset=None): + alias = sge.TableAlias(columns=[sg.to_identifier(as_)]) + return sge.Unnest(expressions=[expression], alias=alias, offset=offset) + + @visit_node.register(ops.ArrayRemove) + def visit_ArrayRemove(self, op, *, arg, other): + name = sg.to_identifier(util.gen_name("bq_arr")) + unnest = self._unnest(arg, as_=name) + return self.f.array(sg.select(name).from_(unnest).where(name.neq(other))) + + @visit_node.register(ops.ArrayDistinct) + def visit_ArrayDistinct(self, op, *, arg): + name = util.gen_name("bq_arr") + return self.f.array( + sg.select(name).distinct().from_(self._unnest(arg, as_=name)) + ) + + @visit_node.register(ops.ArraySort) + def visit_ArraySort(self, op, *, arg): + name = util.gen_name("bq_arr") + return self.f.array( + sg.select(name).from_(self._unnest(arg, as_=name)).order_by(name) + ) + + @visit_node.register(ops.ArrayUnion) + def visit_ArrayUnion(self, op, *, left, right): + lname = util.gen_name("bq_arr_left") + rname = util.gen_name("bq_arr_right") + lhs = sg.select(lname).from_(self._unnest(left, as_=lname)) + rhs = sg.select(rname).from_(self._unnest(right, as_=rname)) + return self.f.array(sg.union(lhs, rhs, distinct=True)) + + @visit_node.register(ops.ArrayIntersect) + def visit_ArrayIntersect(self, op, *, left, right): + lname 
= util.gen_name("bq_arr_left") + rname = util.gen_name("bq_arr_right") + lhs = sg.select(lname).from_(self._unnest(left, as_=lname)) + rhs = sg.select(rname).from_(self._unnest(right, as_=rname)) + return self.f.array(sg.intersect(lhs, rhs, distinct=True)) + + @visit_node.register(ops.Substring) + def visit_Substring(self, op, *, arg, start, length): + if isinstance(op.length, ops.Literal) and (value := op.length.value) < 0: + raise com.IbisInputError( + f"Length parameter must be a non-negative value; got {value}" + ) + suffix = (length,) * (length is not None) + if_pos = self.f.substr(arg, start + 1, *suffix) + if_neg = self.f.substr(arg, self.f.length(arg) + start + 1, *suffix) + return self.if_(start >= 0, if_pos, if_neg) + + @visit_node.register(ops.RegexExtract) + def visit_RegexExtract(self, op, *, arg, pattern, index): + matches = self.f.regexp_contains(arg, pattern) + nonzero_index_replace = self.f.regexp_replace( + arg, + self.f.concat(".*?", pattern, ".*"), + self.f.concat("\\\\", self.cast(index, dt.string)), + ) + zero_index_replace = self.f.regexp_replace( + arg, self.f.concat(".*?", self.f.concat("(", pattern, ")"), ".*"), "\\\\1" + ) + extract = self.if_(index.eq(0), zero_index_replace, nonzero_index_replace) + return self.if_(matches, extract, NULL) + + @visit_node.register(ops.TimestampAdd) + @visit_node.register(ops.TimestampSub) + def visit_TimestampAddSub(self, op, *, left, right): + if not isinstance(right, sge.Interval): + raise com.OperationNotDefinedError( + "BigQuery does not support non-literals on the right side of timestamp add/subtract" + ) + if (unit := op.right.dtype.unit) == IntervalUnit.NANOSECOND: + raise com.UnsupportedOperationError( + f"BigQuery does not allow binary operation {type(op).__name__} with " + f"INTERVAL offset {unit}" + ) + + opname = type(op).__name__[len("Timestamp") :] + funcname = f"TIMESTAMP_{opname.upper()}" + return self.f.anon[funcname](left, right) + + @visit_node.register(ops.DateAdd) + @visit_node.register(ops.DateSub) + def visit_DateAddSub(self, op, *, left, right): + if not isinstance(right, sge.Interval): + raise com.OperationNotDefinedError( + "BigQuery does not support non-literals on the right side of date add/subtract" + ) + if not (unit := op.right.dtype.unit).is_date(): + raise com.UnsupportedOperationError( + f"BigQuery does not allow binary operation {type(op).__name__} with " + f"INTERVAL offset {unit}" + ) + opname = type(op).__name__[len("Date") :] + funcname = f"DATE_{opname.upper()}" + return self.f.anon[funcname](left, right) + + @visit_node.register(ops.Covariance) + def visit_Covariance(self, op, *, left, right, how, where): + if where is not None: + left = self.if_(where, left, NULL) + right = self.if_(where, right, NULL) + + if op.left.dtype.is_boolean(): + left = self.cast(left, dt.int64) + + if op.right.dtype.is_boolean(): + right = self.cast(right, dt.int64) + + how = op.how[:4].upper() + assert how in ("POP", "SAMP"), 'how not in ("POP", "SAMP")' + return self.agg[f"COVAR_{how}"](left, right, where=where) + + @visit_node.register(ops.Correlation) + def visit_Correlation(self, op, *, left, right, how, where): + if how == "sample": + raise ValueError(f"Correlation with how={how!r} is not supported.") + + if where is not None: + left = self.if_(where, left, NULL) + right = self.if_(where, right, NULL) + + if op.left.dtype.is_boolean(): + left = self.cast(left, dt.int64) + + if op.right.dtype.is_boolean(): + right = self.cast(right, dt.int64) + + return self.agg.corr(left, right, where=where) + + 
@visit_node.register(ops.TypeOf) + def visit_TypeOf(self, op, *, arg): + name = sg.to_identifier(util.gen_name("bq_typeof")) + from_ = self._unnest(self.f.array(self.f.format("%T", arg)), as_=name) + ifs = [ + self.if_( + self.f.regexp_contains(name, '^[A-Z]+ "'), + self.f.regexp_extract(name, '^([A-Z]+) "'), + ), + self.if_(self.f.regexp_contains(name, "^-?[0-9]*$"), "INT64"), + self.if_( + self.f.regexp_contains( + name, r'^(-?[0-9]+[.e].*|CAST\\("([^"]*)" AS FLOAT64\\))$' + ), + "FLOAT64", + ), + self.if_(name.isin(sge.convert("true"), sge.convert("false")), "BOOL"), + self.if_( + sg.or_(self.f.starts_with(name, '"'), self.f.starts_with(name, "'")), + "STRING", + ), + self.if_(self.f.starts_with(name, 'b"'), "BYTES"), + self.if_(self.f.starts_with(name, "["), "ARRAY"), + self.if_(self.f.regexp_contains(name, r"^(STRUCT)?\\("), "STRUCT"), + self.if_(self.f.starts_with(name, "ST_"), "GEOGRAPHY"), + self.if_(name.eq(sge.convert("NULL")), "NULL"), + ] + case = sge.Case(ifs=ifs, default=sge.convert("UNKNOWN")) + return sg.select(case).from_(from_).subquery() + + @visit_node.register(ops.Xor) + def visit_Xor(self, op, *, left, right): + return sg.or_(sg.and_(left, sg.not_(right)), sg.and_(sg.not_(left), right)) + + @visit_node.register(ops.HashBytes) + def visit_HashBytes(self, op, *, arg, how): + if how not in ("md5", "sha1", "sha256", "sha512"): + raise NotImplementedError(how) + return self.f[how](arg) @staticmethod - def _generate_setup_queries(expr, context): - """Generate DDL for temporary resources.""" - nodes = lin.traverse(find_bigquery_udf, expr) - queries = map(partial(BigQueryUDFDefinition, context=context), nodes) + def _gen_valid_name(name: str) -> str: + return "_".join(_NAME_REGEX.findall(name)) or "tmp" + + @visit_node.register(ops.CountStar) + def visit_CountStar(self, op, *, arg, where): + if where is not None: + return self.f.countif(where) + return self.f.count(STAR) + + @visit_node.register(ops.Degrees) + def visit_Degrees(self, op, *, arg): + return paren(180 * arg / self.f.acos(-1)) + + @visit_node.register(ops.Radians) + def visit_Radians(self, op, *, arg): + return paren(self.f.acos(-1) * arg / 180) + + @visit_node.register(ops.CountDistinct) + def visit_CountDistinct(self, op, *, arg, where): + if where is not None: + arg = self.if_(where, arg, NULL) + return self.f.count(sge.Distinct(expressions=[arg])) + + @visit_node.register(ops.CountDistinctStar) + @visit_node.register(ops.DateDiff) + @visit_node.register(ops.ExtractAuthority) + @visit_node.register(ops.ExtractFile) + @visit_node.register(ops.ExtractFragment) + @visit_node.register(ops.ExtractHost) + @visit_node.register(ops.ExtractPath) + @visit_node.register(ops.ExtractProtocol) + @visit_node.register(ops.ExtractQuery) + @visit_node.register(ops.ExtractUserInfo) + @visit_node.register(ops.FindInSet) + @visit_node.register(ops.Median) + @visit_node.register(ops.Quantile) + @visit_node.register(ops.MultiQuantile) + @visit_node.register(ops.RegexSplit) + @visit_node.register(ops.RowID) + @visit_node.register(ops.TimestampBucket) + @visit_node.register(ops.TimestampDiff) + def visit_Undefined(self, op, **_): + raise com.OperationNotDefinedError(type(op).__name__) + + +_SIMPLE_OPS = { + ops.StringAscii: "ascii", + ops.BitAnd: "bit_and", + ops.BitOr: "bit_or", + ops.BitXor: "bit_xor", + ops.DateFromYMD: "date", + ops.Divide: "ieee_divide", + ops.EndsWith: "ends_with", + ops.GeoArea: "st_area", + ops.GeoAsBinary: "st_asbinary", + ops.GeoAsText: "st_astext", + ops.GeoAzimuth: "st_azimuth", + ops.GeoBuffer: 
"st_buffer", + ops.GeoCentroid: "st_centroid", + ops.GeoContains: "st_contains", + ops.GeoCoveredBy: "st_coveredby", + ops.GeoCovers: "st_covers", + ops.GeoDWithin: "st_dwithin", + ops.GeoDifference: "st_difference", + ops.GeoDisjoint: "st_disjoint", + ops.GeoDistance: "st_distance", + ops.GeoEndPoint: "st_endpoint", + ops.GeoEquals: "st_equals", + ops.GeoGeometryType: "st_geometrytype", + ops.GeoIntersection: "st_intersection", + ops.GeoIntersects: "st_intersects", + ops.GeoLength: "st_length", + ops.GeoMaxDistance: "st_maxdistance", + ops.GeoNPoints: "st_numpoints", + ops.GeoPerimeter: "st_perimeter", + ops.GeoPoint: "st_geogpoint", + ops.GeoPointN: "st_pointn", + ops.GeoStartPoint: "st_startpoint", + ops.GeoTouches: "st_touches", + ops.GeoUnaryUnion: "st_union_agg", + ops.GeoUnion: "st_union", + ops.GeoWithin: "st_within", + ops.GeoX: "st_x", + ops.GeoY: "st_y", + ops.Hash: "farm_fingerprint", + ops.IsInf: "is_inf", + ops.IsNan: "is_nan", + ops.Log10: "log10", + ops.LPad: "lpad", + ops.RPad: "rpad", + ops.Levenshtein: "edit_distance", + ops.Modulus: "mod", + ops.RandomScalar: "rand", + ops.RegexReplace: "regexp_replace", + ops.RegexSearch: "regexp_contains", + ops.Time: "time", + ops.TimeFromHMS: "time", + ops.TimestampFromYMDHMS: "datetime", + ops.TimestampNow: "current_timestamp", +} + + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @BigQueryCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) - # UDFs are uniquely identified by the name of the Node subclass we - # generate. - def key(x): - expr = x.expr - op = expr.op() if isinstance(expr, ir.Expr) else expr - return op.__class__.__name__ + else: - return list(toolz.unique(queries, key=key)) + @BigQueryCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) + setattr(BigQueryCompiler, f"visit_{_op.__name__}", _fmt) -# Register custom UDFs -import ibis.backends.bigquery.custom_udfs # noqa: F401, E402 +del _op, _name, _fmt diff --git a/ibis/backends/bigquery/converter.py b/ibis/backends/bigquery/converter.py new file mode 100644 index 000000000000..613260188267 --- /dev/null +++ b/ibis/backends/bigquery/converter.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from ibis.formats.pandas import PandasData + + +class BigQueryPandasData(PandasData): + @classmethod + def convert_GeoSpatial(cls, s, dtype, pandas_type): + import geopandas as gpd + import shapely as shp + + return gpd.GeoSeries(shp.from_wkt(s)) + + convert_Point = ( + convert_LineString + ) = ( + convert_Polygon + ) = ( + convert_MultiLineString + ) = convert_MultiPoint = convert_MultiPolygon = convert_GeoSpatial diff --git a/ibis/backends/bigquery/custom_udfs.py b/ibis/backends/bigquery/custom_udfs.py deleted file mode 100644 index 95b27992ca4d..000000000000 --- a/ibis/backends/bigquery/custom_udfs.py +++ /dev/null @@ -1,41 +0,0 @@ -from __future__ import annotations - -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -from ibis.backends.bigquery.compiler import BigQueryExprTranslator -from ibis.backends.bigquery.udf import udf - -# Based on: -# https://github.com/GoogleCloudPlatform/bigquery-utils/blob/45e1ac51367ab6209f68e04b1660d5b00258c131/udfs/community/typeof.sqlx#L1 -typeof_ = udf.sql( - name="typeof", - params={"input": "ANY TYPE"}, - output_type=dt.str, - sql_expression=r""" - ( - SELECT - CASE - -- 
Process NUMERIC, DATE, DATETIME, TIME, TIMESTAMP, - WHEN REGEXP_CONTAINS(literal, r'^[A-Z]+ "') THEN REGEXP_EXTRACT(literal, r'^([A-Z]+) "') - WHEN REGEXP_CONTAINS(literal, r'^-?[0-9]*$') THEN 'INT64' - WHEN - REGEXP_CONTAINS(literal, r'^(-?[0-9]+[.e].*|CAST\("([^"]*)" AS FLOAT64\))$') - THEN - 'FLOAT64' - WHEN literal IN ('true', 'false') THEN 'BOOL' - WHEN literal LIKE '"%' OR literal LIKE "'%" THEN 'STRING' - WHEN literal LIKE 'b"%' THEN 'BYTES' - WHEN literal LIKE '[%' THEN 'ARRAY' - WHEN REGEXP_CONTAINS(literal, r'^(STRUCT)?\(') THEN 'STRUCT' - WHEN literal LIKE 'ST_%' THEN 'GEOGRAPHY' - WHEN literal = 'NULL' THEN 'NULL' - ELSE - 'UNKNOWN' - END - FROM - UNNEST([FORMAT('%T', input)]) AS literal - ) - """, -) - -BigQueryExprTranslator.rewrites(ops.TypeOf)(lambda op: typeof_(op.arg).op()) diff --git a/ibis/backends/bigquery/datatypes.py b/ibis/backends/bigquery/datatypes.py index 130d2500a74c..10aef9e1c943 100644 --- a/ibis/backends/bigquery/datatypes.py +++ b/ibis/backends/bigquery/datatypes.py @@ -1,126 +1,14 @@ from __future__ import annotations import google.cloud.bigquery as bq -import sqlglot.expressions as sge import ibis import ibis.expr.datatypes as dt import ibis.expr.schema as sch -from ibis.backends.base.sqlglot.datatypes import SqlglotType +from ibis.backends.base.sqlglot.datatypes import BigQueryType from ibis.formats import SchemaMapper -class BigQueryType(SqlglotType): - dialect = "bigquery" - - default_decimal_precision = 38 - default_decimal_scale = 9 - - @classmethod - def _from_sqlglot_NUMERIC(cls) -> dt.Decimal: - return dt.Decimal( - cls.default_decimal_precision, - cls.default_decimal_scale, - nullable=cls.default_nullable, - ) - - @classmethod - def _from_sqlglot_BIGNUMERIC(cls) -> dt.Decimal: - return dt.Decimal(76, 38, nullable=cls.default_nullable) - - @classmethod - def _from_sqlglot_DATETIME(cls) -> dt.Decimal: - return dt.Timestamp(timezone=None, nullable=cls.default_nullable) - - @classmethod - def _from_sqlglot_TIMESTAMP(cls) -> dt.Decimal: - return dt.Timestamp(timezone="UTC", nullable=cls.default_nullable) - - @classmethod - def _from_sqlglot_GEOGRAPHY(cls) -> dt.Decimal: - return dt.GeoSpatial( - geotype="geography", srid=4326, nullable=cls.default_nullable - ) - - @classmethod - def _from_sqlglot_TINYINT(cls) -> dt.Int64: - return dt.Int64(nullable=cls.default_nullable) - - _from_sqlglot_UINT = ( - _from_sqlglot_USMALLINT - ) = ( - _from_sqlglot_UTINYINT - ) = _from_sqlglot_INT = _from_sqlglot_SMALLINT = _from_sqlglot_TINYINT - - @classmethod - def _from_sqlglot_UBIGINT(cls) -> dt.Int64: - raise TypeError("Unsigned BIGINT isn't representable in BigQuery INT64") - - @classmethod - def _from_sqlglot_FLOAT(cls) -> dt.Double: - return dt.Float64(nullable=cls.default_nullable) - - @classmethod - def _from_sqlglot_MAP(cls) -> dt.Map: - raise NotImplementedError( - "Cannot convert sqlglot Map type to ibis type: maps are not supported in BigQuery" - ) - - @classmethod - def _from_ibis_Map(cls, dtype: dt.Map) -> sge.DataType: - raise NotImplementedError( - "Cannot convert Ibis Map type to BigQuery type: maps are not supported in BigQuery" - ) - - @classmethod - def _from_ibis_Timestamp(cls, dtype: dt.Timestamp) -> sge.DataType: - if dtype.timezone is None: - return sge.DataType(this=sge.DataType.Type.DATETIME) - elif dtype.timezone == "UTC": - return sge.DataType(this=sge.DataType.Type.TIMESTAMPTZ) - else: - raise TypeError( - "BigQuery does not support timestamps with timezones other than 'UTC'" - ) - - @classmethod - def _from_ibis_Decimal(cls, dtype: 
dt.Decimal) -> sge.DataType: - precision = dtype.precision - scale = dtype.scale - if (precision, scale) == (76, 38): - return sge.DataType(this=sge.DataType.Type.BIGDECIMAL) - elif (precision, scale) in ((38, 9), (None, None)): - return sge.DataType(this=sge.DataType.Type.DECIMAL) - else: - raise TypeError( - "BigQuery only supports decimal types with precision of 38 and " - f"scale of 9 (NUMERIC) or precision of 76 and scale of 38 (BIGNUMERIC). " - f"Current precision: {dtype.precision}. Current scale: {dtype.scale}" - ) - - @classmethod - def _from_ibis_UInt64(cls, dtype: dt.UInt64) -> sge.DataType: - raise TypeError( - f"Conversion from {dtype} to BigQuery integer type (Int64) is lossy" - ) - - @classmethod - def _from_ibis_UInt32(cls, dtype: dt.UInt32) -> sge.DataType: - return sge.DataType(this=sge.DataType.Type.BIGINT) - - _from_ibis_UInt8 = _from_ibis_UInt16 = _from_ibis_UInt32 - - @classmethod - def _from_ibis_GeoSpatial(cls, dtype: dt.GeoSpatial) -> sge.DataType: - if (dtype.geotype, dtype.srid) == ("geography", 4326): - return sge.DataType(this=sge.DataType.Type.GEOGRAPHY) - else: - raise TypeError( - "BigQuery geography uses points on WGS84 reference ellipsoid." - f"Current geotype: {dtype.geotype}, Current srid: {dtype.srid}" - ) - - class BigQuerySchema(SchemaMapper): @classmethod def from_ibis(cls, schema: sch.Schema) -> list[bq.SchemaField]: diff --git a/ibis/backends/bigquery/operations.py b/ibis/backends/bigquery/operations.py deleted file mode 100644 index b37c8cb1ee1d..000000000000 --- a/ibis/backends/bigquery/operations.py +++ /dev/null @@ -1,9 +0,0 @@ -"""Ibis operations specific to BigQuery.""" - -from __future__ import annotations - -import ibis.expr.operations as ops - - -class BigQueryUDFNode(ops.ValueOp): - """Represents use of a UDF.""" diff --git a/ibis/backends/bigquery/registry.py b/ibis/backends/bigquery/registry.py deleted file mode 100644 index 7afc889d9ac8..000000000000 --- a/ibis/backends/bigquery/registry.py +++ /dev/null @@ -1,1020 +0,0 @@ -"""Module to convert from Ibis expression to SQL string.""" - -from __future__ import annotations - -import contextlib -from typing import TYPE_CHECKING, Literal - -import numpy as np -import sqlglot as sg -from multipledispatch import Dispatcher - -import ibis -import ibis.common.exceptions as com -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -from ibis import util -from ibis.backends.base.sql.registry import ( - fixed_arity, - helpers, - operation_registry, - reduction, - unary, -) -from ibis.backends.base.sql.registry.literal import _string_literal_format -from ibis.backends.base.sql.registry.main import table_array_view -from ibis.backends.bigquery.datatypes import BigQueryType -from ibis.common.temporal import DateUnit, IntervalUnit, TimeUnit - -if TYPE_CHECKING: - from ibis.backends.base.sql import compiler - - -def _extract_field(sql_attr): - def extract_field_formatter(translator, op): - arg = translator.translate(op.args[0]) - if sql_attr == "epochseconds": - return f"UNIX_SECONDS({arg})" - else: - return f"EXTRACT({sql_attr} from {arg})" - - return extract_field_formatter - - -bigquery_cast = Dispatcher("bigquery_cast") - - -@bigquery_cast.register(str, dt.Timestamp, dt.Integer) -def bigquery_cast_timestamp_to_integer(compiled_arg, from_, to): - """Convert TIMESTAMP to INT64 (seconds since Unix epoch).""" - return f"UNIX_MICROS({compiled_arg})" - - -@bigquery_cast.register(str, dt.Integer, dt.Timestamp) -def bigquery_cast_integer_to_timestamp(compiled_arg, from_, to): - """Convert 
INT64 (seconds since Unix epoch) to Timestamp.""" - return f"TIMESTAMP_SECONDS({compiled_arg})" - - -@bigquery_cast.register(str, dt.Interval, dt.Integer) -def bigquery_cast_interval_to_integer(compiled_arg, from_, to): - if from_.unit in {IntervalUnit.WEEK, IntervalUnit.QUARTER, IntervalUnit.NANOSECOND}: - raise com.UnsupportedOperationError( - f"BigQuery does not allow extracting date part `{from_.unit}` from intervals" - ) - - return f"EXTRACT({from_.resolution.upper()} from {compiled_arg})" - - -@bigquery_cast.register(str, dt.Floating, dt.Integer) -def bigquery_cast_floating_to_integer(compiled_arg, from_, to): - """Convert FLOAT64 to INT64 without rounding.""" - return f"CAST(TRUNC({compiled_arg}) AS INT64)" - - -@bigquery_cast.register(str, dt.DataType, dt.DataType) -def bigquery_cast_generate(compiled_arg, from_, to): - """Cast to desired type.""" - sql_type = BigQueryType.to_string(to) - return f"CAST({compiled_arg} AS {sql_type})" - - -@bigquery_cast.register(str, dt.DataType) -def bigquery_cast_generate_simple(compiled_arg, to): - return bigquery_cast(compiled_arg, to, to) - - -def _cast(translator, op): - arg, target_type = op.args - arg_formatted = translator.translate(arg) - input_dtype = arg.dtype - return bigquery_cast(arg_formatted, input_dtype, target_type) - - -def integer_to_timestamp(translator: compiler.ExprTranslator, op) -> str: - """Interprets an integer as a timestamp.""" - arg = translator.translate(op.arg) - unit = op.unit.short - - if unit == "s": - return f"TIMESTAMP_SECONDS({arg})" - elif unit == "ms": - return f"TIMESTAMP_MILLIS({arg})" - elif unit == "us": - return f"TIMESTAMP_MICROS({arg})" - elif unit == "ns": - # Timestamps are represented internally as elapsed microseconds, so some - # rounding is required if an integer represents nanoseconds. 
- # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type - return f"TIMESTAMP_MICROS(CAST(ROUND({arg} / 1000) AS INT64))" - - raise NotImplementedError(f"cannot cast unit {op.unit}") - - -def _struct_field(translator, op): - arg = translator.translate(op.arg) - return f"{arg}.`{op.field}`" - - -def _struct_column(translator, op): - cols = ( - f"{translator.translate(value)} AS {name}" - for name, value in zip(op.names, op.values) - ) - return "STRUCT({})".format(", ".join(cols)) - - -def _array_concat(translator, op): - return "ARRAY_CONCAT({})".format(", ".join(map(translator.translate, op.arg))) - - -def _array_column(translator, op): - return "[{}]".format(", ".join(map(translator.translate, op.exprs))) - - -def _array_index(translator, op): - # SAFE_OFFSET returns NULL if out of bounds - arg = translator.translate(op.arg) - index = translator.translate(op.index) - return f"{arg}[SAFE_OFFSET({index})]" - - -def _array_contains(translator, op): - arg = translator.translate(op.arg) - other = translator.translate(op.other) - name = util.gen_name("bq_arr") - return f"(SELECT LOGICAL_OR({name} = {other}) FROM UNNEST({arg}) {name})" - - -def _array_position(translator, op): - arg = translator.translate(op.arg) - other = translator.translate(op.other) - name = util.gen_name("bq_arr") - idx = util.gen_name("bq_arr_idx") - unnest = f"UNNEST({arg}) {name} WITH OFFSET AS {idx}" - return f"COALESCE((SELECT {idx} FROM {unnest} WHERE {name} = {other} LIMIT 1), -1)" - - -def _array_remove(translator, op): - arg = translator.translate(op.arg) - other = translator.translate(op.other) - name = util.gen_name("bq_arr") - return f"ARRAY(SELECT {name} FROM UNNEST({arg}) {name} WHERE {name} <> {other})" - - -def _array_distinct(translator, op): - arg = translator.translate(op.arg) - name = util.gen_name("bq_arr") - return f"ARRAY(SELECT DISTINCT {name} FROM UNNEST({arg}) {name})" - - -def _array_sort(translator, op): - arg = translator.translate(op.arg) - name = util.gen_name("bq_arr") - return f"ARRAY(SELECT {name} FROM UNNEST({arg}) {name} ORDER BY {name})" - - -def _array_union(translator, op): - left = translator.translate(op.left) - right = translator.translate(op.right) - - lname = util.gen_name("bq_arr_left") - rname = util.gen_name("bq_arr_right") - - left_expr = f"SELECT {lname} FROM UNNEST({left}) {lname}" - right_expr = f"SELECT {rname} FROM UNNEST({right}) {rname}" - - return f"ARRAY({left_expr} UNION DISTINCT {right_expr})" - - -def _array_intersect(translator, op): - left = translator.translate(op.left) - right = translator.translate(op.right) - - lname = util.gen_name("bq_arr_left") - rname = util.gen_name("bq_arr_right") - - left_expr = f"SELECT {lname} FROM UNNEST({left}) {lname}" - right_expr = f"SELECT {rname} FROM UNNEST({right}) {rname}" - - return f"ARRAY({left_expr} INTERSECT DISTINCT {right_expr})" - - -def _array_zip(translator, op): - arg = list(map(translator.translate, op.arg)) - lengths = ", ".join(map("ARRAY_LENGTH({}) - 1".format, arg)) - indices = f"UNNEST(GENERATE_ARRAY(0, GREATEST({lengths})))" - idx = util.gen_name("bq_arr_idx") - struct_fields = ", ".join( - f"{arr}[SAFE_OFFSET({idx})] AS {name}" - for name, arr in zip(op.dtype.value_type.names, arg) - ) - return f"ARRAY(SELECT AS STRUCT {struct_fields} FROM {indices} {idx})" - - -def _array_map(translator, op): - arg = translator.translate(op.arg) - result = translator.translate(op.body) - param = op.param - return f"ARRAY(SELECT {result} FROM UNNEST({arg}) {param})" - - -def 
_array_filter(translator, op): - arg = translator.translate(op.arg) - result = translator.translate(op.body) - param = op.param - return f"ARRAY(SELECT {param} FROM UNNEST({arg}) {param} WHERE {result})" - - -def _hash(translator, op): - arg_formatted = translator.translate(op.arg) - return f"farm_fingerprint({arg_formatted})" - - -def _string_find(translator, op): - haystack, needle, start, end = op.args - - if start is not None: - raise NotImplementedError("start not implemented for string find") - if end is not None: - raise NotImplementedError("end not implemented for string find") - - return "STRPOS({}, {}) - 1".format( - translator.translate(haystack), translator.translate(needle) - ) - - -def _regex_search(translator, op): - arg = translator.translate(op.arg) - regex = translator.translate(op.pattern) - return f"REGEXP_CONTAINS({arg}, {regex})" - - -def _regex_extract(translator, op): - arg = translator.translate(op.arg) - regex = translator.translate(op.pattern) - index = translator.translate(op.index) - matches = f"REGEXP_CONTAINS({arg}, {regex})" - # non-greedily match the regex's prefix so the regex can match as much as possible - nonzero_index_replace = rf"REGEXP_REPLACE({arg}, CONCAT('.*?', {regex}, '.*'), CONCAT('\\', CAST({index} AS STRING)))" - # zero index replacement means capture everything matched by the regex, so - # we wrap the regex in an outer group - zero_index_replace = ( - rf"REGEXP_REPLACE({arg}, CONCAT('.*?', CONCAT('(', {regex}, ')'), '.*'), '\\1')" - ) - extract = f"IF({index} = 0, {zero_index_replace}, {nonzero_index_replace})" - return f"IF({matches}, {extract}, NULL)" - - -def _regex_replace(translator, op): - arg = translator.translate(op.arg) - regex = translator.translate(op.pattern) - replacement = translator.translate(op.replacement) - return f"REGEXP_REPLACE({arg}, {regex}, {replacement})" - - -def _string_concat(translator, op): - args = ", ".join(map(translator.translate, op.arg)) - return f"CONCAT({args})" - - -def _string_join(translator, op): - sep, args = op.args - return "ARRAY_TO_STRING([{}], {})".format( - ", ".join(map(translator.translate, args)), translator.translate(sep) - ) - - -def _string_ascii(translator, op): - arg = translator.translate(op.arg) - return f"TO_CODE_POINTS({arg})[SAFE_OFFSET(0)]" - - -def _string_right(translator, op): - arg, nchars = map(translator.translate, op.args) - return f"SUBSTR({arg}, -LEAST(LENGTH({arg}), {nchars}))" - - -def _string_substring(translator, op): - length = op.length - if (length := getattr(length, "value", None)) is not None and length < 0: - raise ValueError("Length parameter must be a non-negative value.") - - arg = translator.translate(op.arg) - start = translator.translate(op.start) - - arg_length = f"LENGTH({arg})" - if op.length is not None: - suffix = f", {translator.translate(op.length)}" - else: - suffix = "" - - if_pos = f"SUBSTR({arg}, {start} + 1{suffix})" - if_neg = f"SUBSTR({arg}, {arg_length} + {start} + 1{suffix})" - return f"IF({start} >= 0, {if_pos}, {if_neg})" - - -def _log(translator, op): - arg, base = op.args - arg_formatted = translator.translate(arg) - - if base is None: - return f"ln({arg_formatted})" - - base_formatted = translator.translate(base) - return f"log({arg_formatted}, {base_formatted})" - - -def _sg_literal(val) -> str: - return sg.exp.Literal(this=str(val), is_string=isinstance(val, str)).sql( - dialect="bigquery" - ) - - -def _literal(t, op): - dtype = op.dtype - value = op.value - - if value is None: - if not dtype.is_null(): - return f"CAST(NULL AS 
{BigQueryType.to_string(dtype)})" - return "NULL" - elif dtype.is_boolean(): - return str(value).upper() - elif dtype.is_string() or dtype.is_inet() or dtype.is_macaddr(): - return _string_literal_format(t, op) - elif dtype.is_decimal(): - if value.is_nan(): - return "CAST('NaN' AS FLOAT64)" - elif value.is_infinite(): - prefix = "-" * value.is_signed() - return f"CAST('{prefix}inf' AS FLOAT64)" - else: - return f"{BigQueryType.to_string(dtype)} '{value}'" - elif dtype.is_uuid(): - return _sg_literal(str(value)) - elif dtype.is_numeric(): - if not np.isfinite(value): - return f"CAST({str(value)!r} AS FLOAT64)" - return _sg_literal(value) - elif dtype.is_date(): - with contextlib.suppress(AttributeError): - value = value.date() - return f"DATE {_sg_literal(str(value))}" - elif dtype.is_timestamp(): - typename = "DATETIME" if dtype.timezone is None else "TIMESTAMP" - return f"{typename} {_sg_literal(str(value))}" - elif dtype.is_time(): - # TODO: define extractors on TimeValue expressions - return f"TIME {_sg_literal(str(value))}" - elif dtype.is_binary(): - return repr(value) - elif dtype.is_struct(): - cols = ", ".join( - f"{t.translate(ops.Literal(value[name], dtype=typ))} AS `{name}`" - for name, typ in dtype.items() - ) - return f"STRUCT({cols})" - elif dtype.is_array(): - val_type = dtype.value_type - values = ", ".join( - t.translate(ops.Literal(element, dtype=val_type)) for element in value - ) - return f"[{values}]" - elif dtype.is_interval(): - return f"INTERVAL {value} {dtype.resolution.upper()}" - else: - raise NotImplementedError(f"Unsupported type for BigQuery literal: {dtype}") - - -def _arbitrary(translator, op): - arg, how, where = op.args - - if where is not None: - arg = ops.IfElse(where, arg, ibis.NA) - - if how != "first": - raise com.UnsupportedOperationError( - f"{how!r} value not supported for arbitrary in BigQuery" - ) - - return f"ANY_VALUE({translator.translate(arg)})" - - -def _first(translator, op): - arg = op.arg - where = op.where - - if where is not None: - arg = ops.IfElse(where, arg, ibis.NA) - - arg = translator.translate(arg) - return f"ARRAY_AGG({arg} IGNORE NULLS)[SAFE_OFFSET(0)]" - - -def _last(translator, op): - arg = op.arg - where = op.where - - if where is not None: - arg = ops.IfElse(where, arg, ibis.NA) - - arg = translator.translate(arg) - return f"ARRAY_REVERSE(ARRAY_AGG({arg} IGNORE NULLS))[SAFE_OFFSET(0)]" - - -def _truncate(kind, units): - def truncator(translator, op): - arg, unit = op.args - trans_arg = translator.translate(arg) - if unit not in units: - raise com.UnsupportedOperationError( - f"BigQuery does not support truncating {arg.dtype} values to unit {unit!r}" - ) - if unit.name == "WEEK": - unit = "WEEK(MONDAY)" - else: - unit = unit.name - return f"{kind}_TRUNC({trans_arg}, {unit})" - - return truncator - - -# BigQuery doesn't support nanosecond intervals -_date_truncate = _truncate("DATE", DateUnit) -_time_truncate = _truncate("TIME", set(TimeUnit) - {TimeUnit.NANOSECOND}) -_timestamp_truncate = _truncate( - "TIMESTAMP", set(IntervalUnit) - {IntervalUnit.NANOSECOND} -) - - -def _date_binary(func): - def _formatter(translator, op): - arg, offset = op.left, op.right - - unit = offset.dtype.unit - if not unit.is_date(): - raise com.UnsupportedOperationError( - f"BigQuery does not allow binary operation {func} with INTERVAL offset {unit}" - ) - - formatted_arg = translator.translate(arg) - formatted_offset = translator.translate(offset) - return f"{func}({formatted_arg}, {formatted_offset})" - - return _formatter - - -def 
_timestamp_binary(func): - def _formatter(translator, op): - arg, offset = op.left, op.right - - unit = offset.dtype.unit - if unit == IntervalUnit.NANOSECOND: - raise com.UnsupportedOperationError( - f"BigQuery does not allow binary operation {func} with INTERVAL offset {unit}" - ) - - if unit.is_date(): - try: - offset = offset.to_expr().to_unit("h").op() - except ValueError: - raise com.UnsupportedOperationError( - f"BigQuery does not allow binary operation {func} with INTERVAL offset {unit}" - ) - - formatted_arg = translator.translate(arg) - formatted_offset = translator.translate(offset) - return f"{func}({formatted_arg}, {formatted_offset})" - - return _formatter - - -def _geo_boundingbox(dimension_name): - def _formatter(translator, op): - geog = op.args[0] - geog_formatted = translator.translate(geog) - return f"ST_BOUNDINGBOX({geog_formatted}).{dimension_name}" - - return _formatter - - -def _geo_simplify(translator, op): - geog, tolerance, preserve_collapsed = op.args - if preserve_collapsed.value: - raise com.UnsupportedOperationError( - "BigQuery simplify does not support preserving collapsed geometries, " - "must pass preserve_collapsed=False" - ) - geog, tolerance = map(translator.translate, (geog, tolerance)) - return f"ST_SIMPLIFY({geog}, {tolerance})" - - -STRFTIME_FORMAT_FUNCTIONS = { - dt.date: "DATE", - dt.time: "TIME", - dt.Timestamp(timezone=None): "DATETIME", - dt.Timestamp(timezone="UTC"): "TIMESTAMP", -} - - -def bigquery_day_of_week_index(t, op): - """Convert timestamp to day-of-week integer.""" - arg = op.args[0] - arg_formatted = t.translate(arg) - return f"MOD(EXTRACT(DAYOFWEEK FROM {arg_formatted}) + 5, 7)" - - -def bigquery_day_of_week_name(t, op): - """Convert timestamp to day-of-week name.""" - return f"INITCAP(CAST({t.translate(op.arg)} AS STRING FORMAT 'DAY'))" - - -def bigquery_compiles_divide(t, op): - """Floating point division.""" - return f"IEEE_DIVIDE({t.translate(op.left)}, {t.translate(op.right)})" - - -def compiles_strftime(translator, op): - """Timestamp formatting.""" - arg = op.arg - format_str = op.format_str - arg_type = arg.dtype - strftime_format_func_name = STRFTIME_FORMAT_FUNCTIONS[arg_type] - fmt_string = translator.translate(format_str) - arg_formatted = translator.translate(arg) - if isinstance(arg_type, dt.Timestamp) and arg_type.timezone is None: - return f"FORMAT_{strftime_format_func_name}({fmt_string}, {arg_formatted})" - elif isinstance(arg_type, dt.Timestamp): - return "FORMAT_{}({}, {}, {!r})".format( - strftime_format_func_name, - fmt_string, - arg_formatted, - arg_type.timezone, - ) - else: - return f"FORMAT_{strftime_format_func_name}({fmt_string}, {arg_formatted})" - - -def compiles_string_to_timestamp(translator, op): - """Timestamp parsing.""" - fmt_string = translator.translate(op.format_str) - arg_formatted = translator.translate(op.arg) - return f"PARSE_TIMESTAMP({fmt_string}, {arg_formatted})" - - -def compiles_floor(t, op): - bigquery_type = BigQueryType.to_string(op.dtype) - arg = op.arg - return f"CAST(FLOOR({t.translate(arg)}) AS {bigquery_type})" - - -def compiles_approx(translator, op): - arg = op.arg - where = op.where - - if where is not None: - arg = ops.IfElse(where, arg, ibis.NA) - - return f"APPROX_QUANTILES({translator.translate(arg)}, 2)[OFFSET(1)]" - - -def compiles_covar_corr(func): - def translate(translator, op): - left = op.left - right = op.right - - if (where := op.where) is not None: - left = ops.IfElse(where, left, None) - right = ops.IfElse(where, right, None) - - left = translator.translate( 
- ops.Cast(left, dt.int64) if left.dtype.is_boolean() else left - ) - right = translator.translate( - ops.Cast(right, dt.int64) if right.dtype.is_boolean() else right - ) - return f"{func}({left}, {right})" - - return translate - - -def _covar(translator, op): - how = op.how[:4].upper() - assert how in ("POP", "SAMP"), 'how not in ("POP", "SAMP")' - return compiles_covar_corr(f"COVAR_{how}")(translator, op) - - -def _corr(translator, op): - if (how := op.how) == "sample": - raise ValueError(f"Correlation with how={how!r} is not supported.") - return compiles_covar_corr("CORR")(translator, op) - - -def _identical_to(t, op): - left = t.translate(op.left) - right = t.translate(op.right) - return f"{left} IS NOT DISTINCT FROM {right}" - - -def _floor_divide(t, op): - left = t.translate(op.left) - right = t.translate(op.right) - return bigquery_cast(f"FLOOR(IEEE_DIVIDE({left}, {right}))", op.dtype) - - -def _log2(t, op): - return f"LOG({t.translate(op.arg)}, 2)" - - -def _is_nan(t, op): - return f"IS_NAN({t.translate(op.arg)})" - - -def _is_inf(t, op): - return f"IS_INF({t.translate(op.arg)})" - - -def _array_agg(t, op): - arg = op.arg - if (where := op.where) is not None: - arg = ops.IfElse(where, arg, ibis.NA) - return f"ARRAY_AGG({t.translate(arg)} IGNORE NULLS)" - - -def _arg_min_max(sort_dir: Literal["ASC", "DESC"]): - def translate(t, op: ops.ArgMin | ops.ArgMax) -> str: - arg = op.arg - if (where := op.where) is not None: - arg = ops.IfElse(where, arg, None) - arg = t.translate(arg) - key = t.translate(op.key) - return f"ARRAY_AGG({arg} IGNORE NULLS ORDER BY {key} {sort_dir} LIMIT 1)[SAFE_OFFSET(0)]" - - return translate - - -def _array_repeat(t, op): - start = step = 1 - times = t.translate(op.times) - arg = t.translate(op.arg) - array_length = f"ARRAY_LENGTH({arg})" - stop = f"GREATEST({times}, 0) * {array_length}" - idx = f"COALESCE(NULLIF(MOD(i, {array_length}), 0), {array_length})" - series = f"GENERATE_ARRAY({start}, {stop}, {step})" - return f"ARRAY(SELECT {arg}[SAFE_ORDINAL({idx})] FROM UNNEST({series}) AS i)" - - -def _neg_idx_to_pos(array, idx): - return f"IF({idx} < 0, ARRAY_LENGTH({array}) + {idx}, {idx})" - - -def _array_slice(t, op): - arg = t.translate(op.arg) - cond = [f"index >= {_neg_idx_to_pos(arg, t.translate(op.start))}"] - if stop := op.stop: - cond.append(f"index < {_neg_idx_to_pos(arg, t.translate(stop))}") - return ( - f"ARRAY(" - f"SELECT el " - f"FROM UNNEST({arg}) AS el WITH OFFSET index " - f"WHERE {' AND '.join(cond)}" - f")" - ) - - -def _capitalize(t, op): - arg = t.translate(op.arg) - return f"CONCAT(UPPER(SUBSTR({arg}, 1, 1)), LOWER(SUBSTR({arg}, 2)))" - - -def _nth_value(t, op): - arg = t.translate(op.arg) - - if not isinstance(nth_op := op.nth, ops.Literal): - raise TypeError(f"Bigquery nth must be a literal; got {type(op.nth)}") - - return f"NTH_VALUE({arg}, {nth_op.value + 1})" - - -def _interval_multiply(t, op): - if isinstance(op.left, ops.Literal) and isinstance(op.right, ops.Literal): - value = op.left.value * op.right.value - literal = ops.Literal(value, op.left.dtype) - return t.translate(literal) - - left, right = t.translate(op.left), t.translate(op.right) - unit = op.left.dtype.resolution.upper() - return f"INTERVAL EXTRACT({unit} from {left}) * {right} {unit}" - - -def table_column(translator, op): - """Override column references to adjust names for BigQuery.""" - quoted_name = translator._gen_valid_name( - helpers.quote_identifier(op.name, force=True) - ) - - ctx = translator.context - - # If the column does not originate from the table 
set in the current SELECT - # context, we should format as a subquery - if translator.permit_subquery and ctx.is_foreign_expr(op.table): - # TODO(kszucs): avoid the expression roundtrip - proj_expr = op.table.to_expr().select([op.name]).to_array().op() - return table_array_view(translator, proj_expr) - - alias = ctx.get_ref(op.table, search_parents=True) - if alias is not None: - quoted_name = f"{alias}.{quoted_name}" - - return quoted_name - - -def _count_distinct_star(t, op): - raise com.UnsupportedOperationError( - "BigQuery doesn't support COUNT(DISTINCT ...) with multiple columns" - ) - - -def _time_delta(t, op): - left = t.translate(op.left) - right = t.translate(op.right) - return f"TIME_DIFF({left}, {right}, {op.part.value.upper()})" - - -def _date_delta(t, op): - left = t.translate(op.left) - right = t.translate(op.right) - return f"DATE_DIFF({left}, {right}, {op.part.value.upper()})" - - -def _timestamp_delta(t, op): - left = t.translate(op.left) - right = t.translate(op.right) - left_tz = op.left.dtype.timezone - right_tz = op.right.dtype.timezone - args = f"{left}, {right}, {op.part.value.upper()}" - if left_tz is None and right_tz is None: - return f"DATETIME_DIFF({args})" - elif left_tz is not None and right_tz is not None: - return f"TIMESTAMP_DIFF({args})" - else: - raise NotImplementedError( - "timestamp difference with mixed timezone/timezoneless values is not implemented" - ) - - -def _group_concat(translator, op): - arg = op.arg - where = op.where - - if where is not None: - arg = ops.IfElse(where, arg, ibis.NA) - - arg = translator.translate(arg) - sep = translator.translate(op.sep) - return f"STRING_AGG({arg}, {sep})" - - -def _zero(dtype): - if dtype.is_interval(): - return "MAKE_INTERVAL()" - return "0" - - -def _sign(value, dtype): - if dtype.is_interval(): - zero = _zero(dtype) - return f"""\ -CASE - WHEN {value} < {zero} THEN -1 - WHEN {value} = {zero} THEN 0 - WHEN {value} > {zero} THEN 1 - ELSE NULL -END""" - return f"SIGN({value})" - - -def _nullifzero(step, zero, step_dtype): - if step_dtype.is_interval(): - return f"IF({step} = {zero}, NULL, {step})" - return f"NULLIF({step}, {zero})" - - -def _make_range(func): - def _range(translator, op): - start = translator.translate(op.start) - stop = translator.translate(op.stop) - step = translator.translate(op.step) - - step_dtype = op.step.dtype - step_sign = _sign(step, step_dtype) - delta_sign = _sign(step, step_dtype) - zero = _zero(step_dtype) - nullifzero = _nullifzero(step, zero, step_dtype) - - condition = f"{nullifzero} IS NOT NULL AND {step_sign} = {delta_sign}" - gen_array = f"{func}({start}, {stop}, {step})" - inner = f"SELECT x FROM UNNEST({gen_array}) x WHERE x <> {stop}" - return f"IF({condition}, ARRAY({inner}), [])" - - return _range - - -def _timestamp_range(translator, op): - start = op.start - stop = op.stop - - if start.dtype.timezone is None or stop.dtype.timezone is None: - raise com.IbisTypeError( - "Timestamps without timezone values are not supported when generating timestamp ranges" - ) - - rule = _make_range("GENERATE_TIMESTAMP_ARRAY") - return rule(translator, op) - - -OPERATION_REGISTRY = { - **operation_registry, - # Literal - ops.Literal: _literal, - # Logical - ops.Any: reduction("LOGICAL_OR"), - ops.All: reduction("LOGICAL_AND"), - ops.NullIf: fixed_arity("NULLIF", 2), - # Reductions - ops.ApproxMedian: compiles_approx, - ops.Covariance: _covar, - ops.Correlation: _corr, - # Math - ops.Divide: bigquery_compiles_divide, - ops.Floor: compiles_floor, - ops.Modulus: fixed_arity("MOD", 
2), - ops.Sign: unary("SIGN"), - ops.BitwiseNot: lambda t, op: f"~ {t.translate(op.arg)}", - ops.BitwiseXor: lambda t, op: f"{t.translate(op.left)} ^ {t.translate(op.right)}", - ops.BitwiseOr: lambda t, op: f"{t.translate(op.left)} | {t.translate(op.right)}", - ops.BitwiseAnd: lambda t, op: f"{t.translate(op.left)} & {t.translate(op.right)}", - ops.BitwiseLeftShift: lambda t, - op: f"{t.translate(op.left)} << {t.translate(op.right)}", - ops.BitwiseRightShift: lambda t, - op: f"{t.translate(op.left)} >> {t.translate(op.right)}", - # Temporal functions - ops.Date: unary("DATE"), - ops.DateFromYMD: fixed_arity("DATE", 3), - ops.DateAdd: _date_binary("DATE_ADD"), - ops.DateSub: _date_binary("DATE_SUB"), - ops.DateTruncate: _date_truncate, - ops.DayOfWeekIndex: bigquery_day_of_week_index, - ops.DayOfWeekName: bigquery_day_of_week_name, - ops.ExtractEpochSeconds: _extract_field("epochseconds"), - ops.ExtractYear: _extract_field("year"), - ops.ExtractQuarter: _extract_field("quarter"), - ops.ExtractMonth: _extract_field("month"), - ops.ExtractWeekOfYear: _extract_field("isoweek"), - ops.ExtractDay: _extract_field("day"), - ops.ExtractDayOfYear: _extract_field("dayofyear"), - ops.ExtractHour: _extract_field("hour"), - ops.ExtractMinute: _extract_field("minute"), - ops.ExtractSecond: _extract_field("second"), - ops.ExtractMicrosecond: _extract_field("microsecond"), - ops.ExtractMillisecond: _extract_field("millisecond"), - ops.Strftime: compiles_strftime, - ops.StringToTimestamp: compiles_string_to_timestamp, - ops.Time: unary("TIME"), - ops.TimeFromHMS: fixed_arity("TIME", 3), - ops.TimeTruncate: _time_truncate, - ops.TimestampAdd: _timestamp_binary("TIMESTAMP_ADD"), - ops.TimestampFromUNIX: integer_to_timestamp, - ops.TimestampFromYMDHMS: fixed_arity("DATETIME", 6), - ops.TimestampNow: fixed_arity("CURRENT_TIMESTAMP", 0), - ops.TimestampSub: _timestamp_binary("TIMESTAMP_SUB"), - ops.TimestampTruncate: _timestamp_truncate, - ops.IntervalMultiply: _interval_multiply, - ops.Hash: _hash, - ops.StringReplace: fixed_arity("REPLACE", 3), - ops.StringSplit: fixed_arity("SPLIT", 2), - ops.StringConcat: _string_concat, - ops.StringJoin: _string_join, - ops.StringAscii: _string_ascii, - ops.StringFind: _string_find, - ops.Substring: _string_substring, - ops.StrRight: _string_right, - ops.Capitalize: _capitalize, - ops.Translate: fixed_arity("TRANSLATE", 3), - ops.Repeat: fixed_arity("REPEAT", 2), - ops.RegexSearch: _regex_search, - ops.RegexExtract: _regex_extract, - ops.RegexReplace: _regex_replace, - ops.GroupConcat: _group_concat, - ops.Cast: _cast, - ops.StructField: _struct_field, - ops.StructColumn: _struct_column, - ops.ArrayCollect: _array_agg, - ops.ArrayConcat: _array_concat, - ops.Array: _array_column, - ops.ArrayIndex: _array_index, - ops.ArrayLength: unary("ARRAY_LENGTH"), - ops.ArrayRepeat: _array_repeat, - ops.ArraySlice: _array_slice, - ops.ArrayContains: _array_contains, - ops.ArrayPosition: _array_position, - ops.ArrayRemove: _array_remove, - ops.ArrayDistinct: _array_distinct, - ops.ArraySort: _array_sort, - ops.ArrayUnion: _array_union, - ops.ArrayIntersect: _array_intersect, - ops.ArrayZip: _array_zip, - ops.ArrayMap: _array_map, - ops.ArrayFilter: _array_filter, - ops.Log: _log, - ops.Log2: _log2, - ops.Arbitrary: _arbitrary, - ops.First: _first, - ops.Last: _last, - # Geospatial Columnar - ops.GeoUnaryUnion: unary("ST_UNION_AGG"), - # Geospatial - ops.GeoArea: unary("ST_AREA"), - ops.GeoAsBinary: unary("ST_ASBINARY"), - ops.GeoAsText: unary("ST_ASTEXT"), - ops.GeoAzimuth: 
fixed_arity("ST_AZIMUTH", 2), - ops.GeoBuffer: fixed_arity("ST_BUFFER", 2), - ops.GeoCentroid: unary("ST_CENTROID"), - ops.GeoContains: fixed_arity("ST_CONTAINS", 2), - ops.GeoCovers: fixed_arity("ST_COVERS", 2), - ops.GeoCoveredBy: fixed_arity("ST_COVEREDBY", 2), - ops.GeoDWithin: fixed_arity("ST_DWITHIN", 3), - ops.GeoDifference: fixed_arity("ST_DIFFERENCE", 2), - ops.GeoDisjoint: fixed_arity("ST_DISJOINT", 2), - ops.GeoDistance: fixed_arity("ST_DISTANCE", 2), - ops.GeoEndPoint: unary("ST_ENDPOINT"), - ops.GeoEquals: fixed_arity("ST_EQUALS", 2), - ops.GeoGeometryType: unary("ST_GEOMETRYTYPE"), - ops.GeoIntersection: fixed_arity("ST_INTERSECTION", 2), - ops.GeoIntersects: fixed_arity("ST_INTERSECTS", 2), - ops.GeoLength: unary("ST_LENGTH"), - ops.GeoMaxDistance: fixed_arity("ST_MAXDISTANCE", 2), - ops.GeoNPoints: unary("ST_NUMPOINTS"), - ops.GeoPerimeter: unary("ST_PERIMETER"), - ops.GeoPoint: fixed_arity("ST_GEOGPOINT", 2), - ops.GeoPointN: fixed_arity("ST_POINTN", 2), - ops.GeoSimplify: _geo_simplify, - ops.GeoStartPoint: unary("ST_STARTPOINT"), - ops.GeoTouches: fixed_arity("ST_TOUCHES", 2), - ops.GeoUnion: fixed_arity("ST_UNION", 2), - ops.GeoWithin: fixed_arity("ST_WITHIN", 2), - ops.GeoX: unary("ST_X"), - ops.GeoXMax: _geo_boundingbox("xmax"), - ops.GeoXMin: _geo_boundingbox("xmin"), - ops.GeoY: unary("ST_Y"), - ops.GeoYMax: _geo_boundingbox("ymax"), - ops.GeoYMin: _geo_boundingbox("ymin"), - ops.BitAnd: reduction("BIT_AND"), - ops.BitOr: reduction("BIT_OR"), - ops.BitXor: reduction("BIT_XOR"), - ops.ApproxCountDistinct: reduction("APPROX_COUNT_DISTINCT"), - ops.ApproxMedian: compiles_approx, - ops.IdenticalTo: _identical_to, - ops.FloorDivide: _floor_divide, - ops.IsNan: _is_nan, - ops.IsInf: _is_inf, - ops.ArgMin: _arg_min_max("ASC"), - ops.ArgMax: _arg_min_max("DESC"), - ops.Pi: lambda *_: "ACOS(-1)", - ops.E: lambda *_: "EXP(1)", - ops.RandomScalar: fixed_arity("RAND", 0), - ops.NthValue: _nth_value, - ops.JSONGetItem: lambda t, op: f"{t.translate(op.arg)}[{t.translate(op.index)}]", - ops.ArrayStringJoin: lambda t, - op: f"ARRAY_TO_STRING({t.translate(op.arg)}, {t.translate(op.sep)})", - ops.StartsWith: fixed_arity("STARTS_WITH", 2), - ops.EndsWith: fixed_arity("ENDS_WITH", 2), - ops.TableColumn: table_column, - ops.CountDistinctStar: _count_distinct_star, - ops.Argument: lambda _, op: op.param, - ops.Unnest: unary("UNNEST"), - ops.TimeDelta: _time_delta, - ops.DateDelta: _date_delta, - ops.TimestampDelta: _timestamp_delta, - ops.IntegerRange: _make_range("GENERATE_ARRAY"), - ops.TimestampRange: _timestamp_range, -} - -_invalid_operations = { - ops.FindInSet, - ops.DateDiff, - ops.TimestampDiff, - ops.ExtractAuthority, - ops.ExtractFile, - ops.ExtractFragment, - ops.ExtractHost, - ops.ExtractPath, - ops.ExtractProtocol, - ops.ExtractQuery, - ops.ExtractUserInfo, -} - -OPERATION_REGISTRY = { - k: v for k, v in OPERATION_REGISTRY.items() if k not in _invalid_operations -} diff --git a/ibis/backends/bigquery/rewrites.py b/ibis/backends/bigquery/rewrites.py deleted file mode 100644 index 2fe34ccc37bf..000000000000 --- a/ibis/backends/bigquery/rewrites.py +++ /dev/null @@ -1,32 +0,0 @@ -"""Methods to translate BigQuery expressions before compilation.""" - -from __future__ import annotations - -import toolz - -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -from ibis.backends.base.sql import compiler as sql_compiler - - -def bq_sum(op): - if isinstance((arg := op.arg).dtype, dt.Boolean): - return ops.Sum(ops.Cast(arg, dt.int64), where=op.where) - else: - return 
op - - -def bq_mean(op): - if isinstance((arg := op.arg).dtype, dt.Boolean): - return ops.Mean(ops.Cast(arg, dt.int64), where=op.where) - else: - return op - - -REWRITES = { - **sql_compiler.ExprTranslator._rewrites, - ops.Sum: bq_sum, - ops.Mean: bq_mean, - ops.Any: toolz.identity, - ops.All: toolz.identity, -} diff --git a/ibis/backends/bigquery/tests/system/snapshots/test_client/test_cross_project_query/out.sql b/ibis/backends/bigquery/tests/system/snapshots/test_client/test_cross_project_query/out.sql index 225b5770f6fe..da12e567f6b5 100644 --- a/ibis/backends/bigquery/tests/system/snapshots/test_client/test_cross_project_query/out.sql +++ b/ibis/backends/bigquery/tests/system/snapshots/test_client/test_cross_project_query/out.sql @@ -1,10 +1,6 @@ SELECT - t0.`title`, - t0.`tags` -FROM ( - SELECT - t1.* - FROM `bigquery-public-data`.stackoverflow.posts_questions AS t1 - WHERE - STRPOS(t1.`tags`, 'ibis') - 1 >= 0 -) AS t0 \ No newline at end of file + t0.title, + t0.tags +FROM `bigquery-public-data`.stackoverflow.posts_questions AS t0 +WHERE + strpos(t0.tags, 'ibis') > 0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/system/snapshots/test_client/test_multiple_project_queries/out.sql b/ibis/backends/bigquery/tests/system/snapshots/test_client/test_multiple_project_queries/out.sql index f9d06ecd8b53..9832d461eb58 100644 --- a/ibis/backends/bigquery/tests/system/snapshots/test_client/test_multiple_project_queries/out.sql +++ b/ibis/backends/bigquery/tests/system/snapshots/test_client/test_multiple_project_queries/out.sql @@ -1,5 +1,5 @@ SELECT - t0.`title` -FROM `bigquery-public-data`.stackoverflow.posts_questions AS t0 -INNER JOIN `nyc-tlc`.yellow.trips AS t1 - ON t0.`tags` = t1.`rate_code` \ No newline at end of file + t2.title +FROM `bigquery-public-data`.stackoverflow.posts_questions AS t2 +INNER JOIN `nyc-tlc`.yellow.trips AS t3 + ON t2.tags = t3.rate_code \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/system/snapshots/test_client/test_subquery_scalar_params/out.sql b/ibis/backends/bigquery/tests/system/snapshots/test_client/test_subquery_scalar_params/out.sql index cc99fcc1a0d3..ec28287c9f13 100644 --- a/ibis/backends/bigquery/tests/system/snapshots/test_client/test_subquery_scalar_params/out.sql +++ b/ibis/backends/bigquery/tests/system/snapshots/test_client/test_subquery_scalar_params/out.sql @@ -1,20 +1,19 @@ -WITH t0 AS ( - SELECT - t2.`float_col`, - t2.`timestamp_col`, - t2.`int_col`, - t2.`string_col` - FROM `ibis-gbq`.ibis_gbq_testing.functional_alltypes AS t2 - WHERE - t2.`timestamp_col` < @param_0 -) SELECT - count(t1.`foo`) AS `count` + COUNT(t2.foo) AS count FROM ( SELECT - t0.`string_col`, - sum(t0.`float_col`) AS `foo` - FROM t0 + t1.string_col, + SUM(t1.float_col) AS foo + FROM ( + SELECT + t0.float_col, + t0.timestamp_col, + t0.int_col, + t0.string_col + FROM `ibis-gbq`.ibis_gbq_testing.functional_alltypes AS t0 + WHERE + t0.timestamp_col < datetime('2014-01-01T00:00:00') + ) AS t1 GROUP BY 1 -) AS t1 \ No newline at end of file +) AS t2 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/system/test_client.py b/ibis/backends/bigquery/tests/system/test_client.py index ac7eedee9094..cbb4a3c3c0aa 100644 --- a/ibis/backends/bigquery/tests/system/test_client.py +++ b/ibis/backends/bigquery/tests/system/test_client.py @@ -190,13 +190,13 @@ def test_raw_sql(con): def test_parted_column_rename(parted_alltypes): assert "PARTITIONTIME" in parted_alltypes.columns - assert "_PARTITIONTIME" in 
parted_alltypes.op().table.schema.names + assert "_PARTITIONTIME" in parted_alltypes.op().parent.schema.names def test_scalar_param_partition_time(parted_alltypes): assert "PARTITIONTIME" in parted_alltypes.columns assert "PARTITIONTIME" in parted_alltypes.schema() - param = ibis.param("timestamp").name("time_param") + param = ibis.param("timestamp('UTC')") expr = parted_alltypes[param > parted_alltypes.PARTITIONTIME] df = expr.execute(params={param: "2017-01-01"}) assert df.empty diff --git a/ibis/backends/bigquery/tests/system/udf/test_udf_execute.py b/ibis/backends/bigquery/tests/system/udf/test_udf_execute.py index 3020a02b58c4..66226ae70e74 100644 --- a/ibis/backends/bigquery/tests/system/udf/test_udf_execute.py +++ b/ibis/backends/bigquery/tests/system/udf/test_udf_execute.py @@ -9,7 +9,7 @@ import ibis import ibis.expr.datatypes as dt -from ibis.backends.bigquery import udf +from ibis import udf PROJECT_ID = os.environ.get("GOOGLE_BIGQUERY_PROJECT_ID", "ibis-gbq") DATASET_ID = "testing" @@ -28,12 +28,8 @@ def df(alltypes): def test_udf(alltypes, df): - @udf( - input_type=[dt.double, dt.double], - output_type=dt.double, - determinism=True, - ) - def my_add(a, b): + @udf.scalar.python(determinism=True) + def my_add(a: float, b: float) -> float: return a + b expr = my_add(alltypes.double_col, alltypes.double_col) @@ -49,13 +45,10 @@ def my_add(a, b): def test_udf_with_struct(alltypes, df, snapshot): - @udf( - input_type=[dt.double, dt.double], - output_type=dt.Struct.from_tuples( - [("width", dt.double), ("height", dt.double)] - ), - ) - def my_struct_thing(a, b): + @udf.scalar.python + def my_struct_thing(a: float, b: float) -> dt.Struct( + {"width": float, "height": float} + ): class Rectangle: def __init__(self, width, height): self.width = width @@ -63,9 +56,6 @@ def __init__(self, width, height): return Rectangle(a, b) - result = my_struct_thing.sql - snapshot.assert_match(result, "out.sql") - expr = my_struct_thing(alltypes.double_col, alltypes.double_col) result = expr.execute() assert not result.empty @@ -75,12 +65,12 @@ def __init__(self, width, height): def test_udf_compose(alltypes, df): - @udf([dt.double], dt.double) - def add_one(x): + @udf.scalar.python + def add_one(x: float) -> float: return x + 1.0 - @udf([dt.double], dt.double) - def times_two(x): + @udf.scalar.python + def times_two(x: float) -> float: return x * 2.0 t = alltypes @@ -91,8 +81,8 @@ def times_two(x): def test_udf_scalar(con): - @udf([dt.double, dt.double], dt.double) - def my_add(x, y): + @udf.scalar.python + def my_add(x: float, y: float) -> float: return x + y expr = my_add(1, 2) @@ -101,29 +91,23 @@ def my_add(x, y): def test_multiple_calls_has_one_definition(con): - @udf([dt.string], dt.double) - def my_str_len(s): + @udf.scalar.python + def my_str_len(s: str) -> float: return s.length s = ibis.literal("abcd") expr = my_str_len(s) + my_str_len(s) - add = expr.op() - - # generated javascript is identical - assert add.left.sql == add.right.sql assert con.execute(expr) == 8.0 def test_udf_libraries(con): - @udf( - [dt.Array(dt.string)], - dt.double, + @udf.scalar.python( # whatever symbols are exported in the library are visible inside the # UDF, in this case lodash defines _ and we use that here - libraries=["gs://ibis-testing-libraries/lodash.min.js"], + libraries=("gs://ibis-testing-libraries/lodash.min.js",), ) - def string_length(strings): + def string_length(strings: list[str]) -> float: return _.sum(_.map(strings, lambda x: x.length)) # noqa: F821 raw_data = ["aaa", "bb", "c"] @@ -135,45 
+119,18 @@ def string_length(strings): def test_udf_with_len(con): - @udf([dt.string], dt.double) - def my_str_len(x): + @udf.scalar.python + def my_str_len(x: str) -> float: return len(x) - @udf([dt.Array(dt.string)], dt.double) - def my_array_len(x): + @udf.scalar.python + def my_array_len(x: list[str]) -> float: return len(x) assert con.execute(my_str_len("aaa")) == 3 assert con.execute(my_array_len(["aaa", "bb"])) == 2 -@pytest.mark.parametrize( - ("argument_type",), - [ - param( - dt.string, - id="string", - ), - param( - "ANY TYPE", - id="string", - ), - ], -) -def test_udf_sql(con, argument_type): - format_t = udf.sql( - "format_t", - params={"input": argument_type}, - output_type=dt.string, - sql_expression="FORMAT('%T', input)", - ) - - s = ibis.literal("abcd") - expr = format_t(s) - - con.execute(expr) - - @pytest.mark.parametrize( ("value", "expected"), [ diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_median/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_median/out.sql index 7000a628bb25..e86bffa88a0b 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_median/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_median/out.sql @@ -1,3 +1,3 @@ SELECT - APPROX_QUANTILES(IF(t0.`month` > 0, t0.`double_col`, NULL), 2)[OFFSET(1)] AS `ApproxMedian_double_col_ Greater_month_ 0` + approx_quantiles(IF(t0.month > 0, t0.double_col, NULL), IF(t0.month > 0, 2, NULL))[offset(1)] AS `ApproxMedian_double_col_ Greater_month_ 0` FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_nunique/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_nunique/out.sql index 4e0b6bfd607c..967e47b3904f 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_nunique/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_nunique/out.sql @@ -1,3 +1,3 @@ SELECT - APPROX_COUNT_DISTINCT(IF(t0.`month` > 0, t0.`double_col`, NULL)) AS `ApproxCountDistinct_double_col_ Greater_month_ 0` + APPROX_COUNT_DISTINCT(IF(t0.month > 0, t0.double_col, NULL)) AS `ApproxCountDistinct_double_col_ Greater_month_ 0` FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_median/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_median/out.sql index 06569c747fe0..8a4438fd4e83 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_median/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_median/out.sql @@ -1,3 +1,3 @@ SELECT - APPROX_QUANTILES(t0.`double_col`, 2)[OFFSET(1)] AS `ApproxMedian_double_col` + approx_quantiles(t0.double_col, 2)[offset(1)] AS ApproxMedian_double_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_nunique/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_nunique/out.sql index 69b0211b7995..7ee240de9fd9 100644 --- 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_nunique/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_nunique/out.sql @@ -1,3 +1,3 @@ SELECT - APPROX_COUNT_DISTINCT(t0.`double_col`) AS `ApproxCountDistinct_double_col` + APPROX_COUNT_DISTINCT(t0.double_col) AS ApproxCountDistinct_double_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_binary/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_binary/out.sql index 29bb2f587d13..c9b5a33bdd2a 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_binary/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_binary/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.`value` AS BYTES) AS `Cast_value_ binary` + CAST(t0.value AS BYTES) AS `Cast_value_ binary` FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_and/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_and/out.sql index a3eda8e81031..9086990603db 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_and/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_and/out.sql @@ -1,3 +1,3 @@ SELECT - BIT_AND(IF(t0.`bigint_col` > 0, t0.`int_col`, NULL)) AS `BitAnd_int_col_ Greater_bigint_col_ 0` + bit_and(IF(t0.bigint_col > 0, t0.int_col, NULL)) AS `BitAnd_int_col_ Greater_bigint_col_ 0` FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_or/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_or/out.sql index 335de82862ea..ec9ed6c6b66b 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_or/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_or/out.sql @@ -1,3 +1,3 @@ SELECT - BIT_OR(IF(t0.`bigint_col` > 0, t0.`int_col`, NULL)) AS `BitOr_int_col_ Greater_bigint_col_ 0` + bit_or(IF(t0.bigint_col > 0, t0.int_col, NULL)) AS `BitOr_int_col_ Greater_bigint_col_ 0` FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_xor/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_xor/out.sql index 2905a7ad8912..7997e495ef8f 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_xor/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_xor/out.sql @@ -1,3 +1,3 @@ SELECT - BIT_XOR(IF(t0.`bigint_col` > 0, t0.`int_col`, NULL)) AS `BitXor_int_col_ Greater_bigint_col_ 0` + bit_xor(IF(t0.bigint_col > 0, t0.int_col, NULL)) AS `BitXor_int_col_ Greater_bigint_col_ 0` FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_and/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_and/out.sql index 622f27b2071d..fcafb18759da 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_and/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_and/out.sql @@ -1,3 +1,3 @@ SELECT - 
BIT_AND(t0.`int_col`) AS `BitAnd_int_col` + bit_and(t0.int_col) AS BitAnd_int_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_or/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_or/out.sql index 6fed6bb96de3..22f8eb5534f9 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_or/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_or/out.sql @@ -1,3 +1,3 @@ SELECT - BIT_OR(t0.`int_col`) AS `BitOr_int_col` + bit_or(t0.int_col) AS BitOr_int_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_xor/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_xor/out.sql index 91808632fe0c..de39538dbbd7 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_xor/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_xor/out.sql @@ -1,3 +1,3 @@ SELECT - BIT_XOR(t0.`int_col`) AS `BitXor_int_col` + bit_xor(t0.int_col) AS BitXor_int_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/mean/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/mean/out.sql index 41815bb5d337..1345addaed7e 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/mean/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/mean/out.sql @@ -1,3 +1,3 @@ SELECT - avg(CAST(t0.`bool_col` AS INT64)) AS `Mean_bool_col` + AVG(CAST(t0.bool_col AS INT64)) AS Mean_bool_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/sum/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/sum/out.sql index 731b35d720f3..52a1683495d8 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/sum/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/sum/out.sql @@ -1,3 +1,3 @@ SELECT - sum(CAST(t0.`bool_col` AS INT64)) AS `Sum_bool_col` + SUM(CAST(t0.bool_col AS INT64)) AS Sum_bool_col FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_conj/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_conj/out.sql index 5361ef28236f..686b89ebd74f 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_conj/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_conj/out.sql @@ -1,9 +1,7 @@ SELECT - sum( - IF(( - t0.`month` > 6 - ) AND ( - t0.`month` < 10 - ), CAST(t0.`bool_col` AS INT64), NULL) - ) AS `Sum_bool_col_ And_Greater_month_ 6_ Less_month_ 10` + SUM(IF(( + t0.month > 6 + ) AND ( + t0.month < 10 + ), CAST(t0.bool_col AS INT64), NULL)) AS `Sum_bool_col_ And_Greater_month_ 6_ Less_month_ 10` FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_simple/out.sql 
b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_simple/out.sql index 5f8d980b673f..3e3b21ef17e2 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_simple/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_simple/out.sql @@ -1,3 +1,3 @@ SELECT - avg(IF(t0.`month` > 6, CAST(t0.`bool_col` AS INT64), NULL)) AS `Mean_bool_col_ Greater_month_ 6` + AVG(IF(t0.month > 6, CAST(t0.bool_col AS INT64), NULL)) AS `Mean_bool_col_ Greater_month_ 6` FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bucket/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bucket/out.sql index 4c4e2a32e2ea..7aa64367725a 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bucket/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bucket/out.sql @@ -1,17 +1,17 @@ SELECT CASE WHEN ( - 0 <= t0.`value` + 0 <= t0.value ) AND ( - t0.`value` < 1 + t0.value < 1 ) THEN 0 WHEN ( - 1 <= t0.`value` + 1 <= t0.value ) AND ( - t0.`value` <= 3 + t0.value <= 3 ) THEN 1 ELSE CAST(NULL AS INT64) - END AS `tmp` + END AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cast_float_to_int/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cast_float_to_int/out.sql index acc0555d337d..d81e2d9cbdcf 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cast_float_to_int/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cast_float_to_int/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(TRUNC(t0.`double_col`) AS INT64) AS `Cast_double_col_ int64` + CAST(trunc(t0.double_col) AS INT64) AS `Cast_double_col_ int64` FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_compile_toplevel/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_compile_toplevel/out.sql index 119f5daa423e..1b1cef1e6341 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_compile_toplevel/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_compile_toplevel/out.sql @@ -1,3 +1,3 @@ SELECT - sum(t0.`foo`) AS `Sum_foo` -FROM t0 \ No newline at end of file + SUM(t0.foo) AS Sum_foo +FROM t0 AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/pop/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/pop/out.sql index 6f2e6ac5f580..16c39cbe3843 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/pop/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/pop/out.sql @@ -1,3 +1,3 @@ SELECT - COVAR_POP(t0.`double_col`, t0.`double_col`) AS `Covariance_double_col_ double_col` + COVAR_POP(t0.double_col, t0.double_col) AS `Covariance_double_col_ double_col` FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/sample/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/sample/out.sql index 7d097f565cdd..cd9d190ae5e5 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/sample/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/sample/out.sql @@ 
-1,3 +1,3 @@ SELECT - COVAR_SAMP(t0.`double_col`, t0.`double_col`) AS `Covariance_double_col_ double_col` + COVAR_SAMP(t0.double_col, t0.double_col) AS `Covariance_double_col_ double_col` FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/date/index.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/date/index.sql index fe81b1746772..f95467b3223b 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/date/index.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/date/index.sql @@ -1,2 +1,2 @@ SELECT - MOD(EXTRACT(DAYOFWEEK FROM CAST('2017-01-01' AS DATE)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_ 1_ 1` \ No newline at end of file + mod(EXTRACT(dayofweek FROM DATE(2017, 1, 1)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_ 1_ 1` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/date/name.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/date/name.sql index da4b415da6de..f207aa60fec7 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/date/name.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/date/name.sql @@ -1,2 +1,2 @@ SELECT - INITCAP(CAST(CAST('2017-01-01' AS DATE) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_ 1_ 1` \ No newline at end of file + INITCAP(CAST(DATE(2017, 1, 1) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_ 1_ 1` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/datetime/index.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/datetime/index.sql index 2304d7a2866b..7d9d59767bc6 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/datetime/index.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/datetime/index.sql @@ -1,2 +1,2 @@ SELECT - MOD(EXTRACT(DAYOFWEEK FROM CAST('2017-01-01 04:55:59' AS DATETIME)) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file + mod(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/datetime/name.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/datetime/name.sql index 79ebfd44155a..fed76c0c815d 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/datetime/name.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/datetime/name.sql @@ -1,2 +1,2 @@ SELECT - INITCAP(CAST(CAST('2017-01-01 04:55:59' AS DATETIME) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file + INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_date/index.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_date/index.sql index fe81b1746772..f95467b3223b 100644 --- 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_date/index.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_date/index.sql @@ -1,2 +1,2 @@ SELECT - MOD(EXTRACT(DAYOFWEEK FROM CAST('2017-01-01' AS DATE)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_ 1_ 1` \ No newline at end of file + mod(EXTRACT(dayofweek FROM DATE(2017, 1, 1)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_ 1_ 1` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_date/name.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_date/name.sql index da4b415da6de..f207aa60fec7 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_date/name.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_date/name.sql @@ -1,2 +1,2 @@ SELECT - INITCAP(CAST(CAST('2017-01-01' AS DATE) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_ 1_ 1` \ No newline at end of file + INITCAP(CAST(DATE(2017, 1, 1) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_ 1_ 1` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_timestamp/index.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_timestamp/index.sql index 2304d7a2866b..7d9d59767bc6 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_timestamp/index.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_timestamp/index.sql @@ -1,2 +1,2 @@ SELECT - MOD(EXTRACT(DAYOFWEEK FROM CAST('2017-01-01 04:55:59' AS DATETIME)) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file + mod(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_timestamp/name.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_timestamp/name.sql index 79ebfd44155a..fed76c0c815d 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_timestamp/name.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/string_timestamp/name.sql @@ -1,2 +1,2 @@ SELECT - INITCAP(CAST(CAST('2017-01-01 04:55:59' AS DATETIME) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file + INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp/index.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp/index.sql index 2304d7a2866b..7d9d59767bc6 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp/index.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp/index.sql @@ -1,2 +1,2 @@ SELECT - MOD(EXTRACT(DAYOFWEEK FROM CAST('2017-01-01 04:55:59' AS DATETIME)) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file + mod(EXTRACT(dayofweek FROM 
datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp/name.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp/name.sql index 79ebfd44155a..fed76c0c815d 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp/name.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp/name.sql @@ -1,2 +1,2 @@ SELECT - INITCAP(CAST(CAST('2017-01-01 04:55:59' AS DATETIME) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file + INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp_date/index.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp_date/index.sql index fe81b1746772..f95467b3223b 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp_date/index.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp_date/index.sql @@ -1,2 +1,2 @@ SELECT - MOD(EXTRACT(DAYOFWEEK FROM CAST('2017-01-01' AS DATE)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_ 1_ 1` \ No newline at end of file + mod(EXTRACT(dayofweek FROM DATE(2017, 1, 1)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_ 1_ 1` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp_date/name.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp_date/name.sql index da4b415da6de..f207aa60fec7 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp_date/name.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_day_of_week/timestamp_date/name.sql @@ -1,2 +1,2 @@ SELECT - INITCAP(CAST(CAST('2017-01-01' AS DATE) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_ 1_ 1` \ No newline at end of file + INITCAP(CAST(DATE(2017, 1, 1) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_ 1_ 1` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/floordiv/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/floordiv/out.sql index 23ca5617a3c5..021c289c519a 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/floordiv/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/floordiv/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(FLOOR(IEEE_DIVIDE(t0.`double_col`, 0)) AS INT64) AS `FloorDivide_double_col_ 0` + CAST(FLOOR(ieee_divide(t0.double_col, 0)) AS INT64) AS `FloorDivide_double_col_ 0` FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/truediv/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/truediv/out.sql index bf88197b4112..2f451f747c62 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/truediv/out.sql +++ 
b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/truediv/out.sql @@ -1,3 +1,3 @@ SELECT - IEEE_DIVIDE(t0.`double_col`, 0) AS `Divide_double_col_ 0` + ieee_divide(t0.double_col, 0) AS `Divide_double_col_ 0` FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/date/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/date/out.sql index 9098cd2126ba..bc3328e12ee8 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/date/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/date/out.sql @@ -1,3 +1,3 @@ SELECT - DATE(t0.`ts`) AS `tmp` + DATE(t0.ts) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/time/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/time/out.sql index 4847f7b392f3..c35dfe0331a2 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/time/out.sql @@ -1,3 +1,3 @@ SELECT - TIME(t0.`ts`) AS `tmp` + time(t0.ts) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_azimuth/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_azimuth/out.sql index 05e9145e119e..c1326749682c 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_azimuth/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_azimuth/out.sql @@ -1,3 +1,3 @@ SELECT - ST_AZIMUTH(t0.`p0`, t0.`p1`) AS `tmp` + st_azimuth(t0.p0, t0.p1) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/contains/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/contains/out.sql index 1b1a82779e31..41db52f14ef0 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/contains/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/contains/out.sql @@ -1,3 +1,3 @@ SELECT - ST_CONTAINS(t0.`geog0`, t0.`geog1`) AS `tmp` + st_contains(t0.geog0, t0.geog1) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covered_by/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covered_by/out.sql index 35876973cb31..45073f686366 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covered_by/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covered_by/out.sql @@ -1,3 +1,3 @@ SELECT - ST_COVEREDBY(t0.`geog0`, t0.`geog1`) AS `tmp` + st_coveredby(t0.geog0, t0.geog1) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covers/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covers/out.sql index 1e347e6b1ae1..316f696e43df 
100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covers/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covers/out.sql @@ -1,3 +1,3 @@ SELECT - ST_COVERS(t0.`geog0`, t0.`geog1`) AS `tmp` + st_covers(t0.geog0, t0.geog1) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/d_within/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/d_within/out.sql index 436a09b96fb3..95b081d2f324 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/d_within/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/d_within/out.sql @@ -1,3 +1,3 @@ SELECT - ST_DWITHIN(t0.`geog0`, t0.`geog1`, 5.2) AS `tmp` + st_dwithin(t0.geog0, t0.geog1, 5.2) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/difference/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/difference/out.sql index 28a0a1ca0241..3680bf6dc8cf 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/difference/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/difference/out.sql @@ -1,3 +1,3 @@ SELECT - ST_DIFFERENCE(t0.`geog0`, t0.`geog1`) AS `tmp` + st_difference(t0.geog0, t0.geog1) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/disjoint/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/disjoint/out.sql index 03e7ddd2697f..47f67918fdaf 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/disjoint/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/disjoint/out.sql @@ -1,3 +1,3 @@ SELECT - ST_DISJOINT(t0.`geog0`, t0.`geog1`) AS `tmp` + st_disjoint(t0.geog0, t0.geog1) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/distance/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/distance/out.sql index 8039f2517172..dd498ab034f6 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/distance/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/distance/out.sql @@ -1,3 +1,3 @@ SELECT - ST_DISTANCE(t0.`geog0`, t0.`geog1`) AS `tmp` + st_distance(t0.geog0, t0.geog1) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/geo_equals/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/geo_equals/out.sql index 0d50e35c1aae..525bdd5a98c5 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/geo_equals/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/geo_equals/out.sql @@ -1,3 +1,3 @@ SELECT - ST_EQUALS(t0.`geog0`, t0.`geog1`) AS `tmp` + st_equals(t0.geog0, t0.geog1) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersection/out.sql 
b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersection/out.sql index 66b95db7fc95..881691e93e02 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersection/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersection/out.sql @@ -1,3 +1,3 @@ SELECT - ST_INTERSECTION(t0.`geog0`, t0.`geog1`) AS `tmp` + st_intersection(t0.geog0, t0.geog1) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersects/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersects/out.sql index 7b1912963355..9768493f1bbc 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersects/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersects/out.sql @@ -1,3 +1,3 @@ SELECT - ST_INTERSECTS(t0.`geog0`, t0.`geog1`) AS `tmp` + st_intersects(t0.geog0, t0.geog1) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/max_distance/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/max_distance/out.sql index fe6ed8cc970c..bebf364b68f8 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/max_distance/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/max_distance/out.sql @@ -1,3 +1,3 @@ SELECT - ST_MAXDISTANCE(t0.`geog0`, t0.`geog1`) AS `tmp` + st_maxdistance(t0.geog0, t0.geog1) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/touches/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/touches/out.sql index d6e4482d3630..3d50dc04d227 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/touches/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/touches/out.sql @@ -1,3 +1,3 @@ SELECT - ST_TOUCHES(t0.`geog0`, t0.`geog1`) AS `tmp` + st_touches(t0.geog0, t0.geog1) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/union/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/union/out.sql index ba3b55982ea3..8f5d1d5a8f42 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/union/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/union/out.sql @@ -1,3 +1,3 @@ SELECT - ST_UNION(t0.`geog0`, t0.`geog1`) AS `tmp` + st_union(t0.geog0, t0.geog1) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/within/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/within/out.sql index 5008c4637d85..9ac98c0a3feb 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/within/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/within/out.sql @@ -1,3 +1,3 @@ SELECT - ST_WITHIN(t0.`geog0`, t0.`geog1`) AS `tmp` + st_within(t0.geog0, t0.geog1) AS tmp FROM t AS t0 \ No 
newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_max/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_max/out.sql index 5a1e009d3368..3acaebab60b6 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_max/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_max/out.sql @@ -1,3 +1,3 @@ SELECT - ST_BOUNDINGBOX(t0.`geog`).xmax AS `tmp` + st_boundingbox(t0.geog).xmax AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_min/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_min/out.sql index 8b3e8d89f450..3e46dda22606 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_min/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_min/out.sql @@ -1,3 +1,3 @@ SELECT - ST_BOUNDINGBOX(t0.`geog`).xmin AS `tmp` + st_boundingbox(t0.geog).xmin AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_max/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_max/out.sql index a1fdaad96165..f2163789f6e8 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_max/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_max/out.sql @@ -1,3 +1,3 @@ SELECT - ST_BOUNDINGBOX(t0.`geog`).ymax AS `tmp` + st_boundingbox(t0.geog).ymax AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_min/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_min/out.sql index e3efd9949a19..dc148200be48 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_min/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_min/out.sql @@ -1,3 +1,3 @@ SELECT - ST_BOUNDINGBOX(t0.`geog`).ymin AS `tmp` + st_boundingbox(t0.geog).ymin AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_point/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_point/out.sql index 89c468270545..a57d8a6952f5 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_point/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_point/out.sql @@ -1,3 +1,3 @@ SELECT - ST_GEOGPOINT(t0.`lon`, t0.`lat`) AS `tmp` + st_geogpoint(t0.lon, t0.lat) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_simplify/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_simplify/out.sql index 3d194f1e0368..0f9f1b6cc556 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_simplify/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_simplify/out.sql @@ -1,3 +1,3 @@ SELECT - ST_SIMPLIFY(t0.`geog`, 5.2) AS `tmp` + st_simplify(t0.geog, 5.2) AS tmp FROM t AS t0 \ No newline at end of file diff --git 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/aread/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/aread/out.sql index b16445decdb4..6b56cb853600 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/aread/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/aread/out.sql @@ -1,3 +1,3 @@ SELECT - ST_AREA(t0.`geog`) AS `tmp` + st_area(t0.geog) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_binary/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_binary/out.sql index f9875a1abe00..d08e811dab1a 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_binary/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_binary/out.sql @@ -1,3 +1,3 @@ SELECT - ST_ASBINARY(t0.`geog`) AS `tmp` + st_asbinary(t0.geog) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_text/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_text/out.sql index a42c9d599cc1..5a15cdcf347a 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_text/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_text/out.sql @@ -1,3 +1,3 @@ SELECT - ST_ASTEXT(t0.`geog`) AS `tmp` + st_astext(t0.geog) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/buffer/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/buffer/out.sql index e5990a079594..026f5a13ab9c 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/buffer/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/buffer/out.sql @@ -1,3 +1,3 @@ SELECT - ST_BUFFER(t0.`geog`, 5.2) AS `tmp` + st_buffer(t0.geog, 5.2) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/centroid/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/centroid/out.sql index 7b9e4235e496..c486a43d1f54 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/centroid/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/centroid/out.sql @@ -1,3 +1,3 @@ SELECT - ST_CENTROID(t0.`geog`) AS `tmp` + st_centroid(t0.geog) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/end_point/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/end_point/out.sql index 3512019b95a3..0f4f517bc5d7 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/end_point/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/end_point/out.sql @@ -1,3 +1,3 @@ SELECT - ST_ENDPOINT(t0.`geog`) AS `tmp` + st_endpoint(t0.geog) AS tmp FROM t AS t0 \ No newline at end of file diff --git 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/geometry_type/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/geometry_type/out.sql index 152acfd628b3..6b87c638e82d 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/geometry_type/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/geometry_type/out.sql @@ -1,3 +1,3 @@ SELECT - ST_GEOMETRYTYPE(t0.`geog`) AS `tmp` + st_geometrytype(t0.geog) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/length/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/length/out.sql index 5c604906d5a7..cc0c00c15a40 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/length/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/length/out.sql @@ -1,3 +1,3 @@ SELECT - ST_LENGTH(t0.`geog`) AS `tmp` + st_length(t0.geog) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/npoints/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/npoints/out.sql index 1678db820185..ce6c5c6a9988 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/npoints/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/npoints/out.sql @@ -1,3 +1,3 @@ SELECT - ST_NUMPOINTS(t0.`geog`) AS `tmp` + st_numpoints(t0.geog) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/perimeter/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/perimeter/out.sql index 1cc8832884ea..62b3f322440b 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/perimeter/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/perimeter/out.sql @@ -1,3 +1,3 @@ SELECT - ST_PERIMETER(t0.`geog`) AS `tmp` + st_perimeter(t0.geog) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/point_n/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/point_n/out.sql index 12471430c8c7..0b03f583db71 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/point_n/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/point_n/out.sql @@ -1,3 +1,3 @@ SELECT - ST_POINTN(t0.`geog`, 3) AS `tmp` + st_pointn(t0.geog, 3) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/start_point/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/start_point/out.sql index e75c80c0c11e..9c77d20eaae8 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/start_point/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/start_point/out.sql @@ -1,3 +1,3 @@ SELECT - ST_STARTPOINT(t0.`geog`) AS `tmp` + st_startpoint(t0.geog) AS tmp FROM t AS t0 \ No newline at end of file diff --git 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary_union/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary_union/out.sql index 884ee4e6d94e..112d191a4eae 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary_union/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary_union/out.sql @@ -1,3 +1,3 @@ SELECT - ST_UNION_AGG(t0.`geog`) AS `tmp` + st_union_agg(t0.geog) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/x/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/x/out.sql index 1afb858c9035..76b1ac85d03e 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/x/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/x/out.sql @@ -1,3 +1,3 @@ SELECT - ST_X(t0.`pt`) AS `tmp` + st_x(t0.pt) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/y/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/y/out.sql index b89b748ddc35..472068d4b795 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/y/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/y/out.sql @@ -1,3 +1,3 @@ SELECT - ST_Y(t0.`pt`) AS `tmp` + st_y(t0.pt) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hash/binary/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hash/binary/out.sql index af390b72dfb2..16b493bec461 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hash/binary/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hash/binary/out.sql @@ -1,2 +1,2 @@ SELECT - farm_fingerprint(b'test of hash') AS `Hash_b'test of hash'` \ No newline at end of file + farm_fingerprint(CAST('74657374206f662068617368' AS BYTES FORMAT 'HEX')) AS `Hash_b'test of hash'` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-binary/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-binary/out.sql index 097f00501480..06e3d4e02c04 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-binary/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-binary/out.sql @@ -1,2 +1,2 @@ SELECT - MD5(b'test') AS `tmp` \ No newline at end of file + TO_HEX(MD5(CAST('74657374' AS BYTES FORMAT 'HEX'))) AS tmp \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-string/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-string/out.sql index 8f15582c94e8..a9b91d0a8fd8 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-string/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-string/out.sql @@ -1,2 +1,2 @@ SELECT - MD5('test') AS `tmp` \ No newline at end of file + TO_HEX(MD5('test')) AS tmp \ No newline at end of file diff --git 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-binary/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-binary/out.sql index a93af9c0c71d..b47ac212bf11 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-binary/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-binary/out.sql @@ -1,2 +1,2 @@ SELECT - sha1(b'test') AS `tmp` \ No newline at end of file + SHA(CAST('74657374' AS BYTES FORMAT 'HEX')) AS tmp \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-string/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-string/out.sql index 04a6f16e94cb..006923cc428f 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-string/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-string/out.sql @@ -1,2 +1,2 @@ SELECT - sha1('test') AS `tmp` \ No newline at end of file + SHA('test') AS tmp \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-binary/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-binary/out.sql index de7f002062c7..66952364a23a 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-binary/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-binary/out.sql @@ -1,2 +1,2 @@ SELECT - SHA256(b'test') AS `tmp` \ No newline at end of file + sha256(CAST('74657374' AS BYTES FORMAT 'HEX')) AS tmp \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-string/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-string/out.sql index da8766656e8f..eee9f3b6f5c6 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-string/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-string/out.sql @@ -1,2 +1,2 @@ SELECT - SHA256('test') AS `tmp` \ No newline at end of file + sha256('test') AS tmp \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-binary/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-binary/out.sql index fe8843530cd5..0a34496a1b95 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-binary/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-binary/out.sql @@ -1,2 +1,2 @@ SELECT - SHA512(b'test') AS `tmp` \ No newline at end of file + sha512(CAST('74657374' AS BYTES FORMAT 'HEX')) AS tmp \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-string/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-string/out.sql index 6097e6df15c1..ba16c3204a26 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-string/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-string/out.sql @@ -1,2 +1,2 @@ SELECT - SHA512('test') AS 
`tmp` \ No newline at end of file + sha512('test') AS tmp \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_identical_to/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_identical_to/out.sql index a23fe427cf02..dbd6d0bc38db 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_identical_to/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_identical_to/out.sql @@ -1,10 +1,18 @@ SELECT - t0.* + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month FROM functional_alltypes AS t0 WHERE - ( - t0.`string_col` IS NOT DISTINCT FROM 'a' - ) - AND ( - t0.`date_string_col` IS NOT DISTINCT FROM 'b' - ) \ No newline at end of file + t0.string_col IS NOT DISTINCT FROM 'a' + AND t0.date_string_col IS NOT DISTINCT FROM 'b' \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ms/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ms/out.sql index 52c8b7dd7a1f..108f5bdd6655 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ms/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ms/out.sql @@ -1,2 +1,2 @@ SELECT - TIMESTAMP_MILLIS(-123456789) AS `tmp` \ No newline at end of file + timestamp_millis(-123456789) AS tmp \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ns/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ns/out.sql index f38a5acbc58a..aaa93e0528b8 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ns/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ns/out.sql @@ -1,2 +1,2 @@ SELECT - TIMESTAMP_MICROS(CAST(ROUND(1234567891011 / 1000) AS INT64)) AS `tmp` \ No newline at end of file + timestamp_micros(CAST(ROUND(1234567891011 / 1000) AS INT64)) AS tmp \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/s/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/s/out.sql index 14033e028843..a470e62fd7a5 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/s/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/s/out.sql @@ -1,2 +1,2 @@ SELECT - TIMESTAMP_SECONDS(123456789) AS `tmp` \ No newline at end of file + timestamp_seconds(123456789) AS tmp \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/us/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/us/out.sql index e77ca976a4ae..9c2e88bc505f 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/us/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/us/out.sql @@ -1,2 +1,2 @@ SELECT - TIMESTAMP_MICROS(123456789) AS `tmp` \ No newline at end of file + timestamp_micros(123456789) AS tmp \ No newline at end of file diff --git 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/datetime/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/datetime/out.sql index 742c13e36aa5..4a5e85730e47 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/datetime/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/datetime/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(hour FROM CAST('2017-01-01 04:55:59' AS DATETIME)) AS `tmp` \ No newline at end of file + EXTRACT(HOUR FROM datetime('2017-01-01T04:55:59')) AS tmp \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_time/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_time/out.sql index a227037389f3..b0b094d49530 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_time/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(hour FROM CAST('04:55:59' AS TIME)) AS `tmp` \ No newline at end of file + EXTRACT(HOUR FROM time(4, 55, 59)) AS tmp \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_timestamp/out.sql index 742c13e36aa5..4a5e85730e47 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_timestamp/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(hour FROM CAST('2017-01-01 04:55:59' AS DATETIME)) AS `tmp` \ No newline at end of file + EXTRACT(HOUR FROM datetime('2017-01-01T04:55:59')) AS tmp \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/time/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/time/out.sql index a227037389f3..b0b094d49530 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/time/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(hour FROM CAST('04:55:59' AS TIME)) AS `tmp` \ No newline at end of file + EXTRACT(HOUR FROM time(4, 55, 59)) AS tmp \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/timestamp/out.sql index 742c13e36aa5..4a5e85730e47 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/timestamp/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(hour FROM CAST('2017-01-01 04:55:59' AS DATETIME)) AS `tmp` \ No newline at end of file + EXTRACT(HOUR FROM datetime('2017-01-01T04:55:59')) AS tmp \ No newline at end of file diff --git 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/date/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/date/out.sql index c01e22721bb5..c4e62dab9bdf 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/date/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/date/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(year FROM CAST('2017-01-01' AS DATE)) AS `ExtractYear_datetime_date_2017_ 1_ 1` \ No newline at end of file + EXTRACT(YEAR FROM DATE(2017, 1, 1)) AS `ExtractYear_datetime_date_2017_ 1_ 1` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/datetime/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/datetime/out.sql index e3919b046dfa..c1f82282802c 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/datetime/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/datetime/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(year FROM CAST('2017-01-01 04:55:59' AS DATETIME)) AS `ExtractYear_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file + EXTRACT(YEAR FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/string_date/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/string_date/out.sql index c01e22721bb5..c4e62dab9bdf 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/string_date/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/string_date/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(year FROM CAST('2017-01-01' AS DATE)) AS `ExtractYear_datetime_date_2017_ 1_ 1` \ No newline at end of file + EXTRACT(YEAR FROM DATE(2017, 1, 1)) AS `ExtractYear_datetime_date_2017_ 1_ 1` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/string_timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/string_timestamp/out.sql index e3919b046dfa..c1f82282802c 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/string_timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/string_timestamp/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(year FROM CAST('2017-01-01 04:55:59' AS DATETIME)) AS `ExtractYear_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file + EXTRACT(YEAR FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/timestamp/out.sql index e3919b046dfa..c1f82282802c 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/timestamp/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(year FROM CAST('2017-01-01 04:55:59' AS DATETIME)) AS `ExtractYear_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file + EXTRACT(YEAR FROM datetime('2017-01-01T04:55:59')) AS 
`ExtractYear_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/timestamp_date/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/timestamp_date/out.sql index c01e22721bb5..c4e62dab9bdf 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/timestamp_date/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_year/timestamp_date/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(year FROM CAST('2017-01-01' AS DATE)) AS `ExtractYear_datetime_date_2017_ 1_ 1` \ No newline at end of file + EXTRACT(YEAR FROM DATE(2017, 1, 1)) AS `ExtractYear_datetime_date_2017_ 1_ 1` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_now/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_now/out.sql index 3eecc7336d34..c86165f516de 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_now/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_now/out.sql @@ -1,2 +1,2 @@ SELECT - CURRENT_TIMESTAMP() AS `TimestampNow` \ No newline at end of file + CURRENT_TIMESTAMP() AS TimestampNow \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_projection_fusion_only_peeks_at_immediate_parent/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_projection_fusion_only_peeks_at_immediate_parent/out.sql index a064f3cabc1e..c64aff514d1f 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_projection_fusion_only_peeks_at_immediate_parent/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_projection_fusion_only_peeks_at_immediate_parent/out.sql @@ -1,28 +1,26 @@ -WITH t0 AS ( +SELECT + t2.file_date, + t2.PARTITIONTIME, + t2.val, + t2.XYZ +FROM ( SELECT - t5.* - FROM unbound_table AS t5 + CAST(t0.file_date AS DATE) AS file_date, + t0.PARTITIONTIME, + t0.val, + t0.val * 2 AS XYZ + FROM unbound_table AS t0 WHERE - t5.`PARTITIONTIME` < CAST('2017-01-01' AS DATE) -), t1 AS ( - SELECT - CAST(t0.`file_date` AS DATE) AS `file_date`, - t0.`PARTITIONTIME`, - t0.`val` - FROM t0 -), t2 AS ( + t0.PARTITIONTIME < DATE(2017, 1, 1) AND CAST(t0.file_date AS DATE) < DATE(2017, 1, 1) +) AS t2 +INNER JOIN ( SELECT - t1.* - FROM t1 + CAST(t0.file_date AS DATE) AS file_date, + t0.PARTITIONTIME, + t0.val, + t0.val * 2 AS XYZ + FROM unbound_table AS t0 WHERE - t1.`file_date` < CAST('2017-01-01' AS DATE) -), t3 AS ( - SELECT - t2.*, - t2.`val` * 2 AS `XYZ` - FROM t2 -) -SELECT - t3.* -FROM t3 -INNER JOIN t3 AS t4 \ No newline at end of file + t0.PARTITIONTIME < DATE(2017, 1, 1) AND CAST(t0.file_date AS DATE) < DATE(2017, 1, 1) +) AS t4 + ON TRUE \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_foll/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_foll/out.sql index f128f6c60f1a..b429e0b40a4b 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_foll/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_foll/out.sql @@ -1,4 +1,16 @@ SELECT - t0.*, - avg(t0.`float_col`) OVER (PARTITION BY t0.`year` ORDER BY t0.`month` ASC RANGE BETWEEN 1 PRECEDING AND CURRENT ROW) AS `two_month_avg` + t0.id, + 
t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month, + AVG(t0.float_col) OVER (PARTITION BY t0.year ORDER BY t0.month ASC RANGE BETWEEN 1 preceding AND CURRENT ROW) AS two_month_avg FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_prec/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_prec/out.sql index c3432e74aa9d..28f748f8d387 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_prec/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_prec/out.sql @@ -1,4 +1,16 @@ SELECT - t0.*, - avg(t0.`float_col`) OVER (PARTITION BY t0.`year` ORDER BY UNIX_MICROS(t0.`timestamp_col`) ASC RANGE BETWEEN 4 PRECEDING AND 2 PRECEDING) AS `two_month_avg` + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month, + AVG(t0.float_col) OVER (PARTITION BY t0.year ORDER BY t0.timestamp_col ASC RANGE BETWEEN 4 preceding AND 2 preceding) AS two_month_avg FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/difference/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/difference/out.sql index 957f0bc1ed5b..1fba54566ec5 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/difference/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/difference/out.sql @@ -1,11 +1,11 @@ SELECT - t0.`a` + t2.a FROM ( SELECT - t1.* - FROM t0 AS t1 + * + FROM t0 AS t0 EXCEPT DISTINCT SELECT - t1.* - FROM t1 -) AS t0 \ No newline at end of file + * + FROM t1 AS t1 +) AS t2 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/intersect/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/intersect/out.sql index f93956e195ff..0300759946af 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/intersect/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/intersect/out.sql @@ -1,11 +1,11 @@ SELECT - t0.`a` + t2.a FROM ( SELECT - t1.* - FROM t0 AS t1 + * + FROM t0 AS t0 INTERSECT DISTINCT SELECT - t1.* - FROM t1 -) AS t0 \ No newline at end of file + * + FROM t1 AS t1 +) AS t2 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_all/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_all/out.sql index 0d385460634b..65ca427a3cc1 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_all/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_all/out.sql @@ -1,11 +1,11 @@ SELECT - t0.`a` + t2.a FROM ( SELECT - t1.* - FROM t0 AS t1 + * + FROM t0 AS t0 UNION ALL SELECT - t1.* - FROM t1 -) AS t0 \ No newline at end of file + * + FROM t1 AS t1 +) AS t2 \ No newline at end of file diff --git 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_distinct/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_distinct/out.sql index 08eb83fc95fd..f060886f0ca7 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_distinct/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_distinct/out.sql @@ -1,11 +1,11 @@ SELECT - t0.`a` + t2.a FROM ( SELECT - t1.* - FROM t0 AS t1 + * + FROM t0 AS t0 UNION DISTINCT SELECT - t1.* - FROM t1 -) AS t0 \ No newline at end of file + * + FROM t1 AS t1 +) AS t2 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_substring/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_substring/out.sql index 5aa66a18ed01..dde97310a2d4 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_substring/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_substring/out.sql @@ -1,7 +1,3 @@ SELECT - IF( - 3 >= 0, - SUBSTR(t0.`value`, 3 + 1, 1), - SUBSTR(t0.`value`, LENGTH(t0.`value`) + 3 + 1, 1) - ) AS `tmp` + IF(3 >= 0, substr(t0.value, 3 + 1, 1), substr(t0.value, LENGTH(t0.value) + 3 + 1, 1)) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-date/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-date/out.sql index 9c2606fa97ea..cfda92082d60 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-date/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-date/out.sql @@ -1,3 +1,3 @@ SELECT - DATE_TRUNC(t0.`a`, DAY) AS `tmp` + DATE_TRUNC(t0.a, DAY) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-timestamp/out.sql index 123365031a50..5914e551cdfa 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.`a`, DAY) AS `tmp` + TIMESTAMP_TRUNC(t0.a, DAY) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-time/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-time/out.sql index 0dab09b39086..627fd52607ea 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-time/out.sql @@ -1,3 +1,3 @@ SELECT - TIME_TRUNC(t0.`a`, HOUR) AS `tmp` + TIME_TRUNC(t0.a, HOUR) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-timestamp/out.sql index b2cb572f2089..2eb17fc72aa7 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-timestamp/out.sql +++ 
b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.`a`, HOUR) AS `tmp` + TIMESTAMP_TRUNC(t0.a, HOUR) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-time/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-time/out.sql index 2c51c76585e8..bc83a4627907 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-time/out.sql @@ -1,3 +1,3 @@ SELECT - TIME_TRUNC(t0.`a`, MICROSECOND) AS `tmp` + TIME_TRUNC(t0.a, MICROSECOND) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-timestamp/out.sql index 294a6422566b..85129ae2ca98 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.`a`, MICROSECOND) AS `tmp` + TIMESTAMP_TRUNC(t0.a, MICROSECOND) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-time/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-time/out.sql index f985ffa2058e..d568ac473f24 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-time/out.sql @@ -1,3 +1,3 @@ SELECT - TIME_TRUNC(t0.`a`, MILLISECOND) AS `tmp` + TIME_TRUNC(t0.a, MILLISECOND) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-timestamp/out.sql index 3f2f5970047f..eeb61582028e 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.`a`, MILLISECOND) AS `tmp` + TIMESTAMP_TRUNC(t0.a, MILLISECOND) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-time/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-time/out.sql index e10273b41268..53a26cff7227 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-time/out.sql @@ -1,3 +1,3 @@ SELECT - TIME_TRUNC(t0.`a`, MINUTE) AS `tmp` + TIME_TRUNC(t0.a, MINUTE) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-timestamp/out.sql 
b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-timestamp/out.sql index aadc0830deef..75ed2f48e4f3 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.`a`, MINUTE) AS `tmp` + TIMESTAMP_TRUNC(t0.a, MINUTE) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-date/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-date/out.sql index 02a34e41ad78..08aea93a5d16 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-date/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-date/out.sql @@ -1,3 +1,3 @@ SELECT - DATE_TRUNC(t0.`a`, MONTH) AS `tmp` + DATE_TRUNC(t0.a, MONTH) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-timestamp/out.sql index ae1748be8c8f..fb5755607afc 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.`a`, MONTH) AS `tmp` + TIMESTAMP_TRUNC(t0.a, MONTH) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-date/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-date/out.sql index 9954515fe93f..69fb0c1c0073 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-date/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-date/out.sql @@ -1,3 +1,3 @@ SELECT - DATE_TRUNC(t0.`a`, QUARTER) AS `tmp` + DATE_TRUNC(t0.a, QUARTER) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-timestamp/out.sql index f3e39becf528..6ae384abfe45 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.`a`, QUARTER) AS `tmp` + TIMESTAMP_TRUNC(t0.a, QUARTER) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-time/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-time/out.sql index 4ca550c5abea..ed1a4f61a766 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-time/out.sql @@ -1,3 +1,3 @@ SELECT - TIME_TRUNC(t0.`a`, SECOND) AS `tmp` + 
TIME_TRUNC(t0.a, SECOND) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-timestamp/out.sql index 31ec37d86b72..ca6b3eea53d1 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.`a`, SECOND) AS `tmp` + TIMESTAMP_TRUNC(t0.a, SECOND) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-date/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-date/out.sql index 024ff840348f..5fa3caed910e 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-date/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-date/out.sql @@ -1,3 +1,3 @@ SELECT - DATE_TRUNC(t0.`a`, WEEK(MONDAY)) AS `tmp` + DATE_TRUNC(t0.a, WEEK(MONDAY)) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-timestamp/out.sql index 461c643074de..116adb3c510c 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.`a`, WEEK(MONDAY)) AS `tmp` + TIMESTAMP_TRUNC(t0.a, WEEK(MONDAY)) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-date/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-date/out.sql index f3d4c321fb3b..6c6515bd7737 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-date/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-date/out.sql @@ -1,3 +1,3 @@ SELECT - DATE_TRUNC(t0.`a`, YEAR) AS `tmp` + DATE_TRUNC(t0.a, YEAR) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-timestamp/out.sql index 20551a51be41..9b639983ccda 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.`a`, YEAR) AS `tmp` + TIMESTAMP_TRUNC(t0.a, YEAR) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_no_timezone/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_no_timezone/out.sql index ca0788063c21..13bb7ff2b42b 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_no_timezone/out.sql +++ 
b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_no_timezone/out.sql @@ -1,3 +1,3 @@ SELECT - PARSE_TIMESTAMP('%F', t0.`date_string_col`) AS `StringToTimestamp_date_string_col_ '%F'` + parse_timestamp('%F', t0.date_string_col, 'UTC') AS `StringToTimestamp_date_string_col_ '%F'` FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_timezone/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_timezone/out.sql index 67f139bb19b6..9e192cd1a351 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_timezone/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_timezone/out.sql @@ -1,3 +1,3 @@ SELECT - PARSE_TIMESTAMP('%F %Z', CONCAT(t0.`date_string_col`, ' America/New_York')) AS `StringToTimestamp_StringConcat_ '%F %Z'` + parse_timestamp('%F %Z', CONCAT(t0.date_string_col, ' America/New_York'), 'UTC') AS `StringToTimestamp_StringConcat_ '%F %Z'` FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/days/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/days/out.sql index 15bf5ccf4711..45c49c5adc20 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/days/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/days/out.sql @@ -1,4 +1,16 @@ SELECT - t0.*, - avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) ASC RANGE BETWEEN 86400000000 PRECEDING AND EXTRACT(DAY FROM INTERVAL '0' DAY) * 86400000000 FOLLOWING) AS `win_avg` + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month, + AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN INTERVAL '1' DAY preceding AND INTERVAL 0 DAY following) AS win_avg FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/five/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/five/out.sql index f92b75d70240..7483e117fe59 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/five/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/five/out.sql @@ -1,4 +1,16 @@ SELECT - t0.*, - avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) ASC RANGE BETWEEN 5 PRECEDING AND CURRENT ROW) AS `win_avg` + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month, + AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN 5 preceding AND CURRENT ROW) AS win_avg FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/hours/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/hours/out.sql index c96fcf3d161f..497fe55410fe 100644 --- 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/hours/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/hours/out.sql @@ -1,4 +1,16 @@ SELECT - t0.*, - avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) ASC RANGE BETWEEN 3600000000 PRECEDING AND EXTRACT(HOUR FROM INTERVAL '0' HOUR) * 3600000000 FOLLOWING) AS `win_avg` + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month, + AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN INTERVAL '1' HOUR preceding AND INTERVAL 0 HOUR following) AS win_avg FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/micros/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/micros/out.sql index 87ff9a7e26be..37c7b9452f03 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/micros/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/micros/out.sql @@ -1,4 +1,16 @@ SELECT - t0.*, - avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) ASC RANGE BETWEEN 1 PRECEDING AND EXTRACT(MICROSECOND FROM INTERVAL '0' MICROSECOND) * 1 FOLLOWING) AS `win_avg` + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month, + AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN INTERVAL '1' MICROSECOND preceding AND INTERVAL 0 MICROSECOND following) AS win_avg FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/minutes/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/minutes/out.sql index fa4f2044265d..ab8f45c80717 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/minutes/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/minutes/out.sql @@ -1,4 +1,16 @@ SELECT - t0.*, - avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) ASC RANGE BETWEEN 60000000 PRECEDING AND EXTRACT(MINUTE FROM INTERVAL '0' MINUTE) * 60000000 FOLLOWING) AS `win_avg` + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month, + AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN INTERVAL '1' MINUTE preceding AND INTERVAL 0 MINUTE following) AS win_avg FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/nanos/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/nanos/out.sql new file mode 100644 index 000000000000..6a2f458acc07 --- /dev/null +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/nanos/out.sql @@ -0,0 +1,16 @@ +SELECT + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + 
t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month, + AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC NULLS LAST RANGE BETWEEN INTERVAL '1' NANOSECOND preceding AND CAST(0 AS INTERVAL NANOSECOND) following) AS win_avg +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/seconds/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/seconds/out.sql index 81bbc1eab462..88e0656e843b 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/seconds/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/seconds/out.sql @@ -1,4 +1,16 @@ SELECT - t0.*, - avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) ASC RANGE BETWEEN 1000000 PRECEDING AND EXTRACT(SECOND FROM INTERVAL '0' SECOND) * 1000000 FOLLOWING) AS `win_avg` + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month, + AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN INTERVAL '1' SECOND preceding AND INTERVAL 0 SECOND following) AS win_avg FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/two_days/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/two_days/out.sql index 0b58d5414c0b..b2e631f727c3 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/two_days/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/two_days/out.sql @@ -1,4 +1,16 @@ SELECT - t0.*, - avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) ASC RANGE BETWEEN EXTRACT(DAY FROM INTERVAL '2' DAY) * 86400000000 PRECEDING AND EXTRACT(DAY FROM INTERVAL '0' DAY) * 86400000000 FOLLOWING) AS `win_avg` + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month, + AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN INTERVAL (EXTRACT(DAY FROM INTERVAL '1' DAY) * 2) DAY preceding AND INTERVAL 0 DAY following) AS win_avg FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/week/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/week/out.sql index ac28f7967fad..db904f4be055 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/week/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/week/out.sql @@ -1,3 +1,16 @@ -SELECT t0.*, - avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) ASC RANGE BETWEEN 604800000000 PRECEDING AND EXTRACT(WEEK from INTERVAL 0 WEEK) * 604800000000 FOLLOWING) AS `win_avg` -FROM functional_alltypes t0 \ No newline at end of file +SELECT + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month, + 
AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN INTERVAL '1' WEEK preceding AND INTERVAL 0 WEEK following) AS win_avg +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/False/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/False/out.sql index 64d48db414a7..de91c71e2d5d 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/False/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/False/out.sql @@ -1,23 +1,23 @@ SELECT - t0.`id`, - t0.`bool_col`, - t0.`tinyint_col`, - t0.`smallint_col`, - t0.`int_col`, - t0.`bigint_col`, - t0.`float_col`, - t0.`double_col`, - t0.`date_string_col`, - t0.`string_col`, - t0.`timestamp_col`, - t0.`year`, - t0.`month` + t1.id, + t1.bool_col, + t1.tinyint_col, + t1.smallint_col, + t1.int_col, + t1.bigint_col, + t1.float_col, + t1.double_col, + t1.date_string_col, + t1.string_col, + t1.timestamp_col, + t1.year, + t1.month FROM ( SELECT - t1.* - FROM functional_alltypes AS t1 + * + FROM functional_alltypes AS t0 UNION ALL SELECT - t1.* - FROM functional_alltypes AS t1 -) AS t0 \ No newline at end of file + * + FROM functional_alltypes AS t0 +) AS t1 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/True/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/True/out.sql index 580b185465fb..84d01ac2951a 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/True/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/True/out.sql @@ -1,23 +1,23 @@ SELECT - t0.`id`, - t0.`bool_col`, - t0.`tinyint_col`, - t0.`smallint_col`, - t0.`int_col`, - t0.`bigint_col`, - t0.`float_col`, - t0.`double_col`, - t0.`date_string_col`, - t0.`string_col`, - t0.`timestamp_col`, - t0.`year`, - t0.`month` + t1.id, + t1.bool_col, + t1.tinyint_col, + t1.smallint_col, + t1.int_col, + t1.bigint_col, + t1.float_col, + t1.double_col, + t1.date_string_col, + t1.string_col, + t1.timestamp_col, + t1.year, + t1.month FROM ( SELECT - t1.* - FROM functional_alltypes AS t1 + * + FROM functional_alltypes AS t0 UNION DISTINCT SELECT - t1.* - FROM functional_alltypes AS t1 -) AS t0 \ No newline at end of file + * + FROM functional_alltypes AS t0 +) AS t1 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-False/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-False/out.sql index c2ee631db698..061bd3df1a64 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-False/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-False/out.sql @@ -1,55 +1,30 @@ -WITH t0 AS ( - SELECT - t2.`string_col`, - sum(t2.`double_col`) AS `metric` - FROM functional_alltypes AS t2 - GROUP BY - 1 -) SELECT - t1.`string_col`, - t1.`metric` + t6.string_col, + t6.metric FROM ( - WITH t0 AS ( + SELECT + t4.string_col, + t4.metric + FROM ( SELECT - t2.`string_col`, - sum(t2.`double_col`) AS `metric` - FROM functional_alltypes AS t2 + t0.string_col, + SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 GROUP BY 1 - ), t2 AS ( + UNION ALL SELECT - t3.`string_col`, - t3.`metric` - FROM ( - WITH t0 AS ( - SELECT - t2.`string_col`, - sum(t2.`double_col`) AS `metric` - FROM functional_alltypes AS t2 - GROUP BY - 1 - ) 
- SELECT - * - FROM t0 - UNION ALL - SELECT - t4.`string_col`, - sum(t4.`double_col`) AS `metric` - FROM functional_alltypes AS t4 - GROUP BY - 1 - ) AS t3 - ) - SELECT - * - FROM t2 + t0.string_col, + SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 + GROUP BY + 1 + ) AS t4 UNION ALL SELECT - t4.`string_col`, - sum(t4.`double_col`) AS `metric` - FROM functional_alltypes AS t4 + t0.string_col, + SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 GROUP BY 1 -) AS t1 \ No newline at end of file +) AS t6 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-True/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-True/out.sql index 1ee77c4af309..ef59312a1de9 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-True/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-True/out.sql @@ -1,55 +1,30 @@ -WITH t0 AS ( - SELECT - t2.`string_col`, - sum(t2.`double_col`) AS `metric` - FROM functional_alltypes AS t2 - GROUP BY - 1 -) SELECT - t1.`string_col`, - t1.`metric` + t6.string_col, + t6.metric FROM ( - WITH t0 AS ( + SELECT + t4.string_col, + t4.metric + FROM ( SELECT - t2.`string_col`, - sum(t2.`double_col`) AS `metric` - FROM functional_alltypes AS t2 + t0.string_col, + SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 GROUP BY 1 - ), t2 AS ( + UNION DISTINCT SELECT - t3.`string_col`, - t3.`metric` - FROM ( - WITH t0 AS ( - SELECT - t2.`string_col`, - sum(t2.`double_col`) AS `metric` - FROM functional_alltypes AS t2 - GROUP BY - 1 - ) - SELECT - * - FROM t0 - UNION DISTINCT - SELECT - t4.`string_col`, - sum(t4.`double_col`) AS `metric` - FROM functional_alltypes AS t4 - GROUP BY - 1 - ) AS t3 - ) - SELECT - * - FROM t2 + t0.string_col, + SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 + GROUP BY + 1 + ) AS t4 UNION ALL SELECT - t4.`string_col`, - sum(t4.`double_col`) AS `metric` - FROM functional_alltypes AS t4 + t0.string_col, + SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 GROUP BY 1 -) AS t1 \ No newline at end of file +) AS t6 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-False/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-False/out.sql index 5ed562f29914..36dde7359805 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-False/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-False/out.sql @@ -1,55 +1,30 @@ -WITH t0 AS ( - SELECT - t2.`string_col`, - sum(t2.`double_col`) AS `metric` - FROM functional_alltypes AS t2 - GROUP BY - 1 -) SELECT - t1.`string_col`, - t1.`metric` + t6.string_col, + t6.metric FROM ( - WITH t0 AS ( + SELECT + t4.string_col, + t4.metric + FROM ( SELECT - t2.`string_col`, - sum(t2.`double_col`) AS `metric` - FROM functional_alltypes AS t2 + t0.string_col, + SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 GROUP BY 1 - ), t2 AS ( + UNION ALL SELECT - t3.`string_col`, - t3.`metric` - FROM ( - WITH t0 AS ( - SELECT - t2.`string_col`, - sum(t2.`double_col`) AS `metric` - FROM functional_alltypes AS t2 - GROUP BY - 1 - ) - SELECT - * - FROM t0 - UNION ALL - SELECT - t4.`string_col`, - sum(t4.`double_col`) AS `metric` - FROM functional_alltypes AS t4 - GROUP BY - 1 - ) AS t3 - ) - SELECT - * - FROM t2 + t0.string_col, + 
SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 + GROUP BY + 1 + ) AS t4 UNION DISTINCT SELECT - t4.`string_col`, - sum(t4.`double_col`) AS `metric` - FROM functional_alltypes AS t4 + t0.string_col, + SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 GROUP BY 1 -) AS t1 \ No newline at end of file +) AS t6 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-True/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-True/out.sql index bc9dc55839a3..1ba202a0f834 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-True/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-True/out.sql @@ -1,55 +1,30 @@ -WITH t0 AS ( - SELECT - t2.`string_col`, - sum(t2.`double_col`) AS `metric` - FROM functional_alltypes AS t2 - GROUP BY - 1 -) SELECT - t1.`string_col`, - t1.`metric` + t6.string_col, + t6.metric FROM ( - WITH t0 AS ( + SELECT + t4.string_col, + t4.metric + FROM ( SELECT - t2.`string_col`, - sum(t2.`double_col`) AS `metric` - FROM functional_alltypes AS t2 + t0.string_col, + SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 GROUP BY 1 - ), t2 AS ( + UNION DISTINCT SELECT - t3.`string_col`, - t3.`metric` - FROM ( - WITH t0 AS ( - SELECT - t2.`string_col`, - sum(t2.`double_col`) AS `metric` - FROM functional_alltypes AS t2 - GROUP BY - 1 - ) - SELECT - * - FROM t0 - UNION DISTINCT - SELECT - t4.`string_col`, - sum(t4.`double_col`) AS `metric` - FROM functional_alltypes AS t4 - GROUP BY - 1 - ) AS t3 - ) - SELECT - * - FROM t2 + t0.string_col, + SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 + GROUP BY + 1 + ) AS t4 UNION DISTINCT SELECT - t4.`string_col`, - sum(t4.`double_col`) AS `metric` - FROM functional_alltypes AS t4 + t0.string_col, + SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 GROUP BY 1 -) AS t1 \ No newline at end of file +) AS t6 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_one_unnest.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_one_unnest.sql index 702add1dcd83..1efaf2f26d3a 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_one_unnest.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_one_unnest.sql @@ -1,16 +1,16 @@ SELECT - t0.`rowindex`, - IF(pos = pos_2, `repeated_struct_col`, NULL) AS `repeated_struct_col` + t0.rowindex, + IF(pos = pos_2, repeated_struct_col, NULL) AS repeated_struct_col FROM array_test AS t0 -CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(t0.`repeated_struct_col`)) - 1)) AS pos -CROSS JOIN UNNEST(t0.`repeated_struct_col`) AS `repeated_struct_col` WITH OFFSET AS pos_2 +CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(t0.repeated_struct_col)) - 1)) AS pos +CROSS JOIN UNNEST(t0.repeated_struct_col) AS repeated_struct_col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR ( pos > ( - ARRAY_LENGTH(t0.`repeated_struct_col`) - 1 + ARRAY_LENGTH(t0.repeated_struct_col) - 1 ) AND pos_2 = ( - ARRAY_LENGTH(t0.`repeated_struct_col`) - 1 + ARRAY_LENGTH(t0.repeated_struct_col) - 1 ) ) \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_two_unnests.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_two_unnests.sql index 57ec0a5f4378..febc60d6c4df 100644 --- 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_two_unnests.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_two_unnests.sql @@ -1,32 +1,32 @@ SELECT - IF(pos = pos_2, `level_two`, NULL) AS `level_two` + IF(pos = pos_2, level_two, NULL) AS level_two FROM ( SELECT - t1.`rowindex`, - IF(pos = pos_2, `level_one`, NULL).`nested_struct_col` AS `level_one` - FROM array_test AS t1 - CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(t1.`repeated_struct_col`)) - 1)) AS pos - CROSS JOIN UNNEST(t1.`repeated_struct_col`) AS `level_one` WITH OFFSET AS pos_2 + t0.rowindex, + IF(pos = pos_2, level_one, NULL).nested_struct_col AS level_one + FROM array_test AS t0 + CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(t0.repeated_struct_col)) - 1)) AS pos + CROSS JOIN UNNEST(t0.repeated_struct_col) AS level_one WITH OFFSET AS pos_2 WHERE pos = pos_2 OR ( pos > ( - ARRAY_LENGTH(t1.`repeated_struct_col`) - 1 + ARRAY_LENGTH(t0.repeated_struct_col) - 1 ) AND pos_2 = ( - ARRAY_LENGTH(t1.`repeated_struct_col`) - 1 + ARRAY_LENGTH(t0.repeated_struct_col) - 1 ) ) -) AS t0 -CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(t0.`level_one`)) - 1)) AS pos -CROSS JOIN UNNEST(t0.`level_one`) AS `level_two` WITH OFFSET AS pos_2 +) AS t1 +CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(t1.level_one)) - 1)) AS pos +CROSS JOIN UNNEST(t1.level_one) AS level_two WITH OFFSET AS pos_2 WHERE pos = pos_2 OR ( pos > ( - ARRAY_LENGTH(t0.`level_one`) - 1 + ARRAY_LENGTH(t1.level_one) - 1 ) AND pos_2 = ( - ARRAY_LENGTH(t0.`level_one`) - 1 + ARRAY_LENGTH(t1.level_one) - 1 ) ) \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/current_foll/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/current_foll/out.sql index 48fa0b1e700d..f7af87cc3889 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/current_foll/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/current_foll/out.sql @@ -1,4 +1,16 @@ SELECT - t0.*, - avg(t0.`float_col`) OVER (PARTITION BY t0.`year` ORDER BY t0.`timestamp_col` ASC ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING) AS `win_avg` + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month, + AVG(t0.float_col) OVER (PARTITION BY t0.year ORDER BY t0.timestamp_col ASC ROWS BETWEEN CURRENT ROW AND 2 following) AS win_avg FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_current/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_current/out.sql index c7a654e6c4af..812d5c8e17fc 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_current/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_current/out.sql @@ -1,4 +1,16 @@ SELECT - t0.*, - avg(t0.`float_col`) OVER (PARTITION BY t0.`year` ORDER BY t0.`timestamp_col` ASC ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS `win_avg` + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month, 
+ AVG(t0.float_col) OVER (PARTITION BY t0.year ORDER BY t0.timestamp_col ASC ROWS BETWEEN 1 preceding AND CURRENT ROW) AS win_avg FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_prec/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_prec/out.sql index 8b6db281ec69..dc3996ac2b7d 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_prec/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_prec/out.sql @@ -1,4 +1,16 @@ SELECT - t0.*, - avg(t0.`float_col`) OVER (PARTITION BY t0.`year` ORDER BY t0.`timestamp_col` ASC ROWS BETWEEN 4 PRECEDING AND 2 PRECEDING) AS `win_avg` + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month, + AVG(t0.float_col) OVER (PARTITION BY t0.year ORDER BY t0.timestamp_col ASC ROWS BETWEEN 4 preceding AND 2 preceding) AS win_avg FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/following/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/following/out.sql index 0a9989863d4f..4efa722dccbc 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/following/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/following/out.sql @@ -1,3 +1,3 @@ SELECT - sum(t0.`a`) OVER (ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) AS `tmp` + SUM(t0.a) OVER (ROWS BETWEEN 1 following AND UNBOUNDED FOLLOWING) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/preceding/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/preceding/out.sql index cfdb5364e102..fe5c57096cc2 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/preceding/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/preceding/out.sql @@ -1,3 +1,3 @@ SELECT - sum(t0.`a`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS `tmp` + SUM(t0.a) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 preceding) AS tmp FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/test_compiler.py b/ibis/backends/bigquery/tests/unit/test_compiler.py index fafeddbdaec2..0d95438df2c6 100644 --- a/ibis/backends/bigquery/tests/unit/test_compiler.py +++ b/ibis/backends/bigquery/tests/unit/test_compiler.py @@ -1,7 +1,6 @@ from __future__ import annotations import datetime -import re import time from operator import floordiv, methodcaller, truediv @@ -10,6 +9,7 @@ from pytest import param import ibis +import ibis.common.exceptions as com import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis import _ @@ -233,7 +233,7 @@ def test_substring_neg_length(): t = ibis.table([("value", "string")], name="t") expr = t["value"].substr(3, -1).name("tmp") with pytest.raises( - Exception, match=r"Length parameter must be a non-negative value\." 
+ Exception, match=r"Length parameter must be a non-negative value; got -1" ): to_sql(expr) @@ -387,10 +387,10 @@ def test_geospatial_simplify(snapshot): def test_geospatial_simplify_error(): t = ibis.table([("geog", "geography")], name="t") expr = t.geog.simplify(5.2, preserve_collapsed=True).name("tmp") - with pytest.raises(Exception) as exception_info: + with pytest.raises( + Exception, match="simplify does not support preserving collapsed geometries" + ): to_sql(expr) - expected = "BigQuery simplify does not support preserving collapsed geometries, must pass preserve_collapsed=False" - assert str(exception_info.value) == expected def test_timestamp_accepts_date_literals(alltypes): @@ -399,7 +399,7 @@ def test_timestamp_accepts_date_literals(alltypes): expr = alltypes.mutate(param=p) params = {p: date_string} result = to_sql(expr, params=params) - assert re.search(r"@param_\d+ AS `param`", result) is not None + assert "2009-03-01T00:00:00" in result @pytest.mark.parametrize("distinct", [True, False]) @@ -483,14 +483,18 @@ def test_range_window_function(alltypes, window, snapshot): "preceding", [ param(5, id="five"), - param(ibis.interval(nanoseconds=1), id="nanos", marks=pytest.mark.xfail), + param( + ibis.interval(nanoseconds=1), + id="nanos", + marks=pytest.mark.xfail(raises=com.UnsupportedOperationError), + ), param(ibis.interval(microseconds=1), id="micros"), param(ibis.interval(seconds=1), id="seconds"), param(ibis.interval(minutes=1), id="minutes"), param(ibis.interval(hours=1), id="hours"), param(ibis.interval(days=1), id="days"), param(2 * ibis.interval(days=1), id="two_days"), - param(ibis.interval(weeks=1), id="week", marks=pytest.mark.xfail), + param(ibis.interval(weeks=1), id="week"), ], ) def test_trailing_range_window(alltypes, preceding, snapshot): @@ -584,7 +588,7 @@ def test_scalar_param_scope(alltypes): t = alltypes param = ibis.param("timestamp") result = to_sql(t.mutate(param=param), params={param: "2017-01-01"}) - assert re.search(r"@param_\d+ AS `param`", result) is not None + assert "2017-01-01T00:00:00" in result def test_cast_float_to_int(alltypes, snapshot): diff --git a/ibis/backends/bigquery/tests/unit/udf/snapshots/test_builtin/test_bqutil_fn_from_hex/out.sql b/ibis/backends/bigquery/tests/unit/udf/snapshots/test_builtin/test_bqutil_fn_from_hex/out.sql index 2cabc41aa447..3308b79b3ad0 100644 --- a/ibis/backends/bigquery/tests/unit/udf/snapshots/test_builtin/test_bqutil_fn_from_hex/out.sql +++ b/ibis/backends/bigquery/tests/unit/udf/snapshots/test_builtin/test_bqutil_fn_from_hex/out.sql @@ -1,2 +1,2 @@ SELECT - `bqutil`.`fn`.from_hex('face') AS `from_hex_'face'` \ No newline at end of file + bqutil.fn.from_hex('face') AS `from_hex_0_'face'` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/udf/snapshots/test_builtin/test_farm_fingerprint/out.sql b/ibis/backends/bigquery/tests/unit/udf/snapshots/test_builtin/test_farm_fingerprint/out.sql index 9128e636a8a6..49c49900b198 100644 --- a/ibis/backends/bigquery/tests/unit/udf/snapshots/test_builtin/test_farm_fingerprint/out.sql +++ b/ibis/backends/bigquery/tests/unit/udf/snapshots/test_builtin/test_farm_fingerprint/out.sql @@ -1,2 +1,2 @@ SELECT - farm_fingerprint(b'Hello, World!') AS `farm_fingerprint_b'Hello_ World_'` \ No newline at end of file + farm_fingerprint(CAST('48656c6c6f2c20576f726c6421' AS BYTES FORMAT 'HEX')) AS `farm_fingerprint_0_b'Hello_ World_'` \ No newline at end of file diff --git 
a/ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_multiple_calls_redefinition/out.sql b/ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_multiple_calls_redefinition/out.sql deleted file mode 100644 index bca06e837674..000000000000 --- a/ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_multiple_calls_redefinition/out.sql +++ /dev/null @@ -1,18 +0,0 @@ -CREATE TEMPORARY FUNCTION my_len_0( - s STRING -) -RETURNS FLOAT64 -LANGUAGE js AS -'\n\'use strict\';\nfunction my_len(s) {\n return s.length;\n}\nreturn my_len(s);\n'; - -CREATE TEMPORARY FUNCTION my_len_1( - s STRING -) -RETURNS FLOAT64 -LANGUAGE js AS -'\n\'use strict\';\nfunction my_len(s) {\n return (s.length + 1);\n}\nreturn my_len(s);\n'; - -SELECT - ( - my_len_0('abcd') + my_len_0('abcd') - ) + my_len_1('abcd') AS `Add_Add_my_len_0_'abcd'_ my_len_0_'abcd'_ my_len_1_'abcd'` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_determinism/False/out.sql b/ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_determinism/False/out.sql deleted file mode 100644 index 4b7713dd8904..000000000000 --- a/ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_determinism/False/out.sql +++ /dev/null @@ -1,10 +0,0 @@ -CREATE TEMPORARY FUNCTION my_len_0( - s STRING -) -RETURNS FLOAT64 -NOT DETERMINISTIC -LANGUAGE js AS -'\n\'use strict\';\nfunction my_len(s) {\n return s.length;\n}\nreturn my_len(s);\n'; - -SELECT - my_len_0('abcd') AS `my_len_0_'abcd'` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_determinism/None/out.sql b/ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_determinism/None/out.sql deleted file mode 100644 index 0c86cd6c558b..000000000000 --- a/ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_determinism/None/out.sql +++ /dev/null @@ -1,9 +0,0 @@ -CREATE TEMPORARY FUNCTION my_len_0( - s STRING -) -RETURNS FLOAT64 -LANGUAGE js AS -'\n\'use strict\';\nfunction my_len(s) {\n return s.length;\n}\nreturn my_len(s);\n'; - -SELECT - my_len_0('abcd') AS `my_len_0_'abcd'` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_determinism/True/out.sql b/ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_determinism/True/out.sql deleted file mode 100644 index 0ace954587a6..000000000000 --- a/ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_determinism/True/out.sql +++ /dev/null @@ -1,10 +0,0 @@ -CREATE TEMPORARY FUNCTION my_len_0( - s STRING -) -RETURNS FLOAT64 -DETERMINISTIC -LANGUAGE js AS -'\n\'use strict\';\nfunction my_len(s) {\n return s.length;\n}\nreturn my_len(s);\n'; - -SELECT - my_len_0('abcd') AS `my_len_0_'abcd'` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_sql/out.sql b/ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_sql/out.sql deleted file mode 100644 index 70812f001fb9..000000000000 --- a/ibis/backends/bigquery/tests/unit/udf/snapshots/test_usage/test_udf_sql/out.sql +++ /dev/null @@ -1,10 +0,0 @@ -CREATE TEMPORARY FUNCTION format_t_0( - input STRING -) -RETURNS FLOAT64 AS -( - FORMAT('%T', input) -); - -SELECT - format_t_0('abcd') AS `format_t_0_'abcd'` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/udf/test_builtin.py b/ibis/backends/bigquery/tests/unit/udf/test_builtin.py index 
2d877eba9f4e..14d2373a45a2 100644 --- a/ibis/backends/bigquery/tests/unit/udf/test_builtin.py +++ b/ibis/backends/bigquery/tests/unit/udf/test_builtin.py @@ -10,7 +10,7 @@ def farm_fingerprint(value: bytes) -> int: ... -@ibis.udf.scalar.builtin(schema="bqutil.fn") +@ibis.udf.scalar.builtin(schema="fn", database="bqutil") def from_hex(value: str) -> int: """Community function to convert from hex string to integer. diff --git a/ibis/backends/bigquery/tests/unit/udf/test_usage.py b/ibis/backends/bigquery/tests/unit/udf/test_usage.py index ae0135187cfb..4f8dd4218d36 100644 --- a/ibis/backends/bigquery/tests/unit/udf/test_usage.py +++ b/ibis/backends/bigquery/tests/unit/udf/test_usage.py @@ -1,120 +1,74 @@ from __future__ import annotations +import re + import pytest -from pytest import param import ibis +import ibis.common.exceptions as com import ibis.expr.datatypes as dt -from ibis.backends.bigquery import udf -from ibis.backends.bigquery.udf import _udf_name_cache - +from ibis import udf -def test_multiple_calls_redefinition(snapshot): - _udf_name_cache.clear() - @udf.python([dt.string], dt.double) - def my_len(s): +def test_multiple_calls_redefinition(): + @udf.scalar.python + def my_len(s: str) -> float: return s.length s = ibis.literal("abcd") expr = my_len(s) + my_len(s) - @udf.python([dt.string], dt.double) - def my_len(s): + @udf.scalar.python + def my_len(s: str) -> float: return s.length + 1 expr = expr + my_len(s) sql = ibis.bigquery.compile(expr) - snapshot.assert_match(sql, "out.sql") + assert len(set(re.findall(r"my_len_(\d+)", sql))) == 2 -@pytest.mark.parametrize( - ("determinism",), - [ - param(True), - param(False), - param(None), - ], -) -def test_udf_determinism(snapshot, determinism): - _udf_name_cache.clear() - - @udf.python([dt.string], dt.double, determinism=determinism) - def my_len(s): +@pytest.mark.parametrize("determinism", [True, False, None]) +def test_udf_determinism(determinism): + @udf.scalar.python(determinism=determinism) + def my_len(s: str) -> float: return s.length s = ibis.literal("abcd") expr = my_len(s) sql = ibis.bigquery.compile(expr) - snapshot.assert_match(sql, "out.sql") - -def test_udf_sql(snapshot): - _udf_name_cache.clear() - - format_t = udf.sql( - "format_t", - params={"input": dt.string}, - output_type=dt.double, - sql_expression="FORMAT('%T', input)", - ) - - s = ibis.literal("abcd") - expr = format_t(s) - - sql = ibis.bigquery.compile(expr) - snapshot.assert_match(sql, "out.sql") + if not determinism: + assert "NOT DETERMINISTIC" in sql + else: + assert "DETERMINISTIC" in sql and "NOT DETERMINISTIC" not in sql @pytest.mark.parametrize( ("argument_type", "return_type"), [ - param( - dt.int64, - dt.float64, - marks=pytest.mark.xfail(raises=TypeError), - id="int_float", - ), - param( - dt.float64, - dt.int64, - marks=pytest.mark.xfail(raises=TypeError), - id="float_int", - ), + # invalid input type + (dt.int64, dt.float64), + # invalid return type + (dt.float64, dt.int64), # complex argument type, valid return type - param( - dt.Array(dt.int64), - dt.float64, - marks=pytest.mark.xfail(raises=TypeError), - id="array_int_float", - ), + (dt.Array(dt.int64), dt.float64), # valid argument type, complex invalid return type - param( - dt.float64, - dt.Array(dt.int64), - marks=pytest.mark.xfail(raises=TypeError), - id="float_array_int", - ), + (dt.float64, dt.Array(dt.int64)), # both invalid - param( - dt.Array(dt.Array(dt.int64)), - dt.int64, - marks=pytest.mark.xfail(raises=TypeError), - id="array_array_int_int", - ), + 
(dt.Array(dt.Array(dt.int64)), dt.int64), # struct type with nested integer, valid return type - param( - dt.Struct.from_tuples([("x", dt.Array(dt.int64))]), - dt.float64, - marks=pytest.mark.xfail(raises=TypeError), - id="struct", - ), + (dt.Struct({"x": dt.Array(dt.int64)}), dt.float64), ], + ids=str, ) def test_udf_int64(argument_type, return_type): # invalid argument type, valid return type - @udf.python([argument_type], return_type) - def my_int64_add(x): - return 1.0 + @udf.scalar.python(signature=((argument_type,), return_type)) + def my_func(x): + return 1 + + expr = my_func(None) + with pytest.raises(com.UnsupportedBackendType): + ibis.bigquery.compile(expr) diff --git a/ibis/backends/bigquery/udf/__init__.py b/ibis/backends/bigquery/udf/__init__.py index 0e84b972c438..e69de29bb2d1 100644 --- a/ibis/backends/bigquery/udf/__init__.py +++ b/ibis/backends/bigquery/udf/__init__.py @@ -1,396 +0,0 @@ -from __future__ import annotations - -import collections -import inspect -import itertools -from typing import TYPE_CHECKING, Callable, Literal - -import ibis.expr.datatypes as dt -import ibis.expr.rules as rlz -from ibis.backends.bigquery.datatypes import BigQueryType, spread_type -from ibis.backends.bigquery.operations import BigQueryUDFNode -from ibis.backends.bigquery.udf.core import PythonToJavaScriptTranslator -from ibis.legacy.udf.validate import validate_output_type - -if TYPE_CHECKING: - from collections.abc import Iterable, Mapping - -__all__ = ("udf",) - -_udf_name_cache: dict[str, Iterable[int]] = collections.defaultdict(itertools.count) - - -def _make_udf_name(name): - definition = next(_udf_name_cache[name]) - external_name = f"{name}_{definition:d}" - return external_name - - -class _BigQueryUDF: - def __call__(self, *args, **kwargs): - return self.python(*args, **kwargs) - - def python( - self, - input_type: Iterable[dt.DataType], - output_type: dt.DataType, - strict: bool = True, - libraries: Iterable[str] | None = None, - determinism: bool | None = None, - ) -> Callable: - '''Define a UDF for BigQuery. - - The function is transpiled to JS. - - `INT64` is not supported as an argument type or a return type, as per - [the BigQuery documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions#sql-type-encodings-in-javascript). - - Parameters - ---------- - input_type - Iterable of types, one per argument. - output_type - Return type of the UDF. - strict - Whether or not to put a ``'use strict';`` string at the beginning of - the UDF. Setting to ``False`` is probably a bad idea. - libraries - An iterable of Google Cloud Storage URIs containing to JavaScript source - code. Note that any symbols (functions, classes, variables, etc.) that - are exposed in these JavaScript files will be visible inside the UDF. - determinism - Provides a hint to BigQuery as to whether the query result can be cached. - - Returns - ------- - Callable - The wrapped user-defined function. - - Examples - -------- - >>> from ibis.backends.bigquery import udf - >>> import ibis.expr.datatypes as dt - >>> @udf.python(input_type=[dt.double], output_type=dt.double) - ... def add_one(x): - ... return x + 1 - >>> print(add_one.sql) - CREATE TEMPORARY FUNCTION add_one_0(x FLOAT64) - RETURNS FLOAT64 - LANGUAGE js AS """ - 'use strict'; - function add_one(x) { - return (x + 1); - } - return add_one(x); - """; - >>> @udf.python(input_type=[dt.double, dt.double], output_type=dt.Array(dt.double)) - ... def my_range(start, stop): - ... def gen(start, stop): - ... 
curr = start - ... while curr < stop: - ... yield curr - ... curr += 1 - ... - ... result = [] - ... for value in gen(start, stop): - ... result.append(value) - ... return result - >>> print(my_range.sql) - CREATE TEMPORARY FUNCTION my_range_0(start FLOAT64, stop FLOAT64) - RETURNS ARRAY<FLOAT64> - LANGUAGE js AS """ - 'use strict'; - function my_range(start, stop) { - function* gen(start, stop) { - let curr = start; - while ((curr < stop)) { - yield curr; - curr += 1; - } - } - let result = []; - for (let value of gen(start, stop)) { - result.push(value); - } - return result; - } - return my_range(start, stop); - """; - >>> @udf.python( - ... input_type=[dt.double, dt.double], - ... output_type=dt.Struct.from_tuples([("width", "double"), ("height", "double")]), - ... ) - ... def my_rectangle(width, height): - ... class Rectangle: - ... def __init__(self, width, height): - ... self.width = width - ... self.height = height - ... - ... @property - ... def area(self): - ... return self.width * self.height - ... - ... def perimeter(self): - ... return 2 * (self.width + self.height) - ... - ... return Rectangle(width, height) - >>> print(my_rectangle.sql) - CREATE TEMPORARY FUNCTION my_rectangle_0(width FLOAT64, height FLOAT64) - RETURNS STRUCT<width FLOAT64, height FLOAT64> - LANGUAGE js AS """ - 'use strict'; - function my_rectangle(width, height) { - class Rectangle { - constructor(width, height) { - this.width = width; - this.height = height; - } - get area() { - return (this.width * this.height); - } - perimeter() { - return (2 * (this.width + this.height)); - } - } - return (new Rectangle(width, height)); - } - return my_rectangle(width, height); - """; - ''' - validate_output_type(output_type) - - if libraries is None: - libraries = [] - - def wrapper(f): - if not callable(f): - raise TypeError(f"f must be callable, got {f}") - - signature = inspect.signature(f) - parameter_names = signature.parameters.keys() - source = PythonToJavaScriptTranslator(f).compile() - args = ", ".join(parameter_names) - strict_str = repr("use strict") + ";\n" if strict else "" - function_body = f"""\ -{strict_str}{source} -return {f.__name__}({args});\ -""" - - return self.js( - name=f.__name__, - params=(dict(zip(parameter_names, input_type))), - output_type=output_type, - body=function_body, - libraries=libraries, - determinism=determinism, - ) - - return wrapper - - @staticmethod - def js( - name: str, - params: Mapping[str, dt.DataType], - output_type: dt.DataType, - body: str, - libraries: Iterable[str] | None = None, - determinism: bool | None = None, - ) -> Callable: - '''Define a Javascript UDF for BigQuery. - - `INT64` is not supported as an argument type or a return type, as per - [the BigQuery documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions#sql-type-encodings-in-javascript). - - Parameters - ---------- - name: - The name of the function. - params - Mapping of names and types of parameters - output_type - Return type of the UDF. - body: - The code of the function. - libraries - An iterable of Google Cloud Storage URIs containing to JavaScript source - code. Note that any symbols (functions, classes, variables, etc.) that - are exposed in these JavaScript files will be visible inside the UDF. - determinism - Provides a hint to BigQuery as to whether the query result can be cached. - - Returns - ------- - Callable - The user-defined function. - - Examples - -------- - >>> from ibis.backends.bigquery import udf - >>> import ibis.expr.datatypes as dt - >>> add_one = udf.js( - ...
name="add_one", - ... params={"a": dt.double}, - ... output_type=dt.double, - ... body="return x + 1", - ... ) - >>> print(add_one.sql) - CREATE TEMPORARY FUNCTION add_one_0(x FLOAT64) - RETURNS FLOAT64 - LANGUAGE js AS """ - return x + 1 - """; - ''' - validate_output_type(output_type) - if any( - type_ == dt.int64 - for param_type in params.values() - for type_ in spread_type(param_type) - ) or any(type_ == dt.int64 for type_ in spread_type(output_type)): - raise TypeError( - "BigQuery does not support INT64 as an argument type or a return type " - "for UDFs. Replace INT64 with FLOAT64 in your UDF signature and " - "cast all INT64 inputs to FLOAT64." - ) - - if libraries is None: - libraries = [] - - bigquery_signature = ", ".join( - f"{name} {BigQueryType.to_string(dt.dtype(type_))}" - for name, type_ in params.items() - ) - return_type = BigQueryType.to_string(dt.dtype(output_type)) - libraries_opts = ( - f"\nOPTIONS (\n library={list(libraries)!r}\n)" if libraries else "" - ) - determinism_formatted = { - True: "DETERMINISTIC\n", - False: "NOT DETERMINISTIC\n", - None: "", - }.get(determinism) - - name = _make_udf_name(name) - sql_code = f'''\ -CREATE TEMPORARY FUNCTION {name}({bigquery_signature}) -RETURNS {return_type} -{determinism_formatted}LANGUAGE js AS """ -{body} -"""{libraries_opts};''' - - udf_node_fields = { - name: rlz.ValueOf(None if type_ == "ANY TYPE" else type_) - for name, type_ in params.items() - } - - udf_node_fields["dtype"] = output_type - udf_node_fields["shape"] = rlz.shape_like("args") - udf_node_fields["sql"] = sql_code - - udf_node = type(name, (BigQueryUDFNode,), udf_node_fields) - - from ibis.backends.bigquery.compiler import compiles - - @compiles(udf_node) - def compiles_udf_node(t, op): - args = ", ".join(map(t.translate, op.args)) - return f"{udf_node.__name__}({args})" - - def wrapped(*args, **kwargs): - node = udf_node(*args, **kwargs) - return node.to_expr() - - wrapped.__signature__ = inspect.Signature( - parameters=[ - inspect.Parameter( - name=param, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD - ) - for param in params.keys() - ] - ) - wrapped.__name__ = name - wrapped.sql = sql_code - return wrapped - - @staticmethod - def sql( - name: str, - params: Mapping[str, dt.DataType | Literal["ANY TYPE"]], - output_type: dt.DataType, - sql_expression: str, - ) -> Callable: - """Define a SQL UDF for BigQuery. - - Parameters - ---------- - name: - The name of the function. - params - Mapping of names and types of parameters - output_type - Return type of the UDF. - sql_expression - The SQL expression that defines the function. - - Returns - ------- - Callable - The wrapped user-defined function. - - Examples - -------- - >>> from ibis.backends.bigquery import udf - >>> import ibis.expr.datatypes as dt - >>> add_one = udf.sql( - ... name="add_one", - ... params={"x": dt.double}, - ... output_type=dt.double, - ... sql_expression="x + 1", - ... 
) - >>> print(add_one.sql) - CREATE TEMPORARY FUNCTION add_one_0(x FLOAT64) - RETURNS FLOAT64 - AS (x + 1) - """ - validate_output_type(output_type) - udf_node_fields = { - name: rlz.ValueOf(None if type_ == "ANY TYPE" else type_) - for name, type_ in params.items() - } - return_type = BigQueryType.to_string(dt.dtype(output_type)) - - bigquery_signature = ", ".join( - "{name} {type}".format( - name=name, - type="ANY TYPE" - if type_ == "ANY TYPE" - else BigQueryType.to_string(dt.dtype(type_)), - ) - for name, type_ in params.items() - ) - name = _make_udf_name(name) - sql_code = f"""\ -CREATE TEMPORARY FUNCTION {name}({bigquery_signature}) -RETURNS {return_type} -AS ({sql_expression});""" - - udf_node_fields["dtype"] = output_type - udf_node_fields["shape"] = rlz.shape_like("args") - udf_node_fields["sql"] = sql_code - - udf_node = type(name, (BigQueryUDFNode,), udf_node_fields) - - from ibis.backends.bigquery.compiler import compiles - - @compiles(udf_node) - def compiles_udf_node(t, op): - args = ", ".join(map(t.translate, op.args)) - return f"{udf_node.__name__}({args})" - - def wrapper(*args, **kwargs): - node = udf_node(*args, **kwargs) - return node.to_expr() - - return wrapper - - -udf = _BigQueryUDF() diff --git a/ibis/backends/bigquery/udf/core.py b/ibis/backends/bigquery/udf/core.py index 00a4bf3c6235..58351841e7f1 100644 --- a/ibis/backends/bigquery/udf/core.py +++ b/ibis/backends/bigquery/udf/core.py @@ -10,7 +10,6 @@ from collections import ChainMap from typing import Callable -import ibis.expr.datatypes as dt from ibis.backends.bigquery.udf.find import find_names from ibis.backends.bigquery.udf.rewrite import rewrite @@ -514,14 +513,11 @@ def visit_Delete(self, node): if __name__ == "__main__": - from ibis.backends.bigquery.udf import udf + import ibis + from ibis import udf - @udf( - input_type=[dt.double, dt.double, dt.int64], - output_type=dt.Array(dt.double), - strict=False, - ) - def my_func(a, b, n): + @udf.scalar.python(strict=False) + def my_func(a: float, b: float, n: float) -> list[float]: class Rectangle: def __init__(self, width, height): self.width = width @@ -598,4 +594,4 @@ def range(n): nnn = len(values) return [sum(values) - a + b * y**-x, z, foo.width, nnn] - print(my_func.sql) # noqa: T201 + print(ibis.bigquery.compile(my_func(42.7, 13.2, 1))) # noqa: T201 diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index d7111410cb28..f13b8b8127cd 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -1434,12 +1434,13 @@ def _register_udfs(self, expr: ir.Expr) -> None: def _compile_udf(self, udf_node: ops.ScalarUDF) -> None: func = udf_node.__func__ - name = func.__name__ + name = type(udf_node).__name__ + type_mapper = self.compiler.type_mapper input_types = [ - self.compiler.type_mapper.to_string(param.annotation.pattern.dtype) + type_mapper.to_string(param.annotation.pattern.dtype) for param in udf_node.__signature__.parameters.values() ] - output_type = self.compiler.type_mapper.to_string(udf_node.dtype) + output_type = type_mapper.to_string(udf_node.dtype) def register_udf(con): return con.create_function( diff --git a/ibis/backends/impala/tests/snapshots/test_udf/test_sql_generation/out.sql b/ibis/backends/impala/tests/snapshots/test_udf/test_sql_generation/out.sql index 6ae4e58e0380..1aa828dc1fc5 100644 --- a/ibis/backends/impala/tests/snapshots/test_udf/test_sql_generation/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_udf/test_sql_generation/out.sql @@ -1,2 +1,2 @@ SELECT - 
UDF_TESTING.IDENTITY('hello world') AS `identity('hello world')` \ No newline at end of file + UDF_TESTING.IDENTITY('hello world') AS `identity_0('hello world')` \ No newline at end of file diff --git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py index 7e25f97c9432..f177d38bf126 100644 --- a/ibis/backends/postgres/__init__.py +++ b/ibis/backends/postgres/__init__.py @@ -433,7 +433,7 @@ def _get_udf_source(self, udf_node: ops.ScalarUDF): type_mapper = self.compiler.type_mapper argnames = udf_node.argnames return dict( - name=udf_node.__func_name__, + name=type(udf_node).__name__, ident=self.compiler.__sql_name__(udf_node), signature=", ".join( f"{argname} {type_mapper.to_string(arg.dtype)}" diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/bigquery/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/bigquery/out.sql new file mode 100644 index 000000000000..09e7f95d3580 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/bigquery/out.sql @@ -0,0 +1,5 @@ +SELECT + t0.id, + t0.bool_col +FROM `ibis-gbq`.ibis_gbq_testing.functional_alltypes AS t0 +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/bigquery/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/bigquery/out.sql new file mode 100644 index 000000000000..09e7f95d3580 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/bigquery/out.sql @@ -0,0 +1,5 @@ +SELECT + t0.id, + t0.bool_col +FROM `ibis-gbq`.ibis_gbq_testing.functional_alltypes AS t0 +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/bigquery/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/bigquery/out.sql new file mode 100644 index 000000000000..7601954ae202 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/bigquery/out.sql @@ -0,0 +1,3 @@ +SELECT + SUM(t0.bigint_col) AS Sum_bigint_col +FROM `ibis-gbq`.ibis_gbq_testing.functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/bigquery/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/bigquery/out.sql new file mode 100644 index 000000000000..1f1906f84e03 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/bigquery/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + t0.id, + t0.bool_col + FROM `ibis-gbq`.ibis_gbq_testing.functional_alltypes AS t0 + LIMIT 10 +) AS t2 +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/bigquery/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/bigquery/out.sql index 96780ab79914..fc16f2428d16 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/bigquery/out.sql @@ -1,5 +1,5 @@ SELECT - CASE t0.`continent` + CASE t0.continent WHEN 'NA' THEN 'North America' WHEN 'SA' @@ -15,8 +15,8 @@ SELECT WHEN 'AN' THEN 'Antarctica' ELSE 'Unknown continent' - END AS `cont`, - sum(t0.`population`) AS `total_pop` + END AS cont, + SUM(t0.population) AS total_pop FROM countries AS t0 GROUP BY 1 \ No newline at end of file diff --git 
a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/pyspark/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/pyspark/out.sql new file mode 100644 index 000000000000..ac006b1d5f25 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/pyspark/out.sql @@ -0,0 +1,22 @@ +SELECT + CASE `t0`.`continent` + WHEN 'NA' + THEN 'North America' + WHEN 'SA' + THEN 'South America' + WHEN 'EU' + THEN 'Europe' + WHEN 'AF' + THEN 'Africa' + WHEN 'AS' + THEN 'Asia' + WHEN 'OC' + THEN 'Oceania' + WHEN 'AN' + THEN 'Antarctica' + ELSE 'Unknown continent' + END AS `cont`, + SUM(`t0`.`population`) AS `total_pop` +FROM `countries` AS `t0` +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/bigquery/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/bigquery/out.sql index 95548f1704ec..9e6bcbdd13af 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/bigquery/out.sql @@ -1,13 +1,9 @@ SELECT - t0.`x` IN ( + t0.x IN ( SELECT - t1.`x` - FROM ( - SELECT - t0.* - FROM t AS t0 - WHERE - t0.`x` > 2 - ) AS t1 - ) AS `InColumn_x_ x` + t0.x + FROM t AS t0 + WHERE + t0.x > 2 + ) AS InSubquery_x FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/pyspark/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/pyspark/out.sql new file mode 100644 index 000000000000..db5ddb124e86 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/pyspark/out.sql @@ -0,0 +1,9 @@ +SELECT + `t0`.`x` IN ( + SELECT + `t0`.`x` + FROM `t` AS `t0` + WHERE + `t0`.`x` > 2 + ) AS `InSubquery(x)` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/bigquery/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/bigquery/out.sql index 58b8e22977be..a53e4550c0dd 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/bigquery/out.sql @@ -1,156 +1,138 @@ -WITH t0 AS ( - SELECT - t7.`field_of_study`, - IF(pos = pos_2, `__pivoted__`, NULL) AS `__pivoted__` - FROM humanities AS t7 - CROSS JOIN UNNEST(GENERATE_ARRAY( - 0, - GREATEST( - ARRAY_LENGTH( - [STRUCT('1970-71' AS years, t7.`1970-71` AS degrees), STRUCT('1975-76' AS years, t7.`1975-76` AS degrees), STRUCT('1980-81' AS years, t7.`1980-81` AS degrees), STRUCT('1985-86' AS years, t7.`1985-86` AS degrees), STRUCT('1990-91' AS years, t7.`1990-91` AS degrees), STRUCT('1995-96' AS years, t7.`1995-96` AS degrees), STRUCT('2000-01' AS years, t7.`2000-01` AS degrees), STRUCT('2005-06' AS years, t7.`2005-06` AS degrees), STRUCT('2010-11' AS years, t7.`2010-11` AS degrees), STRUCT('2011-12' AS years, t7.`2011-12` AS degrees), STRUCT('2012-13' AS years, t7.`2012-13` AS degrees), STRUCT('2013-14' AS years, t7.`2013-14` AS degrees), STRUCT('2014-15' AS years, t7.`2014-15` AS degrees), STRUCT('2015-16' AS years, t7.`2015-16` AS degrees), STRUCT('2016-17' AS years, t7.`2016-17` AS degrees), STRUCT('2017-18' AS years, t7.`2017-18` AS degrees), STRUCT('2018-19' AS years, t7.`2018-19` AS degrees), STRUCT('2019-20' AS years, t7.`2019-20` AS degrees)] - ) - ) - 1 - )) AS pos - CROSS JOIN UNNEST([STRUCT('1970-71' AS years, t7.`1970-71` AS degrees), STRUCT('1975-76' AS years, t7.`1975-76` AS degrees), STRUCT('1980-81' AS years, t7.`1980-81` AS degrees), 
STRUCT('1985-86' AS years, t7.`1985-86` AS degrees), STRUCT('1990-91' AS years, t7.`1990-91` AS degrees), STRUCT('1995-96' AS years, t7.`1995-96` AS degrees), STRUCT('2000-01' AS years, t7.`2000-01` AS degrees), STRUCT('2005-06' AS years, t7.`2005-06` AS degrees), STRUCT('2010-11' AS years, t7.`2010-11` AS degrees), STRUCT('2011-12' AS years, t7.`2011-12` AS degrees), STRUCT('2012-13' AS years, t7.`2012-13` AS degrees), STRUCT('2013-14' AS years, t7.`2013-14` AS degrees), STRUCT('2014-15' AS years, t7.`2014-15` AS degrees), STRUCT('2015-16' AS years, t7.`2015-16` AS degrees), STRUCT('2016-17' AS years, t7.`2016-17` AS degrees), STRUCT('2017-18' AS years, t7.`2017-18` AS degrees), STRUCT('2018-19' AS years, t7.`2018-19` AS degrees), STRUCT('2019-20' AS years, t7.`2019-20` AS degrees)]) AS `__pivoted__` WITH OFFSET AS pos_2 - WHERE - pos = pos_2 - OR ( - pos > ( - ARRAY_LENGTH( - [STRUCT('1970-71' AS years, t7.`1970-71` AS degrees), STRUCT('1975-76' AS years, t7.`1975-76` AS degrees), STRUCT('1980-81' AS years, t7.`1980-81` AS degrees), STRUCT('1985-86' AS years, t7.`1985-86` AS degrees), STRUCT('1990-91' AS years, t7.`1990-91` AS degrees), STRUCT('1995-96' AS years, t7.`1995-96` AS degrees), STRUCT('2000-01' AS years, t7.`2000-01` AS degrees), STRUCT('2005-06' AS years, t7.`2005-06` AS degrees), STRUCT('2010-11' AS years, t7.`2010-11` AS degrees), STRUCT('2011-12' AS years, t7.`2011-12` AS degrees), STRUCT('2012-13' AS years, t7.`2012-13` AS degrees), STRUCT('2013-14' AS years, t7.`2013-14` AS degrees), STRUCT('2014-15' AS years, t7.`2014-15` AS degrees), STRUCT('2015-16' AS years, t7.`2015-16` AS degrees), STRUCT('2016-17' AS years, t7.`2016-17` AS degrees), STRUCT('2017-18' AS years, t7.`2017-18` AS degrees), STRUCT('2018-19' AS years, t7.`2018-19` AS degrees), STRUCT('2019-20' AS years, t7.`2019-20` AS degrees)] - ) - 1 - ) - AND pos_2 = ( - ARRAY_LENGTH( - [STRUCT('1970-71' AS years, t7.`1970-71` AS degrees), STRUCT('1975-76' AS years, t7.`1975-76` AS degrees), STRUCT('1980-81' AS years, t7.`1980-81` AS degrees), STRUCT('1985-86' AS years, t7.`1985-86` AS degrees), STRUCT('1990-91' AS years, t7.`1990-91` AS degrees), STRUCT('1995-96' AS years, t7.`1995-96` AS degrees), STRUCT('2000-01' AS years, t7.`2000-01` AS degrees), STRUCT('2005-06' AS years, t7.`2005-06` AS degrees), STRUCT('2010-11' AS years, t7.`2010-11` AS degrees), STRUCT('2011-12' AS years, t7.`2011-12` AS degrees), STRUCT('2012-13' AS years, t7.`2012-13` AS degrees), STRUCT('2013-14' AS years, t7.`2013-14` AS degrees), STRUCT('2014-15' AS years, t7.`2014-15` AS degrees), STRUCT('2015-16' AS years, t7.`2015-16` AS degrees), STRUCT('2016-17' AS years, t7.`2016-17` AS degrees), STRUCT('2017-18' AS years, t7.`2017-18` AS degrees), STRUCT('2018-19' AS years, t7.`2018-19` AS degrees), STRUCT('2019-20' AS years, t7.`2019-20` AS degrees)] - ) - 1 - ) - ) -), t1 AS ( - SELECT - t0.`field_of_study`, - t0.`__pivoted__`.`years` AS `years`, - t0.`__pivoted__`.`degrees` AS `degrees` - FROM t0 -), t2 AS ( - SELECT - t1.*, - first_value(t1.`degrees`) OVER (PARTITION BY t1.`field_of_study` ORDER BY t1.`years` ASC) AS `earliest_degrees`, - last_value(t1.`degrees`) OVER (PARTITION BY t1.`field_of_study` ORDER BY t1.`years` ASC) AS `latest_degrees` - FROM t1 -), t3 AS ( - SELECT - t2.*, - t2.`latest_degrees` - t2.`earliest_degrees` AS `diff` - FROM t2 -), t4 AS ( - SELECT - t3.`field_of_study`, - ANY_VALUE(t3.`diff`) AS `diff` - FROM t3 - GROUP BY - 1 -), t5 AS ( - SELECT - t4.* - FROM t4 - WHERE - t4.`diff` < 0 -) SELECT - t6.`field_of_study`, 
- t6.`diff` + t10.field_of_study, + t10.diff FROM ( - WITH t0 AS ( - SELECT - t7.`field_of_study`, - IF(pos = pos_2, `__pivoted__`, NULL) AS `__pivoted__` - FROM humanities AS t7 - CROSS JOIN UNNEST(GENERATE_ARRAY( - 0, - GREATEST( - ARRAY_LENGTH( - [STRUCT('1970-71' AS years, t7.`1970-71` AS degrees), STRUCT('1975-76' AS years, t7.`1975-76` AS degrees), STRUCT('1980-81' AS years, t7.`1980-81` AS degrees), STRUCT('1985-86' AS years, t7.`1985-86` AS degrees), STRUCT('1990-91' AS years, t7.`1990-91` AS degrees), STRUCT('1995-96' AS years, t7.`1995-96` AS degrees), STRUCT('2000-01' AS years, t7.`2000-01` AS degrees), STRUCT('2005-06' AS years, t7.`2005-06` AS degrees), STRUCT('2010-11' AS years, t7.`2010-11` AS degrees), STRUCT('2011-12' AS years, t7.`2011-12` AS degrees), STRUCT('2012-13' AS years, t7.`2012-13` AS degrees), STRUCT('2013-14' AS years, t7.`2013-14` AS degrees), STRUCT('2014-15' AS years, t7.`2014-15` AS degrees), STRUCT('2015-16' AS years, t7.`2015-16` AS degrees), STRUCT('2016-17' AS years, t7.`2016-17` AS degrees), STRUCT('2017-18' AS years, t7.`2017-18` AS degrees), STRUCT('2018-19' AS years, t7.`2018-19` AS degrees), STRUCT('2019-20' AS years, t7.`2019-20` AS degrees)] - ) - ) - 1 - )) AS pos - CROSS JOIN UNNEST([STRUCT('1970-71' AS years, t7.`1970-71` AS degrees), STRUCT('1975-76' AS years, t7.`1975-76` AS degrees), STRUCT('1980-81' AS years, t7.`1980-81` AS degrees), STRUCT('1985-86' AS years, t7.`1985-86` AS degrees), STRUCT('1990-91' AS years, t7.`1990-91` AS degrees), STRUCT('1995-96' AS years, t7.`1995-96` AS degrees), STRUCT('2000-01' AS years, t7.`2000-01` AS degrees), STRUCT('2005-06' AS years, t7.`2005-06` AS degrees), STRUCT('2010-11' AS years, t7.`2010-11` AS degrees), STRUCT('2011-12' AS years, t7.`2011-12` AS degrees), STRUCT('2012-13' AS years, t7.`2012-13` AS degrees), STRUCT('2013-14' AS years, t7.`2013-14` AS degrees), STRUCT('2014-15' AS years, t7.`2014-15` AS degrees), STRUCT('2015-16' AS years, t7.`2015-16` AS degrees), STRUCT('2016-17' AS years, t7.`2016-17` AS degrees), STRUCT('2017-18' AS years, t7.`2017-18` AS degrees), STRUCT('2018-19' AS years, t7.`2018-19` AS degrees), STRUCT('2019-20' AS years, t7.`2019-20` AS degrees)]) AS `__pivoted__` WITH OFFSET AS pos_2 - WHERE - pos = pos_2 - OR ( - pos > ( - ARRAY_LENGTH( - [STRUCT('1970-71' AS years, t7.`1970-71` AS degrees), STRUCT('1975-76' AS years, t7.`1975-76` AS degrees), STRUCT('1980-81' AS years, t7.`1980-81` AS degrees), STRUCT('1985-86' AS years, t7.`1985-86` AS degrees), STRUCT('1990-91' AS years, t7.`1990-91` AS degrees), STRUCT('1995-96' AS years, t7.`1995-96` AS degrees), STRUCT('2000-01' AS years, t7.`2000-01` AS degrees), STRUCT('2005-06' AS years, t7.`2005-06` AS degrees), STRUCT('2010-11' AS years, t7.`2010-11` AS degrees), STRUCT('2011-12' AS years, t7.`2011-12` AS degrees), STRUCT('2012-13' AS years, t7.`2012-13` AS degrees), STRUCT('2013-14' AS years, t7.`2013-14` AS degrees), STRUCT('2014-15' AS years, t7.`2014-15` AS degrees), STRUCT('2015-16' AS years, t7.`2015-16` AS degrees), STRUCT('2016-17' AS years, t7.`2016-17` AS degrees), STRUCT('2017-18' AS years, t7.`2017-18` AS degrees), STRUCT('2018-19' AS years, t7.`2018-19` AS degrees), STRUCT('2019-20' AS years, t7.`2019-20` AS degrees)] - ) - 1 - ) - AND pos_2 = ( - ARRAY_LENGTH( - [STRUCT('1970-71' AS years, t7.`1970-71` AS degrees), STRUCT('1975-76' AS years, t7.`1975-76` AS degrees), STRUCT('1980-81' AS years, t7.`1980-81` AS degrees), STRUCT('1985-86' AS years, t7.`1985-86` AS degrees), STRUCT('1990-91' AS years, t7.`1990-91` 
AS degrees), STRUCT('1995-96' AS years, t7.`1995-96` AS degrees), STRUCT('2000-01' AS years, t7.`2000-01` AS degrees), STRUCT('2005-06' AS years, t7.`2005-06` AS degrees), STRUCT('2010-11' AS years, t7.`2010-11` AS degrees), STRUCT('2011-12' AS years, t7.`2011-12` AS degrees), STRUCT('2012-13' AS years, t7.`2012-13` AS degrees), STRUCT('2013-14' AS years, t7.`2013-14` AS degrees), STRUCT('2014-15' AS years, t7.`2014-15` AS degrees), STRUCT('2015-16' AS years, t7.`2015-16` AS degrees), STRUCT('2016-17' AS years, t7.`2016-17` AS degrees), STRUCT('2017-18' AS years, t7.`2017-18` AS degrees), STRUCT('2018-19' AS years, t7.`2018-19` AS degrees), STRUCT('2019-20' AS years, t7.`2019-20` AS degrees)] - ) - 1 - ) - ) - ), t1 AS ( - SELECT - t0.`field_of_study`, - t0.`__pivoted__`.`years` AS `years`, - t0.`__pivoted__`.`degrees` AS `degrees` - FROM t0 - ), t2 AS ( - SELECT - t1.*, - first_value(t1.`degrees`) OVER (PARTITION BY t1.`field_of_study` ORDER BY t1.`years` ASC) AS `earliest_degrees`, - last_value(t1.`degrees`) OVER (PARTITION BY t1.`field_of_study` ORDER BY t1.`years` ASC) AS `latest_degrees` - FROM t1 - ), t3 AS ( - SELECT - t2.*, - t2.`latest_degrees` - t2.`earliest_degrees` AS `diff` - FROM t2 - ), t4 AS ( + SELECT + t5.field_of_study, + t5.diff + FROM ( SELECT - t3.`field_of_study`, - ANY_VALUE(t3.`diff`) AS `diff` - FROM t3 + t4.field_of_study, + ANY_VALUE(t4.diff) AS diff + FROM ( + SELECT + t3.field_of_study, + t3.years, + t3.degrees, + t3.earliest_degrees, + t3.latest_degrees, + t3.latest_degrees - t3.earliest_degrees AS diff + FROM ( + SELECT + t2.field_of_study, + t2.years, + t2.degrees, + first_value(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, + last_value(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees + FROM ( + SELECT + t1.field_of_study, + t1.__pivoted__.years AS years, + t1.__pivoted__.degrees AS degrees + FROM ( + SELECT + t0.field_of_study, + IF(pos = pos_2, __pivoted__, NULL) AS __pivoted__ + FROM humanities AS t0 + CROSS JOIN UNNEST(GENERATE_ARRAY( + 0, + GREATEST( + ARRAY_LENGTH( + [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] + ) + ) - 1 + )) AS pos + CROSS JOIN UNNEST([STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS 
degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)]) AS __pivoted__ WITH OFFSET AS pos_2 + WHERE + pos = pos_2 + OR ( + pos > ( + ARRAY_LENGTH( + [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] + ) - 1 + ) + AND pos_2 = ( + ARRAY_LENGTH( + [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] + ) - 1 + ) + ) + ) AS t1 + ) AS t2 + ) AS t3 + ) AS t4 GROUP BY 1 - ), t5 AS ( - SELECT - t4.* - FROM t4 - WHERE - t4.`diff` < 0 - ), t7 AS ( - SELECT - t5.* - FROM t5 - ORDER BY - t5.`diff` ASC - ), t8 AS ( - SELECT - t4.* - FROM t4 - ORDER BY - t4.`diff` DESC - ), t9 AS ( - SELECT - t5.* - FROM t5 - ORDER BY - t5.`diff` ASC - LIMIT 10 - ), t10 AS ( - SELECT - t4.* - FROM t4 - ORDER BY - t4.`diff` DESC - LIMIT 10 - ) - SELECT - * - FROM t10 + ) AS t5 + ORDER BY + t5.diff DESC + LIMIT 10 UNION ALL SELECT - * - FROM t9 -) AS t6 \ No newline at end of file + t5.field_of_study, + t5.diff + FROM ( + SELECT + t4.field_of_study, + ANY_VALUE(t4.diff) AS diff + FROM ( + SELECT + t3.field_of_study, + t3.years, + t3.degrees, + t3.earliest_degrees, + t3.latest_degrees, + t3.latest_degrees - t3.earliest_degrees AS diff + FROM ( + SELECT + t2.field_of_study, 
+ t2.years, + t2.degrees, + first_value(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, + last_value(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees + FROM ( + SELECT + t1.field_of_study, + t1.__pivoted__.years AS years, + t1.__pivoted__.degrees AS degrees + FROM ( + SELECT + t0.field_of_study, + IF(pos = pos_2, __pivoted__, NULL) AS __pivoted__ + FROM humanities AS t0 + CROSS JOIN UNNEST(GENERATE_ARRAY( + 0, + GREATEST( + ARRAY_LENGTH( + [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] + ) + ) - 1 + )) AS pos + CROSS JOIN UNNEST([STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)]) AS __pivoted__ WITH OFFSET AS pos_2 + WHERE + pos = pos_2 + OR ( + pos > ( + ARRAY_LENGTH( + [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), 
STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] + ) - 1 + ) + AND pos_2 = ( + ARRAY_LENGTH( + [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] + ) - 1 + ) + ) + ) AS t1 + ) AS t2 + ) AS t3 + ) AS t4 + GROUP BY + 1 + ) AS t5 + WHERE + t5.diff < 0 + ORDER BY + t5.diff ASC NULLS LAST + LIMIT 10 +) AS t10 \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index ddbf55d77bbf..52649199d79b 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -1374,7 +1374,9 @@ def test_group_concat( .reset_index() ) - backend.assert_frame_equal(result.fillna(pd.NA), expected.fillna(pd.NA)) + backend.assert_frame_equal( + result.replace(np.nan, None), expected.replace(np.nan, None) + ) @pytest.mark.broken( diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index a0a67a36c687..e7563587f7f6 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -90,6 +90,7 @@ def time_keyed_right(time_keyed_df2): "pyspark", "druid", "impala", + "bigquery", ] ) def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): @@ -125,6 +126,7 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op "pyspark", "druid", "impala", + "bigquery", ] ) def test_keyed_asof_join_with_tolerance( diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 617abc539b64..c92f65090918 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -1347,7 +1347,6 @@ def hash_256(col): [ "pandas", "dask", - "bigquery", "mssql", "oracle", "risingwave", @@ -1369,6 +1368,7 @@ def hash_256(col): 1672531200, marks=[ pytest.mark.notyet(["duckdb", "impala"], reason="casts to NULL"), + pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), pytest.mark.notyet(["trino"], raises=TrinoUserError), pytest.mark.broken( ["druid"], reason="casts to 1672531200000 (millisecond)" @@ -1393,7 +1393,6 @@ def test_try_cast(con, from_val, to_type, expected): @pytest.mark.notimpl( [ - "bigquery", "dask", "datafusion", "druid", @@ -1419,6 +1418,7 @@ def test_try_cast(con, from_val, to_type, expected): pytest.mark.never( ["clickhouse", "pyspark"], reason="casts to 1672531200" ), + pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), pytest.mark.notyet(["trino"], raises=TrinoUserError), pytest.mark.broken(["polars"], reason="casts to 1672531200000000000"), ], @@ -1434,7 +1434,6 @@ def test_try_cast_null(con, 
from_val, to_type): [ "pandas", "dask", - "bigquery", "datafusion", "druid", "mssql", @@ -1464,7 +1463,6 @@ def test_try_cast_table(backend, con): [ "pandas", "dask", - "bigquery", "datafusion", "mssql", "mysql", @@ -1490,6 +1488,7 @@ def test_try_cast_table(backend, con): ["clickhouse", "polars", "flink", "pyspark"], reason="casts this to to a number", ), + pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), pytest.mark.notyet(["trino"], raises=TrinoUserError), ], id="datetime-to-float", @@ -1797,7 +1796,6 @@ def test_dynamic_table_slice_with_computed_offset(backend): @pytest.mark.notimpl( [ - "bigquery", "druid", "flink", "polars", @@ -1827,7 +1825,6 @@ def test_sample(backend): @pytest.mark.notimpl( [ - "bigquery", "druid", "flink", "polars", diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 19bf6f15f35e..cc4b54067519 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -395,7 +395,6 @@ def test_numeric_literal(con, backend, expr, expected_types): ibis.literal(decimal.Decimal("Infinity"), type=dt.decimal), # TODO(krzysztof-kwitt): Should we unify it? { - "bigquery": float("inf"), "sqlite": float("inf"), "risingwave": float("nan"), "postgres": decimal.Decimal("Infinity"), @@ -406,7 +405,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "duckdb": float("inf"), }, { - "bigquery": "FLOAT64", "sqlite": "real", "postgres": "numeric", "risingwave": "numeric", @@ -465,6 +463,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "infinity is not allowed as a decimal value", raises=SnowflakeProgrammingError, ), + pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), ], id="decimal-infinity+", ), @@ -472,7 +471,6 @@ def test_numeric_literal(con, backend, expr, expected_types): ibis.literal(decimal.Decimal("-Infinity"), type=dt.decimal), # TODO(krzysztof-kwitt): Should we unify it? 
{ - "bigquery": float("-inf"), "sqlite": float("-inf"), "risingwave": float("nan"), "postgres": decimal.Decimal("-Infinity"), @@ -483,7 +481,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "duckdb": float("-inf"), }, { - "bigquery": "FLOAT64", "sqlite": "real", "postgres": "numeric", "risingwave": "numeric", @@ -542,6 +539,7 @@ def test_numeric_literal(con, backend, expr, expected_types): raises=TrinoUserError, reason="can't cast infinity to decimal", ), + pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), ], id="decimal-infinity-", ), @@ -629,6 +627,7 @@ def test_numeric_literal(con, backend, expr, expected_types): raises=TrinoUserError, reason="can't cast nan to decimal", ), + pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), ], id="decimal-NaN", ), diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index 01dad3aa36de..ed8bb91d466b 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -12,7 +12,7 @@ import ibis import ibis.expr.datatypes as dt from ibis import _ -from ibis.backends.tests.errors import GoogleBadRequest, Py4JJavaError +from ibis.backends.tests.errors import Py4JJavaError @pytest.mark.parametrize( @@ -144,42 +144,21 @@ def test_scalar_param_map(con): "timestamp", "timestamp_col", id="string_timestamp", - marks=[ - pytest.mark.notimpl(["druid"]), - pytest.mark.broken( - ["bigquery"], - raises=GoogleBadRequest, - reason="No matching for operator = for argument types: DATETIME, TIMESTAMP", - ), - ], + marks=[pytest.mark.notimpl(["druid"])], ), param( datetime.date(2009, 1, 20), "timestamp", "timestamp_col", id="date_timestamp", - marks=[ - pytest.mark.notimpl(["druid"]), - pytest.mark.broken( - ["bigquery"], - raises=GoogleBadRequest, - reason="No matching for operator = for argument types: DATETIME, TIMESTAMP", - ), - ], + marks=[pytest.mark.notimpl(["druid"])], ), param( datetime.datetime(2009, 1, 20, 1, 2, 3), "timestamp", "timestamp_col", id="datetime_timestamp", - marks=[ - pytest.mark.notimpl(["druid"]), - pytest.mark.broken( - ["bigquery"], - raises=GoogleBadRequest, - reason="No matching for operator = for argument types: DATETIME, TIMESTAMP", - ), - ], + marks=[pytest.mark.notimpl(["druid"])], ), ], ) diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index b3033c312359..74db927d928d 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -61,7 +61,7 @@ def test_literal(backend, expr): assert ibis.to_sql(expr, dialect=backend.name()) -@pytest.mark.never(["pandas", "dask", "polars", "pyspark"], reason="not SQL") +@pytest.mark.never(["pandas", "dask", "polars"], reason="not SQL") @pytest.mark.xfail_version( mssql=["sqlalchemy>=2"], reason="sqlalchemy 2 prefixes literals with `N`" ) @@ -103,7 +103,7 @@ def test_cte_refs_in_topo_order(backend, snapshot): snapshot.assert_match(sql, "out.sql") -@pytest.mark.never(["pandas", "dask", "polars", "pyspark"], reason="not SQL") +@pytest.mark.never(["pandas", "dask", "polars"], reason="not SQL") def test_isin_bug(con, snapshot): t = ibis.table(dict(x="int"), name="t") good = t[t.x > 2].x diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index 410f7f30045b..0ba91bb67050 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -999,7 +999,6 @@ def test_multiple_subs(con): @pytest.mark.notimpl( [ - "bigquery", "clickhouse", "dask", "datafusion", diff --git a/ibis/backends/tests/test_temporal.py 
b/ibis/backends/tests/test_temporal.py index 9ed2b1e83855..ab71cefc15e6 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -606,11 +606,6 @@ def test_date_truncate(backend, alltypes, df, unit): pd.offsets.DateOffset, # TODO - DateOffset - #2553 marks=[ - pytest.mark.notimpl( - ["bigquery"], - raises=com.UnsupportedOperationError, - reason="BigQuery does not allow binary operation TIMESTAMP_ADD with INTERVAL offset D", - ), pytest.mark.notimpl( ["polars"], raises=TypeError, @@ -634,11 +629,6 @@ def test_date_truncate(backend, alltypes, df, unit): pd.offsets.DateOffset, # TODO - DateOffset - #2553 marks=[ - pytest.mark.notimpl( - ["bigquery"], - raises=com.UnsupportedOperationError, - reason="BigQuery does not allow binary operation TIMESTAMP_ADD with INTERVAL offset M", - ), pytest.mark.notimpl( ["dask"], raises=ValueError, @@ -661,11 +651,6 @@ def test_date_truncate(backend, alltypes, df, unit): pd.offsets.DateOffset, # TODO - DateOffset - #2553 marks=[ - pytest.mark.notimpl( - ["bigquery"], - raises=com.UnsupportedOperationError, - reason="BigQuery does not allow extracting date part `IntervalUnit.WEEK` from intervals", - ), pytest.mark.notimpl( ["dask"], raises=ValueError, diff --git a/ibis/backends/tests/test_udf.py b/ibis/backends/tests/test_udf.py index 15be61cf1723..2a55a30355b2 100644 --- a/ibis/backends/tests/test_udf.py +++ b/ibis/backends/tests/test_udf.py @@ -37,7 +37,7 @@ def num_vowels(s: str, include_y: bool = False) -> int: batting = batting.limit(100) nvowels = num_vowels(batting.playerID) assert nvowels.op().__module__ == __name__ - assert type(nvowels.op()).__qualname__ == "num_vowels" + assert type(nvowels.op()).__qualname__.startswith("num_vowels") expr = batting.group_by(id_len=nvowels).agg(n=_.count()) result = expr.execute() diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index ff4baf80211d..4378d1d4dd84 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -718,7 +718,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): reason="Window operations are unsupported in the dask backend", ), pytest.mark.broken( - ["bigquery", "flink", "impala"], + ["flink", "impala"], reason="default window semantics are different", raises=AssertionError, ), @@ -1305,17 +1305,12 @@ def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df): ) @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.notyet(["flink"], raises=com.UnsupportedOperationError) -@pytest.mark.broken( - ["pandas"], - raises=TypeError, - reason="pandas rank impl cannot handle compound sort keys with null", -) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, reason="Feature is not yet implemented: Window function with empty PARTITION BY is not supported yet", ) -def test_ordering_order(con): +def test_windowed_order_by_sequence_is_preserved(con): table = ibis.memtable({"bool_col": [True, False, False, None, True]}) window = ibis.window( order_by=[ diff --git a/ibis/expr/operations/udf.py b/ibis/expr/operations/udf.py index 945f70f429d7..4fc50d2466f6 100644 --- a/ibis/expr/operations/udf.py +++ b/ibis/expr/operations/udf.py @@ -1,9 +1,11 @@ from __future__ import annotations import abc +import collections import enum import functools import inspect +import itertools import typing from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, overload @@ -19,12 +21,24 @@ from ibis.common.deferred import deferrable if 
TYPE_CHECKING: + from collections.abc import Iterable, MutableMapping + import ibis.expr.types as ir EMPTY = inspect.Parameter.empty +_udf_name_cache: MutableMapping[ + type[ops.Node], Iterable[int] +] = collections.defaultdict(itertools.count) + + +def _make_udf_name(name: str) -> str: + definition = next(_udf_name_cache[name]) + return f"{name}_{definition:d}" + + @enum.unique class InputType(enum.Enum): BUILTIN = enum.auto() @@ -78,6 +92,7 @@ def _make_node( input_type: InputType, name: str | None = None, schema: str | None = None, + database: str | None = None, signature: tuple[tuple, Any] | None = None, **kwargs, ) -> type[S]: @@ -113,13 +128,13 @@ def _make_node( # method "__func__": property(fget=lambda _, fn=fn: fn), "__config__": FrozenDict(kwargs), - "__udf_namespace__": schema, + "__udf_namespace__": ops.Namespace(schema=schema, database=database), "__module__": fn.__module__, "__func_name__": func_name, } ) - return type(fn.__name__, (cls._base,), fields) + return type(_make_udf_name(fn.__name__), (cls._base,), fields) @classmethod def _make_wrapper( @@ -157,6 +172,7 @@ def builtin( *, name: str | None = None, schema: str | None = None, + database: str | None = None, signature: tuple[tuple[Any, ...], Any] | None = None, **kwargs: Any, ) -> Callable[[Callable], Callable[..., ir.Value]]: @@ -164,7 +180,9 @@ def builtin( @util.experimental @classmethod - def builtin(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): + def builtin( + cls, fn=None, *, name=None, schema=None, database=None, signature=None, **kwargs + ): """Construct a scalar user-defined function that is built-in to the backend. Parameters @@ -175,6 +193,8 @@ def builtin(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): The name of the UDF in the backend if different from the function name. schema The schema in which the builtin function resides. + database + The database in which the builtin function resides. signature An optional signature to use for the UDF. If present, should be a tuple containing a tuple of argument types and a return type. For @@ -201,6 +221,7 @@ def builtin(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): fn, name=name, schema=schema, + database=database, signature=signature, **kwargs, ) @@ -217,6 +238,7 @@ def python( *, name: str | None = None, schema: str | None = None, + database: str | None = None, signature: tuple[tuple[Any, ...], Any] | None = None, **kwargs: Any, ) -> Callable[[Callable], Callable[..., ir.Value]]: @@ -224,7 +246,9 @@ def python( @util.experimental @classmethod - def python(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): + def python( + cls, fn=None, *, name=None, schema=None, database=None, signature=None, **kwargs + ): """Construct a **non-vectorized** scalar user-defined function that accepts Python scalar values as inputs. ::: {.callout-warning collapse="true"} @@ -248,6 +272,8 @@ def python(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): The name of the UDF in the backend if different from the function name. schema The schema in which to create the UDF. + database + The database in which to create the UDF. signature An optional signature to use for the UDF. If present, should be a tuple containing a tuple of argument types and a return type. 
For @@ -279,6 +305,7 @@ def python(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): fn, name=name, schema=schema, + database=database, signature=signature, **kwargs, ) @@ -295,6 +322,7 @@ def pandas( *, name: str | None = None, schema: str | None = None, + database: str | None = None, signature: tuple[tuple[Any, ...], Any] | None = None, **kwargs: Any, ) -> Callable[[Callable], Callable[..., ir.Value]]: @@ -302,7 +330,9 @@ def pandas( @util.experimental @classmethod - def pandas(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): + def pandas( + cls, fn=None, *, name=None, schema=None, database=None, signature=None, **kwargs + ): """Construct a **vectorized** scalar user-defined function that accepts pandas Series' as inputs. Parameters @@ -313,6 +343,8 @@ def pandas(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): The name of the UDF in the backend if different from the function name. schema The schema in which to create the UDF. + database + The database in which to create the UDF. signature An optional signature to use for the UDF. If present, should be a tuple containing a tuple of argument types and a return type. For @@ -346,6 +378,7 @@ def pandas(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): fn, name=name, schema=schema, + database=database, signature=signature, **kwargs, ) @@ -362,6 +395,7 @@ def pyarrow( *, name: str | None = None, schema: str | None = None, + database: str | None = None, signature: tuple[tuple[Any, ...], Any] | None = None, **kwargs: Any, ) -> Callable[[Callable], Callable[..., ir.Value]]: @@ -369,7 +403,9 @@ def pyarrow( @util.experimental @classmethod - def pyarrow(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): + def pyarrow( + cls, fn=None, *, name=None, schema=None, database=None, signature=None, **kwargs + ): """Construct a **vectorized** scalar user-defined function that accepts PyArrow Arrays as input. Parameters @@ -380,6 +416,8 @@ def pyarrow(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): The name of the UDF in the backend if different from the function name. schema The schema in which to create the UDF. + database + The database in which to create the UDF. signature An optional signature to use for the UDF. If present, should be a tuple containing a tuple of argument types and a return type. For @@ -412,6 +450,7 @@ def pyarrow(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): fn, name=name, schema=schema, + database=database, signature=signature, **kwargs, ) @@ -435,6 +474,7 @@ def builtin( *, name: str | None = None, schema: str | None = None, + database: str | None = None, signature: tuple[tuple[Any, ...], Any] | None = None, **kwargs: Any, ) -> Callable[[Callable], Callable[..., ir.Value]]: @@ -442,7 +482,9 @@ def builtin( @util.experimental @classmethod - def builtin(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): + def builtin( + cls, fn=None, *, name=None, schema=None, database=None, signature=None, **kwargs + ): """Construct an aggregate user-defined function that is built-in to the backend. Parameters @@ -453,6 +495,8 @@ def builtin(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): The name of the UDF in the backend if different from the function name. schema The schema in which the builtin function resides. + database + The database in which the builtin function resides. signature An optional signature to use for the UDF. 
If present, should be a tuple containing a tuple of argument types and a return type. For @@ -480,6 +524,7 @@ def builtin(cls, fn=None, *, name=None, schema=None, signature=None, **kwargs): fn, name=name, schema=schema, + database=database, signature=signature, **kwargs, ) diff --git a/ibis/expr/rewrites.py b/ibis/expr/rewrites.py index e62a339d5d78..f0352fefa8bd 100644 --- a/ibis/expr/rewrites.py +++ b/ibis/expr/rewrites.py @@ -182,7 +182,7 @@ def window_merge_frames(_, frame): group_by = tuple(toolz.unique(_.frame.group_by + frame.group_by)) order_by = {} - for sort_key in _.frame.order_by + frame.order_by: + for sort_key in frame.order_by + _.frame.order_by: order_by[sort_key.expr] = sort_key.ascending order_by = tuple(ops.SortKey(k, v) for k, v in order_by.items()) From ac5b75b0c8051ccbad87e4553f289afde103d6ab Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 18 Jan 2024 05:14:22 -0500 Subject: [PATCH 082/161] chore: add docstring for null ordering transform --- ibis/backends/bigquery/__init__.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/ibis/backends/bigquery/__init__.py b/ibis/backends/bigquery/__init__.py index 5e0cddf43f9a..ab2f32eab055 100644 --- a/ibis/backends/bigquery/__init__.py +++ b/ibis/backends/bigquery/__init__.py @@ -94,9 +94,21 @@ def _qualify_memtable( return node -def _remove_nulls_first_from_invalid_window_orderings( +def _remove_null_ordering_from_unsupported_window( node: sge.Expression, ) -> sge.Expression: + """Remove null ordering in window frame clauses not supported by BigQuery. + + BigQuery has only partial support for NULL FIRST/LAST in RANGE windows so + we remove it from any window frame clause that doesn't support it. + + Here's the support matrix: + + ✅ sum(x) over (order by y desc nulls last) + 🚫 sum(x) over (order by y asc nulls last) + ✅ sum(x) over (order by y asc nulls first) + 🚫 sum(x) over (order by y desc nulls first) + """ if isinstance(node, sge.Window): order = node.args.get("order") if order is not None: @@ -108,7 +120,6 @@ def _remove_nulls_first_from_invalid_window_orderings( "nulls_first", True ): kargs["nulls_first"] = True - return node @@ -627,7 +638,7 @@ def _to_sqlglot( _qualify_memtable, dataset=getattr(self._session_dataset, "dataset_id", None), project=getattr(self._session_dataset, "project", None), - ).transform(_remove_nulls_first_from_invalid_window_orderings) + ).transform(_remove_null_ordering_from_unsupported_window) def raw_sql(self, query: str, params=None): query_parameters = [ From 6ea9c6a7daac56d19eefa77955caa866a0011e4d Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 18 Jan 2024 05:23:12 -0500 Subject: [PATCH 083/161] chore(bigquery-datatypes): fix type annotations and raise uniform error types for datatype conversion --- ibis/backends/base/sqlglot/datatypes.py | 39 ++++++++++++------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index e09e582f9671..4928c4d9523c 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -1,6 +1,7 @@ from __future__ import annotations from functools import partial +from typing import NoReturn import sqlglot as sg import sqlglot.expressions as sge @@ -641,15 +642,15 @@ def _from_sqlglot_BIGNUMERIC(cls) -> dt.Decimal: return dt.Decimal(76, 38, nullable=cls.default_nullable) @classmethod - def 
_from_sqlglot_DATETIME(cls) -> dt.Decimal: + def _from_sqlglot_DATETIME(cls) -> dt.Timestamp: return dt.Timestamp(timezone=None, nullable=cls.default_nullable) @classmethod - def _from_sqlglot_TIMESTAMP(cls) -> dt.Decimal: + def _from_sqlglot_TIMESTAMP(cls) -> dt.Timestamp: return dt.Timestamp(timezone="UTC", nullable=cls.default_nullable) @classmethod - def _from_sqlglot_GEOGRAPHY(cls) -> dt.Decimal: + def _from_sqlglot_GEOGRAPHY(cls) -> dt.GeoSpatial: return dt.GeoSpatial( geotype="geography", srid=4326, nullable=cls.default_nullable ) @@ -665,24 +666,22 @@ def _from_sqlglot_TINYINT(cls) -> dt.Int64: ) = _from_sqlglot_INT = _from_sqlglot_SMALLINT = _from_sqlglot_TINYINT @classmethod - def _from_sqlglot_UBIGINT(cls) -> dt.Int64: - raise TypeError("Unsigned BIGINT isn't representable in BigQuery INT64") + def _from_sqlglot_UBIGINT(cls) -> NoReturn: + raise com.UnsupportedBackendType( + "Unsigned BIGINT isn't representable in BigQuery INT64" + ) @classmethod - def _from_sqlglot_FLOAT(cls) -> dt.Double: + def _from_sqlglot_FLOAT(cls) -> dt.Float64: return dt.Float64(nullable=cls.default_nullable) @classmethod - def _from_sqlglot_MAP(cls) -> dt.Map: - raise NotImplementedError( - "Cannot convert sqlglot Map type to ibis type: maps are not supported in BigQuery" - ) + def _from_sqlglot_MAP(cls) -> NoReturn: + raise com.UnsupportedBackendType("Maps are not supported in BigQuery") @classmethod - def _from_ibis_Map(cls, dtype: dt.Map) -> sge.DataType: - raise NotImplementedError( - "Cannot convert Ibis Map type to BigQuery type: maps are not supported in BigQuery" - ) + def _from_ibis_Map(cls, dtype: dt.Map) -> NoReturn: + raise com.UnsupportedBackendType("Maps are not supported in BigQuery") @classmethod def _from_ibis_Timestamp(cls, dtype: dt.Timestamp) -> sge.DataType: @@ -691,7 +690,7 @@ def _from_ibis_Timestamp(cls, dtype: dt.Timestamp) -> sge.DataType: elif dtype.timezone == "UTC": return sge.DataType(this=sge.DataType.Type.TIMESTAMPTZ) else: - raise TypeError( + raise com.UnsupportedBackendType( "BigQuery does not support timestamps with timezones other than 'UTC'" ) @@ -704,15 +703,15 @@ def _from_ibis_Decimal(cls, dtype: dt.Decimal) -> sge.DataType: elif (precision, scale) in ((38, 9), (None, None)): return sge.DataType(this=sge.DataType.Type.DECIMAL) else: - raise TypeError( + raise com.UnsupportedBackendType( "BigQuery only supports decimal types with precision of 38 and " f"scale of 9 (NUMERIC) or precision of 76 and scale of 38 (BIGNUMERIC). " f"Current precision: {dtype.precision}. Current scale: {dtype.scale}" ) @classmethod - def _from_ibis_UInt64(cls, dtype: dt.UInt64) -> sge.DataType: - raise TypeError( + def _from_ibis_UInt64(cls, dtype: dt.UInt64) -> NoReturn: + raise com.UnsupportedBackendType( f"Conversion from {dtype} to BigQuery integer type (Int64) is lossy" ) @@ -727,7 +726,7 @@ def _from_ibis_GeoSpatial(cls, dtype: dt.GeoSpatial) -> sge.DataType: if (dtype.geotype, dtype.srid) == ("geography", 4326): return sge.DataType(this=sge.DataType.Type.GEOGRAPHY) else: - raise TypeError( + raise com.UnsupportedBackendType( "BigQuery geography uses points on WGS84 reference ellipsoid." 
f"Current geotype: {dtype.geotype}, Current srid: {dtype.srid}" ) @@ -735,7 +734,7 @@ def _from_ibis_GeoSpatial(cls, dtype: dt.GeoSpatial) -> sge.DataType: class BigQueryUDFType(BigQueryType): @classmethod - def _from_ibis_Int64(cls, dtype: dt.Int64) -> sge.DataType: + def _from_ibis_Int64(cls, dtype: dt.Int64) -> NoReturn: raise com.UnsupportedBackendType( "int64 is not a supported input or output type in BigQuery UDFs; use float64 instead" ) From 090a7237ecd49fd37a47e18fdac99a42fe013942 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 19 Jan 2024 04:43:02 -0500 Subject: [PATCH 084/161] ci(bigquery): install geospatial extra --- .github/workflows/ibis-backends-cloud.yml | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ibis-backends-cloud.yml b/.github/workflows/ibis-backends-cloud.yml index 62801325cf29..f47da3170186 100644 --- a/.github/workflows/ibis-backends-cloud.yml +++ b/.github/workflows/ibis-backends-cloud.yml @@ -43,10 +43,24 @@ jobs: - "3.9" - "3.11" backend: - - name: bigquery - title: BigQuery - name: snowflake title: Snowflake + extras: + - snowflake + include: + - python-version: "3.9" + backend: + name: bigquery + title: BigQuery + extras: + - bigquery + - python-version: "3.11" + backend: + name: bigquery + title: BigQuery + extras: + - bigquery + - geospatial steps: - name: checkout uses: actions/checkout@v4 @@ -62,7 +76,7 @@ jobs: cache: poetry - name: install ibis - run: poetry install --without dev --without docs --extras ${{ matrix.backend.name }} + run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" - uses: extractions/setup-just@v1 env: From 45098390df01802544d9ed7f45943b240016339d Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 19 Jan 2024 04:45:37 -0500 Subject: [PATCH 085/161] test: remove unused `spread_type` function --- ibis/backends/bigquery/datatypes.py | 18 ----------- .../bigquery/tests/unit/test_datatypes.py | 30 +------------------ 2 files changed, 1 insertion(+), 47 deletions(-) diff --git a/ibis/backends/bigquery/datatypes.py b/ibis/backends/bigquery/datatypes.py index 10aef9e1c943..5ddb21c46e39 100644 --- a/ibis/backends/bigquery/datatypes.py +++ b/ibis/backends/bigquery/datatypes.py @@ -65,21 +65,3 @@ def _dtype_from_bigquery_field(cls, field: bq.SchemaField) -> dt.DataType: @classmethod def to_ibis(cls, fields: list[bq.SchemaField]) -> sch.Schema: return sch.Schema({f.name: cls._dtype_from_bigquery_field(f) for f in fields}) - - -# TODO(kszucs): we can eliminate this function by making dt.DataType traversible -# using ibis.common.graph.Node, similarly to how we traverse ops.Node instances: -# node.find(types) -def spread_type(dt: dt.DataType): - """Returns a generator that contains all the types in the given type. - - For complex types like set and array, it returns the types of the elements. 
- """ - if dt.is_array(): - yield from spread_type(dt.value_type) - elif dt.is_struct(): - for type_ in dt.types: - yield from spread_type(type_) - elif dt.is_map(): - raise NotImplementedError("Maps are not supported in BigQuery") - yield dt diff --git a/ibis/backends/bigquery/tests/unit/test_datatypes.py b/ibis/backends/bigquery/tests/unit/test_datatypes.py index e0a035cd69a7..b4148d123e08 100644 --- a/ibis/backends/bigquery/tests/unit/test_datatypes.py +++ b/ibis/backends/bigquery/tests/unit/test_datatypes.py @@ -5,10 +5,7 @@ from pytest import param import ibis.expr.datatypes as dt -from ibis.backends.bigquery.datatypes import ( - BigQueryType, - spread_type, -) +from ibis.backends.bigquery.datatypes import BigQueryType @pytest.mark.parametrize( @@ -79,31 +76,6 @@ def test_simple_failure_mode(datatype): BigQueryType.to_string(datatype) -@pytest.mark.parametrize( - ("type_", "expected"), - [ - param( - dt.int64, - [dt.int64], - ), - param( - dt.Array(dt.int64), - [dt.int64, dt.Array(value_type=dt.int64)], - ), - param( - dt.Struct.from_tuples([("a", dt.Array(dt.int64))]), - [ - dt.int64, - dt.Array(value_type=dt.int64), - dt.Struct.from_tuples([("a", dt.Array(value_type=dt.int64))]), - ], - ), - ], -) -def test_spread_type(type_, expected): - assert list(spread_type(type_)) == expected - - def test_struct_type(): dtype = dt.Array(dt.int64) parsed_type = sg.parse_one("BIGINT[]", into=sg.exp.DataType, read="duckdb") From e370b26969e73a9ec77340f22aaf235fd06475e1 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 19 Jan 2024 04:46:00 -0500 Subject: [PATCH 086/161] test: account for new error type --- ibis/backends/bigquery/tests/unit/test_datatypes.py | 9 +++++---- ibis/backends/tests/test_temporal.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ibis/backends/bigquery/tests/unit/test_datatypes.py b/ibis/backends/bigquery/tests/unit/test_datatypes.py index b4148d123e08..5d55cee02610 100644 --- a/ibis/backends/bigquery/tests/unit/test_datatypes.py +++ b/ibis/backends/bigquery/tests/unit/test_datatypes.py @@ -4,6 +4,7 @@ import sqlglot as sg from pytest import param +import ibis.common.exceptions as com import ibis.expr.datatypes as dt from ibis.backends.bigquery.datatypes import BigQueryType @@ -41,7 +42,7 @@ dt.Timestamp(timezone="US/Eastern"), "TIMESTAMP", marks=pytest.mark.xfail( - raises=TypeError, reason="Not supported in BigQuery" + raises=com.UnsupportedBackendType, reason="Not supported in BigQuery" ), id="timestamp_with_other_tz", ), @@ -59,10 +60,10 @@ dt.GeoSpatial(geotype="geography"), "GEOGRAPHY", marks=pytest.mark.xfail( - raises=TypeError, + raises=com.UnsupportedBackendType, reason="Should use the WGS84 reference ellipsoid.", ), - id="geography", + id="geography-no-srid", ), ], ) @@ -72,7 +73,7 @@ def test_simple(datatype, expected): @pytest.mark.parametrize("datatype", [dt.uint64, dt.Decimal(8, 3)]) def test_simple_failure_mode(datatype): - with pytest.raises(TypeError): + with pytest.raises(com.UnsupportedBackendType): BigQueryType.to_string(datatype) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index ab71cefc15e6..005bea8ad105 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -1940,7 +1940,7 @@ def test_timestamp_literal(con, backend): @pytest.mark.notimpl( ["bigquery"], "BigQuery does not support timestamps with timezones other than 'UTC'", - raises=TypeError, + raises=com.UnsupportedBackendType, ) 
@pytest.mark.notimpl( ["druid"], From 43eea9d4f2afce2c27847b4ffaf5cdd278d7eebb Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 19 Jan 2024 04:46:13 -0500 Subject: [PATCH 087/161] test(bigquery): skip geospatial execution test when geopandas not installed --- ibis/backends/bigquery/tests/system/test_client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ibis/backends/bigquery/tests/system/test_client.py b/ibis/backends/bigquery/tests/system/test_client.py index cbb4a3c3c0aa..7ed2c24a09e1 100644 --- a/ibis/backends/bigquery/tests/system/test_client.py +++ b/ibis/backends/bigquery/tests/system/test_client.py @@ -328,6 +328,8 @@ def test_create_table_bignumeric(con, temp_table): def test_geography_table(con, temp_table): + pytest.importorskip("geopandas") + schema = ibis.schema({"col1": dt.GeoSpatial(geotype="geography", srid=4326)}) temporary_table = con.create_table(temp_table, schema=schema) con.raw_sql( From 1f649f68360c03eec8520e16f9e71ebe847c7004 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 19 Jan 2024 04:50:27 -0500 Subject: [PATCH 088/161] fix(snowflake): handle udf function naming --- ibis/backends/snowflake/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index 6096e2f75313..6edf9aa75d8d 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -305,6 +305,7 @@ def _get_udf_source(self, udf_node: ops.ScalarUDF): return dict( source=source, name=name, + func_name=udf_node.__func_name__, preamble="\n".join(preamble_lines).format( name=name, signature=signature, @@ -351,7 +352,7 @@ def _compile_pyarrow_udf(self, udf_node: ops.ScalarUDF) -> None: def _compile_python_udf(self, udf_node: ops.ScalarUDF) -> str: return """\ {preamble} -HANDLER = '{name}' +HANDLER = '{func_name}' AS $$ from __future__ import annotations @@ -376,7 +377,7 @@ def _compile_pandas_udf(self, udf_node: ops.ScalarUDF) -> str: @_snowflake.vectorized(input=pd.DataFrame) def wrapper(df): - return {name}(*(col for _, col in df.items())) + return {func_name}(*(col for _, col in df.items())) $$""" return template.format(**self._get_udf_source(udf_node)) From 336092f0f64478dd8118a0296041afbe41a7c3a3 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 19 Jan 2024 13:09:09 -0500 Subject: [PATCH 089/161] refactor(exasol): port to sqlglot (#8032) This PR ports exasol to sqlglot instead of sqlalchemy. 
--- .github/workflows/ibis-backends.yml | 72 +-- ci/schema/exasol.sql | 40 +- ibis/backends/base/sqlglot/datatypes.py | 76 +++ ibis/backends/conftest.py | 1 - ibis/backends/exasol/__init__.py | 446 ++++++++++++------ ibis/backends/exasol/compiler.py | 231 ++++++++- ibis/backends/exasol/converter.py | 38 ++ ibis/backends/exasol/datatypes.py | 26 - ibis/backends/exasol/registry.py | 46 -- ibis/backends/exasol/tests/conftest.py | 27 +- .../test_default_limit/exasol/out.sql | 5 + .../test_disable_query_limit/exasol/out.sql | 5 + .../exasol/out.sql | 3 + .../test_respect_set_limit/exasol/out.sql | 10 + .../test_group_by_has_index/exasol/out.sql | 22 + .../test_sql/test_isin_bug/exasol/out.sql | 9 + ibis/backends/tests/test_aggregation.py | 15 +- ibis/backends/tests/test_asof_join.py | 2 + ibis/backends/tests/test_binary.py | 3 +- ibis/backends/tests/test_dot_sql.py | 20 +- ibis/backends/tests/test_export.py | 22 +- ibis/backends/tests/test_generic.py | 125 +++-- ibis/backends/tests/test_join.py | 28 +- ibis/backends/tests/test_numeric.py | 99 +--- ibis/backends/tests/test_sql.py | 12 +- ibis/backends/tests/test_string.py | 7 +- ibis/backends/tests/test_temporal.py | 160 +++---- ibis/backends/tests/test_window.py | 38 +- poetry.lock | 41 +- pyproject.toml | 6 +- requirements-dev.txt | 6 +- 31 files changed, 986 insertions(+), 655 deletions(-) create mode 100644 ibis/backends/exasol/converter.py delete mode 100644 ibis/backends/exasol/datatypes.py delete mode 100644 ibis/backends/exasol/registry.py create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_isin_bug/exasol/out.sql diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index a78d1ae83a68..8bdc3c74db5a 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -162,13 +162,13 @@ jobs: # - oracle # services: # - oracle - # - name: exasol - # title: Exasol - # serial: true - # extras: - # - exasol - # services: - # - exasol + - name: exasol + title: Exasol + serial: true + extras: + - exasol + services: + - exasol # - name: flink # title: Flink # serial: true @@ -299,21 +299,21 @@ jobs: # - flink # - os: windows-latest # backend: - # name: exasol - # title: Exasol - # serial: true - # extras: - # - exasol - # services: - # - exasol - # - os: windows-latest - # backend: # name: risingwave # title: Risingwave # services: # - risingwave # extras: # - risingwave + - os: windows-latest + backend: + name: exasol + title: Exasol + serial: true + extras: + - exasol + services: + - exasol steps: - name: update and install system dependencies if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null @@ -612,46 +612,6 @@ jobs: with: flags: backend,pyspark,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} - # gen_lockfile_sqlalchemy2: - # name: Generate Poetry Lockfile for SQLAlchemy 2 - # runs-on: ubuntu-latest - # steps: - # - name: checkout - # uses: actions/checkout@v4 - # - # - name: install python - # uses: 
actions/setup-python@v5 - # with: - # python-version: "3.11" - # - # - run: python -m pip install --upgrade pip 'poetry==1.7.1' - # - # - name: remove deps that are not compatible with sqlalchemy 2 - # run: poetry remove sqlalchemy-exasol - # - # - name: add sqlalchemy 2 - # run: poetry add --lock --optional 'sqlalchemy>=2,<3' - # - # - name: checkout the lock file - # run: git checkout poetry.lock - # - # - name: lock with no updates - # # poetry add is aggressive and will update other dependencies like - # # numpy and pandas so we keep the pyproject.toml edits and then relock - # # without updating anything except the requested versions - # run: poetry lock --no-update - # - # - name: check the sqlalchemy version - # run: poetry show sqlalchemy --no-ansi | grep version | cut -d ':' -f2- | sed 's/ //g' | grep -P '^2\.' - # - # - name: upload deps file - # uses: actions/upload-artifact@v3 - # with: - # name: deps - # path: | - # pyproject.toml - # poetry.lock - # test_backends_sqlalchemy2: # name: SQLAlchemy 2 ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }} # runs-on: ${{ matrix.os }} diff --git a/ci/schema/exasol.sql b/ci/schema/exasol.sql index 856b059e7407..7d40fd94c798 100644 --- a/ci/schema/exasol.sql +++ b/ci/schema/exasol.sql @@ -1,7 +1,7 @@ DROP SCHEMA IF EXISTS EXASOL CASCADE; CREATE SCHEMA EXASOL; -CREATE OR REPLACE TABLE EXASOL.diamonds +CREATE OR REPLACE TABLE EXASOL."diamonds" ( "carat" DOUBLE, "cut" VARCHAR(256), @@ -15,13 +15,13 @@ CREATE OR REPLACE TABLE EXASOL.diamonds "z" DOUBLE ); -CREATE OR REPLACE TABLE EXASOL.batting +CREATE OR REPLACE TABLE EXASOL."batting" ( "playerID" VARCHAR(256), "yearID" BIGINT, "stint" BIGINT, "teamID" VARCHAR(256), - "logID" VARCHAR(256), + "lgID" VARCHAR(256), "G" BIGINT, "AB" BIGINT, "R" BIGINT, @@ -41,22 +41,22 @@ CREATE OR REPLACE TABLE EXASOL.batting "GIDP" BIGINT ); -CREATE OR REPLACE TABLE EXASOL.awards_players +CREATE OR REPLACE TABLE EXASOL."awards_players" ( - "playerId" VARCHAR(256), + "playerID" VARCHAR(256), "awardID" VARCHAR(256), - "yearID" VARCHAR(256), - "logID" VARCHAR(256), + "yearID" BIGINT, + "lgID" VARCHAR(256), "tie" VARCHAR(256), "notest" VARCHAR(256) ); -CREATE OR REPLACE TABLE EXASOL.functional_alltypes +CREATE OR REPLACE TABLE EXASOL."functional_alltypes" ( "id" INTEGER, "bool_col" BOOLEAN, "tinyint_col" SHORTINT, - "small_int" SMALLINT, + "smallint_col" SMALLINT, "int_col" INTEGER, "bigint_col" BIGINT, "float_col" FLOAT, @@ -69,7 +69,21 @@ CREATE OR REPLACE TABLE EXASOL.functional_alltypes ); -IMPORT INTO EXASOL.diamonds FROM LOCAL CSV FILE '/data/diamonds.csv' COLUMN SEPARATOR = ',' SKIP = 1; -IMPORT INTO EXASOL.batting FROM LOCAL CSV FILE '/data/batting.csv' COLUMN SEPARATOR = ',' SKIP = 1; -IMPORT INTO EXASOL.awards_players FROM LOCAL CSV FILE '/data/awards_players.csv' COLUMN SEPARATOR = ',' SKIP = 1; -IMPORT INTO EXASOL.functional_alltypes FROM LOCAL CSV FILE '/data/functional_alltypes.csv' COLUMN SEPARATOR = ',' SKIP = 1; +IMPORT INTO EXASOL."diamonds" FROM LOCAL CSV FILE '/data/diamonds.csv' COLUMN SEPARATOR = ',' SKIP = 1; +IMPORT INTO EXASOL."batting" FROM LOCAL CSV FILE '/data/batting.csv' COLUMN SEPARATOR = ',' SKIP = 1; +IMPORT INTO EXASOL."awards_players" FROM LOCAL CSV FILE '/data/awards_players.csv' COLUMN SEPARATOR = ',' SKIP = 1; +IMPORT INTO EXASOL."functional_alltypes" FROM LOCAL CSV FILE '/data/functional_alltypes.csv' COLUMN SEPARATOR = ',' SKIP = 1; + +CREATE OR REPLACE TABLE EXASOL."win" +( + "g" VARCHAR(1), + "x" BIGINT, + "y" BIGINT +); + +INSERT INTO "win" 
VALUES + ('a', 0, 3), + ('a', 1, 2), + ('a', 2, 0), + ('a', 3, 1), + ('a', 4, 1); diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index 4928c4d9523c..273295f83b2f 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -738,3 +738,79 @@ def _from_ibis_Int64(cls, dtype: dt.Int64) -> NoReturn: raise com.UnsupportedBackendType( "int64 is not a supported input or output type in BigQuery UDFs; use float64 instead" ) + + +class ExasolType(SqlglotType): + dialect = "exasol" + + default_temporal_scale = 3 + + default_decimal_precision = 18 + default_decimal_scale = 0 + + @classmethod + def _from_ibis_String(cls, dtype: dt.String) -> sge.DataType: + return sge.DataType( + this=sge.DataType.Type.VARCHAR, + expressions=[sge.DataTypeParam(this=sge.convert(2_000_000))], + ) + + @classmethod + def _from_sqlglot_DECIMAL( + cls, + precision: sge.DataTypeParam | None = None, + scale: sge.DataTypeParam | None = None, + ) -> dt.Decimal: + if precision is None: + precision = cls.default_decimal_precision + else: + precision = int(precision.this.this) + + if scale is None: + scale = cls.default_decimal_scale + else: + scale = int(scale.this.this) + + if not scale: + if 0 < precision <= 3: + return dt.Int8(nullable=cls.default_nullable) + elif 3 < precision <= 9: + return dt.Int16(nullable=cls.default_nullable) + elif 9 < precision <= 18: + return dt.Int32(nullable=cls.default_nullable) + elif 18 < precision <= 36: + return dt.Int64(nullable=cls.default_nullable) + else: + raise com.UnsupportedBackendType( + "Decimal precision is too large; Exasol supports precision up to 36." + ) + return dt.Decimal(precision, scale, nullable=cls.default_nullable) + + @classmethod + def _from_ibis_Array(cls, dtype: dt.Array) -> NoReturn: + raise com.UnsupportedBackendType("Arrays not supported in Exasol") + + @classmethod + def _from_ibis_Map(cls, dtype: dt.Map) -> NoReturn: + raise com.UnsupportedBackendType("Maps not supported in Exasol") + + @classmethod + def _from_ibis_Struct(cls, dtype: dt.Struct) -> NoReturn: + raise com.UnsupportedBackendType("Structs not supported in Exasol") + + @classmethod + def _from_ibis_Timestamp(cls, dtype: dt.Timestamp) -> sge.DataType: + code = typecode.TIMESTAMP if dtype.timezone is None else typecode.TIMESTAMPTZ + return sge.DataType(this=code) + + @classmethod + def _from_sqlglot_ARRAY(cls, value_type: sge.DataType) -> NoReturn: + raise com.UnsupportedBackendType("Arrays not supported in Exasol") + + @classmethod + def _from_sqlglot_MAP(cls, key: sge.DataType, value: sge.DataType) -> NoReturn: + raise com.UnsupportedBackendType("Maps not supported in Exasol") + + @classmethod + def _from_sqlglot_STRUCT(cls, *cols: sge.ColumnDef) -> NoReturn: + raise com.UnsupportedBackendType("Structs not supported in Exasol") diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index ad4b8712de8a..b10f902818e0 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -548,7 +548,6 @@ def ddl_con(ddl_backend): @pytest.fixture( params=_get_backends_to_test( keep=( - "exasol", "mssql", "oracle", "risingwave", diff --git a/ibis/backends/exasol/__init__.py b/ibis/backends/exasol/__init__.py index d00f9b7f9c96..715fa3d6b7e4 100644 --- a/ibis/backends/exasol/__init__.py +++ b/ibis/backends/exasol/__init__.py @@ -1,44 +1,65 @@ from __future__ import annotations +import atexit +import contextlib import re -import warnings -from collections import ChainMap -from contextlib import contextmanager 
from typing import TYPE_CHECKING, Any +from urllib.parse import parse_qs, urlparse -import sqlalchemy as sa +import pyexasol import sqlglot as sg +import sqlglot.expressions as sge +import ibis +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +import ibis.expr.schema as sch +import ibis.expr.types as ir from ibis import util -from ibis.backends.base.sql.alchemy import AlchemyCanCreateSchema, BaseAlchemyBackend -from ibis.backends.base.sqlglot.datatypes import PostgresType +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.base.sqlglot.compiler import STAR, C from ibis.backends.exasol.compiler import ExasolCompiler if TYPE_CHECKING: - from collections.abc import Iterable, MutableMapping + from collections.abc import Iterable, Mapping + + import pandas as pd + import pyarrow as pa from ibis.backends.base import BaseBackend - from ibis.expr import datatypes as dt + +# strip trailing encodings e.g., UTF8 +_VARCHAR_REGEX = re.compile(r"^(VARCHAR(?:\(\d+\)))?(?:\s+.+)?$") -class Backend(BaseAlchemyBackend, AlchemyCanCreateSchema): +class Backend(SQLGlotBackend): name = "exasol" - compiler = ExasolCompiler + compiler = ExasolCompiler() supports_temporary_tables = False supports_create_or_replace = False supports_in_memory_tables = False supports_python_udfs = False + @property + def version(self) -> str: + # https://stackoverflow.com/a/67500385 + query = ( + sg.select("param_value") + .from_(sg.table("EXA_METADATA", catalog="SYS")) + .where(C.param_name.eq("databaseProductVersion")) + ) + with self._safe_raw_sql(query) as result: + [(version,)] = result.fetchall() + return version + def do_connect( self, user: str, password: str, host: str = "localhost", port: int = 8563, - schema: str | None = None, - encryption: bool = True, - certificate_validation: bool = True, - encoding: str = "en_US.UTF-8", + **kwargs: Any, ) -> None: """Create an Ibis client connected to an Exasol database. @@ -52,130 +73,286 @@ def do_connect( Hostname to connect to (default: "localhost"). port Port number to connect to (default: 8563) - schema - Database schema to open, if `None`, no schema will be opened. - encryption - Enables/disables transport layer encryption (default: True). - certificate_validation - Enables/disables certificate validation (default: True). - encoding - The encoding format (default: "en_US.UTF-8"). + kwargs + Additional keyword arguments passed to `pyexasol.connect`. """ - options = [ - "SSLCertificate=SSL_VERIFY_NONE" if not certificate_validation else "", - f"ENCRYPTION={'yes' if encryption else 'no'}", - f"CONNECTIONCALL={encoding}", - ] - url_template = ( - "exa+websocket://{user}:{password}@{host}:{port}/{schema}?{options}" - ) - url = sa.engine.url.make_url( - url_template.format( - user=user, - password=password, - host=host, - port=port, - schema=schema, - options="&".join(options), + if kwargs.pop("quote_ident", None) is not None: + raise com.UnsupportedArgumentError( + "Setting `quote_ident` to anything other than `True` is not supported. " + "Ibis requires all identifiers to be quoted to work correctly." 
) - ) - engine = sa.create_engine(url, poolclass=sa.pool.StaticPool) - super().do_connect(engine) - - def _convert_kwargs(self, kwargs: MutableMapping) -> None: - def convert_sqla_to_ibis(keyword_arguments): - sqla_to_ibis = {"tls": "encryption", "username": "user"} - for sqla_kwarg, ibis_kwarg in sqla_to_ibis.items(): - if sqla_kwarg in keyword_arguments: - keyword_arguments[ibis_kwarg] = keyword_arguments.pop(sqla_kwarg) - - def filter_kwargs(keyword_arguments): - allowed_parameters = [ - "user", - "password", - "host", - "port", - "schema", - "encryption", - "certificate", - "encoding", - ] - to_be_removed = [ - key for key in keyword_arguments if key not in allowed_parameters - ] - for parameter_name in to_be_removed: - del keyword_arguments[parameter_name] - convert_sqla_to_ibis(kwargs) - filter_kwargs(kwargs) + self.con = pyexasol.connect( + dsn=f"{host}:{port}", + user=user, + password=password, + quote_ident=True, + **kwargs, + ) + self._temp_views = set() def _from_url(self, url: str, **kwargs) -> BaseBackend: """Construct an ibis backend from a SQLAlchemy-conforming URL.""" - kwargs = ChainMap(kwargs) - _, new_kwargs = self.inspector.dialect.create_connect_args(url) - kwargs = kwargs.new_child(new_kwargs) - kwargs = dict(kwargs) + url = urlparse(url) + query_params = parse_qs(url.query) + kwargs = { + "user": url.username, + "password": url.password, + "schema": url.path[1:] or None, + "host": url.hostname, + "port": url.port, + } | kwargs + + for name, value in query_params.items(): + if len(value) > 1: + kwargs[name] = value + elif len(value) == 1: + kwargs[name] = value[0] + else: + raise com.IbisError(f"Invalid URL parameter: {name}") + self._convert_kwargs(kwargs) return self.connect(**kwargs) - @property - def inspector(self): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=sa.exc.RemovedIn20Warning) - return super().inspector - - @contextmanager + @contextlib.contextmanager def begin(self): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=sa.exc.RemovedIn20Warning) - with super().begin() as con: - yield con + # pyexasol doesn't have a cursor method + con = self.con + try: + yield con + except Exception: + con.rollback() + raise + else: + con.commit() + + @contextlib.contextmanager + def _safe_raw_sql(self, query: str, *args, **kwargs): + with contextlib.suppress(AttributeError): + query = query.sql(dialect=self.compiler.dialect) + + with self.begin() as cur: + yield cur.execute(query, *args, **kwargs) def list_tables(self, like=None, database=None): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=sa.exc.RemovedIn20Warning) - return super().list_tables(like=like, database=database) + tables = sg.select("table_name").from_( + sg.table("EXA_ALL_TABLES", catalog="SYS") + ) + views = sg.select(sg.column("view_name").as_("table_name")).from_( + sg.table("EXA_ALL_VIEWS", catalog="SYS") + ) - def _get_sqla_table( - self, - name: str, - autoload: bool = True, - **kwargs: Any, - ) -> sa.Table: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=sa.exc.RemovedIn20Warning) - return super()._get_sqla_table(name=name, autoload=autoload, **kwargs) + if database is not None: + tables = tables.where(sg.column("table_schema").eq(sge.convert(database))) + views = views.where(sg.column("view_schema").eq(sge.convert(database))) + + query = sg.union(tables, views) + + with self._safe_raw_sql(query) as con: + tables = con.fetchall() + + return self._filter_with_like([table for 
(table,) in tables], like=like) + + def get_schema( + self, table_name: str, schema: str | None = None, database: str | None = None + ) -> sch.Schema: + name_type_pairs = self._metadata( + sg.select(STAR) + .from_( + sg.table( + table_name, db=schema, catalog=database, quoted=self.compiler.quoted + ) + ) + .sql(self.compiler.dialect) + ) + return sch.Schema.from_tuples(name_type_pairs) + + def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: + import pandas as pd + + from ibis.backends.exasol.converter import ExasolPandasData + + df = pd.DataFrame.from_records(cursor, columns=schema.names, coerce_float=True) + df = ExasolPandasData.convert_table(df, schema) + return df def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]: - table = sg.table(util.gen_name("exasol_metadata")) + table = sg.table(util.gen_name("exasol_metadata"), quoted=self.compiler.quoted) + dialect = self.compiler.dialect create_view = sg.exp.Create( - kind="VIEW", this=table, expression=sg.parse_one(query, dialect="postgres") + kind="VIEW", + this=table, + expression=sg.parse_one(query, dialect=dialect), ) drop_view = sg.exp.Drop(kind="VIEW", this=table) - describe = sg.exp.Describe(this=table).sql(dialect="postgres") - # strip trailing encodings e.g., UTF8 - varchar_regex = re.compile(r"^(VARCHAR(?:\(\d+\)))?(?:\s+.+)?$") - with self.begin() as con: - con.exec_driver_sql(create_view.sql(dialect="postgres")) + describe = sg.exp.Describe(this=table) + with self._safe_raw_sql(create_view): try: yield from ( ( name, - PostgresType.from_string(varchar_regex.sub(r"\1", typ)), + self.compiler.type_mapper.from_string( + _VARCHAR_REGEX.sub(r"\1", typ) + ), ) - for name, typ, *_ in con.exec_driver_sql(describe) + for name, typ, *_ in self.con.execute( + describe.sql(dialect=dialect) + ).fetchall() ) finally: - con.exec_driver_sql(drop_view.sql(dialect="postgres")) + self.con.execute(drop_view.sql(dialect=dialect)) - @property - def current_schema(self) -> str: - return self._scalar_query(sa.select(sa.text("CURRENT_SCHEMA"))) + def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: + schema = op.schema + if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]: + raise com.IbisTypeError( + "Exasol cannot yet reliably handle `null` typed columns; " + f"got null typed columns: {null_columns}" + ) + + # only register if we haven't already done so + if (name := op.name) not in self.list_tables(): + quoted = self.compiler.quoted + column_defs = [ + sg.exp.ColumnDef( + this=sg.to_identifier(colname, quoted=quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [ + sg.exp.ColumnConstraint( + kind=sg.exp.NotNullColumnConstraint() + ) + ] + ), + ) + for colname, typ in schema.items() + ] + + ident = sg.to_identifier(name, quoted=quoted) + create_stmt = sg.exp.Create( + kind="TABLE", + this=sg.exp.Schema(this=ident, expressions=column_defs), + ) + create_stmt_sql = create_stmt.sql(self.name) + + df = op.data.to_frame() + with self._safe_raw_sql(create_stmt_sql): + self.con.import_from_pandas(df, name) + + atexit.register(self._clean_up_tmp_table, ident) + + def _clean_up_tmp_table(self, ident: sge.Identifier) -> None: + with self._safe_raw_sql( + sge.Drop(kind="TABLE", this=ident, force=True, cascade=True) + ): + pass + + def create_table( + self, + name: str, + obj: pd.DataFrame | pa.Table | ir.Table | None = None, + *, + schema: sch.Schema | None = None, + database: str | None = None, + overwrite: bool = False, + ) -> 
ir.Table: + """Create a table in Exasol. + + Parameters + ---------- + name + Name of the table to create + obj + The data with which to populate the table; optional, but at least + one of `obj` or `schema` must be specified + schema + The schema of the table to create; optional, but at least one of + `obj` or `schema` must be specified + database + The database in which to create the table; optional + overwrite + If `True`, replace the table if it already exists, otherwise fail + if the table exists + """ + if obj is None and schema is None: + raise ValueError("Either `obj` or `schema` must be specified") + + if database is not None and database != self.current_database: + raise com.UnsupportedOperationError( + "Creating tables in other databases is not supported by Exasol" + ) + else: + database = None + + quoted = self.compiler.quoted + + if obj is not None: + if not isinstance(obj, ir.Expr): + table = ibis.memtable(obj) + else: + table = obj + + self._run_pre_execute_hooks(table) + + query = self._to_sqlglot(table) + else: + query = None + + type_mapper = self.compiler.type_mapper + column_defs = [ + sge.ColumnDef( + this=sg.to_identifier(colname, quoted=quoted), + kind=type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [sge.ColumnConstraint(kind=sge.NotNullColumnConstraint())] + ), + ) + for colname, typ in (schema or table.schema()).items() + ] + + if overwrite: + temp_name = util.gen_name(f"{self.name}_table") + else: + temp_name = name + + table = sg.table(temp_name, catalog=database, quoted=quoted) + target = sge.Schema(this=table, expressions=column_defs) + + create_stmt = sge.Create(kind="TABLE", this=target) + + this = sg.table(name, catalog=database, quoted=quoted) + with self._safe_raw_sql(create_stmt): + if query is not None: + self.con.execute( + sge.Insert(this=table, expression=query).sql(self.name) + ) + + if overwrite: + self.con.execute( + sge.Drop(kind="TABLE", this=this, exists=True).sql(self.name) + ) + self.con.execute( + f"RENAME TABLE {table.sql(self.name)} TO {this.sql(self.name)}" + ) + + if schema is None: + return self.table(name, database=database) + + # preserve the input schema if it was provided + return ops.DatabaseTable( + name, schema=schema, source=self, namespace=ops.Namespace(database=database) + ).to_expr() @property - def current_database(self) -> str: - return None + def current_schema(self) -> str: + with self._safe_raw_sql("SELECT CURRENT_SCHEMA") as cur: + [(schema,)] = cur.fetchall() + return schema def drop_schema( self, name: str, database: str | None = None, force: bool = False @@ -184,11 +361,9 @@ def drop_schema( raise NotImplementedError( "`database` argument is not supported for the Exasol backend" ) - drop_schema = sg.exp.Drop( - kind="SCHEMA", this=sg.to_identifier(name), exists=force - ) + drop_schema = sg.exp.Drop(kind="SCHEMA", this=name, exists=force) with self.begin() as con: - con.exec_driver_sql(drop_schema.sql(dialect="postgres")) + con.execute(drop_schema.sql(dialect=self.compiler.dialect)) def create_schema( self, name: str, database: str | None = None, force: bool = False @@ -197,20 +372,15 @@ def create_schema( raise NotImplementedError( "`database` argument is not supported for the Exasol backend" ) - create_schema = sg.exp.Create( - kind="SCHEMA", this=sg.to_identifier(name), exists=force - ) + create_schema = sg.exp.Create(kind="SCHEMA", this=name, exists=force) + open_schema = self.current_schema with self.begin() as con: - open_schema = self.current_schema -
con.exec_driver_sql(create_schema.sql(dialect="postgres")) + con.execute(create_schema.sql(dialect=self.compiler.dialect)) # Exasol implicitly opens the created schema, therefore we need to restore # the previous context. - action = ( - sa.text(f"OPEN SCHEMA {open_schema}") - if open_schema - else sa.text(f"CLOSE SCHEMA {name}") + con.execute( + f"OPEN SCHEMA {open_schema}" if open_schema else f"CLOSE SCHEMA {name}" ) - con.exec_driver_sql(action) def list_schemas( self, like: str | None = None, database: str | None = None @@ -220,15 +390,25 @@ def list_schemas( "`database` argument is not supported for the Exasol backend" ) - schema, table = "SYS", "EXA_SCHEMAS" - sch = sa.table( - table, - sa.column("schema_name", sa.TEXT()), - schema=schema, - ) + query = sg.select("schema_name").from_(sg.table("EXA_SCHEMAS", catalog="SYS")) - query = sa.select(sch.c.schema_name) + with self._safe_raw_sql(query) as con: + schemas = con.fetchall() + return self._filter_with_like([schema for (schema,) in schemas], like=like) - with self.begin() as con: - schemas = list(con.execute(query).scalars()) - return self._filter_with_like(schemas, like=like) + def _cursor_batches( + self, + expr: ir.Expr, + params: Mapping[ir.Scalar, Any] | None = None, + limit: int | str | None = None, + chunk_size: int = 1 << 20, + ) -> Iterable[list]: + self._run_pre_execute_hooks(expr) + + dtypes = expr.as_table().schema().values() + + with self._safe_raw_sql( + self.compile(expr, limit=limit, params=params) + ) as cursor: + while batch := cursor.fetchmany(chunk_size): + yield (tuple(map(dt.normalize, dtypes, row)) for row in batch) diff --git a/ibis/backends/exasol/compiler.py b/ibis/backends/exasol/compiler.py index d4e5fcc6d114..efbde277b971 100644 --- a/ibis/backends/exasol/compiler.py +++ b/ibis/backends/exasol/compiler.py @@ -1,24 +1,225 @@ from __future__ import annotations -import sqlalchemy as sa +import contextlib +from functools import singledispatchmethod -from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator -from ibis.backends.exasol import registry -from ibis.backends.exasol.datatypes import ExasolSQLType +import sqlglot.expressions as sge +from sqlglot.dialects import Postgres +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis.backends.base.sqlglot.compiler import NULL, SQLGlotCompiler +from ibis.backends.base.sqlglot.datatypes import ExasolType +from ibis.backends.base.sqlglot.rewrites import ( + exclude_unsupported_window_frame_from_ops, + exclude_unsupported_window_frame_from_row_number, + rewrite_empty_order_by_window, +) +from ibis.common.patterns import replace +from ibis.expr.rewrites import p, rewrite_sample, y -class ExasolExprTranslator(AlchemyExprTranslator): - _registry = registry.create() - _rewrites = AlchemyExprTranslator._rewrites.copy() - _integer_to_timestamp = sa.func.from_unixtime - _dialect_name = "exa.websocket" - native_json_type = False - type_mapper = ExasolSQLType +def _interval(self, e): + """Work around Exasol's inability to handle string literals in INTERVAL syntax.""" + arg = e.args["this"].this + with contextlib.suppress(AttributeError): + arg = arg.sql(self.dialect) + res = f"INTERVAL '{arg}' {e.args['unit']}" + return res -rewrites = ExasolExprTranslator.rewrites +# Is postgres the best dialect to inherit from? 
+class Exasol(Postgres): + """The exasol dialect.""" -class ExasolCompiler(AlchemyCompiler): - translator_class = ExasolExprTranslator - support_values_syntax_in_select = False + class Generator(Postgres.Generator): + TRANSFORMS = Postgres.Generator.TRANSFORMS.copy() | { + sge.Interval: _interval, + } + + TYPE_MAPPING = Postgres.Generator.TYPE_MAPPING.copy() | { + sge.DataType.Type.TIMESTAMPTZ: "TIMESTAMP WITH LOCAL TIME ZONE", + } + + +@replace(p.WindowFunction(p.MinRank | p.DenseRank, y @ p.WindowFrame(start=None))) +def exclude_unsupported_window_frame_from_rank(_, y): + return ops.Subtract( + _.copy(frame=y.copy(start=None, end=0, order_by=y.order_by or (ops.NULL,))), 1 + ) + + +class ExasolCompiler(SQLGlotCompiler): + __slots__ = () + + dialect = "exasol" + type_mapper = ExasolType + quoted = True + rewrites = ( + rewrite_sample, + exclude_unsupported_window_frame_from_ops, + exclude_unsupported_window_frame_from_rank, + exclude_unsupported_window_frame_from_row_number, + rewrite_empty_order_by_window, + *SQLGlotCompiler.rewrites, + ) + + @staticmethod + def _minimize_spec(start, end, spec): + if ( + start is None + and isinstance(getattr(end, "value", None), ops.Literal) + and end.value.value == 0 + and end.following + ): + return None + return spec + + def _aggregate(self, funcname: str, *args, where): + func = self.f[funcname] + if where is not None: + args = tuple(self.if_(where, arg, NULL) for arg in args) + return func(*args) + + @staticmethod + def _gen_valid_name(name: str) -> str: + """Exasol does not allow dots in quoted column names.""" + return name.replace(".", "_") + + @singledispatchmethod + def visit_node(self, op, **kw): + return super().visit_node(op, **kw) + + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_date(): + return self.cast(value.isoformat(), dtype) + elif dtype.is_timestamp(): + val = value.replace(tzinfo=None).isoformat(sep=" ", timespec="milliseconds") + return self.cast(val, dtype) + elif dtype.is_array() or dtype.is_struct() or dtype.is_map(): + raise com.UnsupportedBackendType( + f"{type(dtype).__name__}s are not supported in Exasol" + ) + elif dtype.is_uuid(): + return sge.convert(str(value)) + return super().visit_NonNullLiteral(op, value=value, dtype=dtype) + + @visit_node.register(ops.Date) + def visit_Date(self, op, *, arg): + return self.cast(arg, dt.date) + + @visit_node.register(ops.StartsWith) + def visit_StartsWith(self, op, *, arg, start): + return self.f.left(arg, self.f.length(start)).eq(start) + + @visit_node.register(ops.EndsWith) + def visit_EndsWith(self, op, *, arg, end): + return self.f.right(arg, self.f.length(end)).eq(end) + + @visit_node.register(ops.StringFind) + def visit_StringFind(self, op, *, arg, substr, start, end): + return self.f.locate(substr, arg, (start if start is not None else 0) + 1) + + @visit_node.register(ops.StringSQLILike) + def visit_StringSQLILike(self, op, *, arg, pattern, escape): + return self.f.upper(arg).like(self.f.upper(pattern)) + + @visit_node.register(ops.StringContains) + def visit_StringContains(self, op, *, haystack, needle): + return self.f.locate(needle, haystack) > 0 + + @visit_node.register(ops.ExtractSecond) + def visit_ExtractSecond(self, op, *, arg): + return self.f.floor(self.cast(self.f.extract(self.v.second, arg), op.dtype)) + + @visit_node.register(ops.AnalyticVectorizedUDF) + @visit_node.register(ops.ApproxMedian) + @visit_node.register(ops.Arbitrary) + @visit_node.register(ops.ArgMax) + @visit_node.register(ops.ArgMin) + @visit_node.register(ops.ArrayCollect) + 
@visit_node.register(ops.ArrayDistinct) + @visit_node.register(ops.ArrayFilter) + @visit_node.register(ops.ArrayFlatten) + @visit_node.register(ops.ArrayIntersect) + @visit_node.register(ops.ArrayMap) + @visit_node.register(ops.ArraySort) + @visit_node.register(ops.ArrayStringJoin) + @visit_node.register(ops.ArrayUnion) + @visit_node.register(ops.ArrayZip) + @visit_node.register(ops.BitwiseNot) + @visit_node.register(ops.Covariance) + @visit_node.register(ops.CumeDist) + @visit_node.register(ops.DateAdd) + @visit_node.register(ops.DateDelta) + @visit_node.register(ops.DateSub) + @visit_node.register(ops.DateFromYMD) + @visit_node.register(ops.DayOfWeekIndex) + @visit_node.register(ops.DayOfWeekName) + @visit_node.register(ops.ElementWiseVectorizedUDF) + @visit_node.register(ops.ExtractDayOfYear) + @visit_node.register(ops.ExtractEpochSeconds) + @visit_node.register(ops.ExtractQuarter) + @visit_node.register(ops.ExtractWeekOfYear) + @visit_node.register(ops.First) + @visit_node.register(ops.IntervalFromInteger) + @visit_node.register(ops.IsInf) + @visit_node.register(ops.IsNan) + @visit_node.register(ops.Last) + @visit_node.register(ops.Levenshtein) + @visit_node.register(ops.Median) + @visit_node.register(ops.MultiQuantile) + @visit_node.register(ops.Quantile) + @visit_node.register(ops.ReductionVectorizedUDF) + @visit_node.register(ops.RegexExtract) + @visit_node.register(ops.RegexReplace) + @visit_node.register(ops.RegexSearch) + @visit_node.register(ops.RegexSplit) + @visit_node.register(ops.RowID) + @visit_node.register(ops.StandardDev) + @visit_node.register(ops.Strftime) + @visit_node.register(ops.StringJoin) + @visit_node.register(ops.StringSplit) + @visit_node.register(ops.StringToTimestamp) + @visit_node.register(ops.TimeDelta) + @visit_node.register(ops.TimestampAdd) + @visit_node.register(ops.TimestampBucket) + @visit_node.register(ops.TimestampDelta) + @visit_node.register(ops.TimestampDiff) + @visit_node.register(ops.TimestampNow) + @visit_node.register(ops.TimestampSub) + @visit_node.register(ops.TimestampTruncate) + @visit_node.register(ops.TypeOf) + @visit_node.register(ops.Unnest) + @visit_node.register(ops.Variance) + def visit_Undefined(self, op, **_): + raise com.OperationNotDefinedError(type(op).__name__) + + @visit_node.register(ops.CountDistinctStar) + def visit_Unsupported(self, op, **_): + raise com.UnsupportedOperationError(type(op).__name__) + + +_SIMPLE_OPS = { + ops.Log10: "log10", + ops.Modulus: "mod", + ops.All: "min", + ops.Any: "max", +} + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @ExasolCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) + + else: + + @ExasolCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) + + setattr(ExasolCompiler, f"visit_{_op.__name__}", _fmt) diff --git a/ibis/backends/exasol/converter.py b/ibis/backends/exasol/converter.py new file mode 100644 index 000000000000..fb7e83dc712e --- /dev/null +++ b/ibis/backends/exasol/converter.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +import datetime + +from ibis.formats.pandas import PandasData + + +class ExasolPandasData(PandasData): + @classmethod + def convert_String(cls, s, dtype, pandas_type): + if s.dtype != "object": + return s.map(str) + else: + return s + + @classmethod + def convert_Interval(cls, s, dtype, pandas_dtype): + def 
parse_timedelta(value): + # format is '(+|-)days hour:minute:second.millisecond' + days, rest = value.split(" ", 1) + hms, millis = rest.split(".", 1) + hours, minutes, seconds = hms.split(":") + return datetime.timedelta( + days=int(days), + hours=int(hours), + minutes=int(minutes), + seconds=int(seconds), + milliseconds=int(millis), + ) + + if s.dtype == "int64": + # exasol can return intervals as the number of integer days (e.g., + # from subtraction of two dates) + # + # TODO: investigate whether days are the only interval ever + # returned as integers + return s.map(lambda days: datetime.timedelta(days=days)) + return s.map(parse_timedelta, na_action="ignore") diff --git a/ibis/backends/exasol/datatypes.py b/ibis/backends/exasol/datatypes.py deleted file mode 100644 index afc13c9d7896..000000000000 --- a/ibis/backends/exasol/datatypes.py +++ /dev/null @@ -1,26 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -import sqlalchemy.types as sa_types - -from ibis.backends.base.sql.alchemy.datatypes import AlchemyType - -if TYPE_CHECKING: - import ibis.expr.datatypes as dt - - -class ExasolSQLType(AlchemyType): - dialect = "exa.websocket" - - @classmethod - def from_ibis(cls, dtype: dt.DataType) -> sa_types.TypeEngine: - if dtype.is_string(): - # see also: https://docs.exasol.com/db/latest/sql_references/data_types/datatypesoverview.htm - MAX_VARCHAR_SIZE = 2_000_000 - return sa_types.VARCHAR(MAX_VARCHAR_SIZE) - return super().from_ibis(dtype) - - @classmethod - def to_ibis(cls, typ: sa_types.TypeEngine, nullable: bool = True) -> dt.DataType: - return super().to_ibis(typ, nullable=nullable) diff --git a/ibis/backends/exasol/registry.py b/ibis/backends/exasol/registry.py deleted file mode 100644 index 5c23f3996662..000000000000 --- a/ibis/backends/exasol/registry.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import annotations - -import sqlalchemy as sa - -import ibis.expr.operations as ops - -# used for literal translate -from ibis.backends.base.sql.alchemy import ( - fixed_arity, - sqlalchemy_operation_registry, -) - - -class _String: - @staticmethod - def find(t, op): - args = [t.translate(op.substr), t.translate(op.arg)] - if (start := op.start) is not None: - args.append(t.translate(start) + 1) - return sa.func.locate(*args) - 1 - - @staticmethod - def translate(t, op): - func = fixed_arity(sa.func.translate, 3) - return func(t, op) - - -class _Registry: - _unsupported = {ops.StringJoin} - - _supported = { - ops.Translate: _String.translate, - ops.StringFind: _String.find, - } - - @classmethod - def create(cls): - registry = sqlalchemy_operation_registry.copy() - registry = {k: v for k, v in registry.items() if k not in cls._unsupported} - registry.update(cls._supported) - return registry - - -def create(): - """Create an operation registry for an Exasol backend.""" - return _Registry.create() diff --git a/ibis/backends/exasol/tests/conftest.py b/ibis/backends/exasol/tests/conftest.py index 35d3c6b04c7f..f6389b0336d7 100644 --- a/ibis/backends/exasol/tests/conftest.py +++ b/ibis/backends/exasol/tests/conftest.py @@ -4,6 +4,8 @@ import subprocess from typing import TYPE_CHECKING +import sqlglot as sg + import ibis from ibis.backends.tests.base import ( ServiceBackendTest, @@ -40,19 +42,24 @@ class TestConf(ServiceBackendTest): service_name = "exasol" supports_tpch = False force_sort = True - deps = "sqlalchemy", "sqlalchemy_exasol", "pyexasol" + deps = ("pyexasol",) @staticmethod def connect(*, tmpdir, worker_id, **kw: Any): - kwargs = { - "user": 
EXASOL_USER, - "password": EXASOL_PASS, - "host": EXASOL_HOST, - "port": EXASOL_PORT, - "schema": IBIS_TEST_EXASOL_DB, - "certificate_validation": False, - } - return ibis.exasol.connect(**kwargs) + return ibis.exasol.connect( + user=EXASOL_USER, + password=EXASOL_PASS, + host=EXASOL_HOST, + port=EXASOL_PORT, + **kw, + ) + + def postload(self, **kw: Any): + self.connection = self.connect(schema=IBIS_TEST_EXASOL_DB, **kw) + + @staticmethod + def format_table(name: str) -> str: + return sg.to_identifier(name, quoted=True).sql("exasol") @property def test_files(self) -> Iterable[Path]: diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/exasol/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/exasol/out.sql new file mode 100644 index 000000000000..b309cd65374d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/exasol/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/exasol/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/exasol/out.sql new file mode 100644 index 000000000000..b309cd65374d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/exasol/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/exasol/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/exasol/out.sql new file mode 100644 index 000000000000..6bd0ba8c995d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/exasol/out.sql @@ -0,0 +1,3 @@ +SELECT + SUM("t0"."bigint_col") AS "Sum(bigint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/exasol/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/exasol/out.sql new file mode 100644 index 000000000000..97338646649f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/exasol/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + "t0"."id", + "t0"."bool_col" + FROM "functional_alltypes" AS "t0" + LIMIT 10 +) AS "t2" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/exasol/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/exasol/out.sql new file mode 100644 index 000000000000..d3969647c9ea --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/exasol/out.sql @@ -0,0 +1,22 @@ +SELECT + CASE "t0"."continent" + WHEN 'NA' + THEN 'North America' + WHEN 'SA' + THEN 'South America' + WHEN 'EU' + THEN 'Europe' + WHEN 'AF' + THEN 'Africa' + WHEN 'AS' + THEN 'Asia' + WHEN 'OC' + THEN 'Oceania' + WHEN 'AN' + THEN 'Antarctica' + ELSE 'Unknown continent' + END AS "cont", + SUM("t0"."population") AS "total_pop" +FROM "countries" AS "t0" +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/exasol/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/exasol/out.sql new file mode 100644 index 000000000000..c1611d8cecc3 --- /dev/null +++ 
b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/exasol/out.sql @@ -0,0 +1,9 @@ +SELECT + "t0"."x" IN ( + SELECT + "t0"."x" + FROM "t" AS "t0" + WHERE + "t0"."x" > 2 + ) AS "InSubquery(x)" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 52649199d79b..0d0586a30f9b 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -769,7 +769,7 @@ def mean_and_std(v): lambda _: slice(None), marks=pytest.mark.notimpl( ["exasol"], - raises=(com.OperationNotDefinedError, ExaQueryError, sa.exc.DBAPIError), + raises=(com.OperationNotDefinedError, ExaQueryError), strict=False, ), id="no_cond", @@ -849,9 +849,7 @@ def test_reduction_ops( raises=com.OperationNotDefinedError, reason="no one has attempted implementation yet", ) -@pytest.mark.notimpl( - ["exasol"], raises=(sa.exc.DBAPIError, com.UnsupportedOperationError) -) +@pytest.mark.notimpl(["exasol"], raises=com.UnsupportedOperationError) def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): table = alltypes[["int_col", "double_col", "string_col"]] expr = table.nunique(where=ibis_cond(table)) @@ -920,12 +918,11 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): "sqlite", "druid", "oracle", - "exasol", ], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( - ["mysql", "impala"], raises=com.UnsupportedBackendType + ["mysql", "impala", "exasol"], raises=com.UnsupportedBackendType ), pytest.mark.notyet( ["snowflake"], @@ -1153,8 +1150,7 @@ def test_quantile( ), ], ) -@pytest.mark.notimpl(["mssql"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["exasol"], raises=AttributeError) +@pytest.mark.notimpl(["mssql", "exasol"], raises=com.OperationNotDefinedError) def test_corr_cov( con, batting, @@ -1597,8 +1593,9 @@ def test_grouped_case(backend, con): @pytest.mark.notimpl( - ["datafusion", "mssql", "polars", "exasol"], raises=com.OperationNotDefinedError + ["datafusion", "mssql", "polars"], raises=com.OperationNotDefinedError ) +@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.broken( ["dask"], reason="Dask does not windowize this operation correctly", diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index e7563587f7f6..3b71ebe88346 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -91,6 +91,7 @@ def time_keyed_right(time_keyed_df2): "druid", "impala", "bigquery", + "exasol", ] ) def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): @@ -127,6 +128,7 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op "druid", "impala", "bigquery", + "exasol", ] ) def test_keyed_asof_join_with_tolerance( diff --git a/ibis/backends/tests/test_binary.py b/ibis/backends/tests/test_binary.py index c3dfe9965424..3559741a493d 100644 --- a/ibis/backends/tests/test_binary.py +++ b/ibis/backends/tests/test_binary.py @@ -3,7 +3,6 @@ import contextlib import pytest -import sqlalchemy.exc import ibis import ibis.common.exceptions as com @@ -29,7 +28,7 @@ @pytest.mark.notimpl( ["exasol"], "Exasol does not have native support for a binary data type.", - raises=sqlalchemy.exc.StatementError, + raises=NotImplementedError, ) def test_binary_literal(con, backend): expr = ibis.literal(b"A") diff --git a/ibis/backends/tests/test_dot_sql.py b/ibis/backends/tests/test_dot_sql.py index 90c14a598da0..6d7fa5d4c08c 100644 --- 
a/ibis/backends/tests/test_dot_sql.py +++ b/ibis/backends/tests/test_dot_sql.py @@ -24,6 +24,7 @@ _NAMES = { "bigquery": "ibis_gbq_testing.functional_alltypes", + "exasol": '"functional_alltypes"', } @@ -38,17 +39,20 @@ ], ) def test_con_dot_sql(backend, con, schema): - alltypes = con.table("functional_alltypes") + alltypes = backend.functional_alltypes # pull out the quoted name - name = _NAMES.get(con.name, alltypes.op().name) + name = _NAMES.get(con.name, "functional_alltypes") + quoted = getattr(getattr(con, "compiler", None), "quoted", True) + dialect = _IBIS_TO_SQLGLOT_DIALECT.get(con.name, con.name) + cols = [ + sg.column("string_col", quoted=quoted).as_("s", quoted=quoted).sql(dialect), + (sg.column("double_col", quoted=quoted) + 1.0) + .as_("new_col", quoted=quoted) + .sql(dialect), + ] t = ( con.sql( - f""" - SELECT - string_col as s, - double_col + 1.0 AS new_col - FROM {name} - """, + f"SELECT {', '.join(cols)} FROM {name}", schema=schema, ) .group_by("s") # group by a column from SQL diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index b1ec4c4bff28..1760cf8de461 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -13,6 +13,7 @@ from ibis.backends.tests.errors import ( DuckDBNotImplementedException, DuckDBParserException, + ExaQueryError, MySQLOperationalError, PyDeltaTableError, PyDruidProgrammingError, @@ -97,7 +98,6 @@ def test_empty_column_to_pyarrow(limit, awards_players): @pytest.mark.parametrize("limit", no_limit) -@pytest.mark.notimpl(["exasol"], raises=AttributeError) def test_empty_scalar_to_pyarrow(limit, awards_players): expr = awards_players.filter(awards_players.awardID == "DEADBEEF").yearID.sum() array = expr.to_pyarrow(limit=limit) @@ -105,7 +105,6 @@ def test_empty_scalar_to_pyarrow(limit, awards_players): @pytest.mark.parametrize("limit", no_limit) -@pytest.mark.notimpl(["exasol"], raises=AttributeError) def test_scalar_to_pyarrow_scalar(limit, awards_players): scalar = awards_players.yearID.sum().to_pyarrow(limit=limit) assert isinstance(scalar, pa.Scalar) @@ -209,7 +208,9 @@ def test_table_to_parquet(tmp_path, backend, awards_players): df = pd.read_parquet(outparquet) - backend.assert_frame_equal(awards_players.to_pandas(), df) + backend.assert_frame_equal( + awards_players.to_pandas().fillna(pd.NA), df.fillna(pd.NA) + ) @pytest.mark.notimpl( @@ -224,7 +225,9 @@ def test_table_to_parquet_writer_kwargs(version, tmp_path, backend, awards_playe df = pd.read_parquet(outparquet) - backend.assert_frame_equal(awards_players.to_pandas(), df) + backend.assert_frame_equal( + awards_players.to_pandas().fillna(pd.NA), df.fillna(pd.NA) + ) md = pa.parquet.read_metadata(outparquet) @@ -297,7 +300,7 @@ def test_memtable_to_file(tmp_path, con, ftype, monkeypatch): assert outfile.is_file() -@pytest.mark.notimpl(["exasol"]) +@pytest.mark.notimpl(["flink"]) def test_table_to_csv(tmp_path, backend, awards_players): outcsv = tmp_path / "out.csv" @@ -311,7 +314,7 @@ def test_table_to_csv(tmp_path, backend, awards_players): backend.assert_frame_equal(awards_players.to_pandas(), df) -@pytest.mark.notimpl(["exasol"]) +@pytest.mark.notimpl(["flink"]) @pytest.mark.notimpl( ["duckdb"], reason="cannot inline WriteOptions objects", @@ -337,12 +340,12 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): id="decimal128", marks=[ pytest.mark.notyet(["flink"], raises=NotImplementedError), - pytest.mark.notyet(["exasol"], raises=sa.exc.DBAPIError), pytest.mark.notyet( ["risingwave"], 
raises=sa.exc.DBAPIError, reason="Feature is not yet implemented: unsupported data type: NUMERIC(38,9)", ), + pytest.mark.notyet(["exasol"], raises=ExaQueryError), ], ), param( @@ -362,12 +365,13 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): raises=(PySparkParseException, PySparkArithmeticException), reason="precision is out of range", ), - pytest.mark.notyet(["exasol"], raises=sa.exc.DBAPIError), pytest.mark.notyet( ["risingwave"], raises=sa.exc.DBAPIError, reason="Feature is not yet implemented: unsupported data type: NUMERIC(76,38)", ), + pytest.mark.notyet(["flink"], raises=NotImplementedError), + pytest.mark.notyet(["exasol"], raises=ExaQueryError), ], ), ], @@ -495,7 +499,6 @@ def test_to_pandas_batches_empty_table(backend, con): param( None, marks=[ - pytest.mark.notimpl(["exasol"], raises=sa.exc.CompileError), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -520,7 +523,6 @@ def test_to_pandas_batches_nonempty_table(backend, con, n): param( None, marks=[ - pytest.mark.notimpl(["exasol"], raises=sa.exc.CompileError), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index c92f65090918..5f4e353d1641 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -72,7 +72,6 @@ def test_null_literal(con, backend): } -@pytest.mark.notimpl(["exasol"]) def test_boolean_literal(con, backend): expr = ibis.literal(False, type=dt.boolean) result = con.execute(expr) @@ -106,32 +105,34 @@ def test_scalar_fillna_nullif(con, expr, expected): @pytest.mark.parametrize( - ("col", "filt"), + ("col", "value", "filt"), [ param( "nan_col", - _.nan_col.isnan(), - marks=pytest.mark.notimpl(["mysql", "sqlite"]), + ibis.literal(np.nan), + methodcaller("isnan"), + marks=[ + pytest.mark.notimpl(["mysql", "sqlite", "druid"]), + pytest.mark.notyet( + ["exasol"], + raises=ExaQueryError, + reason="no way to test for nan-ness", + ), + ], id="nan_col", ), param( - "none_col", - _.none_col.isnull(), - marks=[pytest.mark.notimpl(["mysql"])], - id="none_col", + "none_col", ibis.NA.cast("float64"), methodcaller("isnull"), id="none_col" ), ], ) -@pytest.mark.notimpl(["mssql", "druid", "oracle"]) +@pytest.mark.notimpl(["mssql", "oracle"]) @pytest.mark.notyet(["flink"], "NaN is not supported in Flink SQL", raises=ValueError) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError, strict=False) -def test_isna(backend, alltypes, col, filt): - table = alltypes.select( - nan_col=ibis.literal(np.nan), none_col=ibis.NA.cast("float64") - ) +def test_isna(backend, alltypes, col, value, filt): + table = alltypes.select(**{col: value}) df = table.execute() - result = table[filt].execute().reset_index(drop=True) + result = table[filt(table[col])].execute().reset_index(drop=True) expected = df[df[col].isna()].reset_index(drop=True) backend.assert_frame_equal(result, expected) @@ -569,10 +570,6 @@ def test_order_by_random(alltypes): raises=PyDruidProgrammingError, reason="Druid only supports trivial unions", ) -@pytest.mark.notyet( - ["exasol"], - raises=AssertionError, -) def test_table_info(alltypes): expr = alltypes.info() df = expr.execute() @@ -592,18 +589,8 @@ def test_table_info(alltypes): @pytest.mark.parametrize( ("ibis_op", "pandas_op"), [ - param( - _.string_col.isin([]), - lambda df: df.string_col.isin([]), - marks=pytest.mark.notimpl(["exasol"], raises=ExaQueryError), - id="isin", - ), - param( - _.string_col.notin([]), - 
lambda df: ~df.string_col.isin([]), - marks=pytest.mark.notimpl(["exasol"], raises=ExaQueryError), - id="notin", - ), + param(_.string_col.isin([]), lambda df: df.string_col.isin([]), id="isin"), + param(_.string_col.notin([]), lambda df: ~df.string_col.isin([]), id="notin"), param( (_.string_col.length() * 1).isin([1]), lambda df: (df.string_col.str.len() * 1).isin([1]), @@ -674,7 +661,6 @@ def test_isin_notin_column_expr(backend, alltypes, df, ibis_op, pandas_op): param(False, True, neg, id="false_negate"), ], ) -@pytest.mark.notimpl(["exasol"]) def test_logical_negation_literal(con, expr, expected, op): assert con.execute(op(ibis.literal(expr)).name("tmp")) == expected @@ -827,7 +813,7 @@ def test_int_scalar(alltypes): assert result.dtype == np.int32 -@pytest.mark.notimpl(["dask", "datafusion", "pandas", "polars", "druid", "exasol"]) +@pytest.mark.notimpl(["dask", "datafusion", "pandas", "polars", "druid"]) @pytest.mark.notyet( ["clickhouse"], reason="https://github.com/ClickHouse/ClickHouse/issues/6697" ) @@ -871,12 +857,12 @@ def test_typeof(con): @pytest.mark.notimpl(["datafusion", "druid"]) @pytest.mark.notimpl(["pyspark"], condition=is_older_than("pyspark", "3.5.0")) @pytest.mark.notyet(["dask", "mssql"], reason="not supported by the backend") -@pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) @pytest.mark.broken( ["risingwave"], raises=sa.exc.InternalError, reason="https://github.com/risingwavelabs/risingwave/issues/1343", ) +@pytest.mark.notyet(["exasol"], raises=ExaQueryError, reason="not supported by exasol") def test_isin_uncorrelated( backend, batting, awards_players, batting_df, awards_players_df ): @@ -896,7 +882,7 @@ def test_isin_uncorrelated( @pytest.mark.broken(["polars"], reason="incorrect answer") -@pytest.mark.notimpl(["druid", "exasol"]) +@pytest.mark.notimpl(["druid"]) @pytest.mark.notyet(["dask"], reason="not supported by the backend") def test_isin_uncorrelated_filter( backend, batting, awards_players, batting_df, awards_players_df @@ -921,7 +907,14 @@ def test_isin_uncorrelated_filter( "dtype", [ "bool", - "bytes", + param( + "bytes", + marks=[ + pytest.mark.notyet( + ["exasol"], raises=ExaQueryError, reason="no binary type" + ) + ], + ), "str", "int", "float", @@ -933,7 +926,14 @@ def test_isin_uncorrelated_filter( "float64", "timestamp", "date", - "time", + param( + "time", + marks=[ + pytest.mark.notyet( + ["exasol"], raises=ExaQueryError, reason="no time type" + ) + ], + ), ], ) def test_literal_na(con, dtype): @@ -942,8 +942,7 @@ def test_literal_na(con, dtype): assert pd.isna(result) -@pytest.mark.notimpl(["exasol"]) -def test_memtable_bool_column(backend, con): +def test_memtable_bool_column(con): data = [True, False, True] t = ibis.memtable({"a": data}) assert Counter(con.execute(t.a)) == Counter(data) @@ -1352,7 +1351,6 @@ def hash_256(col): "risingwave", "snowflake", "sqlite", - "exasol", ] ) @pytest.mark.parametrize( @@ -1370,6 +1368,7 @@ def hash_256(col): pytest.mark.notyet(["duckdb", "impala"], reason="casts to NULL"), pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), pytest.mark.notyet(["trino"], raises=TrinoUserError), + pytest.mark.notyet(["exasol"], raises=ExaQueryError), pytest.mark.broken( ["druid"], reason="casts to 1672531200000 (millisecond)" ), @@ -1512,10 +1511,6 @@ def test_try_cast_func(con, from_val, to_type, func): slice(None, None), lambda t: t.count().to_pandas(), marks=[ - pytest.mark.notyet( - ["exasol"], - raises=sa.exc.CompileError, - ), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ 
-1529,10 +1524,6 @@ def test_try_cast_func(con, from_val, to_type, func): slice(0, None), lambda t: t.count().to_pandas(), marks=[ - pytest.mark.notyet( - ["exasol"], - raises=sa.exc.CompileError, - ), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -1563,8 +1554,13 @@ def test_try_cast_func(con, from_val, to_type, func): pytest.mark.never( ["impala"], raises=ImpalaHiveServer2Error, - reason="impala doesn't support OFFSET without ORDER BY", - ) + reason="doesn't support OFFSET without ORDER BY", + ), + pytest.mark.notyet( + ["exasol"], + raises=ExaQueryError, + reason="doesn't support OFFSET without ORDER BY", + ), ], ), param( @@ -1582,10 +1578,7 @@ def test_try_cast_func(con, from_val, to_type, func): raises=sa.exc.CompileError, reason="mssql doesn't support OFFSET without LIMIT", ), - pytest.mark.notyet( - ["exasol"], - raises=sa.exc.CompileError, - ), + pytest.mark.notyet(["exasol"], raises=ExaQueryError), pytest.mark.never( ["impala"], raises=ImpalaHiveServer2Error, @@ -1607,8 +1600,13 @@ def test_try_cast_func(con, from_val, to_type, func): pytest.mark.never( ["impala"], raises=ImpalaHiveServer2Error, - reason="impala doesn't support OFFSET without ORDER BY", - ) + reason="doesn't support OFFSET without ORDER BY", + ), + pytest.mark.notyet( + ["exasol"], + raises=ExaQueryError, + reason="doesn't support OFFSET without ORDER BY", + ), ], ), param( @@ -1621,10 +1619,7 @@ def test_try_cast_func(con, from_val, to_type, func): raises=sa.exc.CompileError, reason="mssql doesn't support OFFSET without LIMIT", ), - pytest.mark.notyet( - ["exasol"], - raises=sa.exc.DBAPIError, - ), + pytest.mark.notyet(["exasol"], raises=ExaQueryError), pytest.mark.notyet( ["impala"], raises=ImpalaHiveServer2Error, @@ -1693,10 +1688,7 @@ def test_static_table_slice(backend, slc, expected_count_fn): raises=sa.exc.InternalError, reason="risingwave doesn't support limit/offset", ) -@pytest.mark.notimpl( - ["exasol"], - raises=sa.exc.CompileError, -) +@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.notyet( ["clickhouse"], raises=ClickHouseDatabaseError, @@ -1746,7 +1738,7 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): raises=TrinoUserError, reason="backend doesn't support dynamic limit/offset", ) -@pytest.mark.notimpl(["exasol"], raises=sa.exc.CompileError) +@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.notyet( ["clickhouse"], raises=ClickHouseDatabaseError, @@ -1800,7 +1792,6 @@ def test_dynamic_table_slice_with_computed_offset(backend): "flink", "polars", "snowflake", - "exasol", ] ) @pytest.mark.notimpl( @@ -1829,7 +1820,6 @@ def test_sample(backend): "flink", "polars", "snowflake", - "exasol", ] ) @pytest.mark.notimpl( @@ -1893,7 +1883,6 @@ def test_substitute(backend): ["dask", "pandas", "polars"], raises=NotImplementedError, reason="not a SQL backend" ) @pytest.mark.notimpl(["flink"], reason="no sqlglot dialect", raises=ValueError) -@pytest.mark.notimpl(["exasol"], raises=ValueError, reason="unknown dialect") @pytest.mark.notimpl( ["risingwave"], raises=ValueError, diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index 6eed7380d2f5..2104893321ef 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd import pytest -import sqlalchemy as sa from packaging.version import parse as vparse from pytest import param @@ -43,7 +42,14 @@ def check_eq(left, right, how, **kwargs): [ "inner", "left", - "right", + param( + 
"right", + marks=[ + pytest.mark.broken( + ["exasol"], raises=AssertionError, reasons="results don't match" + ) + ], + ), param( "outer", # TODO: mysql will likely never support full outer join @@ -55,12 +61,14 @@ def check_eq(left, right, how, **kwargs): + ["sqlite"] * (vparse(sqlite3.sqlite_version) < vparse("3.39")) ), pytest.mark.xfail_version(datafusion=["datafusion<31"]), + pytest.mark.broken( + ["exasol"], raises=AssertionError, reasons="results don't match" + ), ], ), ], ) @pytest.mark.notimpl(["druid"]) -@pytest.mark.notimpl(["exasol"], raises=AttributeError) def test_mutating_join(backend, batting, awards_players, how): left = batting[batting.yearID == 2015] right = awards_players[awards_players.lgID == "NL"].drop("yearID", "lgID") @@ -109,7 +117,7 @@ def test_mutating_join(backend, batting, awards_players, how): @pytest.mark.parametrize("how", ["semi", "anti"]) -@pytest.mark.notimpl(["dask", "druid", "exasol"]) +@pytest.mark.notimpl(["dask", "druid"]) @pytest.mark.notyet(["flink"], reason="Flink doesn't support semi joins or anti joins") def test_filtering_join(backend, batting, awards_players, how): left = batting[batting.yearID == 2015] @@ -139,7 +147,6 @@ def test_filtering_join(backend, batting, awards_players, how): backend.assert_frame_equal(result, expected, check_like=True) -@pytest.mark.notimpl(["exasol"], raises=com.IbisTypeError) def test_join_then_filter_no_column_overlap(awards_players, batting): left = batting[batting.yearID == 2015] year = left.yearID.name("year") @@ -152,7 +159,6 @@ def test_join_then_filter_no_column_overlap(awards_players, batting): assert not q.execute().empty -@pytest.mark.notimpl(["exasol"], raises=com.IbisTypeError) def test_mutate_then_join_no_column_overlap(batting, awards_players): left = batting.mutate(year=batting.yearID).filter(lambda t: t.year == 2015) left = left["year", "RBI"] @@ -175,7 +181,6 @@ def test_mutate_then_join_no_column_overlap(batting, awards_players): param(lambda left, right: left.join(right, "year", how="semi"), id="how_semi"), ], ) -@pytest.mark.notimpl(["exasol"], raises=com.IbisTypeError) def test_semi_join_topk(batting, awards_players, func): batting = batting.mutate(year=batting.yearID) left = func(batting, batting.year.topk(5)).select("year", "RBI") @@ -198,7 +203,7 @@ def test_join_with_pandas(batting, awards_players): assert df.yearID.nunique() == 7 -@pytest.mark.notimpl(["dask", "exasol"]) +@pytest.mark.notimpl(["dask"]) def test_join_with_pandas_non_null_typed_columns(batting, awards_players): batting_filt = batting[lambda t: t.yearID < 1900][["yearID"]] awards_players_filt = awards_players[lambda t: t.yearID < 1900][ @@ -271,10 +276,6 @@ def test_join_with_pandas_non_null_typed_columns(batting, awards_players): raises=TypeError, reason="dask doesn't support join predicates", ) -@pytest.mark.notimpl( - ["exasol"], - raises=com.IbisTypeError, -) def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_value): n = 5 @@ -299,9 +300,6 @@ def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_valu ) -@pytest.mark.notimpl( - ["exasol"], raises=sa.exc.NoSuchTableError, reason="`win` table isn't loaded" -) @pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError) @pytest.mark.notimpl(["flink"], reason="`win` table isn't loaded") @pytest.mark.parametrize( diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index cc4b54067519..01b41d73e4f5 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ 
-191,10 +191,6 @@ "Expected np.float16 instance", raises=ArrowNotImplementedError, ), - pytest.mark.notimpl( - ["exasol"], - raises=ExaQueryError, - ), ], id="float16", ), @@ -212,12 +208,6 @@ "risingwave": "numeric", "flink": "FLOAT NOT NULL", }, - marks=[ - pytest.mark.notimpl( - ["exasol"], - raises=ExaQueryError, - ), - ], id="float32", ), param( @@ -234,12 +224,6 @@ "risingwave": "numeric", "flink": "DOUBLE NOT NULL", }, - marks=[ - pytest.mark.notimpl( - ["exasol"], - raises=ExaQueryError, - ), - ], id="float64", ), ], @@ -265,6 +249,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "sqlite": 1.1, "trino": decimal.Decimal("1.1"), "dask": decimal.Decimal("1.1"), + "exasol": decimal.Decimal("1"), "duckdb": decimal.Decimal("1.1"), "risingwave": 1.1, "impala": decimal.Decimal("1"), @@ -281,6 +266,7 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": "NUMERIC", "snowflake": "DECIMAL", + "exasol": "DECIMAL(18,0)", "sqlite": "real", "impala": "DECIMAL(9,0)", "trino": "decimal(18,3)", @@ -290,10 +276,9 @@ def test_numeric_literal(con, backend, expr, expected_types): "flink": "DECIMAL(38, 18) NOT NULL", }, marks=[ - pytest.mark.notimpl(["exasol"], raises=ExaQueryError), pytest.mark.notimpl( ["clickhouse"], - "Unsupported precision. Supported values: [1 : 76]. Current value: None", + reason="precision must be specified; clickhouse doesn't have a default", raises=NotImplementedError, ), ], @@ -464,6 +449,7 @@ def test_numeric_literal(con, backend, expr, expected_types): raises=SnowflakeProgrammingError, ), pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), + pytest.mark.notyet(["exasol"], raises=ExaQueryError), ], id="decimal-infinity+", ), @@ -540,6 +526,7 @@ def test_numeric_literal(con, backend, expr, expected_types): reason="can't cast infinity to decimal", ), pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), + pytest.mark.notyet(["exasol"], raises=ExaQueryError), ], id="decimal-infinity-", ), @@ -628,6 +615,7 @@ def test_numeric_literal(con, backend, expr, expected_types): reason="can't cast nan to decimal", ), pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), + pytest.mark.notyet(["exasol"], raises=ExaQueryError), ], id="decimal-NaN", ), @@ -767,33 +755,13 @@ def test_isnan_isinf( ["datafusion"], raises=com.OperationNotDefinedError ), ), - param( - L(5.5).round(), - 6.0, - id="round", - ), - param( - L(5.556).round(2), - 5.56, - id="round-digits", - ), + param(L(5.5).round(), 6.0, id="round"), + param(L(5.556).round(2), 5.56, id="round-digits"), param(L(5.556).ceil(), 6.0, id="ceil"), param(L(5.556).floor(), 5.0, id="floor"), - param( - L(5.556).exp(), - math.exp(5.556), - id="exp", - ), - param( - L(5.556).sign(), - 1, - id="sign-pos", - ), - param( - L(-5.556).sign(), - -1, - id="sign-neg", - ), + param(L(5.556).exp(), math.exp(5.556), id="exp"), + param(L(5.556).sign(), 1, id="sign-pos"), + param(L(-5.556).sign(), -1, id="sign-neg"), param( L(0).sign(), 0, @@ -810,10 +778,6 @@ def test_isnan_isinf( math.log(5.556, 2), id="log-base", marks=[ - pytest.mark.notimpl( - ["exasol"], - raises=com.OperationNotDefinedError, - ), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -827,15 +791,12 @@ def test_isnan_isinf( math.log(5.556), id="ln", ), + param(L(5.556).ln(), math.log(5.556), id="ln"), param( L(5.556).log2(), math.log(5.556, 2), id="log2", marks=[ - pytest.mark.notimpl( - ["exasol"], - raises=com.OperationNotDefinedError, - ), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ 
-866,6 +827,10 @@ def test_isnan_isinf( marks=pytest.mark.notimpl(["exasol"], raises=ExaQueryError), id="mod", ), + param(L(5.556).log10(), math.log10(5.556), id="log10"), + param(L(5.556).radians(), math.radians(5.556), id="radians"), + param(L(5.556).degrees(), math.degrees(5.556), id="degrees"), + param(L(11) % 3, 11 % 3, id="mod"), ], ) def test_math_functions_literals(con, expr, expected): @@ -998,7 +963,6 @@ def test_simple_math_functions_columns( lambda t: t.double_col.add(1).log(2), lambda t: np.log2(t.double_col + 1), marks=[ - pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -1016,7 +980,6 @@ def test_simple_math_functions_columns( param( lambda t: t.double_col.add(1).log10(), lambda t: np.log10(t.double_col + 1), - marks=pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), id="log10", ), param( @@ -1031,7 +994,6 @@ def test_simple_math_functions_columns( ), id="log_base_bigint", marks=[ - pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), pytest.mark.notimpl( ["datafusion"], raises=com.OperationNotDefinedError ), @@ -1133,11 +1095,12 @@ def test_backend_specific_numerics(backend, con, df, alltypes, expr_fn, expected operator.mul, operator.truediv, operator.floordiv, - operator.pow, + param( + operator.pow, marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)] + ), ], ids=lambda op: op.__name__, ) -@pytest.mark.notimpl(["exasol"], raises=AttributeError) def test_binary_arithmetic_operations(backend, alltypes, df, op): smallint_col = alltypes.smallint_col + 1 # make it nonzero smallint_series = df.smallint_col + 1 @@ -1155,7 +1118,6 @@ def test_binary_arithmetic_operations(backend, alltypes, df, op): backend.assert_series_equal(result, expected, check_exact=False) -@pytest.mark.notimpl(["exasol"], raises=AttributeError) def test_mod(backend, alltypes, df): expr = operator.mod(alltypes.smallint_col, alltypes.smallint_col + 1).name("tmp") @@ -1182,7 +1144,6 @@ def test_mod(backend, alltypes, df): "Cannot apply '%' to arguments of type ' % '. 
Supported form(s): ' % ", raises=Py4JError, ) -@pytest.mark.notimpl(["exasol"], raises=AttributeError) def test_floating_mod(backend, alltypes, df): expr = operator.mod(alltypes.double_col, alltypes.smallint_col + 1).name("tmp") @@ -1339,7 +1300,7 @@ def test_floating_mod(backend, alltypes, df): @pytest.mark.notyet(["mssql"], raises=(sa.exc.OperationalError, sa.exc.DataError)) @pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) @pytest.mark.notyet(["postgres"], raises=PsycoPg2DivisionByZero) -@pytest.mark.notimpl(["exasol"], raises=(sa.exc.DBAPIError, com.IbisTypeError)) +@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) def test_divide_by_zero(backend, alltypes, df, column, denominator): expr = alltypes[column] / denominator result = expr.name("tmp").execute() @@ -1455,13 +1416,7 @@ def test_random(con): [ param(lambda x: x.clip(lower=0), lambda x: x.clip(lower=0), id="lower-int"), param( - lambda x: x.clip(lower=0.0), - lambda x: x.clip(lower=0.0), - marks=pytest.mark.notimpl( - "exasol", - raises=ExaQueryError, - ), - id="lower-float", + lambda x: x.clip(lower=0.0), lambda x: x.clip(lower=0.0), id="lower-float" ), param(lambda x: x.clip(upper=0), lambda x: x.clip(upper=0), id="upper-int"), param( @@ -1482,10 +1437,6 @@ def test_random(con): param( lambda x: x.clip(lower=0, upper=1.0), lambda x: x.clip(lower=0, upper=1.0), - marks=pytest.mark.notimpl( - "exasol", - raises=ExaQueryError, - ), id="lower-upper-float", ), param( @@ -1509,7 +1460,7 @@ def test_clip(backend, alltypes, df, ibis_func, pandas_func): backend.assert_series_equal(result, expected, check_names=False) -@pytest.mark.notimpl(["polars", "exasol"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["druid"], raises=PyDruidProgrammingError, @@ -1623,9 +1574,8 @@ def test_bitwise_scalars(con, op, left, right): assert result == expected -@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["datafusion", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) -@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @flink_no_bitwise def test_bitwise_not_scalar(con): expr = ~L(2) @@ -1634,9 +1584,8 @@ def test_bitwise_not_scalar(con): assert result == expected -@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["datafusion", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) -@pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) @flink_no_bitwise def test_bitwise_not_col(backend, alltypes, df): expr = (~alltypes.int_col).name("tmp") diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 74db927d928d..8b17e69a6930 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -11,22 +11,22 @@ sa = pytest.importorskip("sqlalchemy") sg = pytest.importorskip("sqlglot") -pytestmark = pytest.mark.notimpl(["flink", "exasol", "risingwave"]) +pytestmark = pytest.mark.notimpl(["flink", "risingwave"]) simple_literal = param(ibis.literal(1), id="simple_literal") array_literal = param( ibis.array([1]), marks=[ pytest.mark.never( - ["mysql", "mssql", "oracle", "impala", "sqlite"], - raises=exc.OperationNotDefinedError, + ["mysql", "mssql", "oracle", "impala", "sqlite", "exasol"], + raises=(exc.OperationNotDefinedError, exc.UnsupportedBackendType), reason="arrays not supported in the 
backend", ), ], id="array_literal", ) no_structs = pytest.mark.never( - ["impala", "mysql", "sqlite", "mssql"], + ["impala", "mysql", "sqlite", "mssql", "exasol"], raises=(NotImplementedError, sa.exc.CompileError, exc.UnsupportedBackendType), reason="structs not supported in the backend", ) @@ -117,7 +117,9 @@ def test_isin_bug(con, snapshot): raises=NotImplementedError, ) @pytest.mark.notyet( - ["datafusion"], reason="no unnest support", raises=exc.OperationNotDefinedError + ["datafusion", "exasol"], + reason="no unnest support", + raises=exc.OperationNotDefinedError, ) @pytest.mark.notyet( ["sqlite", "mysql", "druid", "impala", "mssql"], reason="no unnest support upstream" diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index 0ba91bb67050..55559a2efb1d 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -961,13 +961,14 @@ def test_capitalize(con): ["dask", "pandas", "polars", "oracle", "flink"], raises=com.OperationNotDefinedError ) @pytest.mark.notyet( - ["mssql", "sqlite", "exasol"], - reason="no arrays", - raises=com.OperationNotDefinedError, + ["mssql", "sqlite"], reason="no arrays", raises=com.OperationNotDefinedError ) @pytest.mark.never( ["mysql"], raises=com.OperationNotDefinedError, reason="no array support" ) +@pytest.mark.never( + ["exasol"], raises=com.UnsupportedBackendType, reason="no array support" +) @pytest.mark.notimpl( ["impala"], raises=com.UnsupportedBackendType, reason="no array support" ) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 005bea8ad105..5643369051f7 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -47,7 +47,6 @@ raises=AttributeError, reason="Can only use .dt accessor with datetimelike values", ) -@pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) def test_date_extract(backend, alltypes, df, attr, expr_fn): expr = getattr(expr_fn(alltypes.timestamp_col), attr)() expected = getattr(df.timestamp_col.dt, attr).astype("int32") @@ -60,13 +59,9 @@ def test_date_extract(backend, alltypes, df, attr, expr_fn): @pytest.mark.parametrize( "attr", [ - param( - "year", marks=[pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError)] - ), - param( - "month", marks=[pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError)] - ), - param("day", marks=[pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError)]), + "year", + "month", + "day", param( "day_of_year", marks=[ @@ -80,24 +75,26 @@ def test_date_extract(backend, alltypes, df, attr, expr_fn): "quarter", marks=[ pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError), - pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError), + pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), + "hour", + "minute", param( - "hour", marks=[pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError)] - ), - param( - "minute", marks=[pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError)] - ), - param( - "second", marks=[pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError)] + "second", + marks=[ + pytest.mark.broken( + ["exasol"], + raises=AssertionError, + reason="seems like exasol might be rounding", + ) + ], ), ], ) -@pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["druid"], - raises=AttributeError, + raises=(AttributeError, com.OperationNotDefinedError), reason="AttributeError: 'StringColumn' object has no attribute 'X'", ) def test_timestamp_extract(backend, alltypes, df, 
attr): @@ -113,42 +110,12 @@ def test_timestamp_extract(backend, alltypes, df, attr): @pytest.mark.parametrize( ("func", "expected"), [ - param( - methodcaller("year"), - 2015, - id="year", - marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], - ), - param( - methodcaller("month"), - 9, - id="month", - marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], - ), - param( - methodcaller("day"), - 1, - id="day", - marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], - ), - param( - methodcaller("hour"), - 14, - id="hour", - marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], - ), - param( - methodcaller("minute"), - 48, - id="minute", - marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], - ), - param( - methodcaller("second"), - 5, - id="second", - marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], - ), + param(methodcaller("year"), 2015, id="year"), + param(methodcaller("month"), 9, id="month"), + param(methodcaller("day"), 1, id="day"), + param(methodcaller("hour"), 14, id="hour"), + param(methodcaller("minute"), 48, id="minute"), + param(methodcaller("second"), 5, id="second"), param( methodcaller("millisecond"), 359, @@ -262,13 +229,12 @@ def test_timestamp_extract_epoch_seconds(backend, alltypes, df): backend.assert_series_equal(result, expected) -@pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["oracle", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["druid"], raises=AttributeError, reason="'StringColumn' object has no attribute 'week_of_year'", ) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_timestamp_extract_week_of_year(backend, alltypes, df): expr = alltypes.timestamp_col.week_of_year().name("tmp") result = expr.execute() @@ -344,7 +310,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): param( "W", marks=[ - pytest.mark.broken(["sqlite"], raises=AssertionError), + pytest.mark.broken(["sqlite", "exasol"], raises=AssertionError), pytest.mark.notimpl(["mysql"], raises=com.UnsupportedOperationError), pytest.mark.broken( ["polars"], @@ -480,7 +446,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): reason="attempt to calculate the remainder with a divisor of zero", ), pytest.mark.notimpl( - ["flink"], + ["flink", "exasol"], raises=com.UnsupportedOperationError, reason=" unit is not supported in timestamp truncate", ), @@ -488,13 +454,12 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): ), ], ) -@pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["oracle", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["druid"], raises=AttributeError, reason="AttributeError: 'StringColumn' object has no attribute 'truncate'", ) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_timestamp_truncate(backend, alltypes, df, unit): expr = alltypes.timestamp_col.truncate(unit).name("tmp") @@ -565,6 +530,11 @@ def test_timestamp_truncate(backend, alltypes, df, unit): "Timestamp truncation is not supported in Flink" ), ), + pytest.mark.broken( + ["exasol"], + raises=AssertionError, + reason="behavior is different than expected", + ), ], ), ], @@ -581,7 +551,6 @@ def test_timestamp_truncate(backend, alltypes, df, unit): raises=AttributeError, reason="AttributeError: 'StringColumn' object has no attribute 'date'", ) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def 
test_date_truncate(backend, alltypes, df, unit): expr = alltypes.timestamp_col.date().truncate(unit).name("tmp") @@ -848,8 +817,7 @@ def convert_to_offset(x): id="timestamp-add-interval", marks=[ pytest.mark.notimpl( - ["sqlite"], - raises=com.OperationNotDefinedError, + ["sqlite", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["druid"], @@ -872,6 +840,7 @@ def convert_to_offset(x): "snowflake", "sqlite", "bigquery", + "exasol" ], raises=com.OperationNotDefinedError, ), @@ -898,6 +867,7 @@ def convert_to_offset(x): "polars", "snowflake", "bigquery", + "exasol" ], raises=com.OperationNotDefinedError, ), @@ -921,8 +891,7 @@ def convert_to_offset(x): reason="unsupported operand type(s) for -: 'StringColumn' and 'IntervalScalar'", ), pytest.mark.notimpl( - ["sqlite"], - raises=com.OperationNotDefinedError, + ["sqlite", "exasol"], raises=com.OperationNotDefinedError ), ], ), @@ -941,6 +910,7 @@ def convert_to_offset(x): raises=AttributeError, reason="'StringColumn' object has no attribute 'date'", ), + pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), param( @@ -958,6 +928,7 @@ def convert_to_offset(x): raises=AttributeError, reason="'StringColumn' object has no attribute 'date'", ), + pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), param( @@ -998,6 +969,7 @@ def convert_to_offset(x): raises=Exception, reason="pyarrow.lib.ArrowInvalid: Casting from duration[us] to duration[s] would lose data", ), + pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), param( @@ -1040,7 +1012,6 @@ def convert_to_offset(x): ], ) @pytest.mark.notimpl(["mssql", "oracle"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn): expr = expr_fn(alltypes, backend).name("tmp") expected = expected_fn(df, backend) @@ -1284,7 +1255,6 @@ def test_temporal_binop_pandas_timedelta( raises=AttributeError, reason="Can only use .dt accessor with datetimelike values", ) -@pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) def test_timestamp_comparison_filter(backend, con, alltypes, df, func_name): ts = pd.Timestamp("20100302", tz="UTC").to_pydatetime() @@ -1842,14 +1812,13 @@ def test_now_from_projection(alltypes): } -@pytest.mark.notimpl(["pandas", "dask"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["pandas", "dask", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["druid"], raises=PyDruidProgrammingError, reason="SQL parse failed" ) @pytest.mark.notimpl( ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00936 missing expression" ) -@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -1880,13 +1849,13 @@ def test_date_literal(con, backend): @pytest.mark.notimpl( - ["pandas", "dask", "pyspark", "mysql"], raises=com.OperationNotDefinedError + ["pandas", "dask", "pyspark", "mysql", "exasol"], + raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00904: MAKE TIMESTAMP invalid" ) @pytest.mark.notyet(["impala"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -1905,7 +1874,8 @@ def test_timestamp_literal(con, backend): @pytest.mark.notimpl( - ["pandas", "mysql", "dask", "pyspark"], raises=com.OperationNotDefinedError + 
["pandas", "mysql", "dask", "pyspark", "exasol"], + raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( ["sqlite"], @@ -1950,7 +1920,6 @@ def test_timestamp_literal(con, backend): ", , , )" ), ) -@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -1980,10 +1949,11 @@ def test_timestamp_with_timezone_literal(con, timezone, expected): ["pandas", "datafusion", "dask", "pyspark", "polars", "mysql"], raises=com.OperationNotDefinedError, ) -@pytest.mark.notyet(["clickhouse", "impala"], raises=com.OperationNotDefinedError) +@pytest.mark.notyet( + ["clickhouse", "impala", "exasol"], raises=com.OperationNotDefinedError +) @pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -2124,7 +2094,7 @@ def test_interval_literal(con, backend): assert con.execute(expr.typeof()) == INTERVAL_BACKEND_TYPES[backend_name] -@pytest.mark.notimpl(["pandas", "dask"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["pandas", "dask", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["druid"], raises=AttributeError, @@ -2133,7 +2103,6 @@ def test_interval_literal(con, backend): @pytest.mark.broken( ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00936: missing expression" ) -@pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -2150,7 +2119,8 @@ def test_date_column_from_ymd(backend, con, alltypes, df): @pytest.mark.notimpl( - ["pandas", "dask", "pyspark", "mysql"], raises=com.OperationNotDefinedError + ["pandas", "dask", "pyspark", "mysql", "exasol"], + raises=com.OperationNotDefinedError, ) @pytest.mark.broken( ["druid"], @@ -2161,7 +2131,6 @@ def test_date_column_from_ymd(backend, con, alltypes, df): ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00904 make timestamp invalid" ) @pytest.mark.notyet(["impala"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -2224,16 +2193,10 @@ def test_timestamp_extract_milliseconds_with_big_value(con): @pytest.mark.notimpl( ["datafusion"], raises=Exception, - reason=( - "This feature is not implemented: Unsupported CAST from Int32 to Timestamp(Nanosecond, None)" - ), + reason="Unsupported CAST from Int32 to Timestamp(Nanosecond, None)", ) -@pytest.mark.notimpl( - ["oracle"], - raises=sa.exc.DatabaseError, - reason="ORA-00932", -) -@pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) +@pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00932") +@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) def test_integer_cast_to_timestamp_column(backend, alltypes, df): expr = alltypes.int_col.cast("timestamp") expected = pd.to_datetime(df.int_col, unit="s").rename(expr.get_name()) @@ -2242,7 +2205,7 @@ def test_integer_cast_to_timestamp_column(backend, alltypes, df): @pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) -@pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) +@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) def test_integer_cast_to_timestamp_scalar(alltypes, df): expr = alltypes.int_col.min().cast("timestamp") result = expr.execute() @@ -2344,7 +2307,6 @@ def test_timestamp_date_comparison(backend, alltypes, df, 
left_fn, right_fn): reason="Casting from timestamp[s] to timestamp[ns] would result in out of bounds timestamp: 81953424000", raises=ArrowInvalid, ) -@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) def test_large_timestamp(con): huge_timestamp = datetime.datetime(year=4567, month=1, day=1) expr = ibis.timestamp("4567-01-01 00:00:00") @@ -2377,6 +2339,7 @@ def test_large_timestamp(con): reason="time_parse truncates to milliseconds", raises=AssertionError, ), + pytest.mark.notimpl(["exasol"], raises=AssertionError), ], ), param( @@ -2428,6 +2391,7 @@ def test_large_timestamp(con): raises=sa.exc.InternalError, reason="Parse error: timestamp without time zone Can't cast string to timestamp (expected format is YYYY-MM-DD HH:MM:SS[.D+{up to 6 digits}] or YYYY-MM-DD HH:MM or YYYY-MM-DD or ISO 8601 format)", ), + pytest.mark.notimpl(["exasol"], raises=AssertionError), ], ), ], @@ -2437,7 +2401,6 @@ def test_large_timestamp(con): raises=sa.exc.DatabaseError, reason="ORA-01843: invalid month was specified", ) -@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) def test_timestamp_precision_output(con, ts, scale, unit): dtype = dt.Timestamp(scale=scale) expr = ibis.literal(ts).cast(dtype) @@ -2576,10 +2539,7 @@ def test_delta(con, start, end, unit, expected): {"seconds": 2}, "2s", marks=[ - pytest.mark.notimpl( - ["datafusion"], - raises=com.OperationNotDefinedError, - ), + pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) ], id="seconds", ), @@ -2587,10 +2547,7 @@ def test_delta(con, start, end, unit, expected): {"minutes": 5}, "300s", marks=[ - pytest.mark.notimpl( - ["datafusion"], - raises=com.OperationNotDefinedError, - ), + pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) ], id="minutes", ), @@ -2598,10 +2555,7 @@ def test_delta(con, start, end, unit, expected): {"hours": 2}, "2h", marks=[ - pytest.mark.notimpl( - ["datafusion"], - raises=com.OperationNotDefinedError, - ), + pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) ], id="hours", ), diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 4378d1d4dd84..ccccc9571ad6 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -14,6 +14,7 @@ import ibis.expr.datatypes as dt from ibis.backends.tests.errors import ( ClickHouseDatabaseError, + ExaQueryError, GoogleBadRequest, ImpalaHiveServer2Error, MySQLOperationalError, @@ -24,17 +25,9 @@ from ibis.legacy.udf.vectorized import analytic, reduction pytestmark = [ - pytest.mark.notimpl( - ["exasol"], - raises=( - sa.exc.ProgrammingError, - sa.exc.NoSuchTableError, - com.OperationNotDefinedError, - ), - ), pytest.mark.notimpl( ["druid"], raises=(com.OperationNotDefinedError, PyDruidProgrammingError) - ), + ) ] @@ -163,7 +156,9 @@ def calc_zscore(s): lambda t: t.id.rank(method="min") / t.id.transform(len), id="cume_dist", marks=[ - pytest.mark.notyet(["clickhouse"], raises=com.OperationNotDefinedError), + pytest.mark.notyet( + ["clickhouse", "exasol"], raises=com.OperationNotDefinedError + ), pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl( @@ -208,13 +203,19 @@ def calc_zscore(s): lambda t, win: t.float_col.first().over(win), lambda t: t.float_col.transform("first"), id="first", - marks=pytest.mark.notimpl(["dask"], raises=NotImplementedError), + marks=[ + pytest.mark.notimpl(["dask"], raises=NotImplementedError), + pytest.mark.notimpl(["exasol"], 
raises=com.OperationNotDefinedError), + ], ), param( lambda t, win: t.float_col.last().over(win), lambda t: t.float_col.transform("last"), id="last", - marks=pytest.mark.notimpl(["dask"], raises=NotImplementedError), + marks=[ + pytest.mark.notimpl(["dask"], raises=NotImplementedError), + pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), + ], ), param( lambda t, win: t.double_col.nth(3).over(win), @@ -430,6 +431,7 @@ def test_grouped_bounded_expanding_window( "snowflake", "datafusion", "trino", + "exasol", ], raises=com.OperationNotDefinedError, ), @@ -589,6 +591,7 @@ def test_grouped_bounded_preceding_window(backend, alltypes, df, window_fn): "snowflake", "trino", "datafusion", + "exasol", ], raises=com.OperationNotDefinedError, ), @@ -785,6 +788,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): "snowflake", "trino", "datafusion", + "exasol", ], raises=com.OperationNotDefinedError, ), @@ -816,6 +820,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): "snowflake", "trino", "datafusion", + "exasol", ], raises=com.OperationNotDefinedError, ), @@ -941,6 +946,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): "snowflake", "trino", "datafusion", + "exasol", ], raises=com.OperationNotDefinedError, ), @@ -973,6 +979,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): "snowflake", "trino", "datafusion", + "exasol", ], raises=com.OperationNotDefinedError, ), @@ -1164,7 +1171,7 @@ def test_mutate_window_filter(backend, alltypes): backend.assert_frame_equal(res, sol, check_dtype=False) -@pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["polars", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["flink"], raises=Exception, @@ -1226,6 +1233,11 @@ def test_first_last(backend): raises=sa.exc.InternalError, reason="sql parser error: Expected literal int, found: INTERVAL at line:1, column:99", ) +@pytest.mark.broken( + ["exasol"], + raises=ExaQueryError, + reason="database can't handle UTC timestamps in DataFrames", +) def test_range_expression_bounds(backend): t = ibis.memtable( { diff --git a/poetry.lock b/poetry.lock index 2bc9f08730ab..199a9f108391 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4691,6 +4691,7 @@ files = [ [package.dependencies] packaging = "*" +pandas = {version = "*", optional = true, markers = "extra == \"pandas\""} pyopenssl = "*" rsa = "*" websocket-client = ">=1.0.1" @@ -6352,40 +6353,6 @@ postgresql-psycopg2cffi = ["psycopg2cffi"] pymysql = ["pymysql", "pymysql (<1)"] sqlcipher = ["sqlcipher3_binary"] -[[package]] -name = "sqlalchemy-exasol" -version = "4.6.3" -description = "EXASOL dialect for SQLAlchemy" -optional = true -python-versions = ">=3.8,<4.0" -files = [ - {file = "sqlalchemy_exasol-4.6.3-py3-none-any.whl", hash = "sha256:d524d14bd84935087fb4e9fed273c1b5f6d23f0008ef3460a0278aa332e646ea"}, - {file = "sqlalchemy_exasol-4.6.3.tar.gz", hash = "sha256:03a424886cc90480a2127ca0531779e8b0a415d4b113d85dd23025d6c0b52cd3"}, -] - -[package.dependencies] -packaging = ">=21.3" -pyexasol = ">=0.25.1,<0.26.0" -pyodbc = ">=4.0.34,<6" -sqlalchemy = ">=1.4,<2" - -[package.extras] -turbodbc = ["turbodbc (==4.5.4)"] - -[[package]] -name = "sqlalchemy-risingwave" -version = "1.0.0" -description = "RisingWave dialect for SQLAlchemy" -optional = true -python-versions = "*" -files = [ - {file = "sqlalchemy-risingwave-1.0.0.tar.gz", hash = "sha256:856a3c44b98cba34d399c3cc9785a74896caca152b3685d87553e4210e3e07a4"}, - {file = 
"sqlalchemy_risingwave-1.0.0-py3-none-any.whl", hash = "sha256:c733365abc38e88f4d23d83713cfc3f21c0b0d3c81210cbc2f569b49a912ba08"}, -] - -[package.dependencies] -SQLAlchemy = ">=1.4,<2" - [[package]] name = "sqlalchemy-views" version = "0.3.2" @@ -7332,7 +7299,7 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\ cffi = ["cffi (>=1.11)"] [extras] -all = ["black", "clickhouse-connect", "dask", "datafusion", "db-dtypes", "deltalake", "duckdb", "geopandas", "google-cloud-bigquery", "google-cloud-bigquery-storage", "graphviz", "impyla", "oracledb", "packaging", "pins", "polars", "psycopg2", "pydata-google-auth", "pydruid", "pymysql", "pyodbc", "pyspark", "regex", "shapely", "snowflake-connector-python", "sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views", "trino"] +all = ["black", "clickhouse-connect", "dask", "datafusion", "db-dtypes", "deltalake", "duckdb", "geopandas", "google-cloud-bigquery", "google-cloud-bigquery-storage", "graphviz", "impyla", "oracledb", "packaging", "pins", "polars", "psycopg2", "pydata-google-auth", "pydruid", "pyexasol", "pymysql", "pyodbc", "pyspark", "regex", "shapely", "snowflake-connector-python", "sqlalchemy", "sqlalchemy-views", "trino"] bigquery = ["db-dtypes", "google-cloud-bigquery", "google-cloud-bigquery-storage", "pydata-google-auth"] clickhouse = ["clickhouse-connect"] dask = ["dask", "regex"] @@ -7342,7 +7309,7 @@ deltalake = ["deltalake"] druid = ["pydruid"] duckdb = ["duckdb"] examples = ["pins"] -exasol = ["sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views"] +exasol = ["pyexasol"] flink = [] geospatial = ["geopandas", "shapely"] impala = ["impyla"] @@ -7362,4 +7329,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "1939863bb76e53c0c8a1575ffe8fd2e035e6768ac21682fe12a9e640ffe3ade1" +content-hash = "3fcc813731a54acc626f4e5d124030eeeff9ce304dd2851b16dfdf89ab529d01" diff --git a/pyproject.toml b/pyproject.toml index 53d58ee328f6..69b029a66af0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,6 +77,7 @@ polars = { version = ">=0.19.3,<1", optional = true } psycopg2 = { version = ">=2.8.4,<3", optional = true } pydata-google-auth = { version = ">=1.4.0,<2", optional = true } pydruid = { version = ">=0.6.5,<1", optional = true } +pyexasol = { version = ">=0.25.2,<1", optional = true, extras = ["pandas"] } pymysql = { version = ">=1,<2", optional = true } pyodbc = { version = ">=4.0.39,<6", optional = true } pyspark = { version = ">=3,<4", optional = true } @@ -87,7 +88,6 @@ shapely = { version = ">=2,<3", optional = true } # issues with versions <3.0.2 snowflake-connector-python = { version = ">=3.0.2,<4,!=3.3.0b1", optional = true } sqlalchemy = { version = ">=1.4,<3", optional = true } -sqlalchemy-exasol = { version = ">=4.6.0", optional = true } sqlalchemy-views = { version = ">=0.3.1,<1", optional = true } sqlalchemy-risingwave = { version = ">=1.0.0,<2", optional = true } trino = { version = ">=0.321,<1", optional = true } @@ -161,6 +161,7 @@ all = [ "psycopg2", "pydata-google-auth", "pydruid", + "pyexasol", "pymysql", "pyodbc", "pyspark", @@ -168,7 +169,6 @@ all = [ "shapely", "snowflake-connector-python", "sqlalchemy", - "sqlalchemy-exasol", "sqlalchemy-views", "sqlalchemy-risingwave", "trino", @@ -184,7 +184,7 @@ dask = ["dask", "regex"] datafusion = ["datafusion"] druid = ["pydruid"] duckdb = ["duckdb"] -exasol = ["sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views"] +exasol = ["pyexasol"] flink = [] geospatial = ["geopandas", "shapely"] impala = 
["impyla"] diff --git a/requirements-dev.txt b/requirements-dev.txt index b4ad86ee9fb2..53a3afa07041 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -170,14 +170,14 @@ py4j==0.10.9.7 ; python_version >= "3.9" and python_version < "4.0" pyarrow-hotfix==0.6 ; python_version >= "3.9" and python_version < "4.0" pyarrow==15.0.0 ; python_version >= "3.9" and python_version < "4.0" pyasn1-modules==0.3.0 ; python_version >= "3.9" and python_version < "4.0" -pyasn1==0.5.1 ; python_version >= "3.9" and python_version < "4.0" +pyasn1==0.5.1 ; python_version >= "3.9" and python_version < "4" pycparser==2.21 ; python_version >= "3.9" and python_version < "4.0" pydantic-core==2.16.1 ; python_version >= "3.10" and python_version < "3.13" pydantic==2.6.0 ; python_version >= "3.10" and python_version < "3.13" pydata-google-auth==1.8.2 ; python_version >= "3.9" and python_version < "4.0" pydeps==1.12.17 ; python_version >= "3.9" and python_version < "4.0" pydruid==0.6.6 ; python_version >= "3.9" and python_version < "4.0" -pyexasol==0.25.2 ; python_version >= "3.9" and python_version < "4.0" +pyexasol[pandas]==0.25.2 ; python_version >= "3.9" and python_version < "4.0" pygments==2.17.2 ; python_version >= "3.9" and python_version < "4.0" pyinstrument==4.6.2 ; python_version >= "3.9" and python_version < "4.0" pyjwt==2.8.0 ; python_version >= "3.9" and python_version < "4.0" @@ -234,8 +234,6 @@ snowflake-connector-python==3.6.0 ; python_version >= "3.9" and python_version < sortedcontainers==2.4.0 ; python_version >= "3.9" and python_version < "4.0" soupsieve==2.5 ; python_version >= "3.10" and python_version < "3.13" sphobjinv==2.3.1 ; python_version >= "3.10" and python_version < "3.13" -sqlalchemy-exasol==4.6.3 ; python_version >= "3.9" and python_version < "4.0" -sqlalchemy-risingwave==1.0.0 ; python_version >= "3.9" and python_version < "4.0" sqlalchemy-views==0.3.2 ; python_version >= "3.9" and python_version < "4.0" sqlalchemy==1.4.51 ; python_version >= "3.9" and python_version < "4.0" sqlglot==20.11.0 ; python_version >= "3.9" and python_version < "4.0" From 8ca3e00f610e360dcdc1eb08db071c6efa6c6f21 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 19 Jan 2024 15:24:20 -0500 Subject: [PATCH 090/161] ci(exasol): run ci serially (#8042) Fixes failing snowflake and exasol tests --- .github/workflows/ibis-backends.yml | 130 ++-------------------------- ibis/backends/tests/test_dot_sql.py | 1 - 2 files changed, 6 insertions(+), 125 deletions(-) diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index 8bdc3c74db5a..984fb1d43835 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -366,7 +366,7 @@ jobs: run: poetry run pip list - name: "run parallel tests: ${{ matrix.backend.name }}" - if: true # ${{ !matrix.backend.serial }} + if: ${{ !matrix.backend.serial }} run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup env: IBIS_TEST_IMPALA_HOST: localhost @@ -387,11 +387,11 @@ jobs: # FLINK_REMOTE_CLUSTER_ADDR: localhost # FLINK_REMOTE_CLUSTER_PORT: "8081" # - # - name: "run serial tests: ${{ matrix.backend.name }}" - # if: matrix.backend.serial && matrix.backend.name != 'flink' - # run: just ci-check -m ${{ matrix.backend.name }} - # env: - # IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} + - name: "run serial tests: ${{ matrix.backend.name }}" + 
if: matrix.backend.serial # && matrix.backend.name != 'flink' + run: just ci-check -m ${{ matrix.backend.name }} + env: + IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} - name: check that no untracked files were produced shell: bash @@ -612,130 +612,12 @@ jobs: with: flags: backend,pyspark,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} - # test_backends_sqlalchemy2: - # name: SQLAlchemy 2 ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }} - # runs-on: ${{ matrix.os }} - # needs: - # - gen_lockfile_sqlalchemy2 - # env: - # ODBCSYSINI: "${{ github.workspace }}/.odbc" - # strategy: - # fail-fast: false - # matrix: - # os: - # - ubuntu-latest - # python-version: - # - "3.11" - # backend: - # - name: mssql - # title: MS SQL Server - # services: - # - mssql - # extras: - # - mssql - # sys-deps: - # - freetds-dev - # - unixodbc-dev - # - tdsodbc - # - name: sqlite - # title: SQLite - # extras: - # - sqlite - # - name: oracle - # title: Oracle - # serial: true - # extras: - # - oracle - # services: - # - oracle - # steps: - # - name: checkout - # uses: actions/checkout@v4 - # - # - name: update and install system dependencies - # if: matrix.backend.sys-deps != null - # run: | - # set -euo pipefail - # - # sudo apt-get update -qq -y - # sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }} - # - # - name: setup odbc for mssql - # if: ${{ matrix.backend.name == 'mssql' }} - # run: | - # mkdir -p "$ODBCSYSINI" - # - # { - # echo '[FreeTDS]' - # echo "Driver = libtdsodbc.so" - # } > "$ODBCSYSINI/odbcinst.ini" - # - # - uses: extractions/setup-just@v1 - # env: - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # - # - name: download backend data - # run: just download-data - # - # - name: start services - # if: matrix.backend.services != null - # run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} - # - # - name: install python - # uses: actions/setup-python@v5 - # id: install_python - # with: - # python-version: ${{ matrix.python-version }} - # - # - name: download poetry lockfile - # uses: actions/download-artifact@v3 - # with: - # name: deps - # path: deps - # - # - name: pull out lockfile - # run: | - # set -euo pipefail - # - # mv -f deps/* . - # rm -r deps - # - # - uses: syphar/restore-virtualenv@v1 - # with: - # requirement_files: poetry.lock - # custom_cache_key_element: ${{ matrix.backend.name }}-${{ steps.install_python.outputs.python-version }} - # - # - uses: syphar/restore-pip-download-cache@v1 - # with: - # requirement_files: poetry.lock - # custom_cache_key_element: ${{ steps.install_python.outputs.python-version }} - # - # - name: install poetry - # run: python -m pip install --upgrade pip 'poetry==1.7.1' - # - # - name: install ibis - # run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" - # - # - name: run tests - # run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup - # - # - name: check that no untracked files were produced - # shell: bash - # run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . 
- # - # - name: upload code coverage - # if: success() - # uses: codecov/codecov-action@v4 - # with: - # flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} - backends: # this job exists so that we can use a single job from this workflow to gate merging runs-on: ubuntu-latest needs: # - test_backends_min_version - test_backends - # - test_backends_sqlalchemy2 - test_pyspark steps: - run: exit 0 diff --git a/ibis/backends/tests/test_dot_sql.py b/ibis/backends/tests/test_dot_sql.py index 6d7fa5d4c08c..38ed9708d54d 100644 --- a/ibis/backends/tests/test_dot_sql.py +++ b/ibis/backends/tests/test_dot_sql.py @@ -29,7 +29,6 @@ @pytest.mark.notimpl(["flink"]) -@dot_sql_notyet @dot_sql_never @pytest.mark.parametrize( "schema", From e31428f772bdc2e42b897c9dd189e3a64b5f1732 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 19 Jan 2024 20:25:00 +0100 Subject: [PATCH 091/161] feat(sql): extract common table expressions --- ibis/backends/base/sqlglot/compiler.py | 61 ++++++++++++++++---------- ibis/backends/base/sqlglot/rewrites.py | 37 +++++++++++++++- 2 files changed, 74 insertions(+), 24 deletions(-) diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index f8e14222c3fe..12432647752f 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -19,7 +19,7 @@ import ibis.common.exceptions as com import ibis.expr.datatypes as dt import ibis.expr.operations as ops -from ibis.backends.base.sqlglot.rewrites import Select, Window, sqlize +from ibis.backends.base.sqlglot.rewrites import CTE, Select, Window, sqlize from ibis.expr.operations.udf import InputType from ibis.expr.rewrites import ( add_one_to_nth_value_input, @@ -243,25 +243,6 @@ def translate(self, op, *, params: Mapping[ir.Value, Any]) -> sge.Expression: sqlglot.expressions.Expression A sqlglot expression """ - - gen_alias_index = itertools.count() - quoted = self.quoted - - def fn(node, _, **kwargs): - result = self.visit_node(node, **kwargs) - - # don't alias root nodes or value ops - if node is op or isinstance(node, ops.Value): - return result - - alias_index = next(gen_alias_index) - alias = sg.to_identifier(f"t{alias_index:d}", quoted=quoted) - - try: - return result.subquery(alias) - except AttributeError: - return result.as_(alias, quoted=quoted) - # substitute parameters immediately to avoid having to define a # ScalarParameter translation rule # @@ -277,11 +258,41 @@ def fn(node, _, **kwargs): op = op.replace( replace_scalar_parameter(params) | reduce(operator.or_, self.rewrites) ) - op = sqlize(op) + op, ctes = sqlize(op) + + aliases = {} + alias_counter = itertools.count() + + def fn(node, _, **kwargs): + result = self.visit_node(node, **kwargs) + + if node is op: + return result + elif isinstance(node, ops.JoinLink): + # TODO(kszucs): this is a hack to preserve the generated table + # aliases, going to remove in a follow-up PR + next(alias_counter) + return result + elif isinstance(node, ops.Relation): + aliases[node] = alias = f"t{next(alias_counter)}" + alias = sg.to_identifier(alias, quoted=self.quoted) + try: + return result.subquery(alias) + except AttributeError: + return result.as_(alias, quoted=self.quoted) + else: + return result + # apply translate rules in topological order results = op.map(fn) - node = results[op] - return node.this if isinstance(node, sge.Subquery) else node + out = results[op] + out = out.this if isinstance(out, sge.Subquery) 
else out + + for cte in ctes: + alias = sg.to_identifier(aliases[cte], quoted=self.quoted) + out = out.with_(alias, as_=results[cte].this, dialect=self.dialect) + + return out @singledispatchmethod def visit_node(self, op: ops.Node, **_): @@ -1223,6 +1234,10 @@ def visit_View(self, op, *, child, name: str): backend._create_temp_view(table_name=name, source=sg.select(STAR).from_(child)) return sg.table(name, quoted=self.quoted) + @visit_node.register(CTE) + def visit_CTE(self, op, *, parent): + return sg.table(parent.alias_or_name, quoted=self.quoted) + @visit_node.register(ops.SQLStringView) def visit_SQLStringView(self, op, *, query: str, name: str, child): table = sg.table(name, quoted=self.quoted) diff --git a/ibis/backends/base/sqlglot/rewrites.py b/ibis/backends/base/sqlglot/rewrites.py index d3084e8f1543..9bd2d4a8b1ab 100644 --- a/ibis/backends/base/sqlglot/rewrites.py +++ b/ibis/backends/base/sqlglot/rewrites.py @@ -15,6 +15,7 @@ from ibis.common.annotations import attribute from ibis.common.collections import FrozenDict # noqa: TCH001 from ibis.common.deferred import var +from ibis.common.graph import Graph from ibis.common.patterns import Object, replace from ibis.common.typing import VarTuple # noqa: TCH001 from ibis.expr.rewrites import p @@ -24,6 +25,21 @@ y = var("y") +@public +class CTE(ops.Relation): + """Common table expression.""" + + parent: ops.Relation + + @attribute + def schema(self): + return self.parent.schema + + @attribute + def values(self): + return self.parent.values + + @public class Select(ops.Relation): """Relation modelled after SQL's SELECT statement.""" @@ -135,8 +151,22 @@ def merge_select_select(_): ) +def extract_ctes(node): + result = [] + cte_types = (Select, ops.Aggregate, ops.JoinChain, ops.Set, ops.Limit, ops.Sample) + + g = Graph.from_bfs(node, filter=(ops.Relation, ops.Subquery, ops.JoinLink)) + for node, dependents in g.invert().items(): + if len(dependents) > 1 and isinstance(node, cte_types): + result.append(node) + + return result + + def sqlize(node): """Lower the ibis expression graph to a SQL-like relational algebra.""" + assert isinstance(node, ops.Relation) + step1 = node.replace( window_function_to_window | project_to_select @@ -144,7 +174,12 @@ def sqlize(node): | sort_to_select ) step2 = step1.replace(merge_select_select) - return step2 + + ctes = extract_ctes(step2) + subs = {cte: CTE(cte) for cte in ctes} + step3 = step2.replace(subs) + + return step3, ctes @replace(p.WindowFunction(p.First(x, y))) From a76686e38e319ad098f7bb44c1547140671748d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 19 Jan 2024 22:16:19 +0100 Subject: [PATCH 092/161] chore(sql): regenerate snapshots for clickhouse, duckdb and postgres --- .../test_union_cte/False/out.sql | 2 +- .../test_union_cte/True/out.sql | 2 +- .../test_union_aliasing/clickhouse/out.sql | 124 +++++++----------- .../test_union_aliasing/duckdb/out.sql | 124 +++++++----------- .../test_union_aliasing/postgres/out.sql | 124 +++++++----------- .../test_compiler/test_union_order_by/out.sql | 22 ++-- .../result.sql | 58 ++++---- .../test_limit_cte_extract/out.sql | 40 +++--- .../test_subquery_in_union/out.sql | 52 ++++---- .../test_subquery_used_for_self_join/out.sql | 62 ++++----- .../test_topk_analysis_bug/out.sql | 55 ++++---- .../test_tpch_self_join_failure/out.sql | 41 ++---- 12 files changed, 271 insertions(+), 435 deletions(-) diff --git a/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/False/out.sql 
b/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/False/out.sql index 5b727f7ca817..f0366d83444d 100644 --- a/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/False/out.sql +++ b/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/False/out.sql @@ -1 +1 @@ -SELECT "t6"."string_col", "t6"."metric" FROM ( SELECT "t4"."string_col", "t4"."metric" FROM ( SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 UNION ALL SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 ) AS "t4" UNION ALL SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 ) AS "t6" \ No newline at end of file +WITH "t1" AS ( SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 ) SELECT "t7"."string_col", "t7"."metric" FROM ( SELECT "t5"."string_col", "t5"."metric" FROM ( SELECT * FROM "t1" AS "t2" UNION ALL SELECT * FROM "t1" AS "t4" ) AS "t5" UNION ALL SELECT * FROM "t1" AS "t3" ) AS "t7" \ No newline at end of file diff --git a/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/True/out.sql b/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/True/out.sql index 1388747c56f0..5a873785e92b 100644 --- a/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/True/out.sql +++ b/ibis/backends/postgres/tests/snapshots/test_functions/test_union_cte/True/out.sql @@ -1 +1 @@ -SELECT "t6"."string_col", "t6"."metric" FROM ( SELECT "t4"."string_col", "t4"."metric" FROM ( SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 UNION SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 ) AS "t4" UNION SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 ) AS "t6" \ No newline at end of file +WITH "t1" AS ( SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 ) SELECT "t7"."string_col", "t7"."metric" FROM ( SELECT "t5"."string_col", "t5"."metric" FROM ( SELECT * FROM "t1" AS "t2" UNION SELECT * FROM "t1" AS "t4" ) AS "t5" UNION SELECT * FROM "t1" AS "t3" ) AS "t7" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql index 8962d00fdabe..55466dbdea2c 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql @@ -1,96 +1,60 @@ -SELECT - t10.field_of_study, - t10.diff -FROM ( +WITH t5 AS ( SELECT - t5.field_of_study, - t5.diff + t4.field_of_study, + any(t4.diff) AS diff FROM ( SELECT - t4.field_of_study, - any(t4.diff) AS diff + t3.field_of_study, + t3.years, + t3.degrees, + t3.earliest_degrees, + t3.latest_degrees, + t3.latest_degrees - t3.earliest_degrees AS diff FROM ( SELECT - t3.field_of_study, - t3.years, - t3.degrees, - t3.earliest_degrees, - t3.latest_degrees, - t3.latest_degrees - t3.earliest_degrees AS diff + t2.field_of_study, + t2.years, + t2.degrees, + any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, + anyLast(t2.degrees) OVER (PARTITION BY 
t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees FROM ( SELECT - t2.field_of_study, - t2.years, - t2.degrees, - any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, - anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees + t1.field_of_study, + CAST(t1.__pivoted__.1 AS Nullable(String)) AS years, + CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees FROM ( SELECT - t1.field_of_study, - CAST(t1.__pivoted__.1 AS Nullable(String)) AS years, - CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees - FROM ( - SELECT - t0.field_of_study, - arrayJoin( - [CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))] - ) AS __pivoted__ - FROM humanities AS t0 - ) AS t1 - ) AS t2 - ) AS t3 - ) AS t4 - GROUP BY - t4.field_of_study - ) AS t5 + t0.field_of_study, + arrayJoin( + [CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees 
Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))] + ) AS __pivoted__ + FROM humanities AS t0 + ) AS t1 + ) AS t2 + ) AS t3 + ) AS t4 + GROUP BY + t4.field_of_study +) +SELECT + t11.field_of_study, + t11.diff +FROM ( + SELECT + t6.field_of_study, + t6.diff + FROM t5 AS t6 ORDER BY - t5.diff DESC + t6.diff DESC LIMIT 10 UNION ALL SELECT - t5.field_of_study, - t5.diff - FROM ( - SELECT - t4.field_of_study, - any(t4.diff) AS diff - FROM ( - SELECT - t3.field_of_study, - t3.years, - t3.degrees, - t3.earliest_degrees, - t3.latest_degrees, - t3.latest_degrees - t3.earliest_degrees AS diff - FROM ( - SELECT - t2.field_of_study, - t2.years, - t2.degrees, - any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, - anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees - FROM ( - SELECT - t1.field_of_study, - CAST(t1.__pivoted__.1 AS Nullable(String)) AS years, - CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees - FROM ( - SELECT - t0.field_of_study, - arrayJoin( - [CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), 
CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))] - ) AS __pivoted__ - FROM humanities AS t0 - ) AS t1 - ) AS t2 - ) AS t3 - ) AS t4 - GROUP BY - t4.field_of_study - ) AS t5 + t6.field_of_study, + t6.diff + FROM t5 AS t6 WHERE - t5.diff < 0 + t6.diff < 0 ORDER BY - t5.diff ASC + t6.diff ASC LIMIT 10 -) AS t10 \ No newline at end of file +) AS t11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql index ffa8c03c59cf..52d685293884 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql @@ -1,96 +1,60 @@ -SELECT - t10.field_of_study, - t10.diff -FROM ( +WITH t5 AS ( SELECT - t5.field_of_study, - t5.diff + t4.field_of_study, + FIRST(t4.diff) AS diff FROM ( SELECT - t4.field_of_study, - FIRST(t4.diff) AS diff + t3.field_of_study, + t3.years, + t3.degrees, + t3.earliest_degrees, + t3.latest_degrees, + t3.latest_degrees - t3.earliest_degrees AS diff FROM ( SELECT - t3.field_of_study, - t3.years, - t3.degrees, - t3.earliest_degrees, - t3.latest_degrees, - t3.latest_degrees - t3.earliest_degrees AS diff + t2.field_of_study, + t2.years, + t2.degrees, + FIRST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, + LAST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees FROM ( SELECT - t2.field_of_study, - t2.years, - t2.degrees, - FIRST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, - LAST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees + t1.field_of_study, + t1.__pivoted__.years AS years, + t1.__pivoted__.degrees AS degrees FROM ( SELECT - t1.field_of_study, - t1.__pivoted__.years AS years, - t1.__pivoted__.degrees AS degrees - FROM ( - SELECT - t0.field_of_study, - UNNEST( - [{'years': '1970-71', 'degrees': t0."1970-71"}, {'years': '1975-76', 'degrees': t0."1975-76"}, {'years': '1980-81', 'degrees': t0."1980-81"}, {'years': '1985-86', 'degrees': t0."1985-86"}, {'years': '1990-91', 'degrees': t0."1990-91"}, {'years': '1995-96', 'degrees': t0."1995-96"}, {'years': '2000-01', 'degrees': t0."2000-01"}, {'years': '2005-06', 'degrees': t0."2005-06"}, {'years': '2010-11', 'degrees': t0."2010-11"}, {'years': '2011-12', 'degrees': t0."2011-12"}, {'years': '2012-13', 'degrees': t0."2012-13"}, {'years': '2013-14', 'degrees': t0."2013-14"}, {'years': '2014-15', 'degrees': t0."2014-15"}, {'years': '2015-16', 'degrees': t0."2015-16"}, {'years': '2016-17', 'degrees': t0."2016-17"}, {'years': '2017-18', 'degrees': t0."2017-18"}, {'years': '2018-19', 'degrees': t0."2018-19"}, {'years': '2019-20', 'degrees': t0."2019-20"}] - ) AS __pivoted__ - FROM humanities AS t0 - ) AS t1 - ) AS t2 - ) AS t3 - ) AS t4 - GROUP BY - 1 - ) AS t5 + t0.field_of_study, + UNNEST( + [{'years': '1970-71', 'degrees': t0."1970-71"}, {'years': '1975-76', 'degrees': t0."1975-76"}, {'years': '1980-81', 'degrees': t0."1980-81"}, {'years': '1985-86', 'degrees': 
t0."1985-86"}, {'years': '1990-91', 'degrees': t0."1990-91"}, {'years': '1995-96', 'degrees': t0."1995-96"}, {'years': '2000-01', 'degrees': t0."2000-01"}, {'years': '2005-06', 'degrees': t0."2005-06"}, {'years': '2010-11', 'degrees': t0."2010-11"}, {'years': '2011-12', 'degrees': t0."2011-12"}, {'years': '2012-13', 'degrees': t0."2012-13"}, {'years': '2013-14', 'degrees': t0."2013-14"}, {'years': '2014-15', 'degrees': t0."2014-15"}, {'years': '2015-16', 'degrees': t0."2015-16"}, {'years': '2016-17', 'degrees': t0."2016-17"}, {'years': '2017-18', 'degrees': t0."2017-18"}, {'years': '2018-19', 'degrees': t0."2018-19"}, {'years': '2019-20', 'degrees': t0."2019-20"}] + ) AS __pivoted__ + FROM humanities AS t0 + ) AS t1 + ) AS t2 + ) AS t3 + ) AS t4 + GROUP BY + 1 +) +SELECT + t11.field_of_study, + t11.diff +FROM ( + SELECT + t6.field_of_study, + t6.diff + FROM t5 AS t6 ORDER BY - t5.diff DESC + t6.diff DESC LIMIT 10 UNION ALL SELECT - t5.field_of_study, - t5.diff - FROM ( - SELECT - t4.field_of_study, - FIRST(t4.diff) AS diff - FROM ( - SELECT - t3.field_of_study, - t3.years, - t3.degrees, - t3.earliest_degrees, - t3.latest_degrees, - t3.latest_degrees - t3.earliest_degrees AS diff - FROM ( - SELECT - t2.field_of_study, - t2.years, - t2.degrees, - FIRST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, - LAST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees - FROM ( - SELECT - t1.field_of_study, - t1.__pivoted__.years AS years, - t1.__pivoted__.degrees AS degrees - FROM ( - SELECT - t0.field_of_study, - UNNEST( - [{'years': '1970-71', 'degrees': t0."1970-71"}, {'years': '1975-76', 'degrees': t0."1975-76"}, {'years': '1980-81', 'degrees': t0."1980-81"}, {'years': '1985-86', 'degrees': t0."1985-86"}, {'years': '1990-91', 'degrees': t0."1990-91"}, {'years': '1995-96', 'degrees': t0."1995-96"}, {'years': '2000-01', 'degrees': t0."2000-01"}, {'years': '2005-06', 'degrees': t0."2005-06"}, {'years': '2010-11', 'degrees': t0."2010-11"}, {'years': '2011-12', 'degrees': t0."2011-12"}, {'years': '2012-13', 'degrees': t0."2012-13"}, {'years': '2013-14', 'degrees': t0."2013-14"}, {'years': '2014-15', 'degrees': t0."2014-15"}, {'years': '2015-16', 'degrees': t0."2015-16"}, {'years': '2016-17', 'degrees': t0."2016-17"}, {'years': '2017-18', 'degrees': t0."2017-18"}, {'years': '2018-19', 'degrees': t0."2018-19"}, {'years': '2019-20', 'degrees': t0."2019-20"}] - ) AS __pivoted__ - FROM humanities AS t0 - ) AS t1 - ) AS t2 - ) AS t3 - ) AS t4 - GROUP BY - 1 - ) AS t5 + t6.field_of_study, + t6.diff + FROM t5 AS t6 WHERE - t5.diff < CAST(0 AS TINYINT) + t6.diff < CAST(0 AS TINYINT) ORDER BY - t5.diff ASC + t6.diff ASC LIMIT 10 -) AS t10 \ No newline at end of file +) AS t11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/postgres/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/postgres/out.sql index c674f81521f2..770da15fd9ec 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/postgres/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/postgres/out.sql @@ -1,96 +1,60 @@ -SELECT - "t10"."field_of_study", - "t10"."diff" -FROM ( +WITH "t5" AS ( SELECT - "t5"."field_of_study", - "t5"."diff" + "t4"."field_of_study", + FIRST("t4"."diff") AS "diff" FROM ( SELECT - "t4"."field_of_study", - FIRST("t4"."diff") AS 
"diff" + "t3"."field_of_study", + "t3"."years", + "t3"."degrees", + "t3"."earliest_degrees", + "t3"."latest_degrees", + "t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff" FROM ( SELECT - "t3"."field_of_study", - "t3"."years", - "t3"."degrees", - "t3"."earliest_degrees", - "t3"."latest_degrees", - "t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff" + "t2"."field_of_study", + "t2"."years", + "t2"."degrees", + FIRST("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees", + LAST("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees" FROM ( SELECT - "t2"."field_of_study", - "t2"."years", - "t2"."degrees", - FIRST("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees", - LAST("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees" + "t1"."field_of_study", + CAST(TO_JSONB("t1"."__pivoted__") -> 'f1' AS VARCHAR) AS "years", + CAST(TO_JSONB("t1"."__pivoted__") -> 'f2' AS BIGINT) AS "degrees" FROM ( SELECT - "t1"."field_of_study", - CAST(TO_JSONB("t1"."__pivoted__") -> 'f1' AS VARCHAR) AS "years", - CAST(TO_JSONB("t1"."__pivoted__") -> 'f2' AS BIGINT) AS "degrees" - FROM ( - SELECT - "t0"."field_of_study", - UNNEST( - ARRAY[ROW(CAST('1970-71' AS VARCHAR), CAST("t0"."1970-71" AS BIGINT)), ROW(CAST('1975-76' AS VARCHAR), CAST("t0"."1975-76" AS BIGINT)), ROW(CAST('1980-81' AS VARCHAR), CAST("t0"."1980-81" AS BIGINT)), ROW(CAST('1985-86' AS VARCHAR), CAST("t0"."1985-86" AS BIGINT)), ROW(CAST('1990-91' AS VARCHAR), CAST("t0"."1990-91" AS BIGINT)), ROW(CAST('1995-96' AS VARCHAR), CAST("t0"."1995-96" AS BIGINT)), ROW(CAST('2000-01' AS VARCHAR), CAST("t0"."2000-01" AS BIGINT)), ROW(CAST('2005-06' AS VARCHAR), CAST("t0"."2005-06" AS BIGINT)), ROW(CAST('2010-11' AS VARCHAR), CAST("t0"."2010-11" AS BIGINT)), ROW(CAST('2011-12' AS VARCHAR), CAST("t0"."2011-12" AS BIGINT)), ROW(CAST('2012-13' AS VARCHAR), CAST("t0"."2012-13" AS BIGINT)), ROW(CAST('2013-14' AS VARCHAR), CAST("t0"."2013-14" AS BIGINT)), ROW(CAST('2014-15' AS VARCHAR), CAST("t0"."2014-15" AS BIGINT)), ROW(CAST('2015-16' AS VARCHAR), CAST("t0"."2015-16" AS BIGINT)), ROW(CAST('2016-17' AS VARCHAR), CAST("t0"."2016-17" AS BIGINT)), ROW(CAST('2017-18' AS VARCHAR), CAST("t0"."2017-18" AS BIGINT)), ROW(CAST('2018-19' AS VARCHAR), CAST("t0"."2018-19" AS BIGINT)), ROW(CAST('2019-20' AS VARCHAR), CAST("t0"."2019-20" AS BIGINT))] - ) AS "__pivoted__" - FROM "humanities" AS "t0" - ) AS "t1" - ) AS "t2" - ) AS "t3" - ) AS "t4" - GROUP BY - 1 - ) AS "t5" + "t0"."field_of_study", + UNNEST( + ARRAY[ROW(CAST('1970-71' AS VARCHAR), CAST("t0"."1970-71" AS BIGINT)), ROW(CAST('1975-76' AS VARCHAR), CAST("t0"."1975-76" AS BIGINT)), ROW(CAST('1980-81' AS VARCHAR), CAST("t0"."1980-81" AS BIGINT)), ROW(CAST('1985-86' AS VARCHAR), CAST("t0"."1985-86" AS BIGINT)), ROW(CAST('1990-91' AS VARCHAR), CAST("t0"."1990-91" AS BIGINT)), ROW(CAST('1995-96' AS VARCHAR), CAST("t0"."1995-96" AS BIGINT)), ROW(CAST('2000-01' AS VARCHAR), CAST("t0"."2000-01" AS BIGINT)), ROW(CAST('2005-06' AS VARCHAR), CAST("t0"."2005-06" AS BIGINT)), ROW(CAST('2010-11' AS VARCHAR), CAST("t0"."2010-11" AS BIGINT)), ROW(CAST('2011-12' AS VARCHAR), CAST("t0"."2011-12" AS 
BIGINT)), ROW(CAST('2012-13' AS VARCHAR), CAST("t0"."2012-13" AS BIGINT)), ROW(CAST('2013-14' AS VARCHAR), CAST("t0"."2013-14" AS BIGINT)), ROW(CAST('2014-15' AS VARCHAR), CAST("t0"."2014-15" AS BIGINT)), ROW(CAST('2015-16' AS VARCHAR), CAST("t0"."2015-16" AS BIGINT)), ROW(CAST('2016-17' AS VARCHAR), CAST("t0"."2016-17" AS BIGINT)), ROW(CAST('2017-18' AS VARCHAR), CAST("t0"."2017-18" AS BIGINT)), ROW(CAST('2018-19' AS VARCHAR), CAST("t0"."2018-19" AS BIGINT)), ROW(CAST('2019-20' AS VARCHAR), CAST("t0"."2019-20" AS BIGINT))] + ) AS "__pivoted__" + FROM "humanities" AS "t0" + ) AS "t1" + ) AS "t2" + ) AS "t3" + ) AS "t4" + GROUP BY + 1 +) +SELECT + "t11"."field_of_study", + "t11"."diff" +FROM ( + SELECT + "t6"."field_of_study", + "t6"."diff" + FROM "t5" AS "t6" ORDER BY - "t5"."diff" DESC NULLS LAST + "t6"."diff" DESC NULLS LAST LIMIT 10 UNION ALL SELECT - "t5"."field_of_study", - "t5"."diff" - FROM ( - SELECT - "t4"."field_of_study", - FIRST("t4"."diff") AS "diff" - FROM ( - SELECT - "t3"."field_of_study", - "t3"."years", - "t3"."degrees", - "t3"."earliest_degrees", - "t3"."latest_degrees", - "t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff" - FROM ( - SELECT - "t2"."field_of_study", - "t2"."years", - "t2"."degrees", - FIRST("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees", - LAST("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees" - FROM ( - SELECT - "t1"."field_of_study", - CAST(TO_JSONB("t1"."__pivoted__") -> 'f1' AS VARCHAR) AS "years", - CAST(TO_JSONB("t1"."__pivoted__") -> 'f2' AS BIGINT) AS "degrees" - FROM ( - SELECT - "t0"."field_of_study", - UNNEST( - ARRAY[ROW(CAST('1970-71' AS VARCHAR), CAST("t0"."1970-71" AS BIGINT)), ROW(CAST('1975-76' AS VARCHAR), CAST("t0"."1975-76" AS BIGINT)), ROW(CAST('1980-81' AS VARCHAR), CAST("t0"."1980-81" AS BIGINT)), ROW(CAST('1985-86' AS VARCHAR), CAST("t0"."1985-86" AS BIGINT)), ROW(CAST('1990-91' AS VARCHAR), CAST("t0"."1990-91" AS BIGINT)), ROW(CAST('1995-96' AS VARCHAR), CAST("t0"."1995-96" AS BIGINT)), ROW(CAST('2000-01' AS VARCHAR), CAST("t0"."2000-01" AS BIGINT)), ROW(CAST('2005-06' AS VARCHAR), CAST("t0"."2005-06" AS BIGINT)), ROW(CAST('2010-11' AS VARCHAR), CAST("t0"."2010-11" AS BIGINT)), ROW(CAST('2011-12' AS VARCHAR), CAST("t0"."2011-12" AS BIGINT)), ROW(CAST('2012-13' AS VARCHAR), CAST("t0"."2012-13" AS BIGINT)), ROW(CAST('2013-14' AS VARCHAR), CAST("t0"."2013-14" AS BIGINT)), ROW(CAST('2014-15' AS VARCHAR), CAST("t0"."2014-15" AS BIGINT)), ROW(CAST('2015-16' AS VARCHAR), CAST("t0"."2015-16" AS BIGINT)), ROW(CAST('2016-17' AS VARCHAR), CAST("t0"."2016-17" AS BIGINT)), ROW(CAST('2017-18' AS VARCHAR), CAST("t0"."2017-18" AS BIGINT)), ROW(CAST('2018-19' AS VARCHAR), CAST("t0"."2018-19" AS BIGINT)), ROW(CAST('2019-20' AS VARCHAR), CAST("t0"."2019-20" AS BIGINT))] - ) AS "__pivoted__" - FROM "humanities" AS "t0" - ) AS "t1" - ) AS "t2" - ) AS "t3" - ) AS "t4" - GROUP BY - 1 - ) AS "t5" + "t6"."field_of_study", + "t6"."diff" + FROM "t5" AS "t6" WHERE - "t5"."diff" < 0 + "t6"."diff" < 0 ORDER BY - "t5"."diff" ASC + "t6"."diff" ASC LIMIT 10 -) AS "t10" \ No newline at end of file +) AS "t11" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql index dda59184ba53..2afa8074bb0c 
100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql @@ -1,18 +1,20 @@ -SELECT - t2.a, - t2.b -FROM ( +WITH t1 AS ( SELECT t0.a, t0.b FROM t AS t0 ORDER BY t0.b ASC +) +SELECT + t3.a, + t3.b +FROM ( + SELECT + * + FROM t1 AS t2 UNION ALL SELECT - t0.a, - t0.b - FROM t AS t0 - ORDER BY - t0.b ASC -) AS t2 \ No newline at end of file + * + FROM t1 AS t2 +) AS t3 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql index 2bdce97b5fa2..68396dd92fca 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql @@ -1,40 +1,32 @@ +WITH t1 AS ( + SELECT + t0.region, + t0.kind, + SUM(t0.amount) AS total + FROM purchases AS t0 + GROUP BY + 1, + 2 +) SELECT - t4.region, - t4.total - t5.total AS diff + t5.region, + t5.total - t6.total AS diff FROM ( SELECT - t1.region, - t1.kind, - t1.total - FROM ( - SELECT - t0.region, - t0.kind, - SUM(t0.amount) AS total - FROM purchases AS t0 - GROUP BY - 1, - 2 - ) AS t1 + t2.region, + t2.kind, + t2.total + FROM t1 AS t2 WHERE - t1.kind = 'foo' -) AS t4 + t2.kind = 'foo' +) AS t5 INNER JOIN ( SELECT - t1.region, - t1.kind, - t1.total - FROM ( - SELECT - t0.region, - t0.kind, - SUM(t0.amount) AS total - FROM purchases AS t0 - GROUP BY - 1, - 2 - ) AS t1 + t2.region, + t2.kind, + t2.total + FROM t1 AS t2 WHERE - t1.kind = 'bar' -) AS t5 - ON t4.region = t5.region \ No newline at end of file + t2.kind = 'bar' +) AS t6 + ON t5.region = t6.region \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql index e32be6672764..12799aaebd5a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql @@ -1,27 +1,23 @@ -SELECT - t2.id, - t2.bool_col, - t2.tinyint_col, - t2.smallint_col, - t2.int_col, - t2.bigint_col, - t2.float_col, - t2.double_col, - t2.date_string_col, - t2.string_col, - t2.timestamp_col, - t2.year, - t2.month -FROM ( - SELECT - * - FROM functional_alltypes AS t0 - LIMIT 100 -) AS t2 -INNER JOIN ( +WITH t1 AS ( SELECT * FROM functional_alltypes AS t0 LIMIT 100 -) AS t4 +) +SELECT + t3.id, + t3.bool_col, + t3.tinyint_col, + t3.smallint_col, + t3.int_col, + t3.bigint_col, + t3.float_col, + t3.double_col, + t3.date_string_col, + t3.string_col, + t3.timestamp_col, + t3.year, + t3.month +FROM t1 AS t3 +INNER JOIN t1 AS t5 ON TRUE \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql index 57e0912af4a6..fbaeee461612 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql @@ -1,8 +1,4 @@ -SELECT - t8.a, - t8.g, - t8.metric -FROM ( +WITH t6 AS ( SELECT t2.a, t2.g, @@ -28,30 +24,26 @@ FROM ( 2 ) AS t4 ON t2.g = t4.g +), t1 AS ( + SELECT + t0.a, + t0.g, + SUM(t0.f) AS metric + FROM alltypes 
AS t0 + GROUP BY + 1, + 2 +) +SELECT + t9.a, + t9.g, + t9.metric +FROM ( + SELECT + * + FROM t6 AS t7 UNION ALL SELECT - t2.a, - t2.g, - t2.metric - FROM ( - SELECT - t0.a, - t0.g, - SUM(t0.f) AS metric - FROM alltypes AS t0 - GROUP BY - 1, - 2 - ) AS t2 - INNER JOIN ( - SELECT - t0.a, - t0.g, - SUM(t0.f) AS metric - FROM alltypes AS t0 - GROUP BY - 1, - 2 - ) AS t4 - ON t2.g = t4.g -) AS t8 \ No newline at end of file + * + FROM t6 AS t8 +) AS t9 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql index 57edded67515..76fc7a3e43e4 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql @@ -1,41 +1,31 @@ +WITH t1 AS ( + SELECT + t0.g, + t0.a, + t0.b, + SUM(t0.f) AS total + FROM alltypes AS t0 + GROUP BY + 1, + 2, + 3 +) SELECT - t6.g, - MAX(t6.total - t6.total_right) AS metric + t7.g, + MAX(t7.total - t7.total_right) AS metric FROM ( SELECT - t2.g, - t2.a, - t2.b, - t2.total, - t4.g AS g_right, - t4.a AS a_right, - t4.b AS b_right, - t4.total AS total_right - FROM ( - SELECT - t0.g, - t0.a, - t0.b, - SUM(t0.f) AS total - FROM alltypes AS t0 - GROUP BY - 1, - 2, - 3 - ) AS t2 - INNER JOIN ( - SELECT - t0.g, - t0.a, - t0.b, - SUM(t0.f) AS total - FROM alltypes AS t0 - GROUP BY - 1, - 2, - 3 - ) AS t4 - ON t2.a = t4.b -) AS t6 + t3.g, + t3.a, + t3.b, + t3.total, + t5.g AS g_right, + t5.a AS a_right, + t5.b AS b_right, + t5.total AS total_right + FROM t1 AS t3 + INNER JOIN t1 AS t5 + ON t3.a = t5.b +) AS t7 GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql index 5145b5c7361f..5c61bb338e11 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql @@ -1,45 +1,38 @@ +WITH t1 AS ( + SELECT + t0.dest, + t0.origin, + t0.arrdelay + FROM airlines AS t0 + WHERE + t0.dest IN ('ORD', 'JFK', 'SFO') +) SELECT - t8.origin, + t9.origin, COUNT(*) AS "CountStar()" FROM ( SELECT - t2.dest, - t2.origin, - t2.arrdelay - FROM ( - SELECT - t0.dest, - t0.origin, - t0.arrdelay - FROM airlines AS t0 - WHERE - t0.dest IN ('ORD', 'JFK', 'SFO') - ) AS t2 + t3.dest, + t3.origin, + t3.arrdelay + FROM t1 AS t3 SEMI JOIN ( SELECT - t3.dest, - t3."Mean(arrdelay)" + t4.dest, + t4."Mean(arrdelay)" FROM ( SELECT - t1.dest, - AVG(t1.arrdelay) AS "Mean(arrdelay)" - FROM ( - SELECT - t0.dest, - t0.origin, - t0.arrdelay - FROM airlines AS t0 - WHERE - t0.dest IN ('ORD', 'JFK', 'SFO') - ) AS t1 + t2.dest, + AVG(t2.arrdelay) AS "Mean(arrdelay)" + FROM t1 AS t2 GROUP BY 1 - ) AS t3 + ) AS t4 ORDER BY - t3."Mean(arrdelay)" DESC + t4."Mean(arrdelay)" DESC LIMIT 10 - ) AS t6 - ON t2.dest = t6.dest -) AS t8 + ) AS t7 + ON t3.dest = t7.dest +) AS t9 GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql index feacfd23da7e..2da6a3ed3dca 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql +++ 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql @@ -1,8 +1,4 @@ -SELECT - t13.region, - t13.year, - t13.total - t15.total AS yoy_change -FROM ( +WITH t12 AS ( SELECT t11.region, EXTRACT(year FROM t11.odate) AS year, @@ -24,30 +20,13 @@ FROM ( GROUP BY 1, 2 -) AS t13 -INNER JOIN ( - SELECT - t11.region, - EXTRACT(year FROM t11.odate) AS year, - CAST(SUM(t11.amount) AS DOUBLE) AS total - FROM ( - SELECT - t4.r_name AS region, - t5.n_name AS nation, - t7.o_totalprice AS amount, - CAST(t7.o_orderdate AS TIMESTAMP) AS odate - FROM tpch_region AS t4 - INNER JOIN tpch_nation AS t5 - ON t4.r_regionkey = t5.n_regionkey - INNER JOIN tpch_customer AS t6 - ON t6.c_nationkey = t5.n_nationkey - INNER JOIN tpch_orders AS t7 - ON t7.o_custkey = t6.c_custkey - ) AS t11 - GROUP BY - 1, - 2 -) AS t15 - ON t13.year = ( - t15.year - CAST(1 AS TINYINT) +) +SELECT + t14.region, + t14.year, + t14.total - t16.total AS yoy_change +FROM t12 AS t14 +INNER JOIN t12 AS t16 + ON t14.year = ( + t16.year - CAST(1 AS TINYINT) ) \ No newline at end of file From 96002f446cacb382690b7ace9226322d51b8734d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 19 Jan 2024 22:45:35 +0100 Subject: [PATCH 093/161] chore(sql): regenerate snapshots for snowflake --- .../test_h07/test_tpc_h07/snowflake/h07.sql | 86 ++++---- .../test_h08/test_tpc_h08/snowflake/h08.sql | 80 ++++--- .../test_h18/test_tpc_h18/snowflake/h18.sql | 208 ++++++++---------- .../test_h21/test_tpc_h21/snowflake/h21.sql | 179 ++++++--------- 4 files changed, 243 insertions(+), 310 deletions(-) diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql index c3b31dcec500..703b030ad4bc 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql @@ -1,27 +1,35 @@ +WITH "t9" AS ( + SELECT + "t4"."N_NATIONKEY" AS "n_nationkey", + "t4"."N_NAME" AS "n_name", + "t4"."N_REGIONKEY" AS "n_regionkey", + "t4"."N_COMMENT" AS "n_comment" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t4" +) SELECT - "t24"."supp_nation", - "t24"."cust_nation", - "t24"."l_year", - "t24"."revenue" + "t25"."supp_nation", + "t25"."cust_nation", + "t25"."l_year", + "t25"."revenue" FROM ( SELECT - "t23"."supp_nation", - "t23"."cust_nation", - "t23"."l_year", - SUM("t23"."volume") AS "revenue" + "t24"."supp_nation", + "t24"."cust_nation", + "t24"."l_year", + SUM("t24"."volume") AS "revenue" FROM ( SELECT - "t22"."supp_nation", - "t22"."cust_nation", - "t22"."l_shipdate", - "t22"."l_extendedprice", - "t22"."l_discount", - "t22"."l_year", - "t22"."volume" + "t23"."supp_nation", + "t23"."cust_nation", + "t23"."l_shipdate", + "t23"."l_extendedprice", + "t23"."l_discount", + "t23"."l_year", + "t23"."volume" FROM ( SELECT - "t14"."n_name" AS "supp_nation", - "t16"."n_name" AS "cust_nation", + "t15"."n_name" AS "supp_nation", + "t17"."n_name" AS "cust_nation", "t11"."l_shipdate", "t11"."l_extendedprice", "t11"."l_discount", @@ -88,50 +96,36 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS "t3" ) AS "t13" ON "t13"."c_custkey" = "t12"."o_custkey" - INNER JOIN ( - SELECT - "t4"."N_NATIONKEY" AS "n_nationkey", - "t4"."N_NAME" AS "n_name", - "t4"."N_REGIONKEY" AS "n_regionkey", - "t4"."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t4" - ) AS "t14" - ON "t10"."s_nationkey" = 
"t14"."n_nationkey" - INNER JOIN ( - SELECT - "t4"."N_NATIONKEY" AS "n_nationkey", - "t4"."N_NAME" AS "n_name", - "t4"."N_REGIONKEY" AS "n_regionkey", - "t4"."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t4" - ) AS "t16" - ON "t13"."c_nationkey" = "t16"."n_nationkey" - ) AS "t22" + INNER JOIN "t9" AS "t15" + ON "t10"."s_nationkey" = "t15"."n_nationkey" + INNER JOIN "t9" AS "t17" + ON "t13"."c_nationkey" = "t17"."n_nationkey" + ) AS "t23" WHERE ( ( ( - "t22"."cust_nation" = 'FRANCE' + "t23"."cust_nation" = 'FRANCE' ) AND ( - "t22"."supp_nation" = 'GERMANY' + "t23"."supp_nation" = 'GERMANY' ) ) OR ( ( - "t22"."cust_nation" = 'GERMANY' + "t23"."cust_nation" = 'GERMANY' ) AND ( - "t22"."supp_nation" = 'FRANCE' + "t23"."supp_nation" = 'FRANCE' ) ) ) - AND "t22"."l_shipdate" BETWEEN DATE_FROM_PARTS(1995, 1, 1) AND DATE_FROM_PARTS(1996, 12, 31) - ) AS "t23" + AND "t23"."l_shipdate" BETWEEN DATE_FROM_PARTS(1995, 1, 1) AND DATE_FROM_PARTS(1996, 12, 31) + ) AS "t24" GROUP BY 1, 2, 3 -) AS "t24" +) AS "t25" ORDER BY - "t24"."supp_nation" ASC, - "t24"."cust_nation" ASC, - "t24"."l_year" ASC \ No newline at end of file + "t25"."supp_nation" ASC, + "t25"."cust_nation" ASC, + "t25"."l_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql index 9dac02ababb7..e92bababc234 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql @@ -1,27 +1,35 @@ +WITH "t13" AS ( + SELECT + "t6"."N_NATIONKEY" AS "n_nationkey", + "t6"."N_NAME" AS "n_name", + "t6"."N_REGIONKEY" AS "n_regionkey", + "t6"."N_COMMENT" AS "n_comment" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t6" +) SELECT - "t32"."o_year", - "t32"."mkt_share" + "t33"."o_year", + "t33"."mkt_share" FROM ( SELECT - "t31"."o_year", - SUM("t31"."nation_volume") / SUM("t31"."volume") AS "mkt_share" + "t32"."o_year", + SUM("t32"."nation_volume") / SUM("t32"."volume") AS "mkt_share" FROM ( SELECT - "t30"."o_year", - "t30"."volume", - "t30"."nation", - "t30"."r_name", - "t30"."o_orderdate", - "t30"."p_type", - CASE WHEN "t30"."nation" = 'BRAZIL' THEN "t30"."volume" ELSE 0 END AS "nation_volume" + "t31"."o_year", + "t31"."volume", + "t31"."nation", + "t31"."r_name", + "t31"."o_orderdate", + "t31"."p_type", + CASE WHEN "t31"."nation" = 'BRAZIL' THEN "t31"."volume" ELSE 0 END AS "nation_volume" FROM ( SELECT DATE_PART(year, "t17"."o_orderdate") AS "o_year", "t15"."l_extendedprice" * ( 1 - "t15"."l_discount" ) AS "volume", - "t22"."n_name" AS "nation", - "t21"."r_name", + "t23"."n_name" AS "nation", + "t19"."r_name", "t17"."o_orderdate", "t14"."p_type" FROM ( @@ -97,40 +105,26 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS "t4" ) AS "t18" ON "t17"."o_custkey" = "t18"."c_custkey" + INNER JOIN "t13" AS "t21" + ON "t18"."c_nationkey" = "t21"."n_nationkey" INNER JOIN ( SELECT - "t5"."N_NATIONKEY" AS "n_nationkey", - "t5"."N_NAME" AS "n_name", - "t5"."N_REGIONKEY" AS "n_regionkey", - "t5"."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t5" + "t5"."R_REGIONKEY" AS "r_regionkey", + "t5"."R_NAME" AS "r_name", + "t5"."R_COMMENT" AS "r_comment" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."REGION" AS "t5" ) AS "t19" - ON "t18"."c_nationkey" = "t19"."n_nationkey" - INNER JOIN ( - SELECT - "t6"."R_REGIONKEY" AS "r_regionkey", - 
"t6"."R_NAME" AS "r_name", - "t6"."R_COMMENT" AS "r_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."REGION" AS "t6" - ) AS "t21" - ON "t19"."n_regionkey" = "t21"."r_regionkey" - INNER JOIN ( - SELECT - "t5"."N_NATIONKEY" AS "n_nationkey", - "t5"."N_NAME" AS "n_name", - "t5"."N_REGIONKEY" AS "n_regionkey", - "t5"."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t5" - ) AS "t22" - ON "t16"."s_nationkey" = "t22"."n_nationkey" - ) AS "t30" + ON "t21"."n_regionkey" = "t19"."r_regionkey" + INNER JOIN "t13" AS "t23" + ON "t16"."s_nationkey" = "t23"."n_nationkey" + ) AS "t31" WHERE - "t30"."r_name" = 'AMERICA' - AND "t30"."o_orderdate" BETWEEN DATE_FROM_PARTS(1995, 1, 1) AND DATE_FROM_PARTS(1996, 12, 31) - AND "t30"."p_type" = 'ECONOMY ANODIZED STEEL' - ) AS "t31" + "t31"."r_name" = 'AMERICA' + AND "t31"."o_orderdate" BETWEEN DATE_FROM_PARTS(1995, 1, 1) AND DATE_FROM_PARTS(1996, 12, 31) + AND "t31"."p_type" = 'ECONOMY ANODIZED STEEL' + ) AS "t32" GROUP BY 1 -) AS "t32" +) AS "t33" ORDER BY - "t32"."o_year" ASC \ No newline at end of file + "t33"."o_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql index 84bae2c4079c..cbeae5fe06f4 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql @@ -1,53 +1,73 @@ +WITH "t5" AS ( + SELECT + "t2"."L_ORDERKEY" AS "l_orderkey", + "t2"."L_PARTKEY" AS "l_partkey", + "t2"."L_SUPPKEY" AS "l_suppkey", + "t2"."L_LINENUMBER" AS "l_linenumber", + "t2"."L_QUANTITY" AS "l_quantity", + "t2"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t2"."L_DISCOUNT" AS "l_discount", + "t2"."L_TAX" AS "l_tax", + "t2"."L_RETURNFLAG" AS "l_returnflag", + "t2"."L_LINESTATUS" AS "l_linestatus", + "t2"."L_SHIPDATE" AS "l_shipdate", + "t2"."L_COMMITDATE" AS "l_commitdate", + "t2"."L_RECEIPTDATE" AS "l_receiptdate", + "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t2"."L_SHIPMODE" AS "l_shipmode", + "t2"."L_COMMENT" AS "l_comment" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t2" +) SELECT - "t15"."c_name", - "t15"."c_custkey", - "t15"."o_orderkey", - "t15"."o_orderdate", - "t15"."o_totalprice", - "t15"."sum_qty" + "t16"."c_name", + "t16"."c_custkey", + "t16"."o_orderkey", + "t16"."o_orderdate", + "t16"."o_totalprice", + "t16"."sum_qty" FROM ( SELECT - "t14"."c_name", - "t14"."c_custkey", - "t14"."o_orderkey", - "t14"."o_orderdate", - "t14"."o_totalprice", - SUM("t14"."l_quantity") AS "sum_qty" + "t15"."c_name", + "t15"."c_custkey", + "t15"."o_orderkey", + "t15"."o_orderdate", + "t15"."o_totalprice", + SUM("t15"."l_quantity") AS "sum_qty" FROM ( SELECT - "t12"."c_custkey", - "t12"."c_name", - "t12"."c_address", - "t12"."c_nationkey", - "t12"."c_phone", - "t12"."c_acctbal", - "t12"."c_mktsegment", - "t12"."c_comment", - "t12"."o_orderkey", - "t12"."o_custkey", - "t12"."o_orderstatus", - "t12"."o_totalprice", - "t12"."o_orderdate", - "t12"."o_orderpriority", - "t12"."o_clerk", - "t12"."o_shippriority", - "t12"."o_comment", - "t12"."l_orderkey", - "t12"."l_partkey", - "t12"."l_suppkey", - "t12"."l_linenumber", - "t12"."l_quantity", - "t12"."l_extendedprice", - "t12"."l_discount", - "t12"."l_tax", - "t12"."l_returnflag", - "t12"."l_linestatus", - "t12"."l_shipdate", - "t12"."l_commitdate", - "t12"."l_receiptdate", - "t12"."l_shipinstruct", - "t12"."l_shipmode", - "t12"."l_comment" + 
"t13"."c_custkey", + "t13"."c_name", + "t13"."c_address", + "t13"."c_nationkey", + "t13"."c_phone", + "t13"."c_acctbal", + "t13"."c_mktsegment", + "t13"."c_comment", + "t13"."o_orderkey", + "t13"."o_custkey", + "t13"."o_orderstatus", + "t13"."o_totalprice", + "t13"."o_orderdate", + "t13"."o_orderpriority", + "t13"."o_clerk", + "t13"."o_shippriority", + "t13"."o_comment", + "t13"."l_orderkey", + "t13"."l_partkey", + "t13"."l_suppkey", + "t13"."l_linenumber", + "t13"."l_quantity", + "t13"."l_extendedprice", + "t13"."l_discount", + "t13"."l_tax", + "t13"."l_returnflag", + "t13"."l_linestatus", + "t13"."l_shipdate", + "t13"."l_commitdate", + "t13"."l_receiptdate", + "t13"."l_shipinstruct", + "t13"."l_shipmode", + "t13"."l_comment" FROM ( SELECT "t6"."c_custkey", @@ -67,22 +87,22 @@ FROM ( "t7"."o_clerk", "t7"."o_shippriority", "t7"."o_comment", - "t8"."l_orderkey", - "t8"."l_partkey", - "t8"."l_suppkey", - "t8"."l_linenumber", - "t8"."l_quantity", - "t8"."l_extendedprice", - "t8"."l_discount", - "t8"."l_tax", - "t8"."l_returnflag", - "t8"."l_linestatus", - "t8"."l_shipdate", - "t8"."l_commitdate", - "t8"."l_receiptdate", - "t8"."l_shipinstruct", - "t8"."l_shipmode", - "t8"."l_comment" + "t9"."l_orderkey", + "t9"."l_partkey", + "t9"."l_suppkey", + "t9"."l_linenumber", + "t9"."l_quantity", + "t9"."l_extendedprice", + "t9"."l_discount", + "t9"."l_tax", + "t9"."l_returnflag", + "t9"."l_linestatus", + "t9"."l_shipdate", + "t9"."l_commitdate", + "t9"."l_receiptdate", + "t9"."l_shipinstruct", + "t9"."l_shipmode", + "t9"."l_comment" FROM ( SELECT "t0"."C_CUSTKEY" AS "c_custkey", @@ -109,71 +129,33 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS "t1" ) AS "t7" ON "t6"."c_custkey" = "t7"."o_custkey" - INNER JOIN ( - SELECT - "t2"."L_ORDERKEY" AS "l_orderkey", - "t2"."L_PARTKEY" AS "l_partkey", - "t2"."L_SUPPKEY" AS "l_suppkey", - "t2"."L_LINENUMBER" AS "l_linenumber", - "t2"."L_QUANTITY" AS "l_quantity", - "t2"."L_EXTENDEDPRICE" AS "l_extendedprice", - "t2"."L_DISCOUNT" AS "l_discount", - "t2"."L_TAX" AS "l_tax", - "t2"."L_RETURNFLAG" AS "l_returnflag", - "t2"."L_LINESTATUS" AS "l_linestatus", - "t2"."L_SHIPDATE" AS "l_shipdate", - "t2"."L_COMMITDATE" AS "l_commitdate", - "t2"."L_RECEIPTDATE" AS "l_receiptdate", - "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", - "t2"."L_SHIPMODE" AS "l_shipmode", - "t2"."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t2" - ) AS "t8" - ON "t7"."o_orderkey" = "t8"."l_orderkey" - ) AS "t12" + INNER JOIN "t5" AS "t9" + ON "t7"."o_orderkey" = "t9"."l_orderkey" + ) AS "t13" WHERE - "t12"."o_orderkey" IN ( + "t13"."o_orderkey" IN ( SELECT - "t9"."l_orderkey" + "t11"."l_orderkey" FROM ( SELECT - "t5"."l_orderkey", - SUM("t5"."l_quantity") AS "qty_sum" - FROM ( - SELECT - "t2"."L_ORDERKEY" AS "l_orderkey", - "t2"."L_PARTKEY" AS "l_partkey", - "t2"."L_SUPPKEY" AS "l_suppkey", - "t2"."L_LINENUMBER" AS "l_linenumber", - "t2"."L_QUANTITY" AS "l_quantity", - "t2"."L_EXTENDEDPRICE" AS "l_extendedprice", - "t2"."L_DISCOUNT" AS "l_discount", - "t2"."L_TAX" AS "l_tax", - "t2"."L_RETURNFLAG" AS "l_returnflag", - "t2"."L_LINESTATUS" AS "l_linestatus", - "t2"."L_SHIPDATE" AS "l_shipdate", - "t2"."L_COMMITDATE" AS "l_commitdate", - "t2"."L_RECEIPTDATE" AS "l_receiptdate", - "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", - "t2"."L_SHIPMODE" AS "l_shipmode", - "t2"."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t2" - ) AS "t5" + "t8"."l_orderkey", + SUM("t8"."l_quantity") AS "qty_sum" + FROM "t5" AS "t8" GROUP BY 1 
- ) AS "t9" + ) AS "t11" WHERE - "t9"."qty_sum" > 300 + "t11"."qty_sum" > 300 ) - ) AS "t14" + ) AS "t15" GROUP BY 1, 2, 3, 4, 5 -) AS "t15" +) AS "t16" ORDER BY - "t15"."o_totalprice" DESC NULLS LAST, - "t15"."o_orderdate" ASC + "t16"."o_totalprice" DESC NULLS LAST, + "t16"."o_orderdate" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql index 6ca53704e385..6fbec4262788 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql @@ -1,28 +1,48 @@ +WITH "t7" AS ( + SELECT + "t3"."L_ORDERKEY" AS "l_orderkey", + "t3"."L_PARTKEY" AS "l_partkey", + "t3"."L_SUPPKEY" AS "l_suppkey", + "t3"."L_LINENUMBER" AS "l_linenumber", + "t3"."L_QUANTITY" AS "l_quantity", + "t3"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t3"."L_DISCOUNT" AS "l_discount", + "t3"."L_TAX" AS "l_tax", + "t3"."L_RETURNFLAG" AS "l_returnflag", + "t3"."L_LINESTATUS" AS "l_linestatus", + "t3"."L_SHIPDATE" AS "l_shipdate", + "t3"."L_COMMITDATE" AS "l_commitdate", + "t3"."L_RECEIPTDATE" AS "l_receiptdate", + "t3"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t3"."L_SHIPMODE" AS "l_shipmode", + "t3"."L_COMMENT" AS "l_comment" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t3" +) SELECT - "t21"."s_name", - "t21"."numwait" + "t22"."s_name", + "t22"."numwait" FROM ( SELECT - "t20"."s_name", + "t21"."s_name", COUNT(*) AS "numwait" FROM ( SELECT - "t17"."l1_orderkey", - "t17"."o_orderstatus", - "t17"."l_receiptdate", - "t17"."l_commitdate", - "t17"."l1_suppkey", - "t17"."s_name", - "t17"."n_name" + "t18"."l1_orderkey", + "t18"."o_orderstatus", + "t18"."l_receiptdate", + "t18"."l_commitdate", + "t18"."l1_suppkey", + "t18"."s_name", + "t18"."n_name" FROM ( SELECT - "t9"."l_orderkey" AS "l1_orderkey", - "t12"."o_orderstatus", - "t9"."l_receiptdate", - "t9"."l_commitdate", - "t9"."l_suppkey" AS "l1_suppkey", + "t12"."l_orderkey" AS "l1_orderkey", + "t9"."o_orderstatus", + "t12"."l_receiptdate", + "t12"."l_commitdate", + "t12"."l_suppkey" AS "l1_suppkey", "t8"."s_name", - "t13"."n_name" + "t10"."n_name" FROM ( SELECT "t0"."S_SUPPKEY" AS "s_suppkey", @@ -34,129 +54,72 @@ FROM ( "t0"."S_COMMENT" AS "s_comment" FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS "t0" ) AS "t8" + INNER JOIN "t7" AS "t12" + ON "t8"."s_suppkey" = "t12"."l_suppkey" INNER JOIN ( SELECT - "t1"."L_ORDERKEY" AS "l_orderkey", - "t1"."L_PARTKEY" AS "l_partkey", - "t1"."L_SUPPKEY" AS "l_suppkey", - "t1"."L_LINENUMBER" AS "l_linenumber", - "t1"."L_QUANTITY" AS "l_quantity", - "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", - "t1"."L_DISCOUNT" AS "l_discount", - "t1"."L_TAX" AS "l_tax", - "t1"."L_RETURNFLAG" AS "l_returnflag", - "t1"."L_LINESTATUS" AS "l_linestatus", - "t1"."L_SHIPDATE" AS "l_shipdate", - "t1"."L_COMMITDATE" AS "l_commitdate", - "t1"."L_RECEIPTDATE" AS "l_receiptdate", - "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", - "t1"."L_SHIPMODE" AS "l_shipmode", - "t1"."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t1" + "t1"."O_ORDERKEY" AS "o_orderkey", + "t1"."O_CUSTKEY" AS "o_custkey", + "t1"."O_ORDERSTATUS" AS "o_orderstatus", + "t1"."O_TOTALPRICE" AS "o_totalprice", + "t1"."O_ORDERDATE" AS "o_orderdate", + "t1"."O_ORDERPRIORITY" AS "o_orderpriority", + "t1"."O_CLERK" AS "o_clerk", + "t1"."O_SHIPPRIORITY" AS "o_shippriority", + "t1"."O_COMMENT" AS "o_comment" + FROM 
"SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS "t1" ) AS "t9" - ON "t8"."s_suppkey" = "t9"."l_suppkey" - INNER JOIN ( - SELECT - "t2"."O_ORDERKEY" AS "o_orderkey", - "t2"."O_CUSTKEY" AS "o_custkey", - "t2"."O_ORDERSTATUS" AS "o_orderstatus", - "t2"."O_TOTALPRICE" AS "o_totalprice", - "t2"."O_ORDERDATE" AS "o_orderdate", - "t2"."O_ORDERPRIORITY" AS "o_orderpriority", - "t2"."O_CLERK" AS "o_clerk", - "t2"."O_SHIPPRIORITY" AS "o_shippriority", - "t2"."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS "t2" - ) AS "t12" - ON "t12"."o_orderkey" = "t9"."l_orderkey" + ON "t9"."o_orderkey" = "t12"."l_orderkey" INNER JOIN ( SELECT - "t3"."N_NATIONKEY" AS "n_nationkey", - "t3"."N_NAME" AS "n_name", - "t3"."N_REGIONKEY" AS "n_regionkey", - "t3"."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t3" - ) AS "t13" - ON "t8"."s_nationkey" = "t13"."n_nationkey" - ) AS "t17" + "t2"."N_NATIONKEY" AS "n_nationkey", + "t2"."N_NAME" AS "n_name", + "t2"."N_REGIONKEY" AS "n_regionkey", + "t2"."N_COMMENT" AS "n_comment" + FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t2" + ) AS "t10" + ON "t8"."s_nationkey" = "t10"."n_nationkey" + ) AS "t18" WHERE - "t17"."o_orderstatus" = 'F' - AND "t17"."l_receiptdate" > "t17"."l_commitdate" - AND "t17"."n_name" = 'SAUDI ARABIA' + "t18"."o_orderstatus" = 'F' + AND "t18"."l_receiptdate" > "t18"."l_commitdate" + AND "t18"."n_name" = 'SAUDI ARABIA' AND EXISTS( SELECT 1 AS "1" - FROM ( - SELECT - "t1"."L_ORDERKEY" AS "l_orderkey", - "t1"."L_PARTKEY" AS "l_partkey", - "t1"."L_SUPPKEY" AS "l_suppkey", - "t1"."L_LINENUMBER" AS "l_linenumber", - "t1"."L_QUANTITY" AS "l_quantity", - "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", - "t1"."L_DISCOUNT" AS "l_discount", - "t1"."L_TAX" AS "l_tax", - "t1"."L_RETURNFLAG" AS "l_returnflag", - "t1"."L_LINESTATUS" AS "l_linestatus", - "t1"."L_SHIPDATE" AS "l_shipdate", - "t1"."L_COMMITDATE" AS "l_commitdate", - "t1"."L_RECEIPTDATE" AS "l_receiptdate", - "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", - "t1"."L_SHIPMODE" AS "l_shipmode", - "t1"."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t1" - ) AS "t10" + FROM "t7" AS "t13" WHERE ( - "t10"."l_orderkey" = "t17"."l1_orderkey" + "t13"."l_orderkey" = "t18"."l1_orderkey" ) AND ( - "t10"."l_suppkey" <> "t17"."l1_suppkey" + "t13"."l_suppkey" <> "t18"."l1_suppkey" ) ) AND NOT ( EXISTS( SELECT 1 AS "1" - FROM ( - SELECT - "t1"."L_ORDERKEY" AS "l_orderkey", - "t1"."L_PARTKEY" AS "l_partkey", - "t1"."L_SUPPKEY" AS "l_suppkey", - "t1"."L_LINENUMBER" AS "l_linenumber", - "t1"."L_QUANTITY" AS "l_quantity", - "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", - "t1"."L_DISCOUNT" AS "l_discount", - "t1"."L_TAX" AS "l_tax", - "t1"."L_RETURNFLAG" AS "l_returnflag", - "t1"."L_LINESTATUS" AS "l_linestatus", - "t1"."L_SHIPDATE" AS "l_shipdate", - "t1"."L_COMMITDATE" AS "l_commitdate", - "t1"."L_RECEIPTDATE" AS "l_receiptdate", - "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", - "t1"."L_SHIPMODE" AS "l_shipmode", - "t1"."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t1" - ) AS "t11" + FROM "t7" AS "t14" WHERE ( ( - "t11"."l_orderkey" = "t17"."l1_orderkey" + "t14"."l_orderkey" = "t18"."l1_orderkey" ) AND ( - "t11"."l_suppkey" <> "t17"."l1_suppkey" + "t14"."l_suppkey" <> "t18"."l1_suppkey" ) ) AND ( - "t11"."l_receiptdate" > "t11"."l_commitdate" + "t14"."l_receiptdate" > "t14"."l_commitdate" ) ) ) - ) AS "t20" + ) AS "t21" GROUP BY 1 -) AS "t21" +) AS "t22" ORDER BY - "t21"."numwait" DESC NULLS 
LAST, - "t21"."s_name" ASC + "t22"."numwait" DESC NULLS LAST, + "t22"."s_name" ASC LIMIT 100 \ No newline at end of file From 5179b81f93aa59f337211d8c22f7e3c8239317ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 19 Jan 2024 22:54:22 +0100 Subject: [PATCH 094/161] chore(sql): regenerate snapshots for bigquery --- .../out.sql | 26 +-- .../test_union_cte/False-False/out.sql | 41 ++-- .../test_union_cte/False-True/out.sql | 41 ++-- .../test_union_cte/True-False/out.sql | 41 ++-- .../test_union_cte/True-True/out.sql | 41 ++-- .../test_union_aliasing/bigquery/out.sql | 181 ++++++------------ 6 files changed, 151 insertions(+), 220 deletions(-) diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_projection_fusion_only_peeks_at_immediate_parent/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_projection_fusion_only_peeks_at_immediate_parent/out.sql index c64aff514d1f..4e9dfeb746c9 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_projection_fusion_only_peeks_at_immediate_parent/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_projection_fusion_only_peeks_at_immediate_parent/out.sql @@ -1,19 +1,4 @@ -SELECT - t2.file_date, - t2.PARTITIONTIME, - t2.val, - t2.XYZ -FROM ( - SELECT - CAST(t0.file_date AS DATE) AS file_date, - t0.PARTITIONTIME, - t0.val, - t0.val * 2 AS XYZ - FROM unbound_table AS t0 - WHERE - t0.PARTITIONTIME < DATE(2017, 1, 1) AND CAST(t0.file_date AS DATE) < DATE(2017, 1, 1) -) AS t2 -INNER JOIN ( +WITH t1 AS ( SELECT CAST(t0.file_date AS DATE) AS file_date, t0.PARTITIONTIME, @@ -22,5 +7,12 @@ INNER JOIN ( FROM unbound_table AS t0 WHERE t0.PARTITIONTIME < DATE(2017, 1, 1) AND CAST(t0.file_date AS DATE) < DATE(2017, 1, 1) -) AS t4 +) +SELECT + t3.file_date, + t3.PARTITIONTIME, + t3.val, + t3.XYZ +FROM t1 AS t3 +INNER JOIN t1 AS t5 ON TRUE \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-False/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-False/out.sql index 061bd3df1a64..3a6924cb5b2e 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-False/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-False/out.sql @@ -1,30 +1,29 @@ +WITH t1 AS ( + SELECT + t0.string_col, + SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 + GROUP BY + 1 +) SELECT - t6.string_col, - t6.metric + t7.string_col, + t7.metric FROM ( SELECT - t4.string_col, - t4.metric + t5.string_col, + t5.metric FROM ( SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 - GROUP BY - 1 + * + FROM t1 AS t2 UNION ALL SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 - GROUP BY - 1 - ) AS t4 + * + FROM t1 AS t4 + ) AS t5 UNION ALL SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 - GROUP BY - 1 -) AS t6 \ No newline at end of file + * + FROM t1 AS t3 +) AS t7 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-True/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-True/out.sql index ef59312a1de9..cc408f613945 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-True/out.sql +++ 
b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-True/out.sql @@ -1,30 +1,29 @@ +WITH t1 AS ( + SELECT + t0.string_col, + SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 + GROUP BY + 1 +) SELECT - t6.string_col, - t6.metric + t7.string_col, + t7.metric FROM ( SELECT - t4.string_col, - t4.metric + t5.string_col, + t5.metric FROM ( SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 - GROUP BY - 1 + * + FROM t1 AS t2 UNION DISTINCT SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 - GROUP BY - 1 - ) AS t4 + * + FROM t1 AS t4 + ) AS t5 UNION ALL SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 - GROUP BY - 1 -) AS t6 \ No newline at end of file + * + FROM t1 AS t3 +) AS t7 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-False/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-False/out.sql index 36dde7359805..81d85c90cbb2 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-False/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-False/out.sql @@ -1,30 +1,29 @@ +WITH t1 AS ( + SELECT + t0.string_col, + SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 + GROUP BY + 1 +) SELECT - t6.string_col, - t6.metric + t7.string_col, + t7.metric FROM ( SELECT - t4.string_col, - t4.metric + t5.string_col, + t5.metric FROM ( SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 - GROUP BY - 1 + * + FROM t1 AS t2 UNION ALL SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 - GROUP BY - 1 - ) AS t4 + * + FROM t1 AS t4 + ) AS t5 UNION DISTINCT SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 - GROUP BY - 1 -) AS t6 \ No newline at end of file + * + FROM t1 AS t3 +) AS t7 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-True/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-True/out.sql index 1ba202a0f834..77a087cb3362 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-True/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-True/out.sql @@ -1,30 +1,29 @@ +WITH t1 AS ( + SELECT + t0.string_col, + SUM(t0.double_col) AS metric + FROM functional_alltypes AS t0 + GROUP BY + 1 +) SELECT - t6.string_col, - t6.metric + t7.string_col, + t7.metric FROM ( SELECT - t4.string_col, - t4.metric + t5.string_col, + t5.metric FROM ( SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 - GROUP BY - 1 + * + FROM t1 AS t2 UNION DISTINCT SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 - GROUP BY - 1 - ) AS t4 + * + FROM t1 AS t4 + ) AS t5 UNION DISTINCT SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 - GROUP BY - 1 -) AS t6 \ No newline at end of file + * + FROM t1 AS t3 +) AS t7 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/bigquery/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/bigquery/out.sql index a53e4550c0dd..a5ee8fb42f14 100644 --- 
a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/bigquery/out.sql @@ -1,138 +1,81 @@ -SELECT - t10.field_of_study, - t10.diff -FROM ( +WITH t5 AS ( SELECT - t5.field_of_study, - t5.diff + t4.field_of_study, + ANY_VALUE(t4.diff) AS diff FROM ( SELECT - t4.field_of_study, - ANY_VALUE(t4.diff) AS diff + t3.field_of_study, + t3.years, + t3.degrees, + t3.earliest_degrees, + t3.latest_degrees, + t3.latest_degrees - t3.earliest_degrees AS diff FROM ( SELECT - t3.field_of_study, - t3.years, - t3.degrees, - t3.earliest_degrees, - t3.latest_degrees, - t3.latest_degrees - t3.earliest_degrees AS diff + t2.field_of_study, + t2.years, + t2.degrees, + first_value(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, + last_value(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees FROM ( SELECT - t2.field_of_study, - t2.years, - t2.degrees, - first_value(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, - last_value(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees + t1.field_of_study, + t1.__pivoted__.years AS years, + t1.__pivoted__.degrees AS degrees FROM ( SELECT - t1.field_of_study, - t1.__pivoted__.years AS years, - t1.__pivoted__.degrees AS degrees - FROM ( - SELECT - t0.field_of_study, - IF(pos = pos_2, __pivoted__, NULL) AS __pivoted__ - FROM humanities AS t0 - CROSS JOIN UNNEST(GENERATE_ARRAY( - 0, - GREATEST( + t0.field_of_study, + IF(pos = pos_2, __pivoted__, NULL) AS __pivoted__ + FROM humanities AS t0 + CROSS JOIN UNNEST(GENERATE_ARRAY( + 0, + GREATEST( + ARRAY_LENGTH( + [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] + ) + ) - 1 + )) AS pos + CROSS JOIN UNNEST([STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS 
degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)]) AS __pivoted__ WITH OFFSET AS pos_2 + WHERE + pos = pos_2 + OR ( + pos > ( + ARRAY_LENGTH( + [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] + ) - 1 + ) + AND pos_2 = ( ARRAY_LENGTH( [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] - ) - ) - 1 - )) AS pos - CROSS JOIN UNNEST([STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)]) AS __pivoted__ WITH OFFSET AS pos_2 - WHERE - pos = pos_2 - OR ( - pos > ( - ARRAY_LENGTH( - [STRUCT('1970-71' AS years, 
t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] - ) - 1 - ) - AND pos_2 = ( - ARRAY_LENGTH( - [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] - ) - 1 - ) + ) - 1 ) - ) AS t1 - ) AS t2 - ) AS t3 - ) AS t4 - GROUP BY - 1 - ) AS t5 + ) + ) AS t1 + ) AS t2 + ) AS t3 + ) AS t4 + GROUP BY + 1 +) +SELECT + t11.field_of_study, + t11.diff +FROM ( + SELECT + t6.field_of_study, + t6.diff + FROM t5 AS t6 ORDER BY - t5.diff DESC + t6.diff DESC LIMIT 10 UNION ALL SELECT - t5.field_of_study, - t5.diff - FROM ( - SELECT - t4.field_of_study, - ANY_VALUE(t4.diff) AS diff - FROM ( - SELECT - t3.field_of_study, - t3.years, - t3.degrees, - t3.earliest_degrees, - t3.latest_degrees, - t3.latest_degrees - t3.earliest_degrees AS diff - FROM ( - SELECT - t2.field_of_study, - t2.years, - t2.degrees, - first_value(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, - last_value(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees - FROM ( - SELECT - t1.field_of_study, - t1.__pivoted__.years AS years, - t1.__pivoted__.degrees AS degrees - FROM ( - SELECT - t0.field_of_study, - IF(pos = pos_2, __pivoted__, NULL) AS __pivoted__ - FROM humanities AS t0 - CROSS JOIN UNNEST(GENERATE_ARRAY( - 0, - GREATEST( - ARRAY_LENGTH( - [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS 
degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] - ) - ) - 1 - )) AS pos - CROSS JOIN UNNEST([STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)]) AS __pivoted__ WITH OFFSET AS pos_2 - WHERE - pos = pos_2 - OR ( - pos > ( - ARRAY_LENGTH( - [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] - ) - 1 - ) - AND pos_2 = ( - ARRAY_LENGTH( - [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' 
AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] - ) - 1 - ) - ) - ) AS t1 - ) AS t2 - ) AS t3 - ) AS t4 - GROUP BY - 1 - ) AS t5 + t6.field_of_study, + t6.diff + FROM t5 AS t6 WHERE - t5.diff < 0 + t6.diff < 0 ORDER BY - t5.diff ASC NULLS LAST + t6.diff ASC NULLS LAST LIMIT 10 -) AS t10 \ No newline at end of file +) AS t11 \ No newline at end of file From b8d06ced704e8bcb9d151d3a9ca2953d2368aa3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 19 Jan 2024 23:04:44 +0100 Subject: [PATCH 095/161] chore(sql): regenerate snapshots for pyspark --- .../test_union_aliasing/pyspark/out.sql | 181 ++++++------------ 1 file changed, 63 insertions(+), 118 deletions(-) diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/pyspark/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/pyspark/out.sql index 583ea9c73238..42c725d21361 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/pyspark/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/pyspark/out.sql @@ -1,134 +1,79 @@ -SELECT - `t10`.`field_of_study`, - `t10`.`diff` -FROM ( +WITH `t5` AS ( SELECT - `t5`.`field_of_study`, - `t5`.`diff` + `t4`.`field_of_study`, + FIRST(`t4`.`diff`, TRUE) AS `diff` FROM ( SELECT - `t4`.`field_of_study`, - FIRST(`t4`.`diff`, TRUE) AS `diff` + `t3`.`field_of_study`, + `t3`.`years`, + `t3`.`degrees`, + `t3`.`earliest_degrees`, + `t3`.`latest_degrees`, + `t3`.`latest_degrees` - `t3`.`earliest_degrees` AS `diff` FROM ( SELECT - `t3`.`field_of_study`, - `t3`.`years`, - `t3`.`degrees`, - `t3`.`earliest_degrees`, - `t3`.`latest_degrees`, - `t3`.`latest_degrees` - `t3`.`earliest_degrees` AS `diff` + `t2`.`field_of_study`, + `t2`.`years`, + `t2`.`degrees`, + FIRST(`t2`.`degrees`, TRUE) OVER (PARTITION BY `t2`.`field_of_study` ORDER BY `t2`.`years` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `earliest_degrees`, + LAST(`t2`.`degrees`, TRUE) OVER (PARTITION BY `t2`.`field_of_study` ORDER BY `t2`.`years` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `latest_degrees` FROM ( SELECT - `t2`.`field_of_study`, - `t2`.`years`, - `t2`.`degrees`, - FIRST(`t2`.`degrees`, TRUE) OVER (PARTITION BY `t2`.`field_of_study` ORDER BY `t2`.`years` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `earliest_degrees`, - LAST(`t2`.`degrees`, TRUE) OVER (PARTITION BY `t2`.`field_of_study` ORDER BY `t2`.`years` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `latest_degrees` + `t1`.`field_of_study`, + `t1`.`__pivoted__`.`years` AS `years`, + `t1`.`__pivoted__`.`degrees` AS `degrees` FROM ( SELECT - `t1`.`field_of_study`, - `t1`.`__pivoted__`.`years` AS `years`, - `t1`.`__pivoted__`.`degrees` AS `degrees` - FROM ( - SELECT - `t0`.`field_of_study`, - EXPLODE( - ARRAY( - STRUCT('1970-71' AS `years`, `t0`.`1970-71` AS `degrees`), - STRUCT('1975-76' AS `years`, `t0`.`1975-76` AS `degrees`), - STRUCT('1980-81' AS `years`, `t0`.`1980-81` AS `degrees`), - STRUCT('1985-86' AS `years`, `t0`.`1985-86` AS `degrees`), - STRUCT('1990-91' AS `years`, `t0`.`1990-91` AS `degrees`), - STRUCT('1995-96' AS `years`, `t0`.`1995-96` AS `degrees`), - STRUCT('2000-01' AS `years`, `t0`.`2000-01` AS `degrees`), - STRUCT('2005-06' AS `years`, `t0`.`2005-06` AS `degrees`), - STRUCT('2010-11' AS `years`, `t0`.`2010-11` AS `degrees`), - STRUCT('2011-12' AS `years`, `t0`.`2011-12` AS `degrees`), - STRUCT('2012-13' AS 
`years`, `t0`.`2012-13` AS `degrees`), - STRUCT('2013-14' AS `years`, `t0`.`2013-14` AS `degrees`), - STRUCT('2014-15' AS `years`, `t0`.`2014-15` AS `degrees`), - STRUCT('2015-16' AS `years`, `t0`.`2015-16` AS `degrees`), - STRUCT('2016-17' AS `years`, `t0`.`2016-17` AS `degrees`), - STRUCT('2017-18' AS `years`, `t0`.`2017-18` AS `degrees`), - STRUCT('2018-19' AS `years`, `t0`.`2018-19` AS `degrees`), - STRUCT('2019-20' AS `years`, `t0`.`2019-20` AS `degrees`) - ) - ) AS `__pivoted__` - FROM `humanities` AS `t0` - ) AS `t1` - ) AS `t2` - ) AS `t3` - ) AS `t4` - GROUP BY - 1 - ) AS `t5` + `t0`.`field_of_study`, + EXPLODE( + ARRAY( + STRUCT('1970-71' AS `years`, `t0`.`1970-71` AS `degrees`), + STRUCT('1975-76' AS `years`, `t0`.`1975-76` AS `degrees`), + STRUCT('1980-81' AS `years`, `t0`.`1980-81` AS `degrees`), + STRUCT('1985-86' AS `years`, `t0`.`1985-86` AS `degrees`), + STRUCT('1990-91' AS `years`, `t0`.`1990-91` AS `degrees`), + STRUCT('1995-96' AS `years`, `t0`.`1995-96` AS `degrees`), + STRUCT('2000-01' AS `years`, `t0`.`2000-01` AS `degrees`), + STRUCT('2005-06' AS `years`, `t0`.`2005-06` AS `degrees`), + STRUCT('2010-11' AS `years`, `t0`.`2010-11` AS `degrees`), + STRUCT('2011-12' AS `years`, `t0`.`2011-12` AS `degrees`), + STRUCT('2012-13' AS `years`, `t0`.`2012-13` AS `degrees`), + STRUCT('2013-14' AS `years`, `t0`.`2013-14` AS `degrees`), + STRUCT('2014-15' AS `years`, `t0`.`2014-15` AS `degrees`), + STRUCT('2015-16' AS `years`, `t0`.`2015-16` AS `degrees`), + STRUCT('2016-17' AS `years`, `t0`.`2016-17` AS `degrees`), + STRUCT('2017-18' AS `years`, `t0`.`2017-18` AS `degrees`), + STRUCT('2018-19' AS `years`, `t0`.`2018-19` AS `degrees`), + STRUCT('2019-20' AS `years`, `t0`.`2019-20` AS `degrees`) + ) + ) AS `__pivoted__` + FROM `humanities` AS `t0` + ) AS `t1` + ) AS `t2` + ) AS `t3` + ) AS `t4` + GROUP BY + 1 +) +SELECT + `t11`.`field_of_study`, + `t11`.`diff` +FROM ( + SELECT + `t6`.`field_of_study`, + `t6`.`diff` + FROM `t5` AS `t6` ORDER BY - `t5`.`diff` DESC + `t6`.`diff` DESC LIMIT 10 UNION ALL SELECT - `t5`.`field_of_study`, - `t5`.`diff` - FROM ( - SELECT - `t4`.`field_of_study`, - FIRST(`t4`.`diff`, TRUE) AS `diff` - FROM ( - SELECT - `t3`.`field_of_study`, - `t3`.`years`, - `t3`.`degrees`, - `t3`.`earliest_degrees`, - `t3`.`latest_degrees`, - `t3`.`latest_degrees` - `t3`.`earliest_degrees` AS `diff` - FROM ( - SELECT - `t2`.`field_of_study`, - `t2`.`years`, - `t2`.`degrees`, - FIRST(`t2`.`degrees`, TRUE) OVER (PARTITION BY `t2`.`field_of_study` ORDER BY `t2`.`years` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `earliest_degrees`, - LAST(`t2`.`degrees`, TRUE) OVER (PARTITION BY `t2`.`field_of_study` ORDER BY `t2`.`years` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `latest_degrees` - FROM ( - SELECT - `t1`.`field_of_study`, - `t1`.`__pivoted__`.`years` AS `years`, - `t1`.`__pivoted__`.`degrees` AS `degrees` - FROM ( - SELECT - `t0`.`field_of_study`, - EXPLODE( - ARRAY( - STRUCT('1970-71' AS `years`, `t0`.`1970-71` AS `degrees`), - STRUCT('1975-76' AS `years`, `t0`.`1975-76` AS `degrees`), - STRUCT('1980-81' AS `years`, `t0`.`1980-81` AS `degrees`), - STRUCT('1985-86' AS `years`, `t0`.`1985-86` AS `degrees`), - STRUCT('1990-91' AS `years`, `t0`.`1990-91` AS `degrees`), - STRUCT('1995-96' AS `years`, `t0`.`1995-96` AS `degrees`), - STRUCT('2000-01' AS `years`, `t0`.`2000-01` AS `degrees`), - STRUCT('2005-06' AS `years`, `t0`.`2005-06` AS `degrees`), - STRUCT('2010-11' AS `years`, `t0`.`2010-11` AS `degrees`), - 
STRUCT('2011-12' AS `years`, `t0`.`2011-12` AS `degrees`), - STRUCT('2012-13' AS `years`, `t0`.`2012-13` AS `degrees`), - STRUCT('2013-14' AS `years`, `t0`.`2013-14` AS `degrees`), - STRUCT('2014-15' AS `years`, `t0`.`2014-15` AS `degrees`), - STRUCT('2015-16' AS `years`, `t0`.`2015-16` AS `degrees`), - STRUCT('2016-17' AS `years`, `t0`.`2016-17` AS `degrees`), - STRUCT('2017-18' AS `years`, `t0`.`2017-18` AS `degrees`), - STRUCT('2018-19' AS `years`, `t0`.`2018-19` AS `degrees`), - STRUCT('2019-20' AS `years`, `t0`.`2019-20` AS `degrees`) - ) - ) AS `__pivoted__` - FROM `humanities` AS `t0` - ) AS `t1` - ) AS `t2` - ) AS `t3` - ) AS `t4` - GROUP BY - 1 - ) AS `t5` + `t6`.`field_of_study`, + `t6`.`diff` + FROM `t5` AS `t6` WHERE - `t5`.`diff` < 0 + `t6`.`diff` < 0 ORDER BY - `t5`.`diff` ASC NULLS LAST + `t6`.`diff` ASC NULLS LAST LIMIT 10 -) AS `t10` \ No newline at end of file +) AS `t11` \ No newline at end of file From 6827be0b2295f7058c936903894f5956d108e8e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 19 Jan 2024 23:21:38 +0100 Subject: [PATCH 096/161] chore(sql): regenerate snapshots for trino --- .../test_union_aliasing/trino/out.sql | 171 +++++--------- .../test_h07/test_tpc_h07/trino/h07.sql | 112 +++++----- .../test_h08/test_tpc_h08/trino/h08.sql | 108 +++++---- .../test_h18/test_tpc_h18/trino/h18.sql | 208 ++++++++---------- .../test_h21/test_tpc_h21/trino/h21.sql | 179 ++++++--------- 5 files changed, 329 insertions(+), 449 deletions(-) diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql index f20ebad9894f..ad38eea501b9 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql @@ -1,130 +1,77 @@ -SELECT - "t10"."field_of_study", - "t10"."diff" -FROM ( +WITH "t5" AS ( SELECT - "t5"."field_of_study", - "t5"."diff" + "t4"."field_of_study", + ARBITRARY("t4"."diff") AS "diff" FROM ( SELECT - "t4"."field_of_study", - ARBITRARY("t4"."diff") AS "diff" + "t3"."field_of_study", + "t3"."years", + "t3"."degrees", + "t3"."earliest_degrees", + "t3"."latest_degrees", + "t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff" FROM ( SELECT - "t3"."field_of_study", - "t3"."years", - "t3"."degrees", - "t3"."earliest_degrees", - "t3"."latest_degrees", - "t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff" + "t2"."field_of_study", + "t2"."years", + "t2"."degrees", + FIRST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees", + LAST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees" FROM ( SELECT - "t2"."field_of_study", - "t2"."years", - "t2"."degrees", - FIRST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees", - LAST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees" + "t1"."field_of_study", + "t1"."__pivoted__"."years" AS "years", + "t1"."__pivoted__"."degrees" AS "degrees" FROM ( SELECT - "t1"."field_of_study", - "t1"."__pivoted__"."years" AS "years", - "t1"."__pivoted__"."degrees" 
AS "degrees" - FROM ( - SELECT - "t0"."field_of_study", - IF(_u.pos = _u_2.pos_2, _u_2."__pivoted__") AS "__pivoted__" - FROM "humanities" AS "t0" - CROSS JOIN UNNEST(SEQUENCE( - 1, - GREATEST( - CARDINALITY( - ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] - ) + "t0"."field_of_study", + IF(_u.pos = _u_2.pos_2, _u_2."__pivoted__") AS "__pivoted__" + FROM "humanities" AS "t0" + CROSS JOIN UNNEST(SEQUENCE( + 1, + GREATEST( + CARDINALITY( + ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] ) - )) AS _u(pos) - CROSS JOIN UNNEST(ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, 
degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))]) WITH ORDINALITY AS _u_2("__pivoted__", pos_2) - WHERE - _u.pos = _u_2.pos_2 - OR ( - _u.pos > CARDINALITY( - ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] - ) - AND _u_2.pos_2 = CARDINALITY( - ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', 
"t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] - ) + ) + )) AS _u(pos) + CROSS JOIN UNNEST(ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))]) WITH ORDINALITY AS _u_2("__pivoted__", pos_2) + WHERE + _u.pos = _u_2.pos_2 + OR ( + _u.pos > CARDINALITY( + ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] ) - ) AS "t1" - ) AS "t2" - ) AS "t3" - ) AS "t4" - GROUP BY - 1 - ) AS "t5" + AND _u_2.pos_2 = CARDINALITY( + ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', 
"t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] + ) + ) + ) AS "t1" + ) AS "t2" + ) AS "t3" + ) AS "t4" + GROUP BY + 1 +) +SELECT + "t11"."field_of_study", + "t11"."diff" +FROM ( + SELECT + "t6"."field_of_study", + "t6"."diff" + FROM "t5" AS "t6" ORDER BY - "t5"."diff" DESC + "t6"."diff" DESC LIMIT 10 UNION ALL SELECT - "t5"."field_of_study", - "t5"."diff" - FROM ( - SELECT - "t4"."field_of_study", - ARBITRARY("t4"."diff") AS "diff" - FROM ( - SELECT - "t3"."field_of_study", - "t3"."years", - "t3"."degrees", - "t3"."earliest_degrees", - "t3"."latest_degrees", - "t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff" - FROM ( - SELECT - "t2"."field_of_study", - "t2"."years", - "t2"."degrees", - FIRST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees", - LAST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees" - FROM ( - SELECT - "t1"."field_of_study", - "t1"."__pivoted__"."years" AS "years", - "t1"."__pivoted__"."degrees" AS "degrees" - FROM ( - SELECT - "t0"."field_of_study", - IF(_u.pos = _u_2.pos_2, _u_2."__pivoted__") AS "__pivoted__" - FROM "humanities" AS "t0" - CROSS JOIN UNNEST(SEQUENCE( - 1, - GREATEST( - CARDINALITY( - ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', 
"t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] - ) - ) - )) AS _u(pos) - CROSS JOIN UNNEST(ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))]) WITH ORDINALITY AS _u_2("__pivoted__", pos_2) - WHERE - _u.pos = _u_2.pos_2 - OR ( - _u.pos > CARDINALITY( - ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] - ) - AND _u_2.pos_2 = CARDINALITY( - ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', 
"t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] - ) - ) - ) AS "t1" - ) AS "t2" - ) AS "t3" - ) AS "t4" - GROUP BY - 1 - ) AS "t5" + "t6"."field_of_study", + "t6"."diff" + FROM "t5" AS "t6" WHERE - "t5"."diff" < 0 + "t6"."diff" < 0 ORDER BY - "t5"."diff" ASC + "t6"."diff" ASC LIMIT 10 -) AS "t10" \ No newline at end of file +) AS "t11" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql index b484929c34d4..c891e8aa758f 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql @@ -1,33 +1,41 @@ +WITH "t5" AS ( + SELECT + "t4"."n_nationkey", + "t4"."n_name", + "t4"."n_regionkey", + "t4"."n_comment" + FROM "hive"."ibis_sf1"."nation" AS "t4" +) SELECT - "t24"."supp_nation", - "t24"."cust_nation", - "t24"."l_year", - "t24"."revenue" + "t25"."supp_nation", + "t25"."cust_nation", + "t25"."l_year", + "t25"."revenue" FROM ( SELECT - "t23"."supp_nation", - "t23"."cust_nation", - "t23"."l_year", - SUM("t23"."volume") AS "revenue" + "t24"."supp_nation", + "t24"."cust_nation", + "t24"."l_year", + SUM("t24"."volume") AS "revenue" FROM ( SELECT - "t22"."supp_nation", - "t22"."cust_nation", - "t22"."l_shipdate", - "t22"."l_extendedprice", - "t22"."l_discount", - "t22"."l_year", - "t22"."volume" + "t23"."supp_nation", + "t23"."cust_nation", + "t23"."l_shipdate", + "t23"."l_extendedprice", + "t23"."l_discount", + "t23"."l_year", + "t23"."volume" FROM ( SELECT - "t10"."n_name" AS "supp_nation", - "t16"."n_name" AS "cust_nation", - "t13"."l_shipdate", - "t13"."l_extendedprice", - "t13"."l_discount", - EXTRACT(year FROM "t13"."l_shipdate") AS "l_year", - "t13"."l_extendedprice" * ( - 1 - "t13"."l_discount" + "t15"."n_name" AS "supp_nation", + "t17"."n_name" AS "cust_nation", + "t12"."l_shipdate", + "t12"."l_extendedprice", + "t12"."l_discount", + EXTRACT(year FROM "t12"."l_shipdate") AS "l_year", + "t12"."l_extendedprice" * ( + 1 - "t12"."l_discount" ) AS "volume" FROM ( SELECT @@ -39,7 +47,7 @@ FROM ( CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", "t0"."s_comment" FROM "hive"."ibis_sf1"."supplier" AS "t0" - ) AS "t12" + ) AS "t11" INNER JOIN ( SELECT "t1"."l_orderkey", @@ -59,8 +67,8 @@ FROM ( "t1"."l_shipmode", "t1"."l_comment" FROM "hive"."ibis_sf1"."lineitem" AS "t1" - ) AS "t13" - ON "t12"."s_suppkey" = 
"t13"."l_suppkey" + ) AS "t12" + ON "t11"."s_suppkey" = "t12"."l_suppkey" INNER JOIN ( SELECT "t2"."o_orderkey", @@ -73,8 +81,8 @@ FROM ( "t2"."o_shippriority", "t2"."o_comment" FROM "hive"."ibis_sf1"."orders" AS "t2" - ) AS "t14" - ON "t14"."o_orderkey" = "t13"."l_orderkey" + ) AS "t13" + ON "t13"."o_orderkey" = "t12"."l_orderkey" INNER JOIN ( SELECT "t3"."c_custkey", @@ -86,52 +94,38 @@ FROM ( "t3"."c_mktsegment", "t3"."c_comment" FROM "hive"."ibis_sf1"."customer" AS "t3" - ) AS "t15" - ON "t15"."c_custkey" = "t14"."o_custkey" - INNER JOIN ( - SELECT - "t4"."n_nationkey", - "t4"."n_name", - "t4"."n_regionkey", - "t4"."n_comment" - FROM "hive"."ibis_sf1"."nation" AS "t4" - ) AS "t10" - ON "t12"."s_nationkey" = "t10"."n_nationkey" - INNER JOIN ( - SELECT - "t4"."n_nationkey", - "t4"."n_name", - "t4"."n_regionkey", - "t4"."n_comment" - FROM "hive"."ibis_sf1"."nation" AS "t4" - ) AS "t16" - ON "t15"."c_nationkey" = "t16"."n_nationkey" - ) AS "t22" + ) AS "t14" + ON "t14"."c_custkey" = "t13"."o_custkey" + INNER JOIN "t5" AS "t15" + ON "t11"."s_nationkey" = "t15"."n_nationkey" + INNER JOIN "t5" AS "t17" + ON "t14"."c_nationkey" = "t17"."n_nationkey" + ) AS "t23" WHERE ( ( ( - "t22"."cust_nation" = 'FRANCE' + "t23"."cust_nation" = 'FRANCE' ) AND ( - "t22"."supp_nation" = 'GERMANY' + "t23"."supp_nation" = 'GERMANY' ) ) OR ( ( - "t22"."cust_nation" = 'GERMANY' + "t23"."cust_nation" = 'GERMANY' ) AND ( - "t22"."supp_nation" = 'FRANCE' + "t23"."supp_nation" = 'FRANCE' ) ) ) - AND "t22"."l_shipdate" BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') - ) AS "t23" + AND "t23"."l_shipdate" BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') + ) AS "t24" GROUP BY 1, 2, 3 -) AS "t24" +) AS "t25" ORDER BY - "t24"."supp_nation" ASC, - "t24"."cust_nation" ASC, - "t24"."l_year" ASC \ No newline at end of file + "t25"."supp_nation" ASC, + "t25"."cust_nation" ASC, + "t25"."l_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql index d54d0e87e9ae..b78f38c6519a 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql @@ -1,29 +1,37 @@ +WITH "t8" AS ( + SELECT + "t6"."n_nationkey", + "t6"."n_name", + "t6"."n_regionkey", + "t6"."n_comment" + FROM "hive"."ibis_sf1"."nation" AS "t6" +) SELECT - "t32"."o_year", - "t32"."mkt_share" + "t33"."o_year", + "t33"."mkt_share" FROM ( SELECT - "t31"."o_year", - CAST(SUM("t31"."nation_volume") AS DOUBLE) / SUM("t31"."volume") AS "mkt_share" + "t32"."o_year", + CAST(SUM("t32"."nation_volume") AS DOUBLE) / SUM("t32"."volume") AS "mkt_share" FROM ( SELECT - "t30"."o_year", - "t30"."volume", - "t30"."nation", - "t30"."r_name", - "t30"."o_orderdate", - "t30"."p_type", - CASE WHEN "t30"."nation" = 'BRAZIL' THEN "t30"."volume" ELSE 0 END AS "nation_volume" + "t31"."o_year", + "t31"."volume", + "t31"."nation", + "t31"."r_name", + "t31"."o_orderdate", + "t31"."p_type", + CASE WHEN "t31"."nation" = 'BRAZIL' THEN "t31"."volume" ELSE 0 END AS "nation_volume" FROM ( SELECT - EXTRACT(year FROM "t20"."o_orderdate") AS "o_year", - "t18"."l_extendedprice" * ( - 1 - "t18"."l_discount" + EXTRACT(year FROM "t19"."o_orderdate") AS "o_year", + "t17"."l_extendedprice" * ( + 1 - "t17"."l_discount" ) AS "volume", - "t22"."n_name" AS "nation", - "t16"."r_name", - "t20"."o_orderdate", - "t17"."p_type" + 
"t23"."n_name" AS "nation", + "t14"."r_name", + "t19"."o_orderdate", + "t16"."p_type" FROM ( SELECT "t0"."p_partkey", @@ -36,7 +44,7 @@ FROM ( CAST("t0"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", "t0"."p_comment" FROM "hive"."ibis_sf1"."part" AS "t0" - ) AS "t17" + ) AS "t16" INNER JOIN ( SELECT "t1"."l_orderkey", @@ -56,8 +64,8 @@ FROM ( "t1"."l_shipmode", "t1"."l_comment" FROM "hive"."ibis_sf1"."lineitem" AS "t1" - ) AS "t18" - ON "t17"."p_partkey" = "t18"."l_partkey" + ) AS "t17" + ON "t16"."p_partkey" = "t17"."l_partkey" INNER JOIN ( SELECT "t2"."s_suppkey", @@ -68,8 +76,8 @@ FROM ( CAST("t2"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", "t2"."s_comment" FROM "hive"."ibis_sf1"."supplier" AS "t2" - ) AS "t19" - ON "t19"."s_suppkey" = "t18"."l_suppkey" + ) AS "t18" + ON "t18"."s_suppkey" = "t17"."l_suppkey" INNER JOIN ( SELECT "t3"."o_orderkey", @@ -82,8 +90,8 @@ FROM ( "t3"."o_shippriority", "t3"."o_comment" FROM "hive"."ibis_sf1"."orders" AS "t3" - ) AS "t20" - ON "t18"."l_orderkey" = "t20"."o_orderkey" + ) AS "t19" + ON "t17"."l_orderkey" = "t19"."o_orderkey" INNER JOIN ( SELECT "t4"."c_custkey", @@ -95,42 +103,28 @@ FROM ( "t4"."c_mktsegment", "t4"."c_comment" FROM "hive"."ibis_sf1"."customer" AS "t4" - ) AS "t21" - ON "t20"."o_custkey" = "t21"."c_custkey" + ) AS "t20" + ON "t19"."o_custkey" = "t20"."c_custkey" + INNER JOIN "t8" AS "t21" + ON "t20"."c_nationkey" = "t21"."n_nationkey" INNER JOIN ( SELECT - "t5"."n_nationkey", - "t5"."n_name", - "t5"."n_regionkey", - "t5"."n_comment" - FROM "hive"."ibis_sf1"."nation" AS "t5" + "t5"."r_regionkey", + "t5"."r_name", + "t5"."r_comment" + FROM "hive"."ibis_sf1"."region" AS "t5" ) AS "t14" - ON "t21"."c_nationkey" = "t14"."n_nationkey" - INNER JOIN ( - SELECT - "t6"."r_regionkey", - "t6"."r_name", - "t6"."r_comment" - FROM "hive"."ibis_sf1"."region" AS "t6" - ) AS "t16" - ON "t14"."n_regionkey" = "t16"."r_regionkey" - INNER JOIN ( - SELECT - "t5"."n_nationkey", - "t5"."n_name", - "t5"."n_regionkey", - "t5"."n_comment" - FROM "hive"."ibis_sf1"."nation" AS "t5" - ) AS "t22" - ON "t19"."s_nationkey" = "t22"."n_nationkey" - ) AS "t30" + ON "t21"."n_regionkey" = "t14"."r_regionkey" + INNER JOIN "t8" AS "t23" + ON "t18"."s_nationkey" = "t23"."n_nationkey" + ) AS "t31" WHERE - "t30"."r_name" = 'AMERICA' - AND "t30"."o_orderdate" BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') - AND "t30"."p_type" = 'ECONOMY ANODIZED STEEL' - ) AS "t31" + "t31"."r_name" = 'AMERICA' + AND "t31"."o_orderdate" BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') + AND "t31"."p_type" = 'ECONOMY ANODIZED STEEL' + ) AS "t32" GROUP BY 1 -) AS "t32" +) AS "t33" ORDER BY - "t32"."o_year" ASC \ No newline at end of file + "t33"."o_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql index 87e8f68633c3..73600edb4467 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql @@ -1,53 +1,73 @@ +WITH "t5" AS ( + SELECT + "t2"."l_orderkey", + "t2"."l_partkey", + "t2"."l_suppkey", + "t2"."l_linenumber", + CAST("t2"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t2"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t2"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t2"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t2"."l_returnflag", + 
"t2"."l_linestatus", + "t2"."l_shipdate", + "t2"."l_commitdate", + "t2"."l_receiptdate", + "t2"."l_shipinstruct", + "t2"."l_shipmode", + "t2"."l_comment" + FROM "hive"."ibis_sf1"."lineitem" AS "t2" +) SELECT - "t15"."c_name", - "t15"."c_custkey", - "t15"."o_orderkey", - "t15"."o_orderdate", - "t15"."o_totalprice", - "t15"."sum_qty" + "t16"."c_name", + "t16"."c_custkey", + "t16"."o_orderkey", + "t16"."o_orderdate", + "t16"."o_totalprice", + "t16"."sum_qty" FROM ( SELECT - "t14"."c_name", - "t14"."c_custkey", - "t14"."o_orderkey", - "t14"."o_orderdate", - "t14"."o_totalprice", - SUM("t14"."l_quantity") AS "sum_qty" + "t15"."c_name", + "t15"."c_custkey", + "t15"."o_orderkey", + "t15"."o_orderdate", + "t15"."o_totalprice", + SUM("t15"."l_quantity") AS "sum_qty" FROM ( SELECT - "t12"."c_custkey", - "t12"."c_name", - "t12"."c_address", - "t12"."c_nationkey", - "t12"."c_phone", - "t12"."c_acctbal", - "t12"."c_mktsegment", - "t12"."c_comment", - "t12"."o_orderkey", - "t12"."o_custkey", - "t12"."o_orderstatus", - "t12"."o_totalprice", - "t12"."o_orderdate", - "t12"."o_orderpriority", - "t12"."o_clerk", - "t12"."o_shippriority", - "t12"."o_comment", - "t12"."l_orderkey", - "t12"."l_partkey", - "t12"."l_suppkey", - "t12"."l_linenumber", - "t12"."l_quantity", - "t12"."l_extendedprice", - "t12"."l_discount", - "t12"."l_tax", - "t12"."l_returnflag", - "t12"."l_linestatus", - "t12"."l_shipdate", - "t12"."l_commitdate", - "t12"."l_receiptdate", - "t12"."l_shipinstruct", - "t12"."l_shipmode", - "t12"."l_comment" + "t13"."c_custkey", + "t13"."c_name", + "t13"."c_address", + "t13"."c_nationkey", + "t13"."c_phone", + "t13"."c_acctbal", + "t13"."c_mktsegment", + "t13"."c_comment", + "t13"."o_orderkey", + "t13"."o_custkey", + "t13"."o_orderstatus", + "t13"."o_totalprice", + "t13"."o_orderdate", + "t13"."o_orderpriority", + "t13"."o_clerk", + "t13"."o_shippriority", + "t13"."o_comment", + "t13"."l_orderkey", + "t13"."l_partkey", + "t13"."l_suppkey", + "t13"."l_linenumber", + "t13"."l_quantity", + "t13"."l_extendedprice", + "t13"."l_discount", + "t13"."l_tax", + "t13"."l_returnflag", + "t13"."l_linestatus", + "t13"."l_shipdate", + "t13"."l_commitdate", + "t13"."l_receiptdate", + "t13"."l_shipinstruct", + "t13"."l_shipmode", + "t13"."l_comment" FROM ( SELECT "t6"."c_custkey", @@ -67,22 +87,22 @@ FROM ( "t7"."o_clerk", "t7"."o_shippriority", "t7"."o_comment", - "t8"."l_orderkey", - "t8"."l_partkey", - "t8"."l_suppkey", - "t8"."l_linenumber", - "t8"."l_quantity", - "t8"."l_extendedprice", - "t8"."l_discount", - "t8"."l_tax", - "t8"."l_returnflag", - "t8"."l_linestatus", - "t8"."l_shipdate", - "t8"."l_commitdate", - "t8"."l_receiptdate", - "t8"."l_shipinstruct", - "t8"."l_shipmode", - "t8"."l_comment" + "t9"."l_orderkey", + "t9"."l_partkey", + "t9"."l_suppkey", + "t9"."l_linenumber", + "t9"."l_quantity", + "t9"."l_extendedprice", + "t9"."l_discount", + "t9"."l_tax", + "t9"."l_returnflag", + "t9"."l_linestatus", + "t9"."l_shipdate", + "t9"."l_commitdate", + "t9"."l_receiptdate", + "t9"."l_shipinstruct", + "t9"."l_shipmode", + "t9"."l_comment" FROM ( SELECT "t0"."c_custkey", @@ -109,71 +129,33 @@ FROM ( FROM "hive"."ibis_sf1"."orders" AS "t1" ) AS "t7" ON "t6"."c_custkey" = "t7"."o_custkey" - INNER JOIN ( - SELECT - "t2"."l_orderkey", - "t2"."l_partkey", - "t2"."l_suppkey", - "t2"."l_linenumber", - CAST("t2"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", - CAST("t2"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", - CAST("t2"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", - CAST("t2"."l_tax" AS 
DECIMAL(15, 2)) AS "l_tax", - "t2"."l_returnflag", - "t2"."l_linestatus", - "t2"."l_shipdate", - "t2"."l_commitdate", - "t2"."l_receiptdate", - "t2"."l_shipinstruct", - "t2"."l_shipmode", - "t2"."l_comment" - FROM "hive"."ibis_sf1"."lineitem" AS "t2" - ) AS "t8" - ON "t7"."o_orderkey" = "t8"."l_orderkey" - ) AS "t12" + INNER JOIN "t5" AS "t9" + ON "t7"."o_orderkey" = "t9"."l_orderkey" + ) AS "t13" WHERE - "t12"."o_orderkey" IN ( + "t13"."o_orderkey" IN ( SELECT - "t9"."l_orderkey" + "t11"."l_orderkey" FROM ( SELECT - "t5"."l_orderkey", - SUM("t5"."l_quantity") AS "qty_sum" - FROM ( - SELECT - "t2"."l_orderkey", - "t2"."l_partkey", - "t2"."l_suppkey", - "t2"."l_linenumber", - CAST("t2"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", - CAST("t2"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", - CAST("t2"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", - CAST("t2"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", - "t2"."l_returnflag", - "t2"."l_linestatus", - "t2"."l_shipdate", - "t2"."l_commitdate", - "t2"."l_receiptdate", - "t2"."l_shipinstruct", - "t2"."l_shipmode", - "t2"."l_comment" - FROM "hive"."ibis_sf1"."lineitem" AS "t2" - ) AS "t5" + "t8"."l_orderkey", + SUM("t8"."l_quantity") AS "qty_sum" + FROM "t5" AS "t8" GROUP BY 1 - ) AS "t9" + ) AS "t11" WHERE - "t9"."qty_sum" > 300 + "t11"."qty_sum" > 300 ) - ) AS "t14" + ) AS "t15" GROUP BY 1, 2, 3, 4, 5 -) AS "t15" +) AS "t16" ORDER BY - "t15"."o_totalprice" DESC, - "t15"."o_orderdate" ASC + "t16"."o_totalprice" DESC, + "t16"."o_orderdate" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql index 46bc135ee050..774b842175e1 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql @@ -1,28 +1,48 @@ +WITH "t8" AS ( + SELECT + "t3"."l_orderkey", + "t3"."l_partkey", + "t3"."l_suppkey", + "t3"."l_linenumber", + CAST("t3"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t3"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t3"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t3"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t3"."l_returnflag", + "t3"."l_linestatus", + "t3"."l_shipdate", + "t3"."l_commitdate", + "t3"."l_receiptdate", + "t3"."l_shipinstruct", + "t3"."l_shipmode", + "t3"."l_comment" + FROM "hive"."ibis_sf1"."lineitem" AS "t3" +) SELECT - "t21"."s_name", - "t21"."numwait" + "t22"."s_name", + "t22"."numwait" FROM ( SELECT - "t20"."s_name", + "t21"."s_name", COUNT(*) AS "numwait" FROM ( SELECT - "t17"."l1_orderkey", - "t17"."o_orderstatus", - "t17"."l_receiptdate", - "t17"."l_commitdate", - "t17"."l1_suppkey", - "t17"."s_name", - "t17"."n_name" + "t18"."l1_orderkey", + "t18"."o_orderstatus", + "t18"."l_receiptdate", + "t18"."l_commitdate", + "t18"."l1_suppkey", + "t18"."s_name", + "t18"."n_name" FROM ( SELECT - "t10"."l_orderkey" AS "l1_orderkey", - "t13"."o_orderstatus", - "t10"."l_receiptdate", - "t10"."l_commitdate", - "t10"."l_suppkey" AS "l1_suppkey", + "t12"."l_orderkey" AS "l1_orderkey", + "t10"."o_orderstatus", + "t12"."l_receiptdate", + "t12"."l_commitdate", + "t12"."l_suppkey" AS "l1_suppkey", "t9"."s_name", - "t8"."n_name" + "t7"."n_name" FROM ( SELECT "t0"."s_suppkey", @@ -34,129 +54,72 @@ FROM ( "t0"."s_comment" FROM "hive"."ibis_sf1"."supplier" AS "t0" ) AS "t9" + INNER JOIN "t8" AS "t12" + ON "t9"."s_suppkey" = "t12"."l_suppkey" INNER JOIN ( 
SELECT - "t1"."l_orderkey", - "t1"."l_partkey", - "t1"."l_suppkey", - "t1"."l_linenumber", - CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", - CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", - CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", - CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", - "t1"."l_returnflag", - "t1"."l_linestatus", - "t1"."l_shipdate", - "t1"."l_commitdate", - "t1"."l_receiptdate", - "t1"."l_shipinstruct", - "t1"."l_shipmode", - "t1"."l_comment" - FROM "hive"."ibis_sf1"."lineitem" AS "t1" + "t1"."o_orderkey", + "t1"."o_custkey", + "t1"."o_orderstatus", + CAST("t1"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t1"."o_orderdate", + "t1"."o_orderpriority", + "t1"."o_clerk", + "t1"."o_shippriority", + "t1"."o_comment" + FROM "hive"."ibis_sf1"."orders" AS "t1" ) AS "t10" - ON "t9"."s_suppkey" = "t10"."l_suppkey" - INNER JOIN ( - SELECT - "t2"."o_orderkey", - "t2"."o_custkey", - "t2"."o_orderstatus", - CAST("t2"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", - "t2"."o_orderdate", - "t2"."o_orderpriority", - "t2"."o_clerk", - "t2"."o_shippriority", - "t2"."o_comment" - FROM "hive"."ibis_sf1"."orders" AS "t2" - ) AS "t13" - ON "t13"."o_orderkey" = "t10"."l_orderkey" + ON "t10"."o_orderkey" = "t12"."l_orderkey" INNER JOIN ( SELECT - "t3"."n_nationkey", - "t3"."n_name", - "t3"."n_regionkey", - "t3"."n_comment" - FROM "hive"."ibis_sf1"."nation" AS "t3" - ) AS "t8" - ON "t9"."s_nationkey" = "t8"."n_nationkey" - ) AS "t17" + "t2"."n_nationkey", + "t2"."n_name", + "t2"."n_regionkey", + "t2"."n_comment" + FROM "hive"."ibis_sf1"."nation" AS "t2" + ) AS "t7" + ON "t9"."s_nationkey" = "t7"."n_nationkey" + ) AS "t18" WHERE - "t17"."o_orderstatus" = 'F' - AND "t17"."l_receiptdate" > "t17"."l_commitdate" - AND "t17"."n_name" = 'SAUDI ARABIA' + "t18"."o_orderstatus" = 'F' + AND "t18"."l_receiptdate" > "t18"."l_commitdate" + AND "t18"."n_name" = 'SAUDI ARABIA' AND EXISTS( SELECT 1 AS "1" - FROM ( - SELECT - "t1"."l_orderkey", - "t1"."l_partkey", - "t1"."l_suppkey", - "t1"."l_linenumber", - CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", - CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", - CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", - CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", - "t1"."l_returnflag", - "t1"."l_linestatus", - "t1"."l_shipdate", - "t1"."l_commitdate", - "t1"."l_receiptdate", - "t1"."l_shipinstruct", - "t1"."l_shipmode", - "t1"."l_comment" - FROM "hive"."ibis_sf1"."lineitem" AS "t1" - ) AS "t11" + FROM "t8" AS "t13" WHERE ( - "t11"."l_orderkey" = "t17"."l1_orderkey" + "t13"."l_orderkey" = "t18"."l1_orderkey" ) AND ( - "t11"."l_suppkey" <> "t17"."l1_suppkey" + "t13"."l_suppkey" <> "t18"."l1_suppkey" ) ) AND NOT ( EXISTS( SELECT 1 AS "1" - FROM ( - SELECT - "t1"."l_orderkey", - "t1"."l_partkey", - "t1"."l_suppkey", - "t1"."l_linenumber", - CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", - CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", - CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", - CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", - "t1"."l_returnflag", - "t1"."l_linestatus", - "t1"."l_shipdate", - "t1"."l_commitdate", - "t1"."l_receiptdate", - "t1"."l_shipinstruct", - "t1"."l_shipmode", - "t1"."l_comment" - FROM "hive"."ibis_sf1"."lineitem" AS "t1" - ) AS "t12" + FROM "t8" AS "t14" WHERE ( ( - "t12"."l_orderkey" = "t17"."l1_orderkey" + "t14"."l_orderkey" = "t18"."l1_orderkey" ) AND ( - "t12"."l_suppkey" <> "t17"."l1_suppkey" 
+ "t14"."l_suppkey" <> "t18"."l1_suppkey" ) ) AND ( - "t12"."l_receiptdate" > "t12"."l_commitdate" + "t14"."l_receiptdate" > "t14"."l_commitdate" ) ) ) - ) AS "t20" + ) AS "t21" GROUP BY 1 -) AS "t21" +) AS "t22" ORDER BY - "t21"."numwait" DESC, - "t21"."s_name" ASC + "t22"."numwait" DESC, + "t22"."s_name" ASC LIMIT 100 \ No newline at end of file From 35c9390dec297a00654252ece41b2325c27c1f16 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 20 Jan 2024 05:31:57 -0500 Subject: [PATCH 097/161] chore(impala): regen snapshots --- .../test_sql/test_join_aliasing/out.sql | 93 +++++----- .../test_sql/test_join_key_name/out.sql | 159 +++++++----------- .../test_sql/test_join_key_name2/out.sql | 44 +---- .../test_sql/test_limit_cte_extract/out.sql | 40 ++--- .../test_sql/test_nested_join_base/out.sql | 36 ++-- .../test_nested_join_multiple_ctes/out.sql | 90 ++++------ .../test_nested_joins_single_cte/out.sql | 42 ++--- 7 files changed, 196 insertions(+), 308 deletions(-) diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_join_aliasing/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_join_aliasing/out.sql index e74367136685..12d1e4ecb77f 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_join_aliasing/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_join_aliasing/out.sql @@ -1,9 +1,26 @@ +WITH `t5` AS ( + SELECT + `t2`.`d`, + CAST(`t2`.`d` / 15 AS BIGINT) AS `idx`, + `t2`.`c`, + COUNT(*) AS `row_count` + FROM ( + SELECT + `t0`.`a` + 20 AS `d`, + `t0`.`c` + FROM `test_table` AS `t0` + ) AS `t2` + GROUP BY + 1, + 2, + 3 +) SELECT `t6`.`d`, `t6`.`b`, `t6`.`count`, `t6`.`unique`, - `t13`.`total` + `t14`.`total` FROM ( SELECT `t1`.`d`, @@ -24,64 +41,32 @@ FROM ( ) AS `t6` INNER JOIN ( SELECT - `t11`.`d`, - `t11`.`idx`, - `t11`.`c`, - `t11`.`row_count`, - `t11`.`total` + `t12`.`d`, + `t12`.`idx`, + `t12`.`c`, + `t12`.`row_count`, + `t12`.`total` FROM ( SELECT - `t7`.`d`, - `t7`.`idx`, - `t7`.`c`, - `t7`.`row_count`, - `t9`.`total` - FROM ( - SELECT - `t2`.`d`, - CAST(`t2`.`d` / 15 AS BIGINT) AS `idx`, - `t2`.`c`, - COUNT(*) AS `row_count` - FROM ( - SELECT - `t0`.`a` + 20 AS `d`, - `t0`.`c` - FROM `test_table` AS `t0` - ) AS `t2` - GROUP BY - 1, - 2, - 3 - ) AS `t7` + `t8`.`d`, + `t8`.`idx`, + `t8`.`c`, + `t8`.`row_count`, + `t10`.`total` + FROM `t5` AS `t8` INNER JOIN ( SELECT - `t5`.`d`, - SUM(`t5`.`row_count`) AS `total` - FROM ( - SELECT - `t2`.`d`, - CAST(`t2`.`d` / 15 AS BIGINT) AS `idx`, - `t2`.`c`, - COUNT(*) AS `row_count` - FROM ( - SELECT - `t0`.`a` + 20 AS `d`, - `t0`.`c` - FROM `test_table` AS `t0` - ) AS `t2` - GROUP BY - 1, - 2, - 3 - ) AS `t5` + `t7`.`d`, + SUM(`t7`.`row_count`) AS `total` + FROM `t5` AS `t7` GROUP BY 1 - ) AS `t9` - ON `t7`.`d` = `t9`.`d` - ) AS `t11` + ) AS `t10` + ON `t8`.`d` = `t10`.`d` + ) AS `t12` WHERE - `t11`.`row_count` < ( - `t11`.`total` / 2 + `t12`.`row_count` < ( + `t12`.`total` / 2 ) -) AS `t13` - ON `t6`.`d` = `t13`.`d` \ No newline at end of file +) AS `t14` + ON `t6`.`d` = `t14`.`d` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name/out.sql index 865f638ff542..c7e04edfeb7a 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name/out.sql @@ -1,118 +1,79 @@ +WITH `t11` AS ( + SELECT + `t6`.`c_custkey`, + `t6`.`c_name`, + 
`t6`.`c_address`, + `t6`.`c_nationkey`, + `t6`.`c_phone`, + `t6`.`c_acctbal`, + `t6`.`c_mktsegment`, + `t6`.`c_comment`, + `t4`.`r_name` AS `region`, + `t7`.`o_totalprice`, + CAST(`t7`.`o_orderdate` AS TIMESTAMP) AS `odate` + FROM `tpch_region` AS `t4` + INNER JOIN `tpch_nation` AS `t5` + ON `t4`.`r_regionkey` = `t5`.`n_regionkey` + INNER JOIN `tpch_customer` AS `t6` + ON `t6`.`c_nationkey` = `t5`.`n_nationkey` + INNER JOIN `tpch_orders` AS `t7` + ON `t7`.`o_custkey` = `t6`.`c_custkey` +) SELECT - `t14`.`year`, - `t14`.`CountStar()` AS `pre_count`, - `t19`.`CountStar()` AS `post_count`, - `t19`.`CountStar()` / CAST(`t14`.`CountStar()` AS DOUBLE) AS `fraction` + `t15`.`year`, + `t15`.`CountStar()` AS `pre_count`, + `t20`.`CountStar()` AS `post_count`, + `t20`.`CountStar()` / CAST(`t15`.`CountStar()` AS DOUBLE) AS `fraction` FROM ( SELECT - EXTRACT(year FROM `t11`.`odate`) AS `year`, + EXTRACT(year FROM `t12`.`odate`) AS `year`, COUNT(*) AS `CountStar()` - FROM ( - SELECT - `t6`.`c_custkey`, - `t6`.`c_name`, - `t6`.`c_address`, - `t6`.`c_nationkey`, - `t6`.`c_phone`, - `t6`.`c_acctbal`, - `t6`.`c_mktsegment`, - `t6`.`c_comment`, - `t4`.`r_name` AS `region`, - `t7`.`o_totalprice`, - CAST(`t7`.`o_orderdate` AS TIMESTAMP) AS `odate` - FROM `tpch_region` AS `t4` - INNER JOIN `tpch_nation` AS `t5` - ON `t4`.`r_regionkey` = `t5`.`n_regionkey` - INNER JOIN `tpch_customer` AS `t6` - ON `t6`.`c_nationkey` = `t5`.`n_nationkey` - INNER JOIN `tpch_orders` AS `t7` - ON `t7`.`o_custkey` = `t6`.`c_custkey` - ) AS `t11` + FROM `t11` AS `t12` GROUP BY 1 -) AS `t14` +) AS `t15` INNER JOIN ( SELECT - EXTRACT(year FROM `t17`.`odate`) AS `year`, + EXTRACT(year FROM `t18`.`odate`) AS `year`, COUNT(*) AS `CountStar()` FROM ( SELECT - `t11`.`c_custkey`, - `t11`.`c_name`, - `t11`.`c_address`, - `t11`.`c_nationkey`, - `t11`.`c_phone`, - `t11`.`c_acctbal`, - `t11`.`c_mktsegment`, - `t11`.`c_comment`, - `t11`.`region`, - `t11`.`o_totalprice`, - `t11`.`odate` - FROM ( - SELECT - `t6`.`c_custkey`, - `t6`.`c_name`, - `t6`.`c_address`, - `t6`.`c_nationkey`, - `t6`.`c_phone`, - `t6`.`c_acctbal`, - `t6`.`c_mktsegment`, - `t6`.`c_comment`, - `t4`.`r_name` AS `region`, - `t7`.`o_totalprice`, - CAST(`t7`.`o_orderdate` AS TIMESTAMP) AS `odate` - FROM `tpch_region` AS `t4` - INNER JOIN `tpch_nation` AS `t5` - ON `t4`.`r_regionkey` = `t5`.`n_regionkey` - INNER JOIN `tpch_customer` AS `t6` - ON `t6`.`c_nationkey` = `t5`.`n_nationkey` - INNER JOIN `tpch_orders` AS `t7` - ON `t7`.`o_custkey` = `t6`.`c_custkey` - ) AS `t11` + `t12`.`c_custkey`, + `t12`.`c_name`, + `t12`.`c_address`, + `t12`.`c_nationkey`, + `t12`.`c_phone`, + `t12`.`c_acctbal`, + `t12`.`c_mktsegment`, + `t12`.`c_comment`, + `t12`.`region`, + `t12`.`o_totalprice`, + `t12`.`odate` + FROM `t11` AS `t12` WHERE - `t11`.`o_totalprice` > ( + `t12`.`o_totalprice` > ( SELECT - AVG(`t15`.`o_totalprice`) AS `Mean(o_totalprice)` + AVG(`t16`.`o_totalprice`) AS `Mean(o_totalprice)` FROM ( SELECT - `t12`.`c_custkey`, - `t12`.`c_name`, - `t12`.`c_address`, - `t12`.`c_nationkey`, - `t12`.`c_phone`, - `t12`.`c_acctbal`, - `t12`.`c_mktsegment`, - `t12`.`c_comment`, - `t12`.`region`, - `t12`.`o_totalprice`, - `t12`.`odate` - FROM ( - SELECT - `t6`.`c_custkey`, - `t6`.`c_name`, - `t6`.`c_address`, - `t6`.`c_nationkey`, - `t6`.`c_phone`, - `t6`.`c_acctbal`, - `t6`.`c_mktsegment`, - `t6`.`c_comment`, - `t4`.`r_name` AS `region`, - `t7`.`o_totalprice`, - CAST(`t7`.`o_orderdate` AS TIMESTAMP) AS `odate` - FROM `tpch_region` AS `t4` - INNER JOIN `tpch_nation` AS `t5` - ON `t4`.`r_regionkey` = 
`t5`.`n_regionkey` - INNER JOIN `tpch_customer` AS `t6` - ON `t6`.`c_nationkey` = `t5`.`n_nationkey` - INNER JOIN `tpch_orders` AS `t7` - ON `t7`.`o_custkey` = `t6`.`c_custkey` - ) AS `t12` + `t13`.`c_custkey`, + `t13`.`c_name`, + `t13`.`c_address`, + `t13`.`c_nationkey`, + `t13`.`c_phone`, + `t13`.`c_acctbal`, + `t13`.`c_mktsegment`, + `t13`.`c_comment`, + `t13`.`region`, + `t13`.`o_totalprice`, + `t13`.`odate` + FROM `t11` AS `t13` WHERE - `t12`.`region` = `t11`.`region` - ) AS `t15` + `t13`.`region` = `t12`.`region` + ) AS `t16` ) - ) AS `t17` + ) AS `t18` GROUP BY 1 -) AS `t19` - ON `t14`.`year` = `t19`.`year` \ No newline at end of file +) AS `t20` + ON `t15`.`year` = `t20`.`year` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name2/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name2/out.sql index 5d6f63cb5b69..39c29d332eba 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name2/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name2/out.sql @@ -1,36 +1,4 @@ -SELECT - `t13`.`year`, - `t13`.`CountStar()` AS `pre_count`, - `t15`.`CountStar()` AS `post_count` -FROM ( - SELECT - EXTRACT(year FROM `t11`.`odate`) AS `year`, - COUNT(*) AS `CountStar()` - FROM ( - SELECT - `t6`.`c_custkey`, - `t6`.`c_name`, - `t6`.`c_address`, - `t6`.`c_nationkey`, - `t6`.`c_phone`, - `t6`.`c_acctbal`, - `t6`.`c_mktsegment`, - `t6`.`c_comment`, - `t4`.`r_name` AS `region`, - `t7`.`o_totalprice`, - CAST(`t7`.`o_orderdate` AS TIMESTAMP) AS `odate` - FROM `tpch_region` AS `t4` - INNER JOIN `tpch_nation` AS `t5` - ON `t4`.`r_regionkey` = `t5`.`n_regionkey` - INNER JOIN `tpch_customer` AS `t6` - ON `t6`.`c_nationkey` = `t5`.`n_nationkey` - INNER JOIN `tpch_orders` AS `t7` - ON `t7`.`o_custkey` = `t6`.`c_custkey` - ) AS `t11` - GROUP BY - 1 -) AS `t13` -INNER JOIN ( +WITH `t12` AS ( SELECT EXTRACT(year FROM `t11`.`odate`) AS `year`, COUNT(*) AS `CountStar()` @@ -57,5 +25,11 @@ INNER JOIN ( ) AS `t11` GROUP BY 1 -) AS `t15` - ON `t13`.`year` = `t15`.`year` \ No newline at end of file +) +SELECT + `t14`.`year`, + `t14`.`CountStar()` AS `pre_count`, + `t16`.`CountStar()` AS `post_count` +FROM `t12` AS `t14` +INNER JOIN `t12` AS `t16` + ON `t14`.`year` = `t16`.`year` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_limit_cte_extract/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_limit_cte_extract/out.sql index b408d5a00767..cfb42ddfab2c 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_limit_cte_extract/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_limit_cte_extract/out.sql @@ -1,27 +1,23 @@ -SELECT - `t2`.`id`, - `t2`.`bool_col`, - `t2`.`tinyint_col`, - `t2`.`smallint_col`, - `t2`.`int_col`, - `t2`.`bigint_col`, - `t2`.`float_col`, - `t2`.`double_col`, - `t2`.`date_string_col`, - `t2`.`string_col`, - `t2`.`timestamp_col`, - `t2`.`year`, - `t2`.`month` -FROM ( - SELECT - * - FROM `functional_alltypes` AS `t0` - LIMIT 100 -) AS `t2` -INNER JOIN ( +WITH `t1` AS ( SELECT * FROM `functional_alltypes` AS `t0` LIMIT 100 -) AS `t4` +) +SELECT + `t3`.`id`, + `t3`.`bool_col`, + `t3`.`tinyint_col`, + `t3`.`smallint_col`, + `t3`.`int_col`, + `t3`.`bigint_col`, + `t3`.`float_col`, + `t3`.`double_col`, + `t3`.`date_string_col`, + `t3`.`string_col`, + `t3`.`timestamp_col`, + `t3`.`year`, + `t3`.`month` +FROM `t1` AS `t3` +INNER JOIN `t1` AS `t5` ON TRUE \ No newline at end of file diff --git 
a/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_base/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_base/out.sql index 0afadafb7be9..f1b7a93c51ef 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_base/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_base/out.sql @@ -1,27 +1,21 @@ -SELECT - `t4`.`uuid`, - `t2`.`CountStar()` -FROM ( - SELECT - `t1`.`uuid`, - MAX(`t1`.`CountStar()`) AS `max_count` - FROM ( - SELECT - `t0`.`uuid`, - COUNT(*) AS `CountStar()` - FROM `t` AS `t0` - GROUP BY - 1 - ) AS `t1` - GROUP BY - 1 -) AS `t4` -LEFT OUTER JOIN ( +WITH `t1` AS ( SELECT `t0`.`uuid`, COUNT(*) AS `CountStar()` FROM `t` AS `t0` GROUP BY 1 -) AS `t2` - ON `t4`.`uuid` = `t2`.`uuid` \ No newline at end of file +) +SELECT + `t5`.`uuid`, + `t3`.`CountStar()` +FROM ( + SELECT + `t2`.`uuid`, + MAX(`t2`.`CountStar()`) AS `max_count` + FROM `t1` AS `t2` + GROUP BY + 1 +) AS `t5` +LEFT OUTER JOIN `t1` AS `t3` + ON `t5`.`uuid` = `t3`.`uuid` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_multiple_ctes/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_multiple_ctes/out.sql index 5a4bca2f68ab..2ff8a2515300 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_multiple_ctes/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_multiple_ctes/out.sql @@ -1,61 +1,45 @@ -SELECT - `t7`.`userid`, - `t7`.`movieid`, - `t7`.`rating`, - `t7`.`datetime`, - `t7`.`title` -FROM ( +WITH `t6` AS ( SELECT - `t6`.`userid`, - `t6`.`movieid`, - `t6`.`rating`, - `t6`.`datetime`, - `t6`.`title` + `t4`.`userid`, + `t4`.`movieid`, + `t4`.`rating`, + `t4`.`datetime`, + `t2`.`title` FROM ( SELECT - `t4`.`userid`, - `t4`.`movieid`, - `t4`.`rating`, - `t4`.`datetime`, - `t2`.`title` - FROM ( - SELECT - `t0`.`userid`, - `t0`.`movieid`, - `t0`.`rating`, - CAST(`t0`.`timestamp` AS TIMESTAMP) AS `datetime` - FROM `ratings` AS `t0` - ) AS `t4` - INNER JOIN `movies` AS `t2` - ON `t4`.`movieid` = `t2`.`movieid` - ) AS `t6` + `t0`.`userid`, + `t0`.`movieid`, + `t0`.`rating`, + CAST(`t0`.`timestamp` AS TIMESTAMP) AS `datetime` + FROM `ratings` AS `t0` + ) AS `t4` + INNER JOIN `movies` AS `t2` + ON `t4`.`movieid` = `t2`.`movieid` +) +SELECT + `t8`.`userid`, + `t8`.`movieid`, + `t8`.`rating`, + `t8`.`datetime`, + `t8`.`title` +FROM ( + SELECT + `t7`.`userid`, + `t7`.`movieid`, + `t7`.`rating`, + `t7`.`datetime`, + `t7`.`title` + FROM `t6` AS `t7` WHERE - `t6`.`userid` = 118205 AND EXTRACT(year FROM `t6`.`datetime`) > 2001 -) AS `t7` + `t7`.`userid` = 118205 AND EXTRACT(year FROM `t7`.`datetime`) > 2001 +) AS `t8` WHERE - `t7`.`movieid` IN ( + `t8`.`movieid` IN ( SELECT - `t6`.`movieid` - FROM ( - SELECT - `t4`.`userid`, - `t4`.`movieid`, - `t4`.`rating`, - `t4`.`datetime`, - `t2`.`title` - FROM ( - SELECT - `t0`.`userid`, - `t0`.`movieid`, - `t0`.`rating`, - CAST(`t0`.`timestamp` AS TIMESTAMP) AS `datetime` - FROM `ratings` AS `t0` - ) AS `t4` - INNER JOIN `movies` AS `t2` - ON `t4`.`movieid` = `t2`.`movieid` - ) AS `t6` + `t7`.`movieid` + FROM `t6` AS `t7` WHERE - `t6`.`userid` = 118205 - AND EXTRACT(year FROM `t6`.`datetime`) > 2001 - AND EXTRACT(year FROM `t6`.`datetime`) < 2009 + `t7`.`userid` = 118205 + AND EXTRACT(year FROM `t7`.`datetime`) > 2001 + AND EXTRACT(year FROM `t7`.`datetime`) < 2009 ) \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_nested_joins_single_cte/out.sql 
b/ibis/backends/impala/tests/snapshots/test_sql/test_nested_joins_single_cte/out.sql
index 95e2f8ad4d5b..89c637993a4c 100644
--- a/ibis/backends/impala/tests/snapshots/test_sql/test_nested_joins_single_cte/out.sql
+++ b/ibis/backends/impala/tests/snapshots/test_sql/test_nested_joins_single_cte/out.sql
@@ -1,31 +1,25 @@
-SELECT
-  `t6`.`uuid`,
-  `t2`.`CountStar()`,
-  `t4`.`last_visit`
-FROM (
-  SELECT
-    `t1`.`uuid`,
-    MAX(`t1`.`CountStar()`) AS `max_count`
-  FROM (
-    SELECT
-      `t0`.`uuid`,
-      COUNT(*) AS `CountStar()`
-    FROM `t` AS `t0`
-    GROUP BY
-      1
-  ) AS `t1`
-  GROUP BY
-    1
-) AS `t6`
-LEFT OUTER JOIN (
+WITH `t1` AS (
   SELECT
     `t0`.`uuid`,
     COUNT(*) AS `CountStar()`
   FROM `t` AS `t0`
   GROUP BY
     1
-) AS `t2`
-  ON `t6`.`uuid` = `t2`.`uuid`
+)
+SELECT
+  `t7`.`uuid`,
+  `t4`.`CountStar()`,
+  `t5`.`last_visit`
+FROM (
+  SELECT
+    `t2`.`uuid`,
+    MAX(`t2`.`CountStar()`) AS `max_count`
+  FROM `t1` AS `t2`
+  GROUP BY
+    1
+) AS `t7`
+LEFT OUTER JOIN `t1` AS `t4`
+  ON `t7`.`uuid` = `t4`.`uuid` AND `t7`.`max_count` = `t4`.`CountStar()`
 LEFT OUTER JOIN (
   SELECT
     `t0`.`uuid`,
@@ -33,5 +27,5 @@ LEFT OUTER JOIN (
   FROM `t` AS `t0`
   GROUP BY
     1
-) AS `t4`
-  ON `t6`.`uuid` = `t4`.`uuid`
+) AS `t5`
+  ON `t7`.`uuid` = `t5`.`uuid`
\ No newline at end of file

From 127d51c6a86b7fee555ecb83d4644b9448cfde86 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Sat, 20 Jan 2024 05:38:14 -0500
Subject: [PATCH 098/161] fix(trino): compile property literal values directly
 instead of going through the pipeline

---
 ibis/backends/trino/__init__.py | 14 +++++++-------
 1 
file changed, 14 insertions(+), 1 deletion(-) diff --git a/ibis/backends/exasol/__init__.py b/ibis/backends/exasol/__init__.py index 715fa3d6b7e4..4878d51ef018 100644 --- a/ibis/backends/exasol/__init__.py +++ b/ibis/backends/exasol/__init__.py @@ -2,6 +2,7 @@ import atexit import contextlib +import datetime import re from typing import TYPE_CHECKING, Any from urllib.parse import parse_qs, urlparse @@ -239,8 +240,20 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: create_stmt_sql = create_stmt.sql(self.name) df = op.data.to_frame() + data = df.itertuples(index=False, name=None) + + def process_item(item: Any): + """Handle inserting timestamps with timezones.""" + if isinstance(item, datetime.datetime): + if item.tzinfo is not None: + item = item.tz_convert("UTC").tz_localize(None) + return item.isoformat(sep=" ", timespec="milliseconds") + return item + + rows = (tuple(map(process_item, row)) for row in data) with self._safe_raw_sql(create_stmt_sql): - self.con.import_from_pandas(df, name) + if not df.empty: + self.con.ext.insert_multi(name, rows) atexit.register(self._clean_up_tmp_table, ident) From 20222014ce4ea6868b2516a816c7b0db826ef0d2 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 20 Jan 2024 06:30:03 -0500 Subject: [PATCH 101/161] test(exasol): account for unordered results in window function tests --- ibis/backends/tests/test_window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index ccccc9571ad6..997b733717f0 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -852,7 +852,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): id="unordered-lag", marks=[ pytest.mark.broken( - ["trino"], + ["trino", "exasol"], reason="this isn't actually broken: the backend result is equal up to ordering", raises=AssertionError, strict=False, # sometimes it passes From af54a184c1843932d7a8fc6e0f0258a524d5a871 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 20 Jan 2024 06:42:50 -0500 Subject: [PATCH 102/161] test(duckdb): test that column name case is preserved when inserting --- ibis/backends/duckdb/tests/test_client.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/ibis/backends/duckdb/tests/test_client.py b/ibis/backends/duckdb/tests/test_client.py index e01467aa5f65..a09b559c5f98 100644 --- a/ibis/backends/duckdb/tests/test_client.py +++ b/ibis/backends/duckdb/tests/test_client.py @@ -1,6 +1,7 @@ from __future__ import annotations import duckdb +import pandas as pd import pyarrow as pa import pytest from pytest import param @@ -162,11 +163,10 @@ def test_config_options_bad_option(con): def test_insert(con): - import pandas as pd - name = ibis.util.guid() - t = con.create_table(name, schema=ibis.schema({"a": "int64"})) + t = con.create_table(name, schema=ibis.schema({"a": "int64"}), temp=True) + con.insert(name, obj=pd.DataFrame({"a": [1, 2]})) assert t.count().execute() == 2 @@ -196,3 +196,18 @@ def test_to_other_sql(con, snapshot): sql = ibis.to_sql(t, dialect="snowflake") snapshot.assert_match(sql, "out.sql") + + +def test_insert_preserves_column_case(con): + name1 = ibis.util.guid() + name2 = ibis.util.guid() + + df1 = pd.DataFrame([[1], [2], [3], [4]], columns=["FTHG"]) + df2 = pd.DataFrame([[5], [6], [7], [8]], columns=["FTHG"]) + + t1 = con.create_table(name1, df1, temp=True) + 
assert t1.count().execute() == 4 + + t2 = con.create_table(name2, df2, temp=True) + con.insert(name1, t2) + assert t1.count().execute() == 8 From b2ec73337c7f728c3f83712e56b5b5b97d364082 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Mon, 22 Jan 2024 15:39:09 -0500 Subject: [PATCH 103/161] refactor(oracle): port to sqlglot (#8020) --- .github/workflows/ibis-backends.yml | 32 +- ibis/backends/base/sqlglot/datatypes.py | 23 + ibis/backends/conftest.py | 17 +- ibis/backends/oracle/__init__.py | 447 +++++++++---- ibis/backends/oracle/compiler.py | 590 ++++++++++++++++++ ibis/backends/oracle/converter.py | 19 + ibis/backends/oracle/datatypes.py | 46 -- ibis/backends/oracle/registry.py | 130 ---- ibis/backends/oracle/tests/conftest.py | 41 +- ibis/backends/oracle/tests/test_client.py | 4 +- ibis/backends/oracle/tests/test_datatypes.py | 2 +- ibis/backends/tests/errors.py | 5 + .../test_default_limit/oracle/out.sql | 5 + .../test_disable_query_limit/oracle/out.sql | 5 + .../oracle/out.sql | 19 + .../test_respect_set_limit/oracle/out.sql | 10 + .../test_group_by_has_index/oracle/out.sql | 8 +- .../test_sql/test_isin_bug/oracle/out.sql | 18 +- ibis/backends/tests/test_aggregation.py | 52 +- ibis/backends/tests/test_asof_join.py | 2 + ibis/backends/tests/test_binary.py | 2 +- ibis/backends/tests/test_client.py | 4 + ibis/backends/tests/test_dot_sql.py | 1 + ibis/backends/tests/test_export.py | 3 +- ibis/backends/tests/test_generic.py | 34 +- ibis/backends/tests/test_join.py | 2 +- ibis/backends/tests/test_numeric.py | 68 +- ibis/backends/tests/test_param.py | 6 +- ibis/backends/tests/test_sql.py | 2 +- ibis/backends/tests/test_string.py | 66 +- ibis/backends/tests/test_temporal.py | 108 ++-- ibis/backends/tests/test_window.py | 21 +- 32 files changed, 1261 insertions(+), 531 deletions(-) create mode 100644 ibis/backends/oracle/compiler.py create mode 100644 ibis/backends/oracle/converter.py delete mode 100644 ibis/backends/oracle/datatypes.py delete mode 100644 ibis/backends/oracle/registry.py create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/oracle/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/oracle/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/oracle/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/oracle/out.sql diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index c955edc237bd..2a9d77145dde 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -156,13 +156,6 @@ jobs: - druid services: - druid - # - name: oracle - # title: Oracle - # serial: true - # extras: - # - oracle - # services: - # - oracle - name: exasol title: Exasol serial: true @@ -170,6 +163,13 @@ jobs: - exasol services: - exasol + - name: oracle + title: Oracle + serial: true + extras: + - oracle + services: + - oracle # - name: flink # title: Flink # serial: true @@ -272,15 +272,15 @@ jobs: - druid services: - druid - # - os: windows-latest - # backend: - # name: oracle - # title: Oracle - # serial: true - # extras: - # - oracle - # services: - # - oracle + - os: windows-latest + backend: + name: oracle + title: Oracle + serial: true + extras: + - oracle + services: + - oracle # - os: windows-latest # backend: # name: flink diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index 273295f83b2f..a54959092e4e 100644 
--- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -563,6 +563,29 @@ class DruidType(SqlglotType): class OracleType(SqlglotType): dialect = "oracle" + default_decimal_precision = 38 + default_decimal_scale = 9 + + default_temporal_scale = 9 + + unknown_type_strings = FrozenDict({"raw": dt.binary}) + + @classmethod + def _from_sqlglot_FLOAT(cls) -> dt.Float64: + return dt.Float64(nullable=cls.default_nullable) + + @classmethod + def _from_sqlglot_DECIMAL(cls, precision=None, scale=None) -> dt.Decimal: + if scale is None or int(scale.this.this) == 0: + return dt.Int64(nullable=cls.default_nullable) + else: + return super()._from_sqlglot_DECIMAL(precision, scale) + + @classmethod + def _from_ibis_String(cls, dtype: dt.String) -> sge.DataType: + nullable = " NOT NULL" if not dtype.nullable else "" + return "VARCHAR2(4000)" + nullable + class SnowflakeType(SqlglotType): dialect = "snowflake" diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index b10f902818e0..5cac11957a5a 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -22,6 +22,7 @@ from ibis import util from ibis.backends.base import CanCreateDatabase, CanCreateSchema, _get_backend_names from ibis.conftest import WINDOWS +from ibis.util import promote_tuple if TYPE_CHECKING: from collections.abc import Iterable @@ -414,6 +415,13 @@ def pytest_runtest_call(item): backend = next(iter(backend)) + def _filter_none_from_raises(kwargs): + # Filter out any None values from kwargs['raises'] + # to cover any missing backend error types as defined in ibis/backends/tests/errors.py + if (raises := kwargs.get("raises")) is not None: + kwargs["raises"] = tuple(filter(None, promote_tuple(raises))) + return kwargs + # Ibis hasn't exposed existing functionality # This xfails so that you know when it starts to pass for marker in item.iter_markers(name="notimpl"): @@ -425,6 +433,7 @@ def pytest_runtest_call(item): raise ValueError("notimpl requires a raises") kwargs = marker.kwargs.copy() kwargs.setdefault("reason", f"Feature not yet exposed in {backend}") + kwargs = _filter_none_from_raises(kwargs) item.add_marker(pytest.mark.xfail(**kwargs)) # Functionality is unavailable upstream (but could be) @@ -439,13 +448,16 @@ def pytest_runtest_call(item): kwargs = marker.kwargs.copy() kwargs.setdefault("reason", f"Feature not available upstream for {backend}") + kwargs = _filter_none_from_raises(kwargs) item.add_marker(pytest.mark.xfail(**kwargs)) for marker in item.iter_markers(name="never"): if backend in marker.args[0]: if "reason" not in marker.kwargs.keys(): raise ValueError("never requires a reason") - item.add_marker(pytest.mark.xfail(**marker.kwargs)) + kwargs = marker.kwargs.copy() + kwargs = _filter_none_from_raises(kwargs) + item.add_marker(pytest.mark.xfail(**kwargs)) # Something has been exposed as broken by a new test and it shouldn't be # imperative for a contributor to fix it just because they happened to @@ -460,10 +472,12 @@ def pytest_runtest_call(item): kwargs = marker.kwargs.copy() kwargs.setdefault("reason", f"Feature is failing on {backend}") + kwargs = _filter_none_from_raises(kwargs) item.add_marker(pytest.mark.xfail(**kwargs)) for marker in item.iter_markers(name="xfail_version"): kwargs = marker.kwargs.copy() + kwargs = _filter_none_from_raises(kwargs) if backend not in kwargs: continue @@ -549,7 +563,6 @@ def ddl_con(ddl_backend): params=_get_backends_to_test( keep=( "mssql", - "oracle", "risingwave", "sqlite", ) diff --git 
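The `OracleType` additions above encode a simple rule for Oracle's `NUMBER`: no scale (or scale 0) is read back as a 64-bit integer, anything else as a decimal, with 38 digits of precision assumed when none is given. A plain-Python restatement of that rule (a sketch, not the ibis type-mapper API):

```python
import ibis.expr.datatypes as dt


def number_to_ibis(precision, scale):
    # scale 0 or unspecified: treat as an integer (the old type mapper's
    # comment called this "kind of a lie", since 38 digits exceed int64)
    if not scale:
        return dt.Int64()
    return dt.Decimal(precision=precision or 38, scale=scale)


print(number_to_ibis(38, 0))  # int64
print(number_to_ibis(12, 2))  # decimal(12, 2)
```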
a/ibis/backends/oracle/__init__.py b/ibis/backends/oracle/__init__.py index cd5ca715f28e..1f8c430c96cd 100644 --- a/ibis/backends/oracle/__init__.py +++ b/ibis/backends/oracle/__init__.py @@ -2,87 +2,43 @@ from __future__ import annotations -import atexit import contextlib -import sys +import re import warnings +from functools import cached_property +from operator import itemgetter from typing import TYPE_CHECKING, Any import oracledb import sqlglot as sg - +import sqlglot.expressions as sge + +import ibis +import ibis.common.exceptions as exc +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +import ibis.expr.schema as sch +import ibis.expr.types as ir from ibis import util - -# Wow, this is truly horrible -# Get out your clippers, it's time to shave a yak. -# -# 1. oracledb is only supported in sqlalchemy 2.0 -# 2. Ergo, module hacking is required to avoid doing a silly amount of work -# to create multiple lockfiles or port snowflake away from sqlalchemy -# 3. Also the version needs to be spoofed to be >= 7 or else the cx_Oracle -# dialect barfs -oracledb.__version__ = oracledb.version = "7" - -sys.modules["cx_Oracle"] = oracledb - -import sqlalchemy as sa # noqa: E402 - -import ibis.common.exceptions as exc # noqa: E402 -import ibis.expr.datatypes as dt # noqa: E402 -import ibis.expr.operations as ops # noqa: E402 -import ibis.expr.schema as sch # noqa: E402 -from ibis.backends.base.sql.alchemy import ( # noqa: E402 - AlchemyCompiler, - AlchemyExprTranslator, - BaseAlchemyBackend, -) -from ibis.backends.base.sqlglot import STAR, C # noqa: E402 -from ibis.backends.oracle.datatypes import OracleType # noqa: E402 -from ibis.backends.oracle.registry import operation_registry # noqa: E402 -from ibis.expr.rewrites import rewrite_sample # noqa: E402 +from ibis.backends.base.sqlglot import STAR, SQLGlotBackend +from ibis.backends.base.sqlglot.compiler import TRUE, C +from ibis.backends.oracle.compiler import OracleCompiler if TYPE_CHECKING: from collections.abc import Iterable + import pandas as pd + import pyrrow as pa -class OracleExprTranslator(AlchemyExprTranslator): - _registry = operation_registry.copy() - _rewrites = AlchemyExprTranslator._rewrites.copy() - _dialect_name = "oracle" - _has_reduction_filter_syntax = False - _require_order_by = ( - *AlchemyExprTranslator._require_order_by, - ops.Reduction, - ops.Lag, - ops.Lead, - ) - - _forbids_frame_clause = ( - *AlchemyExprTranslator._forbids_frame_clause, - ops.Lag, - ops.Lead, - ) - - _quote_column_names = True - _quote_table_names = True - - type_mapper = OracleType - -class OracleCompiler(AlchemyCompiler): - translator_class = OracleExprTranslator - support_values_syntax_in_select = False - supports_indexed_grouping_keys = False - null_limit = None - rewrites = AlchemyCompiler.rewrites | rewrite_sample - - -class Backend(BaseAlchemyBackend): +class Backend(SQLGlotBackend): name = "oracle" - compiler = OracleCompiler - supports_create_or_replace = False - supports_temporary_tables = True - _temporary_prefix = "GLOBAL TEMPORARY" + compiler = OracleCompiler() + + @cached_property + def version(self): + matched = re.search(r"(\d+)\.(\d+)\.(\d+)", self.con.version) + return ".".join(matched.groups()) def do_connect( self, @@ -143,41 +99,70 @@ def do_connect( if dsn is None: dsn = oracledb.makedsn(host, port, service_name=service_name, sid=sid) - url = sa.engine.url.make_url(f"oracle://{user}:{password}@{dsn}") - - engine = sa.create_engine( - url, - poolclass=sa.pool.StaticPool, - # We set the statement cache size to 0 
because Oracle will otherwise - # attempt to reuse prepared statements even if the type of the bound variable - # has changed. - # This is apparently accepted behavior. - # https://python-oracledb.readthedocs.io/en/latest/user_guide/appendix_b.html#statement-caching-in-thin-and-thick-modes - connect_args={"stmtcachesize": 0}, - ) - super().do_connect(engine) + # We set the statement cache size to 0 because Oracle will otherwise + # attempt to reuse prepared statements even if the type of the bound variable + # has changed. + # This is apparently accepted behavior. + # https://python-oracledb.readthedocs.io/en/latest/user_guide/appendix_b.html#statement-caching-in-thin-and-thick-modes + self.con = oracledb.connect(dsn, user=user, password=password, stmtcachesize=0) - def normalize_name(name): - if name is None: - return None - elif not name: - return "" - elif name.lower() == name: - return sa.sql.quoted_name(name, quote=True) - else: - return name + # turn on autocommit + # TODO: it would be great if this worked but it doesn't seem to do the trick + # I had to hack in the commit lines to the compiler + # self.con.autocommit = True - self.con.dialect.normalize_name = normalize_name + # Set to ensure decimals come back as decimals + oracledb.defaults.fetch_decimals = True def _from_url(self, url: str, **kwargs): return self.do_connect(user=url.username, password=url.password, dsn=url.host) @property def current_database(self) -> str: - return self._scalar_query("SELECT * FROM global_name") + with self._safe_raw_sql(sg.select(STAR).from_("global_name")) as cur: + [(database,)] = cur.fetchall() + return database + + @contextlib.contextmanager + def begin(self): + con = self.con + cur = con.cursor() + try: + yield cur + except Exception: + con.rollback() + raise + else: + con.commit() + finally: + cur.close() + + @contextlib.contextmanager + def _safe_raw_sql(self, *args, **kwargs): + with contextlib.closing(self.raw_sql(*args, **kwargs)) as result: + yield result + + def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: + with contextlib.suppress(AttributeError): + query = query.sql(dialect=self.name) + + con = self.con + cursor = con.cursor() - def list_tables(self, like=None, schema=None): + try: + cursor.execute(query, **kwargs) + except Exception: + con.rollback() + cursor.close() + raise + else: + con.commit() + return cursor + + def list_tables( + self, like: str | None = None, schema: str | None = None + ) -> list[str]: """List the tables in the database. Parameters @@ -186,17 +171,214 @@ def list_tables(self, like=None, schema=None): A pattern to use for listing tables. schema The schema to perform the list against. + """ + conditions = [TRUE] + + if schema is None: + schema = self.con.username.upper() + conditions = C.owner.eq(sge.convert(schema.upper())) - ::: {.callout-warning} - ## `schema` refers to database hierarchy + tables = ( + sg.select("table_name", "owner") + .from_(sg.table("all_tables")) + .distinct() + .where(conditions) + ) + views = ( + sg.select("view_name", "owner") + .from_(sg.table("all_views")) + .distinct() + .where(conditions) + ) + sql = tables.union(views).sql(self.name) - The `schema` parameter does **not** refer to the column names and - types of `table`. 
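The new `begin` above wraps a cursor in a commit-on-success, rollback-on-error transaction. The same pattern, sketched against a generic DB-API 2.0 connection rather than the Oracle backend specifically:

```python
import contextlib


@contextlib.contextmanager
def transaction(con):
    """Yield a cursor; commit if the block succeeds, roll back if it raises."""
    cur = con.cursor()
    try:
        yield cur
    except Exception:
        con.rollback()  # undo any partial work
        raise
    else:
        con.commit()  # persist only on success
    finally:
        cur.close()
```

Usage is then `with transaction(con) as cur: cur.execute(...)`, which is how the DDL and memtable paths above use `begin`.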
- ::: + with self._safe_raw_sql(sql) as cur: + out = cur.fetchall() + + return self._filter_with_like(map(itemgetter(0), out), like) + + def list_schemas( + self, like: str | None = None, database: str | None = None + ) -> list[str]: + if database is not None: + raise exc.UnsupportedArgumentError( + "No cross-database schema access in Oracle" + ) + + query = sg.select("username").from_("all_users").order_by("username") + + with self._safe_raw_sql(query) as con: + schemata = list(map(itemgetter(0), con)) + + return self._filter_with_like(schemata, like) + + def get_schema( + self, name: str, schema: str | None = None, database: str | None = None + ) -> sch.Schema: + if schema is None: + schema = self.con.username.upper() + stmt = ( + sg.select( + "column_name", + "data_type", + sg.column("nullable").eq(sge.convert("Y")).as_("nullable"), + ) + .from_(sg.table("all_tab_columns")) + .where(sg.column("table_name").eq(sge.convert(name))) + .where(sg.column("owner").eq(sge.convert(schema))) + ) + with self._safe_raw_sql(stmt) as cur: + result = cur.fetchall() + + if not result: + raise exc.IbisError(f"Table not found: {name!r}") + + type_mapper = self.compiler.type_mapper + fields = { + name: type_mapper.from_string(type_string, nullable=nullable) + for name, type_string, nullable in result + } + + return sch.Schema(fields) + + def create_table( + self, + name: str, + obj: pd.DataFrame | pa.Table | ir.Table | None = None, + *, + schema: ibis.Schema | None = None, + database: str | None = None, + temp: bool = False, + overwrite: bool = False, + ): + """Create a table in Oracle. + + Parameters + ---------- + name + Name of the table to create + obj + The data with which to populate the table; optional, but at least + one of `obj` or `schema` must be specified + schema + The schema of the table to create; optional, but at least one of + `obj` or `schema` must be specified + database + The name of the database in which to create the table; if not + passed, the current database is used. 
+ temp + Create a temporary table + overwrite + If `True`, replace the table if it already exists, otherwise fail + if the table exists """ - tables = self.inspector.get_table_names(schema=schema) - views = self.inspector.get_view_names(schema=schema) - return self._filter_with_like(tables + views, like) + if obj is None and schema is None: + raise ValueError("Either `obj` or `schema` must be specified") + + properties = [] + + if temp: + properties.append(sge.TemporaryProperty()) + + if obj is not None: + if not isinstance(obj, ir.Expr): + table = ibis.memtable(obj) + else: + table = obj + + self._run_pre_execute_hooks(table) + + query = self._to_sqlglot(table) + else: + query = None + + column_defs = [ + sge.ColumnDef( + this=sg.to_identifier(colname, quoted=self.compiler.quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [sge.ColumnConstraint(kind=sge.NotNullColumnConstraint())] + ), + ) + for colname, typ in (schema or table.schema()).items() + ] + + if overwrite: + temp_name = util.gen_name(f"{self.name}_table") + else: + temp_name = name + + table = sg.table(temp_name, catalog=database, quoted=self.compiler.quoted) + target = sge.Schema(this=table, expressions=column_defs) + + create_stmt = sge.Create( + kind="TABLE", + this=target, + properties=sge.Properties(expressions=properties), + ) + + this = sg.table(name, catalog=database, quoted=self.compiler.quoted) + with self._safe_raw_sql(create_stmt) as cur: + if query is not None: + insert_stmt = sge.Insert(this=table, expression=query).sql(self.name) + cur.execute(insert_stmt) + + if overwrite: + cur.execute( + sge.Drop(kind="TABLE", this=this, exists=True).sql(self.name) + ) + cur.execute( + f"ALTER TABLE IF EXISTS {table.sql(self.name)} RENAME TO {this.sql(self.name)}" + ) + + if schema is None: + return self.table(name, schema=database) + + # preserve the input schema if it was provided + return ops.DatabaseTable( + name, schema=schema, source=self, namespace=ops.Namespace(database=database) + ).to_expr() + + def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: + schema = op.schema + + # only register if we haven't already done so + if (name := op.name) not in self.list_tables(): + quoted = self.compiler.quoted + column_defs = [ + sg.exp.ColumnDef( + this=sg.to_identifier(colname, quoted=quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [ + sg.exp.ColumnConstraint( + kind=sg.exp.NotNullColumnConstraint() + ) + ] + ), + ) + for colname, typ in schema.items() + ] + + create_stmt = sg.exp.Create( + kind="TABLE", + this=sg.exp.Schema( + this=sg.to_identifier(name, quoted=quoted), expressions=column_defs + ), + ).sql(self.name, pretty=True) + + data = op.data.to_frame().itertuples(index=False) + specs = ", ".join(f":{i}" for i, _ in enumerate(schema)) + table = sg.table(name, quoted=quoted).sql(self.name) + insert_stmt = f"INSERT INTO {table} VALUES ({specs})" + with self.begin() as cur: + cur.execute(create_stmt) + for row in data: + cur.execute(insert_stmt, row) def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]: name = util.gen_name("oracle_metadata") @@ -211,6 +393,17 @@ def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]: if isinstance(sg_expr, sg.exp.Table): sg_expr = sg.select(STAR).from_(sg_expr) + # TODO(gforsyth): followup -- this should probably be made a default + # transform for quoting backends + def transformer(node): + if isinstance(node, sg.exp.Table): + return 
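For the in-memory table registration above, the insert statement is built with python-oracledb's numbered bind parameters, one per column, and then executed once per row. A tiny sketch of just the statement construction (the table and column names here are hypothetical):

```python
schema_names = ["id", "name"]  # hypothetical memtable columns

specs = ", ".join(f":{i}" for i, _ in enumerate(schema_names))
insert_stmt = f'INSERT INTO "mydata" VALUES ({specs})'
print(insert_stmt)  # INSERT INTO "mydata" VALUES (:0, :1)

# the backend then loops over the DataFrame rows with cur.execute(insert_stmt, row)
```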
sg.table(node.name, quoted=True) + elif isinstance(node, sg.exp.Column): + return sg.column(col=node.name, quoted=True) + return node + + sg_expr = sg_expr.transform(transformer) + this = sg.table(name, quoted=True) create_view = sg.exp.Create(kind="VIEW", this=this, expression=sg_expr).sql( dialect @@ -232,13 +425,14 @@ def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]: ) with self.begin() as con: - con.exec_driver_sql(create_view) + con.execute(create_view) try: - results = con.exec_driver_sql(metadata_query).fetchall() + results = con.execute(metadata_query).fetchall() finally: # drop the view no matter what - con.exec_driver_sql(drop_view) + con.execute(drop_view) + # TODO: hand all this off to the type mapper for name, type_string, precision, scale, nullable in results: # NUMBER(null, null) --> FLOAT # (null, null) --> from_string() @@ -269,26 +463,33 @@ def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]: typ = dt.Decimal(precision=precision, scale=scale, nullable=nullable) else: - typ = OracleType.from_string(type_string, nullable=nullable) + typ = self.compiler.type_mapper.from_string( + type_string, nullable=nullable + ) yield name, typ - def _table_from_schema( - self, - name: str, - schema: sch.Schema, - temp: bool = False, - database: str | None = None, - **kwargs: Any, - ) -> sa.Table: - if temp: - kwargs["oracle_on_commit"] = "PRESERVE ROWS" - t = super()._table_from_schema(name, schema, temp, database, **kwargs) - if temp: - atexit.register(self._clean_up_tmp_table, t) - return t + def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: + # TODO(gforsyth): this can probably be generalized a bit and put into + # the base backend (or a mixin) + import pandas as pd + + from ibis.backends.oracle.converter import OraclePandasData + + try: + df = pd.DataFrame.from_records( + cursor, columns=schema.names, coerce_float=True + ) + except Exception: + # clean up the cursor if we fail to create the DataFrame + # + # in the sqlite case failing to close the cursor results in + # artificially locked tables + cursor.close() + raise + df = OraclePandasData.convert_table(df, schema) + return df def _clean_up_tmp_table(self, name: str) -> None: - tmptable = self._get_sqla_table(name, autoload=False) with self.begin() as bind: # global temporary tables cannot be dropped without first truncating them # @@ -296,10 +497,10 @@ def _clean_up_tmp_table(self, name: str) -> None: # # ignore DatabaseError exceptions because the table may not exist # because it's already been deleted - with contextlib.suppress(sa.exc.DatabaseError): - bind.exec_driver_sql(f'TRUNCATE TABLE "{tmptable.name}"') - with contextlib.suppress(sa.exc.DatabaseError): - tmptable.drop(bind=bind) + with contextlib.suppress(oracledb.DatabaseError): + bind.execute(f'TRUNCATE TABLE "{name}"') + with contextlib.suppress(oracledb.DatabaseError): + bind.execute(f'DROP TABLE "{name}"') def _clean_up_cached_table(self, op): self._clean_up_tmp_table(op.name) diff --git a/ibis/backends/oracle/compiler.py b/ibis/backends/oracle/compiler.py new file mode 100644 index 000000000000..63b171458195 --- /dev/null +++ b/ibis/backends/oracle/compiler.py @@ -0,0 +1,590 @@ +from __future__ import annotations + +from functools import singledispatchmethod + +import sqlglot as sg +import sqlglot.expressions as sge +import toolz +from public import public +from sqlglot.dialects import Oracle +from sqlglot.dialects.dialect import create_with_partitions_sql, rename_func + +import ibis +import 
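The `transformer` added to `_metadata` above forces every table and column reference to be quoted, so Oracle does not fold the temporary view's identifiers to upper case. A small standalone illustration of the same sqlglot `transform` trick:

```python
import sqlglot as sg
import sqlglot.expressions as sge


def quote_everything(node):
    if isinstance(node, sge.Table):
        return sg.table(node.name, quoted=True)
    if isinstance(node, sge.Column):
        return sg.column(col=node.name, quoted=True)
    return node


expr = sg.parse_one("SELECT x FROM t", read="oracle")
print(expr.transform(quote_everything).sql("oracle"))
# roughly: SELECT "x" FROM "t"
```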
ibis.common.exceptions as com +import ibis.expr.operations as ops +from ibis.backends.base.sqlglot.compiler import NULL, STAR, SQLGlotCompiler +from ibis.backends.base.sqlglot.datatypes import OracleType +from ibis.backends.base.sqlglot.rewrites import Window, replace_log2, replace_log10 +from ibis.common.patterns import replace +from ibis.expr.analysis import p, x, y +from ibis.expr.rewrites import rewrite_sample + + +def _create_sql(self, expression: sge.Create) -> str: + # TODO: should we use CREATE PRIVATE instead? That will set an implicit + # lower bound of Oracle 18c + properties = expression.args.get("properties") + temporary = any( + isinstance(prop, sge.TemporaryProperty) + for prop in (properties.expressions if properties else []) + ) + + kind = expression.args["kind"] + if (obj := kind.upper()) in ("TABLE", "VIEW") and temporary: + if expression.expression: + return f"CREATE GLOBAL TEMPORARY {obj} {self.sql(expression, 'this')} AS {self.sql(expression, 'expression')}" + else: + # TODO: why does autocommit not work here? need to specify the ON COMMIT part... + return f"CREATE GLOBAL TEMPORARY {obj} {self.sql(expression, 'this')} ON COMMIT PRESERVE ROWS" + + return create_with_partitions_sql(self, expression) + + +def _datatype_sql(self: Oracle.Generator, expression: sge.DataType) -> str: + # Use this to handle correctly formatting timestamp precision + # e.g. TIMESTAMP (scale) WITH TIME ZONE vs. TIMESTAMP WITH TIME ZONE(scale) + if expression.is_type("timestamptz"): + for exp in expression.expressions: + if isinstance(exp, sge.DataTypeParam): + return f"TIMESTAMP ({self.sql(exp, 'this')}) WITH TIME ZONE" + return "TIMESTAMP WITH TIME ZONE" + return self.datatype_sql(expression) + + +Oracle.Generator.TRANSFORMS |= { + sge.LogicalOr: rename_func("max"), + sge.LogicalAnd: rename_func("min"), + sge.VariancePop: rename_func("var_pop"), + sge.Variance: rename_func("var_samp"), + sge.Stddev: rename_func("stddev_pop"), + sge.StddevPop: rename_func("stddev_pop"), + sge.StddevSamp: rename_func("stddev_samp"), + sge.ApproxDistinct: rename_func("approx_count_distinct"), + sge.Create: _create_sql, + sge.Select: sg.transforms.preprocess([sg.transforms.eliminate_semi_and_anti_joins]), + sge.DataType: _datatype_sql, +} + + +@replace(p.WindowFunction(p.First(x, y))) +def rewrite_first(_, x, y): + if y is not None: + raise com.UnsupportedOperationError( + "`first` aggregate over window does not support `where`" + ) + return _.copy(func=ops.FirstValue(x)) + + +@replace(p.WindowFunction(p.Last(x, y))) +def rewrite_last(_, x, y): + if y is not None: + raise com.UnsupportedOperationError( + "`last` aggregate over window does not support `where`" + ) + return _.copy(func=ops.LastValue(x)) + + +@replace(p.WindowFunction(frame=x @ p.WindowFrame(order_by=()))) +def rewrite_empty_order_by_window(_, x): + return _.copy(frame=x.copy(order_by=(ibis.NA,))) + + +@replace(p.WindowFunction(p.RowNumber | p.NTile, x)) +def exclude_unsupported_window_frame_from_row_number(_, x): + return ops.Subtract(_.copy(frame=x.copy(start=None, end=None)), 1) + + +@replace( + p.WindowFunction( + p.Lag | p.Lead | p.PercentRank | p.CumeDist | p.Any | p.All, + x @ p.WindowFrame(start=None), + ) +) +def exclude_unsupported_window_frame_from_ops(_, x): + return _.copy(frame=x.copy(start=None, end=None)) + + +@public +class OracleCompiler(SQLGlotCompiler): + __slots__ = () + + dialect = "oracle" + quoted = True + type_mapper = OracleType + rewrites = ( + exclude_unsupported_window_frame_from_row_number, + 
exclude_unsupported_window_frame_from_ops, + rewrite_first, + rewrite_last, + rewrite_empty_order_by_window, + rewrite_sample, + replace_log2, + replace_log10, + *SQLGlotCompiler.rewrites, + ) + + NAN = sge.Literal.number("binary_double_nan") + """Backend's NaN literal.""" + + POS_INF = sge.Literal.number("binary_double_infinity") + """Backend's positive infinity literal.""" + + NEG_INF = sge.Literal.number("-binary_double_infinity") + """Backend's negative infinity literal.""" + + def _aggregate(self, funcname: str, *args, where): + func = self.f[funcname] + if where is not None: + args = tuple(self.if_(where, arg) for arg in args) + return func(*args) + + @staticmethod + def _generate_groups(groups): + return groups + + @singledispatchmethod + def visit_node(self, op, **kwargs): + return super().visit_node(op, **kwargs) + + @visit_node.register(ops.Equals) + def visit_Equals(self, op, *, left, right): + # Oracle didn't have proper boolean types until recently and we handle them + # as integers so we end up with things like "t0"."bool_col" = 1 (for True) + # but then if we are testing that a boolean column IS True, it gets rendered as + # "t0"."bool_col" = 1 = 1 + # so intercept that and change it to WHERE (bool_col = 1) + # TODO(gil): there must be a better way to do this + if op.dtype.is_boolean() and isinstance(right, sge.Boolean): + if right.this: + return left + else: + return sg.not_(left) + return super().visit_Equals(op, left=left, right=right) + + @visit_node.register(ops.IsNull) + def visit_IsNull(self, op, *, arg): + # TODO(gil): find a better way to handle this + # but CASE WHEN (bool_col = 1) IS NULL isn't valid and we can simply check if + # bool_col is null + if isinstance(arg, sge.EQ): + return arg.this.is_(NULL) + return arg.is_(NULL) + + @visit_node.register(ops.Literal) + def visit_Literal(self, op, *, value, dtype): + # avoid casting NULL -- oracle handling for these casts is... complicated + if value is None: + return NULL + elif dtype.is_timestamp() or dtype.is_time(): + if getattr(dtype, "timezone", None) is not None: + return self.f.to_timestamp_tz( + value.isoformat(), 'YYYY-MM-DD"T"HH24:MI:SS.FF6TZH:TZM' + ) + else: + return self.f.to_timestamp( + value.isoformat(), 'YYYY-MM-DD"T"HH24:MI:SS.FF6' + ) + elif dtype.is_date(): + return self.f.to_date( + f"{value.year:04d}-{value.month:02d}-{value.day:02d}", "FXYYYY-MM-DD" + ) + elif dtype.is_uuid(): + return sge.convert(str(value)) + elif dtype.is_interval(): + if dtype.unit.short in ("Y", "M"): + return self.f.numtoyminterval(value, dtype.unit.name) + elif dtype.unit.short in ("D", "h", "m", "s"): + return self.f.numtodsinterval(value, dtype.unit.name) + else: + raise com.UnsupportedOperationError( + f"Intervals with precision {dtype.unit.name} not supported in Oracle." + ) + + return super().visit_Literal(op, value=value, dtype=dtype) + + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + if to.is_interval(): + # CASTing to an INTERVAL in Oracle requires specifying digits of + # precision that are a pain. There are two helper functions that + # should be used instead. 
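Oracle has no `FILTER (WHERE ...)` clause, so `_aggregate` above folds the predicate into the argument with a CASE expression and lets the aggregate skip the resulting NULLs. A sketch of the generated shape using plain sqlglot nodes (not the ibis compiler):

```python
import sqlglot as sg
import sqlglot.expressions as sge

cond = sg.condition("kind = 'a'")
filtered_arg = sge.Case(ifs=[sge.If(this=cond, true=sg.column("amount"))])
print(sg.func("sum", filtered_arg).sql("oracle"))
# roughly: SUM(CASE WHEN kind = 'a' THEN amount END)
```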
+ if to.unit.short in ("D", "h", "m", "s"): + return self.f.numtodsinterval(arg, to.unit.name) + elif to.unit.short in ("Y", "M"): + return self.f.numtoyminterval(arg, to.unit.name) + else: + raise com.UnsupportedArgumentError( + f"Interval {to.unit.name} not supported by Oracle" + ) + return self.cast(arg, to) + + @visit_node.register(ops.Limit) + def visit_Limit(self, op, *, parent, n, offset): + # push limit/offset into subqueries + if isinstance(parent, sge.Subquery) and parent.this.args.get("limit") is None: + result = parent.this + alias = parent.alias + else: + result = sg.select(STAR).from_(parent) + alias = None + + if isinstance(n, int): + result = result.limit(n) + elif n is not None: + raise com.UnsupportedArgumentError( + "No support for dynamic limit in the Oracle backend." + ) + # TODO: re-enable this for dynamic limits + # but it should be paired with offsets working + # result = result.where(C.ROWNUM <= sg.select(n).from_(parent).subquery()) + else: + assert n is None, n + if self.no_limit_value is not None: + result = result.limit(self.no_limit_value) + + assert offset is not None, "offset is None" + + if offset > 0: + raise com.UnsupportedArgumentError( + "No support for limit offsets in the Oracle backend." + ) + + if alias is not None: + return result.subquery(alias) + return result + + @visit_node.register(ops.Date) + def visit_Date(self, op, *, arg): + return sg.cast(arg, to="date") + + @visit_node.register(ops.IsNan) + def visit_IsNan(self, op, *, arg): + return arg.eq(self.NAN) + + @visit_node.register(ops.Log) + def visit_Log(self, op, *, arg, base): + return self.f.log(base, arg, dialect=self.dialect) + + @visit_node.register(ops.IsInf) + def visit_IsInf(self, op, *, arg): + return arg.isin(self.POS_INF, self.NEG_INF) + + @visit_node.register(ops.RandomScalar) + def visit_RandomScalar(self, op): + # Not using FuncGen here because of dotted function call + return sg.func("dbms_random.value") + + @visit_node.register(ops.Pi) + def visit_Pi(self, op): + return self.f.acos(-1) + + @visit_node.register(ops.Cot) + def visit_Cot(self, op, *, arg): + return 1 / self.f.tan(arg) + + @visit_node.register(ops.Degrees) + def visit_Degrees(self, op, *, arg): + return 180 * arg / self.visit_node(ops.Pi()) + + @visit_node.register(ops.Radians) + def visit_Radians(self, op, *, arg): + return self.visit_node(ops.Pi()) * arg / 180 + + @visit_node.register(ops.Modulus) + def visit_Modulus(self, op, *, left, right): + return self.f.mod(left, right) + + @visit_node.register(ops.Levenshtein) + def visit_Levenshtein(self, op, *, left, right): + # Not using FuncGen here because of dotted function call + return sg.func("utl_match.edit_distance", left, right) + + @visit_node.register(ops.StartsWith) + def visit_StartsWith(self, op, *, arg, start): + return self.f.substr(arg, 0, self.f.length(start)).eq(start) + + @visit_node.register(ops.EndsWith) + def visit_EndsWith(self, op, *, arg, end): + return self.f.substr(arg, -1 * self.f.length(end), self.f.length(end)).eq(end) + + @visit_node.register(ops.StringFind) + def visit_StringFind(self, op, *, arg, substr, start, end): + if end is not None: + raise NotImplementedError("`end` is not implemented") + + sub_string = substr + + if start is not None: + arg = self.f.substr(arg, start + 1) + pos = self.f.instr(arg, sub_string) + # TODO(gil): why, oh why, does this need an extra +1 on the end? 
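Several of the visit methods above lean on arithmetic identities for functions Oracle does not expose directly (`ACOS(-1)` for pi, `1/TAN` for cotangent, scaling by `180/pi` for degrees and radians). A quick numeric sanity check of those identities in plain Python:

```python
import math

assert math.isclose(math.acos(-1), math.pi)                             # visit_Pi
assert math.isclose(1 / math.tan(0.5), math.cos(0.5) / math.sin(0.5))   # visit_Cot
assert math.isclose(180 * 0.5 / math.pi, math.degrees(0.5))             # visit_Degrees
assert math.isclose(math.pi * 90 / 180, math.radians(90))               # visit_Radians
print("identities hold")
```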
+ return sg.case().when(pos > 0, pos - 1 + start).else_(-1) + 1 + + return self.f.instr(arg, sub_string) + + @visit_node.register(ops.StrRight) + def visit_StrRight(self, op, *, arg, nchars): + return self.f.substr(arg, -nchars) + + @visit_node.register(ops.RegexExtract) + def visit_RegexExtract(self, op, *, arg, pattern, index): + return self.if_( + index.eq(0), + self.f.regexp_substr(arg, pattern), + self.f.regexp_substr(arg, pattern, 1, 1, "cn", index), + ) + + @visit_node.register(ops.RegexReplace) + def visit_RegexReplace(self, op, *, arg, pattern, replacement): + return sge.RegexpReplace(this=arg, expression=pattern, replacement=replacement) + + @visit_node.register(ops.StringContains) + def visit_StringContains(self, op, *, haystack, needle): + return self.f.instr(haystack, needle) > 0 + + @visit_node.register(ops.StringJoin) + def visit_StringJoin(self, op, *, arg, sep): + return self.f.concat(*toolz.interpose(sep, arg)) + + ## Aggregate stuff + + @visit_node.register(ops.Correlation) + def visit_Correlation(self, op, *, left, right, where, how): + if how == "sample": + raise ValueError( + "Oracle only implements population correlation coefficient" + ) + return self.agg.corr(left, right, where=where) + + @visit_node.register(ops.Covariance) + def visit_Covariance(self, op, *, left, right, where, how): + if how == "sample": + return self.agg.covar_samp(left, right, where=where) + return self.agg.covar_pop(left, right, where=where) + + @visit_node.register(ops.ApproxMedian) + def visit_ApproxMedian(self, op, *, arg, where): + return self.visit_Quantile(op, arg=arg, quantile=0.5, where=where) + + @visit_node.register(ops.Quantile) + def visit_Quantile(self, op, *, arg, quantile, where): + suffix = "cont" if op.arg.dtype.is_numeric() else "disc" + funcname = f"percentile_{suffix}" + + if where is not None: + arg = self.if_(where, arg) + + expr = sge.WithinGroup( + this=self.f[funcname](quantile), + expression=sge.Order(expressions=[sge.Ordered(this=arg)]), + ) + return expr + + @visit_node.register(ops.CountDistinct) + def visit_CountDistinct(self, op, *, arg, where): + if where is not None: + arg = self.if_(where, arg) + + return sge.Count(this=sge.Distinct(expressions=[arg])) + + @visit_node.register(ops.CountStar) + def visit_CountStar(self, op, *, arg, where): + if where is not None: + return self.f.count(self.if_(where, 1, NULL)) + return self.f.count(STAR) + + @visit_node.register(ops.IdenticalTo) + def visit_IdenticalTo(self, op, *, left, right): + # sqlglot NullSafeEQ uses "is not distinct from" which isn't supported in oracle + return ( + sg.case() + .when(left.eq(right).or_(left.is_(NULL).and_(right.is_(NULL))), 0) + .else_(1) + .eq(0) + ) + + @visit_node.register(ops.Xor) + def visit_Xor(self, op, *, left, right): + return (left.or_(right)).and_(sg.not_(left.and_(right))) + + @visit_node.register(ops.TimestampTruncate) + @visit_node.register(ops.DateTruncate) + def visit_DateTruncate(self, op, *, arg, unit): + trunc_unit_mapping = { + "Y": "year", + "M": "MONTH", + "W": "IW", + "D": "DDD", + "h": "HH", + "m": "MI", + } + + timestamp_unit_mapping = { + "s": "SS", + "ms": "SS.FF3", + "us": "SS.FF6", + "ns": "SS.FF9", + } + + if (unyt := timestamp_unit_mapping.get(unit.short)) is not None: + # Oracle only has trunc(DATE) and that can't do sub-minute precision, but we can + # handle those separately. 
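`visit_Xor` above expands exclusive-or as `(x OR y) AND NOT (x AND y)`, since Oracle has no boolean XOR operator. The expansion is easy to verify over the whole truth table:

```python
for x in (False, True):
    for y in (False, True):
        assert ((x or y) and not (x and y)) == (x != y)
print("xor expansion matches")
```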
+ return self.f.to_timestamp( + self.f.to_char(arg, f"YYYY-MM-DD HH24:MI:{unyt}"), + f"YYYY-MM-DD HH24:MI:{unyt}", + ) + + if (unyt := trunc_unit_mapping.get(unit.short)) is None: + raise com.UnsupportedOperationError(f"Unsupported truncate unit {unit}") + + return self.f.trunc(arg, unyt) + + @visit_node.register(Window) + def visit_Window(self, op, *, how, func, start, end, group_by, order_by): + # Oracle has two (more?) types of analytic functions you can use inside OVER. + # + # The first group accepts an "analytic clause" which is decomposed into the + # PARTITION BY, ORDER BY and the windowing clause (e.g. ROWS BETWEEN + # UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING). These are the "full" window functions. + # + # The second group accepts an _optional_ PARTITION BY clause and a _required_ ORDER BY clause. + # If you try to pass, for instance, LEAD(col, 1) OVER() AS "val", this will error. + # + # The list of functions which accept the full analytic clause (and so + # accept a windowing clause) are those functions which are marked with + # an asterisk at the bottom of this page (yes, Oracle thinks this is + # a reasonable way to demarcate them): + # https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Analytic-Functions.html + # + # (Side note: these unordered window function queries were not erroring + # in the SQLAlchemy Oracle backend but they were raising AssertionErrors. + # This is because the SQLAlchemy Oracle dialect automatically inserts an + # ORDER BY whether you ask it to or not.) + # + # If the windowing clause is omitted, the default is + # RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + # + # I (@gforsyth) believe that this is the windowing range applied to the + # analytic functions (like LEAD, LAG, CUME_DIST) which don't allow + # specifying a windowing clause. + # + # This allowance for specifying a windowing clause is handled below by + # explicitly listing the ops which correspond to the analytic functions + # that accept it. + + if type(op.func) in ( + # TODO: figure out REGR_* functions and also manage this list better + # Allowed windowing clause functions + ops.Mean, # "avg", + ops.Correlation, # "corr", + ops.Count, # "count", + ops.Covariance, # "covar_pop", "covar_samp", + ops.FirstValue, # "first_value", + ops.LastValue, # "last_value", + ops.Max, # "max", + ops.Min, # "min", + ops.NthValue, # "nth_value", + ops.StandardDev, # "stddev","stddev_pop","stddev_samp", + ops.Sum, # "sum", + ops.Variance, # "var_pop","var_samp","variance", + ): + if start is None: + start = {} + if end is None: + end = {} + + start_value = start.get("value", "UNBOUNDED") + start_side = start.get("side", "PRECEDING") + end_value = end.get("value", "UNBOUNDED") + end_side = end.get("side", "FOLLOWING") + + spec = sge.WindowSpec( + kind=how.upper(), + start=start_value, + start_side=start_side, + end=end_value, + end_side=end_side, + over="OVER", + ) + elif not order_by: + # For other analytic functions, ORDER BY is required + raise com.UnsupportedOperationError( + f"Function {op.func.name} cannot be used in Oracle without an order_by." + ) + else: + # and no windowing clause is supported, so set the spec to None. 
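The sub-minute truncation above round-trips the value through a character format that stops at the requested precision (`TO_CHAR` followed by `TO_TIMESTAMP`). The same basic idea in plain Python, truncating to milliseconds:

```python
from datetime import datetime

ts = datetime(2024, 1, 20, 6, 29, 48, 123456)
as_text = ts.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]  # drop the microsecond digits
print(datetime.strptime(as_text, "%Y-%m-%d %H:%M:%S.%f"))
# 2024-01-20 06:29:48.123000
```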
+ spec = None + + order = sge.Order(expressions=order_by) if order_by else None + + spec = self._minimize_spec(op.start, op.end, spec) + + return sge.Window(this=func, partition_by=group_by, order=order, spec=spec) + + @visit_node.register(ops.Arbitrary) + @visit_node.register(ops.ArgMax) + @visit_node.register(ops.ArgMin) + @visit_node.register(ops.ArrayCollect) + @visit_node.register(ops.ArrayColumn) + @visit_node.register(ops.ArrayFlatten) + @visit_node.register(ops.ArrayMap) + @visit_node.register(ops.ArrayStringJoin) + @visit_node.register(ops.First) + @visit_node.register(ops.Last) + @visit_node.register(ops.Mode) + @visit_node.register(ops.MultiQuantile) + @visit_node.register(ops.RegexSplit) + @visit_node.register(ops.StringSplit) + @visit_node.register(ops.TimeTruncate) + @visit_node.register(ops.Bucket) + @visit_node.register(ops.TimestampBucket) + @visit_node.register(ops.TimeDelta) + @visit_node.register(ops.DateDelta) + @visit_node.register(ops.TimestampDelta) + @visit_node.register(ops.TimestampNow) + @visit_node.register(ops.TimestampFromYMDHMS) + @visit_node.register(ops.TimeFromHMS) + @visit_node.register(ops.IntervalFromInteger) + @visit_node.register(ops.DayOfWeekIndex) + @visit_node.register(ops.DayOfWeekName) + @visit_node.register(ops.DateDiff) + @visit_node.register(ops.ExtractEpochSeconds) + @visit_node.register(ops.ExtractWeekOfYear) + @visit_node.register(ops.ExtractDayOfYear) + @visit_node.register(ops.RowID) + def visit_Undefined(self, op, **_): + raise com.OperationNotDefinedError(type(op).__name__) + + +_SIMPLE_OPS = { + ops.ApproxCountDistinct: "approx_count_distinct", + ops.BitAnd: "bit_and_agg", + ops.BitOr: "bit_or_agg", + ops.BitXor: "bit_xor_agg", + ops.BitwiseAnd: "bitand", + ops.Hash: "hash", + ops.LPad: "lpad", + ops.RPad: "rpad", + ops.StringAscii: "ascii", + ops.Strip: "trim", + ops.Hash: "ora_hash", +} + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @OracleCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) + + else: + + @OracleCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) + + setattr(OracleCompiler, f"visit_{_op.__name__}", _fmt) + + +del _op, _name, _fmt diff --git a/ibis/backends/oracle/converter.py b/ibis/backends/oracle/converter.py new file mode 100644 index 000000000000..7755cb595340 --- /dev/null +++ b/ibis/backends/oracle/converter.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +import datetime + +from ibis.formats.pandas import PandasData + + +class OraclePandasData(PandasData): + @classmethod + def convert_Timestamp_element(cls, dtype): + return datetime.datetime.fromisoformat + + @classmethod + def convert_Date_element(cls, dtype): + return datetime.date.fromisoformat + + @classmethod + def convert_Time_element(cls, dtype): + return datetime.time.fromisoformat diff --git a/ibis/backends/oracle/datatypes.py b/ibis/backends/oracle/datatypes.py deleted file mode 100644 index 08cdc3be2e4f..000000000000 --- a/ibis/backends/oracle/datatypes.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import annotations - -import sqlalchemy.types as sat -from sqlalchemy.dialects import oracle - -import ibis.expr.datatypes as dt -from ibis.backends.base.sql.alchemy.datatypes import AlchemyType -from ibis.backends.base.sqlglot.datatypes import OracleType as SqlglotOracleType - - -class 
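The new `OraclePandasData` converter a few lines above maps result values back into Python temporal types with `fromisoformat`, on the assumption that they arrive as ISO-formatted strings. For example:

```python
import datetime

print(datetime.date.fromisoformat("2024-01-20"))
print(datetime.datetime.fromisoformat("2024-01-20 06:29:48.123000"))
print(datetime.time.fromisoformat("06:29:48"))
```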
OracleType(AlchemyType): - dialect = "oracle" - - @classmethod - def to_ibis(cls, typ, nullable=True): - if isinstance(typ, oracle.ROWID): - return dt.String(nullable=nullable) - elif isinstance(typ, (oracle.RAW, sat.BLOB)): - return dt.Binary(nullable=nullable) - elif isinstance(typ, sat.Float): - return dt.Float64(nullable=nullable) - elif isinstance(typ, sat.Numeric): - if typ.scale == 0: - # kind of a lie, should be int128 because 38 digits - return dt.Int64(nullable=nullable) - else: - return dt.Decimal( - precision=typ.precision or 38, - scale=typ.scale or 0, - nullable=nullable, - ) - else: - return super().to_ibis(typ, nullable=nullable) - - @classmethod - def from_ibis(cls, dtype): - if isinstance(dtype, dt.Float64): - return sat.Float(precision=53).with_variant(oracle.FLOAT(14), "oracle") - elif isinstance(dtype, dt.Float32): - return sat.Float(precision=23).with_variant(oracle.FLOAT(7), "oracle") - else: - return super().from_ibis(dtype) - - @classmethod - def from_string(cls, type_string, nullable=True): - return SqlglotOracleType.from_string(type_string, nullable=nullable) diff --git a/ibis/backends/oracle/registry.py b/ibis/backends/oracle/registry.py deleted file mode 100644 index 8c6b074bd21d..000000000000 --- a/ibis/backends/oracle/registry.py +++ /dev/null @@ -1,130 +0,0 @@ -from __future__ import annotations - -import sqlalchemy as sa -import toolz -from packaging.version import parse as vparse - -import ibis.expr.operations as ops -from ibis.backends.base.sql.alchemy import ( - fixed_arity, - reduction, - sqlalchemy_operation_registry, - sqlalchemy_window_functions_registry, - unary, -) -from ibis.backends.base.sql.alchemy.registry import ( - _gen_string_find, -) -from ibis.backends.base.sql.alchemy.registry import ( - _literal as _alchemy_literal, -) - -operation_registry = sqlalchemy_operation_registry.copy() - -operation_registry.update(sqlalchemy_window_functions_registry) - - -def _cot(t, op): - arg = t.translate(op.arg) - return 1.0 / sa.func.tan(arg, type_=t.get_sqla_type(op.arg.dtype)) - - -def _cov(t, op): - return t._reduction(getattr(sa.func, f"covar_{op.how[:4]}"), op) - - -def _corr(t, op): - if op.how == "sample": - raise ValueError( - f"{t.__class__.__name__} only implements population correlation " - "coefficient" - ) - return t._reduction(sa.func.corr, op) - - -def _literal(t, op): - dtype = op.dtype - value = op.value - - if value is None: - return sa.null() - elif ( - # handle UUIDs in sqlalchemy < 2 - vparse(sa.__version__) < vparse("2") and dtype.is_uuid() - ): - return sa.literal(str(value), type_=t.get_sqla_type(dtype)) - elif dtype.is_timestamp(): - if dtype.timezone is not None: - return sa.func.to_utc_timestamp_tz(value.isoformat(timespec="microseconds")) - return sa.func.to_timestamp( - # comma for sep here because T is a special character in Oracle - # the FX prefix means "requires an exact match" - value.isoformat(sep=",", timespec="microseconds"), - "FXYYYY-MM-DD,HH24:MI:SS.FF6", - ) - elif dtype.is_date(): - return sa.func.to_date(value.isoformat(), "FXYYYY-MM-DD") - elif dtype.is_time(): - raise NotImplementedError("Time values are not supported in Oracle") - return _alchemy_literal(t, op) - - -def _second(t, op): - # Oracle returns fractional seconds, so `floor` the result to match - # the behavior of other backends - return sa.func.floor(sa.extract("SECOND", t.translate(op.arg))) - - -def _string_join(t, op): - sep = t.translate(op.sep) - values = list(map(t.translate, op.arg)) - return sa.func.concat(*toolz.interpose(sep, values)) - - 
-def _median(t, op): - arg = op.arg - if (where := op.where) is not None: - arg = ops.IfElse(where, arg, None) - - if arg.dtype.is_numeric(): - return sa.func.median(t.translate(arg)) - return sa.cast( - sa.func.percentile_disc(0.5).within_group(t.translate(arg)), - t.get_sqla_type(op.dtype), - ) - - -operation_registry.update( - { - ops.Log2: unary(lambda arg: sa.func.log(2, arg)), - ops.Log10: unary(lambda arg: sa.func.log(10, arg)), - ops.Log: fixed_arity(lambda arg, base: sa.func.log(base, arg), 2), - ops.Power: fixed_arity(sa.func.power, 2), - ops.Cot: _cot, - ops.Pi: lambda *_: sa.func.ACOS(-1), - ops.RandomScalar: fixed_arity(sa.func.dbms_random.value, 0), - ops.Degrees: lambda t, op: 180 * t.translate(op.arg) / t.translate(ops.Pi()), - ops.Radians: lambda t, op: t.translate(ops.Pi()) * t.translate(op.arg) / 180, - # Aggregate Functions - ops.Covariance: _cov, - ops.Correlation: _corr, - ops.ApproxMedian: reduction(sa.func.approx_median), - ops.Median: _median, - # Temporal - ops.ExtractSecond: _second, - # String - ops.StrRight: fixed_arity(lambda arg, nchars: sa.func.substr(arg, -nchars), 2), - ops.StringJoin: _string_join, - ops.StringFind: _gen_string_find(sa.func.instr), - # Generic - ops.Hash: unary(sa.func.ora_hash), - ops.Literal: _literal, - ops.Levenshtein: fixed_arity(sa.func.utl_match.edit_distance, 2), - } -) - -_invalid_operations = set() - -operation_registry = { - k: v for k, v in operation_registry.items() if k not in _invalid_operations -} diff --git a/ibis/backends/oracle/tests/conftest.py b/ibis/backends/oracle/tests/conftest.py index addf6a6d3924..ee27b4cc0f1f 100644 --- a/ibis/backends/oracle/tests/conftest.py +++ b/ibis/backends/oracle/tests/conftest.py @@ -7,8 +7,8 @@ import subprocess from typing import TYPE_CHECKING, Any +import oracledb import pytest -import sqlalchemy as sa import ibis from ibis.backends.tests.base import ServiceBackendTest @@ -28,6 +28,9 @@ # ./createAppUser user pass ORACLE_DB # where ORACLE_DB is the same name you used in the Compose file. +# Set to ensure decimals come back as decimals +oracledb.defaults.fetch_decimals = True + class TestConf(ServiceBackendTest): check_dtype = False @@ -43,7 +46,7 @@ class TestConf(ServiceBackendTest): rounding_method = "half_to_even" data_volume = "/opt/oracle/data" service_name = "oracle" - deps = "oracledb", "sqlalchemy" + deps = ("oracledb",) @property def test_files(self) -> Iterable[Path]: @@ -88,12 +91,11 @@ def _load_data( ) init_oracle_database( - url=sa.engine.make_url( - f"oracle://{user}:{password}@{host}:{port:d}/{database}", - ), + dsn=oracledb.makedsn(host, port, service_name=database), + user=user, + password=password, database=database, schema=self.ddl_script, - connect_args=dict(service_name=database), ) # then call sqlldr to ingest @@ -138,42 +140,29 @@ def con(data_dir, tmp_path_factory, worker_id): def init_oracle_database( - url: sa.engine.url.URL, + user: str, + password: str, + dsn: str, database: str, schema: str | None = None, **kwargs: Any, -) -> sa.engine.Engine: +) -> None: """Initialise `database` at `url` with `schema`. - If `recreate`, drop the `database` at `url`, if it exists. 
- Parameters ---------- - url : url.sa.engine.url.URL - Connection url to the database database : str Name of the database to be dropped schema : TextIO File object containing schema to use - - Returns - ------- - sa.engine.Engine - SQLAlchemy engine object """ - try: - url.database = database - except AttributeError: - url = url.set(database=database) - engine = sa.create_engine(url, **kwargs) + con = oracledb.connect(dsn, user=user, password=password, stmtcachesize=0) if schema: - with engine.begin() as conn: + with con.cursor() as cursor: for stmt in schema: # XXX: maybe should just remove the comments in the sql file # so we don't end up writing an entire parser here. if not stmt.startswith("--"): - conn.exec_driver_sql(stmt) - - return engine + cursor.execute(stmt) diff --git a/ibis/backends/oracle/tests/test_client.py b/ibis/backends/oracle/tests/test_client.py index 4f7290aa9157..d1dd5e6ad5fd 100644 --- a/ibis/backends/oracle/tests/test_client.py +++ b/ibis/backends/oracle/tests/test_client.py @@ -54,6 +54,6 @@ def stats_one_way_anova(x, y, value: str) -> int: """ with con.begin() as c: expected = pd.DataFrame( - c.exec_driver_sql(sql).fetchall(), columns=["string_col", "df_w"] + c.execute(sql).fetchall(), columns=["string_col", "df_w"] ) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_dtype=False) diff --git a/ibis/backends/oracle/tests/test_datatypes.py b/ibis/backends/oracle/tests/test_datatypes.py index 848bfd905e43..33efc4bdcb41 100644 --- a/ibis/backends/oracle/tests/test_datatypes.py +++ b/ibis/backends/oracle/tests/test_datatypes.py @@ -14,7 +14,7 @@ def test_blob_raw(con): con.drop_table("blob_raw_blobs_blob_raw", force=True) with con.begin() as bind: - bind.exec_driver_sql( + bind.execute( """CREATE TABLE "blob_raw_blobs_blob_raw" ("blob" BLOB, "raw" RAW(255))""" ) diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py index a21b80556acb..188c7ee9ccaa 100644 --- a/ibis/backends/tests/errors.py +++ b/ibis/backends/tests/errors.py @@ -112,3 +112,8 @@ from pydruid.db.exceptions import ProgrammingError as PyDruidProgrammingError except ImportError: PyDruidProgrammingError = None + +try: + from oracledb.exceptions import DatabaseError as OracleDatabaseError +except ImportError: + OracleDatabaseError = None diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/oracle/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/oracle/out.sql new file mode 100644 index 000000000000..2124da09f645 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/oracle/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" = 1 AS "bool_col" +FROM "functional_alltypes" "t0" +FETCH FIRST 11 ROWS ONLY \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/oracle/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/oracle/out.sql new file mode 100644 index 000000000000..2124da09f645 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/oracle/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" = 1 AS "bool_col" +FROM "functional_alltypes" "t0" +FETCH FIRST 11 ROWS ONLY \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/oracle/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/oracle/out.sql new file mode 100644 index 
000000000000..7b50874f2771 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/oracle/out.sql @@ -0,0 +1,19 @@ +SELECT + SUM("t1"."bigint_col") AS "Sum(bigint_col)" +FROM ( + SELECT + "t0"."id", + "t0"."bool_col" = 1 AS "bool_col", + "t0"."tinyint_col", + "t0"."smallint_col", + "t0"."int_col", + "t0"."bigint_col", + "t0"."float_col", + "t0"."double_col", + "t0"."date_string_col", + "t0"."string_col", + "t0"."timestamp_col", + "t0"."year", + "t0"."month" + FROM "functional_alltypes" "t0" +) "t1" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/oracle/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/oracle/out.sql new file mode 100644 index 000000000000..96217eecd9a1 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/oracle/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + "t0"."id", + "t0"."bool_col" = 1 AS "bool_col" + FROM "functional_alltypes" "t0" + FETCH FIRST 10 ROWS ONLY +) "t2" +FETCH FIRST 11 ROWS ONLY \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/oracle/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/oracle/out.sql index 69fb369f7226..036e3567f920 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/oracle/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/oracle/out.sql @@ -1,5 +1,5 @@ SELECT - CASE t0."continent" + CASE "t0"."continent" WHEN 'NA' THEN 'North America' WHEN 'SA' @@ -16,10 +16,10 @@ SELECT THEN 'Antarctica' ELSE 'Unknown continent' END AS "cont", - SUM(t0."population") AS "total_pop" -FROM "countries" t0 + SUM("t0"."population") AS "total_pop" +FROM "countries" "t0" GROUP BY - CASE t0."continent" + CASE "t0"."continent" WHEN 'NA' THEN 'North America' WHEN 'SA' diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/oracle/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/oracle/out.sql index 13480df0fe70..e63d49015d77 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/oracle/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/oracle/out.sql @@ -1,13 +1,9 @@ SELECT - t0."x" IN ( + "t0"."x" IN ( SELECT - t1."x" - FROM ( - SELECT - t0."x" AS "x" - FROM "t" t0 - WHERE - t0."x" > 2 - ) t1 - ) AS "InColumn(x, x)" -FROM "t" t0 \ No newline at end of file + "t0"."x" + FROM "t" "t0" + WHERE + "t0"."x" > 2 + ) AS "InSubquery(x)" +FROM "t" "t0" \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 0d0586a30f9b..a64179a8f9f0 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -20,6 +20,7 @@ GoogleBadRequest, ImpalaHiveServer2Error, MySQLNotSupportedError, + OracleDatabaseError, PolarsInvalidOperationError, Py4JError, PyDruidProgrammingError, @@ -299,7 +300,7 @@ def mean_and_std(v): ), pytest.mark.broken( ["oracle"], - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, reason="ORA-02000: missing AS keyword", ), pytest.mark.notimpl( @@ -319,7 +320,7 @@ def mean_and_std(v): ), pytest.mark.broken( ["oracle"], - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, reason="ORA-02000: missing AS keyword", ), pytest.mark.notimpl( @@ -351,7 +352,7 @@ def mean_and_std(v): ), pytest.mark.broken( ["oracle"], - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, reason="ORA-02000: 
missing AS keyword", ), pytest.mark.notimpl( @@ -371,7 +372,7 @@ def mean_and_std(v): ), pytest.mark.broken( ["oracle"], - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, reason="ORA-02000: missing AS keyword", ), pytest.mark.notimpl( @@ -391,7 +392,7 @@ def mean_and_std(v): marks=[ pytest.mark.broken( ["oracle"], - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, reason="ORA-02000: missing AS keyword", ), ], @@ -599,10 +600,10 @@ def mean_and_std(v): "dask", "datafusion", "druid", - "oracle", "impala", "mssql", "mysql", + "oracle", "pandas", "polars", "sqlite", @@ -678,10 +679,6 @@ def mean_and_std(v): raises=AttributeError, reason="'Series' object has no attribute 'bitand'", ), - pytest.mark.notimpl( - ["oracle"], - raises=sa.exc.DatabaseError, - ), ], ), param( @@ -701,11 +698,6 @@ def mean_and_std(v): raises=AttributeError, reason="'Series' object has no attribute 'bitor'", ), - pytest.mark.notyet( - ["oracle"], - raises=sa.exc.DatabaseError, - reason="ORA-00904: 'BIT_OR': invalid identifier", - ), ], ), param( @@ -725,11 +717,6 @@ def mean_and_std(v): raises=AttributeError, reason="'Series' object has no attribute 'bitxor'", ), - pytest.mark.notyet( - ["oracle"], - raises=sa.exc.DatabaseError, - reason="ORA-00904: 'BIT_XOR': invalid identifier", - ), ], ), param( @@ -838,7 +825,7 @@ def test_reduction_ops( ["bigquery", "druid", "mssql", "oracle", "sqlite", "flink"], raises=( sa.exc.OperationalError, - sa.exc.DatabaseError, + OracleDatabaseError, com.UnsupportedOperationError, com.OperationNotDefinedError, ), @@ -876,7 +863,6 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): "mysql", "sqlite", "druid", - "oracle", "exasol", ], raises=com.OperationNotDefinedError, @@ -1225,9 +1211,6 @@ def test_median(alltypes, df): raises=ClickHouseDatabaseError, reason="doesn't support median of strings", ) -@pytest.mark.notyet( - ["oracle"], raises=sa.exc.DatabaseError, reason="doesn't support median of strings" -) @pytest.mark.broken( ["pyspark"], raises=AssertionError, reason="pyspark returns null for string median" ) @@ -1246,7 +1229,17 @@ def test_median(alltypes, df): "func", [ param(methodcaller("quantile", 0.5), id="quantile"), - param(methodcaller("median"), id="median"), + param( + methodcaller("median"), + id="median", + marks=[ + pytest.mark.notyet( + ["oracle"], + raises=OracleDatabaseError, + reason="doesn't support median of strings", + ) + ], + ), ], ) def test_string_quantile(alltypes, func): @@ -1275,9 +1268,6 @@ def test_string_quantile(alltypes, func): param( methodcaller("quantile", 0.5), id="quantile", - marks=[ - pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) - ], ), ], ) @@ -1335,7 +1325,7 @@ def test_date_quantile(alltypes, func): ) @pytest.mark.notyet( ["oracle"], - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, reason="ORA-00904: 'GROUP_CONCAT': invalid identifier", ) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @@ -1608,7 +1598,7 @@ def test_grouped_case(backend, con): @pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) @pytest.mark.notyet(["trino"], raises=TrinoUserError) @pytest.mark.notyet(["mysql"], raises=MySQLNotSupportedError) -@pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError) +@pytest.mark.notyet(["oracle"], raises=OracleDatabaseError) @pytest.mark.notyet(["pyspark"], raises=PySparkAnalysisException) def test_group_concat_over_window(backend, con): input_df = pd.DataFrame( diff --git a/ibis/backends/tests/test_asof_join.py 
b/ibis/backends/tests/test_asof_join.py index 3b71ebe88346..c10fe79dbff6 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -92,6 +92,7 @@ def time_keyed_right(time_keyed_df2): "impala", "bigquery", "exasol", + "oracle", ] ) def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): @@ -129,6 +130,7 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op "impala", "bigquery", "exasol", + "oracle", ] ) def test_keyed_asof_join_with_tolerance( diff --git a/ibis/backends/tests/test_binary.py b/ibis/backends/tests/test_binary.py index 3559741a493d..1d9f7cfa0516 100644 --- a/ibis/backends/tests/test_binary.py +++ b/ibis/backends/tests/test_binary.py @@ -21,7 +21,7 @@ @pytest.mark.notimpl( - ["clickhouse", "impala", "druid"], + ["clickhouse", "impala", "druid", "oracle"], "Unsupported type: Binary(nullable=True)", raises=NotImplementedError, ) diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index e4016c762910..7a5a3d6a3869 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -1417,6 +1417,10 @@ def test_persist_expression_repeated_cache(alltypes): raises=sa.exc.InternalError, reason="Feature is not yet implemented: CREATE TEMPORARY TABLE", ) +@mark.notimpl( + ["oracle"], + reason="Oracle error message for a missing table/view doesn't include the name of the table", +) def test_persist_expression_release(con, alltypes): non_cached_table = alltypes.mutate( test_column="calculation", other_column="big calc 3" diff --git a/ibis/backends/tests/test_dot_sql.py b/ibis/backends/tests/test_dot_sql.py index 38ed9708d54d..4f6fe5799272 100644 --- a/ibis/backends/tests/test_dot_sql.py +++ b/ibis/backends/tests/test_dot_sql.py @@ -29,6 +29,7 @@ @pytest.mark.notimpl(["flink"]) +@pytest.mark.notyet(["oracle"], reason="table quoting behavior") @dot_sql_never @pytest.mark.parametrize( "schema", diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index 1760cf8de461..5707f8ce3ec6 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -15,6 +15,7 @@ DuckDBParserException, ExaQueryError, MySQLOperationalError, + OracleDatabaseError, PyDeltaTableError, PyDruidProgrammingError, PySparkArithmeticException, @@ -358,7 +359,7 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): pytest.mark.notyet(["mssql"], raises=sa.exc.ProgrammingError), pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError), pytest.mark.notyet(["trino"], raises=TrinoUserError), - pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError), + pytest.mark.notyet(["oracle"], raises=OracleDatabaseError), pytest.mark.notyet(["mysql"], raises=MySQLOperationalError), pytest.mark.notyet( ["pyspark"], diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 5f4e353d1641..1c829b50c0d3 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -26,6 +26,7 @@ ImpalaHiveServer2Error, Py4JJavaError, MySQLProgrammingError, + OracleDatabaseError, PyDruidProgrammingError, SnowflakeProgrammingError, TrinoUserError, @@ -126,7 +127,7 @@ def test_scalar_fillna_nullif(con, expr, expected): ), ], ) -@pytest.mark.notimpl(["mssql", "oracle"]) +@pytest.mark.notimpl(["mssql"]) @pytest.mark.notyet(["flink"], "NaN is not supported in Flink SQL", raises=ValueError) def test_isna(backend, alltypes, col, value, filt): table = 
alltypes.select(**{col: value}) @@ -370,7 +371,7 @@ def test_case_where(backend, alltypes, df): # TODO: some of these are notimpl (datafusion) others are probably never -@pytest.mark.notimpl(["mysql", "sqlite", "mssql", "druid", "oracle", "exasol"]) +@pytest.mark.notimpl(["mysql", "sqlite", "mssql", "druid", "exasol"]) @pytest.mark.notyet(["flink"], "NaN is not supported in Flink SQL", raises=ValueError) def test_select_filter_mutate(backend, alltypes, df): """Test that select, filter and mutate are executed in right order. @@ -422,11 +423,7 @@ def test_table_fillna_invalid(alltypes): "replacements", [ param({"int_col": 20}, id="int"), - param( - {"double_col": -1, "string_col": "missing"}, - id="double-int-str", - marks=[pytest.mark.notimpl(["oracle"])], - ), + param({"double_col": -1, "string_col": "missing"}, id="double-int-str"), param({"double_col": -1.5, "string_col": "missing"}, id="double-str"), ], ) @@ -444,7 +441,6 @@ def test_table_fillna_mapping(backend, alltypes, replacements): backend.assert_frame_equal(result, expected, check_dtype=False) -@pytest.mark.notimpl(["oracle"]) def test_table_fillna_scalar(backend, alltypes): table = alltypes.mutate( int_col=alltypes.int_col.nullif(1), @@ -1137,7 +1133,11 @@ def test_pivot_wider(backend): ) @pytest.mark.notimpl( ["druid", "impala", "oracle"], - raises=(NotImplementedError, sa.exc.ProgrammingError, com.OperationNotDefinedError), + raises=( + NotImplementedError, + OracleDatabaseError, + com.OperationNotDefinedError, + ), reason="arbitrary not implemented in the backend", ) @pytest.mark.notimpl( @@ -1207,7 +1207,7 @@ def test_distinct_on_keep(backend, on, keep): ) @pytest.mark.notimpl( ["druid", "impala", "oracle"], - raises=(NotImplementedError, sa.exc.ProgrammingError, com.OperationNotDefinedError), + raises=(NotImplementedError, OracleDatabaseError, com.OperationNotDefinedError), reason="arbitrary not implemented in the backend", ) @pytest.mark.notimpl( @@ -1561,6 +1561,7 @@ def test_try_cast_func(con, from_val, to_type, func): raises=ExaQueryError, reason="doesn't support OFFSET without ORDER BY", ), + pytest.mark.notyet(["oracle"], raises=com.UnsupportedArgumentError), ], ), param( @@ -1589,6 +1590,7 @@ def test_try_cast_func(con, from_val, to_type, func): raises=sa.exc.InternalError, reason="risingwave doesn't support limit/offset", ), + pytest.mark.notyet(["oracle"], raises=com.UnsupportedArgumentError), ], ), # positive stop @@ -1607,6 +1609,7 @@ def test_try_cast_func(con, from_val, to_type, func): raises=ExaQueryError, reason="doesn't support OFFSET without ORDER BY", ), + pytest.mark.notyet(["oracle"], raises=com.UnsupportedArgumentError), ], ), param( @@ -1620,6 +1623,7 @@ def test_try_cast_func(con, from_val, to_type, func): reason="mssql doesn't support OFFSET without LIMIT", ), pytest.mark.notyet(["exasol"], raises=ExaQueryError), + pytest.mark.notyet(["oracle"], raises=com.UnsupportedArgumentError), pytest.mark.notyet( ["impala"], raises=ImpalaHiveServer2Error, @@ -1673,6 +1677,11 @@ def test_static_table_slice(backend, slc, expected_count_fn): raises=SnowflakeProgrammingError, reason="backend doesn't support dynamic limit/offset", ) +@pytest.mark.notyet( + ["oracle"], + raises=com.UnsupportedArgumentError, + reason="Removed half-baked dynamic offset functionality for now", +) @pytest.mark.notyet( ["trino"], raises=TrinoUserError, @@ -1733,6 +1742,11 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): raises=SnowflakeProgrammingError, reason="backend doesn't support dynamic limit/offset", ) 
+@pytest.mark.notyet( + ["oracle"], + raises=com.UnsupportedArgumentError, + reason="Removed half-baked dynamic offset functionality for now", +) @pytest.mark.notimpl( ["trino"], raises=TrinoUserError, diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index 2104893321ef..f1b79bdc2ef2 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -188,7 +188,7 @@ def test_semi_join_topk(batting, awards_players, func): assert not expr.limit(5).execute().empty -@pytest.mark.notimpl(["dask", "druid", "exasol"]) +@pytest.mark.notimpl(["dask", "druid", "exasol", "oracle"]) @pytest.mark.notimpl( ["postgres"], raises=com.IbisTypeError, diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 01b41d73e4f5..f91cdffd89fb 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -23,6 +23,7 @@ GoogleBadRequest, ImpalaHiveServer2Error, MySQLOperationalError, + OracleDatabaseError, PsycoPg2DivisionByZero, Py4JError, PyDruidProgrammingError, @@ -260,7 +261,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "mssql": 1.1, "druid": decimal.Decimal("1.1"), "datafusion": decimal.Decimal("1.1"), - "oracle": 1.1, + "oracle": decimal.Decimal("1.1"), "flink": decimal.Decimal("1.1"), }, { @@ -304,7 +305,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "mssql": 1.1, "druid": decimal.Decimal("1.1"), "datafusion": decimal.Decimal("1.1"), - "oracle": 1.1, + "oracle": decimal.Decimal("1.1"), "flink": decimal.Decimal("1.1"), }, { @@ -353,6 +354,7 @@ def test_numeric_literal(con, backend, expr, expected_types): pytest.mark.notimpl(["exasol"], raises=ExaQueryError), pytest.mark.notimpl(["mysql"], raises=MySQLOperationalError), pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError), + pytest.mark.notyet(["oracle"], raises=OracleDatabaseError), pytest.mark.notyet(["impala"], raises=ImpalaHiveServer2Error), pytest.mark.broken( ["duckdb"], @@ -431,7 +433,7 @@ def test_numeric_literal(con, backend, expr, expected_types): pytest.mark.broken( ["oracle"], "(oracledb.exceptions.DatabaseError) DPY-4004: invalid number", - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, ), pytest.mark.notyet( ["trino"], @@ -508,7 +510,7 @@ def test_numeric_literal(con, backend, expr, expected_types): pytest.mark.broken( ["oracle"], "(oracledb.exceptions.DatabaseError) DPY-4004: invalid number", - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, ), pytest.mark.notyet( ["flink"], @@ -597,7 +599,7 @@ def test_numeric_literal(con, backend, expr, expected_types): pytest.mark.broken( ["oracle"], "(oracledb.exceptions.DatabaseError) DPY-4004: invalid number", - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, ), pytest.mark.notyet( ["flink"], @@ -706,7 +708,7 @@ def test_decimal_literal(con, backend, expr, expected_types, expected_result): ], ) @pytest.mark.notimpl( - ["sqlite", "mssql", "oracle", "flink", "druid"], raises=com.OperationNotDefinedError + ["sqlite", "mssql", "flink", "druid"], raises=com.OperationNotDefinedError ) @pytest.mark.notimpl(["mysql"], raises=(MySQLOperationalError, NotImplementedError)) def test_isnan_isinf( @@ -1163,7 +1165,7 @@ def test_floating_mod(backend, alltypes, df): marks=[ pytest.mark.notyet( "oracle", - raises=(sa.exc.DatabaseError, sa.exc.ArgumentError), + raises=OracleDatabaseError, reason="Oracle doesn't do integer division by zero", ), pytest.mark.notyet( @@ -1179,7 +1181,7 @@ def test_floating_mod(backend, 
alltypes, df): marks=[ pytest.mark.notyet( "oracle", - raises=(sa.exc.DatabaseError, sa.exc.ArgumentError), + raises=OracleDatabaseError, reason="Oracle doesn't do integer division by zero", ), pytest.mark.notyet( @@ -1195,7 +1197,7 @@ def test_floating_mod(backend, alltypes, df): marks=[ pytest.mark.notyet( "oracle", - raises=(sa.exc.DatabaseError, sa.exc.ArgumentError), + raises=OracleDatabaseError, reason="Oracle doesn't do integer division by zero", ), pytest.mark.notyet( @@ -1211,7 +1213,7 @@ def test_floating_mod(backend, alltypes, df): marks=[ pytest.mark.notyet( "oracle", - raises=(sa.exc.DatabaseError, sa.exc.ArgumentError), + raises=OracleDatabaseError, reason="Oracle doesn't do integer division by zero", ), pytest.mark.notyet( @@ -1229,7 +1231,7 @@ def test_floating_mod(backend, alltypes, df): marks=[ pytest.mark.notyet( "oracle", - raises=(sa.exc.DatabaseError, sa.exc.ArgumentError), + raises=OracleDatabaseError, reason="Oracle doesn't do integer division by zero", ), pytest.mark.never(["impala"], reason="doesn't allow divide by zero"), @@ -1242,7 +1244,7 @@ def test_floating_mod(backend, alltypes, df): marks=[ pytest.mark.notyet( "oracle", - raises=(sa.exc.DatabaseError, sa.exc.ArgumentError), + raises=OracleDatabaseError, reason="Oracle doesn't do integer division by zero", ), pytest.mark.never(["impala"], reason="doesn't allow divide by zero"), @@ -1255,7 +1257,7 @@ def test_floating_mod(backend, alltypes, df): marks=[ pytest.mark.notyet( "oracle", - raises=(sa.exc.DatabaseError, sa.exc.ArgumentError), + raises=OracleDatabaseError, reason="Oracle doesn't do integer division by zero", ), pytest.mark.never(["impala"], reason="doesn't allow divide by zero"), @@ -1268,7 +1270,7 @@ def test_floating_mod(backend, alltypes, df): marks=[ pytest.mark.notyet( "oracle", - raises=(sa.exc.DatabaseError, sa.exc.ArgumentError), + raises=OracleDatabaseError, reason="Oracle doesn't do integer division by zero", ), pytest.mark.never(["impala"], reason="doesn't allow divide by zero"), @@ -1346,6 +1348,7 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "datafusion", "duckdb", "impala", + "oracle", "pandas", "pyspark", "polars", @@ -1493,7 +1496,14 @@ def test_constants(con, const): ) -@pytest.mark.parametrize("op", [and_, or_, xor]) +@pytest.mark.parametrize( + "op", + [ + and_, + param(or_, marks=[pytest.mark.notimpl(["oracle"], raises=OracleDatabaseError)]), + param(xor, marks=[pytest.mark.notimpl(["oracle"], raises=OracleDatabaseError)]), + ], +) @pytest.mark.parametrize( ("left_fn", "right_fn"), [ @@ -1502,7 +1512,6 @@ def test_constants(con, const): param(lambda t: t.int_col, lambda _: 3, id="col_scalar"), ], ) -@pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notimpl(["exasol"], raises=(sa.exc.DBAPIError, ExaQueryError)) @flink_no_bitwise def test_bitwise_columns(backend, con, alltypes, df, op, left_fn, right_fn): @@ -1539,7 +1548,7 @@ def test_bitwise_columns(backend, con, alltypes, df, op, left_fn, right_fn): param(rshift, lambda t: t.int_col, lambda _: 3, id="rshift_col_scalar"), ], ) -@pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) +@pytest.mark.notimpl(["oracle"], raises=OracleDatabaseError) @pytest.mark.notimpl(["exasol"], raises=(sa.exc.DBAPIError, ExaQueryError)) @flink_no_bitwise def test_bitwise_shift(backend, alltypes, df, op, left_fn, right_fn): @@ -1558,13 +1567,30 @@ def test_bitwise_shift(backend, alltypes, df, op, left_fn, right_fn): @pytest.mark.parametrize( "op", - [and_, or_, xor, lshift, rshift], + [ + 
and_, + param( + or_, + marks=pytest.mark.notimpl(["oracle"], raises=OracleDatabaseError), + ), + param( + xor, + marks=pytest.mark.notimpl(["oracle"], raises=OracleDatabaseError), + ), + param( + lshift, + marks=[pytest.mark.notimpl(["oracle"], raises=OracleDatabaseError)], + ), + param( + rshift, + marks=[pytest.mark.notimpl(["oracle"], raises=OracleDatabaseError)], + ), + ], ) @pytest.mark.parametrize( ("left", "right"), [param(4, L(2), id="int_col"), param(L(4), 2, id="col_int")], ) -@pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @flink_no_bitwise def test_bitwise_scalars(con, op, left, right): @@ -1575,7 +1601,7 @@ def test_bitwise_scalars(con, op, left, right): @pytest.mark.notimpl(["datafusion", "exasol"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) +@pytest.mark.notimpl(["oracle"], raises=OracleDatabaseError) @flink_no_bitwise def test_bitwise_not_scalar(con): expr = ~L(2) @@ -1585,7 +1611,7 @@ def test_bitwise_not_scalar(con): @pytest.mark.notimpl(["datafusion", "exasol"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) +@pytest.mark.notimpl(["oracle"], raises=OracleDatabaseError) @flink_no_bitwise def test_bitwise_not_col(backend, alltypes, df): expr = (~alltypes.int_col).name("tmp") diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index ed8bb91d466b..7d8ead956c71 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -6,13 +6,13 @@ import numpy as np import pandas as pd import pytest -import sqlalchemy as sa from pytest import param +import sqlalchemy as sa import ibis import ibis.expr.datatypes as dt from ibis import _ -from ibis.backends.tests.errors import Py4JJavaError +from ibis.backends.tests.errors import OracleDatabaseError, Py4JJavaError @pytest.mark.parametrize( @@ -38,12 +38,12 @@ def test_floating_scalar_parameter(backend, alltypes, df, column, raw_value): [("2009-03-01", "2010-07-03"), ("2014-12-01", "2017-01-05")], ) @pytest.mark.notimpl(["mssql", "trino", "druid"]) -@pytest.mark.broken(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, reason="function make_date(integer, integer, integer) does not exist", ) +@pytest.mark.broken(["oracle"], raises=OracleDatabaseError) def test_date_scalar_parameter(backend, alltypes, start_string, end_string): start, end = ibis.param(dt.date), ibis.param(dt.date) diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 8b17e69a6930..fe4cce718e18 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -31,7 +31,7 @@ reason="structs not supported in the backend", ) no_struct_literals = pytest.mark.notimpl( - ["mssql", "oracle"], reason="struct literals are not yet implemented" + ["mssql"], reason="struct literals are not yet implemented" ) not_sql = pytest.mark.never( ["pandas", "dask"], diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index 55559a2efb1d..37703174fb22 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -11,7 +11,11 @@ import ibis import ibis.common.exceptions as com import ibis.expr.datatypes as dt -from ibis.backends.tests.errors import ClickHouseDatabaseError, PyDruidProgrammingError +from ibis.backends.tests.errors import ( + ClickHouseDatabaseError, + OracleDatabaseError, + 
PyDruidProgrammingError, +) from ibis.common.annotations import ValidationError @@ -52,7 +56,7 @@ marks=[ pytest.mark.broken( ["oracle"], - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, reason="ORA-01741: illegal zero length identifier", ), pytest.mark.broken( @@ -80,7 +84,7 @@ marks=[ pytest.mark.broken( ["oracle"], - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, reason="ORA-25716", ), pytest.mark.broken( @@ -202,7 +206,7 @@ def uses_java_re(t): id="rlike", marks=[ pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError + ["mssql", "exasol"], raises=com.OperationNotDefinedError ), ], ), @@ -212,7 +216,7 @@ def uses_java_re(t): id="re_search_substring", marks=[ pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError + ["mssql", "exasol"], raises=com.OperationNotDefinedError ), ], ), @@ -222,7 +226,7 @@ def uses_java_re(t): id="re_search", marks=[ pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError + ["mssql", "exasol"], raises=com.OperationNotDefinedError ), ], ), @@ -234,7 +238,7 @@ def uses_java_re(t): id="re_search_posix", marks=[ pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], + ["mssql", "exasol"], raises=com.OperationNotDefinedError, ), pytest.mark.never( @@ -250,7 +254,7 @@ def uses_java_re(t): id="re_extract", marks=[ pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError + ["mssql", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -265,7 +269,7 @@ def uses_java_re(t): id="re_extract_group", marks=[ pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError + ["mssql", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -282,7 +286,7 @@ def uses_java_re(t): id="re_extract_posix", marks=[ pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError + ["mssql", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["druid"], reason="No posix support", raises=AssertionError @@ -300,7 +304,7 @@ def uses_java_re(t): id="re_extract_whole_group", marks=[ pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError + ["mssql", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -317,7 +321,7 @@ def uses_java_re(t): id="re_extract_group_1", marks=[ pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError + ["mssql", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -334,7 +338,7 @@ def uses_java_re(t): id="re_extract_group_2", marks=[ pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError + ["mssql", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -351,7 +355,7 @@ def uses_java_re(t): id="re_extract_group_3", marks=[ pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError + ["mssql", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -366,7 +370,7 @@ def uses_java_re(t): id="re_extract_group_at_beginning", marks=[ pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError + ["mssql", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -381,7 +385,7 @@ def uses_java_re(t): id="re_extract_group_at_end", marks=[ 
pytest.mark.notimpl( - ["mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError + ["mssql", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["risingwave"], @@ -398,7 +402,7 @@ def uses_java_re(t): id="re_replace_posix", marks=[ pytest.mark.notimpl( - ["mysql", "mssql", "druid", "oracle", "exasol"], + ["mysql", "mssql", "druid", "exasol"], raises=com.OperationNotDefinedError, ), ], @@ -409,7 +413,7 @@ def uses_java_re(t): id="re_replace", marks=[ pytest.mark.notimpl( - ["mysql", "mssql", "druid", "oracle", "exasol"], + ["mysql", "mssql", "druid", "exasol"], raises=com.OperationNotDefinedError, ), ], @@ -420,7 +424,7 @@ def uses_java_re(t): id="repeat_method", marks=pytest.mark.notimpl( ["oracle"], - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, reason="ORA-00904: REPEAT invalid identifier", ), ), @@ -430,7 +434,7 @@ def uses_java_re(t): id="repeat_left", marks=pytest.mark.notimpl( ["oracle"], - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, reason="ORA-00904: REPEAT invalid identifier", ), ), @@ -440,7 +444,7 @@ def uses_java_re(t): id="repeat_right", marks=pytest.mark.notimpl( ["oracle"], - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, reason="ORA-00904: REPEAT invalid identifier", ), ), @@ -450,7 +454,7 @@ def uses_java_re(t): id="translate", marks=[ pytest.mark.notimpl( - ["mssql", "mysql", "polars", "druid", "oracle"], + ["mssql", "mysql", "polars", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -825,7 +829,7 @@ def test_string(backend, alltypes, df, result_func, expected_func): @pytest.mark.notimpl( - ["mysql", "mssql", "druid", "oracle", "exasol"], + ["mysql", "mssql", "druid", "exasol"], raises=com.OperationNotDefinedError, ) @pytest.mark.broken( @@ -841,11 +845,6 @@ def test_re_replace_global(con): @pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError) @pytest.mark.notimpl(["druid"], raises=ValidationError) -@pytest.mark.broken( - ["oracle"], - raises=sa.exc.DatabaseError, - reason="ORA-61801: only boolean column or attribute can be used as a predicate", -) def test_substr_with_null_values(backend, alltypes, df): table = alltypes.mutate( substr_col_null=ibis.case() @@ -983,7 +982,7 @@ def test_array_string_join(con): @pytest.mark.notimpl( - ["mssql", "mysql", "druid", "oracle", "exasol"], raises=com.OperationNotDefinedError + ["mssql", "mysql", "druid", "exasol"], raises=com.OperationNotDefinedError ) def test_subs_with_re_replace(con): expr = ibis.literal("hi").re_replace("i", "a").substitute({"d": "b"}, else_="k") @@ -1035,11 +1034,6 @@ def test_levenshtein(con, right): reason="doesn't allow boolean expressions in select statements", raises=sa.exc.ProgrammingError, ) -@pytest.mark.broken( - ["oracle"], - reason="sqlalchemy converts True to 1, which cannot be used in CASE WHEN statement", - raises=sa.exc.DatabaseError, -) @pytest.mark.parametrize( "expr", [ @@ -1051,9 +1045,7 @@ def test_no_conditional_percent_escape(con, expr): assert con.execute(expr) == "%" -@pytest.mark.notimpl( - ["dask", "mssql", "oracle", "exasol"], raises=com.OperationNotDefinedError -) +@pytest.mark.notimpl(["dask", "mssql", "exasol"], raises=com.OperationNotDefinedError) def test_non_match_regex_search_is_false(con): expr = ibis.literal("foo").re_search("bar") result = con.execute(expr) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 5643369051f7..827b06a663eb 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ 
-27,6 +27,7 @@ ImpalaOperationalError, MySQLOperationalError, MySQLProgrammingError, + OracleDatabaseError, PolarsComputeError, PolarsPanicException, Py4JJavaError, @@ -74,7 +75,7 @@ def test_date_extract(backend, alltypes, df, attr, expr_fn): param( "quarter", marks=[ - pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError), + pytest.mark.notyet(["oracle"], raises=OracleDatabaseError), pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), @@ -310,7 +311,6 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): param( "W", marks=[ - pytest.mark.broken(["sqlite", "exasol"], raises=AssertionError), pytest.mark.notimpl(["mysql"], raises=com.UnsupportedOperationError), pytest.mark.broken( ["polars"], @@ -446,7 +446,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): reason="attempt to calculate the remainder with a divisor of zero", ), pytest.mark.notimpl( - ["flink", "exasol"], + ["flink"], raises=com.UnsupportedOperationError, reason=" unit is not supported in timestamp truncate", ), @@ -454,12 +454,12 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): ), ], ) -@pytest.mark.notimpl(["oracle", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["druid"], raises=AttributeError, reason="AttributeError: 'StringColumn' object has no attribute 'truncate'", ) +@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_timestamp_truncate(backend, alltypes, df, unit): expr = alltypes.timestamp_col.truncate(unit).name("tmp") @@ -542,10 +542,6 @@ def test_timestamp_truncate(backend, alltypes, df, unit): @pytest.mark.broken( ["polars", "druid"], reason="snaps to the UNIX epoch", raises=AssertionError ) -@pytest.mark.notimpl( - ["oracle"], - raises=com.OperationNotDefinedError, -) @pytest.mark.broken( ["druid"], raises=AttributeError, @@ -1007,11 +1003,16 @@ def convert_to_offset(x): raises=Exception, reason="pyarrow.lib.ArrowNotImplementedError: Unsupported cast", ), + pytest.mark.broken( + ["oracle"], + raises=com.OperationNotDefinedError, + reason="Some wonkiness in sqlglot generation.", + ), ], ), ], ) -@pytest.mark.notimpl(["mssql", "oracle"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["mssql"], raises=com.OperationNotDefinedError) def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn): expr = expr_fn(alltypes, backend).name("tmp") expected = expected_fn(df, backend) @@ -1046,11 +1047,6 @@ def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn): raises=AssertionError, reason="DateTime column overflows, should use DateTime64", ), - pytest.mark.broken( - ["clickhouse"], - raises=AssertionError, - reason="DateTime column overflows, should use DateTime64", - ), pytest.mark.notimpl( ["flink"], # Note (mehmet): Following cannot be imported for backends other than Flink. @@ -1135,11 +1131,6 @@ def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn): raises=AssertionError, reason="DateTime column overflows, should use DateTime64", ), - pytest.mark.broken( - ["clickhouse"], - raises=AssertionError, - reason="DateTime column overflows, should use DateTime64", - ), pytest.mark.broken( ["flink"], # Note (mehmet): Following cannot be imported for backends other than Flink. 
@@ -1211,7 +1202,7 @@ def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn): ), ], ) -@pytest.mark.notimpl(["sqlite", "mssql", "oracle"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["sqlite", "mssql"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_temporal_binop_pandas_timedelta( backend, con, alltypes, df, timedelta, temporal_fn @@ -1380,7 +1371,7 @@ def test_timestamp_comparison_filter_numpy(backend, con, alltypes, df, func_name @pytest.mark.notimpl( - ["sqlite", "snowflake", "mssql", "oracle", "exasol"], + ["sqlite", "snowflake", "mssql", "exasol"], raises=com.OperationNotDefinedError, ) @pytest.mark.broken( @@ -1403,7 +1394,7 @@ def test_interval_add_cast_scalar(backend, alltypes): @pytest.mark.notimpl( - ["sqlite", "snowflake", "mssql", "oracle", "exasol"], + ["sqlite", "snowflake", "mssql", "exasol"], raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( @@ -1480,9 +1471,7 @@ def test_interval_add_cast_column(backend, alltypes, df): ), ], ) -@pytest.mark.notimpl( - ["datafusion", "mssql", "oracle"], raises=com.OperationNotDefinedError -) +@pytest.mark.notimpl(["datafusion", "mssql"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["druid"], raises=AttributeError, @@ -1647,7 +1636,6 @@ def test_integer_to_timestamp(backend, con, unit): "datafusion", "mssql", "druid", - "oracle", ], raises=com.OperationNotDefinedError, ) @@ -1817,7 +1805,7 @@ def test_now_from_projection(alltypes): ["druid"], raises=PyDruidProgrammingError, reason="SQL parse failed" ) @pytest.mark.notimpl( - ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00936 missing expression" + ["oracle"], raises=OracleDatabaseError, reason="ORA-00936 missing expression" ) @pytest.mark.notimpl( ["risingwave"], @@ -1849,12 +1837,9 @@ def test_date_literal(con, backend): @pytest.mark.notimpl( - ["pandas", "dask", "pyspark", "mysql", "exasol"], + ["pandas", "dask", "pyspark", "mysql", "exasol", "oracle"], raises=com.OperationNotDefinedError, ) -@pytest.mark.notimpl( - ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00904: MAKE TIMESTAMP invalid" -) @pytest.mark.notyet(["impala"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["risingwave"], @@ -1885,10 +1870,7 @@ def test_timestamp_literal(con, backend): "Timestamp(timezone='***', scale=None, nullable=True)." 
), ) -@pytest.mark.notyet(["impala"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl( - ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00904: MAKE TIMESTAMP invalid" -) +@pytest.mark.notyet(["impala", "oracle"], raises=com.OperationNotDefinedError) @pytest.mark.parametrize( ("timezone", "expected"), [ @@ -1946,13 +1928,12 @@ def test_timestamp_with_timezone_literal(con, timezone, expected): @pytest.mark.notimpl( - ["pandas", "datafusion", "dask", "pyspark", "polars", "mysql"], + ["pandas", "datafusion", "dask", "pyspark", "polars", "mysql", "oracle"], raises=com.OperationNotDefinedError, ) @pytest.mark.notyet( ["clickhouse", "impala", "exasol"], raises=com.OperationNotDefinedError ) -@pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["risingwave"], @@ -1985,7 +1966,7 @@ def test_time_literal(con, backend): ["sqlite"], raises=AssertionError, reason="SQLite returns Timedelta from execution" ) @pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) -@pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError) +@pytest.mark.notyet(["oracle"], raises=OracleDatabaseError) @pytest.mark.parametrize( "microsecond", [ @@ -2101,7 +2082,7 @@ def test_interval_literal(con, backend): reason="'StringColumn' object has no attribute 'year'", ) @pytest.mark.broken( - ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00936: missing expression" + ["oracle"], raises=OracleDatabaseError, reason="ORA-00936: missing expression" ) @pytest.mark.notimpl( ["risingwave"], @@ -2127,15 +2108,12 @@ def test_date_column_from_ymd(backend, con, alltypes, df): raises=AttributeError, reason="StringColumn' object has no attribute 'year'", ) -@pytest.mark.notimpl( - ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00904 make timestamp invalid" -) -@pytest.mark.notyet(["impala"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, reason="function make_timestamp(smallint, smallint, smallint, smallint, smallint, smallint) does not exist", ) +@pytest.mark.notyet(["impala", "oracle"], raises=com.OperationNotDefinedError) def test_timestamp_column_from_ymdhms(backend, con, alltypes, df): c = alltypes.timestamp_col expr = ibis.timestamp( @@ -2149,7 +2127,7 @@ def test_timestamp_column_from_ymdhms(backend, con, alltypes, df): @pytest.mark.notimpl( - ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-01861 literal does not match" + ["oracle"], raises=OracleDatabaseError, reason="ORA-01861 literal does not match" ) def test_date_scalar_from_iso(con): expr = ibis.literal("2022-02-24") @@ -2162,7 +2140,7 @@ def test_date_scalar_from_iso(con): @pytest.mark.notimpl(["mssql"], raises=com.OperationNotDefinedError) @pytest.mark.notyet( ["oracle"], - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, reason="ORA-22849 type CLOB is not supported", ) @pytest.mark.notimpl(["exasol"], raises=AssertionError, strict=False) @@ -2195,7 +2173,11 @@ def test_timestamp_extract_milliseconds_with_big_value(con): raises=Exception, reason="Unsupported CAST from Int32 to Timestamp(Nanosecond, None)", ) -@pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00932") +@pytest.mark.notimpl( + ["oracle"], + raises=OracleDatabaseError, + reason="ORA-00932", +) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) def test_integer_cast_to_timestamp_column(backend, alltypes, df): expr = alltypes.int_col.cast("timestamp") @@ -2204,8 +2186,8 @@ def 
test_integer_cast_to_timestamp_column(backend, alltypes, df): backend.assert_series_equal(result, expected.astype(result.dtype)) -@pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) +@pytest.mark.notimpl(["oracle"], raises=OracleDatabaseError) def test_integer_cast_to_timestamp_scalar(alltypes, df): expr = alltypes.int_col.min().cast("timestamp") result = expr.execute() @@ -2252,7 +2234,7 @@ def build_date_col(t): @pytest.mark.notimpl(["mssql"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError) -@pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) +@pytest.mark.notimpl(["oracle"], raises=OracleDatabaseError) @pytest.mark.parametrize( ("left_fn", "right_fn"), [ @@ -2398,7 +2380,7 @@ def test_large_timestamp(con): ) @pytest.mark.notimpl( ["oracle"], - raises=sa.exc.DatabaseError, + raises=OracleDatabaseError, reason="ORA-01843: invalid month was specified", ) def test_timestamp_precision_output(con, ts, scale, unit): @@ -2492,7 +2474,6 @@ def test_delta(con, start, end, unit, expected): "dask", "impala", "mysql", - "oracle", "pandas", "pyspark", "sqlite", @@ -2530,6 +2511,10 @@ def test_delta(con, start, end, unit, expected): pytest.mark.notimpl( ["flink"], raises=Py4JJavaError, + ), + pytest.mark.notimpl( + ["oracle"], + raises=com.UnsupportedOperationError, reason="backend doesn't support sub-second interval precision", ), ], @@ -2539,7 +2524,10 @@ def test_delta(con, start, end, unit, expected): {"seconds": 2}, "2s", marks=[ - pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) + pytest.mark.notimpl( + ["datafusion", "oracle"], + raises=com.OperationNotDefinedError, + ), ], id="seconds", ), @@ -2547,7 +2535,10 @@ def test_delta(con, start, end, unit, expected): {"minutes": 5}, "300s", marks=[ - pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) + pytest.mark.notimpl( + ["datafusion", "oracle"], + raises=com.OperationNotDefinedError, + ), ], id="minutes", ), @@ -2555,7 +2546,10 @@ def test_delta(con, start, end, unit, expected): {"hours": 2}, "2h", marks=[ - pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) + pytest.mark.notimpl( + ["datafusion", "oracle"], + raises=com.OperationNotDefinedError, + ), ], id="hours", ), @@ -2563,15 +2557,15 @@ def test_delta(con, start, end, unit, expected): {"days": 2}, "2D", marks=[ - pytest.mark.notimpl( - ["datafusion"], - raises=com.OperationNotDefinedError, - ), pytest.mark.broken( ["flink"], raises=AssertionError, reason="numpy array values are different (50.0 %)", ), + pytest.mark.notimpl( + ["datafusion", "oracle"], + raises=com.OperationNotDefinedError, + ), ], id="days", ), diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 997b733717f0..9e4bff794c6c 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -18,6 +18,7 @@ GoogleBadRequest, ImpalaHiveServer2Error, MySQLOperationalError, + OracleDatabaseError, Py4JJavaError, PyDruidProgrammingError, SnowflakeProgrammingError, @@ -285,7 +286,6 @@ def calc_zscore(s): id="cumany", marks=[ pytest.mark.notimpl(["dask"], raises=NotImplementedError), - pytest.mark.broken(["oracle"], raises=sa.exc.DatabaseError), ], ), param( @@ -299,7 +299,7 @@ def calc_zscore(s): id="cumnotany", marks=[ pytest.mark.notimpl(["dask"], raises=NotImplementedError), - pytest.mark.broken(["oracle"], raises=sa.exc.DatabaseError), + pytest.mark.broken(["oracle"], 
raises=OracleDatabaseError), ], ), param( @@ -313,7 +313,6 @@ def calc_zscore(s): id="cumall", marks=[ pytest.mark.notimpl(["dask"], raises=NotImplementedError), - pytest.mark.broken(["oracle"], raises=sa.exc.DatabaseError), ], ), param( @@ -327,7 +326,7 @@ def calc_zscore(s): id="cumnotall", marks=[ pytest.mark.notimpl(["dask"], raises=NotImplementedError), - pytest.mark.broken(["oracle"], raises=sa.exc.DatabaseError), + pytest.mark.broken(["oracle"], raises=OracleDatabaseError), ], ), param( @@ -857,7 +856,11 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): raises=AssertionError, strict=False, # sometimes it passes ), - pytest.mark.broken(["oracle"], raises=AssertionError), + pytest.mark.notyet( + ["oracle"], + raises=com.UnsupportedOperationError, + reason="oracle doesn't allow unordered analytic functions without a windowing clause", + ), pytest.mark.notimpl( ["flink"], raises=com.UnsupportedOperationError, @@ -904,7 +907,11 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): raises=AssertionError, strict=False, # sometimes it passes ), - pytest.mark.broken(["oracle"], raises=AssertionError), + pytest.mark.notyet( + ["oracle"], + raises=com.UnsupportedOperationError, + reason="oracle doesn't allow unordered analytic functions without a windowing clause", + ), pytest.mark.notimpl( ["flink"], raises=com.UnsupportedOperationError, @@ -1223,7 +1230,7 @@ def test_first_last(backend): ["mysql"], raises=MySQLOperationalError, reason="not supported by MySQL" ) @pytest.mark.notyet( - ["mssql", "oracle", "polars", "snowflake", "sqlite"], + ["mssql", "polars", "snowflake", "sqlite"], raises=com.OperationNotDefinedError, reason="not support by the backend", ) From 81ed41aa1104dad116a63be653b888207cfb4e17 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 23 Jan 2024 06:25:53 -0500 Subject: [PATCH 104/161] chore(deps): remove sqlalchemy dependencies from oracle extra --- poetry.lock | 4 ++-- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 199a9f108391..6961bb1322c5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -7315,7 +7315,7 @@ geospatial = ["geopandas", "shapely"] impala = ["impyla"] mssql = ["pyodbc", "sqlalchemy", "sqlalchemy-views"] mysql = ["pymysql"] -oracle = ["oracledb", "packaging", "sqlalchemy", "sqlalchemy-views"] +oracle = ["oracledb", "packaging"] pandas = ["regex"] polars = ["packaging", "polars"] postgres = ["psycopg2"] @@ -7329,4 +7329,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "3fcc813731a54acc626f4e5d124030eeeff9ce304dd2851b16dfdf89ab529d01" +content-hash = "f7f31dc794bdbd0993a76f0d8cac3a6b2debb6f5668be01dcd783eeb64f1faa8" diff --git a/pyproject.toml b/pyproject.toml index 69b029a66af0..178b11e57e12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -190,7 +190,7 @@ geospatial = ["geopandas", "shapely"] impala = ["impyla"] mssql = ["sqlalchemy", "pyodbc", "sqlalchemy-views"] mysql = ["pymysql"] -oracle = ["sqlalchemy", "oracledb", "packaging", "sqlalchemy-views"] +oracle = ["oracledb", "packaging"] pandas = ["regex"] polars = ["polars", "packaging"] risingwave = ["psycopg2"] From 59ddaef60d934341bc6dd6b4caafb5eb31c6fbbd Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 6 Jan 2024 13:39:24 -0500 Subject: [PATCH 105/161] refactor(mssql): port to sqlglot --- .github/workflows/ibis-backends.yml | 44 +- 
ibis/backends/base/sqlglot/__init__.py | 25 +- ibis/backends/base/sqlglot/datatypes.py | 53 ++ ibis/backends/conftest.py | 1 - ibis/backends/mssql/__init__.py | 563 ++++++++++++++---- ibis/backends/mssql/compiler.py | 482 ++++++++++++++- ibis/backends/mssql/datatypes.py | 150 ----- ibis/backends/mssql/registry.py | 387 ------------ ibis/backends/mssql/tests/conftest.py | 33 +- ibis/backends/mssql/tests/test_client.py | 50 +- ibis/backends/mysql/__init__.py | 4 +- ibis/backends/tests/errors.py | 6 + .../test_default_limit/mssql/out.sql | 10 + .../test_disable_query_limit/mssql/out.sql | 10 + .../mssql/out.sql | 3 + .../test_respect_set_limit/mssql/out.sql | 15 + .../test_group_by_has_index/mssql/out.sql | 70 +-- .../test_sql/test_isin_bug/mssql/out.sql | 20 +- ibis/backends/tests/test_aggregation.py | 22 +- ibis/backends/tests/test_array.py | 30 +- ibis/backends/tests/test_asof_join.py | 2 + ibis/backends/tests/test_export.py | 8 +- ibis/backends/tests/test_generic.py | 47 +- ibis/backends/tests/test_join.py | 2 +- ibis/backends/tests/test_numeric.py | 57 +- ibis/backends/tests/test_param.py | 2 +- ibis/backends/tests/test_set_ops.py | 1 + ibis/backends/tests/test_string.py | 76 +-- ibis/backends/tests/test_temporal.py | 32 +- ibis/backends/tests/test_window.py | 39 +- poetry.lock | 4 +- pyproject.toml | 2 +- 32 files changed, 1274 insertions(+), 976 deletions(-) delete mode 100644 ibis/backends/mssql/datatypes.py delete mode 100644 ibis/backends/mssql/registry.py create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/mssql/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/mssql/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/mssql/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/mssql/out.sql diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index 2a9d77145dde..a131f48756a0 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -134,16 +134,16 @@ jobs: sys-deps: - cmake - ninja-build - # - name: mssql - # title: MS SQL Server - # extras: - # - mssql - # services: - # - mssql - # sys-deps: - # - freetds-dev - # - unixodbc-dev - # - tdsodbc + - name: mssql + title: MS SQL Server + extras: + - mssql + services: + - mssql + sys-deps: + - freetds-dev + - unixodbc-dev + - tdsodbc - name: trino title: Trino extras: @@ -244,18 +244,18 @@ jobs: sys-deps: - cmake - ninja-build - # - os: windows-latest - # backend: - # name: mssql - # title: MS SQL Server - # extras: - # - mssql - # services: - # - mssql - # sys-deps: - # - freetds-dev - # - unixodbc-dev - # - tdsodbc + - os: windows-latest + backend: + name: mssql + title: MS SQL Server + extras: + - mssql + services: + - mssql + sys-deps: + - freetds-dev + - unixodbc-dev + - tdsodbc - os: windows-latest backend: name: trino diff --git a/ibis/backends/base/sqlglot/__init__.py b/ibis/backends/base/sqlglot/__init__.py index 3f94f50a7e9a..4025f937e3be 100644 --- a/ibis/backends/base/sqlglot/__init__.py +++ b/ibis/backends/base/sqlglot/__init__.py @@ -16,6 +16,7 @@ if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Mapping + import pandas as pd import pyarrow as pa import ibis.expr.datatypes as dt @@ -28,6 +29,10 @@ class SQLGlotBackend(BaseBackend): compiler: ClassVar[SQLGlotCompiler] name: ClassVar[str] + @property + def _sqlglot_dialect(self) -> str: + return 
self.compiler.dialect + @classmethod def has_operation(cls, operation: type[ops.Value]) -> bool: # singledispatchmethod overrides `__get__` so we can't directly access @@ -35,7 +40,25 @@ def has_operation(cls, operation: type[ops.Value]) -> bool: dispatcher = cls.compiler.visit_node.register.__self__.dispatcher return dispatcher.dispatch(operation) is not dispatcher.dispatch(object) - # TODO(kszucs): get_schema() is not registered as an abstract method + def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: + import pandas as pd + + from ibis.formats.pandas import PandasData + + try: + df = pd.DataFrame.from_records( + cursor, columns=schema.names, coerce_float=True + ) + except Exception: + # clean up the cursor if we fail to create the DataFrame + # + # in the sqlite case failing to close the cursor results in + # artificially locked tables + cursor.close() + raise + df = PandasData.convert_table(df, schema) + return df + def table( self, name: str, schema: str | None = None, database: str | None = None ) -> ir.Table: diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index a54959092e4e..2db47acfdef5 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -837,3 +837,56 @@ def _from_sqlglot_MAP(cls, key: sge.DataType, value: sge.DataType) -> NoReturn: @classmethod def _from_sqlglot_STRUCT(cls, *cols: sge.ColumnDef) -> NoReturn: raise com.UnsupportedBackendType("Structs not supported in Exasol") + + +class MSSQLType(SqlglotType): + dialect = "tsql" + + @classmethod + def _from_sqlglot_BIT(cls): + return dt.Boolean(nullable=cls.default_nullable) + + @classmethod + def _from_sqlglot_IMAGE(cls): + return dt.Binary(nullable=cls.default_nullable) + + @classmethod + def _from_sqlglot_DATETIME(cls, n=None): + return dt.Timestamp( + scale=n if n is None else int(n.this.this), nullable=cls.default_nullable + ) + + @classmethod + def _from_sqlglot_TIMESTAMP(cls): + return dt.Binary(nullable=False) + + @classmethod + def _from_ibis_String(cls, dtype: dt.String) -> sge.DataType: + return sge.DataType( + this=typecode.VARCHAR, + expressions=[sge.DataTypeParam(this=sge.Var(this="max"))], + ) + + @classmethod + def _from_ibis_Array(cls, dtype: dt.Array) -> sge.DataType: + raise com.UnsupportedBackendType("SQL Server does not support arrays") + + @classmethod + def _from_ibis_Map(cls, dtype: dt.Map) -> sge.DataType: + raise com.UnsupportedBackendType("SQL Server does not support maps") + + @classmethod + def _from_ibis_Struct(cls, dtype: dt.Struct) -> sge.DataType: + raise com.UnsupportedBackendType("SQL Server does not support structs") + + @classmethod + def _from_sqlglot_ARRAY(cls) -> sge.DataType: + raise com.UnsupportedBackendType("SQL Server does not support arrays") + + @classmethod + def _from_sqlglot_MAP(cls) -> sge.DataType: + raise com.UnsupportedBackendType("SQL Server does not support maps") + + @classmethod + def _from_sqlglot_STRUCT(cls) -> sge.DataType: + raise com.UnsupportedBackendType("SQL Server does not support structs") diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index 5cac11957a5a..7bc3249a7873 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -562,7 +562,6 @@ def ddl_con(ddl_backend): @pytest.fixture( params=_get_backends_to_test( keep=( - "mssql", "risingwave", "sqlite", ) diff --git a/ibis/backends/mssql/__init__.py b/ibis/backends/mssql/__init__.py index f424864a5791..6c7c43f63310 100644 --- a/ibis/backends/mssql/__init__.py
+++ b/ibis/backends/mssql/__init__.py @@ -2,30 +2,70 @@ from __future__ import annotations +import atexit import contextlib +import datetime +import struct +from contextlib import closing +from functools import partial +from itertools import repeat +from operator import itemgetter from typing import TYPE_CHECKING, Any -import sqlalchemy as sa -import toolz - -from ibis.backends.base import CanCreateDatabase -from ibis.backends.base.sql.alchemy import AlchemyCanCreateSchema, BaseAlchemyBackend -from ibis.backends.mssql.compiler import MsSqlCompiler -from ibis.backends.mssql.datatypes import _type_from_result_set_info +import pyodbc +import sqlglot as sg +import sqlglot.expressions as sge + +import ibis +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +import ibis.expr.schema as sch +import ibis.expr.types as ir +from ibis import util +from ibis.backends.base import CanCreateDatabase, CanCreateSchema +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.base.sqlglot.compiler import C +from ibis.backends.mssql.compiler import MSSQLCompiler if TYPE_CHECKING: - from collections.abc import Iterable, Mapping - - import ibis.expr.schema as sch - import ibis.expr.types as ir - - -class Backend(BaseAlchemyBackend, CanCreateDatabase, AlchemyCanCreateSchema): + from collections.abc import Iterable, Iterator, Mapping + + import pandas as pd + import pyarrow as pa + + +def datetimeoffset_to_datetime(value): + """Convert a datetimeoffset value to a datetime. + + Adapted from https://github.com/mkleehammer/pyodbc/issues/1141 + """ + # ref: https://github.com/mkleehammer/pyodbc/issues/134#issuecomment-281739794 + year, month, day, hour, minute, second, frac, tz_hour, tz_minutes = struct.unpack( + "<6hI2h", value + ) # e.g., (2017, 3, 16, 10, 35, 18, 500000000, -6, 0) + return datetime.datetime( + year, + month, + day, + hour, + minute, + second, + frac // 1000, + datetime.timezone(datetime.timedelta(hours=tz_hour, minutes=tz_minutes)), + ) + + +class Backend(SQLGlotBackend, CanCreateDatabase, CanCreateSchema): name = "mssql" - compiler = MsSqlCompiler + compiler = MSSQLCompiler() supports_create_or_replace = False - _sqlglot_dialect = "tsql" + @property + def version(self) -> str: + with self._safe_raw_sql("SELECT @@VERSION") as cur: + [(version,)] = cur.fetchall() + return version def do_connect( self, @@ -34,116 +74,183 @@ def do_connect( password: str | None = None, port: int = 1433, database: str | None = None, - url: str | None = None, - query: Mapping[str, Any] | None = None, driver: str | None = None, **kwargs: Any, ) -> None: - if query is None: - query = {} - - if driver is not None: - query["driver"] = driver - - alchemy_url = self._build_alchemy_url( - url=url, - host=host, - port=port, + con = pyodbc.connect( user=user, + server=host, + port=port, password=password, database=database, - driver="mssql+pyodbc", - query=query, + driver=driver, + **kwargs, ) - - engine = sa.create_engine( - alchemy_url, poolclass=sa.pool.StaticPool, connect_args=kwargs + con.add_output_converter(-155, datetimeoffset_to_datetime) + + with closing(con.cursor()) as cur: + cur.execute("SET DATEFIRST 1") + + self.con = con + self._temp_views = set() + + def get_schema( + self, name: str, schema: str | None = None, database: str | None = None + ) -> sch.Schema: + conditions = [sg.column("table_name").eq(sge.convert(name))] + + if schema is not None: + conditions.append(sg.column("table_schema").eq(sge.convert(schema))) + + query = ( + 
sg.select( + "column_name", + "data_type", + "is_nullable", + "numeric_precision", + "numeric_scale", + "datetime_precision", + ) + .from_( + sg.table( + "columns", + db="information_schema", + catalog=database or self.current_database, + ) + ) + .where(*conditions) + .order_by("ordinal_position") ) - @sa.event.listens_for(engine, "connect") - def connect(dbapi_connection, connection_record): - with dbapi_connection.cursor() as cur: - cur.execute("SET DATEFIRST 1") + with self._safe_raw_sql(query) as cur: + meta = cur.fetchall() + + if not meta: + fqn = sg.table(name, db=schema, catalog=database).sql(self.compiler.dialect) + raise com.IbisError(f"Table not found: {fqn}") + + mapping = {} + for ( + col, + typ, + is_nullable, + numeric_precision, + numeric_scale, + datetime_precision, + ) in meta: + newtyp = self.compiler.type_mapper.from_string( + typ, nullable=is_nullable == "YES" + ) - return super().do_connect(engine) + if typ == "float": + newcls = dt.Float64 if numeric_precision == 53 else dt.Float32 + newtyp = newcls(nullable=newtyp.nullable) + elif newtyp.is_decimal(): + newtyp = newtyp.copy(precision=numeric_precision, scale=numeric_scale) + elif newtyp.is_timestamp(): + newtyp = newtyp.copy(scale=datetime_precision) + mapping[col] = newtyp + + return sch.Schema(mapping) + + def _metadata(self, query) -> Iterator[tuple[str, dt.DataType]]: + tsql = sge.convert(str(query)).sql(self.compiler.dialect) + query = f"EXEC sp_describe_first_result_set @tsql = N{tsql}" + with self._safe_raw_sql(query) as cur: + rows = cur.fetchall() + for ( + _, + _, + name, + nullable, + _, + system_type_name, + _, + precision, + scale, + *_, + ) in sorted(rows, key=itemgetter(1)): + newtyp = self.compiler.type_mapper.from_string( + system_type_name, nullable=nullable + ) - def _metadata(self, query): - if query in self.list_tables(): - query = f"SELECT * FROM [{query}]" + if system_type_name == "float": + newcls = dt.Float64 if precision == 53 else dt.Float32 + newtyp = newcls(nullable=newtyp.nullable) + elif newtyp.is_decimal(): + newtyp = newtyp.copy(precision=precision, scale=scale) + elif newtyp.is_timestamp(): + newtyp = newtyp.copy(scale=scale) - query = sa.text("EXEC sp_describe_first_result_set @tsql = :query").bindparams( - query=query - ) - with self.begin() as bind: - for column in bind.execute(query).mappings(): - yield column["name"], _type_from_result_set_info(column) + yield name, newtyp @property def current_database(self) -> str: - return self._scalar_query(sa.select(sa.func.db_name())) + with self._safe_raw_sql(sg.select(self.compiler.f.db_name())) as cur: + [(database,)] = cur.fetchall() + return database def list_databases(self, like: str | None = None) -> list[str]: - s = sa.table("databases", sa.column("name", sa.VARCHAR()), schema="sys") - query = sa.select(s.c.name) + s = sg.table("databases", db="sys") + + with self._safe_raw_sql(sg.select(C.name).from_(s)) as cur: + results = list(map(itemgetter(0), cur.fetchall())) - with self.begin() as con: - results = list(con.execute(query).scalars()) return self._filter_with_like(results, like=like) @property def current_schema(self) -> str: - return self._scalar_query(sa.select(sa.func.schema_name())) + with self._safe_raw_sql(sg.select(self.compiler.f.schema_name())) as cur: + [(schema,)] = cur.fetchall() + return schema @contextlib.contextmanager - def _safe_raw_sql(self, stmt, *args, **kwargs): - sql = str( - stmt.compile( - dialect=self.con.dialect, compile_kwargs={"literal_binds": True} - ) - ) - with self.begin() as con: - yield 
con.exec_driver_sql(sql, *args, **kwargs) + def begin(self): + con = self.con + cur = con.cursor() + try: + yield cur + except Exception: + con.rollback() + raise + else: + con.commit() + finally: + cur.close() - def _get_compiled_statement(self, view: sa.Table, definition: sa.sql.Selectable): - return super()._get_compiled_statement( - view, definition, compile_kwargs={"literal_binds": True} - ) - - def _get_temp_view_definition( - self, name: str, definition: sa.sql.compiler.Compiled - ) -> str: - yield f"CREATE OR ALTER VIEW {name} AS {definition}" - - def _table_from_schema( - self, - name: str, - schema: sch.Schema, - database: str | None = None, - temp: bool = False, - ) -> sa.Table: - return super()._table_from_schema( - temp * "#" + name, schema=schema, database=database, temp=False + @contextlib.contextmanager + def _safe_raw_sql(self, query, *args, **kwargs): + with contextlib.suppress(AttributeError): + query = query.sql(self.compiler.dialect) + + with self.begin() as cur: + cur.execute(query, *args, **kwargs) + yield cur + + def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: + with contextlib.suppress(AttributeError): + query = query.sql(self.compiler.dialect) + + con = self.con + cursor = con.cursor() + + try: + cursor.execute(query, **kwargs) + except Exception: + con.rollback() + cursor.close() + raise + else: + con.commit() + return cursor + + def _get_temp_view_definition(self, name: str, definition) -> str: + return sge.Create( + kind="OR ALTER VIEW", + this=sg.to_identifier(name, quoted=self.compiler.quoted), + expression=definition, ) - def _cursor_batches( - self, - expr: ir.Expr, - params: Mapping[ir.Scalar, Any] | None = None, - limit: int | str | None = None, - chunk_size: int = 1_000_000, - ) -> Iterable[list]: - self._run_pre_execute_hooks(expr) - query_ast = self.compiler.to_ast_ensure_limit(expr, limit, params=params) - sql = query_ast.compile() - - with self._safe_raw_sql(sql) as cursor: - # this is expensive for large result sets - # - # see https://github.com/ibis-project/ibis/pull/6513 - batch = cursor.fetchall() - - yield from toolz.partition_all(chunk_size, batch) - def create_database(self, name: str, force: bool = False) -> None: name = self._quote(name) create_stmt = ( @@ -156,15 +263,15 @@ def create_database(self, name: str, force: bool = False) -> None: if force else f"CREATE DATABASE {name}" ) - with self.con.connect().execution_options(isolation_level="AUTOCOMMIT") as con: - con.exec_driver_sql(create_stmt) + with self._safe_raw_sql(create_stmt): + pass def drop_database(self, name: str, force: bool = False) -> None: name = self._quote(name) if_exists = "IF EXISTS " * force - with self.con.connect().execution_options(isolation_level="AUTOCOMMIT") as con: - con.exec_driver_sql(f"DROP DATABASE {if_exists}{name}") + with self._safe_raw_sql(f"DROP DATABASE {if_exists}{name}"): + pass def create_schema( self, name: str, database: str | None = None, force: bool = False @@ -185,14 +292,17 @@ def create_schema( else f"CREATE SCHEMA {name}" ) - with self.begin() as con: + with self.begin() as cur: if should_switch_database: - con.exec_driver_sql(f"USE {self._quote(database)}") + cur.execute(f"USE {self._quote(database)}") - con.exec_driver_sql(create_stmt) + cur.execute(create_stmt) if should_switch_database: - con.exec_driver_sql(f"USE {self._quote(current_database)}") + cur.execute(f"USE {self._quote(current_database)}") + + def _quote(self, name: str): + return sg.to_identifier(name, quoted=True).sql(self.compiler.dialect) def 
drop_schema( self, name: str, database: str | None = None, force: bool = False @@ -204,11 +314,242 @@ def drop_schema( if_exists = "IF EXISTS " * force - with self.begin() as con: + with self.begin() as cur: if should_switch_database: - con.exec_driver_sql(f"USE {self._quote(database)}") + cur.execute(f"USE {self._quote(database)}") - con.exec_driver_sql(f"DROP SCHEMA {if_exists}{name}") + cur.execute(f"DROP SCHEMA {if_exists}{name}") if should_switch_database: - con.exec_driver_sql(f"USE {self._quote(current_database)}") + cur.execute(f"USE {self._quote(current_database)}") + + def list_tables( + self, + like: str | None = None, + database: str | None = None, + schema: str | None = None, + ) -> list[str]: + conditions = [] + + if schema is not None: + conditions.append(C.table_schema.eq(sge.convert(schema))) + + sql = ( + sg.select("table_name") + .from_( + sg.table( + "tables", + db="information_schema", + catalog=database if database is not None else self.current_database, + ) + ) + .distinct() + ) + + if conditions: + sql = sql.where(*conditions) + + sql = sql.sql(self.compiler.dialect) + + with self._safe_raw_sql(sql) as cur: + out = cur.fetchall() + + return self._filter_with_like(map(itemgetter(0), out), like) + + def list_schemas( + self, like: str | None = None, database: str | None = None + ) -> list[str]: + query = sg.select(C.schema_name).from_( + sg.table( + "schemata", + db="information_schema", + catalog=database or self.current_database, + ) + ) + with self._safe_raw_sql(query) as cur: + results = list(map(itemgetter(0), cur.fetchall())) + return self._filter_with_like(results, like=like) + + def create_table( + self, + name: str, + obj: pd.DataFrame | pa.Table | ir.Table | None = None, + *, + schema: sch.Schema | None = None, + database: str | None = None, + temp: bool = False, + overwrite: bool = False, + ) -> ir.Table: + if obj is None and schema is None: + raise ValueError("Either `obj` or `schema` must be specified") + + if database is not None and database != self.current_database: + raise com.UnsupportedOperationError( + "Creating tables in other databases is not supported by MSSQL" + ) + else: + database = None + + properties = [] + + if temp: + properties.append(sge.TemporaryProperty()) + + if obj is not None: + if not isinstance(obj, ir.Expr): + table = ibis.memtable(obj) + else: + table = obj + + self._run_pre_execute_hooks(table) + + query = self._to_sqlglot(table) + else: + query = None + + column_defs = [ + sge.ColumnDef( + this=sg.to_identifier(colname, quoted=self.compiler.quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [sge.ColumnConstraint(kind=sge.NotNullColumnConstraint())] + ), + ) + for colname, typ in (schema or table.schema()).items() + ] + + if overwrite: + temp_name = util.gen_name(f"{self.name}_table") + else: + temp_name = name + + table = sg.table(temp_name, catalog=database, quoted=self.compiler.quoted) + raw_table = sg.table(temp_name, catalog=database, quoted=False) + target = sge.Schema(this=table, expressions=column_defs) + + create_stmt = sge.Create( + kind="TABLE", + this=target, + properties=sge.Properties(expressions=properties), + ) + + this = sg.table(name, catalog=database, quoted=self.compiler.quoted) + raw_this = sg.table(name, catalog=database, quoted=False) + with self._safe_raw_sql(create_stmt) as cur: + if query is not None: + insert_stmt = sge.Insert(this=table, expression=query).sql( + self.compiler.dialect + ) + cur.execute(insert_stmt) + + if overwrite: + 
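+ # replace the existing table by dropping it, then renaming the freshly created table into its place with sp_rename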
cur.execute( + sge.Drop(kind="TABLE", this=this, exists=True).sql( + self.compiler.dialect + ) + ) + old = raw_table.sql(self.compiler.dialect) + new = raw_this.sql(self.compiler.dialect) + cur.execute(f"EXEC sp_rename '{old}', '{new}'") + + if schema is None: + return self.table(name, schema=database) + + # preserve the input schema if it was provided + return ops.DatabaseTable( + name, schema=schema, source=self, namespace=ops.Namespace(database=database) + ).to_expr() + + def _register_temp_view_cleanup(self, name: str) -> None: + def drop(self, name: str, query: str): + self.raw_sql(query) + self._temp_views.discard(name) + + query = sge.Drop(this=sg.table(name), kind="VIEW", exists=True) + atexit.register(drop, self, name=name, query=query) + + def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: + schema = op.schema + if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]: + raise com.IbisTypeError( + "MS SQL cannot yet reliably handle `null` typed columns; " + f"got null typed columns: {null_columns}" + ) + + # only register if we haven't already done so + if (name := op.name) not in self.list_tables(): + quoted = self.compiler.quoted + column_defs = [ + sg.exp.ColumnDef( + this=sg.to_identifier(colname, quoted=quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [ + sg.exp.ColumnConstraint( + kind=sg.exp.NotNullColumnConstraint() + ) + ] + ), + ) + for colname, typ in schema.items() + ] + + create_stmt = sg.exp.Create( + kind="TABLE", + this=sg.exp.Schema( + this=sg.to_identifier(name, quoted=quoted), expressions=column_defs + ), + # properties=sg.exp.Properties(expressions=[sge.TemporaryProperty()]), + ) + + df = op.data.to_frame() + data = df.itertuples(index=False) + cols = ", ".join( + ident.sql(self.compiler.dialect) + for ident in map( + partial(sg.to_identifier, quoted=quoted), schema.keys() + ) + ) + specs = ", ".join(repeat("?", len(schema))) + table = sg.table(name, quoted=quoted) + sql = f"INSERT INTO {table.sql(self.compiler.dialect)} ({cols}) VALUES ({specs})" + + with self._safe_raw_sql(create_stmt) as cur: + if not df.empty: + cur.executemany(sql, data) + + def _to_sqlglot( + self, expr: ir.Expr, *, limit: str | None = None, params=None, **_: Any + ): + """Compile an Ibis expression to a sqlglot object.""" + table_expr = expr.as_table() + conversions = { + name: ibis.ifelse(table_expr[name], 1, 0).cast("boolean") + for name, typ in table_expr.schema().items() + if typ.is_boolean() + } + + if conversions: + table_expr = table_expr.mutate(**conversions) + return super()._to_sqlglot(table_expr, limit=limit, params=params) + + def _cursor_batches( + self, + expr: ir.Expr, + params: Mapping[ir.Scalar, Any] | None = None, + limit: int | str | None = None, + chunk_size: int = 1 << 20, + ) -> Iterable[list[tuple]]: + def process_value(value, dtype): + return bool(value) if dtype.is_boolean() else value + + types = expr.as_table().schema().types + + for batch in super()._cursor_batches( + expr, params=params, limit=limit, chunk_size=chunk_size + ): + yield [tuple(map(process_value, row, types)) for row in batch] diff --git a/ibis/backends/mssql/compiler.py b/ibis/backends/mssql/compiler.py index ef1ba639d61c..35d7afa9d3e2 100644 --- a/ibis/backends/mssql/compiler.py +++ b/ibis/backends/mssql/compiler.py @@ -1,39 +1,473 @@ from __future__ import annotations -from sqlalchemy.dialects.mssql import DATETIME2 +import calendar +from functools import singledispatchmethod +import sqlglot as sg 
+import sqlglot.expressions as sge +from public import public +from sqlglot.dialects import TSQL +from sqlglot.dialects.dialect import rename_func + +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt import ibis.expr.operations as ops -from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator -from ibis.backends.mssql.datatypes import MSSQLType -from ibis.backends.mssql.registry import _timestamp_from_unix, operation_registry -from ibis.expr.rewrites import rewrite_sample +from ibis.backends.base.sqlglot.compiler import NULL, STAR, SQLGlotCompiler, paren +from ibis.backends.base.sqlglot.datatypes import MSSQLType +from ibis.backends.base.sqlglot.rewrites import ( + rewrite_first_to_first_value, + rewrite_last_to_last_value, +) +from ibis.common.deferred import var +from ibis.common.patterns import replace +from ibis.expr.rewrites import p, rewrite_sample +TSQL.Generator.TRANSFORMS |= { + sge.ApproxDistinct: rename_func("approx_count_distinct"), + sge.Stddev: rename_func("stdevp"), + sge.StddevPop: rename_func("stdevp"), + sge.StddevSamp: rename_func("stdev"), + sge.Variance: rename_func("var"), + sge.VariancePop: rename_func("varp"), + sge.Ceil: rename_func("ceiling"), + sge.Trim: lambda self, e: f"TRIM({e.this.sql(self.dialect)})", + sge.DateFromParts: rename_func("datefromparts"), +} -class MsSqlExprTranslator(AlchemyExprTranslator): - _registry = operation_registry - _rewrites = AlchemyExprTranslator._rewrites.copy() - _bool_aggs_need_cast_to_int32 = True +y = var("y") +start = var("start") +end = var("end") - _timestamp_type = DATETIME2 - _integer_to_timestamp = staticmethod(_timestamp_from_unix) - native_json_type = False +# MS SQL facts that make using it a nightmare: +# +# * There is no boolean type +# * There are no boolean literals +# * But there's a numeric bit type whose domain is THE TWO VALUES 0 and 1 (and NULL of course), seriously? 
+# * Supported boolean expressions are =, <>, <, >, <=, >=, IS NULL, +# IS NOT NULL, IN, NOT IN, EXISTS, BETWEEN, IS NOT DISTINCT FROM, IS DISTINCT FROM, +# LIKE, NOT LIKE, CONTAINS (?), ALL, SOME, ANY +# The return type of these is anyone's guess, but it's definitely NOT BOOLEAN +# * Boolean expressions CANNOT be used in a projection, i.e., SELECT x = 1 is not allowed +# * Boolean expressions MUST be used in a WHERE clause, i.e., SELECT * FROM t WHERE 1 is not allowed - _forbids_frame_clause = AlchemyExprTranslator._forbids_frame_clause + ( - ops.Lag, - ops.Lead, - ) - _require_order_by = AlchemyExprTranslator._require_order_by + (ops.Reduction,) - _dialect_name = "mssql" + +@replace(p.WindowFunction(p.RowNumber | p.NTile, y)) +def exclude_unsupported_window_frame_from_ops_with_offset(_, y): + return ops.Subtract(_.copy(frame=y.copy(start=None, end=0)), 1) + + +@replace(p.WindowFunction(p.Lag | p.Lead | p.PercentRank | p.CumeDist, y)) +def exclude_unsupported_window_frame_from_ops(_, y): + return _.copy(frame=y.copy(start=None, end=0)) + + +@public +class MSSQLCompiler(SQLGlotCompiler): + __slots__ = () + + dialect = "tsql" type_mapper = MSSQLType + rewrites = ( + rewrite_sample, + rewrite_first_to_first_value, + rewrite_last_to_last_value, + exclude_unsupported_window_frame_from_ops, + exclude_unsupported_window_frame_from_ops_with_offset, + *SQLGlotCompiler.rewrites, + ) + quoted = True + + @property + def NAN(self): + return self.f.double("NaN") + + @property + def POS_INF(self): + return self.f.double("Infinity") + + @property + def NEG_INF(self): + return self.f.double("-Infinity") + + def _aggregate(self, funcname: str, *args, where): + func = self.f[funcname] + if where is not None: + args = tuple(self.if_(where, arg, NULL) for arg in args) + return func(*args) + + @singledispatchmethod + def visit_node(self, op, **kwargs): + return super().visit_node(op, **kwargs) + + @staticmethod + def _generate_groups(groups): + return groups + + @staticmethod + def _minimize_spec(start, end, spec): + if ( + start is None + and isinstance(getattr(end, "value", None), ops.Literal) + and end.value.value == 0 + and end.following + ): + return None + return spec + + @visit_node.register(ops.StringLength) + def visit_StringLength(self, op, *, arg): + """The MSSQL LEN function doesn't count trailing spaces. + + Also, DATALENGTH (the suggested alternative) counts bytes and thus its + result depends on the string's encoding. + + https://learn.microsoft.com/en-us/sql/t-sql/functions/len-transact-sql?view=sql-server-ver16#remarks + + The solution is to add a character to the beginning and end of the + string that are guaranteed to have one character in length and are not + spaces, and then subtract 2 from the result of `LEN` of that input. + + Thanks to @arkanovicz for this glorious hack. 
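+ + For example, LEN('ab ') is 2 because the trailing space is ignored, while LEN(CONCAT('A', 'ab ', 'Z')) - 2 is 3, the true length.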
+ """ + return paren(self.f.len(self.f.concat("A", arg, "Z")) - 2) + + @visit_node.register(ops.Capitalize) + def visit_Capitalize(self, op, *, arg): + length = paren(self.f.len(self.f.concat("A", arg, "Z")) - 2) + return self.f.concat( + self.f.upper(self.f.substring(arg, 1, 1)), + self.f.lower(self.f.substring(arg, 2, length - 1)), + ) + + @visit_node.register(ops.GroupConcat) + def visit_GroupConcat(self, op, *, arg, sep, where): + if where is not None: + arg = self.if_(where, arg, NULL) + return self.f.group_concat(arg, sep) + + @visit_node.register(ops.CountStar) + def visit_CountStar(self, op, *, arg, where): + if where is not None: + return self.f.sum(self.if_(where, 1, 0)) + return self.f.count(STAR) + + @visit_node.register(ops.CountDistinct) + def visit_CountDistinct(self, op, *, arg, where): + if where is not None: + arg = self.if_(where, arg, NULL) + return self.f.count(sge.Distinct(expressions=[arg])) + + @visit_node.register(ops.DayOfWeekIndex) + def visit_DayOfWeekIndex(self, op, *, arg): + return self.f.datepart(self.v.weekday, arg) - 1 + + @visit_node.register(ops.DayOfWeekName) + def visit_DayOfWeekName(self, op, *, arg): + days = calendar.day_name + return sge.Case( + this=self.f.datepart(self.v.weekday, arg) - 1, + ifs=list(map(self.if_, *zip(*enumerate(days)))), + ) + + @visit_node.register(ops.DateTruncate) + @visit_node.register(ops.TimestampTruncate) + def visit_DateTimestampTruncate(self, op, *, arg, unit): + interval_units = { + "us": "microsecond", + "ms": "millisecond", + "s": "second", + "m": "minute", + "h": "hour", + "D": "day", + "W": "week", + "M": "month", + "Q": "quarter", + "Y": "year", + } + if (unit := interval_units.get(unit.short)) is None: + raise com.UnsupportedOperationError(f"Unsupported truncate unit {unit!r}") + + return self.f.datetrunc(self.v[unit], arg, dialect=self.dialect) + + @visit_node.register(ops.Date) + def visit_Date(self, op, *, arg): + return self.cast(arg, dt.date) + + @visit_node.register(ops.TimeDelta) + @visit_node.register(ops.DateDelta) + @visit_node.register(ops.TimestampDelta) + def visit_DateTimeDelta(self, op, *, left, right, part): + return self.f.datediff( + sge.Var(this=part.this.upper()), right, left, dialect=self.dialect + ) + + @visit_node.register(ops.Xor) + def visit_Xor(self, op, *, left, right): + return sg.and_(sg.or_(left, right), sg.not_(sg.and_(left, right))) + + @visit_node.register(ops.TimestampBucket) + def visit_TimestampBucket(self, op, *, arg, interval, offset): + interval_units = { + "ms": "millisecond", + "s": "second", + "m": "minute", + "h": "hour", + "D": "day", + "W": "week", + "M": "month", + "Q": "quarter", + "Y": "year", + } + + if not isinstance(op.interval, ops.Literal): + raise com.UnsupportedOperationError( + "Only literal interval values are supported with MS SQL timestamp bucketing" + ) + + if (unit := interval_units.get(op.interval.dtype.unit.short)) is None: + raise com.UnsupportedOperationError( + f"Unsupported bucket interval {op.interval!r}" + ) + if offset is not None: + raise com.UnsupportedOperationError( + "Timestamp bucket with offset is not supported" + ) + + part = self.v[unit] + origin = self.cast("1970-01-01", op.arg.dtype) + + return self.f.date_bucket(part, op.interval.value, arg, origin) + + @visit_node.register(ops.ExtractEpochSeconds) + def visit_ExtractEpochSeconds(self, op, *, arg): + return self.cast( + self.f.datediff(self.v.s, "1970-01-01 00:00:00", arg, dialect=self.dialect), + dt.int64, + ) + + @visit_node.register(ops.ExtractYear) + 
@visit_node.register(ops.ExtractMonth) + @visit_node.register(ops.ExtractDay) + @visit_node.register(ops.ExtractDayOfYear) + @visit_node.register(ops.ExtractHour) + @visit_node.register(ops.ExtractMinute) + @visit_node.register(ops.ExtractSecond) + @visit_node.register(ops.ExtractMillisecond) + @visit_node.register(ops.ExtractMicrosecond) + def visit_Extract(self, op, *, arg): + return self.f.datepart(self.v[type(op).__name__[len("Extract") :].lower()], arg) + + @visit_node.register(ops.ExtractWeekOfYear) + def visit_ExtractWeekOfYear(self, op, *, arg): + return self.f.datepart(self.v.iso_week, arg) + + @visit_node.register(ops.TimeFromHMS) + def visit_TimeFromHMS(self, op, *, hours, minutes, seconds): + return self.f.timefromparts(hours, minutes, seconds, 0, 0) + + @visit_node.register(ops.TimestampFromYMDHMS) + def visit_TimestampFromYMDHMS( + self, op, *, year, month, day, hours, minutes, seconds + ): + return self.f.datetimefromparts(year, month, day, hours, minutes, seconds, 0) + + @visit_node.register(ops.StringFind) + def visit_StringFind(self, op, *, arg, substr, start, end): + if start is not None: + return self.f.charindex(substr, arg, start) + return self.f.charindex(substr, arg) + + @visit_node.register(ops.Round) + def visit_Round(self, op, *, arg, digits): + return self.f.round(arg, digits if digits is not None else 0) + + @visit_node.register(ops.TimestampFromUNIX) + def visit_TimestampFromUNIX(self, op, *, arg, unit): + unit = unit.short + if unit == "s": + return self.f.dateadd(self.v.s, arg, "1970-01-01 00:00:00") + elif unit == "ms": + return self.f.dateadd(self.v.s, arg / 1_000, "1970-01-01 00:00:00") + raise com.UnsupportedOperationError(f"{unit!r} unit is not supported!") + + @visit_node.register(ops.SQLStringView) + def visit_SQLStringView(self, op, *, query: str, name: str, child): + return sg.parse_one(query, read=self.dialect).subquery(name) + + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_decimal(): + return self.cast(str(value.normalize()), dtype) + elif dtype.is_date(): + return self.f.datefromparts(value.year, value.month, value.day) + elif dtype.is_timestamp(): + args = ( + value.year, + value.month, + value.day, + value.hour, + value.minute, + value.second, + value.microsecond, + ) + if dtype.timezone is not None: + assert value.tzinfo is not None + + offset = value.strftime("%z") + hour_offset = int(offset[:3]) + minute_offset = int(offset[-2:]) + return self.f.datetimeoffsetfromparts( + *args, hour_offset, minute_offset, 6 + ) + else: + return self.f.datetime2fromparts(*args, 6) + elif dtype.is_time(): + return self.f.timefromparts( + value.hour, value.minute, value.second, value.microsecond, 0 + ) + elif dtype.is_uuid(): + return sge.Cast( + this=sge.convert(str(value)), + to=sge.DataType(this=sge.DataType.Type.UNIQUEIDENTIFIER), + ) + elif dtype.is_binary(): + return self.f.convert( + sge.DataType(this=sge.DataType.Type.VARBINARY), + value.hex(), + 2, # style, see https://learn.microsoft.com/en-us/sql/t-sql/functions/cast-and-convert-transact-sql?view=sql-server-ver16#binary-styles + ) + elif dtype.is_array() or dtype.is_struct() or dtype.is_map(): + raise com.UnsupportedBackendType("MS SQL does not support complex types") + + return None + + @visit_node.register(ops.Log2) + def visit_Log2(self, op, *, arg): + return self.f.log(arg, 2, dialect=self.dialect) + + @visit_node.register(ops.Log) + def visit_Log(self, op, *, arg, base): + if base is None: + return self.f.log(arg, dialect=self.dialect) + return self.f.log(arg, base, 
dialect=self.dialect) + + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + from_ = op.arg.dtype + + if to.is_boolean(): + # no such thing as a boolean in MSSQL + return arg + elif from_.is_integer() and to.is_timestamp(): + return self.f.dateadd(self.v.s, arg, "1970-01-01 00:00:00") + return super().visit_Cast(op, arg=arg, to=to) + + @visit_node.register(ops.Sum) + def visit_Sum(self, op, *, arg, where): + if op.arg.dtype.is_boolean(): + arg = self.if_(arg, 1, 0) + return self.agg.sum(arg, where=where) + + @visit_node.register(ops.Mean) + def visit_Mean(self, op, *, arg, where): + if op.arg.dtype.is_boolean(): + arg = self.if_(arg, 1, 0) + return self.agg.avg(arg, where=where) + + @visit_node.register(ops.Not) + def visit_Not(self, op, *, arg): + if isinstance(arg, sge.Boolean): + return sge.FALSE if arg == sge.TRUE else sge.TRUE + return self.if_(arg, 1, 0).eq(0) + + @visit_node.register(ops.Any) + @visit_node.register(ops.All) + @visit_node.register(ops.ApproxMedian) + @visit_node.register(ops.Arbitrary) + @visit_node.register(ops.ArgMax) + @visit_node.register(ops.ArgMin) + @visit_node.register(ops.ArrayCollect) + @visit_node.register(ops.ArrayColumn) + @visit_node.register(ops.ArrayDistinct) + @visit_node.register(ops.ArrayFlatten) + @visit_node.register(ops.ArrayMap) + @visit_node.register(ops.ArraySort) + @visit_node.register(ops.ArrayUnion) + @visit_node.register(ops.BitAnd) + @visit_node.register(ops.BitOr) + @visit_node.register(ops.BitXor) + @visit_node.register(ops.Covariance) + @visit_node.register(ops.CountDistinctStar) + @visit_node.register(ops.DateAdd) + @visit_node.register(ops.DateDiff) + @visit_node.register(ops.DateSub) + @visit_node.register(ops.EndsWith) + @visit_node.register(ops.First) + @visit_node.register(ops.IntervalAdd) + @visit_node.register(ops.IntervalFromInteger) + @visit_node.register(ops.IntervalMultiply) + @visit_node.register(ops.IntervalSubtract) + @visit_node.register(ops.IsInf) + @visit_node.register(ops.IsNan) + @visit_node.register(ops.Last) + @visit_node.register(ops.LPad) + @visit_node.register(ops.Levenshtein) + @visit_node.register(ops.Map) + @visit_node.register(ops.Median) + @visit_node.register(ops.Mode) + @visit_node.register(ops.MultiQuantile) + @visit_node.register(ops.NthValue) + @visit_node.register(ops.Quantile) + @visit_node.register(ops.RegexExtract) + @visit_node.register(ops.RegexReplace) + @visit_node.register(ops.RegexSearch) + @visit_node.register(ops.RegexSplit) + @visit_node.register(ops.RowID) + @visit_node.register(ops.RPad) + @visit_node.register(ops.StartsWith) + @visit_node.register(ops.StringSplit) + @visit_node.register(ops.StringToTimestamp) + @visit_node.register(ops.StructColumn) + @visit_node.register(ops.TimestampAdd) + @visit_node.register(ops.TimestampDiff) + @visit_node.register(ops.TimestampSub) + @visit_node.register(ops.Unnest) + def visit_Undefined(self, op, **_): + raise com.OperationNotDefinedError(type(op).__name__) + + +_SIMPLE_OPS = { + ops.Atan2: "atn2", + ops.DateFromYMD: "datefromparts", + ops.Hash: "checksum", + ops.Ln: "log", + ops.Log10: "log10", + ops.Power: "power", + ops.RandomScalar: "rand", + ops.Repeat: "replicate", + ops.Reverse: "reverse", + ops.StringAscii: "ascii", + ops.TimestampNow: "sysdatetime", + ops.Min: "min", + ops.Max: "max", +} + + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @MSSQLCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): 
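+ # reductions are routed through self.agg so the optional `where` filter is applied to the arguments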
+ return self.agg[_name](*kw.values(), where=where) + else: -rewrites = MsSqlExprTranslator.rewrites + @MSSQLCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) + setattr(MSSQLCompiler, f"visit_{_op.__name__}", _fmt) -class MsSqlCompiler(AlchemyCompiler): - translator_class = MsSqlExprTranslator - supports_indexed_grouping_keys = False - null_limit = None - rewrites = AlchemyCompiler.rewrites | rewrite_sample +del _op, _name, _fmt diff --git a/ibis/backends/mssql/datatypes.py b/ibis/backends/mssql/datatypes.py deleted file mode 100644 index 817fdfa22db6..000000000000 --- a/ibis/backends/mssql/datatypes.py +++ /dev/null @@ -1,150 +0,0 @@ -from __future__ import annotations - -from functools import partial -from typing import Optional, TypedDict - -from sqlalchemy.dialects import mssql - -import ibis.expr.datatypes as dt -from ibis.backends.base.sql.alchemy.datatypes import AlchemyType - - -class _FieldDescription(TypedDict): - """Partial type of result of sp_describe_first_result_set procedure.""" - - name: str - system_type_name: str - precision: Optional[int] - scale: Optional[int] - - -def _type_from_result_set_info(col: _FieldDescription) -> dt.DataType: - """Construct an ibis type from MSSQL result set description.""" - typename = col["system_type_name"].split("(")[0].upper() - typ = _from_mssql_typenames.get(typename) - if typ is None: - raise NotImplementedError( - f"MSSQL type {col['system_type_name']} is not supported" - ) - - if typename in ("DECIMAL", "NUMERIC"): - typ = partial(typ, precision=col["precision"], scale=col["scale"]) - elif typename in ("GEOMETRY", "GEOGRAPHY"): - typ = partial(typ, geotype=typename.lower()) - elif typename == "DATETIME2": - typ = partial(typ, scale=col["scale"]) - elif typename == "DATETIMEOFFSET": - typ = partial(typ, scale=col["scale"], timezone="UTC") - elif typename == "FLOAT": - if col["precision"] <= 24: - typ = dt.Float32 - else: - typ = dt.Float64 - return typ(nullable=col["is_nullable"]) - - -# The following MSSQL 2022 types are not supported: 'XML', 'SQL_VARIANT', 'SYSNAME', 'HIERARCHYID', -_from_mssql_typenames = { - # Exact numerics - "BIGINT": dt.Int64, - "BIT": dt.Boolean, - "DECIMAL": dt.Decimal, - "INT": dt.Int32, - "MONEY": dt.Int64, - "NUMERIC": dt.Decimal, - "SMALLINT": dt.Int16, - "SMALLMONEY": dt.Int32, - "TINYINT": dt.Int8, - # Approximate numerics - "FLOAT": dt.Float64, - "REAL": dt.Float32, - # Date and time - "DATE": dt.Date, - "DATETIME2": dt.Timestamp, - "DATETIME": dt.Timestamp, - "DATETIMEOFFSET": dt.Timestamp, - "SMALLDATETIME": dt.Timestamp, - "TIME": dt.Time, - # Character string - "CHAR": dt.String, - "TEXT": dt.String, - "VARCHAR": dt.String, - # Unicode character strings - "NCHAR": dt.String, - "NTEXT": dt.String, - "NVARCHAR": dt.String, - # Binary string - "BINARY": dt.Binary, - "IMAGE": dt.Binary, - "VARBINARY": dt.Binary, - # Other data types - "UNIQUEIDENTIFIER": dt.UUID, - "GEOMETRY": dt.GeoSpatial, - "GEOGRAPHY": dt.GeoSpatial, - # This timestamp datatype is also known as "rowversion", and the original name is really unfortunate. 
- # See: - # https://learn.microsoft.com/en-us/sql/t-sql/data-types/rowversion-transact-sql?view=sql-server-ver16 - "TIMESTAMP": dt.Binary, -} - - -_to_mssql_types = { - dt.Boolean: mssql.BIT, - dt.Int8: mssql.TINYINT, - dt.Int16: mssql.SMALLINT, - dt.Int32: mssql.INTEGER, - dt.Int64: mssql.BIGINT, - dt.Float16: mssql.FLOAT, - dt.Float32: mssql.FLOAT, - dt.Float64: mssql.REAL, - dt.String: mssql.NVARCHAR, -} - -_from_mssql_types = { - mssql.TINYINT: dt.Int8, - mssql.BIT: dt.Boolean, - mssql.MONEY: dt.Int64, - mssql.SMALLMONEY: dt.Int32, - mssql.UNIQUEIDENTIFIER: dt.UUID, - mssql.BINARY: dt.Binary, - mssql.TIMESTAMP: dt.Binary, - mssql.NVARCHAR: dt.String, - mssql.NTEXT: dt.String, - mssql.VARBINARY: dt.Binary, - mssql.IMAGE: dt.Binary, - mssql.TIME: dt.Time, - mssql.NCHAR: dt.String, -} - - -class MSSQLType(AlchemyType): - dialect = "mssql" - - @classmethod - def to_ibis(cls, typ, nullable=True): - if dtype := _from_mssql_types.get(type(typ)): - return dtype(nullable=nullable) - elif isinstance(typ, mssql.DATETIMEOFFSET): - if (prec := typ.precision) is None: - prec = 7 - return dt.Timestamp(scale=prec, timezone="UTC", nullable=nullable) - elif isinstance(typ, mssql.DATETIME2): - if (prec := typ.precision) is None: - prec = 7 - return dt.Timestamp(scale=prec, nullable=nullable) - else: - return super().to_ibis(typ, nullable=nullable) - - @classmethod - def from_ibis(cls, dtype): - if typ := _to_mssql_types.get(type(dtype)): - return typ - elif dtype.is_timestamp(): - if (precision := dtype.scale) is None: - precision = 7 - if dtype.timezone is not None: - return mssql.DATETIMEOFFSET(precision=precision) - else: - return mssql.DATETIME2(precision=precision) - else: - return super().from_ibis(dtype) diff --git a/ibis/backends/mssql/registry.py b/ibis/backends/mssql/registry.py deleted file mode 100644 index 3c3e780ef203..000000000000 --- a/ibis/backends/mssql/registry.py +++ /dev/null @@ -1,387 +0,0 @@ -from __future__ import annotations - -import sqlalchemy as sa -from sqlalchemy.dialects import mssql -from sqlalchemy.ext.compiler import compiles - -import ibis.common.exceptions as com -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -from ibis.backends.base.sql.alchemy import ( - fixed_arity, - sqlalchemy_operation_registry, - sqlalchemy_window_functions_registry, - unary, -) -from ibis.backends.base.sql.alchemy.registry import substr, variance_reduction - - -def _reduction(func, cast_type="int32"): - def reduction_compiler(t, op): - arg, where = op.args - - if arg.dtype.is_boolean(): - if isinstance(arg, ops.TableColumn): - nullable = arg.dtype.nullable - arg = ops.Cast(arg, dt.dtype(cast_type)(nullable=nullable)) - else: - arg = ops.IfElse(arg, 1, 0) - - if where is not None: - arg = ops.IfElse(where, arg, None) - return func(t.translate(arg)) - - return reduction_compiler - - -@compiles(substr, "mssql") -def mssql_substr(element, compiler, **kw): - return compiler.process(sa.func.substring(*element.clauses), **kw) - - -@compiles(mssql.VARBINARY) -def compile_mssql_varbinary(element, compiler, **kw): - if (length := element.length) is not None: - return f"VARBINARY({length})" - return "VARBINARY" - - -# String -# TODO: find is copied from SQLite, we should really have a -# "base" set of SQL functions that are the most common APIs across the major -# RDBMS -def _string_find(t, op): - arg, substr, start, _ = op.args - - sa_arg = t.translate(arg) - sa_substr = t.translate(substr) - - if start is not None: - sa_start = t.translate(start) - return 
sa.func.charindex(sa_substr, sa_arg, sa_start) - 1 - - return sa.func.charindex(sa_substr, sa_arg) - 1 - - -def _extract(fmt): - def translator(t, op): - (arg,) = op.args - sa_arg = t.translate(arg) - # sa.literal_column is used because it makes the argument pass - # in NOT as a parameter - return sa.cast(sa.func.datepart(sa.literal_column(fmt), sa_arg), sa.SMALLINT) - - return translator - - -def _round(t, op): - sa_arg = t.translate(op.arg) - - if op.digits is not None: - return sa.func.round(sa_arg, t.translate(op.digits)) - else: - return sa.func.round(sa_arg, 0) - - -def _timestamp_from_unix(x, unit="s"): - if unit == "s": - return sa.func.dateadd(sa.text("s"), x, "1970-01-01 00:00:00") - if unit == "ms": - return sa.func.dateadd(sa.text("s"), x / 1_000, "1970-01-01 00:00:00") - raise com.UnsupportedOperationError(f"{unit!r} unit is not supported!") - - -_interval_units = { - "us": "microsecond", - "ms": "millisecond", - "s": "second", - "m": "minute", - "h": "hour", - "D": "day", - "W": "week", - "M": "month", - "Q": "quarter", - "Y": "year", -} - - -def _timestamp_truncate(t, op): - arg = t.translate(op.arg) - unit = op.unit.short - if unit not in _interval_units: - raise com.UnsupportedOperationError(f"Unsupported truncate unit {op.unit!r}") - - return sa.func.datetrunc(sa.text(_interval_units[unit]), arg) - - -def _timestamp_bucket(t, op): - unit = op.interval.dtype.unit.short - if not isinstance(op.interval, ops.Literal): - raise com.UnsupportedOperationError( - "Only literal interval values are supported" - ) - if unit == "us" or unit not in _interval_units: - raise com.UnsupportedOperationError( - f"Unsupported bucket interval {op.interval!r}" - ) - if op.offset is not None: - raise com.UnsupportedOperationError( - "Timestamp bucket with offset is not supported" - ) - - part = sa.literal_column(_interval_units[unit]) - value = sa.literal_column(str(op.interval.value)) - arg = t.translate(op.arg) - origin = sa.literal_column("CAST('1970-01-01' AS DATETIME2)") - - return sa.func.DATE_BUCKET(part, value, arg, origin) - - -def _temporal_delta(t, op): - left = t.translate(op.left) - right = t.translate(op.right) - return sa.func.datediff(sa.literal_column(op.part.value.upper()), right, left) - - -def _not(t, op): - arg = t.translate(op.arg) - if t.within_where: - return sa.not_(arg) - else: - # mssql doesn't support boolean types or comparisons at selection positions - # so we need to compare the value wrapped in a case statement - return sa.case((arg == 0, True), else_=False) - - -def _len(x): - """The MSSQL LEN function doesn't count trailing spaces. - - Also, DATALENGTH (the suggested alternative) counts bytes and thus its - result depends on the string's encoding. - - https://learn.microsoft.com/en-us/sql/t-sql/functions/len-transact-sql?view=sql-server-ver16#remarks - - The solution is to add a character to the beginning and end of the string - that are guaranteed to have one character in length and are not spaces, and - then subtract 2 from the result of `LEN` of that input. - - Thanks to @arkanovicz for this glorious hack. 
- """ - return sa.func.len("A" + x + "Z") - 2 - - -def _literal(_, op): - dtype = op.dtype - value = op.value - - if value is None: - return sa.null() - - if dtype.is_array(): - value = list(value) - elif dtype.is_decimal(): - value = value.normalize() - elif dtype.is_date(): - return sa.func.datefromparts(value.year, value.month, value.day) - elif dtype.is_timestamp(): - args = ( - value.year, - value.month, - value.day, - value.hour, - value.minute, - value.second, - value.microsecond, - ) - if dtype.timezone is not None: - assert value.tzinfo is not None - - offset = value.strftime("%z") - hour_offset = int(offset[:3]) - minute_offset = int(offset[-2:]) - return sa.func.datetimeoffsetfromparts( - *args, - hour_offset, - minute_offset, - 6, # precision - ) - else: - return sa.func.datetime2fromparts( - *args, - 6, # precision - ) - elif dtype.is_time(): - return sa.func.timefromparts( - value.hour, - value.minute, - value.second, - value.microsecond, - sa.literal_column("0"), - ) - elif dtype.is_uuid(): - return sa.cast(sa.literal(str(value)), mssql.UNIQUEIDENTIFIER) - elif dtype.is_binary(): - return sa.cast(value, mssql.VARBINARY("max")) - - return sa.literal(value) - - -def _hashbytes(translator, op): - how = op.how - - arg_formatted = translator.translate(op.arg) - - if how in ("md5", "sha1"): - return sa.func.hashbytes(how, arg_formatted) - elif how == "sha256": - return sa.func.hashbytes("sha2_256", arg_formatted) - elif how == "sha512": - return sa.func.hashbytes("sha2_512", arg_formatted) - else: - raise NotImplementedError(how) - - -def _hexdigest(translator, op): - # SO post on getting convert to play nice with VARCHAR in Sqlalchemy - # https://stackoverflow.com/questions/20291962/how-to-use-convert-function-in-sqlalchemy - how = op.how - - arg_formatted = translator.translate(op.arg) - if how in ("md5", "sha1"): - hashbinary = sa.func.hashbytes(how, arg_formatted) - elif how == "sha256": - hashbinary = sa.func.hashbytes("sha2_256", arg_formatted) - elif how == "sha512": - hashbinary = sa.func.hashbytes("sha2_512", arg_formatted) - else: - raise NotImplementedError(how) - - # mssql uppercases the hexdigest which is inconsistent with several other - # implementations and inconsistent with Python, so lowercase it. 
- return sa.func.lower( - sa.func.convert( - sa.literal_column("VARCHAR(MAX)"), - hashbinary, - 2, # 2 means strip off leading '0x' - ) - ) - - -operation_registry = sqlalchemy_operation_registry.copy() -operation_registry.update(sqlalchemy_window_functions_registry) - -operation_registry.update( - { - ops.Not: _not, - # aggregate methods - ops.Count: _reduction(sa.func.count), - ops.Max: _reduction(sa.func.max), - ops.Min: _reduction(sa.func.min), - ops.Sum: _reduction(sa.func.sum), - ops.Mean: _reduction(sa.func.avg, "float64"), - ops.IfElse: fixed_arity(sa.func.iif, 3), - # string methods - ops.Capitalize: unary( - lambda arg: sa.func.concat( - sa.func.upper(sa.func.substring(arg, 1, 1)), - sa.func.lower(sa.func.substring(arg, 2, _len(arg) - 1)), - ) - ), - ops.LStrip: unary(sa.func.ltrim), - ops.Lowercase: unary(sa.func.lower), - ops.RStrip: unary(sa.func.rtrim), - ops.Repeat: fixed_arity(sa.func.replicate, 2), - ops.Reverse: unary(sa.func.reverse), - ops.StringFind: _string_find, - ops.StringLength: unary(_len), - ops.StringReplace: fixed_arity(sa.func.replace, 3), - ops.Strip: unary(sa.func.trim), - ops.Uppercase: unary(sa.func.upper), - # math - ops.Abs: unary(sa.func.abs), - ops.Acos: unary(sa.func.acos), - ops.Asin: unary(sa.func.asin), - ops.Atan2: fixed_arity(sa.func.atn2, 2), - ops.Atan: unary(sa.func.atan), - ops.Ceil: unary(sa.func.ceiling), - ops.Cos: unary(sa.func.cos), - ops.Floor: unary(sa.func.floor), - ops.FloorDivide: fixed_arity( - lambda left, right: sa.func.floor(left / right), 2 - ), - ops.Power: fixed_arity(sa.func.power, 2), - ops.Sign: unary(sa.func.sign), - ops.Sin: unary(sa.func.sin), - ops.Sqrt: unary(sa.func.sqrt), - ops.Tan: unary(sa.func.tan), - ops.Round: _round, - ops.RandomScalar: fixed_arity(sa.func.RAND, 0), - ops.Ln: fixed_arity(sa.func.log, 1), - ops.Log: fixed_arity(lambda x, p: sa.func.log(x, p), 2), - ops.Log2: fixed_arity(lambda x: sa.func.log(x, 2), 1), - ops.Log10: fixed_arity(lambda x: sa.func.log(x, 10), 1), - ops.StandardDev: variance_reduction("stdev", {"sample": "", "pop": "p"}), - ops.Variance: variance_reduction("var", {"sample": "", "pop": "p"}), - # timestamp methods - ops.TimestampNow: fixed_arity(sa.func.GETDATE, 0), - ops.ExtractYear: _extract("year"), - ops.ExtractMonth: _extract("month"), - ops.ExtractDay: _extract("day"), - ops.ExtractDayOfYear: _extract("dayofyear"), - ops.ExtractHour: _extract("hour"), - ops.ExtractMinute: _extract("minute"), - ops.ExtractSecond: _extract("second"), - ops.ExtractMillisecond: _extract("millisecond"), - ops.ExtractWeekOfYear: _extract("iso_week"), - ops.DayOfWeekIndex: fixed_arity( - lambda x: sa.func.datepart(sa.text("weekday"), x) - 1, 1 - ), - ops.ExtractEpochSeconds: fixed_arity( - lambda x: sa.cast( - sa.func.datediff(sa.text("s"), "1970-01-01 00:00:00", x), sa.BIGINT - ), - 1, - ), - ops.TimestampFromUNIX: lambda t, op: _timestamp_from_unix( - t.translate(op.arg), op.unit.short - ), - ops.DateFromYMD: fixed_arity(sa.func.datefromparts, 3), - ops.TimestampFromYMDHMS: fixed_arity( - lambda y, m, d, h, min, s: sa.func.datetimefromparts(y, m, d, h, min, s, 0), - 6, - ), - ops.TimeFromHMS: fixed_arity( - lambda h, m, s: sa.func.timefromparts(h, m, s, 0, sa.literal_column("0")), 3 - ), - ops.TimestampTruncate: _timestamp_truncate, - ops.DateTruncate: _timestamp_truncate, - ops.TimestampBucket: _timestamp_bucket, - ops.Hash: unary(sa.func.checksum), - ops.HashBytes: _hashbytes, - ops.HexDigest: _hexdigest, - ops.ExtractMicrosecond: fixed_arity( - lambda arg: 
sa.func.datepart(sa.literal_column("microsecond"), arg), 1 - ), - ops.TimeDelta: _temporal_delta, - ops.DateDelta: _temporal_delta, - ops.TimestampDelta: _temporal_delta, - ops.Literal: _literal, - } -) - -_invalid_operations = { - # ibis.expr.operations.strings - ops.RPad, - ops.LPad, - # ibis.expr.operations.reductions - ops.BitAnd, - ops.BitOr, - ops.BitXor, - ops.GroupConcat, - # ibis.expr.operations.window - ops.NthValue, -} - -operation_registry = { - k: v for k, v in operation_registry.items() if k not in _invalid_operations -} diff --git a/ibis/backends/mssql/tests/conftest.py b/ibis/backends/mssql/tests/conftest.py index adc3977f7109..414c34a47ecf 100644 --- a/ibis/backends/mssql/tests/conftest.py +++ b/ibis/backends/mssql/tests/conftest.py @@ -4,10 +4,8 @@ from typing import TYPE_CHECKING, Any import pytest -import sqlalchemy as sa import ibis -from ibis.backends.conftest import init_database from ibis.backends.tests.base import ServiceBackendTest if TYPE_CHECKING: @@ -27,28 +25,18 @@ class TestConf(ServiceBackendTest): check_dtype = False supports_window_operations = False returned_timestamp_unit = "s" - supports_arrays = False - supports_arrays_outside_of_select = supports_arrays + supports_arrays_outside_of_select = supports_arrays = False supports_structs = False supports_json = False rounding_method = "half_to_even" service_name = "mssql" - deps = "pyodbc", "sqlalchemy" + deps = ("pyodbc",) @property def test_files(self) -> Iterable[Path]: return self.data_dir.joinpath("csv").glob("*.csv") - def _load_data( - self, - *, - user: str = MSSQL_USER, - password: str = MSSQL_PASS, - host: str = MSSQL_HOST, - port: int = MSSQL_PORT, - database: str = IBIS_TEST_MSSQL_DB, - **_: Any, - ) -> None: + def _load_data(self, **_: Any) -> None: """Load test data into a MSSQL backend instance. 
Parameters @@ -58,17 +46,9 @@ def _load_data( script_dir Location of scripts defining schemas """ - params = f"driver={MSSQL_PYODBC_DRIVER}" - url = sa.engine.make_url( - f"mssql+pyodbc://{user}:{password}@{host}:{port:d}/{database}?{params}" - ) - init_database( - url=url, - database=database, - schema=self.ddl_script, - isolation_level="AUTOCOMMIT", - recreate=False, - ) + with self.connection.begin() as cur: + for stmt in self.ddl_script: + cur.execute(stmt) @staticmethod def connect(*, tmpdir, worker_id, **kw): @@ -79,6 +59,7 @@ def connect(*, tmpdir, worker_id, **kw): database=IBIS_TEST_MSSQL_DB, port=MSSQL_PORT, driver=MSSQL_PYODBC_DRIVER, + autocommit=True, **kw, ) diff --git a/ibis/backends/mssql/tests/test_client.py b/ibis/backends/mssql/tests/test_client.py index b26c78a53c38..102b5c2da721 100644 --- a/ibis/backends/mssql/tests/test_client.py +++ b/ibis/backends/mssql/tests/test_client.py @@ -1,8 +1,6 @@ from __future__ import annotations import pytest -import sqlalchemy as sa -from pytest import param import ibis import ibis.expr.datatypes as dt @@ -27,13 +25,14 @@ ("FLOAT", dt.float64), ("FLOAT(3)", dt.float32), ("FLOAT(25)", dt.float64), + ("FLOAT(37)", dt.float64), # Date and time ("DATE", dt.date), ("TIME", dt.time), ("DATETIME2", dt.timestamp(scale=7)), ("DATETIMEOFFSET", dt.timestamp(scale=7, timezone="UTC")), - ("SMALLDATETIME", dt.timestamp), - ("DATETIME", dt.timestamp), + ("SMALLDATETIME", dt.Timestamp(scale=0)), + ("DATETIME", dt.Timestamp(scale=3)), # Characters strings ("CHAR", dt.string), ("TEXT", dt.string), @@ -49,38 +48,23 @@ # Other data types ("UNIQUEIDENTIFIER", dt.uuid), ("TIMESTAMP", dt.binary(nullable=False)), + ("DATETIME2(4)", dt.timestamp(scale=4)), + ("DATETIMEOFFSET(5)", dt.timestamp(scale=5, timezone="UTC")), + ("GEOMETRY", dt.geometry), + ("GEOGRAPHY", dt.geography), ] -broken_sqlalchemy_autoload = pytest.mark.xfail( - reason="scale not inferred by sqlalchemy autoload" -) - - -@pytest.mark.parametrize( - ("server_type", "expected_type"), - DB_TYPES - + [ - param( - "DATETIME2(4)", dt.timestamp(scale=4), marks=[broken_sqlalchemy_autoload] - ), - param( - "DATETIMEOFFSET(5)", - dt.timestamp(scale=5, timezone="UTC"), - marks=[broken_sqlalchemy_autoload], - ), - ], - ids=str, -) -def test_get_schema_from_query(con, server_type, expected_type, temp_table): - expected_schema = ibis.schema(dict(x=expected_type)) +@pytest.mark.parametrize(("server_type", "expected_type"), DB_TYPES, ids=str) +def test_get_schema(con, server_type, expected_type, temp_table): with con.begin() as c: - c.exec_driver_sql(f"CREATE TABLE [{temp_table}] (x {server_type})") + c.execute(f"CREATE TABLE [{temp_table}] (x {server_type})") + expected_schema = ibis.schema(dict(x=expected_type)) - result_schema = con._get_schema_using_query(f"SELECT * FROM [{temp_table}]") - assert result_schema == expected_schema - t = con.table(temp_table) - assert t.schema() == expected_schema + + assert con.get_schema(temp_table) == expected_schema + assert con.table(temp_table).schema() == expected_schema + assert con.sql(f"SELECT * FROM [{temp_table}]").schema() == expected_schema def test_builtin_scalar_udf(con): @@ -110,10 +94,6 @@ def count_big(x, where: bool = True) -> int: ft = con.tables.functional_alltypes expr = count_big(ft.id) - with pytest.raises( - sa.exc.ProgrammingError, match="An expression of non-boolean type specified" - ): - assert expr.execute() expr = count_big(ft.id, where=ft.id == 1) assert expr.execute() == ft[ft.id == 1].count().execute() diff --git 
a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py index cae87e456800..b853bd14a0e0 100644 --- a/ibis/backends/mysql/__init__.py +++ b/ibis/backends/mysql/__init__.py @@ -297,7 +297,7 @@ def list_tables( conditions = [TRUE] if schema is not None: - conditions = C.table_schema.eq(sge.convert(schema)) + conditions.append(C.table_schema.eq(sge.convert(schema))) col = "table_name" sql = ( @@ -305,7 +305,7 @@ def list_tables( .from_(sg.table("tables", db="information_schema")) .distinct() .where(*conditions) - .sql(self.name, pretty=True) + .sql(self.name) ) with self._safe_raw_sql(sql) as cur: diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py index 188c7ee9ccaa..a314b4f7543c 100644 --- a/ibis/backends/tests/errors.py +++ b/ibis/backends/tests/errors.py @@ -117,3 +117,9 @@ from oracledb.exceptions import DatabaseError as OracleDatabaseError except ImportError: OracleDatabaseError = None + +try: + from pyodbc import DataError as PyODBCDataError + from pyodbc import ProgrammingError as PyODBCProgrammingError +except ImportError: + PyODBCProgrammingError = PyODBCDataError = None diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/mssql/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/mssql/out.sql new file mode 100644 index 000000000000..c3c69f741a10 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/mssql/out.sql @@ -0,0 +1,10 @@ +SELECT + [t2].[id], + IIF([t2].[bool_col] <> 0, 1, 0) AS [bool_col] +FROM ( + SELECT + TOP 11 + [t0].[id], + [t0].[bool_col] + FROM [functional_alltypes] AS [t0] +) AS [t2] \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/mssql/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/mssql/out.sql new file mode 100644 index 000000000000..c3c69f741a10 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/mssql/out.sql @@ -0,0 +1,10 @@ +SELECT + [t2].[id], + IIF([t2].[bool_col] <> 0, 1, 0) AS [bool_col] +FROM ( + SELECT + TOP 11 + [t0].[id], + [t0].[bool_col] + FROM [functional_alltypes] AS [t0] +) AS [t2] \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/mssql/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/mssql/out.sql new file mode 100644 index 000000000000..a4c958333e78 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/mssql/out.sql @@ -0,0 +1,3 @@ +SELECT + SUM([t0].[bigint_col]) AS [Sum(bigint_col)] +FROM [functional_alltypes] AS [t0] \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/mssql/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/mssql/out.sql new file mode 100644 index 000000000000..06e59f87c727 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/mssql/out.sql @@ -0,0 +1,15 @@ +SELECT + [t3].[id], + IIF([t3].[bool_col] <> 0, 1, 0) AS [bool_col] +FROM ( + SELECT + TOP 11 + * + FROM ( + SELECT + TOP 10 + [t0].[id], + [t0].[bool_col] + FROM [functional_alltypes] AS [t0] + ) AS [t2] +) AS [t3] \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/mssql/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/mssql/out.sql index ca36c8d7b815..9cd3ea8cfde6 
100644 --- a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/mssql/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/mssql/out.sql @@ -1,38 +1,38 @@ SELECT - CASE t0.continent - WHEN N'NA' - THEN N'North America' - WHEN N'SA' - THEN N'South America' - WHEN N'EU' - THEN N'Europe' - WHEN N'AF' - THEN N'Africa' - WHEN N'AS' - THEN N'Asia' - WHEN N'OC' - THEN N'Oceania' - WHEN N'AN' - THEN N'Antarctica' - ELSE N'Unknown continent' - END AS cont, - SUM(t0.population) AS total_pop -FROM countries AS t0 + CASE [t0].[continent] + WHEN 'NA' + THEN 'North America' + WHEN 'SA' + THEN 'South America' + WHEN 'EU' + THEN 'Europe' + WHEN 'AF' + THEN 'Africa' + WHEN 'AS' + THEN 'Asia' + WHEN 'OC' + THEN 'Oceania' + WHEN 'AN' + THEN 'Antarctica' + ELSE 'Unknown continent' + END AS [cont], + SUM([t0].[population]) AS [total_pop] +FROM [countries] AS [t0] GROUP BY - CASE t0.continent - WHEN N'NA' - THEN N'North America' - WHEN N'SA' - THEN N'South America' - WHEN N'EU' - THEN N'Europe' - WHEN N'AF' - THEN N'Africa' - WHEN N'AS' - THEN N'Asia' - WHEN N'OC' - THEN N'Oceania' - WHEN N'AN' - THEN N'Antarctica' - ELSE N'Unknown continent' + CASE [t0].[continent] + WHEN 'NA' + THEN 'North America' + WHEN 'SA' + THEN 'South America' + WHEN 'EU' + THEN 'Europe' + WHEN 'AF' + THEN 'Africa' + WHEN 'AS' + THEN 'Asia' + WHEN 'OC' + THEN 'Oceania' + WHEN 'AN' + THEN 'Antarctica' + ELSE 'Unknown continent' END \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/mssql/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/mssql/out.sql index ffad28ca58a6..34d87120bad2 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/mssql/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/mssql/out.sql @@ -1,13 +1,13 @@ SELECT - t0.x IN ( - SELECT - t1.x - FROM ( + IIF( + [t0].[x] IN ( SELECT - t0.x AS x - FROM t AS t0 + [t0].[x] + FROM [t] AS [t0] WHERE - t0.x > 2 - ) AS t1 - ) AS [InColumn(x, x)] -FROM t AS t0 \ No newline at end of file + [t0].[x] > 2 + ), + 1, + 0 + ) AS [InSubquery(x)] +FROM [t] AS [t0] \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index a64179a8f9f0..f5008f169585 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -24,6 +24,7 @@ PolarsInvalidOperationError, Py4JError, PyDruidProgrammingError, + PyODBCProgrammingError, PySparkAnalysisException, SnowflakeProgrammingError, TrinoUserError, @@ -286,6 +287,7 @@ def mean_and_std(v): raises=AttributeError, reason="'IntegerColumn' object has no attribute 'any'", ), + pytest.mark.notimpl(["mssql"], raises=com.OperationNotDefinedError), ], ), param( @@ -306,6 +308,7 @@ def mean_and_std(v): pytest.mark.notimpl( ["exasol"], raises=(sa.exc.DBAPIError, ExaQueryError) ), + pytest.mark.notimpl(["mssql"], raises=com.OperationNotDefinedError), ], ), param( @@ -326,6 +329,7 @@ def mean_and_std(v): pytest.mark.notimpl( ["exasol"], raises=(sa.exc.DBAPIError, ExaQueryError) ), + pytest.mark.notimpl(["mssql"], raises=com.OperationNotDefinedError), ], ), param( @@ -338,6 +342,7 @@ def mean_and_std(v): raises=AttributeError, reason="'IntegerColumn' object has no attribute 'all'", ), + pytest.mark.notimpl(["mssql"], raises=com.OperationNotDefinedError), ], ), param( @@ -358,6 +363,7 @@ def mean_and_std(v): pytest.mark.notimpl( ["exasol"], raises=(sa.exc.DBAPIError, ExaQueryError) ), + pytest.mark.notimpl(["mssql"], 
raises=com.OperationNotDefinedError), ], ), param( @@ -378,6 +384,7 @@ def mean_and_std(v): pytest.mark.notimpl( ["exasol"], raises=(sa.exc.DBAPIError, ExaQueryError) ), + pytest.mark.notimpl(["mssql"], raises=com.OperationNotDefinedError), ], ), param( @@ -899,7 +906,6 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): "bigquery", "dask", "datafusion", - "mssql", "polars", "sqlite", "druid", @@ -908,7 +914,8 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): raises=com.OperationNotDefinedError, ), pytest.mark.notyet( - ["mysql", "impala", "exasol"], raises=com.UnsupportedBackendType + ["mysql", "mssql", "impala", "exasol"], + raises=com.UnsupportedBackendType, ), pytest.mark.notyet( ["snowflake"], @@ -1320,9 +1327,7 @@ def test_date_quantile(alltypes, func): ), ], ) -@pytest.mark.notimpl( - ["datafusion", "polars", "mssql"], raises=com.OperationNotDefinedError -) +@pytest.mark.notimpl(["datafusion", "polars"], raises=com.OperationNotDefinedError) @pytest.mark.notyet( ["oracle"], raises=OracleDatabaseError, @@ -1373,6 +1378,7 @@ def test_group_concat( raises=NotImplementedError, reason="sorting on aggregations not yet implemented", ) +@pytest.mark.notimpl(["mssql"], raises=PyODBCProgrammingError) def test_topk_op(alltypes, df): # TopK expression will order rows by "count" but each backend # can have different result for that. @@ -1397,6 +1403,7 @@ def test_topk_op(alltypes, df): ) ], id="string_col_filter_top3", + marks=pytest.mark.notimpl(["mssql"], raises=PyODBCProgrammingError), ) ], ) @@ -1582,9 +1589,7 @@ def test_grouped_case(backend, con): backend.assert_frame_equal(result, expected) -@pytest.mark.notimpl( - ["datafusion", "mssql", "polars"], raises=com.OperationNotDefinedError -) +@pytest.mark.notimpl(["datafusion", "polars"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.broken( ["dask"], @@ -1600,6 +1605,7 @@ def test_grouped_case(backend, con): @pytest.mark.notyet(["mysql"], raises=MySQLNotSupportedError) @pytest.mark.notyet(["oracle"], raises=OracleDatabaseError) @pytest.mark.notyet(["pyspark"], raises=PySparkAnalysisException) +@pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError) def test_group_concat_over_window(backend, con): input_df = pd.DataFrame( { diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 75a0d5d21cef..0bda4091815a 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -32,9 +32,16 @@ pytestmark = [ pytest.mark.never( - ["sqlite", "mssql", "exasol"], + ["sqlite", "mysql", "exasol"], reason="No array support", raises=Exception + ), + pytest.mark.never( + ["mssql"], reason="No array support", - raises=Exception, + raises=( + com.UnsupportedBackendType, + com.OperationNotDefinedError, + AssertionError, + ), ), pytest.mark.never(["mysql"], reason="No array support", raises=(com.UnsupportedBackendType, com.OperationNotDefinedError, MySQLOperationalError)), pytest.mark.notyet( @@ -429,7 +436,6 @@ def test_array_slice(backend, start, stop): [ "datafusion", "flink", - "mssql", "polars", "snowflake", "sqlite", @@ -491,7 +497,6 @@ def test_array_map(con, input, output): "dask", "datafusion", "flink", - "mssql", "pandas", "polars", "snowflake", @@ -542,7 +547,7 @@ def test_array_filter(con, input, output): @builtin_array -@pytest.mark.notimpl(["mssql", "polars"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) 
@pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["risingwave"], @@ -608,7 +613,7 @@ def test_array_position(backend, con, a, expected_array): @builtin_array -@pytest.mark.notimpl(["dask", "mssql", "polars"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["dask", "polars"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["risingwave"], raises=AssertionError, @@ -643,7 +648,7 @@ def test_array_remove(con, a): @builtin_array @pytest.mark.notimpl( - ["dask", "datafusion", "mssql", "polars"], raises=com.OperationNotDefinedError + ["dask", "datafusion", "polars"], raises=com.OperationNotDefinedError ) @pytest.mark.notimpl( ["sqlite"], raises=NotImplementedError, reason="Unsupported type: Array..." @@ -696,7 +701,7 @@ def test_array_unique(con, input, expected): @builtin_array @pytest.mark.notimpl( - ["dask", "datafusion", "flink", "mssql", "polars"], + ["dask", "datafusion", "flink", "polars"], raises=com.OperationNotDefinedError, ) @pytest.mark.broken( @@ -717,7 +722,7 @@ def test_array_sort(con): @builtin_array @pytest.mark.notimpl( - ["dask", "datafusion", "mssql", "polars"], raises=com.OperationNotDefinedError + ["dask", "datafusion", "polars"], raises=com.OperationNotDefinedError ) @pytest.mark.parametrize( ("a", "b", "expected_array"), @@ -761,7 +766,7 @@ def test_array_union(con, a, b, expected_array): @builtin_array @pytest.mark.notimpl( - ["dask", "datafusion", "mssql", "pandas", "polars", "flink"], + ["dask", "datafusion", "pandas", "polars", "flink"], raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( @@ -828,9 +833,6 @@ def test_unnest_struct(con): @builtin_array -@pytest.mark.never( - ["mssql"], raises=com.OperationNotDefinedError, reason="no array support" -) @pytest.mark.notimpl( [ "dask", @@ -1083,7 +1085,6 @@ def test_unnest_empty_array(con): [ "datafusion", "flink", - "mssql", "polars", "snowflake", "sqlite", @@ -1108,7 +1109,6 @@ def test_array_map_with_conflicting_names(backend, con): [ "datafusion", "flink", - "mssql", "polars", "snowflake", "sqlite", diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index c10fe79dbff6..23097069bc66 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -93,6 +93,7 @@ def time_keyed_right(time_keyed_df2): "bigquery", "exasol", "oracle", + "mssql", ] ) def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): @@ -131,6 +132,7 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op "bigquery", "exasol", "oracle", + "mssql", ] ) def test_keyed_asof_join_with_tolerance( diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index 5707f8ce3ec6..cfbc1b2d0360 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -4,8 +4,8 @@ import pyarrow as pa import pyarrow.csv as pcsv import pytest -import sqlalchemy as sa from pytest import param +import sqlalchemy as sa import ibis import ibis.expr.datatypes as dt @@ -18,6 +18,7 @@ OracleDatabaseError, PyDeltaTableError, PyDruidProgrammingError, + PyODBCProgrammingError, PySparkArithmeticException, PySparkParseException, SnowflakeProgrammingError, @@ -356,7 +357,7 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): marks=[ pytest.mark.notyet(["impala"], reason="precision not supported"), pytest.mark.notyet(["duckdb"], reason="precision is out of range"), - pytest.mark.notyet(["mssql"], 
raises=sa.exc.ProgrammingError), + pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError), pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError), pytest.mark.notyet(["trino"], raises=TrinoUserError), pytest.mark.notyet(["oracle"], raises=OracleDatabaseError), @@ -427,9 +428,6 @@ def test_roundtrip_delta(backend, con, alltypes, tmp_path, monkeypatch): backend.assert_frame_equal(result, expected) -@pytest.mark.xfail_version( - duckdb=["duckdb<0.8.1"], raises=AssertionError, reason="bug in duckdb" -) @pytest.mark.notimpl( ["druid"], raises=AttributeError, reason="string type is used for timestamp_col" ) diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 1c829b50c0d3..c4d41f4546d1 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -9,9 +9,9 @@ import numpy as np import pandas as pd import pytest -import sqlalchemy as sa import toolz from pytest import param +import sqlalchemy as sa import ibis import ibis.common.exceptions as com @@ -28,6 +28,8 @@ MySQLProgrammingError, OracleDatabaseError, PyDruidProgrammingError, + PyODBCDataError, + PyODBCProgrammingError, SnowflakeProgrammingError, TrinoUserError, PsycoPg2InvalidTextRepresentation @@ -113,7 +115,7 @@ def test_scalar_fillna_nullif(con, expr, expected): ibis.literal(np.nan), methodcaller("isnan"), marks=[ - pytest.mark.notimpl(["mysql", "sqlite", "druid"]), + pytest.mark.notimpl(["mysql", "mssql", "sqlite", "druid"]), pytest.mark.notyet( ["exasol"], raises=ExaQueryError, @@ -127,7 +129,6 @@ def test_scalar_fillna_nullif(con, expr, expected): ), ], ) -@pytest.mark.notimpl(["mssql"]) @pytest.mark.notyet(["flink"], "NaN is not supported in Flink SQL", raises=ValueError) def test_isna(backend, alltypes, col, value, filt): table = alltypes.select(**{col: value}) @@ -205,7 +206,7 @@ def test_coalesce(con, expr, expected): # TODO(dask) - identicalTo - #2553 -@pytest.mark.notimpl(["clickhouse", "dask", "mssql", "druid", "exasol"]) +@pytest.mark.notimpl(["clickhouse", "dask", "druid", "exasol"]) def test_identical_to(backend, alltypes, sorted_df): sorted_alltypes = alltypes.order_by("id") df = sorted_df @@ -234,7 +235,7 @@ def test_identical_to(backend, alltypes, sorted_df): ("int_col", frozenset({1})), ], ) -@pytest.mark.notimpl(["mssql", "druid"]) +@pytest.mark.notimpl(["druid"]) def test_isin(backend, alltypes, sorted_df, column, elements): sorted_alltypes = alltypes.order_by("id") expr = sorted_alltypes[ @@ -258,7 +259,7 @@ def test_isin(backend, alltypes, sorted_df, column, elements): ("int_col", frozenset({1})), ], ) -@pytest.mark.notimpl(["mssql", "druid"]) +@pytest.mark.notimpl(["druid"]) def test_notin(backend, alltypes, sorted_df, column, elements): sorted_alltypes = alltypes.order_by("id") expr = sorted_alltypes[ @@ -744,7 +745,6 @@ def test_select_filter_select(backend, alltypes, df): backend.assert_series_equal(result, expected) -@pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError) def test_between(backend, alltypes, df): expr = alltypes.double_col.between(5, 10) result = expr.execute().rename("double_col") @@ -852,7 +852,7 @@ def test_typeof(con): @pytest.mark.notyet(["impala"], reason="can't find table in subquery") @pytest.mark.notimpl(["datafusion", "druid"]) @pytest.mark.notimpl(["pyspark"], condition=is_older_than("pyspark", "3.5.0")) -@pytest.mark.notyet(["dask", "mssql"], reason="not supported by the backend") +@pytest.mark.notyet(["dask"], reason="not supported by the backend") @pytest.mark.broken( ["risingwave"], 
raises=sa.exc.InternalError, @@ -902,13 +902,16 @@ def test_isin_uncorrelated_filter( @pytest.mark.parametrize( "dtype", [ - "bool", + param( + "bool", + marks=[pytest.mark.notimpl(["mssql"], raises=AssertionError)], + ), param( "bytes", marks=[ pytest.mark.notyet( ["exasol"], raises=ExaQueryError, reason="no binary type" - ) + ), ], ), "str", @@ -1346,7 +1349,6 @@ def hash_256(col): [ "pandas", "dask", - "mssql", "oracle", "risingwave", "snowflake", @@ -1379,6 +1381,7 @@ def hash_256(col): ["datafusion"], reason="casts to 1672531200000000 (microseconds)" ), pytest.mark.broken(["mysql"], reason="returns 20230101000000"), + pytest.mark.notyet(["mssql"], raises=PyODBCDataError), ], ), ], @@ -1396,7 +1399,6 @@ def test_try_cast(con, from_val, to_type, expected): "datafusion", "druid", "exasol", - "mssql", "mysql", "oracle", "pandas", @@ -1419,6 +1421,7 @@ def test_try_cast(con, from_val, to_type, expected): ), pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), pytest.mark.notyet(["trino"], raises=TrinoUserError), + pytest.mark.notyet(["mssql"], raises=PyODBCDataError), pytest.mark.broken(["polars"], reason="casts to 1672531200000000000"), ], ), @@ -1435,7 +1438,6 @@ def test_try_cast_null(con, from_val, to_type): "dask", "datafusion", "druid", - "mssql", "mysql", "oracle", "postgres", @@ -1463,7 +1465,6 @@ def test_try_cast_table(backend, con): "pandas", "dask", "datafusion", - "mssql", "mysql", "oracle", "postgres", @@ -1489,6 +1490,7 @@ def test_try_cast_table(backend, con): ), pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), pytest.mark.notyet(["trino"], raises=TrinoUserError), + pytest.mark.notyet(["mssql"], raises=PyODBCDataError), ], id="datetime-to-float", ), @@ -1576,7 +1578,7 @@ def test_try_cast_func(con, from_val, to_type, func): ), pytest.mark.notyet( ["mssql"], - raises=sa.exc.CompileError, + raises=PyODBCProgrammingError, reason="mssql doesn't support OFFSET without LIMIT", ), pytest.mark.notyet(["exasol"], raises=ExaQueryError), @@ -1617,11 +1619,6 @@ def test_try_cast_func(con, from_val, to_type, func): lambda _: 1, id="[3:4]", marks=[ - pytest.mark.notyet( - ["mssql"], - raises=sa.exc.CompileError, - reason="mssql doesn't support OFFSET without LIMIT", - ), pytest.mark.notyet(["exasol"], raises=ExaQueryError), pytest.mark.notyet(["oracle"], raises=com.UnsupportedArgumentError), pytest.mark.notyet( @@ -1687,11 +1684,6 @@ def test_static_table_slice(backend, slc, expected_count_fn): raises=TrinoUserError, reason="backend doesn't support dynamic limit/offset", ) -@pytest.mark.notimpl( - ["mssql"], - raises=sa.exc.CompileError, - reason="mssql doesn't support dynamic limit/offset without an ORDER BY", -) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -1782,6 +1774,11 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): raises=sa.exc.InternalError, reason="risingwave doesn't support limit/offset", ) +@pytest.mark.notyet( + ["mssql"], + reason="doesn't support dynamic limit/offset; compiles incorrectly in sqlglot", + raises=AssertionError, +) def test_dynamic_table_slice_with_computed_offset(backend): t = backend.functional_alltypes diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index f1b79bdc2ef2..e52ab15e0954 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -190,7 +190,7 @@ def test_semi_join_topk(batting, awards_players, func): @pytest.mark.notimpl(["dask", "druid", "exasol", "oracle"]) @pytest.mark.notimpl( - ["postgres"], + ["postgres", "mssql"], 
raises=com.IbisTypeError, reason="postgres can't handle null types columns", ) diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index f91cdffd89fb..bb1e48b4fb42 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -27,6 +27,8 @@ PsycoPg2DivisionByZero, Py4JError, PyDruidProgrammingError, + PyODBCDataError, + PyODBCProgrammingError, PySparkArithmeticException, PySparkParseException, SnowflakeProgrammingError, @@ -258,7 +260,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "pandas": decimal.Decimal("1.1"), "pyspark": decimal.Decimal("1.1"), "mysql": decimal.Decimal("1"), - "mssql": 1.1, + "mssql": decimal.Decimal("1"), "druid": decimal.Decimal("1.1"), "datafusion": decimal.Decimal("1.1"), "oracle": decimal.Decimal("1.1"), @@ -302,7 +304,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "mysql": decimal.Decimal("1.1"), "clickhouse": decimal.Decimal("1.1"), "dask": decimal.Decimal("1.1"), - "mssql": 1.1, + "mssql": decimal.Decimal("1.1"), "druid": decimal.Decimal("1.1"), "datafusion": decimal.Decimal("1.1"), "oracle": decimal.Decimal("1.1"), @@ -337,7 +339,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "clickhouse": decimal.Decimal( "1.10000000000000003193790845333396190208" ), - "mssql": 1.1, "druid": decimal.Decimal("1.1"), "oracle": 1.1, }, @@ -375,6 +376,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "The precision can be up to 38 in Flink", raises=ValueError, ), + pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError), ], id="decimal-big", ), @@ -413,14 +415,7 @@ def test_numeric_literal(con, backend, expr, expected_types): pytest.mark.notyet( ["mysql", "impala"], raises=com.UnsupportedOperationError ), - pytest.mark.broken( - ["mssql"], - "(pymssql._pymssql.ProgrammingError) (207, b\"Invalid column name 'Infinity'." - "DB-Lib error message 20018, severity 16:\nGeneral SQL Server error: " - 'Check messages from the SQL Server\n")' - "[SQL: SELECT %(param_1)s AS [Decimal('Infinity')]]", - raises=(sa.exc.ProgrammingError, KeyError), - ), + pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError), pytest.mark.broken( ["druid"], "(pydruid.db.exceptions.ProgrammingError) Plan validation failed " @@ -490,14 +485,7 @@ def test_numeric_literal(con, backend, expr, expected_types): pytest.mark.notyet( ["mysql", "impala"], raises=com.UnsupportedOperationError ), - pytest.mark.broken( - ["mssql"], - "(pymssql._pymssql.ProgrammingError) (207, b\"Invalid column name 'Infinity'." - "DB-Lib error message 20018, severity 16:\nGeneral SQL Server error: " - 'Check messages from the SQL Server\n")' - "[SQL: SELECT %(param_1)s AS [Decimal('-Infinity')]]", - raises=(sa.exc.ProgrammingError, KeyError), - ), + pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError), pytest.mark.broken( ["druid"], "(pydruid.db.exceptions.ProgrammingError) Plan validation failed " @@ -571,22 +559,7 @@ def test_numeric_literal(con, backend, expr, expected_types): pytest.mark.notyet( ["mysql", "impala"], raises=com.UnsupportedOperationError ), - pytest.mark.broken( - ["mssql"], - "(pymssql._pymssql.ProgrammingError) (207, b\"Invalid column name 'NaN'." 
- "DB-Lib error message 20018, severity 16:\nGeneral SQL Server error: " - 'Check messages from the SQL Server\n")' - "[SQL: SELECT %(param_1)s AS [Decimal('NaN')]]", - raises=(sa.exc.ProgrammingError, KeyError), - ), - pytest.mark.broken( - ["mssql"], - "(pydruid.db.exceptions.ProgrammingError) Plan validation failed " - "(org.apache.calcite.tools.ValidationException): " - "org.apache.calcite.runtime.CalciteContextException: From line 1, column 8 to line 1, column 10: Column 'NaN' not found in any table" - "[SQL: SELECT NaN AS \"Decimal('NaN')\"]", - raises=sa.exc.ProgrammingError, - ), + pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError), pytest.mark.broken( ["druid"], "(pydruid.db.exceptions.ProgrammingError) Plan validation failed " @@ -1130,9 +1103,7 @@ def test_mod(backend, alltypes, df): backend.assert_series_equal(result, expected, check_dtype=False) -@pytest.mark.notimpl( - ["mssql"], raises=(sa.exc.OperationalError, sa.exc.ProgrammingError) -) +@pytest.mark.notimpl(["mssql"], raises=PyODBCProgrammingError) @pytest.mark.notyet( ["druid"], raises=AssertionError, reason="mod with floats is integer mod" ) @@ -1299,7 +1270,7 @@ def test_floating_mod(backend, alltypes, df): raises=AssertionError, reason="returns NULL when dividing by zero", ) -@pytest.mark.notyet(["mssql"], raises=(sa.exc.OperationalError, sa.exc.DataError)) +@pytest.mark.notyet(["mssql"], raises=PyODBCDataError) @pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) @pytest.mark.notyet(["postgres"], raises=PsycoPg2DivisionByZero) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @@ -1358,6 +1329,8 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "postgres", "mysql", "druid", + "mssql", + "exasol", ], reason="Not SQLAlchemy backends", ) @@ -1366,7 +1339,6 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): raises=sa.exc.InternalError, reason="Feature is not yet implemented: unsupported data type: NUMERIC(5)", ) -@pytest.mark.notimpl(["exasol"], raises=KeyError) def test_sa_default_numeric_precision_and_scale( con, backend, default_precisions, default_scales, temp_table ): @@ -1463,6 +1435,11 @@ def test_clip(backend, alltypes, df, ibis_func, pandas_func): backend.assert_series_equal(result, expected, check_names=False) +@pytest.mark.broken( + ["mssql"], + raises=PyODBCProgrammingError, + reason="unbounded window frames are not supported", +) @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["druid"], diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index 7d8ead956c71..b97401a9d47e 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -37,7 +37,7 @@ def test_floating_scalar_parameter(backend, alltypes, df, column, raw_value): ("start_string", "end_string"), [("2009-03-01", "2010-07-03"), ("2014-12-01", "2017-01-05")], ) -@pytest.mark.notimpl(["mssql", "trino", "druid"]) +@pytest.mark.notimpl(["trino", "druid"]) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, diff --git a/ibis/backends/tests/test_set_ops.py b/ibis/backends/tests/test_set_ops.py index d489ac1a0884..3b8b78d4f7ec 100644 --- a/ibis/backends/tests/test_set_ops.py +++ b/ibis/backends/tests/test_set_ops.py @@ -5,6 +5,7 @@ import pandas as pd import pytest from pytest import param +import sqlalchemy as sa import ibis import ibis.common.exceptions as com diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index 
37703174fb22..ae01c33023c6 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -5,8 +5,8 @@ import numpy as np import pandas as pd import pytest -import sqlalchemy as sa from pytest import param +import sqlalchemy as sa import ibis import ibis.common.exceptions as com @@ -15,6 +15,7 @@ ClickHouseDatabaseError, OracleDatabaseError, PyDruidProgrammingError, + PyODBCProgrammingError, ) from ibis.common.annotations import ValidationError @@ -132,16 +133,7 @@ def uses_java_re(t): id="contains", marks=[ pytest.mark.broken( - ["mssql"], - raises=sa.exc.ProgrammingError, - reason=( - "(pymssql._pymssql.ProgrammingError) (102, b\"Incorrect syntax near '>'." - "DB-Lib error message 20018, severity 15:\nGeneral SQL Server error: " - 'Check messages from the SQL Server\n")' - "[SQL: SELECT charindex(%(param_1)s, t0.string_col) - %(charindex_1)s >= " - "%(param_2)s AS tmp" - "FROM functional_alltypes AS t0]" - ), + ["mssql"], raises=PyODBCProgrammingError, reason="incorrect syntax" ), ], ), @@ -150,12 +142,7 @@ def uses_java_re(t): lambda t: t.string_col.str.contains("6.*"), id="like", marks=[ - pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError), - pytest.mark.broken( - ["mssql"], - reason="mssql doesn't allow like outside of filters", - raises=sa.exc.ProgrammingError, - ), + pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) ], ), param( @@ -163,12 +150,7 @@ def uses_java_re(t): lambda t: t.string_col.str.contains("6%"), id="complex_like_escape", marks=[ - pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError), - pytest.mark.broken( - ["mssql"], - reason="mssql doesn't allow like outside of filters", - raises=sa.exc.ProgrammingError, - ), + pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) ], ), param( @@ -176,12 +158,7 @@ def uses_java_re(t): lambda t: t.string_col.str.contains("6%.*"), id="complex_like_escape_match", marks=[ - pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError), - pytest.mark.broken( - ["mssql"], - reason="mssql doesn't allow like outside of filters", - raises=sa.exc.ProgrammingError, - ), + pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) ], ), param( @@ -196,7 +173,7 @@ def uses_java_re(t): pytest.mark.broken( ["mssql"], reason="mssql doesn't allow like outside of filters", - raises=sa.exc.ProgrammingError, + raises=PyODBCProgrammingError, ), ], ), @@ -454,8 +431,7 @@ def uses_java_re(t): id="translate", marks=[ pytest.mark.notimpl( - ["mssql", "mysql", "polars", "druid"], - raises=com.OperationNotDefinedError, + ["mysql", "polars", "druid"], raises=com.OperationNotDefinedError ), pytest.mark.notyet( ["flink"], @@ -583,8 +559,9 @@ def uses_java_re(t): id="startswith", # pyspark doesn't support `cases` yet marks=[ - pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError), - pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError), + pytest.mark.notimpl( + ["dask", "mssql"], raises=com.OperationNotDefinedError + ), ], ), param( @@ -596,9 +573,8 @@ def uses_java_re(t): # pyspark doesn't support `cases` yet marks=[ pytest.mark.notimpl( - ["dask", "datafusion"], raises=com.OperationNotDefinedError + ["dask", "datafusion", "mssql"], raises=com.OperationNotDefinedError ), - pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError), ], ), param( @@ -606,8 +582,9 @@ def uses_java_re(t): lambda t: t.date_string_col.str.startswith("2010-01"), id="startswith-simple", marks=[ - pytest.mark.notimpl(["dask"], 
raises=com.OperationNotDefinedError), - pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError), + pytest.mark.notimpl( + ["dask", "mssql"], raises=com.OperationNotDefinedError + ), ], ), param( @@ -616,10 +593,8 @@ def uses_java_re(t): id="endswith-simple", marks=[ pytest.mark.notimpl( - ["dask", "datafusion"], - raises=com.OperationNotDefinedError, + ["dask", "datafusion", "mssql"], raises=com.OperationNotDefinedError ), - pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError), ], ), param( @@ -655,7 +630,7 @@ def uses_java_re(t): pytest.mark.broken( ["mssql"], reason="substr requires 3 arguments", - raises=sa.exc.ProgrammingError, + raises=PyODBCProgrammingError, ), ], ), @@ -843,7 +818,6 @@ def test_re_replace_global(con): assert result == "cbc" -@pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError) @pytest.mark.notimpl(["druid"], raises=ValidationError) def test_substr_with_null_values(backend, alltypes, df): table = alltypes.mutate( @@ -959,17 +933,14 @@ def test_capitalize(con): @pytest.mark.notimpl( ["dask", "pandas", "polars", "oracle", "flink"], raises=com.OperationNotDefinedError ) -@pytest.mark.notyet( - ["mssql", "sqlite"], reason="no arrays", raises=com.OperationNotDefinedError -) +@pytest.mark.notyet(["sqlite"], reason="no arrays", raises=com.OperationNotDefinedError) @pytest.mark.never( ["mysql"], raises=com.OperationNotDefinedError, reason="no array support" ) -@pytest.mark.never( - ["exasol"], raises=com.UnsupportedBackendType, reason="no array support" -) @pytest.mark.notimpl( - ["impala"], raises=com.UnsupportedBackendType, reason="no array support" + ["mssql", "exasol", "impala"], + raises=com.UnsupportedBackendType, + reason="no array support", ) def test_array_string_join(con): s = ibis.array(["a", "b", "c"]) @@ -1029,11 +1000,6 @@ def test_levenshtein(con, right): assert result == 3 -@pytest.mark.notyet( - ["mssql"], - reason="doesn't allow boolean expressions in select statements", - raises=sa.exc.ProgrammingError, -) @pytest.mark.parametrize( "expr", [ diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 827b06a663eb..c353d905a07d 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -9,9 +9,9 @@ import numpy as np import pandas as pd import pytest -import sqlalchemy as sa import sqlglot as sg from pytest import param +import sqlalchemy as sa import ibis import ibis.common.exceptions as com @@ -32,6 +32,7 @@ PolarsPanicException, Py4JJavaError, PyDruidProgrammingError, + PyODBCProgrammingError, SnowflakeProgrammingError, TrinoUserError, ) @@ -143,7 +144,7 @@ def test_timestamp_extract(backend, alltypes, df, attr): id="day_of_week_full_name", marks=[ pytest.mark.notimpl( - ["mssql", "druid", "oracle", "exasol"], + ["druid", "oracle", "exasol"], raises=com.OperationNotDefinedError, ), pytest.mark.broken( @@ -1471,7 +1472,7 @@ def test_interval_add_cast_column(backend, alltypes, df): ), ], ) -@pytest.mark.notimpl(["datafusion", "mssql"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["druid"], raises=AttributeError, @@ -1662,7 +1663,12 @@ def test_string_to_timestamp(alltypes, fmt): param("2017-01-07", 5, "Saturday", id="saturday"), ], ) -@pytest.mark.notimpl(["mssql", "druid", "oracle"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["druid", "oracle"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl( + ["flink"], + raises=Py4JJavaError, + 
reason="DayOfWeekName is not supported in Flink", +) @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["risingwave"], @@ -1678,7 +1684,7 @@ def test_day_of_week_scalar(con, date, expected_index, expected_day): assert result_day.lower() == expected_day.lower() -@pytest.mark.notimpl(["mssql", "oracle"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["druid"], raises=AttributeError, @@ -1717,15 +1723,20 @@ def test_day_of_week_column(backend, alltypes, df): lambda s: s.dt.day_name().str.len().sum(), id="day_of_week_full_name", marks=[ - pytest.mark.notimpl( - ["mssql"], - raises=com.OperationNotDefinedError, - ), pytest.mark.broken( ["risingwave"], raises=AssertionError, reason="Refer to https://github.com/risingwavelabs/risingwave/issues/14670", ), + pytest.mark.never( + ["flink"], + raises=Py4JJavaError, + reason=( + "SqlValidatorException: No match found for function signature dayname()" + "`day_of_week_name` is not supported in Flink" + "Ref: https://nightlies.apache.org/flink/flink-docs-release-1.13/docs/dev/table/functions/systemfunctions/#temporal-functions" + ), + ), ], ), ], @@ -2065,6 +2076,7 @@ def test_extract_time_from_timestamp(con, microsecond): "support logical type INTERVAL SECOND(3) NOT NULL currently" ), ) +@pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError) def test_interval_literal(con, backend): expr = ibis.interval(1, unit="s") result = con.execute(expr) @@ -2348,7 +2360,7 @@ def test_large_timestamp(con): pytest.mark.notyet( ["mssql"], reason="doesn't support nanoseconds", - raises=sa.exc.ProgrammingError, + raises=PyODBCProgrammingError, ), pytest.mark.notyet( ["mysql"], diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 9e4bff794c6c..20001f5bac9e 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -6,8 +6,8 @@ import numpy as np import pandas as pd import pytest -import sqlalchemy as sa from pytest import param +import sqlalchemy as sa import ibis import ibis.common.exceptions as com @@ -21,6 +21,7 @@ OracleDatabaseError, Py4JJavaError, PyDruidProgrammingError, + PyODBCProgrammingError, SnowflakeProgrammingError, ) from ibis.legacy.udf.vectorized import analytic, reduction @@ -286,6 +287,7 @@ def calc_zscore(s): id="cumany", marks=[ pytest.mark.notimpl(["dask"], raises=NotImplementedError), + pytest.mark.broken(["mssql"], raises=com.OperationNotDefinedError), ], ), param( @@ -300,6 +302,7 @@ def calc_zscore(s): marks=[ pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.broken(["oracle"], raises=OracleDatabaseError), + pytest.mark.broken(["mssql"], raises=com.OperationNotDefinedError), ], ), param( @@ -313,6 +316,7 @@ def calc_zscore(s): id="cumall", marks=[ pytest.mark.notimpl(["dask"], raises=NotImplementedError), + pytest.mark.broken(["mssql"], raises=com.OperationNotDefinedError), ], ), param( @@ -327,6 +331,7 @@ def calc_zscore(s): marks=[ pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.broken(["oracle"], raises=OracleDatabaseError), + pytest.mark.broken(["mssql"], raises=com.OperationNotDefinedError), ], ), param( @@ -609,7 +614,14 @@ def test_grouped_bounded_preceding_window(backend, alltypes, df, window_fn): param( False, id="unordered", - marks=pytest.mark.notimpl(["flink"], raises=com.UnsupportedOperationError), + marks=[ + pytest.mark.notimpl(["flink"], raises=com.UnsupportedOperationError), + 
pytest.mark.broken( + ["mssql"], + raises=PyODBCProgrammingError, + reason="unbounded window frames are not supported", + ), + ], ), ], ) @@ -652,7 +664,8 @@ def test_grouped_unbounded_window( ], ) @pytest.mark.broken(["snowflake"], raises=AssertionError) -@pytest.mark.broken(["dask", "mssql"], raises=AssertionError) +@pytest.mark.broken(["dask"], raises=AssertionError) +@pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError) @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["flink"], @@ -742,6 +755,11 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): raises=com.UnsupportedOperationError, reason="Flink engine does not support generic window clause with no order by", ), + pytest.mark.broken( + ["mssql"], + raises=PyODBCProgrammingError, + reason="unbounded window frames are not supported", + ), ], ), param( @@ -866,7 +884,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): raises=com.UnsupportedOperationError, reason="Flink engine does not support generic window clause with no order by", ), - pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError), + pytest.mark.broken(["mssql"], raises=PyODBCProgrammingError), pytest.mark.notyet( ["snowflake"], reason="backend requires ordering", @@ -917,7 +935,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): raises=com.UnsupportedOperationError, reason="Flink engine does not support generic window clause with no order by", ), - pytest.mark.broken(["mssql"], raises=sa.exc.ProgrammingError), + pytest.mark.broken(["mssql"], raises=PyODBCProgrammingError), pytest.mark.notyet( ["snowflake"], reason="backend requires ordering", @@ -1047,7 +1065,7 @@ def test_ungrouped_unbounded_window( raises=sa.exc.InternalError, reason="Feature is not yet implemented: window frame in `RANGE` mode is not supported yet", ) -@pytest.mark.notyet(["mssql"], raises=sa.exc.ProgrammingError) +@pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError) @pytest.mark.broken( ["mysql"], raises=MySQLOperationalError, @@ -1230,10 +1248,13 @@ def test_first_last(backend): ["mysql"], raises=MySQLOperationalError, reason="not supported by MySQL" ) @pytest.mark.notyet( - ["mssql", "polars", "snowflake", "sqlite"], + ["polars", "snowflake", "sqlite"], raises=com.OperationNotDefinedError, reason="not support by the backend", ) +@pytest.mark.notyet( + ["mssql"], raises=PyODBCProgrammingError, reason="not support by the backend" +) @pytest.mark.broken(["flink"], raises=Py4JJavaError, reason="bug in Flink") @pytest.mark.broken( ["risingwave"], @@ -1287,7 +1308,7 @@ def test_range_expression_bounds(backend): raises=com.OperationNotDefinedError, ) @pytest.mark.broken( - ["mssql"], reason="lack of support for booleans", raises=sa.exc.ProgrammingError + ["mssql"], reason="lack of support for booleans", raises=PyODBCProgrammingError ) @pytest.mark.broken( ["risingwave"], @@ -1320,7 +1341,7 @@ def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df): @pytest.mark.notyet( ["mssql"], reason="IS NULL not valid syntax for mssql", - raises=sa.exc.ProgrammingError, + raises=PyODBCProgrammingError, ) @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.notyet(["flink"], raises=com.UnsupportedOperationError) diff --git a/poetry.lock b/poetry.lock index 6961bb1322c5..98a9021ccea0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -7313,7 +7313,7 @@ exasol = ["pyexasol"] flink = [] geospatial = ["geopandas", "shapely"] impala = ["impyla"] -mssql = ["pyodbc", 
"sqlalchemy", "sqlalchemy-views"] +mssql = ["pyodbc"] mysql = ["pymysql"] oracle = ["oracledb", "packaging"] pandas = ["regex"] @@ -7329,4 +7329,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "f7f31dc794bdbd0993a76f0d8cac3a6b2debb6f5668be01dcd783eeb64f1faa8" +content-hash = "11da6bdc8c65ae8790ee2cbc799ca82af0c1f783f8c5ec6d0ab1477fd21b03b7" diff --git a/pyproject.toml b/pyproject.toml index 178b11e57e12..5b8607ac76bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -188,7 +188,7 @@ exasol = ["pyexasol"] flink = [] geospatial = ["geopandas", "shapely"] impala = ["impyla"] -mssql = ["sqlalchemy", "pyodbc", "sqlalchemy-views"] +mssql = ["pyodbc"] mysql = ["pymysql"] oracle = ["oracledb", "packaging"] pandas = ["regex"] From abe2d931f4773473dbd06fcdf8c9d570c77f98c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 22 Jan 2024 12:45:20 +0100 Subject: [PATCH 106/161] fix(sql): don't generate table aliases for `ops.JoinLink` --- ibis/backends/base/sqlglot/compiler.py | 5 - .../test_select/test_complex_join/out.sql | 14 +- .../test_table_drop_with_filter/out.sql | 6 +- .../test_aggregate_count_joined/out.sql | 2 +- .../out.sql | 6 +- .../test_bug_project_multiple_times/out.sql | 18 +-- .../test_join_between_joins/out.sql | 8 +- .../result.sql | 14 +- .../test_limit_with_self_join/out.sql | 2 +- .../out.sql | 6 +- .../out.sql | 56 ++++---- .../test_subquery_in_union/out.sql | 14 +- .../test_subquery_used_for_self_join/out.sql | 6 +- .../test_topk_analysis_bug/out.sql | 4 +- .../test_topk_predicate_pushdown_bug/out.sql | 18 +-- .../test_tpch_self_join_failure/out.sql | 24 ++-- .../test_where_no_pushdown_possible/out.sql | 14 +- .../test_where_with_join/out.sql | 16 +-- .../snapshots/test_sql/test_gh_1045/out.sql | 18 +-- .../test_lower_projection_sort_key/out.sql | 12 +- .../test_sql/test_multi_join/out.sql | 10 +- .../test_sql/test_no_cart_join/out.sql | 12 +- .../test_sql/test_no_cartesian_join/out.sql | 52 ++++---- .../out.sql | 20 +-- .../test_h02/test_tpc_h02/duckdb/h02.sql | 80 ++++++------ .../test_h02/test_tpc_h02/snowflake/h02.sql | 80 ++++++------ .../test_h02/test_tpc_h02/trino/h02.sql | 80 ++++++------ .../test_h03/test_tpc_h03/duckdb/h03.sql | 100 +++++++------- .../test_h03/test_tpc_h03/snowflake/h03.sql | 100 +++++++------- .../test_h03/test_tpc_h03/trino/h03.sql | 100 +++++++------- .../test_h05/test_tpc_h05/duckdb/h05.sql | 118 ++++++++--------- .../test_h05/test_tpc_h05/snowflake/h05.sql | 118 ++++++++--------- .../test_h05/test_tpc_h05/trino/h05.sql | 118 ++++++++--------- .../test_h07/test_tpc_h07/duckdb/h07.sql | 52 ++++---- .../test_h07/test_tpc_h07/snowflake/h07.sql | 52 ++++---- .../test_h07/test_tpc_h07/trino/h07.sql | 52 ++++---- .../test_h08/test_tpc_h08/duckdb/h08.sql | 36 +++--- .../test_h08/test_tpc_h08/snowflake/h08.sql | 36 +++--- .../test_h08/test_tpc_h08/trino/h08.sql | 36 +++--- .../test_h09/test_tpc_h09/duckdb/h09.sql | 32 ++--- .../test_h09/test_tpc_h09/snowflake/h09.sql | 32 ++--- .../test_h09/test_tpc_h09/trino/h09.sql | 32 ++--- .../test_h10/test_tpc_h10/duckdb/h10.sql | 122 +++++++++--------- .../test_h10/test_tpc_h10/snowflake/h10.sql | 122 +++++++++--------- .../test_h10/test_tpc_h10/trino/h10.sql | 122 +++++++++--------- .../test_h11/test_tpc_h11/duckdb/h11.sql | 92 ++++++------- .../test_h11/test_tpc_h11/snowflake/h11.sql | 92 ++++++------- .../test_h11/test_tpc_h11/trino/h11.sql | 92 ++++++------- .../test_h12/test_tpc_h12/duckdb/h12.sql | 80 ++++++------ 
.../test_h12/test_tpc_h12/snowflake/h12.sql | 80 ++++++------ .../test_h12/test_tpc_h12/trino/h12.sql | 80 ++++++------ .../test_h13/test_tpc_h13/duckdb/h13.sql | 20 +-- .../test_h13/test_tpc_h13/snowflake/h13.sql | 20 +-- .../test_h13/test_tpc_h13/trino/h13.sql | 20 +-- .../test_h14/test_tpc_h14/duckdb/h14.sql | 66 +++++----- .../test_h14/test_tpc_h14/snowflake/h14.sql | 66 +++++----- .../test_h14/test_tpc_h14/trino/h14.sql | 66 +++++----- .../test_h15/test_tpc_h15/duckdb/h15.sql | 20 +-- .../test_h15/test_tpc_h15/snowflake/h15.sql | 20 +-- .../test_h15/test_tpc_h15/trino/h15.sql | 20 +-- .../test_h16/test_tpc_h16/duckdb/h16.sql | 66 +++++----- .../test_h16/test_tpc_h16/snowflake/h16.sql | 66 +++++----- .../test_h16/test_tpc_h16/trino/h16.sql | 66 +++++----- .../test_h17/test_tpc_h17/duckdb/h17.sql | 68 +++++----- .../test_h17/test_tpc_h17/snowflake/h17.sql | 68 +++++----- .../test_h17/test_tpc_h17/trino/h17.sql | 68 +++++----- .../test_h18/test_tpc_h18/duckdb/h18.sql | 102 +++++++-------- .../test_h18/test_tpc_h18/snowflake/h18.sql | 108 ++++++++-------- .../test_h18/test_tpc_h18/trino/h18.sql | 108 ++++++++-------- .../test_h19/test_tpc_h19/duckdb/h19.sql | 100 +++++++------- .../test_h19/test_tpc_h19/snowflake/h19.sql | 100 +++++++------- .../test_h19/test_tpc_h19/trino/h19.sql | 100 +++++++------- .../test_h20/test_tpc_h20/duckdb/h20.sql | 12 +- .../test_h20/test_tpc_h20/snowflake/h20.sql | 12 +- .../test_h20/test_tpc_h20/trino/h20.sql | 12 +- .../test_h21/test_tpc_h21/duckdb/h21.sql | 44 +++---- .../test_h21/test_tpc_h21/snowflake/h21.sql | 44 +++---- .../test_h21/test_tpc_h21/trino/h21.sql | 44 +++---- 78 files changed, 1997 insertions(+), 2002 deletions(-) diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index 12432647752f..b6920f5718e6 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -268,11 +268,6 @@ def fn(node, _, **kwargs): if node is op: return result - elif isinstance(node, ops.JoinLink): - # TODO(kszucs): this is a hack to preserve the generated table - # aliases, going to remove in a follow-up PR - next(alias_counter) - return result elif isinstance(node, ops.Relation): aliases[node] = alias = f"t{next(alias_counter)}" alias = sg.to_identifier(alias, quoted=self.quoted) diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql index a13c5f564c3e..ef5ec770f49a 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql @@ -1,10 +1,10 @@ SELECT - t5.a, - t5.b, - t5.c, - t5.d, - t5.c / ( - t5.a - t5.b + t4.a, + t4.b, + t4.c, + t4.d, + t4.c / ( + t4.a - t4.b ) AS e FROM ( SELECT @@ -15,4 +15,4 @@ FROM ( FROM s AS t2 INNER JOIN t AS t3 ON t2.a = t3.c -) AS t5 \ No newline at end of file +) AS t4 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql index bf3eedb03752..db148ea33a9f 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql @@ -1,5 +1,5 @@ SELECT - t6.a + t5.a FROM ( SELECT t4.a @@ -14,6 +14,6 @@ FROM ( ) AS t4 INNER JOIN s AS t2 ON t4.b = t2.b -) AS t6 +) AS t5 
WHERE - t6.a < CAST(1.0 AS DOUBLE) \ No newline at end of file + t5.a < CAST(1.0 AS DOUBLE) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql index 3011cdb409b4..889345c4621d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql @@ -10,4 +10,4 @@ FROM ( FROM tpch_region AS t2 INNER JOIN tpch_nation AS t3 ON t2.r_regionkey = t3.n_regionkey -) AS t5 \ No newline at end of file +) AS t4 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql index a15f1a8cce7e..0a483c1315c4 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql @@ -1,6 +1,6 @@ SELECT - t5.foo_id, - SUM(t5.value1) AS total + t4.foo_id, + SUM(t4.value1) AS total FROM ( SELECT t2.c, @@ -11,6 +11,6 @@ FROM ( FROM star1 AS t2 INNER JOIN star2 AS t3 ON t2.foo_id = t3.foo_id -) AS t5 +) AS t4 GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql index 88011a4c2cf9..f498893a25cb 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql @@ -9,12 +9,12 @@ INNER JOIN tpch_region AS t5 ON t4.n_regionkey = t5.r_regionkey SEMI JOIN ( SELECT - t9.n_name, - t9."Sum(Cast(c_acctbal, float64))" + t7.n_name, + t7."Sum(Cast(c_acctbal, float64))" FROM ( SELECT - t8.n_name, - SUM(CAST(t8.c_acctbal AS DOUBLE)) AS "Sum(Cast(c_acctbal, float64))" + t6.n_name, + SUM(CAST(t6.c_acctbal AS DOUBLE)) AS "Sum(Cast(c_acctbal, float64))" FROM ( SELECT t3.c_custkey, @@ -32,12 +32,12 @@ SEMI JOIN ( ON t3.c_nationkey = t4.n_nationkey INNER JOIN tpch_region AS t5 ON t4.n_regionkey = t5.r_regionkey - ) AS t8 + ) AS t6 GROUP BY 1 - ) AS t9 + ) AS t7 ORDER BY - t9."Sum(Cast(c_acctbal, float64))" DESC + t7."Sum(Cast(c_acctbal, float64))" DESC LIMIT 10 -) AS t12 - ON t4.n_name = t12.n_name \ No newline at end of file +) AS t10 + ON t4.n_name = t10.n_name \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql index a66ce49c2bc5..396306956a81 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql @@ -3,8 +3,8 @@ SELECT t4.key2, t4.value1, t5.value2, - t11.value3, - t11.value4 + t9.value3, + t9.value4 FROM first AS t4 INNER JOIN second AS t5 ON t4.key1 = t5.key1 @@ -17,5 +17,5 @@ INNER JOIN ( FROM third AS t6 INNER JOIN fourth AS t7 ON t6.key3 = t7.key3 -) AS t11 - ON t4.key2 = t11.key2 \ No newline at end of file +) AS t9 + ON t4.key2 = t9.key2 \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql index 7b3d0ffe12ef..cf540b867fb9 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql @@ -1,9 +1,9 @@ SELECT - t6.on, - t6.by, - t6.on_right, - t6.by_right, - t6.val + t5.on, + t5.by, + t5.on_right, + t5.by_right, + t5.val FROM ( SELECT t2.on, @@ -14,9 +14,9 @@ FROM ( FROM left AS t2 LEFT OUTER JOIN right AS t3 ON t2.by = t3.by -) AS t6 +) AS t5 WHERE - t6.on_right = ( + t5.on_right = ( SELECT MAX(t4.on) AS "Max(on)" FROM ( diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql index b8cfc5063ba5..bb7c1b27ee01 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql @@ -31,4 +31,4 @@ FROM ( FROM functional_alltypes AS t1 INNER JOIN functional_alltypes AS t3 ON t1.tinyint_col < EXTRACT(minute FROM t3.timestamp_col) -) AS t5 \ No newline at end of file +) AS t4 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql index 38b614333084..3d676c32001a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql @@ -1,6 +1,6 @@ SELECT t2.r_name, - t7.n_name + t6.n_name FROM tpch_region AS t2 INNER JOIN tpch_nation AS t3 ON t2.r_regionkey = t3.n_regionkey @@ -16,5 +16,5 @@ INNER JOIN ( FROM tpch_region AS t2 INNER JOIN tpch_nation AS t3 ON t2.r_regionkey = t3.n_regionkey -) AS t7 - ON t2.r_regionkey = t7.r_regionkey \ No newline at end of file +) AS t6 + ON t2.r_regionkey = t6.r_regionkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql index f9302546380a..d400eab10c82 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql @@ -1,15 +1,15 @@ SELECT - t11.c_custkey, - t11.c_name, - t11.c_address, - t11.c_nationkey, - t11.c_phone, - t11.c_acctbal, - t11.c_mktsegment, - t11.c_comment, - t11.region, - t11.amount, - t11.odate + t8.c_custkey, + t8.c_name, + t8.c_address, + t8.c_nationkey, + t8.c_phone, + t8.c_acctbal, + t8.c_mktsegment, + t8.c_comment, + t8.region, + t8.amount, + t8.odate FROM ( SELECT t6.c_custkey, @@ -30,24 +30,24 @@ FROM ( ON t6.c_nationkey = t5.n_nationkey INNER JOIN tpch_orders AS t7 ON t7.o_custkey = t6.c_custkey -) AS t11 +) AS t8 WHERE - t11.amount > ( + t8.amount > ( SELECT - AVG(t13.amount) AS "Mean(amount)" + AVG(t10.amount) AS "Mean(amount)" FROM ( SELECT - t12.c_custkey, - t12.c_name, - t12.c_address, - t12.c_nationkey, - t12.c_phone, - t12.c_acctbal, - t12.c_mktsegment, - t12.c_comment, - 
t12.region, - t12.amount, - t12.odate + t9.c_custkey, + t9.c_name, + t9.c_address, + t9.c_nationkey, + t9.c_phone, + t9.c_acctbal, + t9.c_mktsegment, + t9.c_comment, + t9.region, + t9.amount, + t9.odate FROM ( SELECT t6.c_custkey, @@ -68,9 +68,9 @@ WHERE ON t6.c_nationkey = t5.n_nationkey INNER JOIN tpch_orders AS t7 ON t7.o_custkey = t6.c_custkey - ) AS t12 + ) AS t9 WHERE - t12.region = t11.region - ) AS t13 + t9.region = t8.region + ) AS t10 ) LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql index fbaeee461612..62595b73b907 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql @@ -1,4 +1,4 @@ -WITH t6 AS ( +WITH t5 AS ( SELECT t2.a, t2.g, @@ -35,15 +35,15 @@ WITH t6 AS ( 2 ) SELECT - t9.a, - t9.g, - t9.metric + t8.a, + t8.g, + t8.metric FROM ( SELECT * - FROM t6 AS t7 + FROM t5 AS t6 UNION ALL SELECT * - FROM t6 AS t8 -) AS t9 \ No newline at end of file + FROM t5 AS t7 +) AS t8 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql index 76fc7a3e43e4..029dae462da1 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql @@ -11,8 +11,8 @@ WITH t1 AS ( 3 ) SELECT - t7.g, - MAX(t7.total - t7.total_right) AS metric + t6.g, + MAX(t6.total - t6.total_right) AS metric FROM ( SELECT t3.g, @@ -26,6 +26,6 @@ FROM ( FROM t1 AS t3 INNER JOIN t1 AS t5 ON t3.a = t5.b -) AS t7 +) AS t6 GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql index 5c61bb338e11..54240af94422 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql @@ -8,7 +8,7 @@ WITH t1 AS ( t0.dest IN ('ORD', 'JFK', 'SFO') ) SELECT - t9.origin, + t8.origin, COUNT(*) AS "CountStar()" FROM ( SELECT @@ -33,6 +33,6 @@ FROM ( LIMIT 10 ) AS t7 ON t3.dest = t7.dest -) AS t9 +) AS t8 GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql index 9dffcedd667b..b81d1c1f474a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql @@ -16,12 +16,12 @@ INNER JOIN tpch_region AS t5 ON t4.n_regionkey = t5.r_regionkey SEMI JOIN ( SELECT - t9.n_name, - t9."Sum(c_acctbal)" + t7.n_name, + t7."Sum(c_acctbal)" FROM ( SELECT - t8.n_name, - SUM(t8.c_acctbal) AS "Sum(c_acctbal)" + t6.n_name, + SUM(t6.c_acctbal) AS "Sum(c_acctbal)" FROM ( SELECT t3.c_custkey, @@ -39,12 +39,12 @@ SEMI JOIN ( ON t3.c_nationkey = t4.n_nationkey INNER JOIN tpch_region AS t5 ON t4.n_regionkey = t5.r_regionkey - ) AS t8 + ) AS t6 GROUP BY 1 - ) AS t9 + ) AS t7 ORDER BY - t9."Sum(c_acctbal)" 
DESC + t7."Sum(c_acctbal)" DESC LIMIT 10 -) AS t12 - ON t4.n_name = t12.n_name \ No newline at end of file +) AS t10 + ON t4.n_name = t10.n_name \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql index 2da6a3ed3dca..6a6108dcf40c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql @@ -1,8 +1,8 @@ -WITH t12 AS ( +WITH t9 AS ( SELECT - t11.region, - EXTRACT(year FROM t11.odate) AS year, - CAST(SUM(t11.amount) AS DOUBLE) AS total + t8.region, + EXTRACT(year FROM t8.odate) AS year, + CAST(SUM(t8.amount) AS DOUBLE) AS total FROM ( SELECT t4.r_name AS region, @@ -16,17 +16,17 @@ WITH t12 AS ( ON t6.c_nationkey = t5.n_nationkey INNER JOIN tpch_orders AS t7 ON t7.o_custkey = t6.c_custkey - ) AS t11 + ) AS t8 GROUP BY 1, 2 ) SELECT - t14.region, - t14.year, - t14.total - t16.total AS yoy_change -FROM t12 AS t14 -INNER JOIN t12 AS t16 - ON t14.year = ( - t16.year - CAST(1 AS TINYINT) + t11.region, + t11.year, + t11.total - t13.total AS yoy_change +FROM t9 AS t11 +INNER JOIN t9 AS t13 + ON t11.year = ( + t13.year - CAST(1 AS TINYINT) ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql index 98fedc20876d..e38225fffe4e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql @@ -1,9 +1,9 @@ SELECT - t5.c, - t5.f, - t5.foo_id, - t5.bar_id, - t5.diff + t4.c, + t4.f, + t4.foo_id, + t4.bar_id, + t4.diff FROM ( SELECT t2.c, @@ -14,6 +14,6 @@ FROM ( FROM star1 AS t2 INNER JOIN star2 AS t3 ON t2.foo_id = t3.foo_id -) AS t5 +) AS t4 WHERE - t5.diff > CAST(1 AS TINYINT) \ No newline at end of file + t4.diff > CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql index 3f187d69a7fc..338494c9c8e1 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql @@ -1,10 +1,10 @@ SELECT - t5.c, - t5.f, - t5.foo_id, - t5.bar_id, - t5.value1, - t5.value3 + t4.c, + t4.f, + t4.foo_id, + t4.bar_id, + t4.value1, + t4.value3 FROM ( SELECT t2.c, @@ -16,6 +16,6 @@ FROM ( FROM star1 AS t2 INNER JOIN star2 AS t3 ON t2.foo_id = t3.foo_id -) AS t5 +) AS t4 WHERE - t5.f > CAST(0 AS TINYINT) AND t5.value3 < CAST(1000 AS SMALLINT) \ No newline at end of file + t4.f > CAST(0 AS TINYINT) AND t4.value3 < CAST(1000 AS SMALLINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_gh_1045/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_gh_1045/out.sql index b9a81bb4916f..ebf908ac0397 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_gh_1045/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_gh_1045/out.sql @@ -1,13 +1,13 @@ SELECT t5.t1_id1, t5.t1_val1, - t10.id3, - t10.val2, - t10.dt, - t10.t3_val2, - t10.id2a, - t10.id2b, - t10.val2_right + t9.id3, + t9.val2, + t9.dt, + t9.t3_val2, + 
t9.id2a, + t9.id2b, + t9.val2_right FROM ( SELECT t0.id1 AS t1_id1, @@ -33,5 +33,5 @@ LEFT OUTER JOIN ( ) AS t7 INNER JOIN test2 AS t3 ON t3.id2b = t7.id3 -) AS t10 - ON t5.t1_id1 = t10.id2a \ No newline at end of file +) AS t9 + ON t5.t1_id1 = t9.id2a \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql index b6c7dab91969..c297a58cd8f8 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql @@ -1,7 +1,7 @@ SELECT - t6.foo_id, - t6.total, - t6.value1 + t5.foo_id, + t5.total, + t5.value1 FROM ( SELECT t4.foo_id, @@ -17,8 +17,8 @@ FROM ( ) AS t4 INNER JOIN star2 AS t2 ON t4.foo_id = t2.foo_id -) AS t6 +) AS t5 WHERE - t6.total > CAST(100 AS TINYINT) + t5.total > CAST(100 AS TINYINT) ORDER BY - t6.total DESC \ No newline at end of file + t5.total DESC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql index d9c97bc180ca..ad0a5d35f89d 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql @@ -2,9 +2,9 @@ SELECT t4.x1, t4.y1, t5.x2, - t11.x3, - t11.y2, - t11.x4 + t9.x3, + t9.y2, + t9.x4 FROM t1 AS t4 INNER JOIN t2 AS t5 ON t4.x1 = t5.x2 @@ -16,5 +16,5 @@ INNER JOIN ( FROM t3 AS t6 INNER JOIN t4 AS t7 ON t6.x3 = t7.x4 -) AS t11 - ON t4.y1 = t11.y2 \ No newline at end of file +) AS t9 + ON t4.y1 = t9.y2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql index 580263e1f156..8cb242edd156 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql @@ -1,9 +1,9 @@ SELECT - t7.ancestor_node_sort_order, - t7.n + t6.ancestor_node_sort_order, + t6.n FROM ( SELECT - t6.ancestor_node_sort_order, + t5.ancestor_node_sort_order, CAST(1 AS TINYINT) AS n FROM ( SELECT @@ -29,9 +29,9 @@ FROM ( FROM products AS t1 ) AS t4 ON t2.product_id = t4.descendant_node_natural_key - ) AS t6 + ) AS t5 GROUP BY 1 -) AS t7 +) AS t6 ORDER BY - t7.ancestor_node_sort_order ASC \ No newline at end of file + t6.ancestor_node_sort_order ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql index a3df1de479ac..a94638f06f0b 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql @@ -1,28 +1,28 @@ SELECT - t15.customer_id, - t15.first_name, - t15.last_name, - t15.first_order, - t15.most_recent_order, - t15.number_of_orders, - t13.total_amount AS customer_lifetime_value + t13.customer_id, + t13.first_name, + t13.last_name, + t13.first_order, + t13.most_recent_order, + t13.number_of_orders, + t11.total_amount AS customer_lifetime_value FROM ( SELECT - t12.customer_id, - t12.first_name, - t12.last_name, - t12.first_order, - t12.most_recent_order, - t12.number_of_orders + t10.customer_id, + t10.first_name, + t10.last_name, + t10.first_order, + t10.most_recent_order, + 
t10.number_of_orders FROM ( SELECT t3.customer_id, t3.first_name, t3.last_name, - t8.customer_id AS customer_id_right, - t8.first_order, - t8.most_recent_order, - t8.number_of_orders + t7.customer_id AS customer_id_right, + t7.first_order, + t7.most_recent_order, + t7.number_of_orders FROM customers AS t3 LEFT OUTER JOIN ( SELECT @@ -33,14 +33,14 @@ FROM ( FROM orders AS t2 GROUP BY 1 - ) AS t8 - ON t3.customer_id = t8.customer_id - ) AS t12 -) AS t15 + ) AS t7 + ON t3.customer_id = t7.customer_id + ) AS t10 +) AS t13 LEFT OUTER JOIN ( SELECT - t9.customer_id, - SUM(t9.amount) AS total_amount + t8.customer_id, + SUM(t8.amount) AS total_amount FROM ( SELECT t4.payment_id, @@ -54,8 +54,8 @@ LEFT OUTER JOIN ( FROM payments AS t4 LEFT OUTER JOIN orders AS t5 ON t4.order_id = t5.order_id - ) AS t9 + ) AS t8 GROUP BY 1 -) AS t13 - ON t15.customer_id = t13.customer_id \ No newline at end of file +) AS t11 + ON t13.customer_id = t11.customer_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql index 9f3f24cd76eb..f2e028c28d23 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql @@ -1,6 +1,6 @@ SELECT - t9.p_partkey, - t9.ps_supplycost + t7.p_partkey, + t7.ps_supplycost FROM ( SELECT t3.p_partkey, @@ -8,15 +8,15 @@ FROM ( FROM part AS t3 INNER JOIN partsupp AS t4 ON t3.p_partkey = t4.ps_partkey -) AS t9 +) AS t7 WHERE - t9.ps_supplycost = ( + t7.ps_supplycost = ( SELECT - MIN(t11.ps_supplycost) AS "Min(ps_supplycost)" + MIN(t9.ps_supplycost) AS "Min(ps_supplycost)" FROM ( SELECT - t10.ps_partkey, - t10.ps_supplycost + t8.ps_partkey, + t8.ps_supplycost FROM ( SELECT t5.ps_partkey, @@ -24,8 +24,8 @@ WHERE FROM partsupp AS t5 INNER JOIN supplier AS t6 ON t6.s_suppkey = t5.ps_suppkey - ) AS t10 + ) AS t8 WHERE - t10.ps_partkey = t9.p_partkey - ) AS t11 + t8.ps_partkey = t7.p_partkey + ) AS t9 ) \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql index 9fa0195c56bb..eb30c07672f4 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql @@ -1,12 +1,12 @@ SELECT - t21.s_acctbal, - t21.s_name, - t21.n_name, - t21.p_partkey, - t21.p_mfgr, - t21.s_address, - t21.s_phone, - t21.s_comment + t14.s_acctbal, + t14.s_name, + t14.n_name, + t14.p_partkey, + t14.p_mfgr, + t14.s_address, + t14.s_phone, + t14.s_comment FROM ( SELECT t5.p_partkey, @@ -46,35 +46,35 @@ FROM ( ON t8.s_nationkey = t10.n_nationkey INNER JOIN region AS t12 ON t10.n_regionkey = t12.r_regionkey -) AS t21 +) AS t14 WHERE - t21.p_size = CAST(15 AS TINYINT) - AND t21.p_type LIKE '%BRASS' - AND t21.r_name = 'EUROPE' - AND t21.ps_supplycost = ( + t14.p_size = CAST(15 AS TINYINT) + AND t14.p_type LIKE '%BRASS' + AND t14.r_name = 'EUROPE' + AND t14.ps_supplycost = ( SELECT - MIN(t23.ps_supplycost) AS "Min(ps_supplycost)" + MIN(t16.ps_supplycost) AS "Min(ps_supplycost)" FROM ( SELECT - t22.ps_partkey, - t22.ps_suppkey, - t22.ps_availqty, - t22.ps_supplycost, - t22.ps_comment, - t22.s_suppkey, - t22.s_name, - t22.s_address, - t22.s_nationkey, - t22.s_phone, - t22.s_acctbal, - t22.s_comment, - 
t22.n_nationkey, - t22.n_name, - t22.n_regionkey, - t22.n_comment, - t22.r_regionkey, - t22.r_name, - t22.r_comment + t15.ps_partkey, + t15.ps_suppkey, + t15.ps_availqty, + t15.ps_supplycost, + t15.ps_comment, + t15.s_suppkey, + t15.s_name, + t15.s_address, + t15.s_nationkey, + t15.s_phone, + t15.s_acctbal, + t15.s_comment, + t15.n_nationkey, + t15.n_name, + t15.n_regionkey, + t15.n_comment, + t15.r_regionkey, + t15.r_name, + t15.r_comment FROM ( SELECT t7.ps_partkey, @@ -103,14 +103,14 @@ WHERE ON t9.s_nationkey = t11.n_nationkey INNER JOIN region AS t13 ON t11.n_regionkey = t13.r_regionkey - ) AS t22 + ) AS t15 WHERE - t22.r_name = 'EUROPE' AND t21.p_partkey = t22.ps_partkey - ) AS t23 + t15.r_name = 'EUROPE' AND t14.p_partkey = t15.ps_partkey + ) AS t16 ) ORDER BY - t21.s_acctbal DESC, - t21.n_name ASC, - t21.s_name ASC, - t21.p_partkey ASC + t14.s_acctbal DESC, + t14.n_name ASC, + t14.s_name ASC, + t14.p_partkey ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql index 59ffed3b4cae..5c0cbb4ae431 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql @@ -1,12 +1,12 @@ SELECT - "t26"."s_acctbal", - "t26"."s_name", - "t26"."n_name", - "t26"."p_partkey", - "t26"."p_mfgr", - "t26"."s_address", - "t26"."s_phone", - "t26"."s_comment" + "t19"."s_acctbal", + "t19"."s_name", + "t19"."n_name", + "t19"."p_partkey", + "t19"."p_mfgr", + "t19"."s_address", + "t19"."s_phone", + "t19"."s_comment" FROM ( SELECT "t10"."p_partkey", @@ -89,35 +89,35 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."REGION" AS "t4" ) AS "t17" ON "t15"."n_regionkey" = "t17"."r_regionkey" -) AS "t26" +) AS "t19" WHERE - "t26"."p_size" = 15 - AND "t26"."p_type" LIKE '%BRASS' - AND "t26"."r_name" = 'EUROPE' - AND "t26"."ps_supplycost" = ( + "t19"."p_size" = 15 + AND "t19"."p_type" LIKE '%BRASS' + AND "t19"."r_name" = 'EUROPE' + AND "t19"."ps_supplycost" = ( SELECT - MIN("t28"."ps_supplycost") AS "Min(ps_supplycost)" + MIN("t21"."ps_supplycost") AS "Min(ps_supplycost)" FROM ( SELECT - "t27"."ps_partkey", - "t27"."ps_suppkey", - "t27"."ps_availqty", - "t27"."ps_supplycost", - "t27"."ps_comment", - "t27"."s_suppkey", - "t27"."s_name", - "t27"."s_address", - "t27"."s_nationkey", - "t27"."s_phone", - "t27"."s_acctbal", - "t27"."s_comment", - "t27"."n_nationkey", - "t27"."n_name", - "t27"."n_regionkey", - "t27"."n_comment", - "t27"."r_regionkey", - "t27"."r_name", - "t27"."r_comment" + "t20"."ps_partkey", + "t20"."ps_suppkey", + "t20"."ps_availqty", + "t20"."ps_supplycost", + "t20"."ps_comment", + "t20"."s_suppkey", + "t20"."s_name", + "t20"."s_address", + "t20"."s_nationkey", + "t20"."s_phone", + "t20"."s_acctbal", + "t20"."s_comment", + "t20"."n_nationkey", + "t20"."n_name", + "t20"."n_regionkey", + "t20"."n_comment", + "t20"."r_regionkey", + "t20"."r_name", + "t20"."r_comment" FROM ( SELECT "t12"."ps_partkey", @@ -177,14 +177,14 @@ WHERE FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."REGION" AS "t4" ) AS "t18" ON "t16"."n_regionkey" = "t18"."r_regionkey" - ) AS "t27" + ) AS "t20" WHERE - "t27"."r_name" = 'EUROPE' AND "t26"."p_partkey" = "t27"."ps_partkey" - ) AS "t28" + "t20"."r_name" = 'EUROPE' AND "t19"."p_partkey" = "t20"."ps_partkey" + ) AS "t21" ) ORDER BY - "t26"."s_acctbal" DESC NULLS LAST, - "t26"."n_name" ASC, - "t26"."s_name" ASC, - "t26"."p_partkey" ASC + 
"t19"."s_acctbal" DESC NULLS LAST, + "t19"."n_name" ASC, + "t19"."s_name" ASC, + "t19"."p_partkey" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql index b44f31b764fc..99b705b54a52 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql @@ -1,12 +1,12 @@ SELECT - "t26"."s_acctbal", - "t26"."s_name", - "t26"."n_name", - "t26"."p_partkey", - "t26"."p_mfgr", - "t26"."s_address", - "t26"."s_phone", - "t26"."s_comment" + "t19"."s_acctbal", + "t19"."s_name", + "t19"."n_name", + "t19"."p_partkey", + "t19"."p_mfgr", + "t19"."s_address", + "t19"."s_phone", + "t19"."s_comment" FROM ( SELECT "t14"."p_partkey", @@ -89,35 +89,35 @@ FROM ( FROM "hive"."ibis_sf1"."region" AS "t4" ) AS "t12" ON "t10"."n_regionkey" = "t12"."r_regionkey" -) AS "t26" +) AS "t19" WHERE - "t26"."p_size" = 15 - AND "t26"."p_type" LIKE '%BRASS' - AND "t26"."r_name" = 'EUROPE' - AND "t26"."ps_supplycost" = ( + "t19"."p_size" = 15 + AND "t19"."p_type" LIKE '%BRASS' + AND "t19"."r_name" = 'EUROPE' + AND "t19"."ps_supplycost" = ( SELECT - MIN("t28"."ps_supplycost") AS "Min(ps_supplycost)" + MIN("t21"."ps_supplycost") AS "Min(ps_supplycost)" FROM ( SELECT - "t27"."ps_partkey", - "t27"."ps_suppkey", - "t27"."ps_availqty", - "t27"."ps_supplycost", - "t27"."ps_comment", - "t27"."s_suppkey", - "t27"."s_name", - "t27"."s_address", - "t27"."s_nationkey", - "t27"."s_phone", - "t27"."s_acctbal", - "t27"."s_comment", - "t27"."n_nationkey", - "t27"."n_name", - "t27"."n_regionkey", - "t27"."n_comment", - "t27"."r_regionkey", - "t27"."r_name", - "t27"."r_comment" + "t20"."ps_partkey", + "t20"."ps_suppkey", + "t20"."ps_availqty", + "t20"."ps_supplycost", + "t20"."ps_comment", + "t20"."s_suppkey", + "t20"."s_name", + "t20"."s_address", + "t20"."s_nationkey", + "t20"."s_phone", + "t20"."s_acctbal", + "t20"."s_comment", + "t20"."n_nationkey", + "t20"."n_name", + "t20"."n_regionkey", + "t20"."n_comment", + "t20"."r_regionkey", + "t20"."r_name", + "t20"."r_comment" FROM ( SELECT "t16"."ps_partkey", @@ -177,14 +177,14 @@ WHERE FROM "hive"."ibis_sf1"."region" AS "t4" ) AS "t13" ON "t11"."n_regionkey" = "t13"."r_regionkey" - ) AS "t27" + ) AS "t20" WHERE - "t27"."r_name" = 'EUROPE' AND "t26"."p_partkey" = "t27"."ps_partkey" - ) AS "t28" + "t20"."r_name" = 'EUROPE' AND "t19"."p_partkey" = "t20"."ps_partkey" + ) AS "t21" ) ORDER BY - "t26"."s_acctbal" DESC, - "t26"."n_name" ASC, - "t26"."s_name" ASC, - "t26"."p_partkey" ASC + "t19"."s_acctbal" DESC, + "t19"."n_name" ASC, + "t19"."s_name" ASC, + "t19"."p_partkey" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql index adb97afaf7f1..f1cd6fd9a332 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql @@ -1,51 +1,51 @@ SELECT - t10.l_orderkey, - t10.revenue, - t10.o_orderdate, - t10.o_shippriority + t8.l_orderkey, + t8.revenue, + t8.o_orderdate, + t8.o_shippriority FROM ( SELECT - t9.l_orderkey, - t9.o_orderdate, - t9.o_shippriority, - SUM(t9.l_extendedprice * ( - CAST(1 AS TINYINT) - t9.l_discount + t7.l_orderkey, + t7.o_orderdate, + t7.o_shippriority, + SUM(t7.l_extendedprice * ( + CAST(1 AS TINYINT) - 
t7.l_discount )) AS revenue FROM ( SELECT - t8.c_custkey, - t8.c_name, - t8.c_address, - t8.c_nationkey, - t8.c_phone, - t8.c_acctbal, - t8.c_mktsegment, - t8.c_comment, - t8.o_orderkey, - t8.o_custkey, - t8.o_orderstatus, - t8.o_totalprice, - t8.o_orderdate, - t8.o_orderpriority, - t8.o_clerk, - t8.o_shippriority, - t8.o_comment, - t8.l_orderkey, - t8.l_partkey, - t8.l_suppkey, - t8.l_linenumber, - t8.l_quantity, - t8.l_extendedprice, - t8.l_discount, - t8.l_tax, - t8.l_returnflag, - t8.l_linestatus, - t8.l_shipdate, - t8.l_commitdate, - t8.l_receiptdate, - t8.l_shipinstruct, - t8.l_shipmode, - t8.l_comment + t6.c_custkey, + t6.c_name, + t6.c_address, + t6.c_nationkey, + t6.c_phone, + t6.c_acctbal, + t6.c_mktsegment, + t6.c_comment, + t6.o_orderkey, + t6.o_custkey, + t6.o_orderstatus, + t6.o_totalprice, + t6.o_orderdate, + t6.o_orderpriority, + t6.o_clerk, + t6.o_shippriority, + t6.o_comment, + t6.l_orderkey, + t6.l_partkey, + t6.l_suppkey, + t6.l_linenumber, + t6.l_quantity, + t6.l_extendedprice, + t6.l_discount, + t6.l_tax, + t6.l_returnflag, + t6.l_linestatus, + t6.l_shipdate, + t6.l_commitdate, + t6.l_receiptdate, + t6.l_shipinstruct, + t6.l_shipmode, + t6.l_comment FROM ( SELECT t3.c_custkey, @@ -86,18 +86,18 @@ FROM ( ON t3.c_custkey = t4.o_custkey INNER JOIN lineitem AS t5 ON t5.l_orderkey = t4.o_orderkey - ) AS t8 + ) AS t6 WHERE - t8.c_mktsegment = 'BUILDING' - AND t8.o_orderdate < MAKE_DATE(1995, 3, 15) - AND t8.l_shipdate > MAKE_DATE(1995, 3, 15) - ) AS t9 + t6.c_mktsegment = 'BUILDING' + AND t6.o_orderdate < MAKE_DATE(1995, 3, 15) + AND t6.l_shipdate > MAKE_DATE(1995, 3, 15) + ) AS t7 GROUP BY 1, 2, 3 -) AS t10 +) AS t8 ORDER BY - t10.revenue DESC, - t10.o_orderdate ASC + t8.revenue DESC, + t8.o_orderdate ASC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql index 24678d9406a7..3a550d31ee0c 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql @@ -1,51 +1,51 @@ SELECT - "t13"."l_orderkey", - "t13"."revenue", - "t13"."o_orderdate", - "t13"."o_shippriority" + "t11"."l_orderkey", + "t11"."revenue", + "t11"."o_orderdate", + "t11"."o_shippriority" FROM ( SELECT - "t12"."l_orderkey", - "t12"."o_orderdate", - "t12"."o_shippriority", - SUM("t12"."l_extendedprice" * ( - 1 - "t12"."l_discount" + "t10"."l_orderkey", + "t10"."o_orderdate", + "t10"."o_shippriority", + SUM("t10"."l_extendedprice" * ( + 1 - "t10"."l_discount" )) AS "revenue" FROM ( SELECT - "t11"."c_custkey", - "t11"."c_name", - "t11"."c_address", - "t11"."c_nationkey", - "t11"."c_phone", - "t11"."c_acctbal", - "t11"."c_mktsegment", - "t11"."c_comment", - "t11"."o_orderkey", - "t11"."o_custkey", - "t11"."o_orderstatus", - "t11"."o_totalprice", - "t11"."o_orderdate", - "t11"."o_orderpriority", - "t11"."o_clerk", - "t11"."o_shippriority", - "t11"."o_comment", - "t11"."l_orderkey", - "t11"."l_partkey", - "t11"."l_suppkey", - "t11"."l_linenumber", - "t11"."l_quantity", - "t11"."l_extendedprice", - "t11"."l_discount", - "t11"."l_tax", - "t11"."l_returnflag", - "t11"."l_linestatus", - "t11"."l_shipdate", - "t11"."l_commitdate", - "t11"."l_receiptdate", - "t11"."l_shipinstruct", - "t11"."l_shipmode", - "t11"."l_comment" + "t9"."c_custkey", + "t9"."c_name", + "t9"."c_address", + "t9"."c_nationkey", + "t9"."c_phone", + "t9"."c_acctbal", + "t9"."c_mktsegment", + 
"t9"."c_comment", + "t9"."o_orderkey", + "t9"."o_custkey", + "t9"."o_orderstatus", + "t9"."o_totalprice", + "t9"."o_orderdate", + "t9"."o_orderpriority", + "t9"."o_clerk", + "t9"."o_shippriority", + "t9"."o_comment", + "t9"."l_orderkey", + "t9"."l_partkey", + "t9"."l_suppkey", + "t9"."l_linenumber", + "t9"."l_quantity", + "t9"."l_extendedprice", + "t9"."l_discount", + "t9"."l_tax", + "t9"."l_returnflag", + "t9"."l_linestatus", + "t9"."l_shipdate", + "t9"."l_commitdate", + "t9"."l_receiptdate", + "t9"."l_shipinstruct", + "t9"."l_shipmode", + "t9"."l_comment" FROM ( SELECT "t6"."c_custkey", @@ -128,18 +128,18 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t2" ) AS "t8" ON "t8"."l_orderkey" = "t7"."o_orderkey" - ) AS "t11" + ) AS "t9" WHERE - "t11"."c_mktsegment" = 'BUILDING' - AND "t11"."o_orderdate" < DATE_FROM_PARTS(1995, 3, 15) - AND "t11"."l_shipdate" > DATE_FROM_PARTS(1995, 3, 15) - ) AS "t12" + "t9"."c_mktsegment" = 'BUILDING' + AND "t9"."o_orderdate" < DATE_FROM_PARTS(1995, 3, 15) + AND "t9"."l_shipdate" > DATE_FROM_PARTS(1995, 3, 15) + ) AS "t10" GROUP BY 1, 2, 3 -) AS "t13" +) AS "t11" ORDER BY - "t13"."revenue" DESC NULLS LAST, - "t13"."o_orderdate" ASC + "t11"."revenue" DESC NULLS LAST, + "t11"."o_orderdate" ASC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql index d1905163479d..9081d8efe020 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql @@ -1,51 +1,51 @@ SELECT - "t13"."l_orderkey", - "t13"."revenue", - "t13"."o_orderdate", - "t13"."o_shippriority" + "t11"."l_orderkey", + "t11"."revenue", + "t11"."o_orderdate", + "t11"."o_shippriority" FROM ( SELECT - "t12"."l_orderkey", - "t12"."o_orderdate", - "t12"."o_shippriority", - SUM("t12"."l_extendedprice" * ( - 1 - "t12"."l_discount" + "t10"."l_orderkey", + "t10"."o_orderdate", + "t10"."o_shippriority", + SUM("t10"."l_extendedprice" * ( + 1 - "t10"."l_discount" )) AS "revenue" FROM ( SELECT - "t11"."c_custkey", - "t11"."c_name", - "t11"."c_address", - "t11"."c_nationkey", - "t11"."c_phone", - "t11"."c_acctbal", - "t11"."c_mktsegment", - "t11"."c_comment", - "t11"."o_orderkey", - "t11"."o_custkey", - "t11"."o_orderstatus", - "t11"."o_totalprice", - "t11"."o_orderdate", - "t11"."o_orderpriority", - "t11"."o_clerk", - "t11"."o_shippriority", - "t11"."o_comment", - "t11"."l_orderkey", - "t11"."l_partkey", - "t11"."l_suppkey", - "t11"."l_linenumber", - "t11"."l_quantity", - "t11"."l_extendedprice", - "t11"."l_discount", - "t11"."l_tax", - "t11"."l_returnflag", - "t11"."l_linestatus", - "t11"."l_shipdate", - "t11"."l_commitdate", - "t11"."l_receiptdate", - "t11"."l_shipinstruct", - "t11"."l_shipmode", - "t11"."l_comment" + "t9"."c_custkey", + "t9"."c_name", + "t9"."c_address", + "t9"."c_nationkey", + "t9"."c_phone", + "t9"."c_acctbal", + "t9"."c_mktsegment", + "t9"."c_comment", + "t9"."o_orderkey", + "t9"."o_custkey", + "t9"."o_orderstatus", + "t9"."o_totalprice", + "t9"."o_orderdate", + "t9"."o_orderpriority", + "t9"."o_clerk", + "t9"."o_shippriority", + "t9"."o_comment", + "t9"."l_orderkey", + "t9"."l_partkey", + "t9"."l_suppkey", + "t9"."l_linenumber", + "t9"."l_quantity", + "t9"."l_extendedprice", + "t9"."l_discount", + "t9"."l_tax", + "t9"."l_returnflag", + "t9"."l_linestatus", + "t9"."l_shipdate", + "t9"."l_commitdate", + "t9"."l_receiptdate", + 
"t9"."l_shipinstruct", + "t9"."l_shipmode", + "t9"."l_comment" FROM ( SELECT "t6"."c_custkey", @@ -128,18 +128,18 @@ FROM ( FROM "hive"."ibis_sf1"."lineitem" AS "t2" ) AS "t8" ON "t8"."l_orderkey" = "t7"."o_orderkey" - ) AS "t11" + ) AS "t9" WHERE - "t11"."c_mktsegment" = 'BUILDING' - AND "t11"."o_orderdate" < FROM_ISO8601_DATE('1995-03-15') - AND "t11"."l_shipdate" > FROM_ISO8601_DATE('1995-03-15') - ) AS "t12" + "t9"."c_mktsegment" = 'BUILDING' + AND "t9"."o_orderdate" < FROM_ISO8601_DATE('1995-03-15') + AND "t9"."l_shipdate" > FROM_ISO8601_DATE('1995-03-15') + ) AS "t10" GROUP BY 1, 2, 3 -) AS "t13" +) AS "t11" ORDER BY - "t13"."revenue" DESC, - "t13"."o_orderdate" ASC + "t11"."revenue" DESC, + "t11"."o_orderdate" ASC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql index ae3bbac7941f..2ee7ce67f6ed 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql @@ -1,61 +1,61 @@ SELECT - t19.n_name, - t19.revenue + t14.n_name, + t14.revenue FROM ( SELECT - t18.n_name, - SUM(t18.l_extendedprice * ( - CAST(1 AS TINYINT) - t18.l_discount + t13.n_name, + SUM(t13.l_extendedprice * ( + CAST(1 AS TINYINT) - t13.l_discount )) AS revenue FROM ( SELECT - t17.c_custkey, - t17.c_name, - t17.c_address, - t17.c_nationkey, - t17.c_phone, - t17.c_acctbal, - t17.c_mktsegment, - t17.c_comment, - t17.o_orderkey, - t17.o_custkey, - t17.o_orderstatus, - t17.o_totalprice, - t17.o_orderdate, - t17.o_orderpriority, - t17.o_clerk, - t17.o_shippriority, - t17.o_comment, - t17.l_orderkey, - t17.l_partkey, - t17.l_suppkey, - t17.l_linenumber, - t17.l_quantity, - t17.l_extendedprice, - t17.l_discount, - t17.l_tax, - t17.l_returnflag, - t17.l_linestatus, - t17.l_shipdate, - t17.l_commitdate, - t17.l_receiptdate, - t17.l_shipinstruct, - t17.l_shipmode, - t17.l_comment, - t17.s_suppkey, - t17.s_name, - t17.s_address, - t17.s_nationkey, - t17.s_phone, - t17.s_acctbal, - t17.s_comment, - t17.n_nationkey, - t17.n_name, - t17.n_regionkey, - t17.n_comment, - t17.r_regionkey, - t17.r_name, - t17.r_comment + t12.c_custkey, + t12.c_name, + t12.c_address, + t12.c_nationkey, + t12.c_phone, + t12.c_acctbal, + t12.c_mktsegment, + t12.c_comment, + t12.o_orderkey, + t12.o_custkey, + t12.o_orderstatus, + t12.o_totalprice, + t12.o_orderdate, + t12.o_orderpriority, + t12.o_clerk, + t12.o_shippriority, + t12.o_comment, + t12.l_orderkey, + t12.l_partkey, + t12.l_suppkey, + t12.l_linenumber, + t12.l_quantity, + t12.l_extendedprice, + t12.l_discount, + t12.l_tax, + t12.l_returnflag, + t12.l_linestatus, + t12.l_shipdate, + t12.l_commitdate, + t12.l_receiptdate, + t12.l_shipinstruct, + t12.l_shipmode, + t12.l_comment, + t12.s_suppkey, + t12.s_name, + t12.s_address, + t12.s_nationkey, + t12.s_phone, + t12.s_acctbal, + t12.s_comment, + t12.n_nationkey, + t12.n_name, + t12.n_regionkey, + t12.n_comment, + t12.r_regionkey, + t12.r_name, + t12.r_comment FROM ( SELECT t6.c_custkey, @@ -116,14 +116,14 @@ FROM ( ON t6.c_nationkey = t9.s_nationkey AND t9.s_nationkey = t10.n_nationkey INNER JOIN region AS t11 ON t10.n_regionkey = t11.r_regionkey - ) AS t17 + ) AS t12 WHERE - t17.r_name = 'ASIA' - AND t17.o_orderdate >= MAKE_DATE(1994, 1, 1) - AND t17.o_orderdate < MAKE_DATE(1995, 1, 1) - ) AS t18 + t12.r_name = 'ASIA' + AND t12.o_orderdate >= MAKE_DATE(1994, 1, 1) + AND t12.o_orderdate < MAKE_DATE(1995, 1, 1) 
+ ) AS t13 GROUP BY 1 -) AS t19 +) AS t14 ORDER BY - t19.revenue DESC \ No newline at end of file + t14.revenue DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql index 045a902f9d0b..b48f537aea51 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql @@ -1,61 +1,61 @@ SELECT - "t25"."n_name", - "t25"."revenue" + "t20"."n_name", + "t20"."revenue" FROM ( SELECT - "t24"."n_name", - SUM("t24"."l_extendedprice" * ( - 1 - "t24"."l_discount" + "t19"."n_name", + SUM("t19"."l_extendedprice" * ( + 1 - "t19"."l_discount" )) AS "revenue" FROM ( SELECT - "t23"."c_custkey", - "t23"."c_name", - "t23"."c_address", - "t23"."c_nationkey", - "t23"."c_phone", - "t23"."c_acctbal", - "t23"."c_mktsegment", - "t23"."c_comment", - "t23"."o_orderkey", - "t23"."o_custkey", - "t23"."o_orderstatus", - "t23"."o_totalprice", - "t23"."o_orderdate", - "t23"."o_orderpriority", - "t23"."o_clerk", - "t23"."o_shippriority", - "t23"."o_comment", - "t23"."l_orderkey", - "t23"."l_partkey", - "t23"."l_suppkey", - "t23"."l_linenumber", - "t23"."l_quantity", - "t23"."l_extendedprice", - "t23"."l_discount", - "t23"."l_tax", - "t23"."l_returnflag", - "t23"."l_linestatus", - "t23"."l_shipdate", - "t23"."l_commitdate", - "t23"."l_receiptdate", - "t23"."l_shipinstruct", - "t23"."l_shipmode", - "t23"."l_comment", - "t23"."s_suppkey", - "t23"."s_name", - "t23"."s_address", - "t23"."s_nationkey", - "t23"."s_phone", - "t23"."s_acctbal", - "t23"."s_comment", - "t23"."n_nationkey", - "t23"."n_name", - "t23"."n_regionkey", - "t23"."n_comment", - "t23"."r_regionkey", - "t23"."r_name", - "t23"."r_comment" + "t18"."c_custkey", + "t18"."c_name", + "t18"."c_address", + "t18"."c_nationkey", + "t18"."c_phone", + "t18"."c_acctbal", + "t18"."c_mktsegment", + "t18"."c_comment", + "t18"."o_orderkey", + "t18"."o_custkey", + "t18"."o_orderstatus", + "t18"."o_totalprice", + "t18"."o_orderdate", + "t18"."o_orderpriority", + "t18"."o_clerk", + "t18"."o_shippriority", + "t18"."o_comment", + "t18"."l_orderkey", + "t18"."l_partkey", + "t18"."l_suppkey", + "t18"."l_linenumber", + "t18"."l_quantity", + "t18"."l_extendedprice", + "t18"."l_discount", + "t18"."l_tax", + "t18"."l_returnflag", + "t18"."l_linestatus", + "t18"."l_shipdate", + "t18"."l_commitdate", + "t18"."l_receiptdate", + "t18"."l_shipinstruct", + "t18"."l_shipmode", + "t18"."l_comment", + "t18"."s_suppkey", + "t18"."s_name", + "t18"."s_address", + "t18"."s_nationkey", + "t18"."s_phone", + "t18"."s_acctbal", + "t18"."s_comment", + "t18"."n_nationkey", + "t18"."n_name", + "t18"."n_regionkey", + "t18"."n_comment", + "t18"."r_regionkey", + "t18"."r_name", + "t18"."r_comment" FROM ( SELECT "t12"."c_custkey", @@ -182,14 +182,14 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."REGION" AS "t5" ) AS "t17" ON "t16"."n_regionkey" = "t17"."r_regionkey" - ) AS "t23" + ) AS "t18" WHERE - "t23"."r_name" = 'ASIA' - AND "t23"."o_orderdate" >= DATE_FROM_PARTS(1994, 1, 1) - AND "t23"."o_orderdate" < DATE_FROM_PARTS(1995, 1, 1) - ) AS "t24" + "t18"."r_name" = 'ASIA' + AND "t18"."o_orderdate" >= DATE_FROM_PARTS(1994, 1, 1) + AND "t18"."o_orderdate" < DATE_FROM_PARTS(1995, 1, 1) + ) AS "t19" GROUP BY 1 -) AS "t25" +) AS "t20" ORDER BY - "t25"."revenue" DESC NULLS LAST \ No newline at end of file + "t20"."revenue" DESC NULLS LAST \ No newline at end of file diff 
--git a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql index 9c3e856b2a91..fe4eaf17ec3b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql @@ -1,61 +1,61 @@ SELECT - "t25"."n_name", - "t25"."revenue" + "t20"."n_name", + "t20"."revenue" FROM ( SELECT - "t24"."n_name", - SUM("t24"."l_extendedprice" * ( - 1 - "t24"."l_discount" + "t19"."n_name", + SUM("t19"."l_extendedprice" * ( + 1 - "t19"."l_discount" )) AS "revenue" FROM ( SELECT - "t23"."c_custkey", - "t23"."c_name", - "t23"."c_address", - "t23"."c_nationkey", - "t23"."c_phone", - "t23"."c_acctbal", - "t23"."c_mktsegment", - "t23"."c_comment", - "t23"."o_orderkey", - "t23"."o_custkey", - "t23"."o_orderstatus", - "t23"."o_totalprice", - "t23"."o_orderdate", - "t23"."o_orderpriority", - "t23"."o_clerk", - "t23"."o_shippriority", - "t23"."o_comment", - "t23"."l_orderkey", - "t23"."l_partkey", - "t23"."l_suppkey", - "t23"."l_linenumber", - "t23"."l_quantity", - "t23"."l_extendedprice", - "t23"."l_discount", - "t23"."l_tax", - "t23"."l_returnflag", - "t23"."l_linestatus", - "t23"."l_shipdate", - "t23"."l_commitdate", - "t23"."l_receiptdate", - "t23"."l_shipinstruct", - "t23"."l_shipmode", - "t23"."l_comment", - "t23"."s_suppkey", - "t23"."s_name", - "t23"."s_address", - "t23"."s_nationkey", - "t23"."s_phone", - "t23"."s_acctbal", - "t23"."s_comment", - "t23"."n_nationkey", - "t23"."n_name", - "t23"."n_regionkey", - "t23"."n_comment", - "t23"."r_regionkey", - "t23"."r_name", - "t23"."r_comment" + "t18"."c_custkey", + "t18"."c_name", + "t18"."c_address", + "t18"."c_nationkey", + "t18"."c_phone", + "t18"."c_acctbal", + "t18"."c_mktsegment", + "t18"."c_comment", + "t18"."o_orderkey", + "t18"."o_custkey", + "t18"."o_orderstatus", + "t18"."o_totalprice", + "t18"."o_orderdate", + "t18"."o_orderpriority", + "t18"."o_clerk", + "t18"."o_shippriority", + "t18"."o_comment", + "t18"."l_orderkey", + "t18"."l_partkey", + "t18"."l_suppkey", + "t18"."l_linenumber", + "t18"."l_quantity", + "t18"."l_extendedprice", + "t18"."l_discount", + "t18"."l_tax", + "t18"."l_returnflag", + "t18"."l_linestatus", + "t18"."l_shipdate", + "t18"."l_commitdate", + "t18"."l_receiptdate", + "t18"."l_shipinstruct", + "t18"."l_shipmode", + "t18"."l_comment", + "t18"."s_suppkey", + "t18"."s_name", + "t18"."s_address", + "t18"."s_nationkey", + "t18"."s_phone", + "t18"."s_acctbal", + "t18"."s_comment", + "t18"."n_nationkey", + "t18"."n_name", + "t18"."n_regionkey", + "t18"."n_comment", + "t18"."r_regionkey", + "t18"."r_name", + "t18"."r_comment" FROM ( SELECT "t14"."c_custkey", @@ -182,14 +182,14 @@ FROM ( FROM "hive"."ibis_sf1"."region" AS "t5" ) AS "t13" ON "t12"."n_regionkey" = "t13"."r_regionkey" - ) AS "t23" + ) AS "t18" WHERE - "t23"."r_name" = 'ASIA' - AND "t23"."o_orderdate" >= FROM_ISO8601_DATE('1994-01-01') - AND "t23"."o_orderdate" < FROM_ISO8601_DATE('1995-01-01') - ) AS "t24" + "t18"."r_name" = 'ASIA' + AND "t18"."o_orderdate" >= FROM_ISO8601_DATE('1994-01-01') + AND "t18"."o_orderdate" < FROM_ISO8601_DATE('1995-01-01') + ) AS "t19" GROUP BY 1 -) AS "t25" +) AS "t20" ORDER BY - "t25"."revenue" DESC \ No newline at end of file + "t20"."revenue" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql index ea7f9f6eb7fe..1f0d06b91d10 
100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql @@ -1,23 +1,23 @@ SELECT - t19.supp_nation, - t19.cust_nation, - t19.l_year, - t19.revenue + t14.supp_nation, + t14.cust_nation, + t14.l_year, + t14.revenue FROM ( SELECT - t18.supp_nation, - t18.cust_nation, - t18.l_year, - SUM(t18.volume) AS revenue + t13.supp_nation, + t13.cust_nation, + t13.l_year, + SUM(t13.volume) AS revenue FROM ( SELECT - t17.supp_nation, - t17.cust_nation, - t17.l_shipdate, - t17.l_extendedprice, - t17.l_discount, - t17.l_year, - t17.volume + t12.supp_nation, + t12.cust_nation, + t12.l_shipdate, + t12.l_extendedprice, + t12.l_discount, + t12.l_year, + t12.volume FROM ( SELECT t9.n_name AS supp_nation, @@ -40,32 +40,32 @@ FROM ( ON t5.s_nationkey = t9.n_nationkey INNER JOIN nation AS t11 ON t8.c_nationkey = t11.n_nationkey - ) AS t17 + ) AS t12 WHERE ( ( ( - t17.cust_nation = 'FRANCE' + t12.cust_nation = 'FRANCE' ) AND ( - t17.supp_nation = 'GERMANY' + t12.supp_nation = 'GERMANY' ) ) OR ( ( - t17.cust_nation = 'GERMANY' + t12.cust_nation = 'GERMANY' ) AND ( - t17.supp_nation = 'FRANCE' + t12.supp_nation = 'FRANCE' ) ) ) - AND t17.l_shipdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) - ) AS t18 + AND t12.l_shipdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) + ) AS t13 GROUP BY 1, 2, 3 -) AS t19 +) AS t14 ORDER BY - t19.supp_nation ASC, - t19.cust_nation ASC, - t19.l_year ASC \ No newline at end of file + t14.supp_nation ASC, + t14.cust_nation ASC, + t14.l_year ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql index 703b030ad4bc..5dcfbef386e1 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql @@ -7,25 +7,25 @@ WITH "t9" AS ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t4" ) SELECT - "t25"."supp_nation", - "t25"."cust_nation", - "t25"."l_year", - "t25"."revenue" + "t20"."supp_nation", + "t20"."cust_nation", + "t20"."l_year", + "t20"."revenue" FROM ( SELECT - "t24"."supp_nation", - "t24"."cust_nation", - "t24"."l_year", - SUM("t24"."volume") AS "revenue" + "t19"."supp_nation", + "t19"."cust_nation", + "t19"."l_year", + SUM("t19"."volume") AS "revenue" FROM ( SELECT - "t23"."supp_nation", - "t23"."cust_nation", - "t23"."l_shipdate", - "t23"."l_extendedprice", - "t23"."l_discount", - "t23"."l_year", - "t23"."volume" + "t18"."supp_nation", + "t18"."cust_nation", + "t18"."l_shipdate", + "t18"."l_extendedprice", + "t18"."l_discount", + "t18"."l_year", + "t18"."volume" FROM ( SELECT "t15"."n_name" AS "supp_nation", @@ -100,32 +100,32 @@ FROM ( ON "t10"."s_nationkey" = "t15"."n_nationkey" INNER JOIN "t9" AS "t17" ON "t13"."c_nationkey" = "t17"."n_nationkey" - ) AS "t23" + ) AS "t18" WHERE ( ( ( - "t23"."cust_nation" = 'FRANCE' + "t18"."cust_nation" = 'FRANCE' ) AND ( - "t23"."supp_nation" = 'GERMANY' + "t18"."supp_nation" = 'GERMANY' ) ) OR ( ( - "t23"."cust_nation" = 'GERMANY' + "t18"."cust_nation" = 'GERMANY' ) AND ( - "t23"."supp_nation" = 'FRANCE' + "t18"."supp_nation" = 'FRANCE' ) ) ) - AND "t23"."l_shipdate" BETWEEN DATE_FROM_PARTS(1995, 1, 1) AND DATE_FROM_PARTS(1996, 12, 31) - ) AS "t24" + AND "t18"."l_shipdate" BETWEEN DATE_FROM_PARTS(1995, 1, 1) AND DATE_FROM_PARTS(1996, 12, 31) + ) AS "t19" GROUP BY 1, 2, 3 
-) AS "t25" +) AS "t20" ORDER BY - "t25"."supp_nation" ASC, - "t25"."cust_nation" ASC, - "t25"."l_year" ASC \ No newline at end of file + "t20"."supp_nation" ASC, + "t20"."cust_nation" ASC, + "t20"."l_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql index c891e8aa758f..986668485bbf 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql @@ -7,25 +7,25 @@ WITH "t5" AS ( FROM "hive"."ibis_sf1"."nation" AS "t4" ) SELECT - "t25"."supp_nation", - "t25"."cust_nation", - "t25"."l_year", - "t25"."revenue" + "t20"."supp_nation", + "t20"."cust_nation", + "t20"."l_year", + "t20"."revenue" FROM ( SELECT - "t24"."supp_nation", - "t24"."cust_nation", - "t24"."l_year", - SUM("t24"."volume") AS "revenue" + "t19"."supp_nation", + "t19"."cust_nation", + "t19"."l_year", + SUM("t19"."volume") AS "revenue" FROM ( SELECT - "t23"."supp_nation", - "t23"."cust_nation", - "t23"."l_shipdate", - "t23"."l_extendedprice", - "t23"."l_discount", - "t23"."l_year", - "t23"."volume" + "t18"."supp_nation", + "t18"."cust_nation", + "t18"."l_shipdate", + "t18"."l_extendedprice", + "t18"."l_discount", + "t18"."l_year", + "t18"."volume" FROM ( SELECT "t15"."n_name" AS "supp_nation", @@ -100,32 +100,32 @@ FROM ( ON "t11"."s_nationkey" = "t15"."n_nationkey" INNER JOIN "t5" AS "t17" ON "t14"."c_nationkey" = "t17"."n_nationkey" - ) AS "t23" + ) AS "t18" WHERE ( ( ( - "t23"."cust_nation" = 'FRANCE' + "t18"."cust_nation" = 'FRANCE' ) AND ( - "t23"."supp_nation" = 'GERMANY' + "t18"."supp_nation" = 'GERMANY' ) ) OR ( ( - "t23"."cust_nation" = 'GERMANY' + "t18"."cust_nation" = 'GERMANY' ) AND ( - "t23"."supp_nation" = 'FRANCE' + "t18"."supp_nation" = 'FRANCE' ) ) ) - AND "t23"."l_shipdate" BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') - ) AS "t24" + AND "t18"."l_shipdate" BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') + ) AS "t19" GROUP BY 1, 2, 3 -) AS "t25" +) AS "t20" ORDER BY - "t25"."supp_nation" ASC, - "t25"."cust_nation" ASC, - "t25"."l_year" ASC \ No newline at end of file + "t20"."supp_nation" ASC, + "t20"."cust_nation" ASC, + "t20"."l_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql index 99ba095e07ae..a06154f0383f 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql @@ -1,19 +1,19 @@ SELECT - t25.o_year, - t25.mkt_share + t18.o_year, + t18.mkt_share FROM ( SELECT - t24.o_year, - SUM(t24.nation_volume) / SUM(t24.volume) AS mkt_share + t17.o_year, + SUM(t17.nation_volume) / SUM(t17.volume) AS mkt_share FROM ( SELECT - t23.o_year, - t23.volume, - t23.nation, - t23.r_name, - t23.o_orderdate, - t23.p_type, - CASE WHEN t23.nation = 'BRAZIL' THEN t23.volume ELSE CAST(0 AS TINYINT) END AS nation_volume + t16.o_year, + t16.volume, + t16.nation, + t16.r_name, + t16.o_orderdate, + t16.p_type, + CASE WHEN t16.nation = 'BRAZIL' THEN t16.volume ELSE CAST(0 AS TINYINT) END AS nation_volume FROM ( SELECT EXTRACT(year FROM t10.o_orderdate) AS o_year, @@ -39,14 +39,14 @@ FROM ( ON t12.n_regionkey = t14.r_regionkey INNER JOIN nation AS t15 ON t9.s_nationkey = t15.n_nationkey - ) AS t23 + 
) AS t16 WHERE - t23.r_name = 'AMERICA' - AND t23.o_orderdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) - AND t23.p_type = 'ECONOMY ANODIZED STEEL' - ) AS t24 + t16.r_name = 'AMERICA' + AND t16.o_orderdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) + AND t16.p_type = 'ECONOMY ANODIZED STEEL' + ) AS t17 GROUP BY 1 -) AS t25 +) AS t18 ORDER BY - t25.o_year ASC \ No newline at end of file + t18.o_year ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql index e92bababc234..8ff3c4767c4d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql @@ -7,21 +7,21 @@ WITH "t13" AS ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t6" ) SELECT - "t33"."o_year", - "t33"."mkt_share" + "t26"."o_year", + "t26"."mkt_share" FROM ( SELECT - "t32"."o_year", - SUM("t32"."nation_volume") / SUM("t32"."volume") AS "mkt_share" + "t25"."o_year", + SUM("t25"."nation_volume") / SUM("t25"."volume") AS "mkt_share" FROM ( SELECT - "t31"."o_year", - "t31"."volume", - "t31"."nation", - "t31"."r_name", - "t31"."o_orderdate", - "t31"."p_type", - CASE WHEN "t31"."nation" = 'BRAZIL' THEN "t31"."volume" ELSE 0 END AS "nation_volume" + "t24"."o_year", + "t24"."volume", + "t24"."nation", + "t24"."r_name", + "t24"."o_orderdate", + "t24"."p_type", + CASE WHEN "t24"."nation" = 'BRAZIL' THEN "t24"."volume" ELSE 0 END AS "nation_volume" FROM ( SELECT DATE_PART(year, "t17"."o_orderdate") AS "o_year", @@ -117,14 +117,14 @@ FROM ( ON "t21"."n_regionkey" = "t19"."r_regionkey" INNER JOIN "t13" AS "t23" ON "t16"."s_nationkey" = "t23"."n_nationkey" - ) AS "t31" + ) AS "t24" WHERE - "t31"."r_name" = 'AMERICA' - AND "t31"."o_orderdate" BETWEEN DATE_FROM_PARTS(1995, 1, 1) AND DATE_FROM_PARTS(1996, 12, 31) - AND "t31"."p_type" = 'ECONOMY ANODIZED STEEL' - ) AS "t32" + "t24"."r_name" = 'AMERICA' + AND "t24"."o_orderdate" BETWEEN DATE_FROM_PARTS(1995, 1, 1) AND DATE_FROM_PARTS(1996, 12, 31) + AND "t24"."p_type" = 'ECONOMY ANODIZED STEEL' + ) AS "t25" GROUP BY 1 -) AS "t33" +) AS "t26" ORDER BY - "t33"."o_year" ASC \ No newline at end of file + "t26"."o_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql index b78f38c6519a..80ae67d1ebd1 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql @@ -7,21 +7,21 @@ WITH "t8" AS ( FROM "hive"."ibis_sf1"."nation" AS "t6" ) SELECT - "t33"."o_year", - "t33"."mkt_share" + "t26"."o_year", + "t26"."mkt_share" FROM ( SELECT - "t32"."o_year", - CAST(SUM("t32"."nation_volume") AS DOUBLE) / SUM("t32"."volume") AS "mkt_share" + "t25"."o_year", + CAST(SUM("t25"."nation_volume") AS DOUBLE) / SUM("t25"."volume") AS "mkt_share" FROM ( SELECT - "t31"."o_year", - "t31"."volume", - "t31"."nation", - "t31"."r_name", - "t31"."o_orderdate", - "t31"."p_type", - CASE WHEN "t31"."nation" = 'BRAZIL' THEN "t31"."volume" ELSE 0 END AS "nation_volume" + "t24"."o_year", + "t24"."volume", + "t24"."nation", + "t24"."r_name", + "t24"."o_orderdate", + "t24"."p_type", + CASE WHEN "t24"."nation" = 'BRAZIL' THEN "t24"."volume" ELSE 0 END AS "nation_volume" FROM ( SELECT EXTRACT(year FROM "t19"."o_orderdate") 
AS "o_year", @@ -117,14 +117,14 @@ FROM ( ON "t21"."n_regionkey" = "t14"."r_regionkey" INNER JOIN "t8" AS "t23" ON "t18"."s_nationkey" = "t23"."n_nationkey" - ) AS "t31" + ) AS "t24" WHERE - "t31"."r_name" = 'AMERICA' - AND "t31"."o_orderdate" BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') - AND "t31"."p_type" = 'ECONOMY ANODIZED STEEL' - ) AS "t32" + "t24"."r_name" = 'AMERICA' + AND "t24"."o_orderdate" BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') + AND "t24"."p_type" = 'ECONOMY ANODIZED STEEL' + ) AS "t25" GROUP BY 1 -) AS "t33" +) AS "t26" ORDER BY - "t33"."o_year" ASC \ No newline at end of file + "t26"."o_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql index b305db73e0ae..b146d5c6cea8 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql @@ -1,18 +1,18 @@ SELECT - t19.nation, - t19.o_year, - t19.sum_profit + t14.nation, + t14.o_year, + t14.sum_profit FROM ( SELECT - t18.nation, - t18.o_year, - SUM(t18.amount) AS sum_profit + t13.nation, + t13.o_year, + SUM(t13.amount) AS sum_profit FROM ( SELECT - t17.amount, - t17.o_year, - t17.nation, - t17.p_name + t12.amount, + t12.o_year, + t12.nation, + t12.p_name FROM ( SELECT ( @@ -36,14 +36,14 @@ FROM ( ON t10.o_orderkey = t6.l_orderkey INNER JOIN nation AS t11 ON t7.s_nationkey = t11.n_nationkey - ) AS t17 + ) AS t12 WHERE - t17.p_name LIKE '%green%' - ) AS t18 + t12.p_name LIKE '%green%' + ) AS t13 GROUP BY 1, 2 -) AS t19 +) AS t14 ORDER BY - t19.nation ASC, - t19.o_year DESC \ No newline at end of file + t14.nation ASC, + t14.o_year DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql index 7d854d04d97f..44332a751480 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql @@ -1,18 +1,18 @@ SELECT - "t25"."nation", - "t25"."o_year", - "t25"."sum_profit" + "t20"."nation", + "t20"."o_year", + "t20"."sum_profit" FROM ( SELECT - "t24"."nation", - "t24"."o_year", - SUM("t24"."amount") AS "sum_profit" + "t19"."nation", + "t19"."o_year", + SUM("t19"."amount") AS "sum_profit" FROM ( SELECT - "t23"."amount", - "t23"."o_year", - "t23"."nation", - "t23"."p_name" + "t18"."amount", + "t18"."o_year", + "t18"."nation", + "t18"."p_name" FROM ( SELECT ( @@ -104,14 +104,14 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t5" ) AS "t17" ON "t13"."s_nationkey" = "t17"."n_nationkey" - ) AS "t23" + ) AS "t18" WHERE - "t23"."p_name" LIKE '%green%' - ) AS "t24" + "t18"."p_name" LIKE '%green%' + ) AS "t19" GROUP BY 1, 2 -) AS "t25" +) AS "t20" ORDER BY - "t25"."nation" ASC, - "t25"."o_year" DESC NULLS LAST \ No newline at end of file + "t20"."nation" ASC, + "t20"."o_year" DESC NULLS LAST \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql index e8149adf2755..80c015bf89bb 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql @@ -1,18 +1,18 @@ SELECT - 
"t25"."nation", - "t25"."o_year", - "t25"."sum_profit" + "t20"."nation", + "t20"."o_year", + "t20"."sum_profit" FROM ( SELECT - "t24"."nation", - "t24"."o_year", - SUM("t24"."amount") AS "sum_profit" + "t19"."nation", + "t19"."o_year", + SUM("t19"."amount") AS "sum_profit" FROM ( SELECT - "t23"."amount", - "t23"."o_year", - "t23"."nation", - "t23"."p_name" + "t18"."amount", + "t18"."o_year", + "t18"."nation", + "t18"."p_name" FROM ( SELECT ( @@ -104,14 +104,14 @@ FROM ( FROM "hive"."ibis_sf1"."nation" AS "t5" ) AS "t12" ON "t14"."s_nationkey" = "t12"."n_nationkey" - ) AS "t23" + ) AS "t18" WHERE - "t23"."p_name" LIKE '%green%' - ) AS "t24" + "t18"."p_name" LIKE '%green%' + ) AS "t19" GROUP BY 1, 2 -) AS "t25" +) AS "t20" ORDER BY - "t25"."nation" ASC, - "t25"."o_year" DESC \ No newline at end of file + "t20"."nation" ASC, + "t20"."o_year" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql index a08b8198283b..33986b7a70e9 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql @@ -1,63 +1,63 @@ SELECT - t13.c_custkey, - t13.c_name, - t13.revenue, - t13.c_acctbal, - t13.n_name, - t13.c_address, - t13.c_phone, - t13.c_comment + t10.c_custkey, + t10.c_name, + t10.revenue, + t10.c_acctbal, + t10.n_name, + t10.c_address, + t10.c_phone, + t10.c_comment FROM ( SELECT - t12.c_custkey, - t12.c_name, - t12.c_acctbal, - t12.n_name, - t12.c_address, - t12.c_phone, - t12.c_comment, - SUM(t12.l_extendedprice * ( - CAST(1 AS TINYINT) - t12.l_discount + t9.c_custkey, + t9.c_name, + t9.c_acctbal, + t9.n_name, + t9.c_address, + t9.c_phone, + t9.c_comment, + SUM(t9.l_extendedprice * ( + CAST(1 AS TINYINT) - t9.l_discount )) AS revenue FROM ( SELECT - t11.c_custkey, - t11.c_name, - t11.c_address, - t11.c_nationkey, - t11.c_phone, - t11.c_acctbal, - t11.c_mktsegment, - t11.c_comment, - t11.o_orderkey, - t11.o_custkey, - t11.o_orderstatus, - t11.o_totalprice, - t11.o_orderdate, - t11.o_orderpriority, - t11.o_clerk, - t11.o_shippriority, - t11.o_comment, - t11.l_orderkey, - t11.l_partkey, - t11.l_suppkey, - t11.l_linenumber, - t11.l_quantity, - t11.l_extendedprice, - t11.l_discount, - t11.l_tax, - t11.l_returnflag, - t11.l_linestatus, - t11.l_shipdate, - t11.l_commitdate, - t11.l_receiptdate, - t11.l_shipinstruct, - t11.l_shipmode, - t11.l_comment, - t11.n_nationkey, - t11.n_name, - t11.n_regionkey, - t11.n_comment + t8.c_custkey, + t8.c_name, + t8.c_address, + t8.c_nationkey, + t8.c_phone, + t8.c_acctbal, + t8.c_mktsegment, + t8.c_comment, + t8.o_orderkey, + t8.o_custkey, + t8.o_orderstatus, + t8.o_totalprice, + t8.o_orderdate, + t8.o_orderpriority, + t8.o_clerk, + t8.o_shippriority, + t8.o_comment, + t8.l_orderkey, + t8.l_partkey, + t8.l_suppkey, + t8.l_linenumber, + t8.l_quantity, + t8.l_extendedprice, + t8.l_discount, + t8.l_tax, + t8.l_returnflag, + t8.l_linestatus, + t8.l_shipdate, + t8.l_commitdate, + t8.l_receiptdate, + t8.l_shipinstruct, + t8.l_shipmode, + t8.l_comment, + t8.n_nationkey, + t8.n_name, + t8.n_regionkey, + t8.n_comment FROM ( SELECT t4.c_custkey, @@ -104,12 +104,12 @@ FROM ( ON t6.l_orderkey = t5.o_orderkey INNER JOIN nation AS t7 ON t4.c_nationkey = t7.n_nationkey - ) AS t11 + ) AS t8 WHERE - t11.o_orderdate >= MAKE_DATE(1993, 10, 1) - AND t11.o_orderdate < MAKE_DATE(1994, 1, 1) - AND t11.l_returnflag = 'R' - ) AS t12 + t8.o_orderdate >= 
MAKE_DATE(1993, 10, 1) + AND t8.o_orderdate < MAKE_DATE(1994, 1, 1) + AND t8.l_returnflag = 'R' + ) AS t9 GROUP BY 1, 2, @@ -118,7 +118,7 @@ FROM ( 5, 6, 7 -) AS t13 +) AS t10 ORDER BY - t13.revenue DESC + t10.revenue DESC LIMIT 20 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql index 3714cb16807d..8a086f2236e9 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql @@ -1,63 +1,63 @@ SELECT - "t17"."c_custkey", - "t17"."c_name", - "t17"."revenue", - "t17"."c_acctbal", - "t17"."n_name", - "t17"."c_address", - "t17"."c_phone", - "t17"."c_comment" + "t14"."c_custkey", + "t14"."c_name", + "t14"."revenue", + "t14"."c_acctbal", + "t14"."n_name", + "t14"."c_address", + "t14"."c_phone", + "t14"."c_comment" FROM ( SELECT - "t16"."c_custkey", - "t16"."c_name", - "t16"."c_acctbal", - "t16"."n_name", - "t16"."c_address", - "t16"."c_phone", - "t16"."c_comment", - SUM("t16"."l_extendedprice" * ( - 1 - "t16"."l_discount" + "t13"."c_custkey", + "t13"."c_name", + "t13"."c_acctbal", + "t13"."n_name", + "t13"."c_address", + "t13"."c_phone", + "t13"."c_comment", + SUM("t13"."l_extendedprice" * ( + 1 - "t13"."l_discount" )) AS "revenue" FROM ( SELECT - "t15"."c_custkey", - "t15"."c_name", - "t15"."c_address", - "t15"."c_nationkey", - "t15"."c_phone", - "t15"."c_acctbal", - "t15"."c_mktsegment", - "t15"."c_comment", - "t15"."o_orderkey", - "t15"."o_custkey", - "t15"."o_orderstatus", - "t15"."o_totalprice", - "t15"."o_orderdate", - "t15"."o_orderpriority", - "t15"."o_clerk", - "t15"."o_shippriority", - "t15"."o_comment", - "t15"."l_orderkey", - "t15"."l_partkey", - "t15"."l_suppkey", - "t15"."l_linenumber", - "t15"."l_quantity", - "t15"."l_extendedprice", - "t15"."l_discount", - "t15"."l_tax", - "t15"."l_returnflag", - "t15"."l_linestatus", - "t15"."l_shipdate", - "t15"."l_commitdate", - "t15"."l_receiptdate", - "t15"."l_shipinstruct", - "t15"."l_shipmode", - "t15"."l_comment", - "t15"."n_nationkey", - "t15"."n_name", - "t15"."n_regionkey", - "t15"."n_comment" + "t12"."c_custkey", + "t12"."c_name", + "t12"."c_address", + "t12"."c_nationkey", + "t12"."c_phone", + "t12"."c_acctbal", + "t12"."c_mktsegment", + "t12"."c_comment", + "t12"."o_orderkey", + "t12"."o_custkey", + "t12"."o_orderstatus", + "t12"."o_totalprice", + "t12"."o_orderdate", + "t12"."o_orderpriority", + "t12"."o_clerk", + "t12"."o_shippriority", + "t12"."o_comment", + "t12"."l_orderkey", + "t12"."l_partkey", + "t12"."l_suppkey", + "t12"."l_linenumber", + "t12"."l_quantity", + "t12"."l_extendedprice", + "t12"."l_discount", + "t12"."l_tax", + "t12"."l_returnflag", + "t12"."l_linestatus", + "t12"."l_shipdate", + "t12"."l_commitdate", + "t12"."l_receiptdate", + "t12"."l_shipinstruct", + "t12"."l_shipmode", + "t12"."l_comment", + "t12"."n_nationkey", + "t12"."n_name", + "t12"."n_regionkey", + "t12"."n_comment" FROM ( SELECT "t8"."c_custkey", @@ -153,12 +153,12 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t3" ) AS "t11" ON "t8"."c_nationkey" = "t11"."n_nationkey" - ) AS "t15" + ) AS "t12" WHERE - "t15"."o_orderdate" >= DATE_FROM_PARTS(1993, 10, 1) - AND "t15"."o_orderdate" < DATE_FROM_PARTS(1994, 1, 1) - AND "t15"."l_returnflag" = 'R' - ) AS "t16" + "t12"."o_orderdate" >= DATE_FROM_PARTS(1993, 10, 1) + AND "t12"."o_orderdate" < DATE_FROM_PARTS(1994, 1, 1) + AND "t12"."l_returnflag" = 
'R' + ) AS "t13" GROUP BY 1, 2, @@ -167,7 +167,7 @@ FROM ( 5, 6, 7 -) AS "t17" +) AS "t14" ORDER BY - "t17"."revenue" DESC NULLS LAST + "t14"."revenue" DESC NULLS LAST LIMIT 20 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql index df0d20c1dcd9..788f2fc4a1cd 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql @@ -1,63 +1,63 @@ SELECT - "t17"."c_custkey", - "t17"."c_name", - "t17"."revenue", - "t17"."c_acctbal", - "t17"."n_name", - "t17"."c_address", - "t17"."c_phone", - "t17"."c_comment" + "t14"."c_custkey", + "t14"."c_name", + "t14"."revenue", + "t14"."c_acctbal", + "t14"."n_name", + "t14"."c_address", + "t14"."c_phone", + "t14"."c_comment" FROM ( SELECT - "t16"."c_custkey", - "t16"."c_name", - "t16"."c_acctbal", - "t16"."n_name", - "t16"."c_address", - "t16"."c_phone", - "t16"."c_comment", - SUM("t16"."l_extendedprice" * ( - 1 - "t16"."l_discount" + "t13"."c_custkey", + "t13"."c_name", + "t13"."c_acctbal", + "t13"."n_name", + "t13"."c_address", + "t13"."c_phone", + "t13"."c_comment", + SUM("t13"."l_extendedprice" * ( + 1 - "t13"."l_discount" )) AS "revenue" FROM ( SELECT - "t15"."c_custkey", - "t15"."c_name", - "t15"."c_address", - "t15"."c_nationkey", - "t15"."c_phone", - "t15"."c_acctbal", - "t15"."c_mktsegment", - "t15"."c_comment", - "t15"."o_orderkey", - "t15"."o_custkey", - "t15"."o_orderstatus", - "t15"."o_totalprice", - "t15"."o_orderdate", - "t15"."o_orderpriority", - "t15"."o_clerk", - "t15"."o_shippriority", - "t15"."o_comment", - "t15"."l_orderkey", - "t15"."l_partkey", - "t15"."l_suppkey", - "t15"."l_linenumber", - "t15"."l_quantity", - "t15"."l_extendedprice", - "t15"."l_discount", - "t15"."l_tax", - "t15"."l_returnflag", - "t15"."l_linestatus", - "t15"."l_shipdate", - "t15"."l_commitdate", - "t15"."l_receiptdate", - "t15"."l_shipinstruct", - "t15"."l_shipmode", - "t15"."l_comment", - "t15"."n_nationkey", - "t15"."n_name", - "t15"."n_regionkey", - "t15"."n_comment" + "t12"."c_custkey", + "t12"."c_name", + "t12"."c_address", + "t12"."c_nationkey", + "t12"."c_phone", + "t12"."c_acctbal", + "t12"."c_mktsegment", + "t12"."c_comment", + "t12"."o_orderkey", + "t12"."o_custkey", + "t12"."o_orderstatus", + "t12"."o_totalprice", + "t12"."o_orderdate", + "t12"."o_orderpriority", + "t12"."o_clerk", + "t12"."o_shippriority", + "t12"."o_comment", + "t12"."l_orderkey", + "t12"."l_partkey", + "t12"."l_suppkey", + "t12"."l_linenumber", + "t12"."l_quantity", + "t12"."l_extendedprice", + "t12"."l_discount", + "t12"."l_tax", + "t12"."l_returnflag", + "t12"."l_linestatus", + "t12"."l_shipdate", + "t12"."l_commitdate", + "t12"."l_receiptdate", + "t12"."l_shipinstruct", + "t12"."l_shipmode", + "t12"."l_comment", + "t12"."n_nationkey", + "t12"."n_name", + "t12"."n_regionkey", + "t12"."n_comment" FROM ( SELECT "t9"."c_custkey", @@ -153,12 +153,12 @@ FROM ( FROM "hive"."ibis_sf1"."nation" AS "t3" ) AS "t8" ON "t9"."c_nationkey" = "t8"."n_nationkey" - ) AS "t15" + ) AS "t12" WHERE - "t15"."o_orderdate" >= FROM_ISO8601_DATE('1993-10-01') - AND "t15"."o_orderdate" < FROM_ISO8601_DATE('1994-01-01') - AND "t15"."l_returnflag" = 'R' - ) AS "t16" + "t12"."o_orderdate" >= FROM_ISO8601_DATE('1993-10-01') + AND "t12"."o_orderdate" < FROM_ISO8601_DATE('1994-01-01') + AND "t12"."l_returnflag" = 'R' + ) AS "t13" GROUP BY 1, 2, @@ -167,7 +167,7 @@ FROM ( 5, 6, 7 -) AS 
"t17" +) AS "t14" ORDER BY - "t17"."revenue" DESC + "t14"."revenue" DESC LIMIT 20 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql index c5d401180d41..594e5a7db6bd 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql @@ -1,28 +1,28 @@ SELECT - t10.ps_partkey, - t10.value + t8.ps_partkey, + t8.value FROM ( SELECT - t9.ps_partkey, - SUM(t9.ps_supplycost * t9.ps_availqty) AS value + t7.ps_partkey, + SUM(t7.ps_supplycost * t7.ps_availqty) AS value FROM ( SELECT - t8.ps_partkey, - t8.ps_suppkey, - t8.ps_availqty, - t8.ps_supplycost, - t8.ps_comment, - t8.s_suppkey, - t8.s_name, - t8.s_address, - t8.s_nationkey, - t8.s_phone, - t8.s_acctbal, - t8.s_comment, - t8.n_nationkey, - t8.n_name, - t8.n_regionkey, - t8.n_comment + t6.ps_partkey, + t6.ps_suppkey, + t6.ps_availqty, + t6.ps_supplycost, + t6.ps_comment, + t6.s_suppkey, + t6.s_name, + t6.s_address, + t6.s_nationkey, + t6.s_phone, + t6.s_acctbal, + t6.s_comment, + t6.n_nationkey, + t6.n_name, + t6.n_regionkey, + t6.n_comment FROM ( SELECT t3.ps_partkey, @@ -46,36 +46,36 @@ FROM ( ON t3.ps_suppkey = t4.s_suppkey INNER JOIN nation AS t5 ON t5.n_nationkey = t4.s_nationkey - ) AS t8 + ) AS t6 WHERE - t8.n_name = 'GERMANY' - ) AS t9 + t6.n_name = 'GERMANY' + ) AS t7 GROUP BY 1 -) AS t10 +) AS t8 WHERE - t10.value > ( + t8.value > ( ( SELECT - SUM(t9.ps_supplycost * t9.ps_availqty) AS "Sum(Multiply(ps_supplycost, ps_availqty))" + SUM(t7.ps_supplycost * t7.ps_availqty) AS "Sum(Multiply(ps_supplycost, ps_availqty))" FROM ( SELECT - t8.ps_partkey, - t8.ps_suppkey, - t8.ps_availqty, - t8.ps_supplycost, - t8.ps_comment, - t8.s_suppkey, - t8.s_name, - t8.s_address, - t8.s_nationkey, - t8.s_phone, - t8.s_acctbal, - t8.s_comment, - t8.n_nationkey, - t8.n_name, - t8.n_regionkey, - t8.n_comment + t6.ps_partkey, + t6.ps_suppkey, + t6.ps_availqty, + t6.ps_supplycost, + t6.ps_comment, + t6.s_suppkey, + t6.s_name, + t6.s_address, + t6.s_nationkey, + t6.s_phone, + t6.s_acctbal, + t6.s_comment, + t6.n_nationkey, + t6.n_name, + t6.n_regionkey, + t6.n_comment FROM ( SELECT t3.ps_partkey, @@ -99,11 +99,11 @@ WHERE ON t3.ps_suppkey = t4.s_suppkey INNER JOIN nation AS t5 ON t5.n_nationkey = t4.s_nationkey - ) AS t8 + ) AS t6 WHERE - t8.n_name = 'GERMANY' - ) AS t9 + t6.n_name = 'GERMANY' + ) AS t7 ) * CAST(0.0001 AS DOUBLE) ) ORDER BY - t10.value DESC \ No newline at end of file + t8.value DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql index 3dae0694734f..d655a5b4381e 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql @@ -1,28 +1,28 @@ SELECT - "t13"."ps_partkey", - "t13"."value" + "t11"."ps_partkey", + "t11"."value" FROM ( SELECT - "t12"."ps_partkey", - SUM("t12"."ps_supplycost" * "t12"."ps_availqty") AS "value" + "t10"."ps_partkey", + SUM("t10"."ps_supplycost" * "t10"."ps_availqty") AS "value" FROM ( SELECT - "t11"."ps_partkey", - "t11"."ps_suppkey", - "t11"."ps_availqty", - "t11"."ps_supplycost", - "t11"."ps_comment", - "t11"."s_suppkey", - "t11"."s_name", - "t11"."s_address", - "t11"."s_nationkey", - "t11"."s_phone", - "t11"."s_acctbal", - "t11"."s_comment", 
- "t11"."n_nationkey", - "t11"."n_name", - "t11"."n_regionkey", - "t11"."n_comment" + "t9"."ps_partkey", + "t9"."ps_suppkey", + "t9"."ps_availqty", + "t9"."ps_supplycost", + "t9"."ps_comment", + "t9"."s_suppkey", + "t9"."s_name", + "t9"."s_address", + "t9"."s_nationkey", + "t9"."s_phone", + "t9"."s_acctbal", + "t9"."s_comment", + "t9"."n_nationkey", + "t9"."n_name", + "t9"."n_regionkey", + "t9"."n_comment" FROM ( SELECT "t6"."ps_partkey", @@ -71,36 +71,36 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t2" ) AS "t8" ON "t8"."n_nationkey" = "t7"."s_nationkey" - ) AS "t11" + ) AS "t9" WHERE - "t11"."n_name" = 'GERMANY' - ) AS "t12" + "t9"."n_name" = 'GERMANY' + ) AS "t10" GROUP BY 1 -) AS "t13" +) AS "t11" WHERE - "t13"."value" > ( + "t11"."value" > ( ( SELECT - SUM("t12"."ps_supplycost" * "t12"."ps_availqty") AS "Sum(Multiply(ps_supplycost, ps_availqty))" + SUM("t10"."ps_supplycost" * "t10"."ps_availqty") AS "Sum(Multiply(ps_supplycost, ps_availqty))" FROM ( SELECT - "t11"."ps_partkey", - "t11"."ps_suppkey", - "t11"."ps_availqty", - "t11"."ps_supplycost", - "t11"."ps_comment", - "t11"."s_suppkey", - "t11"."s_name", - "t11"."s_address", - "t11"."s_nationkey", - "t11"."s_phone", - "t11"."s_acctbal", - "t11"."s_comment", - "t11"."n_nationkey", - "t11"."n_name", - "t11"."n_regionkey", - "t11"."n_comment" + "t9"."ps_partkey", + "t9"."ps_suppkey", + "t9"."ps_availqty", + "t9"."ps_supplycost", + "t9"."ps_comment", + "t9"."s_suppkey", + "t9"."s_name", + "t9"."s_address", + "t9"."s_nationkey", + "t9"."s_phone", + "t9"."s_acctbal", + "t9"."s_comment", + "t9"."n_nationkey", + "t9"."n_name", + "t9"."n_regionkey", + "t9"."n_comment" FROM ( SELECT "t6"."ps_partkey", @@ -149,11 +149,11 @@ WHERE FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t2" ) AS "t8" ON "t8"."n_nationkey" = "t7"."s_nationkey" - ) AS "t11" + ) AS "t9" WHERE - "t11"."n_name" = 'GERMANY' - ) AS "t12" + "t9"."n_name" = 'GERMANY' + ) AS "t10" ) * 0.0001 ) ORDER BY - "t13"."value" DESC NULLS LAST \ No newline at end of file + "t11"."value" DESC NULLS LAST \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql index 066c3e445817..ea70d27358c1 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql @@ -1,28 +1,28 @@ SELECT - "t13"."ps_partkey", - "t13"."value" + "t11"."ps_partkey", + "t11"."value" FROM ( SELECT - "t12"."ps_partkey", - SUM("t12"."ps_supplycost" * "t12"."ps_availqty") AS "value" + "t10"."ps_partkey", + SUM("t10"."ps_supplycost" * "t10"."ps_availqty") AS "value" FROM ( SELECT - "t11"."ps_partkey", - "t11"."ps_suppkey", - "t11"."ps_availqty", - "t11"."ps_supplycost", - "t11"."ps_comment", - "t11"."s_suppkey", - "t11"."s_name", - "t11"."s_address", - "t11"."s_nationkey", - "t11"."s_phone", - "t11"."s_acctbal", - "t11"."s_comment", - "t11"."n_nationkey", - "t11"."n_name", - "t11"."n_regionkey", - "t11"."n_comment" + "t9"."ps_partkey", + "t9"."ps_suppkey", + "t9"."ps_availqty", + "t9"."ps_supplycost", + "t9"."ps_comment", + "t9"."s_suppkey", + "t9"."s_name", + "t9"."s_address", + "t9"."s_nationkey", + "t9"."s_phone", + "t9"."s_acctbal", + "t9"."s_comment", + "t9"."n_nationkey", + "t9"."n_name", + "t9"."n_regionkey", + "t9"."n_comment" FROM ( SELECT "t7"."ps_partkey", @@ -71,36 +71,36 @@ FROM ( FROM "hive"."ibis_sf1"."nation" AS "t2" ) AS "t6" ON "t6"."n_nationkey" = 
"t8"."s_nationkey" - ) AS "t11" + ) AS "t9" WHERE - "t11"."n_name" = 'GERMANY' - ) AS "t12" + "t9"."n_name" = 'GERMANY' + ) AS "t10" GROUP BY 1 -) AS "t13" +) AS "t11" WHERE - "t13"."value" > ( + "t11"."value" > ( ( SELECT - SUM("t12"."ps_supplycost" * "t12"."ps_availqty") AS "Sum(Multiply(ps_supplycost, ps_availqty))" + SUM("t10"."ps_supplycost" * "t10"."ps_availqty") AS "Sum(Multiply(ps_supplycost, ps_availqty))" FROM ( SELECT - "t11"."ps_partkey", - "t11"."ps_suppkey", - "t11"."ps_availqty", - "t11"."ps_supplycost", - "t11"."ps_comment", - "t11"."s_suppkey", - "t11"."s_name", - "t11"."s_address", - "t11"."s_nationkey", - "t11"."s_phone", - "t11"."s_acctbal", - "t11"."s_comment", - "t11"."n_nationkey", - "t11"."n_name", - "t11"."n_regionkey", - "t11"."n_comment" + "t9"."ps_partkey", + "t9"."ps_suppkey", + "t9"."ps_availqty", + "t9"."ps_supplycost", + "t9"."ps_comment", + "t9"."s_suppkey", + "t9"."s_name", + "t9"."s_address", + "t9"."s_nationkey", + "t9"."s_phone", + "t9"."s_acctbal", + "t9"."s_comment", + "t9"."n_nationkey", + "t9"."n_name", + "t9"."n_regionkey", + "t9"."n_comment" FROM ( SELECT "t7"."ps_partkey", @@ -149,11 +149,11 @@ WHERE FROM "hive"."ibis_sf1"."nation" AS "t2" ) AS "t6" ON "t6"."n_nationkey" = "t8"."s_nationkey" - ) AS "t11" + ) AS "t9" WHERE - "t11"."n_name" = 'GERMANY' - ) AS "t12" + "t9"."n_name" = 'GERMANY' + ) AS "t10" ) * CAST(0.0001 AS DOUBLE) ) ORDER BY - "t13"."value" DESC \ No newline at end of file + "t11"."value" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql index 1b0c38b528aa..ab4f275c250c 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql @@ -1,12 +1,12 @@ SELECT - t7.l_shipmode, - t7.high_line_count, - t7.low_line_count + t6.l_shipmode, + t6.high_line_count, + t6.low_line_count FROM ( SELECT - t6.l_shipmode, + t5.l_shipmode, SUM( - CASE t6.o_orderpriority + CASE t5.o_orderpriority WHEN '1-URGENT' THEN CAST(1 AS TINYINT) WHEN '2-HIGH' @@ -15,7 +15,7 @@ FROM ( END ) AS high_line_count, SUM( - CASE t6.o_orderpriority + CASE t5.o_orderpriority WHEN '1-URGENT' THEN CAST(0 AS TINYINT) WHEN '2-HIGH' @@ -25,31 +25,31 @@ FROM ( ) AS low_line_count FROM ( SELECT - t5.o_orderkey, - t5.o_custkey, - t5.o_orderstatus, - t5.o_totalprice, - t5.o_orderdate, - t5.o_orderpriority, - t5.o_clerk, - t5.o_shippriority, - t5.o_comment, - t5.l_orderkey, - t5.l_partkey, - t5.l_suppkey, - t5.l_linenumber, - t5.l_quantity, - t5.l_extendedprice, - t5.l_discount, - t5.l_tax, - t5.l_returnflag, - t5.l_linestatus, - t5.l_shipdate, - t5.l_commitdate, - t5.l_receiptdate, - t5.l_shipinstruct, - t5.l_shipmode, - t5.l_comment + t4.o_orderkey, + t4.o_custkey, + t4.o_orderstatus, + t4.o_totalprice, + t4.o_orderdate, + t4.o_orderpriority, + t4.o_clerk, + t4.o_shippriority, + t4.o_comment, + t4.l_orderkey, + t4.l_partkey, + t4.l_suppkey, + t4.l_linenumber, + t4.l_quantity, + t4.l_extendedprice, + t4.l_discount, + t4.l_tax, + t4.l_returnflag, + t4.l_linestatus, + t4.l_shipdate, + t4.l_commitdate, + t4.l_receiptdate, + t4.l_shipinstruct, + t4.l_shipmode, + t4.l_comment FROM ( SELECT t2.o_orderkey, @@ -80,16 +80,16 @@ FROM ( FROM orders AS t2 INNER JOIN lineitem AS t3 ON t2.o_orderkey = t3.l_orderkey - ) AS t5 + ) AS t4 WHERE - t5.l_shipmode IN ('MAIL', 'SHIP') - AND t5.l_commitdate < t5.l_receiptdate - AND t5.l_shipdate < t5.l_commitdate - 
AND t5.l_receiptdate >= MAKE_DATE(1994, 1, 1) - AND t5.l_receiptdate < MAKE_DATE(1995, 1, 1) - ) AS t6 + t4.l_shipmode IN ('MAIL', 'SHIP') + AND t4.l_commitdate < t4.l_receiptdate + AND t4.l_shipdate < t4.l_commitdate + AND t4.l_receiptdate >= MAKE_DATE(1994, 1, 1) + AND t4.l_receiptdate < MAKE_DATE(1995, 1, 1) + ) AS t5 GROUP BY 1 -) AS t7 +) AS t6 ORDER BY - t7.l_shipmode ASC \ No newline at end of file + t6.l_shipmode ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql index 3a07fd0399a2..17b6cadfda4f 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql @@ -1,43 +1,43 @@ SELECT - "t9"."l_shipmode", - "t9"."high_line_count", - "t9"."low_line_count" + "t8"."l_shipmode", + "t8"."high_line_count", + "t8"."low_line_count" FROM ( SELECT - "t8"."l_shipmode", + "t7"."l_shipmode", SUM( - CASE "t8"."o_orderpriority" WHEN '1-URGENT' THEN 1 WHEN '2-HIGH' THEN 1 ELSE 0 END + CASE "t7"."o_orderpriority" WHEN '1-URGENT' THEN 1 WHEN '2-HIGH' THEN 1 ELSE 0 END ) AS "high_line_count", SUM( - CASE "t8"."o_orderpriority" WHEN '1-URGENT' THEN 0 WHEN '2-HIGH' THEN 0 ELSE 1 END + CASE "t7"."o_orderpriority" WHEN '1-URGENT' THEN 0 WHEN '2-HIGH' THEN 0 ELSE 1 END ) AS "low_line_count" FROM ( SELECT - "t7"."o_orderkey", - "t7"."o_custkey", - "t7"."o_orderstatus", - "t7"."o_totalprice", - "t7"."o_orderdate", - "t7"."o_orderpriority", - "t7"."o_clerk", - "t7"."o_shippriority", - "t7"."o_comment", - "t7"."l_orderkey", - "t7"."l_partkey", - "t7"."l_suppkey", - "t7"."l_linenumber", - "t7"."l_quantity", - "t7"."l_extendedprice", - "t7"."l_discount", - "t7"."l_tax", - "t7"."l_returnflag", - "t7"."l_linestatus", - "t7"."l_shipdate", - "t7"."l_commitdate", - "t7"."l_receiptdate", - "t7"."l_shipinstruct", - "t7"."l_shipmode", - "t7"."l_comment" + "t6"."o_orderkey", + "t6"."o_custkey", + "t6"."o_orderstatus", + "t6"."o_totalprice", + "t6"."o_orderdate", + "t6"."o_orderpriority", + "t6"."o_clerk", + "t6"."o_shippriority", + "t6"."o_comment", + "t6"."l_orderkey", + "t6"."l_partkey", + "t6"."l_suppkey", + "t6"."l_linenumber", + "t6"."l_quantity", + "t6"."l_extendedprice", + "t6"."l_discount", + "t6"."l_tax", + "t6"."l_returnflag", + "t6"."l_linestatus", + "t6"."l_shipdate", + "t6"."l_commitdate", + "t6"."l_receiptdate", + "t6"."l_shipinstruct", + "t6"."l_shipmode", + "t6"."l_comment" FROM ( SELECT "t4"."o_orderkey", @@ -99,16 +99,16 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t1" ) AS "t5" ON "t4"."o_orderkey" = "t5"."l_orderkey" - ) AS "t7" + ) AS "t6" WHERE - "t7"."l_shipmode" IN ('MAIL', 'SHIP') - AND "t7"."l_commitdate" < "t7"."l_receiptdate" - AND "t7"."l_shipdate" < "t7"."l_commitdate" - AND "t7"."l_receiptdate" >= DATE_FROM_PARTS(1994, 1, 1) - AND "t7"."l_receiptdate" < DATE_FROM_PARTS(1995, 1, 1) - ) AS "t8" + "t6"."l_shipmode" IN ('MAIL', 'SHIP') + AND "t6"."l_commitdate" < "t6"."l_receiptdate" + AND "t6"."l_shipdate" < "t6"."l_commitdate" + AND "t6"."l_receiptdate" >= DATE_FROM_PARTS(1994, 1, 1) + AND "t6"."l_receiptdate" < DATE_FROM_PARTS(1995, 1, 1) + ) AS "t7" GROUP BY 1 -) AS "t9" +) AS "t8" ORDER BY - "t9"."l_shipmode" ASC \ No newline at end of file + "t8"."l_shipmode" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql 
b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql index e09b830eaf5e..5fb9adb53990 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql @@ -1,43 +1,43 @@ SELECT - "t9"."l_shipmode", - "t9"."high_line_count", - "t9"."low_line_count" + "t8"."l_shipmode", + "t8"."high_line_count", + "t8"."low_line_count" FROM ( SELECT - "t8"."l_shipmode", + "t7"."l_shipmode", SUM( - CASE "t8"."o_orderpriority" WHEN '1-URGENT' THEN 1 WHEN '2-HIGH' THEN 1 ELSE 0 END + CASE "t7"."o_orderpriority" WHEN '1-URGENT' THEN 1 WHEN '2-HIGH' THEN 1 ELSE 0 END ) AS "high_line_count", SUM( - CASE "t8"."o_orderpriority" WHEN '1-URGENT' THEN 0 WHEN '2-HIGH' THEN 0 ELSE 1 END + CASE "t7"."o_orderpriority" WHEN '1-URGENT' THEN 0 WHEN '2-HIGH' THEN 0 ELSE 1 END ) AS "low_line_count" FROM ( SELECT - "t7"."o_orderkey", - "t7"."o_custkey", - "t7"."o_orderstatus", - "t7"."o_totalprice", - "t7"."o_orderdate", - "t7"."o_orderpriority", - "t7"."o_clerk", - "t7"."o_shippriority", - "t7"."o_comment", - "t7"."l_orderkey", - "t7"."l_partkey", - "t7"."l_suppkey", - "t7"."l_linenumber", - "t7"."l_quantity", - "t7"."l_extendedprice", - "t7"."l_discount", - "t7"."l_tax", - "t7"."l_returnflag", - "t7"."l_linestatus", - "t7"."l_shipdate", - "t7"."l_commitdate", - "t7"."l_receiptdate", - "t7"."l_shipinstruct", - "t7"."l_shipmode", - "t7"."l_comment" + "t6"."o_orderkey", + "t6"."o_custkey", + "t6"."o_orderstatus", + "t6"."o_totalprice", + "t6"."o_orderdate", + "t6"."o_orderpriority", + "t6"."o_clerk", + "t6"."o_shippriority", + "t6"."o_comment", + "t6"."l_orderkey", + "t6"."l_partkey", + "t6"."l_suppkey", + "t6"."l_linenumber", + "t6"."l_quantity", + "t6"."l_extendedprice", + "t6"."l_discount", + "t6"."l_tax", + "t6"."l_returnflag", + "t6"."l_linestatus", + "t6"."l_shipdate", + "t6"."l_commitdate", + "t6"."l_receiptdate", + "t6"."l_shipinstruct", + "t6"."l_shipmode", + "t6"."l_comment" FROM ( SELECT "t4"."o_orderkey", @@ -99,16 +99,16 @@ FROM ( FROM "hive"."ibis_sf1"."lineitem" AS "t1" ) AS "t5" ON "t4"."o_orderkey" = "t5"."l_orderkey" - ) AS "t7" + ) AS "t6" WHERE - "t7"."l_shipmode" IN ('MAIL', 'SHIP') - AND "t7"."l_commitdate" < "t7"."l_receiptdate" - AND "t7"."l_shipdate" < "t7"."l_commitdate" - AND "t7"."l_receiptdate" >= FROM_ISO8601_DATE('1994-01-01') - AND "t7"."l_receiptdate" < FROM_ISO8601_DATE('1995-01-01') - ) AS "t8" + "t6"."l_shipmode" IN ('MAIL', 'SHIP') + AND "t6"."l_commitdate" < "t6"."l_receiptdate" + AND "t6"."l_shipdate" < "t6"."l_commitdate" + AND "t6"."l_receiptdate" >= FROM_ISO8601_DATE('1994-01-01') + AND "t6"."l_receiptdate" < FROM_ISO8601_DATE('1995-01-01') + ) AS "t7" GROUP BY 1 -) AS "t9" +) AS "t8" ORDER BY - "t9"."l_shipmode" ASC \ No newline at end of file + "t8"."l_shipmode" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql index 58270b87504b..c020de0fbaec 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql @@ -1,14 +1,14 @@ SELECT - t7.c_count, - t7.custdist + t6.c_count, + t6.custdist FROM ( SELECT - t6.c_count, + t5.c_count, COUNT(*) AS custdist FROM ( SELECT - t5.c_custkey, - COUNT(t5.o_orderkey) AS c_count + t4.c_custkey, + COUNT(t4.o_orderkey) AS c_count FROM ( SELECT t2.c_custkey, @@ -33,13 +33,13 @@ FROM ( ON t2.c_custkey = 
t3.o_custkey AND NOT ( t3.o_comment LIKE '%special%requests%' ) - ) AS t5 + ) AS t4 GROUP BY 1 - ) AS t6 + ) AS t5 GROUP BY 1 -) AS t7 +) AS t6 ORDER BY - t7.custdist DESC, - t7.c_count DESC \ No newline at end of file + t6.custdist DESC, + t6.c_count DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql index 2a93f8a9369d..8cb7af743baa 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql @@ -1,14 +1,14 @@ SELECT - "t9"."c_count", - "t9"."custdist" + "t8"."c_count", + "t8"."custdist" FROM ( SELECT - "t8"."c_count", + "t7"."c_count", COUNT(*) AS "custdist" FROM ( SELECT - "t7"."c_custkey", - COUNT("t7"."o_orderkey") AS "c_count" + "t6"."c_custkey", + COUNT("t6"."o_orderkey") AS "c_count" FROM ( SELECT "t4"."c_custkey", @@ -57,13 +57,13 @@ FROM ( AND NOT ( "t5"."o_comment" LIKE '%special%requests%' ) - ) AS "t7" + ) AS "t6" GROUP BY 1 - ) AS "t8" + ) AS "t7" GROUP BY 1 -) AS "t9" +) AS "t8" ORDER BY - "t9"."custdist" DESC NULLS LAST, - "t9"."c_count" DESC NULLS LAST \ No newline at end of file + "t8"."custdist" DESC NULLS LAST, + "t8"."c_count" DESC NULLS LAST \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql index 2e0da41dd7b2..62a4d186822a 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql @@ -1,14 +1,14 @@ SELECT - "t9"."c_count", - "t9"."custdist" + "t8"."c_count", + "t8"."custdist" FROM ( SELECT - "t8"."c_count", + "t7"."c_count", COUNT(*) AS "custdist" FROM ( SELECT - "t7"."c_custkey", - COUNT("t7"."o_orderkey") AS "c_count" + "t6"."c_custkey", + COUNT("t6"."o_orderkey") AS "c_count" FROM ( SELECT "t4"."c_custkey", @@ -57,13 +57,13 @@ FROM ( AND NOT ( "t5"."o_comment" LIKE '%special%requests%' ) - ) AS "t7" + ) AS "t6" GROUP BY 1 - ) AS "t8" + ) AS "t7" GROUP BY 1 -) AS "t9" +) AS "t8" ORDER BY - "t9"."custdist" DESC, - "t9"."c_count" DESC \ No newline at end of file + "t8"."custdist" DESC, + "t8"."c_count" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql index 42d6dbe835b4..2e411aa24794 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql @@ -2,43 +2,43 @@ SELECT ( SUM( CASE - WHEN t6.p_type LIKE 'PROMO%' - THEN t6.l_extendedprice * ( - CAST(1 AS TINYINT) - t6.l_discount + WHEN t5.p_type LIKE 'PROMO%' + THEN t5.l_extendedprice * ( + CAST(1 AS TINYINT) - t5.l_discount ) ELSE CAST(0 AS TINYINT) END ) * CAST(100 AS TINYINT) - ) / SUM(t6.l_extendedprice * ( - CAST(1 AS TINYINT) - t6.l_discount + ) / SUM(t5.l_extendedprice * ( + CAST(1 AS TINYINT) - t5.l_discount )) AS promo_revenue FROM ( SELECT - t5.l_orderkey, - t5.l_partkey, - t5.l_suppkey, - t5.l_linenumber, - t5.l_quantity, - t5.l_extendedprice, - t5.l_discount, - t5.l_tax, - t5.l_returnflag, - t5.l_linestatus, - t5.l_shipdate, - t5.l_commitdate, - t5.l_receiptdate, - t5.l_shipinstruct, - t5.l_shipmode, - t5.l_comment, - t5.p_partkey, - t5.p_name, - t5.p_mfgr, - t5.p_brand, - t5.p_type, - 
t5.p_size, - t5.p_container, - t5.p_retailprice, - t5.p_comment + t4.l_orderkey, + t4.l_partkey, + t4.l_suppkey, + t4.l_linenumber, + t4.l_quantity, + t4.l_extendedprice, + t4.l_discount, + t4.l_tax, + t4.l_returnflag, + t4.l_linestatus, + t4.l_shipdate, + t4.l_commitdate, + t4.l_receiptdate, + t4.l_shipinstruct, + t4.l_shipmode, + t4.l_comment, + t4.p_partkey, + t4.p_name, + t4.p_mfgr, + t4.p_brand, + t4.p_type, + t4.p_size, + t4.p_container, + t4.p_retailprice, + t4.p_comment FROM ( SELECT t2.l_orderkey, @@ -69,7 +69,7 @@ FROM ( FROM lineitem AS t2 INNER JOIN part AS t3 ON t2.l_partkey = t3.p_partkey - ) AS t5 + ) AS t4 WHERE - t5.l_shipdate >= MAKE_DATE(1995, 9, 1) AND t5.l_shipdate < MAKE_DATE(1995, 10, 1) -) AS t6 \ No newline at end of file + t4.l_shipdate >= MAKE_DATE(1995, 9, 1) AND t4.l_shipdate < MAKE_DATE(1995, 10, 1) +) AS t5 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql index 308333259aec..cf0bc91584d3 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql @@ -1,40 +1,40 @@ SELECT ( SUM( - IFF("t8"."p_type" LIKE 'PROMO%', "t8"."l_extendedprice" * ( - 1 - "t8"."l_discount" + IFF("t7"."p_type" LIKE 'PROMO%', "t7"."l_extendedprice" * ( + 1 - "t7"."l_discount" ), 0) ) * 100 - ) / SUM("t8"."l_extendedprice" * ( - 1 - "t8"."l_discount" + ) / SUM("t7"."l_extendedprice" * ( + 1 - "t7"."l_discount" )) AS "promo_revenue" FROM ( SELECT - "t7"."l_orderkey", - "t7"."l_partkey", - "t7"."l_suppkey", - "t7"."l_linenumber", - "t7"."l_quantity", - "t7"."l_extendedprice", - "t7"."l_discount", - "t7"."l_tax", - "t7"."l_returnflag", - "t7"."l_linestatus", - "t7"."l_shipdate", - "t7"."l_commitdate", - "t7"."l_receiptdate", - "t7"."l_shipinstruct", - "t7"."l_shipmode", - "t7"."l_comment", - "t7"."p_partkey", - "t7"."p_name", - "t7"."p_mfgr", - "t7"."p_brand", - "t7"."p_type", - "t7"."p_size", - "t7"."p_container", - "t7"."p_retailprice", - "t7"."p_comment" + "t6"."l_orderkey", + "t6"."l_partkey", + "t6"."l_suppkey", + "t6"."l_linenumber", + "t6"."l_quantity", + "t6"."l_extendedprice", + "t6"."l_discount", + "t6"."l_tax", + "t6"."l_returnflag", + "t6"."l_linestatus", + "t6"."l_shipdate", + "t6"."l_commitdate", + "t6"."l_receiptdate", + "t6"."l_shipinstruct", + "t6"."l_shipmode", + "t6"."l_comment", + "t6"."p_partkey", + "t6"."p_name", + "t6"."p_mfgr", + "t6"."p_brand", + "t6"."p_type", + "t6"."p_size", + "t6"."p_container", + "t6"."p_retailprice", + "t6"."p_comment" FROM ( SELECT "t4"."l_orderkey", @@ -96,8 +96,8 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS "t1" ) AS "t5" ON "t4"."l_partkey" = "t5"."p_partkey" - ) AS "t7" + ) AS "t6" WHERE - "t7"."l_shipdate" >= DATE_FROM_PARTS(1995, 9, 1) - AND "t7"."l_shipdate" < DATE_FROM_PARTS(1995, 10, 1) -) AS "t8" \ No newline at end of file + "t6"."l_shipdate" >= DATE_FROM_PARTS(1995, 9, 1) + AND "t6"."l_shipdate" < DATE_FROM_PARTS(1995, 10, 1) +) AS "t7" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql index 1a4327c3b6e3..f5f5f0b6ee6b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql @@ -1,40 +1,40 @@ SELECT CAST(( SUM( - IF("t8"."p_type" LIKE 
'PROMO%', "t8"."l_extendedprice" * ( - 1 - "t8"."l_discount" + IF("t7"."p_type" LIKE 'PROMO%', "t7"."l_extendedprice" * ( + 1 - "t7"."l_discount" ), 0) ) * 100 - ) AS DOUBLE) / SUM("t8"."l_extendedprice" * ( - 1 - "t8"."l_discount" + ) AS DOUBLE) / SUM("t7"."l_extendedprice" * ( + 1 - "t7"."l_discount" )) AS "promo_revenue" FROM ( SELECT - "t7"."l_orderkey", - "t7"."l_partkey", - "t7"."l_suppkey", - "t7"."l_linenumber", - "t7"."l_quantity", - "t7"."l_extendedprice", - "t7"."l_discount", - "t7"."l_tax", - "t7"."l_returnflag", - "t7"."l_linestatus", - "t7"."l_shipdate", - "t7"."l_commitdate", - "t7"."l_receiptdate", - "t7"."l_shipinstruct", - "t7"."l_shipmode", - "t7"."l_comment", - "t7"."p_partkey", - "t7"."p_name", - "t7"."p_mfgr", - "t7"."p_brand", - "t7"."p_type", - "t7"."p_size", - "t7"."p_container", - "t7"."p_retailprice", - "t7"."p_comment" + "t6"."l_orderkey", + "t6"."l_partkey", + "t6"."l_suppkey", + "t6"."l_linenumber", + "t6"."l_quantity", + "t6"."l_extendedprice", + "t6"."l_discount", + "t6"."l_tax", + "t6"."l_returnflag", + "t6"."l_linestatus", + "t6"."l_shipdate", + "t6"."l_commitdate", + "t6"."l_receiptdate", + "t6"."l_shipinstruct", + "t6"."l_shipmode", + "t6"."l_comment", + "t6"."p_partkey", + "t6"."p_name", + "t6"."p_mfgr", + "t6"."p_brand", + "t6"."p_type", + "t6"."p_size", + "t6"."p_container", + "t6"."p_retailprice", + "t6"."p_comment" FROM ( SELECT "t4"."l_orderkey", @@ -96,8 +96,8 @@ FROM ( FROM "hive"."ibis_sf1"."part" AS "t1" ) AS "t5" ON "t4"."l_partkey" = "t5"."p_partkey" - ) AS "t7" + ) AS "t6" WHERE - "t7"."l_shipdate" >= FROM_ISO8601_DATE('1995-09-01') - AND "t7"."l_shipdate" < FROM_ISO8601_DATE('1995-10-01') -) AS "t8" \ No newline at end of file + "t6"."l_shipdate" >= FROM_ISO8601_DATE('1995-09-01') + AND "t6"."l_shipdate" < FROM_ISO8601_DATE('1995-10-01') +) AS "t7" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql index afad257dc2f2..3a5449a2361c 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql @@ -1,9 +1,9 @@ SELECT - t7.s_suppkey, - t7.s_name, - t7.s_address, - t7.s_phone, - t7.total_revenue + t6.s_suppkey, + t6.s_name, + t6.s_address, + t6.s_phone, + t6.total_revenue FROM ( SELECT t2.s_suppkey, @@ -48,11 +48,11 @@ FROM ( 1 ) AS t5 ON t2.s_suppkey = t5.l_suppkey -) AS t7 +) AS t6 WHERE - t7.total_revenue = ( + t6.total_revenue = ( SELECT - MAX(t7.total_revenue) AS "Max(total_revenue)" + MAX(t6.total_revenue) AS "Max(total_revenue)" FROM ( SELECT t2.s_suppkey, @@ -97,7 +97,7 @@ WHERE 1 ) AS t5 ON t2.s_suppkey = t5.l_suppkey - ) AS t7 + ) AS t6 ) ORDER BY - t7.s_suppkey ASC \ No newline at end of file + t6.s_suppkey ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql index bbbce8c1d553..5a32077fb984 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql @@ -1,9 +1,9 @@ SELECT - "t8"."s_suppkey", - "t8"."s_name", - "t8"."s_address", - "t8"."s_phone", - "t8"."total_revenue" + "t7"."s_suppkey", + "t7"."s_name", + "t7"."s_address", + "t7"."s_phone", + "t7"."total_revenue" FROM ( SELECT "t3"."s_suppkey", @@ -59,11 +59,11 @@ FROM ( 1 ) AS "t6" ON "t3"."s_suppkey" 
= "t6"."l_suppkey" -) AS "t8" +) AS "t7" WHERE - "t8"."total_revenue" = ( + "t7"."total_revenue" = ( SELECT - MAX("t8"."total_revenue") AS "Max(total_revenue)" + MAX("t7"."total_revenue") AS "Max(total_revenue)" FROM ( SELECT "t3"."s_suppkey", @@ -119,7 +119,7 @@ WHERE 1 ) AS "t6" ON "t3"."s_suppkey" = "t6"."l_suppkey" - ) AS "t8" + ) AS "t7" ) ORDER BY - "t8"."s_suppkey" ASC \ No newline at end of file + "t7"."s_suppkey" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql index eabc65ac19cf..a86f45f2bdf2 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql @@ -1,9 +1,9 @@ SELECT - "t8"."s_suppkey", - "t8"."s_name", - "t8"."s_address", - "t8"."s_phone", - "t8"."total_revenue" + "t7"."s_suppkey", + "t7"."s_name", + "t7"."s_address", + "t7"."s_phone", + "t7"."total_revenue" FROM ( SELECT "t4"."s_suppkey", @@ -59,11 +59,11 @@ FROM ( 1 ) AS "t6" ON "t4"."s_suppkey" = "t6"."l_suppkey" -) AS "t8" +) AS "t7" WHERE - "t8"."total_revenue" = ( + "t7"."total_revenue" = ( SELECT - MAX("t8"."total_revenue") AS "Max(total_revenue)" + MAX("t7"."total_revenue") AS "Max(total_revenue)" FROM ( SELECT "t4"."s_suppkey", @@ -119,7 +119,7 @@ WHERE 1 ) AS "t6" ON "t4"."s_suppkey" = "t6"."l_suppkey" - ) AS "t8" + ) AS "t7" ) ORDER BY - "t8"."s_suppkey" ASC \ No newline at end of file + "t7"."s_suppkey" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql index 711276ec20dd..b6491dc0efa5 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql @@ -1,30 +1,30 @@ SELECT - t9.p_brand, - t9.p_type, - t9.p_size, - t9.supplier_cnt + t8.p_brand, + t8.p_type, + t8.p_size, + t8.supplier_cnt FROM ( SELECT - t8.p_brand, - t8.p_type, - t8.p_size, - COUNT(DISTINCT t8.ps_suppkey) AS supplier_cnt + t7.p_brand, + t7.p_type, + t7.p_size, + COUNT(DISTINCT t7.ps_suppkey) AS supplier_cnt FROM ( SELECT - t7.ps_partkey, - t7.ps_suppkey, - t7.ps_availqty, - t7.ps_supplycost, - t7.ps_comment, - t7.p_partkey, - t7.p_name, - t7.p_mfgr, - t7.p_brand, - t7.p_type, - t7.p_size, - t7.p_container, - t7.p_retailprice, - t7.p_comment + t6.ps_partkey, + t6.ps_suppkey, + t6.ps_availqty, + t6.ps_supplycost, + t6.ps_comment, + t6.p_partkey, + t6.p_name, + t6.p_mfgr, + t6.p_brand, + t6.p_type, + t6.p_size, + t6.p_container, + t6.p_retailprice, + t6.p_comment FROM ( SELECT t3.ps_partkey, @@ -44,15 +44,15 @@ FROM ( FROM partsupp AS t3 INNER JOIN part AS t4 ON t4.p_partkey = t3.ps_partkey - ) AS t7 + ) AS t6 WHERE - t7.p_brand <> 'Brand#45' + t6.p_brand <> 'Brand#45' AND NOT ( - t7.p_type LIKE 'MEDIUM POLISHED%' + t6.p_type LIKE 'MEDIUM POLISHED%' ) - AND t7.p_size IN (CAST(49 AS TINYINT), CAST(14 AS TINYINT), CAST(23 AS TINYINT), CAST(45 AS TINYINT), CAST(19 AS TINYINT), CAST(3 AS TINYINT), CAST(36 AS TINYINT), CAST(9 AS TINYINT)) + AND t6.p_size IN (CAST(49 AS TINYINT), CAST(14 AS TINYINT), CAST(23 AS TINYINT), CAST(45 AS TINYINT), CAST(19 AS TINYINT), CAST(3 AS TINYINT), CAST(36 AS TINYINT), CAST(9 AS TINYINT)) AND NOT ( - t7.ps_suppkey IN ( + t6.ps_suppkey IN ( SELECT t2.s_suppkey FROM supplier AS t2 @@ -60,14 +60,14 @@ FROM ( t2.s_comment LIKE '%Customer%Complaints%' ) 
) - ) AS t8 + ) AS t7 GROUP BY 1, 2, 3 -) AS t9 +) AS t8 ORDER BY - t9.supplier_cnt DESC, - t9.p_brand ASC, - t9.p_type ASC, - t9.p_size ASC \ No newline at end of file + t8.supplier_cnt DESC, + t8.p_brand ASC, + t8.p_type ASC, + t8.p_size ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql index 842af99dbc5a..b34997f19e97 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql @@ -1,30 +1,30 @@ SELECT - "t11"."p_brand", - "t11"."p_type", - "t11"."p_size", - "t11"."supplier_cnt" + "t10"."p_brand", + "t10"."p_type", + "t10"."p_size", + "t10"."supplier_cnt" FROM ( SELECT - "t10"."p_brand", - "t10"."p_type", - "t10"."p_size", - COUNT(DISTINCT "t10"."ps_suppkey") AS "supplier_cnt" + "t9"."p_brand", + "t9"."p_type", + "t9"."p_size", + COUNT(DISTINCT "t9"."ps_suppkey") AS "supplier_cnt" FROM ( SELECT - "t9"."ps_partkey", - "t9"."ps_suppkey", - "t9"."ps_availqty", - "t9"."ps_supplycost", - "t9"."ps_comment", - "t9"."p_partkey", - "t9"."p_name", - "t9"."p_mfgr", - "t9"."p_brand", - "t9"."p_type", - "t9"."p_size", - "t9"."p_container", - "t9"."p_retailprice", - "t9"."p_comment" + "t8"."ps_partkey", + "t8"."ps_suppkey", + "t8"."ps_availqty", + "t8"."ps_supplycost", + "t8"."ps_comment", + "t8"."p_partkey", + "t8"."p_name", + "t8"."p_mfgr", + "t8"."p_brand", + "t8"."p_type", + "t8"."p_size", + "t8"."p_container", + "t8"."p_retailprice", + "t8"."p_comment" FROM ( SELECT "t5"."ps_partkey", @@ -64,15 +64,15 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS "t2" ) AS "t7" ON "t7"."p_partkey" = "t5"."ps_partkey" - ) AS "t9" + ) AS "t8" WHERE - "t9"."p_brand" <> 'Brand#45' + "t8"."p_brand" <> 'Brand#45' AND NOT ( - "t9"."p_type" LIKE 'MEDIUM POLISHED%' + "t8"."p_type" LIKE 'MEDIUM POLISHED%' ) - AND "t9"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) + AND "t8"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) AND NOT ( - "t9"."ps_suppkey" IN ( + "t8"."ps_suppkey" IN ( SELECT "t1"."S_SUPPKEY" AS "s_suppkey" FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS "t1" @@ -80,14 +80,14 @@ FROM ( "t1"."S_COMMENT" LIKE '%Customer%Complaints%' ) ) - ) AS "t10" + ) AS "t9" GROUP BY 1, 2, 3 -) AS "t11" +) AS "t10" ORDER BY - "t11"."supplier_cnt" DESC NULLS LAST, - "t11"."p_brand" ASC, - "t11"."p_type" ASC, - "t11"."p_size" ASC \ No newline at end of file + "t10"."supplier_cnt" DESC NULLS LAST, + "t10"."p_brand" ASC, + "t10"."p_type" ASC, + "t10"."p_size" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql index a775b1a392c4..82b0521c4edf 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql @@ -1,30 +1,30 @@ SELECT - "t11"."p_brand", - "t11"."p_type", - "t11"."p_size", - "t11"."supplier_cnt" + "t10"."p_brand", + "t10"."p_type", + "t10"."p_size", + "t10"."supplier_cnt" FROM ( SELECT - "t10"."p_brand", - "t10"."p_type", - "t10"."p_size", - COUNT(DISTINCT "t10"."ps_suppkey") AS "supplier_cnt" + "t9"."p_brand", + "t9"."p_type", + "t9"."p_size", + COUNT(DISTINCT "t9"."ps_suppkey") AS "supplier_cnt" FROM ( SELECT - "t9"."ps_partkey", - "t9"."ps_suppkey", - "t9"."ps_availqty", - "t9"."ps_supplycost", - "t9"."ps_comment", - 
"t9"."p_partkey", - "t9"."p_name", - "t9"."p_mfgr", - "t9"."p_brand", - "t9"."p_type", - "t9"."p_size", - "t9"."p_container", - "t9"."p_retailprice", - "t9"."p_comment" + "t8"."ps_partkey", + "t8"."ps_suppkey", + "t8"."ps_availqty", + "t8"."ps_supplycost", + "t8"."ps_comment", + "t8"."p_partkey", + "t8"."p_name", + "t8"."p_mfgr", + "t8"."p_brand", + "t8"."p_type", + "t8"."p_size", + "t8"."p_container", + "t8"."p_retailprice", + "t8"."p_comment" FROM ( SELECT "t6"."ps_partkey", @@ -64,15 +64,15 @@ FROM ( FROM "hive"."ibis_sf1"."part" AS "t2" ) AS "t7" ON "t7"."p_partkey" = "t6"."ps_partkey" - ) AS "t9" + ) AS "t8" WHERE - "t9"."p_brand" <> 'Brand#45' + "t8"."p_brand" <> 'Brand#45' AND NOT ( - "t9"."p_type" LIKE 'MEDIUM POLISHED%' + "t8"."p_type" LIKE 'MEDIUM POLISHED%' ) - AND "t9"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) + AND "t8"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) AND NOT ( - "t9"."ps_suppkey" IN ( + "t8"."ps_suppkey" IN ( SELECT "t1"."s_suppkey" FROM "hive"."ibis_sf1"."supplier" AS "t1" @@ -80,14 +80,14 @@ FROM ( "t1"."s_comment" LIKE '%Customer%Complaints%' ) ) - ) AS "t10" + ) AS "t9" GROUP BY 1, 2, 3 -) AS "t11" +) AS "t10" ORDER BY - "t11"."supplier_cnt" DESC, - "t11"."p_brand" ASC, - "t11"."p_type" ASC, - "t11"."p_size" ASC \ No newline at end of file + "t10"."supplier_cnt" DESC, + "t10"."p_brand" ASC, + "t10"."p_type" ASC, + "t10"."p_size" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql index 905e5c095d3d..378aac0f97ee 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql @@ -1,32 +1,32 @@ SELECT - SUM(t8.l_extendedprice) / CAST(7.0 AS DOUBLE) AS avg_yearly + SUM(t7.l_extendedprice) / CAST(7.0 AS DOUBLE) AS avg_yearly FROM ( SELECT - t5.l_orderkey, - t5.l_partkey, - t5.l_suppkey, - t5.l_linenumber, - t5.l_quantity, - t5.l_extendedprice, - t5.l_discount, - t5.l_tax, - t5.l_returnflag, - t5.l_linestatus, - t5.l_shipdate, - t5.l_commitdate, - t5.l_receiptdate, - t5.l_shipinstruct, - t5.l_shipmode, - t5.l_comment, - t5.p_partkey, - t5.p_name, - t5.p_mfgr, - t5.p_brand, - t5.p_type, - t5.p_size, - t5.p_container, - t5.p_retailprice, - t5.p_comment + t4.l_orderkey, + t4.l_partkey, + t4.l_suppkey, + t4.l_linenumber, + t4.l_quantity, + t4.l_extendedprice, + t4.l_discount, + t4.l_tax, + t4.l_returnflag, + t4.l_linestatus, + t4.l_shipdate, + t4.l_commitdate, + t4.l_receiptdate, + t4.l_shipinstruct, + t4.l_shipmode, + t4.l_comment, + t4.p_partkey, + t4.p_name, + t4.p_mfgr, + t4.p_brand, + t4.p_type, + t4.p_size, + t4.p_container, + t4.p_retailprice, + t4.p_comment FROM ( SELECT t2.l_orderkey, @@ -57,14 +57,14 @@ FROM ( FROM lineitem AS t2 INNER JOIN part AS t3 ON t3.p_partkey = t2.l_partkey - ) AS t5 + ) AS t4 WHERE - t5.p_brand = 'Brand#23' - AND t5.p_container = 'MED BOX' - AND t5.l_quantity < ( + t4.p_brand = 'Brand#23' + AND t4.p_container = 'MED BOX' + AND t4.l_quantity < ( ( SELECT - AVG(t6.l_quantity) AS "Mean(l_quantity)" + AVG(t5.l_quantity) AS "Mean(l_quantity)" FROM ( SELECT t0.l_orderkey, @@ -85,8 +85,8 @@ FROM ( t0.l_comment FROM lineitem AS t0 WHERE - t0.l_partkey = t5.p_partkey - ) AS t6 + t0.l_partkey = t4.p_partkey + ) AS t5 ) * CAST(0.2 AS DOUBLE) ) -) AS t8 \ No newline at end of file +) AS t7 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql 
b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql index c5f3e273aba1..52a4df761879 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql @@ -1,32 +1,32 @@ SELECT - SUM("t10"."l_extendedprice") / 7.0 AS "avg_yearly" + SUM("t9"."l_extendedprice") / 7.0 AS "avg_yearly" FROM ( SELECT - "t7"."l_orderkey", - "t7"."l_partkey", - "t7"."l_suppkey", - "t7"."l_linenumber", - "t7"."l_quantity", - "t7"."l_extendedprice", - "t7"."l_discount", - "t7"."l_tax", - "t7"."l_returnflag", - "t7"."l_linestatus", - "t7"."l_shipdate", - "t7"."l_commitdate", - "t7"."l_receiptdate", - "t7"."l_shipinstruct", - "t7"."l_shipmode", - "t7"."l_comment", - "t7"."p_partkey", - "t7"."p_name", - "t7"."p_mfgr", - "t7"."p_brand", - "t7"."p_type", - "t7"."p_size", - "t7"."p_container", - "t7"."p_retailprice", - "t7"."p_comment" + "t6"."l_orderkey", + "t6"."l_partkey", + "t6"."l_suppkey", + "t6"."l_linenumber", + "t6"."l_quantity", + "t6"."l_extendedprice", + "t6"."l_discount", + "t6"."l_tax", + "t6"."l_returnflag", + "t6"."l_linestatus", + "t6"."l_shipdate", + "t6"."l_commitdate", + "t6"."l_receiptdate", + "t6"."l_shipinstruct", + "t6"."l_shipmode", + "t6"."l_comment", + "t6"."p_partkey", + "t6"."p_name", + "t6"."p_mfgr", + "t6"."p_brand", + "t6"."p_type", + "t6"."p_size", + "t6"."p_container", + "t6"."p_retailprice", + "t6"."p_comment" FROM ( SELECT "t4"."l_orderkey", @@ -88,14 +88,14 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS "t1" ) AS "t5" ON "t5"."p_partkey" = "t4"."l_partkey" - ) AS "t7" + ) AS "t6" WHERE - "t7"."p_brand" = 'Brand#23' - AND "t7"."p_container" = 'MED BOX' - AND "t7"."l_quantity" < ( + "t6"."p_brand" = 'Brand#23' + AND "t6"."p_container" = 'MED BOX' + AND "t6"."l_quantity" < ( ( SELECT - AVG("t8"."l_quantity") AS "Mean(l_quantity)" + AVG("t7"."l_quantity") AS "Mean(l_quantity)" FROM ( SELECT "t0"."L_ORDERKEY" AS "l_orderkey", @@ -116,8 +116,8 @@ FROM ( "t0"."L_COMMENT" AS "l_comment" FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t0" WHERE - "t0"."L_PARTKEY" = "t7"."p_partkey" - ) AS "t8" + "t0"."L_PARTKEY" = "t6"."p_partkey" + ) AS "t7" ) * 0.2 ) -) AS "t10" \ No newline at end of file +) AS "t9" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql index 657b48da6ca8..e49b15c15592 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql @@ -1,32 +1,32 @@ SELECT - SUM("t10"."l_extendedprice") / CAST(7.0 AS DOUBLE) AS "avg_yearly" + SUM("t9"."l_extendedprice") / CAST(7.0 AS DOUBLE) AS "avg_yearly" FROM ( SELECT - "t7"."l_orderkey", - "t7"."l_partkey", - "t7"."l_suppkey", - "t7"."l_linenumber", - "t7"."l_quantity", - "t7"."l_extendedprice", - "t7"."l_discount", - "t7"."l_tax", - "t7"."l_returnflag", - "t7"."l_linestatus", - "t7"."l_shipdate", - "t7"."l_commitdate", - "t7"."l_receiptdate", - "t7"."l_shipinstruct", - "t7"."l_shipmode", - "t7"."l_comment", - "t7"."p_partkey", - "t7"."p_name", - "t7"."p_mfgr", - "t7"."p_brand", - "t7"."p_type", - "t7"."p_size", - "t7"."p_container", - "t7"."p_retailprice", - "t7"."p_comment" + "t6"."l_orderkey", + "t6"."l_partkey", + "t6"."l_suppkey", + "t6"."l_linenumber", + "t6"."l_quantity", + "t6"."l_extendedprice", + "t6"."l_discount", + "t6"."l_tax", + "t6"."l_returnflag", + 
"t6"."l_linestatus", + "t6"."l_shipdate", + "t6"."l_commitdate", + "t6"."l_receiptdate", + "t6"."l_shipinstruct", + "t6"."l_shipmode", + "t6"."l_comment", + "t6"."p_partkey", + "t6"."p_name", + "t6"."p_mfgr", + "t6"."p_brand", + "t6"."p_type", + "t6"."p_size", + "t6"."p_container", + "t6"."p_retailprice", + "t6"."p_comment" FROM ( SELECT "t4"."l_orderkey", @@ -88,14 +88,14 @@ FROM ( FROM "hive"."ibis_sf1"."part" AS "t1" ) AS "t5" ON "t5"."p_partkey" = "t4"."l_partkey" - ) AS "t7" + ) AS "t6" WHERE - "t7"."p_brand" = 'Brand#23' - AND "t7"."p_container" = 'MED BOX' - AND "t7"."l_quantity" < ( + "t6"."p_brand" = 'Brand#23' + AND "t6"."p_container" = 'MED BOX' + AND "t6"."l_quantity" < ( ( SELECT - AVG("t8"."l_quantity") AS "Mean(l_quantity)" + AVG("t7"."l_quantity") AS "Mean(l_quantity)" FROM ( SELECT "t0"."l_orderkey", @@ -116,8 +116,8 @@ FROM ( "t0"."l_comment" FROM "hive"."ibis_sf1"."lineitem" AS "t0" WHERE - "t0"."l_partkey" = "t7"."p_partkey" - ) AS "t8" + "t0"."l_partkey" = "t6"."p_partkey" + ) AS "t7" ) * CAST(0.2 AS DOUBLE) ) -) AS "t10" \ No newline at end of file +) AS "t9" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql index 9d3d4f821010..27ca9fa730fd 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql @@ -1,53 +1,53 @@ SELECT - t12.c_name, - t12.c_custkey, - t12.o_orderkey, - t12.o_orderdate, - t12.o_totalprice, - t12.sum_qty + t10.c_name, + t10.c_custkey, + t10.o_orderkey, + t10.o_orderdate, + t10.o_totalprice, + t10.sum_qty FROM ( SELECT - t11.c_name, - t11.c_custkey, - t11.o_orderkey, - t11.o_orderdate, - t11.o_totalprice, - SUM(t11.l_quantity) AS sum_qty + t9.c_name, + t9.c_custkey, + t9.o_orderkey, + t9.o_orderdate, + t9.o_totalprice, + SUM(t9.l_quantity) AS sum_qty FROM ( SELECT - t9.c_custkey, - t9.c_name, - t9.c_address, - t9.c_nationkey, - t9.c_phone, - t9.c_acctbal, - t9.c_mktsegment, - t9.c_comment, - t9.o_orderkey, - t9.o_custkey, - t9.o_orderstatus, - t9.o_totalprice, - t9.o_orderdate, - t9.o_orderpriority, - t9.o_clerk, - t9.o_shippriority, - t9.o_comment, - t9.l_orderkey, - t9.l_partkey, - t9.l_suppkey, - t9.l_linenumber, - t9.l_quantity, - t9.l_extendedprice, - t9.l_discount, - t9.l_tax, - t9.l_returnflag, - t9.l_linestatus, - t9.l_shipdate, - t9.l_commitdate, - t9.l_receiptdate, - t9.l_shipinstruct, - t9.l_shipmode, - t9.l_comment + t7.c_custkey, + t7.c_name, + t7.c_address, + t7.c_nationkey, + t7.c_phone, + t7.c_acctbal, + t7.c_mktsegment, + t7.c_comment, + t7.o_orderkey, + t7.o_custkey, + t7.o_orderstatus, + t7.o_totalprice, + t7.o_orderdate, + t7.o_orderpriority, + t7.o_clerk, + t7.o_shippriority, + t7.o_comment, + t7.l_orderkey, + t7.l_partkey, + t7.l_suppkey, + t7.l_linenumber, + t7.l_quantity, + t7.l_extendedprice, + t7.l_discount, + t7.l_tax, + t7.l_returnflag, + t7.l_linestatus, + t7.l_shipdate, + t7.l_commitdate, + t7.l_receiptdate, + t7.l_shipinstruct, + t7.l_shipmode, + t7.l_comment FROM ( SELECT t3.c_custkey, @@ -88,9 +88,9 @@ FROM ( ON t3.c_custkey = t4.o_custkey INNER JOIN lineitem AS t5 ON t4.o_orderkey = t5.l_orderkey - ) AS t9 + ) AS t7 WHERE - t9.o_orderkey IN ( + t7.o_orderkey IN ( SELECT t6.l_orderkey FROM ( @@ -104,15 +104,15 @@ FROM ( WHERE t6.qty_sum > CAST(300 AS SMALLINT) ) - ) AS t11 + ) AS t9 GROUP BY 1, 2, 3, 4, 5 -) AS t12 +) AS t10 ORDER BY - t12.o_totalprice DESC, - t12.o_orderdate ASC + 
t10.o_totalprice DESC, + t10.o_orderdate ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql index cbeae5fe06f4..7f4390504899 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql @@ -19,55 +19,55 @@ WITH "t5" AS ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t2" ) SELECT - "t16"."c_name", - "t16"."c_custkey", - "t16"."o_orderkey", - "t16"."o_orderdate", - "t16"."o_totalprice", - "t16"."sum_qty" + "t14"."c_name", + "t14"."c_custkey", + "t14"."o_orderkey", + "t14"."o_orderdate", + "t14"."o_totalprice", + "t14"."sum_qty" FROM ( SELECT - "t15"."c_name", - "t15"."c_custkey", - "t15"."o_orderkey", - "t15"."o_orderdate", - "t15"."o_totalprice", - SUM("t15"."l_quantity") AS "sum_qty" + "t13"."c_name", + "t13"."c_custkey", + "t13"."o_orderkey", + "t13"."o_orderdate", + "t13"."o_totalprice", + SUM("t13"."l_quantity") AS "sum_qty" FROM ( SELECT - "t13"."c_custkey", - "t13"."c_name", - "t13"."c_address", - "t13"."c_nationkey", - "t13"."c_phone", - "t13"."c_acctbal", - "t13"."c_mktsegment", - "t13"."c_comment", - "t13"."o_orderkey", - "t13"."o_custkey", - "t13"."o_orderstatus", - "t13"."o_totalprice", - "t13"."o_orderdate", - "t13"."o_orderpriority", - "t13"."o_clerk", - "t13"."o_shippriority", - "t13"."o_comment", - "t13"."l_orderkey", - "t13"."l_partkey", - "t13"."l_suppkey", - "t13"."l_linenumber", - "t13"."l_quantity", - "t13"."l_extendedprice", - "t13"."l_discount", - "t13"."l_tax", - "t13"."l_returnflag", - "t13"."l_linestatus", - "t13"."l_shipdate", - "t13"."l_commitdate", - "t13"."l_receiptdate", - "t13"."l_shipinstruct", - "t13"."l_shipmode", - "t13"."l_comment" + "t11"."c_custkey", + "t11"."c_name", + "t11"."c_address", + "t11"."c_nationkey", + "t11"."c_phone", + "t11"."c_acctbal", + "t11"."c_mktsegment", + "t11"."c_comment", + "t11"."o_orderkey", + "t11"."o_custkey", + "t11"."o_orderstatus", + "t11"."o_totalprice", + "t11"."o_orderdate", + "t11"."o_orderpriority", + "t11"."o_clerk", + "t11"."o_shippriority", + "t11"."o_comment", + "t11"."l_orderkey", + "t11"."l_partkey", + "t11"."l_suppkey", + "t11"."l_linenumber", + "t11"."l_quantity", + "t11"."l_extendedprice", + "t11"."l_discount", + "t11"."l_tax", + "t11"."l_returnflag", + "t11"."l_linestatus", + "t11"."l_shipdate", + "t11"."l_commitdate", + "t11"."l_receiptdate", + "t11"."l_shipinstruct", + "t11"."l_shipmode", + "t11"."l_comment" FROM ( SELECT "t6"."c_custkey", @@ -131,11 +131,11 @@ FROM ( ON "t6"."c_custkey" = "t7"."o_custkey" INNER JOIN "t5" AS "t9" ON "t7"."o_orderkey" = "t9"."l_orderkey" - ) AS "t13" + ) AS "t11" WHERE - "t13"."o_orderkey" IN ( + "t11"."o_orderkey" IN ( SELECT - "t11"."l_orderkey" + "t10"."l_orderkey" FROM ( SELECT "t8"."l_orderkey", @@ -143,19 +143,19 @@ FROM ( FROM "t5" AS "t8" GROUP BY 1 - ) AS "t11" + ) AS "t10" WHERE - "t11"."qty_sum" > 300 + "t10"."qty_sum" > 300 ) - ) AS "t15" + ) AS "t13" GROUP BY 1, 2, 3, 4, 5 -) AS "t16" +) AS "t14" ORDER BY - "t16"."o_totalprice" DESC NULLS LAST, - "t16"."o_orderdate" ASC + "t14"."o_totalprice" DESC NULLS LAST, + "t14"."o_orderdate" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql index 73600edb4467..242f77b11536 100644 --- 
a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql @@ -19,55 +19,55 @@ WITH "t5" AS ( FROM "hive"."ibis_sf1"."lineitem" AS "t2" ) SELECT - "t16"."c_name", - "t16"."c_custkey", - "t16"."o_orderkey", - "t16"."o_orderdate", - "t16"."o_totalprice", - "t16"."sum_qty" + "t14"."c_name", + "t14"."c_custkey", + "t14"."o_orderkey", + "t14"."o_orderdate", + "t14"."o_totalprice", + "t14"."sum_qty" FROM ( SELECT - "t15"."c_name", - "t15"."c_custkey", - "t15"."o_orderkey", - "t15"."o_orderdate", - "t15"."o_totalprice", - SUM("t15"."l_quantity") AS "sum_qty" + "t13"."c_name", + "t13"."c_custkey", + "t13"."o_orderkey", + "t13"."o_orderdate", + "t13"."o_totalprice", + SUM("t13"."l_quantity") AS "sum_qty" FROM ( SELECT - "t13"."c_custkey", - "t13"."c_name", - "t13"."c_address", - "t13"."c_nationkey", - "t13"."c_phone", - "t13"."c_acctbal", - "t13"."c_mktsegment", - "t13"."c_comment", - "t13"."o_orderkey", - "t13"."o_custkey", - "t13"."o_orderstatus", - "t13"."o_totalprice", - "t13"."o_orderdate", - "t13"."o_orderpriority", - "t13"."o_clerk", - "t13"."o_shippriority", - "t13"."o_comment", - "t13"."l_orderkey", - "t13"."l_partkey", - "t13"."l_suppkey", - "t13"."l_linenumber", - "t13"."l_quantity", - "t13"."l_extendedprice", - "t13"."l_discount", - "t13"."l_tax", - "t13"."l_returnflag", - "t13"."l_linestatus", - "t13"."l_shipdate", - "t13"."l_commitdate", - "t13"."l_receiptdate", - "t13"."l_shipinstruct", - "t13"."l_shipmode", - "t13"."l_comment" + "t11"."c_custkey", + "t11"."c_name", + "t11"."c_address", + "t11"."c_nationkey", + "t11"."c_phone", + "t11"."c_acctbal", + "t11"."c_mktsegment", + "t11"."c_comment", + "t11"."o_orderkey", + "t11"."o_custkey", + "t11"."o_orderstatus", + "t11"."o_totalprice", + "t11"."o_orderdate", + "t11"."o_orderpriority", + "t11"."o_clerk", + "t11"."o_shippriority", + "t11"."o_comment", + "t11"."l_orderkey", + "t11"."l_partkey", + "t11"."l_suppkey", + "t11"."l_linenumber", + "t11"."l_quantity", + "t11"."l_extendedprice", + "t11"."l_discount", + "t11"."l_tax", + "t11"."l_returnflag", + "t11"."l_linestatus", + "t11"."l_shipdate", + "t11"."l_commitdate", + "t11"."l_receiptdate", + "t11"."l_shipinstruct", + "t11"."l_shipmode", + "t11"."l_comment" FROM ( SELECT "t6"."c_custkey", @@ -131,11 +131,11 @@ FROM ( ON "t6"."c_custkey" = "t7"."o_custkey" INNER JOIN "t5" AS "t9" ON "t7"."o_orderkey" = "t9"."l_orderkey" - ) AS "t13" + ) AS "t11" WHERE - "t13"."o_orderkey" IN ( + "t11"."o_orderkey" IN ( SELECT - "t11"."l_orderkey" + "t10"."l_orderkey" FROM ( SELECT "t8"."l_orderkey", @@ -143,19 +143,19 @@ FROM ( FROM "t5" AS "t8" GROUP BY 1 - ) AS "t11" + ) AS "t10" WHERE - "t11"."qty_sum" > 300 + "t10"."qty_sum" > 300 ) - ) AS "t15" + ) AS "t13" GROUP BY 1, 2, 3, 4, 5 -) AS "t16" +) AS "t14" ORDER BY - "t16"."o_totalprice" DESC, - "t16"."o_orderdate" ASC + "t14"."o_totalprice" DESC, + "t14"."o_orderdate" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql index 29adca6df1be..b33da3fd86a6 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql @@ -1,34 +1,34 @@ SELECT - SUM(t6.l_extendedprice * ( - CAST(1 AS TINYINT) - t6.l_discount + SUM(t5.l_extendedprice * ( + CAST(1 AS TINYINT) - t5.l_discount )) AS revenue FROM ( SELECT - t5.l_orderkey, - 
t5.l_partkey, - t5.l_suppkey, - t5.l_linenumber, - t5.l_quantity, - t5.l_extendedprice, - t5.l_discount, - t5.l_tax, - t5.l_returnflag, - t5.l_linestatus, - t5.l_shipdate, - t5.l_commitdate, - t5.l_receiptdate, - t5.l_shipinstruct, - t5.l_shipmode, - t5.l_comment, - t5.p_partkey, - t5.p_name, - t5.p_mfgr, - t5.p_brand, - t5.p_type, - t5.p_size, - t5.p_container, - t5.p_retailprice, - t5.p_comment + t4.l_orderkey, + t4.l_partkey, + t4.l_suppkey, + t4.l_linenumber, + t4.l_quantity, + t4.l_extendedprice, + t4.l_discount, + t4.l_tax, + t4.l_returnflag, + t4.l_linestatus, + t4.l_shipdate, + t4.l_commitdate, + t4.l_receiptdate, + t4.l_shipinstruct, + t4.l_shipmode, + t4.l_comment, + t4.p_partkey, + t4.p_name, + t4.p_mfgr, + t4.p_brand, + t4.p_type, + t4.p_size, + t4.p_container, + t4.p_retailprice, + t4.p_comment FROM ( SELECT t2.l_orderkey, @@ -59,7 +59,7 @@ FROM ( FROM lineitem AS t2 INNER JOIN part AS t3 ON t3.p_partkey = t2.l_partkey - ) AS t5 + ) AS t4 WHERE ( ( @@ -69,24 +69,24 @@ FROM ( ( ( ( - t5.p_brand = 'Brand#12' + t4.p_brand = 'Brand#12' ) - AND t5.p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + AND t4.p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') ) AND ( - t5.l_quantity >= CAST(1 AS TINYINT) + t4.l_quantity >= CAST(1 AS TINYINT) ) ) AND ( - t5.l_quantity <= CAST(11 AS TINYINT) + t4.l_quantity <= CAST(11 AS TINYINT) ) ) - AND t5.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(5 AS TINYINT) + AND t4.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(5 AS TINYINT) ) - AND t5.l_shipmode IN ('AIR', 'AIR REG') + AND t4.l_shipmode IN ('AIR', 'AIR REG') ) AND ( - t5.l_shipinstruct = 'DELIVER IN PERSON' + t4.l_shipinstruct = 'DELIVER IN PERSON' ) ) OR ( @@ -96,24 +96,24 @@ FROM ( ( ( ( - t5.p_brand = 'Brand#23' + t4.p_brand = 'Brand#23' ) - AND t5.p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + AND t4.p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') ) AND ( - t5.l_quantity >= CAST(10 AS TINYINT) + t4.l_quantity >= CAST(10 AS TINYINT) ) ) AND ( - t5.l_quantity <= CAST(20 AS TINYINT) + t4.l_quantity <= CAST(20 AS TINYINT) ) ) - AND t5.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(10 AS TINYINT) + AND t4.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(10 AS TINYINT) ) - AND t5.l_shipmode IN ('AIR', 'AIR REG') + AND t4.l_shipmode IN ('AIR', 'AIR REG') ) AND ( - t5.l_shipinstruct = 'DELIVER IN PERSON' + t4.l_shipinstruct = 'DELIVER IN PERSON' ) ) ) @@ -124,24 +124,24 @@ FROM ( ( ( ( - t5.p_brand = 'Brand#34' + t4.p_brand = 'Brand#34' ) - AND t5.p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + AND t4.p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') ) AND ( - t5.l_quantity >= CAST(20 AS TINYINT) + t4.l_quantity >= CAST(20 AS TINYINT) ) ) AND ( - t5.l_quantity <= CAST(30 AS TINYINT) + t4.l_quantity <= CAST(30 AS TINYINT) ) ) - AND t5.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(15 AS TINYINT) + AND t4.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(15 AS TINYINT) ) - AND t5.l_shipmode IN ('AIR', 'AIR REG') + AND t4.l_shipmode IN ('AIR', 'AIR REG') ) AND ( - t5.l_shipinstruct = 'DELIVER IN PERSON' + t4.l_shipinstruct = 'DELIVER IN PERSON' ) ) -) AS t6 \ No newline at end of file +) AS t5 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql index 4d2f688bde96..b5210a1aab12 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql +++ 
b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql @@ -1,34 +1,34 @@ SELECT - SUM("t8"."l_extendedprice" * ( - 1 - "t8"."l_discount" + SUM("t7"."l_extendedprice" * ( + 1 - "t7"."l_discount" )) AS "revenue" FROM ( SELECT - "t7"."l_orderkey", - "t7"."l_partkey", - "t7"."l_suppkey", - "t7"."l_linenumber", - "t7"."l_quantity", - "t7"."l_extendedprice", - "t7"."l_discount", - "t7"."l_tax", - "t7"."l_returnflag", - "t7"."l_linestatus", - "t7"."l_shipdate", - "t7"."l_commitdate", - "t7"."l_receiptdate", - "t7"."l_shipinstruct", - "t7"."l_shipmode", - "t7"."l_comment", - "t7"."p_partkey", - "t7"."p_name", - "t7"."p_mfgr", - "t7"."p_brand", - "t7"."p_type", - "t7"."p_size", - "t7"."p_container", - "t7"."p_retailprice", - "t7"."p_comment" + "t6"."l_orderkey", + "t6"."l_partkey", + "t6"."l_suppkey", + "t6"."l_linenumber", + "t6"."l_quantity", + "t6"."l_extendedprice", + "t6"."l_discount", + "t6"."l_tax", + "t6"."l_returnflag", + "t6"."l_linestatus", + "t6"."l_shipdate", + "t6"."l_commitdate", + "t6"."l_receiptdate", + "t6"."l_shipinstruct", + "t6"."l_shipmode", + "t6"."l_comment", + "t6"."p_partkey", + "t6"."p_name", + "t6"."p_mfgr", + "t6"."p_brand", + "t6"."p_type", + "t6"."p_size", + "t6"."p_container", + "t6"."p_retailprice", + "t6"."p_comment" FROM ( SELECT "t4"."l_orderkey", @@ -90,7 +90,7 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS "t1" ) AS "t5" ON "t5"."p_partkey" = "t4"."l_partkey" - ) AS "t7" + ) AS "t6" WHERE ( ( @@ -100,24 +100,24 @@ FROM ( ( ( ( - "t7"."p_brand" = 'Brand#12' + "t6"."p_brand" = 'Brand#12' ) - AND "t7"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + AND "t6"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') ) AND ( - "t7"."l_quantity" >= 1 + "t6"."l_quantity" >= 1 ) ) AND ( - "t7"."l_quantity" <= 11 + "t6"."l_quantity" <= 11 ) ) - AND "t7"."p_size" BETWEEN 1 AND 5 + AND "t6"."p_size" BETWEEN 1 AND 5 ) - AND "t7"."l_shipmode" IN ('AIR', 'AIR REG') + AND "t6"."l_shipmode" IN ('AIR', 'AIR REG') ) AND ( - "t7"."l_shipinstruct" = 'DELIVER IN PERSON' + "t6"."l_shipinstruct" = 'DELIVER IN PERSON' ) ) OR ( @@ -127,24 +127,24 @@ FROM ( ( ( ( - "t7"."p_brand" = 'Brand#23' + "t6"."p_brand" = 'Brand#23' ) - AND "t7"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + AND "t6"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') ) AND ( - "t7"."l_quantity" >= 10 + "t6"."l_quantity" >= 10 ) ) AND ( - "t7"."l_quantity" <= 20 + "t6"."l_quantity" <= 20 ) ) - AND "t7"."p_size" BETWEEN 1 AND 10 + AND "t6"."p_size" BETWEEN 1 AND 10 ) - AND "t7"."l_shipmode" IN ('AIR', 'AIR REG') + AND "t6"."l_shipmode" IN ('AIR', 'AIR REG') ) AND ( - "t7"."l_shipinstruct" = 'DELIVER IN PERSON' + "t6"."l_shipinstruct" = 'DELIVER IN PERSON' ) ) ) @@ -155,24 +155,24 @@ FROM ( ( ( ( - "t7"."p_brand" = 'Brand#34' + "t6"."p_brand" = 'Brand#34' ) - AND "t7"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + AND "t6"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') ) AND ( - "t7"."l_quantity" >= 20 + "t6"."l_quantity" >= 20 ) ) AND ( - "t7"."l_quantity" <= 30 + "t6"."l_quantity" <= 30 ) ) - AND "t7"."p_size" BETWEEN 1 AND 15 + AND "t6"."p_size" BETWEEN 1 AND 15 ) - AND "t7"."l_shipmode" IN ('AIR', 'AIR REG') + AND "t6"."l_shipmode" IN ('AIR', 'AIR REG') ) AND ( - "t7"."l_shipinstruct" = 'DELIVER IN PERSON' + "t6"."l_shipinstruct" = 'DELIVER IN PERSON' ) ) -) AS "t8" \ No newline at end of file +) AS "t7" \ No newline at end of file diff --git 
a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql index 7f859fefa591..7e820dbb7484 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql @@ -1,34 +1,34 @@ SELECT - SUM("t8"."l_extendedprice" * ( - 1 - "t8"."l_discount" + SUM("t7"."l_extendedprice" * ( + 1 - "t7"."l_discount" )) AS "revenue" FROM ( SELECT - "t7"."l_orderkey", - "t7"."l_partkey", - "t7"."l_suppkey", - "t7"."l_linenumber", - "t7"."l_quantity", - "t7"."l_extendedprice", - "t7"."l_discount", - "t7"."l_tax", - "t7"."l_returnflag", - "t7"."l_linestatus", - "t7"."l_shipdate", - "t7"."l_commitdate", - "t7"."l_receiptdate", - "t7"."l_shipinstruct", - "t7"."l_shipmode", - "t7"."l_comment", - "t7"."p_partkey", - "t7"."p_name", - "t7"."p_mfgr", - "t7"."p_brand", - "t7"."p_type", - "t7"."p_size", - "t7"."p_container", - "t7"."p_retailprice", - "t7"."p_comment" + "t6"."l_orderkey", + "t6"."l_partkey", + "t6"."l_suppkey", + "t6"."l_linenumber", + "t6"."l_quantity", + "t6"."l_extendedprice", + "t6"."l_discount", + "t6"."l_tax", + "t6"."l_returnflag", + "t6"."l_linestatus", + "t6"."l_shipdate", + "t6"."l_commitdate", + "t6"."l_receiptdate", + "t6"."l_shipinstruct", + "t6"."l_shipmode", + "t6"."l_comment", + "t6"."p_partkey", + "t6"."p_name", + "t6"."p_mfgr", + "t6"."p_brand", + "t6"."p_type", + "t6"."p_size", + "t6"."p_container", + "t6"."p_retailprice", + "t6"."p_comment" FROM ( SELECT "t4"."l_orderkey", @@ -90,7 +90,7 @@ FROM ( FROM "hive"."ibis_sf1"."part" AS "t1" ) AS "t5" ON "t5"."p_partkey" = "t4"."l_partkey" - ) AS "t7" + ) AS "t6" WHERE ( ( @@ -100,24 +100,24 @@ FROM ( ( ( ( - "t7"."p_brand" = 'Brand#12' + "t6"."p_brand" = 'Brand#12' ) - AND "t7"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + AND "t6"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') ) AND ( - "t7"."l_quantity" >= 1 + "t6"."l_quantity" >= 1 ) ) AND ( - "t7"."l_quantity" <= 11 + "t6"."l_quantity" <= 11 ) ) - AND "t7"."p_size" BETWEEN 1 AND 5 + AND "t6"."p_size" BETWEEN 1 AND 5 ) - AND "t7"."l_shipmode" IN ('AIR', 'AIR REG') + AND "t6"."l_shipmode" IN ('AIR', 'AIR REG') ) AND ( - "t7"."l_shipinstruct" = 'DELIVER IN PERSON' + "t6"."l_shipinstruct" = 'DELIVER IN PERSON' ) ) OR ( @@ -127,24 +127,24 @@ FROM ( ( ( ( - "t7"."p_brand" = 'Brand#23' + "t6"."p_brand" = 'Brand#23' ) - AND "t7"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + AND "t6"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') ) AND ( - "t7"."l_quantity" >= 10 + "t6"."l_quantity" >= 10 ) ) AND ( - "t7"."l_quantity" <= 20 + "t6"."l_quantity" <= 20 ) ) - AND "t7"."p_size" BETWEEN 1 AND 10 + AND "t6"."p_size" BETWEEN 1 AND 10 ) - AND "t7"."l_shipmode" IN ('AIR', 'AIR REG') + AND "t6"."l_shipmode" IN ('AIR', 'AIR REG') ) AND ( - "t7"."l_shipinstruct" = 'DELIVER IN PERSON' + "t6"."l_shipinstruct" = 'DELIVER IN PERSON' ) ) ) @@ -155,24 +155,24 @@ FROM ( ( ( ( - "t7"."p_brand" = 'Brand#34' + "t6"."p_brand" = 'Brand#34' ) - AND "t7"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + AND "t6"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') ) AND ( - "t7"."l_quantity" >= 20 + "t6"."l_quantity" >= 20 ) ) AND ( - "t7"."l_quantity" <= 30 + "t6"."l_quantity" <= 30 ) ) - AND "t7"."p_size" BETWEEN 1 AND 15 + AND "t6"."p_size" BETWEEN 1 AND 15 ) - AND "t7"."l_shipmode" IN ('AIR', 'AIR REG') + AND "t6"."l_shipmode" IN ('AIR', 'AIR REG') ) AND ( - 
"t7"."l_shipinstruct" = 'DELIVER IN PERSON' + "t6"."l_shipinstruct" = 'DELIVER IN PERSON' ) ) -) AS "t8" \ No newline at end of file +) AS "t7" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql index 111f26421e9a..87dee39630d3 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql @@ -1,6 +1,6 @@ SELECT - t10.s_name, - t10.s_address + t9.s_name, + t9.s_address FROM ( SELECT t5.s_suppkey, @@ -17,10 +17,10 @@ FROM ( FROM supplier AS t5 INNER JOIN nation AS t6 ON t5.s_nationkey = t6.n_nationkey -) AS t10 +) AS t9 WHERE - t10.n_name = 'CANADA' - AND t10.s_suppkey IN ( + t9.n_name = 'CANADA' + AND t9.s_suppkey IN ( SELECT t1.ps_suppkey FROM partsupp AS t1 @@ -65,4 +65,4 @@ WHERE ) ) ORDER BY - t10.s_name ASC \ No newline at end of file + t9.s_name ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql index fb4b2b507e07..ee7298e9314c 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql @@ -1,6 +1,6 @@ SELECT - "t13"."s_name", - "t13"."s_address" + "t12"."s_name", + "t12"."s_address" FROM ( SELECT "t8"."s_suppkey", @@ -34,10 +34,10 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t2" ) AS "t9" ON "t8"."s_nationkey" = "t9"."n_nationkey" -) AS "t13" +) AS "t12" WHERE - "t13"."n_name" = 'CANADA' - AND "t13"."s_suppkey" IN ( + "t12"."n_name" = 'CANADA' + AND "t12"."s_suppkey" IN ( SELECT "t6"."ps_suppkey" FROM ( @@ -90,4 +90,4 @@ WHERE ) ) ORDER BY - "t13"."s_name" ASC \ No newline at end of file + "t12"."s_name" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql index d9e0e24998e1..c1cfd46abfd8 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql @@ -1,6 +1,6 @@ SELECT - "t13"."s_name", - "t13"."s_address" + "t12"."s_name", + "t12"."s_address" FROM ( SELECT "t10"."s_suppkey", @@ -34,10 +34,10 @@ FROM ( FROM "hive"."ibis_sf1"."nation" AS "t2" ) AS "t8" ON "t10"."s_nationkey" = "t8"."n_nationkey" -) AS "t13" +) AS "t12" WHERE - "t13"."n_name" = 'CANADA' - AND "t13"."s_suppkey" IN ( + "t12"."n_name" = 'CANADA' + AND "t12"."s_suppkey" IN ( SELECT "t7"."ps_suppkey" FROM ( @@ -90,4 +90,4 @@ WHERE ) ) ORDER BY - "t13"."s_name" ASC \ No newline at end of file + "t12"."s_name" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql index 72dd9ea9697b..3e5527999c44 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql @@ -1,19 +1,19 @@ SELECT - t17.s_name, - t17.numwait + t14.s_name, + t14.numwait FROM ( SELECT - t16.s_name, + t13.s_name, COUNT(*) AS numwait FROM ( SELECT - t13.l1_orderkey, - t13.o_orderstatus, - t13.l_receiptdate, - t13.l_commitdate, - t13.l1_suppkey, - t13.s_name, - t13.n_name + t10.l1_orderkey, + 
t10.o_orderstatus, + t10.l_receiptdate, + t10.l_commitdate, + t10.l1_suppkey, + t10.s_name, + t10.n_name FROM ( SELECT t5.l_orderkey AS l1_orderkey, @@ -30,20 +30,20 @@ FROM ( ON t8.o_orderkey = t5.l_orderkey INNER JOIN nation AS t9 ON t4.s_nationkey = t9.n_nationkey - ) AS t13 + ) AS t10 WHERE - t13.o_orderstatus = 'F' - AND t13.l_receiptdate > t13.l_commitdate - AND t13.n_name = 'SAUDI ARABIA' + t10.o_orderstatus = 'F' + AND t10.l_receiptdate > t10.l_commitdate + AND t10.n_name = 'SAUDI ARABIA' AND EXISTS( SELECT CAST(1 AS TINYINT) AS "1" FROM lineitem AS t6 WHERE ( - t6.l_orderkey = t13.l1_orderkey + t6.l_orderkey = t10.l1_orderkey ) AND ( - t6.l_suppkey <> t13.l1_suppkey + t6.l_suppkey <> t10.l1_suppkey ) ) AND NOT ( @@ -54,9 +54,9 @@ FROM ( WHERE ( ( - t7.l_orderkey = t13.l1_orderkey + t7.l_orderkey = t10.l1_orderkey ) AND ( - t7.l_suppkey <> t13.l1_suppkey + t7.l_suppkey <> t10.l1_suppkey ) ) AND ( @@ -64,11 +64,11 @@ FROM ( ) ) ) - ) AS t16 + ) AS t13 GROUP BY 1 -) AS t17 +) AS t14 ORDER BY - t17.numwait DESC, - t17.s_name ASC + t14.numwait DESC, + t14.s_name ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql index 6fbec4262788..7d7d29a2a839 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql @@ -19,21 +19,21 @@ WITH "t7" AS ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS "t3" ) SELECT - "t22"."s_name", - "t22"."numwait" + "t19"."s_name", + "t19"."numwait" FROM ( SELECT - "t21"."s_name", + "t18"."s_name", COUNT(*) AS "numwait" FROM ( SELECT - "t18"."l1_orderkey", - "t18"."o_orderstatus", - "t18"."l_receiptdate", - "t18"."l_commitdate", - "t18"."l1_suppkey", - "t18"."s_name", - "t18"."n_name" + "t15"."l1_orderkey", + "t15"."o_orderstatus", + "t15"."l_receiptdate", + "t15"."l_commitdate", + "t15"."l1_suppkey", + "t15"."s_name", + "t15"."n_name" FROM ( SELECT "t12"."l_orderkey" AS "l1_orderkey", @@ -79,21 +79,21 @@ FROM ( FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS "t2" ) AS "t10" ON "t8"."s_nationkey" = "t10"."n_nationkey" - ) AS "t18" + ) AS "t15" WHERE - "t18"."o_orderstatus" = 'F' - AND "t18"."l_receiptdate" > "t18"."l_commitdate" - AND "t18"."n_name" = 'SAUDI ARABIA' + "t15"."o_orderstatus" = 'F' + AND "t15"."l_receiptdate" > "t15"."l_commitdate" + AND "t15"."n_name" = 'SAUDI ARABIA' AND EXISTS( SELECT 1 AS "1" FROM "t7" AS "t13" WHERE ( - "t13"."l_orderkey" = "t18"."l1_orderkey" + "t13"."l_orderkey" = "t15"."l1_orderkey" ) AND ( - "t13"."l_suppkey" <> "t18"."l1_suppkey" + "t13"."l_suppkey" <> "t15"."l1_suppkey" ) ) AND NOT ( @@ -104,10 +104,10 @@ FROM ( WHERE ( ( - "t14"."l_orderkey" = "t18"."l1_orderkey" + "t14"."l_orderkey" = "t15"."l1_orderkey" ) AND ( - "t14"."l_suppkey" <> "t18"."l1_suppkey" + "t14"."l_suppkey" <> "t15"."l1_suppkey" ) ) AND ( @@ -115,11 +115,11 @@ FROM ( ) ) ) - ) AS "t21" + ) AS "t18" GROUP BY 1 -) AS "t22" +) AS "t19" ORDER BY - "t22"."numwait" DESC NULLS LAST, - "t22"."s_name" ASC + "t19"."numwait" DESC NULLS LAST, + "t19"."s_name" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql index 774b842175e1..4a0e86189ec0 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql +++ 
b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql @@ -19,21 +19,21 @@ WITH "t8" AS ( FROM "hive"."ibis_sf1"."lineitem" AS "t3" ) SELECT - "t22"."s_name", - "t22"."numwait" + "t19"."s_name", + "t19"."numwait" FROM ( SELECT - "t21"."s_name", + "t18"."s_name", COUNT(*) AS "numwait" FROM ( SELECT - "t18"."l1_orderkey", - "t18"."o_orderstatus", - "t18"."l_receiptdate", - "t18"."l_commitdate", - "t18"."l1_suppkey", - "t18"."s_name", - "t18"."n_name" + "t15"."l1_orderkey", + "t15"."o_orderstatus", + "t15"."l_receiptdate", + "t15"."l_commitdate", + "t15"."l1_suppkey", + "t15"."s_name", + "t15"."n_name" FROM ( SELECT "t12"."l_orderkey" AS "l1_orderkey", @@ -79,21 +79,21 @@ FROM ( FROM "hive"."ibis_sf1"."nation" AS "t2" ) AS "t7" ON "t9"."s_nationkey" = "t7"."n_nationkey" - ) AS "t18" + ) AS "t15" WHERE - "t18"."o_orderstatus" = 'F' - AND "t18"."l_receiptdate" > "t18"."l_commitdate" - AND "t18"."n_name" = 'SAUDI ARABIA' + "t15"."o_orderstatus" = 'F' + AND "t15"."l_receiptdate" > "t15"."l_commitdate" + AND "t15"."n_name" = 'SAUDI ARABIA' AND EXISTS( SELECT 1 AS "1" FROM "t8" AS "t13" WHERE ( - "t13"."l_orderkey" = "t18"."l1_orderkey" + "t13"."l_orderkey" = "t15"."l1_orderkey" ) AND ( - "t13"."l_suppkey" <> "t18"."l1_suppkey" + "t13"."l_suppkey" <> "t15"."l1_suppkey" ) ) AND NOT ( @@ -104,10 +104,10 @@ FROM ( WHERE ( ( - "t14"."l_orderkey" = "t18"."l1_orderkey" + "t14"."l_orderkey" = "t15"."l1_orderkey" ) AND ( - "t14"."l_suppkey" <> "t18"."l1_suppkey" + "t14"."l_suppkey" <> "t15"."l1_suppkey" ) ) AND ( @@ -115,11 +115,11 @@ FROM ( ) ) ) - ) AS "t21" + ) AS "t18" GROUP BY 1 -) AS "t22" +) AS "t19" ORDER BY - "t22"."numwait" DESC, - "t22"."s_name" ASC + "t19"."numwait" DESC, + "t19"."s_name" ASC LIMIT 100 \ No newline at end of file From e4e2dc8628643aa5353cd9831b42445e961e95e8 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 22 Jan 2024 07:19:59 -0500 Subject: [PATCH 107/161] chore(impala): regen snapshots --- .../test_sql/test_join_aliasing/out.sql | 22 ++--- .../test_sql/test_join_key_name/out.sql | 80 +++++++++---------- .../test_sql/test_join_key_name2/out.sql | 18 ++--- .../test_nested_join_multiple_ctes/out.sql | 40 +++++----- 4 files changed, 80 insertions(+), 80 deletions(-) diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_join_aliasing/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_join_aliasing/out.sql index 12d1e4ecb77f..29c3c615d8f2 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_join_aliasing/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_join_aliasing/out.sql @@ -20,7 +20,7 @@ SELECT `t6`.`b`, `t6`.`count`, `t6`.`unique`, - `t14`.`total` + `t13`.`total` FROM ( SELECT `t1`.`d`, @@ -41,11 +41,11 @@ FROM ( ) AS `t6` INNER JOIN ( SELECT - `t12`.`d`, - `t12`.`idx`, - `t12`.`c`, - `t12`.`row_count`, - `t12`.`total` + `t11`.`d`, + `t11`.`idx`, + `t11`.`c`, + `t11`.`row_count`, + `t11`.`total` FROM ( SELECT `t8`.`d`, @@ -63,10 +63,10 @@ INNER JOIN ( 1 ) AS `t10` ON `t8`.`d` = `t10`.`d` - ) AS `t12` + ) AS `t11` WHERE - `t12`.`row_count` < ( - `t12`.`total` / 2 + `t11`.`row_count` < ( + `t11`.`total` / 2 ) -) AS `t14` - ON `t6`.`d` = `t14`.`d` \ No newline at end of file +) AS `t13` + ON `t6`.`d` = `t13`.`d` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name/out.sql index c7e04edfeb7a..58a634e12636 100644 --- 
a/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name/out.sql @@ -1,4 +1,4 @@ -WITH `t11` AS ( +WITH `t8` AS ( SELECT `t6`.`c_custkey`, `t6`.`c_name`, @@ -20,60 +20,60 @@ WITH `t11` AS ( ON `t7`.`o_custkey` = `t6`.`c_custkey` ) SELECT - `t15`.`year`, - `t15`.`CountStar()` AS `pre_count`, - `t20`.`CountStar()` AS `post_count`, - `t20`.`CountStar()` / CAST(`t15`.`CountStar()` AS DOUBLE) AS `fraction` + `t12`.`year`, + `t12`.`CountStar()` AS `pre_count`, + `t17`.`CountStar()` AS `post_count`, + `t17`.`CountStar()` / CAST(`t12`.`CountStar()` AS DOUBLE) AS `fraction` FROM ( SELECT - EXTRACT(year FROM `t12`.`odate`) AS `year`, + EXTRACT(year FROM `t9`.`odate`) AS `year`, COUNT(*) AS `CountStar()` - FROM `t11` AS `t12` + FROM `t8` AS `t9` GROUP BY 1 -) AS `t15` +) AS `t12` INNER JOIN ( SELECT - EXTRACT(year FROM `t18`.`odate`) AS `year`, + EXTRACT(year FROM `t15`.`odate`) AS `year`, COUNT(*) AS `CountStar()` FROM ( SELECT - `t12`.`c_custkey`, - `t12`.`c_name`, - `t12`.`c_address`, - `t12`.`c_nationkey`, - `t12`.`c_phone`, - `t12`.`c_acctbal`, - `t12`.`c_mktsegment`, - `t12`.`c_comment`, - `t12`.`region`, - `t12`.`o_totalprice`, - `t12`.`odate` - FROM `t11` AS `t12` + `t9`.`c_custkey`, + `t9`.`c_name`, + `t9`.`c_address`, + `t9`.`c_nationkey`, + `t9`.`c_phone`, + `t9`.`c_acctbal`, + `t9`.`c_mktsegment`, + `t9`.`c_comment`, + `t9`.`region`, + `t9`.`o_totalprice`, + `t9`.`odate` + FROM `t8` AS `t9` WHERE - `t12`.`o_totalprice` > ( + `t9`.`o_totalprice` > ( SELECT - AVG(`t16`.`o_totalprice`) AS `Mean(o_totalprice)` + AVG(`t13`.`o_totalprice`) AS `Mean(o_totalprice)` FROM ( SELECT - `t13`.`c_custkey`, - `t13`.`c_name`, - `t13`.`c_address`, - `t13`.`c_nationkey`, - `t13`.`c_phone`, - `t13`.`c_acctbal`, - `t13`.`c_mktsegment`, - `t13`.`c_comment`, - `t13`.`region`, - `t13`.`o_totalprice`, - `t13`.`odate` - FROM `t11` AS `t13` + `t10`.`c_custkey`, + `t10`.`c_name`, + `t10`.`c_address`, + `t10`.`c_nationkey`, + `t10`.`c_phone`, + `t10`.`c_acctbal`, + `t10`.`c_mktsegment`, + `t10`.`c_comment`, + `t10`.`region`, + `t10`.`o_totalprice`, + `t10`.`odate` + FROM `t8` AS `t10` WHERE - `t13`.`region` = `t12`.`region` - ) AS `t16` + `t10`.`region` = `t9`.`region` + ) AS `t13` ) - ) AS `t18` + ) AS `t15` GROUP BY 1 -) AS `t20` - ON `t15`.`year` = `t20`.`year` \ No newline at end of file +) AS `t17` + ON `t12`.`year` = `t17`.`year` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name2/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name2/out.sql index 39c29d332eba..d1e5ad2187ee 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name2/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_join_key_name2/out.sql @@ -1,6 +1,6 @@ -WITH `t12` AS ( +WITH `t9` AS ( SELECT - EXTRACT(year FROM `t11`.`odate`) AS `year`, + EXTRACT(year FROM `t8`.`odate`) AS `year`, COUNT(*) AS `CountStar()` FROM ( SELECT @@ -22,14 +22,14 @@ WITH `t12` AS ( ON `t6`.`c_nationkey` = `t5`.`n_nationkey` INNER JOIN `tpch_orders` AS `t7` ON `t7`.`o_custkey` = `t6`.`c_custkey` - ) AS `t11` + ) AS `t8` GROUP BY 1 ) SELECT - `t14`.`year`, - `t14`.`CountStar()` AS `pre_count`, - `t16`.`CountStar()` AS `post_count` -FROM `t12` AS `t14` -INNER JOIN `t12` AS `t16` - ON `t14`.`year` = `t16`.`year` \ No newline at end of file + `t11`.`year`, + `t11`.`CountStar()` AS `pre_count`, + `t13`.`CountStar()` AS `post_count` +FROM `t9` AS `t11` +INNER JOIN `t9` AS `t13` + 
ON `t11`.`year` = `t13`.`year` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_multiple_ctes/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_multiple_ctes/out.sql index 2ff8a2515300..9430e98e4f6e 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_multiple_ctes/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_multiple_ctes/out.sql @@ -1,4 +1,4 @@ -WITH `t6` AS ( +WITH `t5` AS ( SELECT `t4`.`userid`, `t4`.`movieid`, @@ -17,29 +17,29 @@ WITH `t6` AS ( ON `t4`.`movieid` = `t2`.`movieid` ) SELECT - `t8`.`userid`, - `t8`.`movieid`, - `t8`.`rating`, - `t8`.`datetime`, - `t8`.`title` + `t7`.`userid`, + `t7`.`movieid`, + `t7`.`rating`, + `t7`.`datetime`, + `t7`.`title` FROM ( SELECT - `t7`.`userid`, - `t7`.`movieid`, - `t7`.`rating`, - `t7`.`datetime`, - `t7`.`title` - FROM `t6` AS `t7` + `t6`.`userid`, + `t6`.`movieid`, + `t6`.`rating`, + `t6`.`datetime`, + `t6`.`title` + FROM `t5` AS `t6` WHERE - `t7`.`userid` = 118205 AND EXTRACT(year FROM `t7`.`datetime`) > 2001 -) AS `t8` + `t6`.`userid` = 118205 AND EXTRACT(year FROM `t6`.`datetime`) > 2001 +) AS `t7` WHERE - `t8`.`movieid` IN ( + `t7`.`movieid` IN ( SELECT - `t7`.`movieid` - FROM `t6` AS `t7` + `t6`.`movieid` + FROM `t5` AS `t6` WHERE - `t7`.`userid` = 118205 - AND EXTRACT(year FROM `t7`.`datetime`) > 2001 - AND EXTRACT(year FROM `t7`.`datetime`) < 2009 + `t6`.`userid` = 118205 + AND EXTRACT(year FROM `t6`.`datetime`) > 2001 + AND EXTRACT(year FROM `t6`.`datetime`) < 2009 ) \ No newline at end of file From 1f4d0c8817f4b69946f5cf9c6ed427efb4e65099 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Tue, 23 Jan 2024 12:14:02 -0500 Subject: [PATCH 108/161] test(markers): add tests for custom markers I had a thought about a potential edge case with `None` being the only listed "Exception" in a `raises` argument. This fixes the edge case and also adds some simple tests to make sure our custom markers correctly handle when an imported backend exception is set to `None` due to a missing import. 
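As an illustration (mirroring the new test cases added in this patch, so nothing here is invented beyond the framing): the edge case is a marker whose only "exception" is a backend error type that failed to import and is therefore `None`; the conftest change below filters those out and pops the argument entirely instead of handing an empty tuple to xfail.

    # Sketch only: `all_backends` and the `con` fixture are the ones used in
    # the new ibis/backends/tests/test_markers.py added by this patch.
    @pytest.mark.notimpl(all_backends, raises=None)
    def test_notimpl_raises_none(con):
        raise Exception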
--- ibis/backends/conftest.py | 9 +++- ibis/backends/tests/test_markers.py | 67 +++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 ibis/backends/tests/test_markers.py diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index 7bc3249a7873..051cf12146d8 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -419,7 +419,14 @@ def _filter_none_from_raises(kwargs): # Filter out any None values from kwargs['raises'] # to cover any missing backend error types as defined in ibis/backends/tests/errors.py if (raises := kwargs.get("raises")) is not None: - kwargs["raises"] = tuple(filter(None, promote_tuple(raises))) + raises = tuple(filter(None, promote_tuple(raises))) + if raises: + kwargs["raises"] = raises + else: + # if filtering removes all of the values of raises pop the + # argument otherwise it gets passed as an empty tuple and this + # messes up xfail + kwargs.pop("raises") return kwargs # Ibis hasn't exposed existing functionality diff --git a/ibis/backends/tests/test_markers.py b/ibis/backends/tests/test_markers.py new file mode 100644 index 000000000000..c01662e9c181 --- /dev/null +++ b/ibis/backends/tests/test_markers.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import pytest + +from ibis.backends.base import _get_backend_names + +all_backends = list(_get_backend_names()) + + +@pytest.mark.notimpl(all_backends) +def test_notimpl(con): + raise Exception + + +@pytest.mark.notimpl(all_backends, raises=None) +def test_notimpl_raises_none(con): + raise Exception + + +@pytest.mark.notimpl(all_backends, raises=(None, None)) +def test_notimpl_raises_none_tuple(con): + raise Exception + + +@pytest.mark.notimpl(all_backends, raises=(Exception, None)) +def test_notimpl_raises_tuple_exception_none(con): + raise Exception + + +@pytest.mark.notyet(all_backends) +def test_notyet(con): + raise Exception + + +@pytest.mark.notyet(all_backends, raises=None) +def test_notyet_raises_none(con): + raise Exception + + +@pytest.mark.notyet(all_backends, raises=(None, None)) +def test_notyet_raises_none_tuple(con): + raise Exception + + +@pytest.mark.notyet(all_backends, raises=(Exception, None)) +def test_notyet_raises_tuple_exception_none(con): + raise Exception + + +@pytest.mark.never(all_backends, reason="because I said so") +def test_never(con): + raise Exception + + +@pytest.mark.never(all_backends, raises=None, reason="because I said so") +def test_never_raises_none(con): + raise Exception + + +@pytest.mark.never(all_backends, raises=(None, None), reason="because I said so") +def test_never_raises_none_tuple(con): + raise Exception + + +@pytest.mark.never(all_backends, raises=(Exception, None), reason="because I said so") +def test_never_raises_tuple_exception_none(con): + raise Exception From 2b4f0d3664627de2e1248be46e9dc69e697c9b0f Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Wed, 24 Jan 2024 15:47:29 -0500 Subject: [PATCH 109/161] fix(duckdb): allow passing both overwrite and temp to create_table Currently with a workaround for an upstream DuckDB bug but should be easy to simplify once the upstream fix is released. 
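In user-facing terms the fix allows the combination sketched below (hedged example; `con` is a DuckDB connection and `df` is any object `create_table` already accepts, such as a pandas DataFrame):

    # Previously this combination failed; now the table is created as a
    # temporary table and any existing table with the same name is replaced.
    con.create_table("my_table", df, temp=True, overwrite=True)

In the temporary case the implementation copies into a fresh temp table rather than renaming, to sidestep the upstream DuckDB rename bug referenced above (see the TODO in the diff below).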
--- ibis/backends/duckdb/__init__.py | 46 +++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index f13b8b8127cd..af1e4e84893c 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -199,8 +199,10 @@ def create_table( else: temp_name = name - table = sg.table(temp_name, catalog=database, quoted=self.compiler.quoted) - target = sge.Schema(this=table, expressions=column_defs) + initial_table = sg.table( + temp_name, catalog=database, quoted=self.compiler.quoted + ) + target = sge.Schema(this=initial_table, expressions=column_defs) create_stmt = sge.Create( kind="TABLE", @@ -208,19 +210,45 @@ def create_table( properties=sge.Properties(expressions=properties), ) - this = sg.table(name, catalog=database, quoted=self.compiler.quoted) + # This is the same table as initial_table unless overwrite == True + final_table = sg.table(name, catalog=database, quoted=self.compiler.quoted) with self._safe_raw_sql(create_stmt) as cur: if query is not None: - insert_stmt = sge.Insert(this=table, expression=query).sql(self.name) + insert_stmt = sge.Insert(this=initial_table, expression=query).sql( + self.name + ) cur.execute(insert_stmt).fetchall() if overwrite: cur.execute( - sge.Drop(kind="TABLE", this=this, exists=True).sql(self.name) - ).fetchall() - cur.execute( - f"ALTER TABLE IF EXISTS {table.sql(self.name)} RENAME TO {this.sql(self.name)}" - ).fetchall() + sge.Drop(kind="TABLE", this=final_table, exists=True).sql(self.name) + ) + # TODO: This branching should be removed once DuckDB >=0.9.3 is + # our lower bound (there's an upstream bug in 0.9.2 that + # disallows renaming temp tables) + # We should (pending that release) be able to remove the if temp + # branch entirely. + if temp: + cur.execute( + sge.Create( + kind="TABLE", + this=final_table, + expression=sg.select(STAR).from_(initial_table), + properties=sge.Properties(expressions=properties), + ).sql(self.name) + ) + cur.execute( + sge.Drop(kind="TABLE", this=initial_table, exists=True).sql( + self.name + ) + ) + else: + cur.execute( + sge.AlterTable( + this=initial_table, + actions=[sge.RenameTable(this=final_table)], + ).sql(self.name) + ) return self.table(name, schema=database) From 1b46a218253472bd2678817ed58ed770d8d2be40 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Wed, 24 Jan 2024 15:48:09 -0500 Subject: [PATCH 110/161] refactor(polars): allow passing temp=False to polars create_table The `is not None` check was a bit too aggressive. --- ibis/backends/polars/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 328fbd4b1631..0550f82d3b2d 100644 --- a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -340,7 +340,7 @@ def create_table( "effect: Polars cannot set a database." ) - if temp is not None: + if temp: raise com.IbisError( "Passing `temp=True` to the Polars backend create_table method has no " "effect: all tables are in memory and temporary. " From f9f8a6875f5be85c4a94167d8454f1ff7d51e258 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Wed, 24 Jan 2024 15:48:46 -0500 Subject: [PATCH 111/161] refactor(exasol): add temp kwarg to create_table for api consistency Exasol doesn't support temp tables, but neither does PySpark or other backends. 
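The intended behaviour, sketched (assuming an Exasol connection `con` and a DataFrame `df`): the keyword is accepted for API parity with the other backends but rejected explicitly, rather than failing with an unexpected-keyword TypeError.

    con.create_table("t", df)             # unchanged
    con.create_table("t", df, temp=True)  # raises com.UnsupportedOperationError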
--- ibis/backends/exasol/__init__.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ibis/backends/exasol/__init__.py b/ibis/backends/exasol/__init__.py index 4878d51ef018..1751309cdd74 100644 --- a/ibis/backends/exasol/__init__.py +++ b/ibis/backends/exasol/__init__.py @@ -271,6 +271,7 @@ def create_table( schema: sch.Schema | None = None, database: str | None = None, overwrite: bool = False, + temp: bool = False, ) -> ir.Table: """Create a table in Snowflake. @@ -289,13 +290,20 @@ def create_table( overwrite If `True`, replace the table if it already exists, otherwise fail if the table exists + temp + Create a temporary table (not supported) """ if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") + if temp: + raise com.UnsupportedOperationError( + "Creating temp tables is not supported by Exasol." + ) + if database is not None and database != self.current_database: raise com.UnsupportedOperationError( - "Creating tables in other databases is not supported by Postgres" + "Creating tables in other databases is not supported by Exasol" ) else: database = None From 6557210de745d040944ace25101e37396f600bcb Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Wed, 24 Jan 2024 15:52:34 -0500 Subject: [PATCH 112/161] test(backends): add test for overwrite and temp intersection in create_table --- ibis/backends/tests/test_client.py | 60 ++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 7a5a3d6a3869..0892f3bf0c78 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -107,6 +107,66 @@ def test_create_table(backend, con, temp_table, lamduh, sch): backend.assert_frame_equal(df, result) +@pytest.mark.parametrize( + "temp, overwrite", + [ + param( + True, + True, + id="temp overwrite", + marks=[ + pytest.mark.notyet(["clickhouse"], reason="Can't specify both"), + pytest.mark.notyet( + ["pyspark", "trino", "exasol"], reason="No support for temp tables" + ), + pytest.mark.never(["polars"], reason="Everything in-memory is temp"), + pytest.mark.broken(["mssql"], reason="Incorrect temp table syntax"), + pytest.mark.broken( + ["bigquery"], + reason="tables created with temp=True cause a 404 on retrieval", + ), + ], + ), + param(False, True, id="no temp, overwrite"), + param( + True, + False, + id="temp, no overwrite", + marks=[ + pytest.mark.notyet( + ["pyspark", "trino", "exasol"], reason="No support for temp tables" + ), + pytest.mark.never(["polars"], reason="Everything in-memory is temp"), + pytest.mark.broken(["mssql"], reason="Incorrect temp table syntax"), + pytest.mark.broken( + ["bigquery"], + reason="tables created with temp=True cause a 404 on retrieval", + ), + ], + ), + ], +) +@pytest.mark.notimpl(["druid", "impala"]) +def test_create_table_overwrite_temp(backend, con, temp_table, temp, overwrite): + df = pd.DataFrame( + { + "first_name": ["A", "B", "C"], + "last_name": ["D", "E", "F"], + "department_name": ["AA", "BB", "CC"], + "salary": [100.0, 200.0, 300.0], + } + ) + + con.create_table(temp_table, df, temp=temp, overwrite=overwrite) + if overwrite: + con.create_table(temp_table, df, temp=temp, overwrite=overwrite) + result = ( + con.table(temp_table).execute().sort_values("first_name").reset_index(drop=True) + ) + + backend.assert_frame_equal(df, result) + + @pytest.mark.parametrize( "lamduh", [(lambda df: df), (lambda df: pa.Table.from_pandas(df))], From 
fa991e48a76473f6e8557466e5a06d252432665a Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Wed, 24 Jan 2024 16:53:45 -0500 Subject: [PATCH 113/161] fix(oracle): enable dropping temporary tables Oracle requires you to truncate temp tables before dropping them, missed handling this in the sqlglot port. --- ibis/backends/oracle/__init__.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ibis/backends/oracle/__init__.py b/ibis/backends/oracle/__init__.py index 1f8c430c96cd..7719f3c23388 100644 --- a/ibis/backends/oracle/__init__.py +++ b/ibis/backends/oracle/__init__.py @@ -341,6 +341,27 @@ def create_table( name, schema=schema, source=self, namespace=ops.Namespace(database=database) ).to_expr() + def drop_table( + self, + name: str, + database: str | None = None, + schema: str | None = None, + force: bool = False, + ) -> None: + table = sg.table(name, db=schema, catalog=database, quoted=self.compiler.quoted) + + with self.begin() as bind: + # global temporary tables cannot be dropped without first truncating them + # + # https://stackoverflow.com/questions/32423397/force-oracle-drop-global-temp-table + # + # ignore DatabaseError exceptions because the table may not exist + # because it's already been deleted + with contextlib.suppress(oracledb.DatabaseError): + bind.execute(f"TRUNCATE TABLE {table.sql(self.name)}") + + super().drop_table(name, database=database, schema=schema, force=force) + def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: schema = op.schema From 515ad2b806c6ac68e2b7c316d96b32f0268fefef Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Wed, 24 Jan 2024 16:54:41 -0500 Subject: [PATCH 114/161] fix(oracle): clean up memtables at exit Two issues in one! First, need to register memtables as temp tables, also need to make sure they're cleaned up at exit. 
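The cleanup half follows the usual atexit pattern; a minimal standalone sketch is below (the connection, helper, and table name are hypothetical — the actual change registers the backend's existing _clean_up_tmp_table helper with the memtable's generated name):

    import atexit

    def drop_temp_table(con, name: str) -> None:
        # best-effort drop of the global temporary table at interpreter shutdown
        con.drop_table(name, force=True)

    atexit.register(drop_temp_table, con, "ibis_memtable_example")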
--- ibis/backends/oracle/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ibis/backends/oracle/__init__.py b/ibis/backends/oracle/__init__.py index 7719f3c23388..283cefca95bc 100644 --- a/ibis/backends/oracle/__init__.py +++ b/ibis/backends/oracle/__init__.py @@ -2,6 +2,7 @@ from __future__ import annotations +import atexit import contextlib import re import warnings @@ -385,11 +386,12 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: for colname, typ in schema.items() ] - create_stmt = sg.exp.Create( + create_stmt = sge.Create( kind="TABLE", this=sg.exp.Schema( this=sg.to_identifier(name, quoted=quoted), expressions=column_defs ), + properties=sge.Properties(expressions=[sge.TemporaryProperty()]), ).sql(self.name, pretty=True) data = op.data.to_frame().itertuples(index=False) @@ -401,6 +403,8 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: for row in data: cur.execute(insert_stmt, row) + atexit.register(self._clean_up_tmp_table, name) + def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]: name = util.gen_name("oracle_metadata") dialect = self.name From e1412ea9f95502d3f3b7ad0ab0e6e443b6bedb57 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Wed, 24 Jan 2024 16:55:53 -0500 Subject: [PATCH 115/161] fix(oracle): allow passing both overwrite and temp to create_table --- ibis/backends/oracle/__init__.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/ibis/backends/oracle/__init__.py b/ibis/backends/oracle/__init__.py index 283cefca95bc..5dd3fde4cfa9 100644 --- a/ibis/backends/oracle/__init__.py +++ b/ibis/backends/oracle/__init__.py @@ -311,8 +311,10 @@ def create_table( else: temp_name = name - table = sg.table(temp_name, catalog=database, quoted=self.compiler.quoted) - target = sge.Schema(this=table, expressions=column_defs) + initial_table = sg.table( + temp_name, catalog=database, quoted=self.compiler.quoted + ) + target = sge.Schema(this=initial_table, expressions=column_defs) create_stmt = sge.Create( kind="TABLE", @@ -320,18 +322,21 @@ def create_table( properties=sge.Properties(expressions=properties), ) - this = sg.table(name, catalog=database, quoted=self.compiler.quoted) + # This is the same table as initial_table unless overwrite == True + final_table = sg.table(name, catalog=database, quoted=self.compiler.quoted) with self._safe_raw_sql(create_stmt) as cur: if query is not None: - insert_stmt = sge.Insert(this=table, expression=query).sql(self.name) + insert_stmt = sge.Insert(this=initial_table, expression=query).sql( + self.name + ) cur.execute(insert_stmt) if overwrite: - cur.execute( - sge.Drop(kind="TABLE", this=this, exists=True).sql(self.name) + self.drop_table( + final_table.name, final_table.catalog, final_table.db, force=True ) cur.execute( - f"ALTER TABLE IF EXISTS {table.sql(self.name)} RENAME TO {this.sql(self.name)}" + f"ALTER TABLE IF EXISTS {initial_table.sql(self.name)} RENAME TO {final_table.sql(self.name)}" ) if schema is None: @@ -370,7 +375,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: if (name := op.name) not in self.list_tables(): quoted = self.compiler.quoted column_defs = [ - sg.exp.ColumnDef( + sge.ColumnDef( this=sg.to_identifier(colname, quoted=quoted), kind=self.compiler.type_mapper.from_ibis(typ), constraints=( From 4f770f5c3697cc7bf0ff06cf3d33b837e11d7ea3 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Tue, 23 Jan 2024 17:15:17 -0500 Subject: [PATCH 116/161] refactor(oracle): simplify 
oracle timestamp overrides Also add a note about when it can be ripped out. Unrelated but small change, the stddev pop and samp remappings are unnecessary. --- ibis/backends/oracle/compiler.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/ibis/backends/oracle/compiler.py b/ibis/backends/oracle/compiler.py index 63b171458195..e0b3e711ebc1 100644 --- a/ibis/backends/oracle/compiler.py +++ b/ibis/backends/oracle/compiler.py @@ -40,31 +40,24 @@ def _create_sql(self, expression: sge.Create) -> str: return create_with_partitions_sql(self, expression) -def _datatype_sql(self: Oracle.Generator, expression: sge.DataType) -> str: - # Use this to handle correctly formatting timestamp precision - # e.g. TIMESTAMP (scale) WITH TIME ZONE vs. TIMESTAMP WITH TIME ZONE(scale) - if expression.is_type("timestamptz"): - for exp in expression.expressions: - if isinstance(exp, sge.DataTypeParam): - return f"TIMESTAMP ({self.sql(exp, 'this')}) WITH TIME ZONE" - return "TIMESTAMP WITH TIME ZONE" - return self.datatype_sql(expression) - - Oracle.Generator.TRANSFORMS |= { sge.LogicalOr: rename_func("max"), sge.LogicalAnd: rename_func("min"), sge.VariancePop: rename_func("var_pop"), sge.Variance: rename_func("var_samp"), sge.Stddev: rename_func("stddev_pop"), - sge.StddevPop: rename_func("stddev_pop"), - sge.StddevSamp: rename_func("stddev_samp"), sge.ApproxDistinct: rename_func("approx_count_distinct"), sge.Create: _create_sql, sge.Select: sg.transforms.preprocess([sg.transforms.eliminate_semi_and_anti_joins]), - sge.DataType: _datatype_sql, } +# TODO: can delete this after bumping sqlglot version > 20.9.0 +Oracle.Generator.TYPE_MAPPING |= { + sge.DataType.Type.TIMETZ: "TIME", + sge.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", +} +Oracle.Generator.TZ_TO_WITH_TIME_ZONE = True + @replace(p.WindowFunction(p.First(x, y))) def rewrite_first(_, x, y): From 1f6595e8910a296411a8082b2919cc51ea4e018b Mon Sep 17 00:00:00 2001 From: Jim Crist-Harif Date: Thu, 25 Jan 2024 17:24:46 -0600 Subject: [PATCH 117/161] fix(api): forbid using `asc`/`desc` in selections --- ibis/expr/operations/relations.py | 5 +++-- ibis/tests/expr/test_table.py | 9 +++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index b7e62cf056b4..15dcad52cfea 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -17,13 +17,14 @@ from ibis.common.patterns import Between, InstanceOf from ibis.common.typing import Coercible, VarTuple from ibis.expr.operations.core import Alias, Column, Node, Scalar, Value -from ibis.expr.operations.sortkeys import SortKey # noqa: TCH001 +from ibis.expr.operations.sortkeys import SortKey from ibis.expr.schema import Schema from ibis.formats import TableProxy # noqa: TCH001 T = TypeVar("T") Unaliased = Annotated[T, ~InstanceOf(Alias)] +NonSortKey = Annotated[T, ~InstanceOf(SortKey)] @public @@ -171,7 +172,7 @@ def _check_integrity(values, allowed_parents): @public class Project(Relation): parent: Relation - values: FrozenDict[str, Unaliased[Value]] + values: FrozenDict[str, NonSortKey[Unaliased[Value]]] def __init__(self, parent, values): _check_integrity(values.values(), {parent}) diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index 4418425505a7..816b0bb77c6a 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -183,6 +183,15 @@ def test_projection_duplicate_names(table): table.select([table.c, table.c]) +def 
test_projection_sort_keys_errors(table): + """Forbid using `asc`/`desc` in selections""" + with pytest.raises(ValidationError): + table.select([table.c.desc()]) + + with pytest.raises(ValidationError): + table.mutate(new=table.c.asc()) + + def test_projection_invalid_root(table): schema1 = {"foo": "double", "bar": "int32"} From bcb6905532c8ed628e7155f586864f99f806f7ff Mon Sep 17 00:00:00 2001 From: Jim Crist-Harif Date: Fri, 26 Jan 2024 09:45:46 -0600 Subject: [PATCH 118/161] fix(api): support passing literal booleans to `filter` --- ibis/expr/types/relations.py | 2 +- ibis/tests/expr/test_table.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index 3fb93b478736..1a4c263970bd 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -106,7 +106,7 @@ def f( # noqa: D417 # nested inputs def bind(table: TableExpr, value: Any, prefer_column=True) -> Iterator[ir.Value]: """Bind a value to a table expression.""" - if prefer_column and isinstance(value, (str, int)): + if prefer_column and type(value) in (str, int): yield table._get_column(value) elif isinstance(value, ValueExpr): yield value diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index 816b0bb77c6a..ede8a2aa1e54 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -730,6 +730,18 @@ def test_aggregate_keywords(table): assert_equal(expr2, expected) +def test_filter_on_literal_boolean(table): + expr1 = table.filter(True) + expr2 = table.filter(ibis.literal(True)) + assert expr1.equals(expr2) + + +def test_filter_on_literal_string_is_column(table): + expr1 = table.filter("h") + expr2 = table.filter(table.h) + assert expr1.equals(expr2) + + def test_filter_on_literal_then_aggregate(table): # Mostly just a smoketest, this used to error on construction expr = table.filter(ibis.literal(True)).agg(lambda t: t.a.sum().name("total")) From d6e1611e1540d269514470d962ba9fadb63c4d4a Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 27 Jan 2024 10:25:28 -0500 Subject: [PATCH 119/161] test(api): add union aliasing test --- ibis/backends/tests/test_sql.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index fe4cce718e18..e0d306766730 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -180,3 +180,14 @@ def test_union_aliasing(backend_name, snapshot): result = top_ten.union(bottom_ten) snapshot.assert_match(str(ibis.to_sql(result, dialect=backend_name)), "out.sql") + + +def test_union_generates_predictable_aliases(con): + t = ibis.memtable( + data=[{"island": "Torgerson", "body_mass_g": 3750, "sex": "male"}] + ) + sub1 = t.inner_join(t.view(), "island").mutate(island_right=lambda t: t.island) + sub2 = t.inner_join(t.view(), "sex").mutate(sex_right=lambda t: t.sex) + expr = ibis.union(sub1, sub2) + df = con.execute(expr) + assert len(df) == 2 From f10ecbd7761477f9a2782b6f27e92deed268e896 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 27 Jan 2024 10:34:24 -0500 Subject: [PATCH 120/161] fix(polars): reference the correct field in the `ops.SelfReference` rule --- ibis/backends/polars/compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py index f636c584a47e..83e216eccae4 100644 --- 
a/ibis/backends/polars/compiler.py +++ b/ibis/backends/polars/compiler.py @@ -1219,7 +1219,7 @@ def execute_view(op, *, ctx: pl.SQLContext, **kw): @translate.register(ops.SelfReference) def execute_self_reference(op, **kw): - return translate(op.table, **kw) + return translate(op.parent, **kw) @translate.register(ops.JoinTable) From b1fcdb6a542a7ecbcc41be01203f6096a0c31621 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 27 Jan 2024 10:38:33 -0500 Subject: [PATCH 121/161] test(polars): enable xpassing test --- ibis/backends/tests/test_client.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 0892f3bf0c78..f71a85bbe68c 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -954,9 +954,6 @@ def test_agg_memory_table(con, monkeypatch): assert result == 3 -@pytest.mark.broken( - ["polars"], reason="join column renaming is currently incorrect on polars" -) def test_self_join_memory_table(backend, con, monkeypatch): monkeypatch.setattr(ibis.options, "default_backend", con) From f77b9b921148007724805bd3da8de046c11174c7 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 29 Jan 2024 08:48:01 -0500 Subject: [PATCH 122/161] test(duckdb): move tests to specific backend test suites --- .../test_client/test_default_backend/out.sql | 3 + ibis/backends/duckdb/tests/test_client.py | 67 ++++++++- ibis/backends/sqlite/tests/test_client.py | 32 +++++ ibis/backends/tests/test_client.py | 133 ------------------ ibis/conftest.py | 6 + 5 files changed, 103 insertions(+), 138 deletions(-) create mode 100644 ibis/backends/duckdb/tests/snapshots/test_client/test_default_backend/out.sql diff --git a/ibis/backends/duckdb/tests/snapshots/test_client/test_default_backend/out.sql b/ibis/backends/duckdb/tests/snapshots/test_client/test_default_backend/out.sql new file mode 100644 index 000000000000..06687fb622f1 --- /dev/null +++ b/ibis/backends/duckdb/tests/snapshots/test_client/test_default_backend/out.sql @@ -0,0 +1,3 @@ +SELECT + SUM(t0.a) AS "Sum(a)" +FROM ibis_pandas_memtable_fw3sdos5brerlgtmbkopvh334m AS t0 \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/test_client.py b/ibis/backends/duckdb/tests/test_client.py index a09b559c5f98..061ff3809aa2 100644 --- a/ibis/backends/duckdb/tests/test_client.py +++ b/ibis/backends/duckdb/tests/test_client.py @@ -1,5 +1,7 @@ from __future__ import annotations +import os + import duckdb import pandas as pd import pyarrow as pa @@ -8,7 +10,7 @@ import ibis import ibis.expr.datatypes as dt -from ibis.conftest import LINUX, SANDBOXED +from ibis.conftest import LINUX, SANDBOXED, not_windows from ibis.util import gen_name @@ -62,8 +64,6 @@ def test_load_extension(ext_directory): def test_cross_db(tmpdir): - import duckdb - path1 = str(tmpdir.join("test1.ddb")) with duckdb.connect(path1) as con1: con1.execute("CREATE SCHEMA foo") @@ -86,8 +86,6 @@ def test_cross_db(tmpdir): def test_attach_detach(tmpdir): - import duckdb - path1 = str(tmpdir.join("test1.ddb")) with duckdb.connect(path1): pass @@ -211,3 +209,62 @@ def test_insert_preserves_column_case(con): t2 = con.create_table(name2, df2, temp=True) con.insert(name1, t2) assert t1.count().execute() == 8 + + +def test_default_backend(snapshot): + df = pd.DataFrame({"a": [1, 2, 3]}) + t = ibis.memtable(df) + expr = t.a.sum() + + # run this twice to ensure that we hit the optimizations in + # 
`_default_backend` + for _ in range(2): + assert expr.execute() == df.a.sum() + + sql = ibis.to_sql(expr) + snapshot.assert_match(sql, "out.sql") + + +@pytest.mark.parametrize( + "url", + [ + param(lambda p: p, id="no-scheme-duckdb-ext"), + param(lambda p: f"duckdb://{p}", id="absolute-path"), + param( + lambda p: f"duckdb://{os.path.relpath(p)}", + marks=[ + not_windows + ], # hard to test in CI since tmpdir & cwd are on different drives + id="relative-path", + ), + param(lambda _: "duckdb://", id="in-memory-empty"), + param(lambda _: "duckdb://:memory:", id="in-memory-explicit"), + param(lambda p: f"duckdb://{p}?read_only=1", id="duckdb_read_write_int"), + param(lambda p: f"duckdb://{p}?read_only=False", id="duckdb_read_write_upper"), + param(lambda p: f"duckdb://{p}?read_only=false", id="duckdb_read_write_lower"), + ], +) +def test_connect_duckdb(url, tmp_path): + path = os.path.abspath(tmp_path / "test.duckdb") + with duckdb.connect(path): + pass + con = ibis.connect(url(path)) + one = ibis.literal(1) + assert con.execute(one) == 1 + + +@pytest.mark.parametrize( + "out_method, extension", [("to_csv", "csv"), ("to_parquet", "parquet")] +) +def test_connect_local_file(out_method, extension, test_employee_data_1, tmp_path): + getattr(test_employee_data_1, out_method)(tmp_path / f"out.{extension}") + con = ibis.connect(tmp_path / f"out.{extension}") + t = next(iter(con.tables.values())) + assert not t.head().execute().empty + + +@not_windows +def test_invalid_connect(tmp_path): + url = f"duckdb://{tmp_path}?read_only=invalid_value" + with pytest.raises(ValueError): + ibis.connect(url) diff --git a/ibis/backends/sqlite/tests/test_client.py b/ibis/backends/sqlite/tests/test_client.py index a1c07bc2289a..bfce0701139e 100644 --- a/ibis/backends/sqlite/tests/test_client.py +++ b/ibis/backends/sqlite/tests/test_client.py @@ -1,15 +1,19 @@ from __future__ import annotations +import os +import sqlite3 import uuid from pathlib import Path import numpy as np import pandas.testing as tm import pytest +from pytest import param import ibis import ibis.expr.types as ir from ibis import config, udf +from ibis.conftest import not_windows pytest.importorskip("sqlalchemy") @@ -132,3 +136,31 @@ def total(x) -> float: expr = total(con.tables.functional_alltypes.limit(2).select(n=ibis.NA).n) result = con.execute(expr) assert result == 0.0 + + +@pytest.mark.sqlite +@pytest.mark.parametrize( + "url, ext", + [ + param(lambda p: p, "sqlite", id="no-scheme-sqlite-ext"), + param(lambda p: p, "db", id="no-scheme-db-ext"), + param(lambda p: f"sqlite://{p}", "db", id="absolute-path"), + param( + lambda p: f"sqlite://{os.path.relpath(p)}", + "db", + marks=[ + not_windows + ], # hard to test in CI since tmpdir & cwd are on different drives + id="relative-path", + ), + param(lambda _: "sqlite://", "db", id="in-memory-empty"), + param(lambda _: "sqlite://:memory:", "db", id="in-memory-explicit"), + ], +) +def test_connect_sqlite(url, ext, tmp_path): + path = os.path.abspath(tmp_path / f"test.{ext}") + with sqlite3.connect(path): + pass + con = ibis.connect(url(path)) + one = ibis.literal(1) + assert con.execute(one) == 1 diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index f71a85bbe68c..19868964eb27 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -4,8 +4,6 @@ import contextlib import importlib import inspect -import os -import platform import re import string import subprocess @@ -773,116 +771,6 @@ def test_connect_url(url): assert 
con.execute(one) == 1 -not_windows = pytest.mark.skipif( - condition=platform.system() == "Windows", - reason=( - "windows prevents two connections to the same duckdb file even in " - "the same process" - ), -) - - -@pytest.fixture(params=["duckdb", "sqlite"]) -def tmp_db(request, tmp_path): - api = request.param - mod = pytest.importorskip(api) - db = tmp_path / "test.db" - mod.connect(str(db)).execute("CREATE TABLE tmp_t AS SELECT 1 AS a").fetchall() - return db - - -@pytest.mark.duckdb -@pytest.mark.parametrize( - "url", - [ - param(lambda p: p, id="no-scheme-duckdb-ext"), - param(lambda p: f"duckdb://{p}", id="absolute-path"), - param( - lambda p: f"duckdb://{os.path.relpath(p)}", - marks=[ - not_windows - ], # hard to test in CI since tmpdir & cwd are on different drives - id="relative-path", - ), - param(lambda _: "duckdb://", id="in-memory-empty"), - param(lambda _: "duckdb://:memory:", id="in-memory-explicit"), - param( - lambda p: f"duckdb://{p}?read_only=1", - id="duckdb_read_write_int", - ), - param( - lambda p: f"duckdb://{p}?read_only=False", - id="duckdb_read_write_upper", - ), - param( - lambda p: f"duckdb://{p}?read_only=false", - id="duckdb_read_write_lower", - ), - ], -) -def test_connect_duckdb(url, tmp_path): - duckdb = pytest.importorskip("duckdb") - path = os.path.abspath(tmp_path / "test.duckdb") - with duckdb.connect(path): - pass - con = ibis.connect(url(path)) - one = ibis.literal(1) - assert con.execute(one) == 1 - - -@pytest.mark.sqlite -@pytest.mark.parametrize( - "url, ext", - [ - param(lambda p: p, "sqlite", id="no-scheme-sqlite-ext"), - param(lambda p: p, "db", id="no-scheme-db-ext"), - param(lambda p: f"sqlite://{p}", "db", id="absolute-path"), - param( - lambda p: f"sqlite://{os.path.relpath(p)}", - "db", - marks=[ - not_windows - ], # hard to test in CI since tmpdir & cwd are on different drives - id="relative-path", - ), - param(lambda _: "sqlite://", "db", id="in-memory-empty"), - param(lambda _: "sqlite://:memory:", "db", id="in-memory-explicit"), - ], -) -def test_connect_sqlite(url, ext, tmp_path): - import sqlite3 - - path = os.path.abspath(tmp_path / f"test.{ext}") - with sqlite3.connect(path): - pass - con = ibis.connect(url(path)) - one = ibis.literal(1) - assert con.execute(one) == 1 - - -@pytest.mark.duckdb -@pytest.mark.parametrize( - "out_method, extension", - [ - ("to_csv", "csv"), - ("to_parquet", "parquet"), - ], -) -def test_connect_local_file(out_method, extension, test_employee_data_1, tmp_path): - getattr(test_employee_data_1, out_method)(tmp_path / f"out.{extension}") - con = ibis.connect(tmp_path / f"out.{extension}") - t = next(iter(con.tables.values())) - assert not t.head().execute().empty - - -@not_windows -def test_invalid_connect(tmp_path): - pytest.importorskip("duckdb") - url = f"duckdb://{tmp_path}?read_only=invalid_value" - with pytest.raises(ValueError): - ibis.connect(url) - - @pytest.mark.parametrize( ("arg", "lambda_", "expected"), [ @@ -988,7 +876,6 @@ def test_create_from_in_memory_table(con, temp_table, arg, func, monkeypatch): assert temp_table in con.list_tables() -@pytest.mark.usefixtures("backend") def test_default_backend_option(con, monkeypatch): # verify that there's nothing already set assert ibis.options.default_backend is None @@ -1045,26 +932,6 @@ def test_default_backend_no_duckdb_read_parquet(): ) -@pytest.mark.duckdb -def test_default_backend(): - pytest.importorskip("duckdb") - - df = pd.DataFrame({"a": [1, 2, 3]}) - t = ibis.memtable(df) - expr = t.a.sum() - # run this twice to ensure that we hit the 
optimizations in - # `_default_backend` - for _ in range(2): - assert expr.execute() == df.a.sum() - - sql = ibis.to_sql(expr) - rx = """\ -SELECT - SUM\\((t\\d+)\\.a\\) AS ".+" -FROM \\w+ AS \\1""" - assert re.match(rx, sql) is not None - - @pytest.mark.parametrize("dtype", [None, "f8"]) def test_dunder_array_table(alltypes, dtype): expr = alltypes.group_by("string_col").int_col.sum().order_by("string_col") diff --git a/ibis/conftest.py b/ibis/conftest.py index 9b91a3aaaa7b..e6002fccf8d0 100644 --- a/ibis/conftest.py +++ b/ibis/conftest.py @@ -35,3 +35,9 @@ def add_ibis(monkeypatch, doctest_namespace): # people to write `from $MODULE_BEING_TESTED import *` doctest_namespace["all"] = builtins.all doctest_namespace["range"] = builtins.range + + +not_windows = pytest.mark.skipif( + condition=WINDOWS, + reason="windows prevents two connections to the same file even in the same process", +) From c9ab21d83126e25cc0c46329b4819c5dc540461b Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 29 Jan 2024 08:55:20 -0500 Subject: [PATCH 123/161] test(duckdb): run test in subprocess to avoid setting the default backend --- .../test_client/test_default_backend/out.sql | 3 -- ibis/backends/duckdb/tests/test_client.py | 31 +++++++++++++------ 2 files changed, 21 insertions(+), 13 deletions(-) delete mode 100644 ibis/backends/duckdb/tests/snapshots/test_client/test_default_backend/out.sql diff --git a/ibis/backends/duckdb/tests/snapshots/test_client/test_default_backend/out.sql b/ibis/backends/duckdb/tests/snapshots/test_client/test_default_backend/out.sql deleted file mode 100644 index 06687fb622f1..000000000000 --- a/ibis/backends/duckdb/tests/snapshots/test_client/test_default_backend/out.sql +++ /dev/null @@ -1,3 +0,0 @@ -SELECT - SUM(t0.a) AS "Sum(a)" -FROM ibis_pandas_memtable_fw3sdos5brerlgtmbkopvh334m AS t0 \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/test_client.py b/ibis/backends/duckdb/tests/test_client.py index 061ff3809aa2..f2cfa5619568 100644 --- a/ibis/backends/duckdb/tests/test_client.py +++ b/ibis/backends/duckdb/tests/test_client.py @@ -1,6 +1,8 @@ from __future__ import annotations import os +import subprocess +import sys import duckdb import pandas as pd @@ -211,18 +213,27 @@ def test_insert_preserves_column_case(con): assert t1.count().execute() == 8 -def test_default_backend(snapshot): - df = pd.DataFrame({"a": [1, 2, 3]}) - t = ibis.memtable(df) - expr = t.a.sum() +def test_default_backend(): + # use subprocess to avoid mutating state across tests + script = """\ +import pandas as pd + +import ibis - # run this twice to ensure that we hit the optimizations in - # `_default_backend` - for _ in range(2): - assert expr.execute() == df.a.sum() +df = pd.DataFrame({"a": [1, 2, 3]}) - sql = ibis.to_sql(expr) - snapshot.assert_match(sql, "out.sql") +t = ibis.memtable(df) + +expr = t.a.sum() + +# run twice to ensure that we hit the optimizations in +# `_default_backend` +for _ in range(2): + assert expr.execute() == df.a.sum()""" + + assert ibis.options.default_backend is None + subprocess.run([sys.executable, "-c", script], check=True) + assert ibis.options.default_backend is None @pytest.mark.parametrize( From 0a088e59337d94accfac6eb4051c181de624383d Mon Sep 17 00:00:00 2001 From: Jim Crist-Harif Date: Tue, 30 Jan 2024 18:28:20 -0600 Subject: [PATCH 124/161] refactor(sqlite): port to SQLGlot (#8154) Port the SQLite backend to sqlglot Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com> --- 
.github/workflows/ibis-backends.yml | 8 +- ibis/backends/base/sqlglot/datatypes.py | 20 + ibis/backends/conftest.py | 5 +- ibis/backends/sqlite/__init__.py | 666 ++++++++++++---- ibis/backends/sqlite/compiler.py | 540 ++++++++++++- ibis/backends/sqlite/converter.py | 23 + ibis/backends/sqlite/datatypes.py | 68 -- ibis/backends/sqlite/registry.py | 448 ----------- ibis/backends/sqlite/tests/conftest.py | 35 +- .../test_client/test_compile_toplevel/out.sql | 2 - .../test_count_on_order_by/out.sql | 30 - ibis/backends/sqlite/tests/test_client.py | 91 +-- ibis/backends/sqlite/tests/test_functions.py | 713 ------------------ ibis/backends/sqlite/tests/test_types.py | 26 +- ibis/backends/sqlite/udf.py | 403 +++++----- .../test_default_limit/sqlite/out.sql | 5 + .../test_disable_query_limit/sqlite/out.sql | 5 + .../sqlite/out.sql | 19 + .../test_respect_set_limit/sqlite/out.sql | 10 + .../test_group_by_has_index/sqlite/out.sql | 8 +- .../test_sql/test_isin_bug/sqlite/out.sql | 18 +- ibis/backends/tests/test_aggregation.py | 8 +- ibis/backends/tests/test_array.py | 16 +- ibis/backends/tests/test_asof_join.py | 2 + ibis/backends/tests/test_join.py | 46 +- ibis/backends/tests/test_numeric.py | 14 +- ibis/backends/tests/test_string.py | 3 +- ibis/backends/tests/test_temporal.py | 22 +- ibis/backends/tests/test_udf.py | 3 +- poetry.lock | 7 +- pyproject.toml | 4 +- requirements-dev.txt | 1 - 32 files changed, 1382 insertions(+), 1887 deletions(-) create mode 100644 ibis/backends/sqlite/converter.py delete mode 100644 ibis/backends/sqlite/datatypes.py delete mode 100644 ibis/backends/sqlite/registry.py delete mode 100644 ibis/backends/sqlite/tests/snapshots/test_client/test_compile_toplevel/out.sql delete mode 100644 ibis/backends/sqlite/tests/snapshots/test_functions/test_count_on_order_by/out.sql delete mode 100644 ibis/backends/sqlite/tests/test_functions.py create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/sqlite/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/sqlite/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/sqlite/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/sqlite/out.sql diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index a131f48756a0..1de52e0b4a15 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -81,10 +81,10 @@ jobs: title: Pandas extras: - pandas - # - name: sqlite - # title: SQLite - # extras: - # - sqlite + - name: sqlite + title: SQLite + extras: + - sqlite - name: datafusion title: Datafusion extras: diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index 2db47acfdef5..dc70f7b6e3ba 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -631,6 +631,26 @@ def _from_ibis_Struct(cls, dtype: dt.Struct) -> sge.DataType: class SQLiteType(SqlglotType): dialect = "sqlite" + @classmethod + def _from_sqlglot_INT(cls) -> dt.Int64: + return dt.Int64(nullable=cls.default_nullable) + + @classmethod + def _from_sqlglot_FLOAT(cls) -> dt.Float64: + return dt.Float64(nullable=cls.default_nullable) + + @classmethod + def _from_ibis_Array(cls, dtype: dt.Array) -> NoReturn: + raise com.UnsupportedBackendType("Array types aren't supported in SQLite") + + @classmethod + def _from_ibis_Map(cls, dtype: dt.Map) -> NoReturn: + raise 
com.UnsupportedBackendType("Map types aren't supported in SQLite") + + @classmethod + def _from_ibis_Struct(cls, dtype: dt.Struct) -> sge.DataType: + raise com.UnsupportedBackendType("Struct types aren't supported in SQLite") + class ImpalaType(SqlglotType): dialect = "impala" diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index 051cf12146d8..5c835ae0ba22 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -570,20 +570,19 @@ def ddl_con(ddl_backend): params=_get_backends_to_test( keep=( "risingwave", - "sqlite", ) ), scope="session", ) def alchemy_backend(request, data_dir, tmp_path_factory, worker_id): """Set up the SQLAlchemy-based backends.""" - return _setup_backend(request, data_dir, tmp_path_factory, worker_id) + pytest.skip("No SQLAlchemy backends remaining") @pytest.fixture(scope="session") def alchemy_con(alchemy_backend): """Instance of Client, already connected to the db (if applies).""" - return alchemy_backend.connection + pytest.skip("No SQLAlchemy backends remaining") @pytest.fixture( diff --git a/ibis/backends/sqlite/__init__.py b/ibis/backends/sqlite/__init__.py index ea85c74dc6ff..c0a3d8f9a396 100644 --- a/ibis/backends/sqlite/__init__.py +++ b/ibis/backends/sqlite/__init__.py @@ -1,74 +1,59 @@ -# Copyright 2015 Cloudera Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - from __future__ import annotations -import inspect +import contextlib +import functools import sqlite3 -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, NoReturn +from urllib.parse import urlparse -import sqlalchemy as sa -import toolz -from sqlalchemy.dialects.sqlite import TIMESTAMP +import sqlglot as sg +import sqlglot.expressions as sge +import ibis import ibis.common.exceptions as com import ibis.expr.datatypes as dt +import ibis.expr.operations as ops import ibis.expr.schema as sch +import ibis.expr.types as ir from ibis import util -from ibis.backends.base import CanListDatabases -from ibis.backends.base.sql.alchemy import BaseAlchemyBackend -from ibis.backends.sqlite import udf +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.base.sqlglot.compiler import C, F from ibis.backends.sqlite.compiler import SQLiteCompiler -from ibis.backends.sqlite.datatypes import ISODATETIME, SqliteType +from ibis.backends.sqlite.converter import SQLitePandasData +from ibis.backends.sqlite.udf import ignore_nulls, register_all if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Iterator, Mapping from pathlib import Path - import ibis.expr.operations as ops - import ibis.expr.types as ir + import pandas as pd + import pyarrow as pa + + +@functools.cache +def _init_sqlite3(): + import pandas as pd + + # TODO: can we remove this? 
+ sqlite3.register_adapter(pd.Timestamp, lambda value: value.isoformat()) + +def _quote(name: str) -> str: + return sg.to_identifier(name, quoted=True).sql("sqlite") -class Backend(BaseAlchemyBackend, CanListDatabases): + +class Backend(SQLGlotBackend): name = "sqlite" - compiler = SQLiteCompiler - supports_create_or_replace = False + compiler = SQLiteCompiler() supports_python_udfs = True - def __getstate__(self) -> dict: - r = super().__getstate__() - r.update( - dict( - compiler=self.compiler, - database_name=self.database_name, - _con=None, # clear connection on copy() - _meta=None, - ) - ) - return r - @property def current_database(self) -> str: - # AFAICT there is no notion of a schema in SQLite return "main" - def list_databases(self, like: str | None = None) -> list[str]: - with self.begin() as con: - mappings = con.exec_driver_sql("PRAGMA database_list").mappings() - results = list(toolz.pluck("name", mappings)) - - return sorted(self._filter_with_like(results, like)) + @property + def version(self) -> str: + return sqlite3.sqlite_version def do_connect( self, @@ -95,169 +80,516 @@ def do_connect( >>> import ibis >>> ibis.sqlite.connect("path/to/my/sqlite.db") """ - import pandas as pd + _init_sqlite3() - self.database_name = "main" + if type_map: + self._type_map = {k.lower(): ibis.dtype(v) for k, v in type_map.items()} + else: + self._type_map = {} - engine = sa.create_engine( - f"sqlite:///{database if database is not None else ':memory:'}", - poolclass=sa.pool.StaticPool, - ) + self.con = sqlite3.connect(":memory:" if database is None else database) + self._temp_views = set() - if type_map: - # Patch out ischema_names for the instantiated dialect. This - # attribute is required for all SQLAlchemy dialects, but as no - # public way of modifying it for a given dialect. Patching seems - # easier than subclassing the builtin SQLite dialect, and achieves - # the same desired behavior. - def _to_ischema_val(t): - sa_type = SqliteType.from_ibis(dt.dtype(t)) - if isinstance(sa_type, sa.types.TypeEngine): - # SQLAlchemy expects a callable here, rather than an - # instance. Use a lambda to work around this. - return lambda: sa_type - return sa_type - - overrides = {k: _to_ischema_val(v) for k, v in type_map.items()} - engine.dialect.ischema_names = engine.dialect.ischema_names.copy() - engine.dialect.ischema_names.update(overrides) - - sqlite3.register_adapter(pd.Timestamp, lambda value: value.isoformat()) - - @sa.event.listens_for(engine, "connect") - def connect(dbapi_connection, connection_record): - """Register UDFs on connection.""" - udf.register_all(dbapi_connection) - dbapi_connection.execute("PRAGMA case_sensitive_like=ON") - - super().do_connect(engine) + register_all(self.con) + self.con.execute("PRAGMA case_sensitive_like=ON") - def attach(self, name: str, path: str | Path) -> None: - """Connect another SQLite database file to the current connection. + def _from_url(self, url: str, **kwargs): + """Connect to a backend using a URL `url`. Parameters ---------- - name - Database name within SQLite - path - Path to sqlite3 database files - - Examples - -------- - >>> con1 = ibis.sqlite.connect("original.db") - >>> con2 = ibis.sqlite.connect("new.db") - >>> con1.attach("new", "new.db") - >>> con1.list_tables(database="new") + url + URL with which to connect to a backend. 
+ kwargs + Additional keyword arguments + + Returns + ------- + BaseBackend + A backend instance """ - with self.begin() as con: - con.exec_driver_sql(f"ATTACH DATABASE {str(path)!r} AS {self._quote(name)}") + url = urlparse(url) + database = url.path[1:] or ":memory:" + return self.connect(database=database, **kwargs) + + def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: + if not isinstance(query, str): + query = query.sql(dialect=self.name) + return self.con.execute(query, **kwargs) + + @contextlib.contextmanager + def _safe_raw_sql(self, *args, **kwargs): + with contextlib.closing(self.raw_sql(*args, **kwargs)) as result: + yield result + + @contextlib.contextmanager + def begin(self): + cur = self.con.cursor() + try: + yield cur + except Exception: + self.con.rollback() + raise + else: + self.con.commit() + finally: + cur.close() - @staticmethod - def _new_sa_metadata(): - meta = sa.MetaData() - - @sa.event.listens_for(meta, "column_reflect") - def column_reflect(inspector, table, column_info): - if type(column_info["type"]) is TIMESTAMP: - column_info["type"] = ISODATETIME() + def list_databases(self, like: str | None = None) -> list[str]: + with self._safe_raw_sql("SELECT name FROM pragma_database_list()") as cur: + results = [r[0] for r in cur.fetchall()] - return meta + return sorted(self._filter_with_like(results, like)) - def _table_from_schema( - self, name, schema, database: str | None = None, temp: bool = True - ) -> sa.Table: - prefixes = [] - if temp: - prefixes.append("TEMPORARY") - columns = self._columns_from_schema(name, schema) - return sa.Table( - name, sa.MetaData(), *columns, schema=database, prefixes=prefixes + def list_tables( + self, + like: str | None = None, + database: str | None = None, + ) -> list[str]: + if database is None: + database = "main" + + sql = ( + sg.select("name") + .from_(F.pragma_table_list()) + .where( + C.schema.eq(database), + C.type.isin("table", "view"), + ~( + C.name.isin( + "sqlite_schema", + "sqlite_master", + "sqlite_temp_schema", + "sqlite_temp_master", + ) + ), + ) + .sql(self.name) ) + with self._safe_raw_sql(sql) as cur: + results = [r[0] for r in cur.fetchall()] - @property - def _current_schema(self) -> str | None: - return self.current_database + return sorted(self._filter_with_like(results, like)) - def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: - view = f"__ibis_sqlite_metadata_{util.guid()}" + def _parse_type(self, typ: str, nullable: bool) -> dt.DataType: + typ = typ.lower() + try: + out = self._type_map[typ] + except KeyError: + return self.compiler.type_mapper.from_string(typ, nullable=nullable) + else: + return out.copy(nullable=nullable) + + def _inspect_schema( + self, cur: sqlite3.Cursor, table_name: str, database: str | None = None + ) -> Iterator[tuple[str, dt.DataType]]: + if database is None: + database = "main" + + quoted_db = _quote(database) + quoted_table = _quote(table_name) + + sql = f'SELECT name, type, "notnull" FROM {quoted_db}.pragma_table_info({quoted_table})' + cur.execute(sql) + rows = cur.fetchall() + if not rows: + raise com.IbisError(f"Table not found: {table_name!r}") + + table_info = {name: (typ, not notnull) for name, typ, notnull in rows} + + # if no type info was returned for a column, fetch the type of the + # first row and assume that matches the rest of the rows + unknown = [name for name, (typ, _) in table_info.items() if not typ] + if unknown: + queries = ", ".join(f"typeof({_quote(name)})" for name in unknown) + cur.execute(f"SELECT {queries} FROM 
{quoted_db}.{quoted_table} LIMIT 1") + row = cur.fetchone() + if row is not None: + for name, typ in zip(unknown, row): + _, nullable = table_info[name] + table_info[name] = (typ, nullable) + else: + raise com.IbisError(f"Failed to infer types for columns {unknown}") + + for name, (typ, nullable) in table_info.items(): + yield name, self._parse_type(typ, nullable) + + def get_schema( + self, table_name: str, schema: str | None = None, database: str | None = None + ) -> sch.Schema: + """Compute the schema of a `table`. + + Parameters + ---------- + table_name + May **not** be fully qualified. Use `database` if you want to + qualify the identifier. + schema + Schema name. Unused for sqlite. + database + Database name - with self.begin() as con: - if query in self.list_tables(): - query = f"SELECT * FROM {query}" + Returns + ------- + sch.Schema + Ibis schema + """ + if schema is not None: + raise TypeError("sqlite doesn't support `schema`, use `database` instead") + with self.begin() as cur: + return sch.Schema.from_tuples( + self._inspect_schema(cur, table_name, database) + ) + + def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: + with self.begin() as cur: # create a view that should only be visible in this transaction - con.exec_driver_sql(f"CREATE TEMPORARY VIEW {view} AS {query}") + view = util.gen_name("ibis_sqlite_metadata") + cur.execute(f"CREATE TEMPORARY VIEW {view} AS {query}") - # extract table info from the view - table_info = con.exec_driver_sql(f"PRAGMA table_info({view})") + yield from self._inspect_schema(cur, view, database="temp") + + # drop the view when we're done with it + cur.execute(f"DROP VIEW IF EXISTS {view}") + + def _fetch_from_cursor( + self, cursor: sqlite3.Cursor, schema: sch.Schema + ) -> pd.DataFrame: + import pandas as pd - # get names and not nullables - names, notnulls, raw_types = zip( - *toolz.pluck(["name", "notnull", "type"], table_info.mappings()) + df = pd.DataFrame.from_records(cursor, columns=schema.names, coerce_float=True) + return SQLitePandasData.convert_table(df, schema) + + @util.experimental + def to_pyarrow_batches( + self, + expr: ir.Expr, + *, + params: Mapping[ir.Scalar, Any] | None = None, + limit: int | str | None = None, + chunk_size: int = 1_000_000, + **_: Any, + ) -> pa.ipc.RecordBatchReader: + import pyarrow as pa + + self._run_pre_execute_hooks(expr) + + schema = expr.as_table().schema() + with self._safe_raw_sql( + self.compile(expr, limit=limit, params=params) + ) as cursor: + df = self._fetch_from_cursor(cursor, schema) + table = pa.Table.from_pandas( + df, schema=schema.to_pyarrow(), preserve_index=False + ) + return table.to_reader(max_chunksize=chunk_size) + + def _generate_create_table(self, table: sge.Table, schema: sch.Schema): + column_defs = [ + sge.ColumnDef( + this=sg.to_identifier(colname, quoted=self.compiler.quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [sge.ColumnConstraint(kind=sge.NotNullColumnConstraint())] + ), ) + for colname, typ in schema.items() + ] - # get the type of the first row if no affinity was returned in - # `raw_types`; assume that reflects the rest of the rows - type_queries = ", ".join(map("typeof({})".format, names)) - single_row_types = con.exec_driver_sql( - f"SELECT {type_queries} FROM {view} LIMIT 1" - ).fetchone() - for name, notnull, raw_typ, typ in zip( - names, notnulls, raw_types, single_row_types - ): - ibis_type = SqliteType.from_string(raw_typ or typ) - yield name, ibis_type(nullable=not notnull) + 
target = sge.Schema(this=table, expressions=column_defs) - # drop the view when we're done with it - con.exec_driver_sql(f"DROP VIEW IF EXISTS {view}") + return sge.Create(kind="TABLE", this=target) + + def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: + # only register if we haven't already done so + if op.name not in self.list_tables(database="temp"): + table = sg.table(op.name, quoted=self.compiler.quoted, catalog="temp") + create_stmt = self._generate_create_table(table, op.schema).sql(self.name) + df = op.data.to_frame() + + data = df.itertuples(index=False) + cols = ", ".join(_quote(col) for col in op.schema.keys()) + specs = ", ".join(["?"] * len(op.schema)) + insert_stmt = ( + f"INSERT INTO {table.sql(self.name)} ({cols}) VALUES ({specs})" + ) - def _get_schema_using_query(self, query: str) -> sch.Schema: - """Return an ibis Schema from a SQLite SQL string.""" - return sch.Schema.from_tuples(self._metadata(query)) + with self.begin() as cur: + cur.execute(create_stmt) + cur.executemany(insert_stmt, data) + + def _define_udf_translation_rules(self, expr): + """No-op, these are defined in the compiler.""" def _register_udfs(self, expr: ir.Expr) -> None: import ibis.expr.operations as ops - with self.begin() as con: - for udf_node in expr.op().find(ops.ScalarUDF): - compile_func = getattr( - self, f"_compile_{udf_node.__input_type__.name.lower()}_udf" - ) + con = self.con + + for udf_node in expr.op().find(ops.ScalarUDF): + compile_func = getattr( + self, f"_compile_{udf_node.__input_type__.name.lower()}_udf" + ) + registration_func = compile_func(udf_node) + if registration_func is not None: + registration_func(con) - registration_func = compile_func(udf_node) - if registration_func is not None: - registration_func(con) + def _compile_builtin_udf(self, udf_node: ops.ScalarUDF) -> None: + pass def _compile_python_udf(self, udf_node: ops.ScalarUDF) -> None: + name = type(udf_node).__name__ + nargs = len(udf_node.__signature__.parameters) func = udf_node.__func__ - name = func.__name__ - for argname, arg in zip(udf_node.argnames, udf_node.args): - dtype = arg.dtype + def check_dtype(dtype, name=None): if not ( dtype.is_string() or dtype.is_binary() or dtype.is_numeric() or dtype.is_boolean() ): + label = "return value" if name is None else f"argument `{name}`" raise com.IbisTypeError( "SQLite only supports strings, bytes, booleans and numbers as UDF input and output, " - f"got argument `{argname}` with unsupported type {dtype}" + f"{label} has unsupported type {dtype}" ) + for argname, arg in zip(udf_node.argnames, udf_node.args): + check_dtype(arg.dtype, argname) + check_dtype(udf_node.dtype) + def register_udf(con): - return con.connection.create_function( - name, len(inspect.signature(func).parameters), udf.ignore_nulls(func) - ) + return con.create_function(name, nargs, ignore_nulls(func)) return register_udf - def _get_temp_view_definition( - self, name: str, definition: sa.sql.compiler.Compiled - ) -> str: - yield f"DROP VIEW IF EXISTS {name}" - yield f"CREATE TEMPORARY VIEW {name} AS {definition}" + def _compile_pyarrow_udf(self, udf_node: ops.ScalarUDF) -> NoReturn: + raise NotImplementedError("pyarrow UDFs are not supported in SQLite") - def _get_compiled_statement(self, view: sa.Table, definition: sa.sql.Selectable): - return super()._get_compiled_statement( - view, definition, compile_kwargs={"literal_binds": True} + def _compile_pandas_udf(self, udf_node: ops.ScalarUDF) -> NoReturn: + raise NotImplementedError("pandas UDFs are not supported in SQLite") + + 
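+ # For reference, a Python scalar UDF routed through `_compile_python_udf`
+ # above is typically declared on the user side with `ibis.udf.scalar.python`
+ # and plain type annotations, roughly like this (illustrative sketch only,
+ # not exercised in this module):
+ #
+ #     @ibis.udf.scalar.python
+ #     def num_vowels(s: str) -> int:
+ #         return sum(map(s.lower().count, "aeiou"))
+ #
+ # When an expression using such a UDF is executed, the wrapped function is
+ # registered on the raw sqlite3 connection via `create_function`, with NULL
+ # inputs handled by `ignore_nulls`.
+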
def attach(self, name: str, path: str | Path) -> None: + """Connect another SQLite database file to the current connection. + + Parameters + ---------- + name + Database name within SQLite + path + Path to sqlite3 database files + + Examples + -------- + >>> con1 = ibis.sqlite.connect("original.db") + >>> con2 = ibis.sqlite.connect("new.db") + >>> con1.attach("new", "new.db") + >>> con1.list_tables(database="new") + """ + with self.begin() as cur: + cur.execute(f"ATTACH DATABASE {str(path)!r} AS {_quote(name)}") + + def create_table( + self, + name: str, + obj: pd.DataFrame | pa.Table | ir.Table | None = None, + *, + schema: ibis.Schema | None = None, + database: str | None = None, + temp: bool = False, + overwrite: bool = False, + ): + """Create a table in SQLite. + + Parameters + ---------- + name + Name of the table to create + obj + The data with which to populate the table; optional, but at least + one of `obj` or `schema` must be specified + schema + The schema of the table to create; optional, but at least one of + `obj` or `schema` must be specified + database + The name of the database in which to create the table; if not + passed, the current database is used. + temp + Create a temporary table + overwrite + If `True`, replace the table if it already exists, otherwise fail + if the table exists + """ + if schema is None and obj is None: + raise ValueError("Either `obj` or `schema` must be specified") + + if schema is not None: + schema = ibis.schema(schema) + + if obj is not None: + if not isinstance(obj, ir.Expr): + obj = ibis.memtable(obj) + + self._run_pre_execute_hooks(obj) + + insert_query = self._to_sqlglot(obj) + else: + insert_query = None + + if temp: + if database not in (None, "temp"): + raise ValueError( + "SQLite doesn't support creating temporary tables in an explicit database" + ) + else: + database = "temp" + + if overwrite: + created_table = sg.table( + util.gen_name(f"{self.name}_table"), + catalog=database, + quoted=self.compiler.quoted, + ) + table = sg.table(name, catalog=database, quoted=self.compiler.quoted) + else: + created_table = table = sg.table( + name, catalog=database, quoted=self.compiler.quoted + ) + + create_stmt = self._generate_create_table( + created_table, schema=(schema or obj.schema()) + ).sql(self.name) + + with self.begin() as cur: + cur.execute(create_stmt) + + if insert_query is not None: + cur.execute( + sge.Insert(this=created_table, expression=insert_query).sql( + self.name + ) + ) + + if overwrite: + cur.execute( + sge.Drop(kind="TABLE", this=table, exists=True).sql(self.name) + ) + # SQLite's ALTER TABLE statement doesn't support using a + # fully-qualified table reference after RENAME TO. Since we + # never rename between databases, we only need the table name + # here. 
+ quoted_name = _quote(name)
+ cur.execute(
+ f"ALTER TABLE {created_table.sql(self.name)} RENAME TO {quoted_name}"
+ )
+
+ if schema is None:
+ return self.table(name, database=database)
+
+ # preserve the input schema if it was provided
+ return ops.DatabaseTable(
+ name, schema=schema, source=self, namespace=ops.Namespace(database=database)
+ ).to_expr()
+
+ def drop_table(
+ self,
+ name: str,
+ database: str | None = None,
+ force: bool = False,
+ ) -> None:
+ drop_stmt = sg.exp.Drop(
+ kind="TABLE",
+ this=sg.table(name, catalog=database, quoted=self.compiler.quoted),
+ exists=force,
 )
+ with self._safe_raw_sql(drop_stmt):
+ pass
+
+ def _create_temp_view(self, table_name, source):
+ if table_name not in self._temp_views and table_name in self.list_tables():
+ raise ValueError(
+ f"{table_name} already exists as a non-temporary table or view"
+ )
+
+ view = sg.table(table_name, catalog="temp", quoted=self.compiler.quoted)
+ drop = sge.Drop(kind="VIEW", exists=True, this=view).sql(self.name)
+ create = sge.Create(
+ kind="VIEW", this=view, expression=source, replace=False
+ ).sql(self.name)
+
+ with self.begin() as cur:
+ cur.execute(drop)
+ cur.execute(create)
+
+ self._temp_views.add(table_name)
+ self._register_temp_view_cleanup(table_name)
+
+ def create_view(
+ self,
+ name: str,
+ obj: ir.Table,
+ *,
+ database: str | None = None,
+ schema: str | None = None,
+ overwrite: bool = False,
+ ) -> ir.Table:
+ view = sg.table(name, catalog=database, quoted=self.compiler.quoted)
+
+ stmts = []
+ if overwrite:
+ stmts.append(sge.Drop(kind="VIEW", this=view, exists=True).sql(self.name))
+ stmts.append(
+ sge.Create(
+ this=view, kind="VIEW", replace=False, expression=self.compile(obj)
+ ).sql(self.name)
+ )
+
+ self._run_pre_execute_hooks(obj)
+
+ with self.begin() as cur:
+ for stmt in stmts:
+ cur.execute(stmt)
+
+ return self.table(name, database=database)
+
+ def insert(
+ self,
+ table_name: str,
+ obj: pd.DataFrame | ir.Table | list | dict,
+ database: str | None = None,
+ overwrite: bool = False,
+ ) -> None:
+ """Insert data into a table.
+
+ Parameters
+ ----------
+ table_name
+ The name of the table into which data will be inserted
+ obj
+ The source data or expression to insert
+ database
+ Name of the attached database that the table is located in.
+ overwrite
+ If `True`, replace the existing contents of the table
+
+ Raises
+ ------
+ NotImplementedError
+ If inserting data from a different database
+ ValueError
+ If the type of `obj` isn't supported
+ """
+ table = sg.table(table_name, catalog=database, quoted=self.compiler.quoted)
+ if not isinstance(obj, ir.Expr):
+ obj = ibis.memtable(obj)
+
+ self._run_pre_execute_hooks(obj)
+ expr = self._to_sqlglot(obj)
+ insert_stmt = sge.Insert(this=table, expression=expr).sql(self.name)
+ with self.begin() as cur:
+ if overwrite:
+ cur.execute(f"DELETE FROM {table.sql(self.name)}")
+ cur.execute(insert_stmt)
diff --git a/ibis/backends/sqlite/compiler.py b/ibis/backends/sqlite/compiler.py
index 09db8897ebfc..efc31ed68906 100644
--- a/ibis/backends/sqlite/compiler.py
+++ b/ibis/backends/sqlite/compiler.py
@@ -1,36 +1,522 @@
 from __future__ import annotations
 
-# Copyright 2014 Cloudera Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator -from ibis.backends.sqlite.datatypes import SqliteType -from ibis.backends.sqlite.registry import operation_registry +from functools import singledispatchmethod + +import sqlglot as sg +import sqlglot.expressions as sge +from public import public +from sqlglot.dialects.sqlite import SQLite + +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis.backends.base.sqlglot.compiler import SQLGlotCompiler +from ibis.backends.base.sqlglot.datatypes import SQLiteType +from ibis.backends.base.sqlglot.rewrites import ( + rewrite_first_to_first_value, + rewrite_last_to_last_value, +) +from ibis.common.temporal import DateUnit, IntervalUnit from ibis.expr.rewrites import rewrite_sample +SQLite.Generator.TYPE_MAPPING |= { + sge.DataType.Type.BOOLEAN: "BOOLEAN", +} + + +@public +class SQLiteCompiler(SQLGlotCompiler): + __slots__ = () + + dialect = "sqlite" + quoted = True + type_mapper = SQLiteType + rewrites = SQLGlotCompiler.rewrites + ( + rewrite_sample, + rewrite_first_to_first_value, + rewrite_last_to_last_value, + ) + + NAN = sge.NULL + POS_INF = sge.Literal.number("1e999") + NEG_INF = sge.Literal.number("-1e999") + + def _aggregate(self, funcname: str, *args, where): + expr = self.f[funcname](*args) + if where is not None: + return sge.Filter(this=expr, expression=sge.Where(this=where)) + return expr + + @singledispatchmethod + def visit_node(self, op, **kw): + return super().visit_node(op, **kw) + + @visit_node.register(ops.Levenshtein) + @visit_node.register(ops.RegexSplit) + @visit_node.register(ops.StringSplit) + @visit_node.register(ops.IsNan) + @visit_node.register(ops.IsInf) + @visit_node.register(ops.Covariance) + @visit_node.register(ops.Correlation) + @visit_node.register(ops.Quantile) + @visit_node.register(ops.MultiQuantile) + @visit_node.register(ops.Median) + @visit_node.register(ops.ApproxMedian) + @visit_node.register(ops.Array) + @visit_node.register(ops.ArrayConcat) + @visit_node.register(ops.ArrayStringJoin) + @visit_node.register(ops.ArrayCollect) + @visit_node.register(ops.ArrayContains) + @visit_node.register(ops.ArrayFlatten) + @visit_node.register(ops.ArrayLength) + @visit_node.register(ops.ArraySort) + @visit_node.register(ops.ArrayStringJoin) + @visit_node.register(ops.CountDistinctStar) + @visit_node.register(ops.IntervalBinary) + @visit_node.register(ops.IntervalAdd) + @visit_node.register(ops.IntervalSubtract) + @visit_node.register(ops.IntervalMultiply) + @visit_node.register(ops.IntervalFloorDivide) + @visit_node.register(ops.IntervalFromInteger) + @visit_node.register(ops.TimestampBucket) + @visit_node.register(ops.TimestampAdd) + @visit_node.register(ops.TimestampSub) + @visit_node.register(ops.TimestampDiff) + @visit_node.register(ops.StringToTimestamp) + @visit_node.register(ops.TimeDelta) + @visit_node.register(ops.DateDelta) + @visit_node.register(ops.TimestampDelta) + @visit_node.register(ops.TryCast) + def visit_Undefined(self, op, **kwargs): + return super().visit_Undefined(op, **kwargs) + + 
@visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to) -> sge.Cast: + if to.is_timestamp(): + if to.timezone not in (None, "UTC"): + raise com.UnsupportedOperationError( + "SQLite does not support casting to timezones other than 'UTC'" + ) + if op.arg.dtype.is_numeric(): + return self.f.datetime(arg, "unixepoch") + else: + return self.f.strftime("%Y-%m-%d %H:%M:%f", arg) + elif to.is_date(): + return self.f.date(arg) + elif to.is_time(): + return self.f.time(arg) + return super().visit_Cast(op, arg=arg, to=to) + + @visit_node.register(ops.Limit) + def visit_Limit(self, op, *, parent, n, offset): + # SQLite doesn't support compiling an OFFSET without a LIMIT, but + # treats LIMIT -1 as no limit + return super().visit_Limit( + op, parent=parent, n=(-1 if n is None else n), offset=offset + ) + + @visit_node.register(ops.WindowBoundary) + def visit_WindowBoundary(self, op, *, value, preceding): + if op.value.dtype.is_interval(): + raise com.OperationNotDefinedError( + "Interval window bounds not supported by SQLite" + ) + return super().visit_WindowBoundary(op, value=value, preceding=preceding) + + @visit_node.register(ops.JoinLink) + def visit_JoinLink(self, op, **kwargs): + if op.how == "asof": + raise com.UnsupportedOperationError( + "ASOF joins are not supported by SQLite" + ) + return super().visit_JoinLink(op, **kwargs) + + @visit_node.register(ops.StartsWith) + def visit_StartsWith(self, op, *, arg, start): + return arg.like(self.f.concat(start, "%")) + + @visit_node.register(ops.EndsWith) + def visit_EndsWith(self, op, *, arg, end): + return arg.like(self.f.concat("%", end)) + + @visit_node.register(ops.StrRight) + def visit_StrRight(self, op, *, arg, nchars): + return self.f.substr(arg, -nchars, nchars) + + @visit_node.register(ops.StringFind) + def visit_StringFind(self, op, *, arg, substr, start, end): + if op.end is not None: + raise NotImplementedError("`end` not yet implemented") + + if op.start is not None: + arg = self.f.substr(arg, start + 1) + pos = self.f.instr(arg, substr) + return sg.case().when(pos > 0, pos + start).else_(0) + + return self.f.instr(arg, substr) + + @visit_node.register(ops.StringJoin) + def visit_StringJoin(self, op, *, arg, sep): + args = [arg[0]] + for item in arg[1:]: + args.extend([sep, item]) + return self.f.concat(*args) + + @visit_node.register(ops.StringContains) + def visit_Contains(self, op, *, haystack, needle): + return self.f.instr(haystack, needle) >= 1 + + @visit_node.register(ops.ExtractQuery) + def visit_ExtractQuery(self, op, *, arg, key): + if op.key is None: + return self.f._ibis_extract_full_query(arg) + return self.f._ibis_extract_query(arg, key) + + @visit_node.register(ops.Greatest) + def visit_Greatest(self, op, *, arg): + return self.f.max(*arg) + + @visit_node.register(ops.Least) + def visit_Least(self, op, *, arg): + return self.f.min(*arg) + + @visit_node.register(ops.IdenticalTo) + def visit_IdenticalTo(self, op, *, left, right): + return sge.Is(this=left, expression=right) + + @visit_node.register(ops.Clip) + def visit_Clip(self, op, *, arg, lower, upper): + if upper is not None: + arg = self.if_(arg.is_(sge.NULL), arg, self.f.min(upper, arg)) + + if lower is not None: + arg = self.if_(arg.is_(sge.NULL), arg, self.f.max(lower, arg)) + + return arg + + @visit_node.register(ops.RandomScalar) + def visit_RandomScalar(self, op): + return 0.5 + self.f.random() / sge.Literal.number(float(-1 << 64)) + + @visit_node.register(ops.Cot) + def visit_Cot(self, op, *, arg): + return 1 / self.f.tan(arg) + + 
@visit_node.register(ops.Arbitrary)
+ def visit_Arbitrary(self, op, *, arg, how, where):
+ if op.how == "heavy":
+ raise com.OperationNotDefinedError(
+ "how='heavy' not implemented for the SQLite backend"
+ )
+
+ return self._aggregate(f"_ibis_arbitrary_{how}", arg, where=where)
+
+ @visit_node.register(ops.ArgMin)
+ def visit_ArgMin(self, *args, **kwargs):
+ return self._visit_arg_reduction("min", *args, **kwargs)
+
+ @visit_node.register(ops.ArgMax)
+ def visit_ArgMax(self, *args, **kwargs):
+ return self._visit_arg_reduction("max", *args, **kwargs)
+
+ def _visit_arg_reduction(self, func, op, *, arg, key, where):
+ cond = arg.is_(sg.not_(sge.NULL))
+
+ if op.where is not None:
+ cond = sg.and_(cond, where)
+
+ agg = self._aggregate(func, key, where=cond)
+ return self.f.anon.json_extract(self.f.json_array(arg, agg), "$[0]")
+
+ @visit_node.register(ops.Variance)
+ def visit_Variance(self, op, *, arg, how, where):
+ return self._aggregate(f"_ibis_var_{op.how}", arg, where=where)
+
+ @visit_node.register(ops.StandardDev)
+ def visit_StandardDev(self, op, *, arg, how, where):
+ var = self._aggregate(f"_ibis_var_{op.how}", arg, where=where)
+ return self.f.sqrt(var)
+
+ @visit_node.register(ops.ApproxCountDistinct)
+ def visit_ApproxCountDistinct(self, op, *, arg, where):
+ return self.agg.count(sge.Distinct(expressions=[arg]), where=where)
+
+ @visit_node.register(ops.CountDistinct)
+ def visit_CountDistinct(self, op, *, arg, where):
+ return self.agg.count(sge.Distinct(expressions=[arg]), where=where)
+
+ @visit_node.register(ops.Strftime)
+ def visit_Strftime(self, op, *, arg, format_str):
+ return self.f.strftime(format_str, arg)
+
+ @visit_node.register(ops.DateFromYMD)
+ def visit_DateFromYMD(self, op, *, year, month, day):
+ return self.f.date(self.f.printf("%04d-%02d-%02d", year, month, day))
+
+ @visit_node.register(ops.TimeFromHMS)
+ def visit_TimeFromHMS(self, op, *, hours, minutes, seconds):
+ return self.f.time(self.f.printf("%02d:%02d:%02d", hours, minutes, seconds))
+
+ @visit_node.register(ops.TimestampFromYMDHMS)
+ def visit_TimestampFromYMDHMS(
+ self, op, *, year, month, day, hours, minutes, seconds
+ ):
+ return self.f.datetime(
+ self.f.printf(
+ "%04d-%02d-%02d %02d:%02d:%02d%s",
+ year,
+ month,
+ day,
+ hours,
+ minutes,
+ seconds,
+ )
+ )
+
+ def _temporal_truncate(self, func, arg, unit):
+ modifiers = {
+ DateUnit.DAY: ("start of day",),
+ DateUnit.WEEK: ("weekday 0", "-6 days"),
+ DateUnit.MONTH: ("start of month",),
+ DateUnit.YEAR: ("start of year",),
+ IntervalUnit.DAY: ("start of day",),
+ IntervalUnit.WEEK: ("weekday 0", "-6 days", "start of day"),
+ IntervalUnit.MONTH: ("start of month",),
+ IntervalUnit.YEAR: ("start of year",),
+ }
+
+ params = modifiers.get(unit)
+ if params is None:
+ raise com.UnsupportedOperationError(f"Unsupported truncate unit {unit}")
+ return func(arg, *params)
+
+ @visit_node.register(ops.DateTruncate)
+ def visit_DateTruncate(self, op, *, arg, unit):
+ return self._temporal_truncate(self.f.date, arg, unit)
+
+ @visit_node.register(ops.TimestampTruncate)
+ def visit_TimestampTruncate(self, op, *, arg, unit):
+ return self._temporal_truncate(self.f.datetime, arg, unit)
+
+ @visit_node.register(ops.DateAdd)
+ @visit_node.register(ops.DateSub)
+ def visit_DateArithmetic(self, op, *, left, right):
+ unit = op.right.dtype.unit
+ sign = "+" if isinstance(op, ops.DateAdd) else "-"
+ if unit not in (IntervalUnit.YEAR, IntervalUnit.MONTH, IntervalUnit.DAY):
+ raise com.UnsupportedOperationError(
+ f"SQLite does not allow binary op {sign!r} 
with INTERVAL offset {unit}"
+ )
+ if isinstance(op.right, ops.Literal):
+ return self.f.date(left, f"{sign}{op.right.value} {unit.plural}")
+ else:
+ return self.f.date(left, self.f.concat(sign, right, f" {unit.plural}"))
+
+ @visit_node.register(ops.DateDiff)
+ def visit_DateDiff(self, op, *, left, right):
+ return self.f.julianday(left) - self.f.julianday(right)
+
+ @visit_node.register(ops.ExtractYear)
+ def visit_ExtractYear(self, op, *, arg):
+ return self.cast(self.f.strftime("%Y", arg), dt.int64)
+
+ @visit_node.register(ops.ExtractQuarter)
+ def visit_ExtractQuarter(self, op, *, arg):
+ return (self.f.strftime("%m", arg) + 2) / 3
+
+ @visit_node.register(ops.ExtractMonth)
+ def visit_ExtractMonth(self, op, *, arg):
+ return self.cast(self.f.strftime("%m", arg), dt.int64)
+
+ @visit_node.register(ops.ExtractDay)
+ def visit_ExtractDay(self, op, *, arg):
+ return self.cast(self.f.strftime("%d", arg), dt.int64)
+
+ @visit_node.register(ops.ExtractDayOfYear)
+ def visit_ExtractDayOfYear(self, op, *, arg):
+ return self.cast(self.f.strftime("%j", arg), dt.int64)
+
+ @visit_node.register(ops.ExtractHour)
+ def visit_ExtractHour(self, op, *, arg):
+ return self.cast(self.f.strftime("%H", arg), dt.int64)
+
+ @visit_node.register(ops.ExtractMinute)
+ def visit_ExtractMinute(self, op, *, arg):
+ return self.cast(self.f.strftime("%M", arg), dt.int64)
+
+ @visit_node.register(ops.ExtractSecond)
+ def visit_ExtractSecond(self, op, *, arg):
+ return self.cast(self.f.strftime("%S", arg), dt.int64)
+
+ @visit_node.register(ops.ExtractMillisecond)
+ def visit_Millisecond(self, op, *, arg):
+ return self.cast(self.f.mod(self.f.strftime("%f", arg) * 1000, 1000), dt.int64)
+
+ @visit_node.register(ops.ExtractMicrosecond)
+ def visit_Microsecond(self, op, *, arg):
+ return self.cast(
+ self.f.mod(self.cast(self.f.strftime("%f", arg), dt.int64), 1000), dt.int64
+ )
+
+ @visit_node.register(ops.ExtractWeekOfYear)
+ def visit_ExtractWeekOfYear(self, op, *, arg):
+ """ISO week of year.
+
+ This solution is based on https://stackoverflow.com/a/15511864 and handles
+ the edge cases that arise when computing the ISO week from a non-ISO week.
+
+ The implementation gives the same results as `datetime.isocalendar()`.
+
+ The year's week that "wins" the day is the year with more allotted days.
+
+ For example:
+
+ ```
+ $ cal '2011-01-01'
+ January 2011
+ Su Mo Tu We Th Fr Sa
+ |1|
+ 2 3 4 5 6 7 8
+ 9 10 11 12 13 14 15
+ 16 17 18 19 20 21 22
+ 23 24 25 26 27 28 29
+ 30 31
+ ```
+
+ Here the ISO week number is `52` since the day occurs in a week with more
+ days in the week occurring in the _previous_ week's year.
+
+ ```
+ $ cal '2012-12-31'
+ December 2012
+ Su Mo Tu We Th Fr Sa
+ 1
+ 2 3 4 5 6 7 8
+ 9 10 11 12 13 14 15
+ 16 17 18 19 20 21 22
+ 23 24 25 26 27 28 29
+ 30 |31|
+ ```
+
+ Here the ISO week of year is `1` since the day occurs in a week with more
+ days in the week occurring in the _next_ week's year. 
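+
+ As a concrete check of the expression constructed below:
+ `date('2011-01-01', '-3 days', 'weekday 4')` is `2010-12-30`, whose day of
+ year is `364`, and `(364 - 1) / 7 + 1` evaluates to `52` under SQLite's
+ integer division; likewise `date('2012-12-31', '-3 days', 'weekday 4')` is
+ `2013-01-03`, whose day of year is `3`, giving `(3 - 1) / 7 + 1 == 1`.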
+ """ + date = self.f.date(arg, "-3 days", "weekday 4") + return (self.f.strftime("%j", date) - 1) / 7 + 1 + + @visit_node.register(ops.ExtractEpochSeconds) + def visit_ExtractEpochSeconds(self, op, *, arg): + return self.cast((self.f.julianday(arg) - 2440587.5) * 86400.0, dt.int64) + + @visit_node.register(ops.DayOfWeekIndex) + def visit_DayOfWeekIndex(self, op, *, arg): + return self.cast( + self.f.mod(self.cast(self.f.strftime("%w", arg) + 6, dt.int64), 7), dt.int64 + ) + + @visit_node.register(ops.DayOfWeekName) + def visit_DayOfWeekName(self, op, *, arg): + return sge.Case( + this=self.f.strftime("%w", arg), + ifs=[ + self.if_("0", "Sunday"), + self.if_("1", "Monday"), + self.if_("2", "Tuesday"), + self.if_("3", "Wednesday"), + self.if_("4", "Thursday"), + self.if_("5", "Friday"), + self.if_("6", "Saturday"), + ], + ) + + @visit_node.register(ops.Xor) + def visit_Xor(self, op, *, left, right): + return (left.or_(right)).and_(sg.not_(left.and_(right))) + + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_binary(): + return self.f.unhex(value.hex()) + + if dtype.is_decimal(): + value = float(value) + dtype = dt.double(nullable=dtype.nullable) + elif dtype.is_uuid(): + value = str(value) + dtype = dt.string(nullable=dtype.nullable) + elif dtype.is_interval(): + value = int(value) + dtype = dt.int64(nullable=dtype.nullable) + elif dtype.is_date() or dtype.is_timestamp() or dtype.is_time(): + # To ensure comparisons apply uniformly between temporal values + # (which are always represented as strings), we need to enforce + # that temporal literals are formatted the same way that SQLite + # formats them. This means " " instead of "T" and no offset suffix + # for UTC. + value = ( + value.isoformat() + .replace("T", " ") + .replace("Z", "") + .replace("+00:00", "") + ) + dtype = dt.string(nullable=dtype.nullable) + elif ( + dtype.is_map() + or dtype.is_struct() + or dtype.is_array() + or dtype.is_geospatial() + ): + raise com.UnsupportedBackendType(f"Unsupported type: {dtype!r}") + return super().visit_NonNullLiteral(op, value=value, dtype=dtype) + + +_SIMPLE_OPS = { + ops.RegexReplace: "_ibis_regex_replace", + ops.RegexExtract: "_ibis_regex_extract", + ops.RegexSearch: "_ibis_regex_search", + ops.Translate: "_ibis_translate", + ops.Capitalize: "_ibis_capitalize", + ops.Reverse: "_ibis_reverse", + ops.RPad: "_ibis_rpad", + ops.LPad: "_ibis_lpad", + ops.Repeat: "_ibis_repeat", + ops.StringAscii: "_ibis_string_ascii", + ops.ExtractAuthority: "_ibis_extract_authority", + ops.ExtractFragment: "_ibis_extract_fragment", + ops.ExtractHost: "_ibis_extract_host", + ops.ExtractPath: "_ibis_extract_path", + ops.ExtractProtocol: "_ibis_extract_protocol", + ops.ExtractUserInfo: "_ibis_extract_user_info", + ops.BitwiseXor: "_ibis_xor", + ops.BitwiseNot: "_ibis_inv", + ops.Modulus: "mod", + ops.Log10: "log10", + ops.TypeOf: "typeof", + ops.BitOr: "_ibis_bit_or", + ops.BitAnd: "_ibis_bit_and", + ops.BitXor: "_ibis_bit_xor", + ops.First: "_ibis_arbitrary_first", + ops.Last: "_ibis_arbitrary_last", + ops.Mode: "_ibis_mode", + ops.Time: "time", + ops.Date: "date", +} + + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @SQLiteCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) -class SQLiteExprTranslator(AlchemyExprTranslator): - _registry = operation_registry - _rewrites = AlchemyExprTranslator._rewrites.copy() - 
_dialect_name = "sqlite" - type_mapper = SqliteType + else: + @SQLiteCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) -rewrites = SQLiteExprTranslator.rewrites + setattr(SQLiteCompiler, f"visit_{_op.__name__}", _fmt) -class SQLiteCompiler(AlchemyCompiler): - translator_class = SQLiteExprTranslator - support_values_syntax_in_select = False - null_limit = None - rewrites = AlchemyCompiler.rewrites | rewrite_sample +del _op, _name, _fmt diff --git a/ibis/backends/sqlite/converter.py b/ibis/backends/sqlite/converter.py new file mode 100644 index 000000000000..555fa56b5275 --- /dev/null +++ b/ibis/backends/sqlite/converter.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +import pandas as pd + +from ibis.formats.pandas import PandasData + +# The "mixed" format was added in pandas 2 +_DATETIME_FORMAT = "mixed" if pd.__version__ >= "2.0.0" else None + + +class SQLitePandasData(PandasData): + @classmethod + def convert_Timestamp(cls, s, dtype, pandas_type): + """A more flexible timestamp parser. + + This handles the valid formats supported by SQLite. + See https://sqlite.org/lang_datefunc.html#time_values for more info. + """ + try: + return super().convert_Timestamp(s, dtype, pandas_type) + except ValueError: + # Parsing failed, try a more relaxed parser + return pd.to_datetime(s, format=_DATETIME_FORMAT, utc=True) diff --git a/ibis/backends/sqlite/datatypes.py b/ibis/backends/sqlite/datatypes.py deleted file mode 100644 index 5d92a8d89b90..000000000000 --- a/ibis/backends/sqlite/datatypes.py +++ /dev/null @@ -1,68 +0,0 @@ -"""Parse SQLite data types.""" - -from __future__ import annotations - -import datetime - -import sqlalchemy.types as sat -from sqlalchemy.dialects import sqlite - -import ibis.expr.datatypes as dt -from ibis.backends.base.sql.alchemy.datatypes import AlchemyType -from ibis.backends.base.sqlglot.datatypes import SQLiteType as SqlglotSQLiteType - - -class SqliteType(AlchemyType): - dialect = "sqlite" - - @classmethod - def from_ibis(cls, dtype: dt.DataType) -> sat.TypeEngine: - if dtype.is_floating(): - return sat.REAL - else: - return super().from_ibis(dtype) - - @classmethod - def to_ibis(cls, typ: sat.TypeEngine, nullable: bool = True) -> dt.DataType: - if isinstance(typ, sat.REAL): - return dt.Float64(nullable=nullable) - elif isinstance(typ, sqlite.JSON): - return dt.JSON(nullable=nullable) - else: - return super().to_ibis(typ, nullable=nullable) - - @classmethod - def from_string(cls, type_string, nullable=True): - return SqlglotSQLiteType.from_string(type_string, nullable=nullable) - - -class ISODATETIME(sqlite.DATETIME): - """A thin `datetime` type to override sqlalchemy's datetime parsing. - - This is to support a wider range of timestamp formats accepted by SQLite. - - See https://sqlite.org/lang_datefunc.html#time_values for the full - list of datetime formats SQLite accepts. - """ - - def result_processor(self, *_): - def process(value: str | None) -> datetime.datetime | None: - """Convert a `str` to a `datetime` according to SQLite's rules. - - This function ignores `None` values. 
- """ - if value is None: - return None - if value.endswith("Z"): - # Parse and set the timezone as UTC - o = datetime.datetime.fromisoformat(value[:-1]).replace( - tzinfo=datetime.timezone.utc - ) - else: - o = datetime.datetime.fromisoformat(value) - if o.tzinfo: - # Convert any aware datetime to UTC - return o.astimezone(datetime.timezone.utc) - return o - - return process diff --git a/ibis/backends/sqlite/registry.py b/ibis/backends/sqlite/registry.py deleted file mode 100644 index 4c91cbf049bf..000000000000 --- a/ibis/backends/sqlite/registry.py +++ /dev/null @@ -1,448 +0,0 @@ -from __future__ import annotations - -import functools -import operator - -import sqlalchemy as sa -import toolz -from multipledispatch import Dispatcher - -import ibis -import ibis.common.exceptions as com -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -from ibis.backends.base.sql.alchemy import ( - fixed_arity, - reduction, - sqlalchemy_operation_registry, - sqlalchemy_window_functions_registry, - unary, - varargs, - variance_reduction, -) -from ibis.backends.base.sql.alchemy.registry import _gen_string_find -from ibis.backends.base.sql.alchemy.registry import _literal as base_literal -from ibis.backends.sqlite.datatypes import ISODATETIME -from ibis.common.temporal import DateUnit, IntervalUnit - -operation_registry = sqlalchemy_operation_registry.copy() -operation_registry.update(sqlalchemy_window_functions_registry) - -sqlite_cast = Dispatcher("sqlite_cast") - - -@sqlite_cast.register(object, dt.Integer, dt.Timestamp) -def _unixepoch(arg, from_, to, **_): - return sa.func.datetime(arg, "unixepoch") - - -@sqlite_cast.register(object, dt.String, dt.Timestamp) -def _string_to_timestamp(arg, from_, to, **_): - return sa.func.strftime("%Y-%m-%d %H:%M:%f", arg) - - -@sqlite_cast.register(object, dt.Integer, dt.Date) -def _integer_to_date(arg, from_, to, **_): - return sa.func.date(sa.func.datetime(arg, "unixepoch")) - - -@sqlite_cast.register(object, (dt.String, dt.Timestamp), dt.Date) -def _string_or_timestamp_to_date(arg, from_, to, **_): - return sa.func.date(arg) - - -@sqlite_cast.register(object, dt.Timestamp, dt.Timestamp) -def _timestamp_to_timestamp(arg, from_, to, **_): - if from_ == to: - return arg - if from_.timezone is None and to.timezone == "UTC": - return arg - raise com.UnsupportedOperationError(f"Cannot cast from {from_} to {to}") - - -@sqlite_cast.register(object, dt.DataType, (dt.Date, dt.Timestamp)) -def _value_to_temporal(arg, from_, to, **_): - raise com.UnsupportedOperationError(f"Unable to cast from {from_!r} to {to!r}.") - - -@sqlite_cast.register(object, dt.DataType, dt.DataType) -def _default_cast_impl(arg, from_, to, translator=None): - assert translator is not None, "translator is None" - return sa.cast(arg, translator.get_sqla_type(to)) - - -def _strftime_int(fmt): - def translator(t, op): - return sa.cast(sa.func.strftime(fmt, t.translate(op.arg)), sa.INT) - - return translator - - -def _extract_quarter(t, op): - expr_new = ops.ExtractMonth(op.arg).to_expr() - expr_new = ( - ibis.case() - .when(expr_new.isin([1, 2, 3]), 1) - .when(expr_new.isin([4, 5, 6]), 2) - .when(expr_new.isin([7, 8, 9]), 3) - .else_(4) - .end() - ) - return sa.cast(t.translate(expr_new.op()), sa.Integer) - - -_truncate_modifiers = { - DateUnit.DAY: ("start of day",), - DateUnit.WEEK: ("weekday 0", "-6 days"), - DateUnit.MONTH: ("start of month",), - DateUnit.YEAR: ("start of year",), - IntervalUnit.DAY: ("start of day",), - IntervalUnit.WEEK: ("weekday 1",), - IntervalUnit.MONTH: 
("start of month",), - IntervalUnit.YEAR: ("start of year",), -} - - -def _truncate(func): - def translator(t, op): - sa_arg = t.translate(op.arg) - try: - modifier = _truncate_modifiers[op.unit] - except KeyError: - raise com.UnsupportedOperationError( - f"Unsupported truncate unit {op.unit!r}" - ) - return func(sa_arg, *modifier) - - return translator - - -def _log(t, op): - sa_arg = t.translate(op.arg) - if op.base is None: - return sa.func._ibis_sqlite_ln(sa_arg) - return sa.func._ibis_sqlite_log(sa_arg, t.translate(op.base)) - - -def _generic_pad(arg, length, pad): - f = sa.func - arg_length = f.length(arg) - pad_length = f.length(pad) - number_of_zero_bytes = ((length - arg_length - 1 + pad_length) / pad_length + 1) / 2 - return f.substr( - f.replace( - f.replace(f.substr(f.quote(f.zeroblob(number_of_zero_bytes)), 3), "'", ""), - "0", - pad, - ), - 1, - length - f.length(arg), - ) - - -def _extract_week_of_year(t, op): - """ISO week of year. - - This solution is based on https://stackoverflow.com/a/15511864 and handle - the edge cases when computing ISO week from non-ISO week. - - The implementation gives the same results as `datetime.isocalendar()`. - - The year's week that "wins" the day is the year with more allotted days. - - For example: - - ``` - $ cal '2011-01-01' - January 2011 - Su Mo Tu We Th Fr Sa - |1| - 2 3 4 5 6 7 8 - 9 10 11 12 13 14 15 - 16 17 18 19 20 21 22 - 23 24 25 26 27 28 29 - 30 31 - ``` - - Here the ISO week number is `52` since the day occurs in a week with more - days in the week occurring in the _previous_ week's year. - - ``` - $ cal '2012-12-31' - December 2012 - Su Mo Tu We Th Fr Sa - 1 - 2 3 4 5 6 7 8 - 9 10 11 12 13 14 15 - 16 17 18 19 20 21 22 - 23 24 25 26 27 28 29 - 30 |31| - ``` - - Here the ISO week of year is `1` since the day occurs in a week with more - days in the week occurring in the _next_ week's year. 
- """ - date = sa.func.date(t.translate(op.arg), "-3 days", "weekday 4") - return (sa.func.strftime("%j", date) - 1) / 7 + 1 - - -def _string_join(t, op): - return functools.reduce( - operator.add, - toolz.interpose(t.translate(op.sep), map(t.translate, op.arg)), - ) - - -def _literal(t, op): - dtype = op.dtype - if dtype.is_array(): - raise NotImplementedError(f"Unsupported type: {dtype!r}") - if dtype.is_uuid(): - return sa.literal(str(op.value)) - return base_literal(t, op) - - -def _arbitrary(t, op): - if (how := op.how) == "heavy": - raise com.OperationNotDefinedError( - "how='heavy' not implemented for the SQLite backend" - ) - - return reduction(getattr(sa.func, f"_ibis_sqlite_arbitrary_{how}"))(t, op) - - -_INTERVAL_DATE_UNITS = frozenset( - (IntervalUnit.YEAR, IntervalUnit.MONTH, IntervalUnit.DAY) -) - - -def _timestamp_op(func, sign, units): - def _formatter(translator, op): - arg, offset = op.args - - unit = offset.dtype.unit - if unit not in units: - raise com.UnsupportedOperationError( - "SQLite does not allow binary operation " - f"{func} with INTERVAL offset {unit}" - ) - offset = translator.translate(offset) - result = getattr(sa.func, func)( - translator.translate(arg), - f"{sign}{offset.value} {unit.plural}", - ) - return result - - return _formatter - - -def _date_diff(t, op): - left, right = map(t.translate, op.args) - return sa.func.julianday(left) - sa.func.julianday(right) - - -def _mode(t, op): - sa_arg = op.arg - - if sa_arg.dtype.is_boolean(): - sa_arg = ops.Cast(op.arg, to=dt.int32) - - if op.where is not None: - sa_arg = ops.IfElse(op.where, sa_arg, None) - - return sa.func._ibis_sqlite_mode(t.translate(sa_arg)) - - -def _arg_min_max(agg_func): - def translate(t, op: ops.ArgMin | ops.ArgMax): - arg = t.translate(op.arg) - key = t.translate(op.key) - - conditions = [arg != sa.null()] - - if (where := op.where) is not None: - conditions.append(t.translate(where)) - - agg = agg_func(key).filter(sa.and_(*conditions)) - - return sa.func.json_extract(sa.func.json_array(arg, agg), "$[0]") - - return translate - - -def _day_of_the_week_name(arg): - return sa.case( - (sa.func.strftime("%w", arg) == "0", "Sunday"), - (sa.func.strftime("%w", arg) == "1", "Monday"), - (sa.func.strftime("%w", arg) == "2", "Tuesday"), - (sa.func.strftime("%w", arg) == "3", "Wednesday"), - (sa.func.strftime("%w", arg) == "4", "Thursday"), - (sa.func.strftime("%w", arg) == "5", "Friday"), - (sa.func.strftime("%w", arg) == "6", "Saturday"), - ) - - -def _extract_query(t, op): - arg = t.translate(op.arg) - if op.key is not None: - return sa.func._ibis_extract_query(arg, t.translate(op.key)) - else: - return sa.func._ibis_extract_query_no_param(arg) - - -operation_registry.update( - { - # TODO(kszucs): don't dispatch on op.arg since that should be always an - # instance of ops.Value - ops.Cast: ( - lambda t, op: sqlite_cast( - t.translate(op.arg), op.arg.dtype, op.to, translator=t - ) - ), - ops.StrRight: fixed_arity( - lambda arg, nchars: sa.func.substr(arg, -nchars, nchars), 2 - ), - ops.StringFind: _gen_string_find(sa.func.instr), - ops.StringJoin: _string_join, - ops.StringConcat: ( - lambda t, op: functools.reduce(operator.add, map(t.translate, op.arg)) - ), - ops.Least: varargs(sa.func.min), - ops.Greatest: varargs(sa.func.max), - ops.DateFromYMD: fixed_arity( - lambda y, m, d: sa.func.date(sa.func.printf("%04d-%02d-%02d", y, m, d)), 3 - ), - ops.TimeFromHMS: fixed_arity( - lambda h, m, s: sa.func.time(sa.func.printf("%02d:%02d:%02d", h, m, s)), 3 - ), - ops.TimestampFromYMDHMS: fixed_arity( 
- lambda y, mo, d, h, m, s: sa.func.datetime( - sa.func.printf("%04d-%02d-%02d %02d:%02d:%02d%s", y, mo, d, h, m, s) - ), - 6, - ), - ops.DateTruncate: _truncate(sa.func.date), - ops.Date: unary(sa.func.date), - ops.DateAdd: _timestamp_op("DATE", "+", _INTERVAL_DATE_UNITS), - ops.DateSub: _timestamp_op("DATE", "-", _INTERVAL_DATE_UNITS), - ops.DateDiff: _date_diff, - ops.Time: unary(sa.func.time), - ops.TimestampTruncate: _truncate(sa.func.datetime), - ops.Strftime: fixed_arity( - lambda arg, format_str: sa.func.strftime(format_str, arg), 2 - ), - ops.ExtractYear: _strftime_int("%Y"), - ops.ExtractMonth: _strftime_int("%m"), - ops.ExtractDay: _strftime_int("%d"), - ops.ExtractWeekOfYear: _extract_week_of_year, - ops.ExtractDayOfYear: _strftime_int("%j"), - ops.ExtractQuarter: _extract_quarter, - # example: (julianday('now') - 2440587.5) * 86400.0 - ops.ExtractEpochSeconds: fixed_arity( - lambda arg: sa.cast( - (sa.func.julianday(arg) - 2440587.5) * 86400.0, sa.BigInteger - ), - 1, - ), - ops.ExtractHour: _strftime_int("%H"), - ops.ExtractMinute: _strftime_int("%M"), - ops.ExtractSecond: _strftime_int("%S"), - ops.ExtractMicrosecond: fixed_arity( - lambda arg: (sa.func.strftime("%f", arg)) % 1000, 1 - ), - ops.ExtractMillisecond: fixed_arity( - lambda arg: (sa.func.strftime("%f", arg) * 1000) % 1000, 1 - ), - ops.DayOfWeekIndex: fixed_arity( - lambda arg: sa.cast( - sa.cast(sa.func.strftime("%w", arg) + 6, sa.SMALLINT) % 7, sa.SMALLINT - ), - 1, - ), - ops.DayOfWeekName: fixed_arity(_day_of_the_week_name, 1), - ops.TimestampNow: fixed_arity( - lambda: sa.func.datetime("now", type_=ISODATETIME()), 0 - ), - ops.RegexSearch: fixed_arity(sa.func._ibis_sqlite_regex_search, 2), - ops.RegexReplace: fixed_arity(sa.func._ibis_sqlite_regex_replace, 3), - ops.RegexExtract: fixed_arity(sa.func._ibis_sqlite_regex_extract, 3), - ops.LPad: fixed_arity( - lambda arg, length, pad: _generic_pad(arg, length, pad) + arg, 3 - ), - ops.RPad: fixed_arity( - lambda arg, length, pad: arg + _generic_pad(arg, length, pad), 3 - ), - ops.Repeat: fixed_arity( - lambda arg, times: sa.func.replace( - sa.func.substr( - sa.func.quote(sa.func.zeroblob((times + 1) / 2)), 3, times - ), - "0", - arg, - ), - 2, - ), - ops.Reverse: unary(sa.func._ibis_sqlite_reverse), - ops.StringAscii: unary(sa.func._ibis_sqlite_string_ascii), - ops.Capitalize: unary(sa.func._ibis_sqlite_capitalize), - ops.Translate: fixed_arity(sa.func._ibis_sqlite_translate, 3), - ops.Sqrt: unary(sa.func._ibis_sqlite_sqrt), - ops.Power: fixed_arity(sa.func._ibis_sqlite_power, 2), - ops.Exp: unary(sa.func._ibis_sqlite_exp), - ops.Ln: unary(sa.func._ibis_sqlite_ln), - ops.Log: _log, - ops.Log10: unary(sa.func._ibis_sqlite_log10), - ops.Log2: unary(sa.func._ibis_sqlite_log2), - ops.Floor: unary(sa.func._ibis_sqlite_floor), - ops.Ceil: unary(sa.func._ibis_sqlite_ceil), - ops.Sign: unary(sa.func._ibis_sqlite_sign), - ops.FloorDivide: fixed_arity(sa.func._ibis_sqlite_floordiv, 2), - ops.Modulus: fixed_arity(sa.func._ibis_sqlite_mod, 2), - ops.Variance: variance_reduction("_ibis_sqlite_var"), - ops.StandardDev: toolz.compose( - sa.func._ibis_sqlite_sqrt, variance_reduction("_ibis_sqlite_var") - ), - ops.RowID: lambda *_: sa.literal_column("rowid"), - ops.Cot: unary(sa.func._ibis_sqlite_cot), - ops.Cos: unary(sa.func._ibis_sqlite_cos), - ops.Sin: unary(sa.func._ibis_sqlite_sin), - ops.Tan: unary(sa.func._ibis_sqlite_tan), - ops.Acos: unary(sa.func._ibis_sqlite_acos), - ops.Asin: unary(sa.func._ibis_sqlite_asin), - ops.Atan: unary(sa.func._ibis_sqlite_atan), - 
ops.Atan2: fixed_arity(sa.func._ibis_sqlite_atan2, 2), - ops.BitOr: reduction(sa.func._ibis_sqlite_bit_or), - ops.BitAnd: reduction(sa.func._ibis_sqlite_bit_and), - ops.BitXor: reduction(sa.func._ibis_sqlite_bit_xor), - ops.Mode: _mode, - ops.ArgMin: _arg_min_max(sa.func.min), - ops.ArgMax: _arg_min_max(sa.func.max), - ops.Degrees: unary(sa.func._ibis_sqlite_degrees), - ops.Radians: unary(sa.func._ibis_sqlite_radians), - # sqlite doesn't implement a native xor operator - ops.BitwiseXor: fixed_arity(sa.func._ibis_sqlite_xor, 2), - ops.BitwiseNot: unary(sa.func._ibis_sqlite_inv), - ops.IfElse: fixed_arity(sa.func.iif, 3), - ops.Pi: fixed_arity(sa.func._ibis_sqlite_pi, 0), - ops.E: fixed_arity(sa.func._ibis_sqlite_e, 0), - ops.TypeOf: unary(sa.func.typeof), - ops.Literal: _literal, - ops.RandomScalar: fixed_arity( - lambda: 0.5 + sa.func.random() / sa.cast(-1 << 64, sa.REAL), 0 - ), - ops.Arbitrary: _arbitrary, - ops.First: lambda t, op: t.translate( - ops.Arbitrary(op.arg, where=op.where, how="first") - ), - ops.Last: lambda t, op: t.translate( - ops.Arbitrary(op.arg, where=op.where, how="last") - ), - ops.ExtractFragment: fixed_arity(sa.func._ibis_extract_fragment, 1), - ops.ExtractProtocol: fixed_arity(sa.func._ibis_extract_protocol, 1), - ops.ExtractAuthority: fixed_arity(sa.func._ibis_extract_authority, 1), - ops.ExtractPath: fixed_arity(sa.func._ibis_extract_path, 1), - ops.ExtractHost: fixed_arity(sa.func._ibis_extract_host, 1), - ops.ExtractQuery: _extract_query, - ops.ExtractUserInfo: fixed_arity(sa.func._ibis_extract_user_info, 1), - } -) diff --git a/ibis/backends/sqlite/tests/conftest.py b/ibis/backends/sqlite/tests/conftest.py index 3577061e9ab3..87433b8a12a8 100644 --- a/ibis/backends/sqlite/tests/conftest.py +++ b/ibis/backends/sqlite/tests/conftest.py @@ -22,7 +22,7 @@ class TestConf(BackendTest): returned_timestamp_unit = "s" supports_structs = False stateful = False - deps = ("sqlalchemy",) + deps = ("regex",) @staticmethod def connect(*, tmpdir, worker_id, **kw): @@ -30,7 +30,9 @@ def connect(*, tmpdir, worker_id, **kw): def _load_data(self, **kw: Any) -> None: """Load test data into a SQLite backend instance.""" - super()._load_data(**kw) + with self.connection.begin() as con: + for stmt in self.ddl_script: + con.execute(stmt) with self.connection.begin() as con: for table in TEST_TABLES: @@ -69,30 +71,9 @@ def con(data_dir, tmp_path_factory, worker_id): return TestConf.load_data(data_dir, tmp_path_factory, worker_id).connection -@pytest.fixture(scope="session") -def dialect(): - import sqlalchemy as sa - - return sa.dialects.sqlite.dialect() - - @pytest.fixture(scope="session") def translate(dialect): - from ibis.backends.sqlite import Backend - - context = Backend.compiler.make_context() - return lambda expr: str( - Backend.compiler.translator_class(expr, context) - .get_result() - .compile(dialect=dialect, compile_kwargs={"literal_binds": True}) - ) - - -@pytest.fixture(scope="session") -def sqla_compile(dialect): - return lambda expr: str( - expr.compile(dialect=dialect, compile_kwargs={"literal_binds": True}) - ) + return lambda expr: ibis.to_sql(expr, dialect="sqlite") @pytest.fixture(scope="session") @@ -100,12 +81,6 @@ def alltypes(con): return con.table("functional_alltypes") -@pytest.fixture(scope="session") -def alltypes_sqla(con, alltypes): - name = alltypes.op().name - return con._get_sqla_table(name) - - @pytest.fixture(scope="session") def df(alltypes): return alltypes.execute() diff --git 
a/ibis/backends/sqlite/tests/snapshots/test_client/test_compile_toplevel/out.sql b/ibis/backends/sqlite/tests/snapshots/test_client/test_compile_toplevel/out.sql deleted file mode 100644 index 71a34e8bb3e4..000000000000 --- a/ibis/backends/sqlite/tests/snapshots/test_client/test_compile_toplevel/out.sql +++ /dev/null @@ -1,2 +0,0 @@ -SELECT sum(t0.a) AS "Sum(a)" -FROM t AS t0 \ No newline at end of file diff --git a/ibis/backends/sqlite/tests/snapshots/test_functions/test_count_on_order_by/out.sql b/ibis/backends/sqlite/tests/snapshots/test_functions/test_count_on_order_by/out.sql deleted file mode 100644 index 5a67d4e1b914..000000000000 --- a/ibis/backends/sqlite/tests/snapshots/test_functions/test_count_on_order_by/out.sql +++ /dev/null @@ -1,30 +0,0 @@ -SELECT - COUNT(*) AS "CountStar()" -FROM ( - SELECT - t1."playerID" AS "playerID", - t1."yearID" AS "yearID", - t1.stint AS stint, - t1."teamID" AS "teamID", - t1."lgID" AS "lgID", - t1."G" AS "G", - t1."AB" AS "AB", - t1."R" AS "R", - t1."H" AS "H", - t1."X2B" AS "X2B", - t1."X3B" AS "X3B", - t1."HR" AS "HR", - t1."RBI" AS "RBI", - t1."SB" AS "SB", - t1."CS" AS "CS", - t1."BB" AS "BB", - t1."SO" AS "SO", - t1."IBB" AS "IBB", - t1."HBP" AS "HBP", - t1."SH" AS "SH", - t1."SF" AS "SF", - t1."GIDP" AS "GIDP" - FROM batting AS t1 - ORDER BY - t1."playerID" DESC -) AS t0 \ No newline at end of file diff --git a/ibis/backends/sqlite/tests/test_client.py b/ibis/backends/sqlite/tests/test_client.py index bfce0701139e..3d4553eaa98e 100644 --- a/ibis/backends/sqlite/tests/test_client.py +++ b/ibis/backends/sqlite/tests/test_client.py @@ -2,51 +2,14 @@ import os import sqlite3 -import uuid from pathlib import Path -import numpy as np -import pandas.testing as tm import pytest from pytest import param import ibis -import ibis.expr.types as ir -from ibis import config, udf from ibis.conftest import not_windows -pytest.importorskip("sqlalchemy") - - -def test_table(con): - table = con.table("functional_alltypes") - assert isinstance(table, ir.Table) - - -def test_column_execute(alltypes, df): - expr = alltypes.double_col - result = expr.execute() - expected = df.double_col - tm.assert_series_equal(result, expected) - - -def test_literal_execute(con): - expr = ibis.literal("1234") - result = con.execute(expr) - assert result == "1234" - - -def test_simple_aggregate_execute(alltypes, df): - expr = alltypes.double_col.sum() - result = expr.execute() - expected = df.double_col.sum() - np.testing.assert_allclose(result, expected) - - -def test_list_tables(con): - assert con.list_tables() - assert len(con.list_tables(like="functional")) == 1 - def test_attach_file(tmp_path): dbpath = str(tmp_path / "attached.db") @@ -67,58 +30,8 @@ def test_attach_file(tmp_path): assert foo_tables == bar_tables -def test_compile_toplevel(snapshot): - t = ibis.table([("a", "double")], name="t") - - expr = t.a.sum() - result = ibis.sqlite.compile(expr) - snapshot.assert_match(str(result), "out.sql") - - -def test_create_and_drop_table(con): - t = con.table("functional_alltypes") - name = str(uuid.uuid4()) - new_table = con.create_table(name, t.limit(5)) - tm.assert_frame_equal(new_table.execute(), t.limit(5).execute()) - con.drop_table(name) - assert name not in con.list_tables() - - -def test_verbose_log_queries(con): - queries = [] - - with config.option_context("verbose", True): - with config.option_context("verbose_log", queries.append): - con.table("functional_alltypes")["year"].execute() - - assert len(queries) == 1 - (query,) = queries - assert "SELECT t0.year" in 
query - - -def test_table_equality(dbpath): - con1 = ibis.sqlite.connect(dbpath) - t1 = con1.table("t") - - con2 = ibis.sqlite.connect(dbpath) - t2 = con2.table("t") - - assert t1.op() == t2.op() - assert t1.equals(t2) - - -def test_table_inequality(dbpath): - con = ibis.sqlite.connect(dbpath) - - t = con.table("t") - s = con.table("s") - - assert t.op() != s.op() - assert not t.equals(s) - - def test_builtin_scalar_udf(con): - @udf.scalar.builtin + @ibis.udf.scalar.builtin def zeroblob(n: int) -> bytes: """Return a length `n` blob of zero bytes.""" @@ -129,7 +42,7 @@ def zeroblob(n: int) -> bytes: def test_builtin_agg_udf(con): - @udf.agg.builtin + @ibis.udf.agg.builtin def total(x) -> float: """Totally total.""" diff --git a/ibis/backends/sqlite/tests/test_functions.py b/ibis/backends/sqlite/tests/test_functions.py deleted file mode 100644 index 1962152d6a56..000000000000 --- a/ibis/backends/sqlite/tests/test_functions.py +++ /dev/null @@ -1,713 +0,0 @@ -from __future__ import annotations - -import math -import sqlite3 -import uuid - -import numpy as np -import pandas as pd -import pandas.testing as tm -import pytest -from packaging.version import parse -from pytest import param - -import ibis -import ibis.expr.datatypes as dt -from ibis import config -from ibis import literal as L - -sa = pytest.importorskip("sqlalchemy") - - -@pytest.mark.parametrize( - ("func", "expected"), - [ - ( - lambda t: t.double_col.cast(dt.int8), - lambda at: sa.cast(at.c.double_col, sa.SMALLINT), - ), - ( - lambda t: t.string_col.cast(dt.float64), - lambda at: sa.cast(at.c.string_col, sa.REAL), - ), - ( - lambda t: t.string_col.cast(dt.float32), - lambda at: sa.cast(at.c.string_col, sa.REAL), - ), - ], -) -def test_cast(alltypes, alltypes_sqla, translate, func, expected): - expr = func(alltypes) - assert translate(expr.op()) == str(expected(alltypes_sqla.alias("t0"))) - - -@pytest.mark.parametrize( - ("func", "expected_func"), - [ - param( - lambda t: t.timestamp_col.cast(dt.timestamp), - lambda at: at.c.timestamp_col, - id="timestamp_col", - ), - param( - lambda t: t.int_col.cast(dt.timestamp), - lambda at: sa.func.datetime(at.c.int_col, "unixepoch"), - id="cast_integer_to_timestamp", - ), - ], -) -def test_timestamp_cast_noop( - alltypes, func, translate, alltypes_sqla, expected_func, sqla_compile -): - # See GH #592 - result = func(alltypes) - expected = expected_func(alltypes_sqla.alias("t0")) - assert translate(result.op()) == sqla_compile(expected) - - -def test_timestamp_functions(con): - value = ibis.timestamp("2015-09-01 14:48:05.359") - expr = value.strftime("%Y%m%d") - expected = "20150901" - assert con.execute(expr) == expected - - -@pytest.mark.parametrize( - ("expr", "expected"), - [ - (L(3) + L(4), 7), - (L(3) - L(4), -1), - (L(3) * L(4), 12), - (L(12) / L(4), 3), - (L(12) ** L(2), 144), - (L(12) % L(5), 2), - ], -) -def test_binary_arithmetic(con, expr, expected): - assert con.execute(expr) == expected - - -@pytest.mark.parametrize( - ("expr", "expected"), - [ - (L(7) / L(2), 3.5), - (L(7) // L(2), 3), - (L(7).floordiv(2), 3), - (L(2).rfloordiv(7), 3), - ], -) -def test_div_floordiv(con, expr, expected): - assert con.execute(expr) == expected - - -@pytest.mark.parametrize(("lit", "expected"), [(L(0), None), (L(5.5), 5.5)]) -def test_nullif_zero(con, lit, expected): - expr = lit.nullif(0) - assert con.execute(expr) == expected - - -@pytest.mark.parametrize( - ("expr", "expected"), [(L("foo_bar").length(), 7), (L("").length(), 0)] -) -def test_string_length(con, expr, expected): - assert 
con.execute(expr) == expected - - -@pytest.mark.parametrize( - ("expr", "expected"), - [ - (L("foo_bar").left(3), "foo"), - (L("foo_bar").right(3), "bar"), - (L("foo_bar").substr(0, 3), "foo"), - (L("foo_bar").substr(4, 3), "bar"), - (L("foo_bar").substr(1), "oo_bar"), - ], -) -def test_string_substring(con, expr, expected): - assert con.execute(expr) == expected - - -@pytest.mark.parametrize( - ("expr", "expected"), - [ - (L(" foo ").lstrip(), "foo "), - (L(" foo ").rstrip(), " foo"), - (L(" foo ").strip(), "foo"), - ], -) -def test_string_strip(con, expr, expected): - assert con.execute(expr) == expected - - -@pytest.mark.parametrize( - ("expr", "expected"), - [(L("foo").upper(), "FOO"), (L("FOO").lower(), "foo")], -) -def test_string_upper_lower(con, expr, expected): - assert con.execute(expr) == expected - - -@pytest.mark.parametrize( - ("expr", "expected"), - [ - (L("foobar").contains("bar"), True), - (L("foobar").contains("foo"), True), - (L("foobar").contains("baz"), False), - ], -) -def test_string_contains(con, expr, expected): - assert con.execute(expr) == expected - - -@pytest.mark.parametrize( - ("expr", "expected"), - [(L("foobar").find("bar"), 3), (L("foobar").find("baz"), -1)], -) -def test_string_find(con, expr, expected): - assert con.execute(expr) == expected - - -@pytest.mark.parametrize( - ("expr", "expected"), - [ - (L("foobar").like("%bar"), True), - (L("foobar").like("foo%"), True), - (L("foobar").like("%baz%"), False), - (L("foobar").like(["%bar"]), True), - (L("foobar").like(["foo%"]), True), - (L("foobar").like(["%baz%"]), False), - (L("foobar").like(["%bar", "foo%"]), True), - ], -) -def test_string_like(con, expr, expected): - assert con.execute(expr) == expected - - -def test_str_replace(con): - expr = L("foobarfoo").replace("foo", "H") - expected = "HbarH" - assert con.execute(expr) == expected - - -@pytest.mark.parametrize( - ("expr", "expected"), - [ - (L(-5).abs(), 5), - (L(5).abs(), 5), - (ibis.least(L(5), L(10), L(1)), 1), - (ibis.greatest(L(5), L(10), L(1)), 10), - (L(5.5).round(), 6.0), - (L(5.556).round(2), 5.56), - (L(5.556).sqrt(), math.sqrt(5.556)), - (L(5.556).ceil(), 6.0), - (L(5.556).floor(), 5.0), - (L(5.556).exp(), math.exp(5.556)), - (L(5.556).sign(), 1), - (L(-5.556).sign(), -1), - (L(0).sign(), 0), - (L(5.556).log(2), math.log(5.556, 2)), - (L(5.556).ln(), math.log(5.556)), - (L(5.556).log2(), math.log(5.556, 2)), - (L(5.556).log10(), math.log10(5.556)), - ], -) -def test_math_functions(con, expr, expected): - assert con.execute(expr) == expected - - -NULL_STRING = L(None).cast(dt.string) -NULL_INT64 = L(None).cast(dt.int64) - - -@pytest.mark.parametrize( - ("expr", "expected"), - [ - (L("abcd").re_search("[a-z]"), True), - (L("abcd").re_search(r"[\d]+"), False), - (L("1222").re_search(r"[\d]+"), True), - (L("abcd").re_search(None), None), - (NULL_STRING.re_search("[a-z]"), None), - (NULL_STRING.re_search(NULL_STRING), None), - ], -) -def test_regexp_search(con, expr, expected): - assert con.execute(expr) == expected - - -@pytest.mark.parametrize( - ("expr", "expected"), - [ - (L("abcd").re_replace("[ab]", ""), "cd"), - (L(None).cast(dt.string).re_replace(NULL_STRING, NULL_STRING), None), - (L("abcd").re_replace(NULL_STRING, NULL_STRING), None), - (L("abcd").re_replace("a", NULL_STRING), None), - (L("abcd").re_replace(NULL_STRING, "a"), None), - (NULL_STRING.re_replace("a", NULL_STRING), None), - (NULL_STRING.re_replace(NULL_STRING, "a"), None), - ], -) -def test_regexp_replace(con, expr, expected): - assert con.execute(expr) == expected - - 
-@pytest.mark.parametrize( - ("expr", "expected"), - [ - (L("1222").re_extract(r"1(22)\d+", 1).cast("int64"), 22), - (L("abcd").re_extract(r"(\d+)", 1), None), - (L("1222").re_extract("([a-z]+)", 1), None), - (L("1222").re_extract(r"1(22)\d+", 2), None), - # extract nulls - (NULL_STRING.re_extract(NULL_STRING, NULL_INT64), None), - (L("abcd").re_extract(NULL_STRING, NULL_INT64), None), - (L("abcd").re_extract("a", NULL_INT64), None), - (L("abcd").re_extract(NULL_STRING, 1), None), - (NULL_STRING.re_extract("a", NULL_INT64), None), - (NULL_STRING.re_extract(NULL_STRING, 1), None), - ], -) -def test_regexp_extract(con, expr, expected): - assert con.execute(expr) == expected - - -@pytest.mark.parametrize( - ("expr", "expected"), - [ - (ibis.NA.fillna(5), 5), - (L(5).fillna(10), 5), - (L(5).nullif(5), None), - (L(10).nullif(5), 10), - ], -) -def test_fillna_nullif(con, expr, expected): - assert con.execute(expr) == expected - - -def test_numeric_builtins_work(alltypes, df): - expr = alltypes.double_col.fillna(0).name("tmp") - result = expr.execute() - expected = df.double_col.fillna(0) - expected.name = "tmp" - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize( - ("func", "expected_func"), - [ - ( - lambda t: (t.double_col > 20).ifelse(10, -20), - lambda df: pd.Series( - np.where(df.double_col > 20, 10, -20), name="tmp", dtype="int8" - ), - ), - ( - lambda t: (t.double_col > 20).ifelse(10, -20).abs(), - lambda df: pd.Series( - np.where(df.double_col > 20, 10, -20), name="tmp", dtype="int8" - ).abs(), - ), - ], -) -def test_ifelse(alltypes, df, func, expected_func): - expr = func(alltypes).name("tmp") - result = expr.execute() - expected = expected_func(df) - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize( - ("func", "expected_func"), - [ - # tier and histogram - param( - lambda d: d.bucket([0, 10, 25, 50, 100]), - lambda s: pd.cut(s, [0, 10, 25, 50, 100], right=False, labels=False).astype( - "int8" - ), - id="default", - ), - param( - lambda d: d.bucket([0, 10, 25, 50], include_over=True), - lambda s: pd.cut( - s, [0, 10, 25, 50, np.inf], right=False, labels=False - ).astype("int8"), - id="include_over", - ), - param( - lambda d: d.bucket([0, 10, 25, 50], close_extreme=False), - lambda s: pd.cut(s, [0, 10, 25, 50], right=False, labels=False), - id="no_close_extreme", - ), - param( - lambda d: d.bucket([0, 10, 25, 50], closed="right", close_extreme=False), - lambda s: pd.cut( - s, - [0, 10, 25, 50], - include_lowest=False, - right=True, - labels=False, - ), - id="closed_right_no_close_extreme", - ), - param( - lambda d: d.bucket([10, 25, 50, 100], include_under=True), - lambda s: pd.cut(s, [0, 10, 25, 50, 100], right=False, labels=False).astype( - "int8" - ), - id="include_under", - ), - ], -) -def test_bucket(alltypes, df, func, expected_func): - expr = func(alltypes.double_col) - result = expr.execute() - expected = expected_func(df.double_col) - - tm.assert_series_equal(result, expected, check_names=False) - - -def test_category_label(alltypes, df): - bins = [0, 10, 25, 50, 100] - labels = ["a", "b", "c", "d"] - expr = alltypes.double_col.bucket(bins).label(labels) - result = expr.execute() - result = pd.Series(pd.Categorical(result, ordered=True)) - - result.name = "double_col" - - expected = pd.cut(df.double_col, bins, labels=labels, right=False) - tm.assert_series_equal(result, expected) - - -@pytest.mark.xfail( - parse(sqlite3.sqlite_version) < parse("3.8.3"), - raises=sa.exc.OperationalError, - reason="SQLite versions < 3.8.3 do not support 
the WITH statement", -) -def test_union(alltypes): - t = alltypes - - expr = ( - t.group_by("string_col") - .aggregate(t.double_col.sum().name("foo")) - .order_by("string_col") - ) - - t1 = expr.limit(4) - t2 = expr.limit(4, offset=4) - t3 = expr.limit(8) - - result = t1.union(t2).execute() - expected = t3.execute() - - assert (result.string_col == expected.string_col).all() - - -@pytest.mark.parametrize( - ("func", "expected_func"), - [ - ( - lambda t, cond: t.bool_col.count(), - lambda df, cond: df.bool_col.count(), - ), - (lambda t, cond: t.bool_col.any(), lambda df, cond: df.bool_col.any()), - (lambda t, cond: t.bool_col.all(), lambda df, cond: df.bool_col.all()), - ( - lambda t, cond: t.bool_col.notany(), - lambda df, cond: ~df.bool_col.any(), - ), - ( - lambda t, cond: t.bool_col.notall(), - lambda df, cond: ~df.bool_col.all(), - ), - ( - lambda t, cond: t.double_col.sum(), - lambda df, cond: df.double_col.sum(), - ), - ( - lambda t, cond: t.double_col.mean(), - lambda df, cond: df.double_col.mean(), - ), - ( - lambda t, cond: t.double_col.min(), - lambda df, cond: df.double_col.min(), - ), - ( - lambda t, cond: t.double_col.max(), - lambda df, cond: df.double_col.max(), - ), - ( - lambda t, cond: t.double_col.var(), - lambda df, cond: df.double_col.var(), - ), - ( - lambda t, cond: t.double_col.std(), - lambda df, cond: df.double_col.std(), - ), - ( - lambda t, cond: t.double_col.var(how="sample"), - lambda df, cond: df.double_col.var(ddof=1), - ), - ( - lambda t, cond: t.double_col.std(how="pop"), - lambda df, cond: df.double_col.std(ddof=0), - ), - ( - lambda t, cond: t.bool_col.count(where=cond), - lambda df, cond: df.bool_col[cond].count(), - ), - ( - lambda t, cond: t.double_col.sum(where=cond), - lambda df, cond: df.double_col[cond].sum(), - ), - ( - lambda t, cond: t.double_col.mean(where=cond), - lambda df, cond: df.double_col[cond].mean(), - ), - ( - lambda t, cond: t.double_col.min(where=cond), - lambda df, cond: df.double_col[cond].min(), - ), - ( - lambda t, cond: t.double_col.max(where=cond), - lambda df, cond: df.double_col[cond].max(), - ), - ( - lambda t, cond: t.double_col.var(where=cond), - lambda df, cond: df.double_col[cond].var(), - ), - ( - lambda t, cond: t.double_col.std(where=cond), - lambda df, cond: df.double_col[cond].std(), - ), - ( - lambda t, cond: t.double_col.var(where=cond, how="sample"), - lambda df, cond: df.double_col[cond].var(), - ), - ( - lambda t, cond: t.double_col.std(where=cond, how="pop"), - lambda df, cond: df.double_col[cond].std(ddof=0), - ), - ], -) -def test_aggregations_execute(alltypes, func, df, expected_func): - cond = alltypes.string_col.isin(["1", "7"]) - expr = func(alltypes, cond) - result = expr.execute() - expected = expected_func(df, df.string_col.isin(["1", "7"])) - - np.testing.assert_allclose(result, expected) - - -def test_not_contains(alltypes, df): - n = 100 - table = alltypes.limit(n) - expr = table.string_col.notin(["1", "7"]) - result = expr.execute() - expected = ~df.head(n).string_col.isin(["1", "7"]) - tm.assert_series_equal(result, expected, check_names=False) - - -def test_distinct_aggregates(alltypes, df): - expr = alltypes.double_col.nunique() - result = expr.execute() - expected = df.double_col.nunique() - assert result == expected - - -def test_not_exists_works(alltypes): - t = alltypes - t2 = t.view() - - expr = t[-((t.string_col == t2.string_col).any())] - expr.execute() - - -def test_interactive_repr_shows_error(alltypes): - # #591. 
Doing this in SQLite because so many built-in functions are not - # available - - expr = alltypes.double_col.approx_median() - - with config.option_context("interactive", True): - result = repr(expr) - assert "no translation rule" in result.lower() - - -def test_subquery(alltypes, df): - t = alltypes - - expr = t.mutate(d=t.double_col.fillna(0)).limit(1000).group_by("string_col").size() - result = expr.execute() - expected = ( - df.assign(d=df.double_col.fillna(0)) - .head(1000) - .groupby("string_col") - .size() - .reset_index() - .rename(columns={0: "CountStar()"}) - ) - tm.assert_frame_equal(result, expected) - - -def test_filter(alltypes, df): - expr = alltypes.filter(alltypes.year == 2010).float_col - result = expr.execute().squeeze().reset_index(drop=True) - expected = df.query("year == 2010").float_col - assert len(result) == len(expected) - - -@pytest.mark.parametrize("column", [lambda t: "float_col", lambda t: t["float_col"]]) -def test_column_access_after_sort(alltypes, df, column): - expr = alltypes.order_by(column(alltypes)).head(10).string_col - result = expr.execute() - expected = df.sort_values("float_col").string_col.head(10).reset_index(drop=True) - tm.assert_series_equal(result, expected) - - -@pytest.fixture -def mj1(con, temp_table): - return con.create_table( - temp_table, - pd.DataFrame(dict(id1=[1, 2], val1=[10.0, 20.0])), - schema=ibis.schema(dict(id1="int64", val1="float64")), - ) - - -@pytest.fixture -def mj2(con, temp_table_orig): - return con.create_table( - temp_table_orig, - pd.DataFrame(dict(id2=[1, 2], val2=[15.0, 25.0])), - schema=ibis.schema(dict(id2="int64", val2="float64")), - ) - - -def test_simple_join(mj1, mj2): - joined = mj1.join(mj2, mj1.id1 == mj2.id2) - result = joined.val2.execute() - assert len(result) == 2 - - -def test_anonymous_aggregate(alltypes, df): - expr = alltypes[alltypes.double_col > alltypes.double_col.mean()] - result = expr.execute() - expected = df[df.double_col > df.double_col.mean()].reset_index(drop=True) - tm.assert_frame_equal(result, expected) - - -def test_head(alltypes): - t = alltypes - result = t.head().execute() - expected = t.limit(5).execute() - tm.assert_frame_equal(result, expected) - - -def test_identical_to(alltypes): - t = alltypes - dt = t[["tinyint_col", "double_col"]].execute() - expr = t.tinyint_col.identical_to(t.double_col) - result = expr.execute() - expected = (dt.tinyint_col.isnull() & dt.double_col.isnull()) | ( - dt.tinyint_col == dt.double_col - ) - expected.name = result.name - tm.assert_series_equal(result, expected) - - -@pytest.mark.xfail( - raises=AttributeError, - reason="truncate method is not yet implemented", -) -def test_truncate_method(con, alltypes): - expr = alltypes.limit(5) - name = str(uuid.uuid4()) - t = con.create_table(name, expr) - assert len(t.execute()) == 5 - t.truncate() - assert len(t.execute()) == 0 - - -def test_truncate_from_connection(con, alltypes): - expr = alltypes.limit(5) - name = str(uuid.uuid4()) - t = con.create_table(name, expr) - assert len(t.execute()) == 5 - con.truncate_table(name) - assert len(t.execute()) == 0 - - -def test_not(alltypes): - t = alltypes.limit(10) - expr = t.select([(~t.double_col.isnull()).name("double_col")]) - result = expr.execute().double_col - expected = ~t.execute().double_col.isnull() - tm.assert_series_equal(result, expected) - - -def test_compile_with_named_table(): - t = ibis.table([("a", "string")], name="t") - result = ibis.sqlite.compile(t.a) - st = sa.table("t", sa.column("a", sa.String)).alias("t0") - assert str(result) == 
str(sa.select(st.c.a)) - - -def test_compile_with_unnamed_table(): - t = ibis.table([("a", "string")]) - result = ibis.sqlite.compile(t.a) - st = sa.table(t.op().name, sa.column("a", sa.String)).alias("t0") - assert str(result) == str(sa.select(st.c.a)) - - -def test_compile_with_multiple_unnamed_tables(): - t = ibis.table([("a", "string")]) - s = ibis.table([("b", "string")]) - join = t.join(s, t.a == s.b) - result = ibis.sqlite.compile(join) - sqla_t = sa.table(t.op().name, sa.column("a", sa.String)).alias("t0") - sqla_s = sa.table(s.op().name, sa.column("b", sa.String)).alias("t1") - sqla_join = sqla_t.join(sqla_s, sqla_t.c.a == sqla_s.c.b) - expected = sa.select(sqla_t.c.a, sqla_s.c.b).select_from(sqla_join) - assert str(result) == str(expected) - - -def test_compile_with_one_unnamed_table(): - t = ibis.table([("a", "string")]) - s = ibis.table([("b", "string")], name="s") - join = t.join(s, t.a == s.b) - result = ibis.sqlite.compile(join) - sqla_t = sa.table(t.op().name, sa.column("a", sa.String)).alias("t0") - sqla_s = sa.table("s", sa.column("b", sa.String)).alias("t1") - sqla_join = sqla_t.join(sqla_s, sqla_t.c.a == sqla_s.c.b) - expected = sa.select(sqla_t.c.a, sqla_s.c.b).select_from(sqla_join) - assert str(result) == str(expected) - - -def test_scalar_parameter(alltypes): - start_string, end_string = "2009-03-01", "2010-07-03" - - start = ibis.param(dt.date) - end = ibis.param(dt.date) - t = alltypes - col = t.date_string_col.cast("date") - expr = col.between(start, end).name("result") - result = expr.execute(params={start: start_string, end: end_string}) - - expected_expr = col.between(start_string, end_string).name("result") - expected = expected_expr.execute() - tm.assert_series_equal(result, expected) - - -def test_count_on_order_by(con, snapshot): - t = con.table("batting") - sort_key = ibis.desc(t.playerID) - sorted_table = t.order_by(sort_key) - expr = sorted_table.count() - result = str(ibis.to_sql(expr, dialect="sqlite")) - snapshot.assert_match(result, "out.sql") - - -def test_memtable_compilation(con): - expr = ibis.memtable({"a": [1, 2, 3]}) - t = con.compile(expr) - assert t.exported_columns[0].name == "a" diff --git a/ibis/backends/sqlite/tests/test_types.py b/ibis/backends/sqlite/tests/test_types.py index dc2978332c38..14f8eeebd9f9 100644 --- a/ibis/backends/sqlite/tests/test_types.py +++ b/ibis/backends/sqlite/tests/test_types.py @@ -38,9 +38,9 @@ def db(tmp_path_factory): path = str(tmp_path_factory.mktemp("databases") / "formats.db") con = sqlite3.connect(path) con.execute("CREATE TABLE timestamps (ts TIMESTAMP)") - con.execute("CREATE TABLE timestamps_tz (ts TIMESTAMP)") + con.execute("CREATE TABLE timestamps_tz (ts TIMESTAMPTZ)") con.execute("CREATE TABLE weird (str_col STRING, date_col ITSADATE)") - con.execute("CREATE TABLE blobs (data BLOB)") + con.execute("CREATE TABLE basic (a INTEGER, b REAL, c BOOLEAN, d BLOB)") with con: con.executemany("INSERT INTO timestamps VALUES (?)", [(t,) for t in TIMESTAMPS]) con.executemany( @@ -56,7 +56,6 @@ def db(tmp_path_factory): ("d", "2022-01-04"), ], ) - con.execute("INSERT INTO blobs (data) VALUES (?)", (b"\x00\x01\x02\x03",)) con.close() return path @@ -68,21 +67,22 @@ def db(tmp_path_factory): def test_timestamps(db, table, data): con = ibis.sqlite.connect(db) t = con.table(table) - assert t.ts.type() == dt.timestamp + assert t.ts.type().is_timestamp() res = t.ts.execute() # the "mixed" format was added in pandas 2.0.0 format = "mixed" if vparse(pd.__version__) >= vparse("2.0.0") else None stamps = 
pd.to_datetime(data, format=format, utc=True) - # we're casting to timestamp without a timezone, so remove it in the - # expected output - localized = stamps.tz_localize(None) - sol = pd.Series(localized) + if t.ts.type().timezone is None: + # we're casting to timestamp without a timezone, so remove it in the + # expected output + stamps = stamps.tz_localize(None) + sol = pd.Series(stamps) assert res.equals(sol) def test_type_map(db): con = ibis.sqlite.connect(db, type_map={"STRING": dt.string, "ITSADATE": "date"}) - t = con.tables.weird + t = con.table("weird") expected_schema = ibis.schema({"str_col": "string", "date_col": "date"}) assert t.schema() == expected_schema res = t.filter(t.str_col == "a").execute() @@ -92,7 +92,9 @@ def test_type_map(db): assert res.equals(sol) -def test_read_blob(db): +def test_read_basic_types(db): con = ibis.sqlite.connect(db) - t = con.table("blobs") - assert t["data"].type() == dt.binary + t = con.table("basic") + assert t.schema() == ibis.schema( + {"a": "int64", "b": "float64", "c": "bool", "d": "binary"} + ) diff --git a/ibis/backends/sqlite/udf.py b/ibis/backends/sqlite/udf.py index e0a716d1756e..aa568049ef93 100644 --- a/ibis/backends/sqlite/udf.py +++ b/ibis/backends/sqlite/udf.py @@ -6,7 +6,7 @@ import math import operator from collections import defaultdict -from typing import Callable +from typing import Any, Callable, NamedTuple from urllib.parse import parse_qs, urlsplit try: @@ -14,8 +14,19 @@ except ImportError: import re -_SQLITE_UDF_REGISTRY = set() -_SQLITE_UDAF_REGISTRY = set() + +class _UDF(NamedTuple): + """An internal record holding info about a registered UDF.""" + + name: str + impl: Any + nargs: int + skip_if_exists: bool = False + deterministic: bool = True + + +_SQLITE_UDF_REGISTRY = {} +_SQLITE_UDAF_REGISTRY = {} def ignore_nulls(f): @@ -28,13 +39,36 @@ def wrapper(*args, **kwargs): return wrapper -def udf(f): - """Create a SQLite scalar UDF from `f`. +def _number_of_arguments(callable): + signature = inspect.signature(callable) + parameters = signature.parameters.values() + kinds = [param.kind for param in parameters] + valid_kinds = ( + inspect.Parameter.POSITIONAL_OR_KEYWORD, + inspect.Parameter.POSITIONAL_ONLY, + ) + if any(kind not in valid_kinds for kind in kinds) or any( + param.default is not inspect.Parameter.empty for param in parameters + ): + raise TypeError( + "Only positional arguments without defaults are supported in Ibis " + "SQLite function registration" + ) + return len(parameters) + + +def udf(func=None, *, skip_if_exists=False, deterministic=True): + """Create a SQLite scalar UDF from `func`. Parameters ---------- - f + func A callable object + skip_if_exists + If true, the UDF will only be registered if an existing function + with that name doesn't already exist. + deterministic + Whether the UDF is deterministic, defaults to True. Returns ------- @@ -42,250 +76,219 @@ def udf(f): A callable object that returns ``None`` if any of its inputs are ``None``. 
""" - wrapper = ignore_nulls(f) - _SQLITE_UDF_REGISTRY.add(wrapper) + if func is None: + return lambda func: udf( + func, skip_if_exists=skip_if_exists, deterministic=deterministic + ) + + name = func.__name__ + nargs = _number_of_arguments(func) + wrapper = ignore_nulls(func) + + _SQLITE_UDF_REGISTRY[name] = _UDF( + name, wrapper, nargs, skip_if_exists, deterministic + ) return wrapper def udaf(cls): """Register a UDAF class with any SQLite connection.""" - _SQLITE_UDAF_REGISTRY.add(cls) + name = cls.__name__ + nargs = _number_of_arguments(cls.step) - 1 + _SQLITE_UDAF_REGISTRY[name] = _UDF(name, cls, nargs) return cls -@udf -def _ibis_sqlite_reverse(string): - return string[::-1] - - -@udf -def _ibis_sqlite_string_ascii(string): - return ord(string[0]) - - -@udf -def _ibis_sqlite_capitalize(string): - return string.capitalize() - - -@udf -def _ibis_sqlite_translate(string, from_string, to_string): - table = str.maketrans(from_string, to_string) - return string.translate(table) - - -@udf -def _ibis_sqlite_regex_search(string, regex): - """Return whether `regex` exists in `string`.""" - return re.search(regex, string) is not None - - -@udf -def _ibis_sqlite_regex_replace(string, pattern, replacement): - """Replace occurrences of `pattern` in `string` with `replacement`.""" - return re.sub(pattern, replacement, string) - - -@udf -def _ibis_sqlite_regex_extract(string, pattern, index): - """Extract match of regular expression `pattern` from `string` at `index`.""" - result = re.search(pattern, string) - if result is not None and 0 <= index <= (result.lastindex or -1): - return result.group(index) - return None +# Optional builtin functions +# +# These functions may exist as builtins depending on the SQLite versions. +# They're only registered if they don't exist already in the connection. -@udf -def _ibis_sqlite_exp(arg): - """Exponentiate `arg`. +@udf(skip_if_exists=True) +def unhex(string): + return bytes.fromhex(string) - Parameters - ---------- - arg : number - Number to raise to `e`. 
- Returns - ------- - result : Optional[number] - None If the input is None - """ +@udf(skip_if_exists=True) +def exp(arg): return math.exp(arg) -@udf -def _ibis_sqlite_log(arg, base): - if arg < 0 or base < 0: - return None - return math.log(arg, base) - - -@udf -def _ibis_sqlite_ln(arg): +@udf(skip_if_exists=True) +def ln(arg): if arg < 0: return None return math.log(arg) -@udf -def _ibis_sqlite_log2(arg): - return _ibis_sqlite_log(arg, 2) +@udf(skip_if_exists=True) +def log2(arg): + if arg < 0: + return None + return math.log(arg, 2) -@udf -def _ibis_sqlite_log10(arg): - return _ibis_sqlite_log(arg, 10) +@udf(skip_if_exists=True) +def log10(arg): + if arg < 0: + return None + return math.log(arg, 10) -@udf -def _ibis_sqlite_floor(arg): +@udf(skip_if_exists=True) +def floor(arg): return math.floor(arg) -@udf -def _ibis_sqlite_ceil(arg): +@udf(skip_if_exists=True) +def ceil(arg): return math.ceil(arg) -@udf -def _ibis_sqlite_sign(arg): +@udf(skip_if_exists=True) +def sign(arg): if not arg: return 0 return math.copysign(1, arg) -@udf -def _ibis_sqlite_floordiv(left, right): - return left // right +@udf(skip_if_exists=True) +def mod(left, right): + return None if right == 0 else (left % right) -@udf -def _ibis_sqlite_mod(left, right): - return left % right +@udf(skip_if_exists=True) +def power(arg, power): + # mirroring sqlite - return NULL if negative or non-integral + if arg < 0.0 and not power.is_integer(): + return None + return arg**power -@udf -def _ibis_sqlite_power(arg, power): - """Raise `arg` to the `power` power. +@udf(skip_if_exists=True) +def sqrt(arg): + return None if arg < 0.0 else math.sqrt(arg) - Parameters - ---------- - arg : number - Number to raise to `power`. - power : number - Number to raise `arg` to. - Returns - ------- - result : Optional[number] - None If either argument is None or we're trying to take a fractional - power or a negative number - """ - if arg < 0.0 and not power.is_integer(): - return None - return arg**power +@udf(skip_if_exists=True) +def sin(arg): + return math.sin(arg) -@udf -def _ibis_sqlite_sqrt(arg): - """Square root of `arg`. 
+@udf(skip_if_exists=True) +def cos(arg): + return math.cos(arg) - Parameters - ---------- - arg : Optional[number] - Number to take the square root of - Returns - ------- - result : Optional[number] - None if `arg` is None or less than 0 otherwise the square root - """ - return None if arg is None or arg < 0.0 else math.sqrt(arg) +@udf(skip_if_exists=True) +def tan(arg): + return math.tan(arg) -def _trig_func_unary(func, arg): - if arg is None: - return None +@udf(skip_if_exists=True) +def asin(arg): + return math.asin(arg) - return func(float(arg)) +@udf(skip_if_exists=True) +def acos(arg): + return math.acos(arg) -def _trig_func_binary(func, arg1, arg2): - if arg1 is None or arg2 is None: - return None - return func(float(arg1), float(arg2)) +@udf(skip_if_exists=True) +def atan(arg): + return math.atan(arg) -@udf -def _ibis_sqlite_cot(arg): - return _trig_func_unary( - lambda arg: float("inf") if not arg else 1.0 / math.tan(arg), arg - ) +@udf(skip_if_exists=True) +def atan2(y, x): + return math.atan2(y, x) -@udf -def _ibis_sqlite_sin(arg): - return _trig_func_unary(math.sin, arg) +@udf(skip_if_exists=True) +def degrees(x): + return math.degrees(x) + + +@udf(skip_if_exists=True) +def radians(x): + return math.radians(x) + + +@udf(skip_if_exists=True) +def pi(): + return math.pi + + +# Additional UDFS @udf -def _ibis_sqlite_cos(arg): - return _trig_func_unary(math.cos, arg) +def _ibis_reverse(string): + return string[::-1] @udf -def _ibis_sqlite_tan(arg): - return _trig_func_unary(math.tan, arg) +def _ibis_string_ascii(string): + return ord(string[0]) @udf -def _ibis_sqlite_asin(arg): - return _trig_func_unary(math.asin, arg) +def _ibis_capitalize(string): + return string.capitalize() @udf -def _ibis_sqlite_acos(arg): - return _trig_func_unary(math.acos, arg) +def _ibis_rpad(string, width, pad): + return string.ljust(width, pad)[:width] @udf -def _ibis_sqlite_atan(arg): - return _trig_func_unary(math.atan, arg) +def _ibis_lpad(string, width, pad): + return string.rjust(width, pad)[:width] @udf -def _ibis_sqlite_atan2(y, x): - return _trig_func_binary(math.atan2, y, x) +def _ibis_repeat(string, n): + return string * n @udf -def _ibis_sqlite_degrees(x): - return None if x is None else math.degrees(x) +def _ibis_translate(string, from_string, to_string): + table = str.maketrans(from_string, to_string) + return string.translate(table) @udf -def _ibis_sqlite_radians(x): - return None if x is None else math.radians(x) +def _ibis_regex_search(string, regex): + """Return whether `regex` exists in `string`.""" + return re.search(regex, string) is not None @udf -def _ibis_sqlite_xor(x, y): - return None if x is None or y is None else x ^ y +def _ibis_regex_replace(string, pattern, replacement): + """Replace occurrences of `pattern` in `string` with `replacement`.""" + return re.sub(pattern, replacement, string) @udf -def _ibis_sqlite_inv(x): - return None if x is None else ~x +def _ibis_regex_extract(string, pattern, index): + """Extract match of regular expression `pattern` from `string` at `index`.""" + result = re.search(pattern, string) + if result is not None and 0 <= index <= (result.lastindex or -1): + return result.group(index) + return None @udf -def _ibis_sqlite_pi(): - return math.pi +def _ibis_xor(x, y): + return x ^ y @udf -def _ibis_sqlite_e(): - return math.e +def _ibis_inv(x): + return ~x @udf @@ -318,19 +321,15 @@ def _extract_url_field(data, field_name): @udf -def _ibis_extract_query(url, param_name): - query = urlsplit(url).query - if param_name is not None: - value = 
parse_qs(query)[param_name] - return value if len(value) > 1 else value[0] - else: - return query +def _ibis_extract_full_query(url): + return urlsplit(url).query @udf -def _ibis_extract_query_no_param(url): +def _ibis_extract_query(url, param_name): query = urlsplit(url).query - return query + value = parse_qs(query)[param_name] + return value if len(value) > 1 else value[0] @udf @@ -342,7 +341,7 @@ def _ibis_extract_user_info(url): return f"{username}:{password}" -class _ibis_sqlite_var: +class _ibis_var: def __init__(self, offset): self.mean = 0.0 self.sum_of_squares_of_differences = 0.0 @@ -364,7 +363,7 @@ def finalize(self): @udaf -class _ibis_sqlite_mode: +class _ibis_mode: def __init__(self): self.counter = defaultdict(int) @@ -379,18 +378,18 @@ def finalize(self): @udaf -class _ibis_sqlite_var_pop(_ibis_sqlite_var): +class _ibis_var_pop(_ibis_var): def __init__(self): super().__init__(0) @udaf -class _ibis_sqlite_var_samp(_ibis_sqlite_var): +class _ibis_var_sample(_ibis_var): def __init__(self): super().__init__(1) -class _ibis_sqlite_bit_agg: +class _ibis_bit_agg: def __init__(self, op): self.value: int | None = None self.count: int = 0 @@ -409,24 +408,24 @@ def finalize(self) -> int | None: @udaf -class _ibis_sqlite_bit_or(_ibis_sqlite_bit_agg): +class _ibis_bit_or(_ibis_bit_agg): def __init__(self): super().__init__(operator.or_) @udaf -class _ibis_sqlite_bit_and(_ibis_sqlite_bit_agg): +class _ibis_bit_and(_ibis_bit_agg): def __init__(self): super().__init__(operator.and_) @udaf -class _ibis_sqlite_bit_xor(_ibis_sqlite_bit_agg): +class _ibis_bit_xor(_ibis_bit_agg): def __init__(self): super().__init__(operator.xor) -class _ibis_sqlite_arbitrary(abc.ABC): +class _ibis_arbitrary(abc.ABC): def __init__(self) -> None: self.value = None @@ -439,57 +438,35 @@ def finalize(self) -> int | None: @udaf -class _ibis_sqlite_arbitrary_first(_ibis_sqlite_arbitrary): +class _ibis_arbitrary_first(_ibis_arbitrary): def step(self, value): if self.value is None: self.value = value @udaf -class _ibis_sqlite_arbitrary_last(_ibis_sqlite_arbitrary): +class _ibis_arbitrary_last(_ibis_arbitrary): def step(self, value): if value is not None: self.value = value -def _number_of_arguments(callable): - signature = inspect.signature(callable) - parameters = signature.parameters.values() - kinds = [param.kind for param in parameters] - valid_kinds = ( - inspect.Parameter.POSITIONAL_OR_KEYWORD, - inspect.Parameter.POSITIONAL_ONLY, - ) - if any(kind not in valid_kinds for kind in kinds) or any( - param.default is not inspect.Parameter.empty for param in parameters - ): - raise TypeError( - "Only positional arguments without defaults are supported in Ibis " - "SQLite function registration" - ) - return len(parameters) - - -def register_all(dbapi_connection): +def register_all(con): """Register all udf and udaf with the connection. All udf and udaf are defined in this file with the `udf` and `udaf` decorators. 
- - Parameters - ---------- - dbapi_connection - sqlalchemy.Connection object """ - for func in _SQLITE_UDF_REGISTRY: - dbapi_connection.create_function( - func.__name__, _number_of_arguments(func), func + existing = { + name for (name,) in con.execute("SELECT name FROM pragma_function_list()") + } + + for udf in _SQLITE_UDF_REGISTRY.values(): + if udf.skip_if_exists and udf.name in existing: + continue + con.create_function( + udf.name, udf.nargs, udf.impl, deterministic=udf.deterministic ) - for agg in _SQLITE_UDAF_REGISTRY: - dbapi_connection.create_aggregate( - agg.__name__, - # subtract one to ignore the `self` argument of the step method - _number_of_arguments(agg.step) - 1, - agg, - ) + for udf in _SQLITE_UDAF_REGISTRY.values(): + con.create_aggregate(udf.name, udf.nargs, udf.impl) diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/sqlite/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/sqlite/out.sql new file mode 100644 index 000000000000..b309cd65374d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/sqlite/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/sqlite/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/sqlite/out.sql new file mode 100644 index 000000000000..b309cd65374d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/sqlite/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/sqlite/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/sqlite/out.sql new file mode 100644 index 000000000000..3cadfca6be22 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/sqlite/out.sql @@ -0,0 +1,19 @@ +SELECT + SUM("t1"."bigint_col") AS "Sum(bigint_col)" +FROM ( + SELECT + "t0"."id", + "t0"."bool_col", + "t0"."tinyint_col", + "t0"."smallint_col", + "t0"."int_col", + "t0"."bigint_col", + "t0"."float_col", + "t0"."double_col", + "t0"."date_string_col", + "t0"."string_col", + "t0"."timestamp_col", + "t0"."year", + "t0"."month" + FROM "functional_alltypes" AS "t0" +) AS "t1" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/sqlite/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/sqlite/out.sql new file mode 100644 index 000000000000..97338646649f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/sqlite/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + "t0"."id", + "t0"."bool_col" + FROM "functional_alltypes" AS "t0" + LIMIT 10 +) AS "t2" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/sqlite/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/sqlite/out.sql index fc16f2428d16..d3969647c9ea 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/sqlite/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/sqlite/out.sql @@ -1,5 +1,5 @@ SELECT - CASE t0.continent + CASE "t0"."continent" WHEN 'NA' THEN 'North America' WHEN 'SA' @@ -15,8 
+15,8 @@ SELECT WHEN 'AN' THEN 'Antarctica' ELSE 'Unknown continent' - END AS cont, - SUM(t0.population) AS total_pop -FROM countries AS t0 + END AS "cont", + SUM("t0"."population") AS "total_pop" +FROM "countries" AS "t0" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/sqlite/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/sqlite/out.sql index 3f66295a7f5a..c1611d8cecc3 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/sqlite/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/sqlite/out.sql @@ -1,13 +1,9 @@ SELECT - t0.x IN ( + "t0"."x" IN ( SELECT - t1.x - FROM ( - SELECT - t0.x AS x - FROM t AS t0 - WHERE - t0.x > 2 - ) AS t1 - ) AS "InColumn(x, x)" -FROM t AS t0 \ No newline at end of file + "t0"."x" + FROM "t" AS "t0" + WHERE + "t0"."x" > 2 + ) AS "InSubquery(x)" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index f5008f169585..2ae1aa8b4be4 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -907,14 +907,13 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): "dask", "datafusion", "polars", - "sqlite", "druid", "oracle", ], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( - ["mysql", "mssql", "impala", "exasol"], + ["mysql", "mssql", "impala", "exasol", "sqlite"], raises=com.UnsupportedBackendType, ), pytest.mark.notyet( @@ -1418,11 +1417,14 @@ def test_topk_op(alltypes, df): @pytest.mark.notyet( ["flink"], raises=Py4JError, reason="Flink doesn't support semi joins" ) -def test_topk_filter_op(alltypes, df, result_fn, expected_fn): +def test_topk_filter_op(con, alltypes, df, result_fn, expected_fn): # TopK expression will order rows by "count" but each backend # can have different result for that. # Note: Maybe would be good if TopK could order by "count" # and the field used by TopK + if con.name == "sqlite": + # TODO: remove after CTE extraction is reimplemented + pytest.skip("topk -> semi-join performance has increased post SQLGlot refactor") t = alltypes.order_by(alltypes.string_col) df = df.sort_values("string_col") expr = result_fn(t) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 0bda4091815a..d50d46c57fd2 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -187,7 +187,9 @@ def test_array_index(con, idx): ), ), pytest.mark.never( - ["sqlite"], reason="array types are unsupported", raises=NotImplementedError + ["sqlite"], + reason="array types are unsupported", + raises=(com.UnsupportedBackendType,), ), ) @@ -448,7 +450,7 @@ def test_array_slice(backend, start, stop): reason="Operation 'ArrayMap' is not implemented for this backend", ) @pytest.mark.notimpl( - ["sqlite"], raises=NotImplementedError, reason="Unsupported type: Array: ..." + ["sqlite"], raises=com.UnsupportedBackendType, reason="Unsupported type: Array: ..." ) @pytest.mark.parametrize( ("input", "output"), @@ -509,7 +511,7 @@ def test_array_map(con, input, output): reason="Operation 'ArrayMap' is not implemented for this backend", ) @pytest.mark.notimpl( - ["sqlite"], raises=NotImplementedError, reason="Unsupported type: Array..." + ["sqlite"], raises=com.UnsupportedBackendType, reason="Unsupported type: Array..." 
) @pytest.mark.parametrize( ("input", "output"), @@ -651,7 +653,7 @@ def test_array_remove(con, a): ["dask", "datafusion", "polars"], raises=com.OperationNotDefinedError ) @pytest.mark.notimpl( - ["sqlite"], raises=NotImplementedError, reason="Unsupported type: Array..." + ["sqlite"], raises=com.UnsupportedBackendType, reason="Unsupported type: Array..." ) @pytest.mark.notyet( ["bigquery"], @@ -770,7 +772,7 @@ def test_array_union(con, a, b, expected_array): raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( - ["sqlite"], raises=NotImplementedError, reason="Unsupported type: Array..." + ["sqlite"], raises=com.UnsupportedBackendType, reason="Unsupported type: Array..." ) @pytest.mark.broken( ["risingwave"], @@ -1087,13 +1089,12 @@ def test_unnest_empty_array(con): "flink", "polars", "snowflake", - "sqlite", "dask", "pandas", ], raises=com.OperationNotDefinedError, ) -@pytest.mark.notimpl(["sqlite"], raises=NotImplementedError) +@pytest.mark.notimpl(["sqlite"], raises=com.UnsupportedBackendType) def test_array_map_with_conflicting_names(backend, con): t = ibis.memtable({"x": [[1, 2]]}, schema=ibis.schema(dict(x="!array"))) expr = t.select(a=t.x.map(lambda x: x + 1)).select( @@ -1117,6 +1118,7 @@ def test_array_map_with_conflicting_names(backend, con): ], raises=com.OperationNotDefinedError, ) +@pytest.mark.notimpl(["sqlite"], raises=com.UnsupportedBackendType) def test_complex_array_map(con): def upper(token): return token.upper() diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index 23097069bc66..ffe86146b65f 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -94,6 +94,7 @@ def time_keyed_right(time_keyed_df2): "exasol", "oracle", "mssql", + "sqlite", ] ) def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): @@ -133,6 +134,7 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op "exasol", "oracle", "mssql", + "sqlite", ] ) def test_keyed_asof_join_with_tolerance( diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index e52ab15e0954..a20cd61a98d3 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -13,6 +13,12 @@ import ibis.expr.schema as sch from ibis.backends.tests.errors import PyDruidProgrammingError +sqlite_right_or_full_mark = pytest.mark.notyet( + ["sqlite"], + condition=vparse(sqlite3.sqlite_version) < vparse("3.39"), + reason="SQLite < 3.39 doesn't support RIGHT/FULL OUTER joins", +) + def _pandas_semi_join(left, right, on, **_): assert len(on) == 1, str(on) @@ -47,7 +53,8 @@ def check_eq(left, right, how, **kwargs): marks=[ pytest.mark.broken( ["exasol"], raises=AssertionError, reasons="results don't match" - ) + ), + sqlite_right_or_full_mark, ], ), param( @@ -56,10 +63,8 @@ def check_eq(left, right, how, **kwargs): # syntax, but we might be able to work around that using # LEFT JOIN UNION RIGHT JOIN marks=[ - pytest.mark.notimpl( - ["mysql"] - + ["sqlite"] * (vparse(sqlite3.sqlite_version) < vparse("3.39")) - ), + pytest.mark.notimpl(["mysql"]), + sqlite_right_or_full_mark, pytest.mark.xfail_version(datafusion=["datafusion<31"]), pytest.mark.broken( ["exasol"], raises=AssertionError, reasons="results don't match" @@ -181,7 +186,10 @@ def test_mutate_then_join_no_column_overlap(batting, awards_players): param(lambda left, right: left.join(right, "year", how="semi"), id="how_semi"), ], ) -def test_semi_join_topk(batting, awards_players, func): +def 
test_semi_join_topk(con, batting, awards_players, func): + if con.name == "sqlite": + # TODO: remove after CTE extraction is reimplemented + pytest.skip("topk -> semi-join performance has increased post SQLGlot refactor") batting = batting.mutate(year=batting.yearID) left = func(batting, batting.year.topk(5)).select("year", "RBI") expr = left.join(awards_players, left.year == awards_players.yearID) @@ -258,17 +266,8 @@ def test_join_with_pandas_non_null_typed_columns(batting, awards_players): [ "inner", "left", - "right", - param( - "outer", - marks=[ - pytest.mark.notyet( - ["sqlite"], - condition=vparse(sqlite3.sqlite_version) < vparse("3.39"), - reason="sqlite didn't support full outer join until 3.39", - ), - ], - ), + param("right", marks=[sqlite_right_or_full_mark]), + param("outer", marks=[sqlite_right_or_full_mark]), ], ) @pytest.mark.notimpl( @@ -295,11 +294,6 @@ def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_valu assert len(result) == len(expected) -outer_join_nullability_failures = [pytest.mark.notyet(["sqlite"])] * ( - vparse(sqlite3.sqlite_version) < vparse("3.39") -) - - @pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError) @pytest.mark.notimpl(["flink"], reason="`win` table isn't loaded") @pytest.mark.parametrize( @@ -325,6 +319,7 @@ def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_valu lambda left: left.filter(lambda t: t.x == 1).select(y=lambda t: t.x), [("x", "y")], id="right-xy", + marks=[sqlite_right_or_full_mark], ), param( "right", @@ -332,6 +327,7 @@ def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_valu lambda left: left.filter(lambda t: t.x == 1), "x", id="right-x", + marks=[sqlite_right_or_full_mark], ), param( "outer", @@ -339,7 +335,7 @@ def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_valu lambda left: left.filter(lambda t: t.x == 1).select(y=lambda t: t.x), [("x", "y")], id="outer-xy", - marks=outer_join_nullability_failures, + marks=[sqlite_right_or_full_mark], ), param( "outer", @@ -347,9 +343,7 @@ def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_valu lambda left: left.filter(lambda t: t.x == 1), "x", id="outer-x", - marks=[ - *outer_join_nullability_failures, - ], + marks=[sqlite_right_or_full_mark], ), ], ) diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index bb1e48b4fb42..05f94c9d3eb5 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -249,7 +249,7 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": decimal.Decimal("1.1"), "snowflake": decimal.Decimal("1.1"), - "sqlite": 1.1, + "sqlite": decimal.Decimal("1.1"), "trino": decimal.Decimal("1.1"), "dask": decimal.Decimal("1.1"), "exasol": decimal.Decimal("1"), @@ -293,7 +293,7 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": decimal.Decimal("1.1"), "snowflake": decimal.Decimal("1.1"), - "sqlite": 1.1, + "sqlite": decimal.Decimal("1.1"), "trino": decimal.Decimal("1.1"), "duckdb": decimal.Decimal("1.100000000"), "risingwave": 1.1, @@ -330,7 +330,7 @@ def test_numeric_literal(con, backend, expr, expected_types): # TODO(krzysztof-kwitt): Should we unify it? 
{ "bigquery": decimal.Decimal("1.1"), - "sqlite": 1.1, + "sqlite": decimal.Decimal("1.1"), "dask": decimal.Decimal("1.1"), "risingwave": 1.1, "postgres": decimal.Decimal("1.1"), @@ -384,8 +384,9 @@ def test_numeric_literal(con, backend, expr, expected_types): ibis.literal(decimal.Decimal("Infinity"), type=dt.decimal), # TODO(krzysztof-kwitt): Should we unify it? { - "sqlite": float("inf"), "risingwave": float("nan"), + "bigquery": float("inf"), + "sqlite": decimal.Decimal("Infinity"), "postgres": decimal.Decimal("Infinity"), "pandas": decimal.Decimal("Infinity"), "dask": decimal.Decimal("Infinity"), @@ -454,8 +455,9 @@ def test_numeric_literal(con, backend, expr, expected_types): ibis.literal(decimal.Decimal("-Infinity"), type=dt.decimal), # TODO(krzysztof-kwitt): Should we unify it? { - "sqlite": float("-inf"), "risingwave": float("nan"), + "bigquery": float("-inf"), + "sqlite": decimal.Decimal("-Infinity"), "postgres": decimal.Decimal("-Infinity"), "pandas": decimal.Decimal("-Infinity"), "dask": decimal.Decimal("-Infinity"), @@ -599,7 +601,6 @@ def test_numeric_literal(con, backend, expr, expected_types): @pytest.mark.notimpl(["polars"], raises=TypeError) def test_decimal_literal(con, backend, expr, expected_types, expected_result): backend_name = backend.name() - result = con.execute(expr) current_expected_result = expected_result[backend_name] if type(current_expected_result) in (float, decimal.Decimal) and math.isnan( @@ -1324,6 +1325,7 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "pyspark", "polars", "flink", + "sqlite", "snowflake", "trino", "postgres", diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index ae01c33023c6..c38be0c47cb9 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -933,12 +933,11 @@ def test_capitalize(con): @pytest.mark.notimpl( ["dask", "pandas", "polars", "oracle", "flink"], raises=com.OperationNotDefinedError ) -@pytest.mark.notyet(["sqlite"], reason="no arrays", raises=com.OperationNotDefinedError) @pytest.mark.never( ["mysql"], raises=com.OperationNotDefinedError, reason="no array support" ) @pytest.mark.notimpl( - ["mssql", "exasol", "impala"], + ["mssql", "exasol", "impala", "sqlite"], raises=com.UnsupportedBackendType, reason="no array support", ) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index c353d905a07d..3b637968c227 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -1372,7 +1372,7 @@ def test_timestamp_comparison_filter_numpy(backend, con, alltypes, df, func_name @pytest.mark.notimpl( - ["sqlite", "snowflake", "mssql", "exasol"], + ["snowflake", "mssql", "exasol"], raises=com.OperationNotDefinedError, ) @pytest.mark.broken( @@ -1395,7 +1395,7 @@ def test_interval_add_cast_scalar(backend, alltypes): @pytest.mark.notimpl( - ["sqlite", "snowflake", "mssql", "exasol"], + ["snowflake", "mssql", "exasol"], raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( @@ -1873,14 +1873,6 @@ def test_timestamp_literal(con, backend): ["pandas", "mysql", "dask", "pyspark", "exasol"], raises=com.OperationNotDefinedError, ) -@pytest.mark.notimpl( - ["sqlite"], - raises=com.UnsupportedOperationError, - reason=( - "Unable to cast from Timestamp(timezone=None, scale=None, nullable=True) to " - "Timestamp(timezone='***', scale=None, nullable=True)." 
- ), -) @pytest.mark.notyet(["impala", "oracle"], raises=com.OperationNotDefinedError) @pytest.mark.parametrize( ("timezone", "expected"), @@ -1902,9 +1894,14 @@ def test_timestamp_literal(con, backend): ) @pytest.mark.notimpl( ["bigquery"], - "BigQuery does not support timestamps with timezones other than 'UTC'", + reason="timestamps with timezones other than 'UTC' not supported", raises=com.UnsupportedBackendType, ) +@pytest.mark.notimpl( + ["sqlite"], + reason="timestamps with timezones other than 'UTC' not supported", + raises=com.UnsupportedOperationError, +) @pytest.mark.notimpl( ["druid"], raises=PyDruidProgrammingError, @@ -2412,7 +2409,6 @@ def test_timestamp_precision_output(con, ts, scale, unit): "oracle", "pandas", "polars", - "sqlite", ], raises=com.OperationNotDefinedError, ) @@ -2474,7 +2470,7 @@ def test_timestamp_precision_output(con, ts, scale, unit): ), ], ) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["sqlite", "exasol"], raises=com.OperationNotDefinedError) def test_delta(con, start, end, unit, expected): expr = end.delta(start, unit) assert con.execute(expr) == expected diff --git a/ibis/backends/tests/test_udf.py b/ibis/backends/tests/test_udf.py index 2a55a30355b2..75021f577133 100644 --- a/ibis/backends/tests/test_udf.py +++ b/ibis/backends/tests/test_udf.py @@ -1,6 +1,5 @@ from __future__ import annotations -import sqlalchemy as sa from pytest import mark, param import ibis.common.exceptions as com @@ -59,7 +58,7 @@ def num_vowels(s: str, include_y: bool = False) -> int: @mark.notyet(["datafusion"], raises=NotImplementedError) @mark.notyet( ["sqlite"], - raises=sa.exc.OperationalError, + raises=com.IbisTypeError, reason="sqlite doesn't support map types", ) def test_map_udf(batting): diff --git a/poetry.lock b/poetry.lock index 98a9021ccea0..5527cd33729a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -5323,6 +5323,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -7299,7 +7300,7 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\ cffi = ["cffi (>=1.11)"] [extras] -all = ["black", "clickhouse-connect", "dask", "datafusion", "db-dtypes", "deltalake", "duckdb", "geopandas", "google-cloud-bigquery", "google-cloud-bigquery-storage", "graphviz", "impyla", "oracledb", "packaging", "pins", "polars", "psycopg2", "pydata-google-auth", "pydruid", "pyexasol", "pymysql", "pyodbc", "pyspark", "regex", "shapely", "snowflake-connector-python", "sqlalchemy", "sqlalchemy-views", "trino"] +all = ["black", "clickhouse-connect", 
"dask", "datafusion", "db-dtypes", "deltalake", "duckdb", "geopandas", "google-cloud-bigquery", "google-cloud-bigquery-storage", "graphviz", "impyla", "oracledb", "packaging", "pins", "polars", "psycopg2", "pydata-google-auth", "pydruid", "pyexasol", "pymysql", "pyodbc", "pyspark", "regex", "shapely", "snowflake-connector-python", "trino"] bigquery = ["db-dtypes", "google-cloud-bigquery", "google-cloud-bigquery-storage", "pydata-google-auth"] clickhouse = ["clickhouse-connect"] dask = ["dask", "regex"] @@ -7322,11 +7323,11 @@ postgres = ["psycopg2"] risingwave = ["psycopg2"] pyspark = ["packaging", "pyspark"] snowflake = ["packaging", "snowflake-connector-python"] -sqlite = ["regex", "sqlalchemy", "sqlalchemy-views"] +sqlite = ["regex"] trino = ["trino"] visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "11da6bdc8c65ae8790ee2cbc799ca82af0c1f783f8c5ec6d0ab1477fd21b03b7" +content-hash = "46f6575d9e668129872ccb5c2fd5de6c3e2fc808b8620e1c0082b18239b36639" diff --git a/pyproject.toml b/pyproject.toml index 5b8607ac76bd..cc74e67c1a0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,8 +169,6 @@ all = [ "shapely", "snowflake-connector-python", "sqlalchemy", - "sqlalchemy-views", - "sqlalchemy-risingwave", "trino", ] bigquery = [ @@ -197,7 +195,7 @@ risingwave = ["psycopg2"] postgres = ["psycopg2"] pyspark = ["pyspark", "packaging"] snowflake = ["snowflake-connector-python", "packaging"] -sqlite = ["regex", "sqlalchemy", "sqlalchemy-views"] +sqlite = ["regex"] trino = ["trino"] # non-backend extras visualization = ["graphviz"] diff --git a/requirements-dev.txt b/requirements-dev.txt index 53a3afa07041..ce0a3ff53b12 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -234,7 +234,6 @@ snowflake-connector-python==3.6.0 ; python_version >= "3.9" and python_version < sortedcontainers==2.4.0 ; python_version >= "3.9" and python_version < "4.0" soupsieve==2.5 ; python_version >= "3.10" and python_version < "3.13" sphobjinv==2.3.1 ; python_version >= "3.10" and python_version < "3.13" -sqlalchemy-views==0.3.2 ; python_version >= "3.9" and python_version < "4.0" sqlalchemy==1.4.51 ; python_version >= "3.9" and python_version < "4.0" sqlglot==20.11.0 ; python_version >= "3.9" and python_version < "4.0" stack-data==0.6.3 ; python_version >= "3.9" and python_version < "4.0" From 06ee3a1ee10f236ba7f1452ac3713e9ebe4c34df Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Wed, 31 Jan 2024 05:29:25 -0500 Subject: [PATCH 125/161] refactor(sql): remove temporary table creation when using inline sql (#8149) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR fixes a long-standing annoyance with our `.sql` methods. Previously we pooped a bunch of temporary tables or views (depending on the backend), but after this PR the various `.sql` methods replace this hack with much more vanilla CTEs. 
--------- Co-authored-by: Krisztián Szűcs --- ibis/backends/base/sqlglot/__init__.py | 35 ++-- ibis/backends/base/sqlglot/compiler.py | 58 +++--- ibis/backends/base/sqlglot/rewrites.py | 4 +- ibis/backends/clickhouse/__init__.py | 18 -- ibis/backends/datafusion/__init__.py | 2 - ibis/backends/druid/__init__.py | 1 - ibis/backends/duckdb/__init__.py | 13 ++ ibis/backends/exasol/__init__.py | 3 +- ibis/backends/mssql/__init__.py | 17 -- ibis/backends/mssql/compiler.py | 4 - ibis/backends/mysql/__init__.py | 18 -- ibis/backends/postgres/__init__.py | 9 - ibis/backends/pyspark/__init__.py | 9 - .../test_dot_sql/test_cte/bigquery/out.sql | 8 + .../test_dot_sql/test_cte/clickhouse/out.sql | 8 + .../test_dot_sql/test_cte/datafusion/out.sql | 8 + .../test_dot_sql/test_cte/duckdb/out.sql | 8 + .../test_dot_sql/test_cte/exasol/out.sql | 8 + .../test_dot_sql/test_cte/impala/out.sql | 8 + .../test_dot_sql/test_cte/mssql/out.sql | 8 + .../test_dot_sql/test_cte/mysql/out.sql | 8 + .../test_dot_sql/test_cte/oracle/out.sql | 8 + .../test_dot_sql/test_cte/postgres/out.sql | 8 + .../test_dot_sql/test_cte/pyspark/out.sql | 8 + .../test_dot_sql/test_cte/snowflake/out.sql | 8 + .../test_dot_sql/test_cte/sqlite/out.sql | 8 + .../test_dot_sql/test_cte/trino/out.sql | 8 + ibis/backends/tests/test_dot_sql.py | 181 +++++++++++------- ibis/backends/trino/__init__.py | 18 -- ibis/expr/format.py | 4 +- ibis/expr/operations/relations.py | 23 +-- ibis/expr/types/relations.py | 26 +-- ibis/tests/expr/mocks.py | 3 + .../test_format_sql_query_result/repr.txt | 41 +++- ibis/tests/expr/test_format_sql_operations.py | 5 +- 35 files changed, 351 insertions(+), 253 deletions(-) create mode 100644 ibis/backends/tests/snapshots/test_dot_sql/test_cte/bigquery/out.sql create mode 100644 ibis/backends/tests/snapshots/test_dot_sql/test_cte/clickhouse/out.sql create mode 100644 ibis/backends/tests/snapshots/test_dot_sql/test_cte/datafusion/out.sql create mode 100644 ibis/backends/tests/snapshots/test_dot_sql/test_cte/duckdb/out.sql create mode 100644 ibis/backends/tests/snapshots/test_dot_sql/test_cte/exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_dot_sql/test_cte/impala/out.sql create mode 100644 ibis/backends/tests/snapshots/test_dot_sql/test_cte/mssql/out.sql create mode 100644 ibis/backends/tests/snapshots/test_dot_sql/test_cte/mysql/out.sql create mode 100644 ibis/backends/tests/snapshots/test_dot_sql/test_cte/oracle/out.sql create mode 100644 ibis/backends/tests/snapshots/test_dot_sql/test_cte/postgres/out.sql create mode 100644 ibis/backends/tests/snapshots/test_dot_sql/test_cte/pyspark/out.sql create mode 100644 ibis/backends/tests/snapshots/test_dot_sql/test_cte/snowflake/out.sql create mode 100644 ibis/backends/tests/snapshots/test_dot_sql/test_cte/sqlite/out.sql create mode 100644 ibis/backends/tests/snapshots/test_dot_sql/test_cte/trino/out.sql diff --git a/ibis/backends/base/sqlglot/__init__.py b/ibis/backends/base/sqlglot/__init__.py index 4025f937e3be..1a7564e2dcb1 100644 --- a/ibis/backends/base/sqlglot/__init__.py +++ b/ibis/backends/base/sqlglot/__init__.py @@ -151,6 +151,20 @@ def _get_schema_using_query(self, query: str) -> sch.Schema: """Return an ibis Schema from a backend-specific SQL string.""" return sch.Schema.from_tuples(self._metadata(query)) + def _get_sql_string_view_schema(self, name, table, query) -> sch.Schema: + compiler = self.compiler + dialect = compiler.dialect + + cte = self._to_sqlglot(table) + parsed = sg.parse_one(query, read=dialect) + parsed.args["with"] = 
cte.args.pop("with", []) + parsed = parsed.with_( + sg.to_identifier(name, quoted=compiler.quoted), as_=cte, dialect=dialect + ) + + sql = parsed.sql(dialect) + return self._get_schema_using_query(sql) + def create_view( self, name: str, @@ -195,27 +209,6 @@ def drop_view( with self._safe_raw_sql(src): pass - def _get_temp_view_definition(self, name: str, definition: str) -> str: - return sge.Create( - this=sg.to_identifier(name, quoted=self.compiler.quoted), - kind="VIEW", - expression=definition, - replace=True, - properties=sge.Properties(expressions=[sge.TemporaryProperty()]), - ) - - def _create_temp_view(self, table_name, source): - if table_name not in self._temp_views and table_name in self.list_tables(): - raise ValueError( - f"{table_name} already exists as a non-temporary table or view" - ) - - with self._safe_raw_sql(self._get_temp_view_definition(table_name, source)): - pass - - self._temp_views.add(table_name) - self._register_temp_view_cleanup(table_name) - def _register_temp_view_cleanup(self, name: str) -> None: """Register a clean up function for a temporary view. diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index b6920f5718e6..6c7e5ddb3b7b 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -261,28 +261,36 @@ def translate(self, op, *, params: Mapping[ir.Value, Any]) -> sge.Expression: op, ctes = sqlize(op) aliases = {} - alias_counter = itertools.count() + counter = itertools.count() def fn(node, _, **kwargs): result = self.visit_node(node, **kwargs) - if node is op: - return result - elif isinstance(node, ops.Relation): - aliases[node] = alias = f"t{next(alias_counter)}" - alias = sg.to_identifier(alias, quoted=self.quoted) - try: - return result.subquery(alias) - except AttributeError: - return result.as_(alias, quoted=self.quoted) - else: + # if it's not a relation then we don't need to do anything special + if node is op or not isinstance(node, ops.Relation): return result + # alias ops.Views to their explicitly assigned name otherwise generate + alias = node.name if isinstance(node, ops.View) else f"t{next(counter)}" + aliases[node] = alias + + alias = sg.to_identifier(alias, quoted=self.quoted) + try: + return result.subquery(alias) + except AttributeError: + return result.as_(alias, quoted=self.quoted) + # apply translate rules in topological order results = op.map(fn) + + # get the root node as a sqlglot select statement out = results[op] - out = out.this if isinstance(out, sge.Subquery) else out + if isinstance(out, sge.Table): + out = sg.select(STAR).from_(out) + elif isinstance(out, sge.Subquery): + out = out.this + # add cte definitions to the select statement for cte in ctes: alias = sg.to_identifier(aliases[cte], quoted=self.quoted) out = out.with_(alias, as_=results[cte].this, dialect=self.dialect) @@ -1222,27 +1230,27 @@ def visit_FillNa(self, op, *, parent, replacements): } return sg.select(*self._cleanup_names(exprs)).from_(parent) - @visit_node.register(ops.View) - def visit_View(self, op, *, child, name: str): - # TODO: find a way to do this without creating a temporary view - backend = op.child.to_expr()._find_backend() - backend._create_temp_view(table_name=name, source=sg.select(STAR).from_(child)) - return sg.table(name, quoted=self.quoted) - @visit_node.register(CTE) def visit_CTE(self, op, *, parent): return sg.table(parent.alias_or_name, quoted=self.quoted) + @visit_node.register(ops.View) + def visit_View(self, op, *, child, name: str): + if 
isinstance(child, sge.Table): + child = sg.select(STAR).from_(child) + + try: + return child.subquery(name) + except AttributeError: + return child.as_(name) + @visit_node.register(ops.SQLStringView) - def visit_SQLStringView(self, op, *, query: str, name: str, child): - table = sg.table(name, quoted=self.quoted) - return ( - sg.select(STAR).from_(table).with_(table, as_=query, dialect=self.dialect) - ) + def visit_SQLStringView(self, op, *, query: str, child, schema): + return sg.parse_one(query, read=self.dialect) @visit_node.register(ops.SQLQueryResult) def visit_SQLQueryResult(self, op, *, query, schema, source): - return sg.parse_one(query, read=self.dialect).subquery() + return sg.parse_one(query, dialect=self.dialect).subquery() @visit_node.register(ops.JoinTable) def visit_JoinTable(self, op, *, parent, index): diff --git a/ibis/backends/base/sqlglot/rewrites.py b/ibis/backends/base/sqlglot/rewrites.py index 9bd2d4a8b1ab..9474959c53e7 100644 --- a/ibis/backends/base/sqlglot/rewrites.py +++ b/ibis/backends/base/sqlglot/rewrites.py @@ -157,7 +157,9 @@ def extract_ctes(node): g = Graph.from_bfs(node, filter=(ops.Relation, ops.Subquery, ops.JoinLink)) for node, dependents in g.invert().items(): - if len(dependents) > 1 and isinstance(node, cte_types): + if isinstance(node, ops.View) or ( + len(dependents) > 1 and isinstance(node, cte_types) + ): result.append(node) return result diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index 746d35f41cc9..d2a492d709e8 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -1,7 +1,6 @@ from __future__ import annotations import ast -import atexit import contextlib import glob from contextlib import closing @@ -166,7 +165,6 @@ def do_connect( compress=compression, **kwargs, ) - self._temp_views = set() @property def version(self) -> str: @@ -726,19 +724,3 @@ def create_view( with self._safe_raw_sql(src, external_tables=external_tables): pass return self.table(name, database=database) - - def _get_temp_view_definition(self, name: str, definition: str) -> str: - return sge.Create( - this=sg.to_identifier(name, quoted=self.compiler.quoted), - kind="VIEW", - expression=definition, - replace=True, - ) - - def _register_temp_view_cleanup(self, name: str) -> None: - def drop(self, name: str, query: str): - self.raw_sql(query) - self._temp_views.discard(name) - - query = sge.Drop(this=sg.table(name), kind="VIEW", exists=True) - atexit.register(drop, self, name=name, query=query) diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index c73ef96dc0b8..8880771d9b1c 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -92,8 +92,6 @@ def do_connect( for name, path in config.items(): self.register(path, table_name=name) - self._temp_views = set() - @contextlib.contextmanager def _safe_raw_sql(self, sql: sge.Statement) -> Any: yield self.raw_sql(sql) diff --git a/ibis/backends/druid/__init__.py b/ibis/backends/druid/__init__.py index 77024e3023e4..17dbf65b29dd 100644 --- a/ibis/backends/druid/__init__.py +++ b/ibis/backends/druid/__init__.py @@ -86,7 +86,6 @@ def do_connect(self, **kwargs: Any) -> None: """Create an Ibis client using the passed connection parameters.""" header = kwargs.pop("header", True) self.con = pydruid.db.connect(**kwargs, header=header) - self._temp_views = set() @contextlib.contextmanager def _safe_raw_sql(self, query, *args, **kwargs): diff --git 
a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index af1e4e84893c..ff4c64c3cf3f 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -1534,3 +1534,16 @@ def insert( table_name, obj if isinstance(obj, pd.DataFrame) else pd.DataFrame(obj), ) + + def _get_temp_view_definition(self, name: str, definition: str) -> str: + return sge.Create( + this=sg.to_identifier(name, quoted=self.compiler.quoted), + kind="VIEW", + expression=definition, + replace=True, + properties=sge.Properties(expressions=[sge.TemporaryProperty()]), + ) + + def _create_temp_view(self, table_name, source): + with self._safe_raw_sql(self._get_temp_view_definition(table_name, source)): + pass diff --git a/ibis/backends/exasol/__init__.py b/ibis/backends/exasol/__init__.py index 1751309cdd74..031087f7e5b4 100644 --- a/ibis/backends/exasol/__init__.py +++ b/ibis/backends/exasol/__init__.py @@ -31,7 +31,7 @@ from ibis.backends.base import BaseBackend # strip trailing encodings e.g., UTF8 -_VARCHAR_REGEX = re.compile(r"^(VARCHAR(?:\(\d+\)))?(?:\s+.+)?$") +_VARCHAR_REGEX = re.compile(r"^((VAR)?CHAR(?:\(\d+\)))?(?:\s+.+)?$") class Backend(SQLGlotBackend): @@ -90,7 +90,6 @@ def do_connect( quote_ident=True, **kwargs, ) - self._temp_views = set() def _from_url(self, url: str, **kwargs) -> BaseBackend: """Construct an ibis backend from a SQLAlchemy-conforming URL.""" diff --git a/ibis/backends/mssql/__init__.py b/ibis/backends/mssql/__init__.py index 6c7c43f63310..cf2e72402705 100644 --- a/ibis/backends/mssql/__init__.py +++ b/ibis/backends/mssql/__init__.py @@ -2,7 +2,6 @@ from __future__ import annotations -import atexit import contextlib import datetime import struct @@ -92,7 +91,6 @@ def do_connect( cur.execute("SET DATEFIRST 1") self.con = con - self._temp_views = set() def get_schema( self, name: str, schema: str | None = None, database: str | None = None @@ -244,13 +242,6 @@ def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: con.commit() return cursor - def _get_temp_view_definition(self, name: str, definition) -> str: - return sge.Create( - kind="OR ALTER VIEW", - this=sg.to_identifier(name, quoted=self.compiler.quoted), - expression=definition, - ) - def create_database(self, name: str, force: bool = False) -> None: name = self._quote(name) create_stmt = ( @@ -462,14 +453,6 @@ def create_table( name, schema=schema, source=self, namespace=ops.Namespace(database=database) ).to_expr() - def _register_temp_view_cleanup(self, name: str) -> None: - def drop(self, name: str, query: str): - self.raw_sql(query) - self._temp_views.discard(name) - - query = sge.Drop(this=sg.table(name), kind="VIEW", exists=True) - atexit.register(drop, self, name=name, query=query) - def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: schema = op.schema if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]: diff --git a/ibis/backends/mssql/compiler.py b/ibis/backends/mssql/compiler.py index 35d7afa9d3e2..294874e5606f 100644 --- a/ibis/backends/mssql/compiler.py +++ b/ibis/backends/mssql/compiler.py @@ -291,10 +291,6 @@ def visit_TimestampFromUNIX(self, op, *, arg, unit): return self.f.dateadd(self.v.s, arg / 1_000, "1970-01-01 00:00:00") raise com.UnsupportedOperationError(f"{unit!r} unit is not supported!") - @visit_node.register(ops.SQLStringView) - def visit_SQLStringView(self, op, *, query: str, name: str, child): - return sg.parse_one(query, read=self.dialect).subquery(name) - def visit_NonNullLiteral(self, op, *, value, dtype): if 
dtype.is_decimal(): return self.cast(str(value.normalize()), dtype) diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py index b853bd14a0e0..24c9ee97f1b6 100644 --- a/ibis/backends/mysql/__init__.py +++ b/ibis/backends/mysql/__init__.py @@ -2,7 +2,6 @@ from __future__ import annotations -import atexit import contextlib import re import warnings @@ -174,7 +173,6 @@ def do_connect( warnings.warn(f"Unable to set session timezone to UTC: {e}") self.con = con - self._temp_views = set() @property def current_database(self) -> str: @@ -222,14 +220,6 @@ def get_schema( return sch.Schema(fields) - def _get_temp_view_definition(self, name: str, definition: str) -> str: - return sge.Create( - kind="VIEW", - replace=True, - this=sg.to_identifier(name, quoted=self.compiler.quoted), - expression=definition, - ) - def create_database(self, name: str, force: bool = False) -> None: sql = sge.Create(kind="DATABASE", exist=force, this=sg.to_identifier(name)).sql( self.name @@ -509,11 +499,3 @@ def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: raise df = MySQLPandasData.convert_table(df, schema) return df - - def _register_temp_view_cleanup(self, name: str) -> None: - def drop(self, name: str, query: str): - self.raw_sql(query) - self._temp_views.discard(name) - - query = sge.Drop(this=sg.table(name), kind="VIEW", exists=True) - atexit.register(drop, self, name=name, query=query) diff --git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py index f177d38bf126..0f51218c7f7c 100644 --- a/ibis/backends/postgres/__init__.py +++ b/ibis/backends/postgres/__init__.py @@ -268,8 +268,6 @@ def do_connect( with self.begin() as cur: cur.execute("SET TIMEZONE = UTC") - self._temp_views = set() - def list_tables( self, like: str | None = None, @@ -552,13 +550,6 @@ def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]: with self._safe_raw_sql(drop_stmt): pass - def _get_temp_view_definition(self, name: str, definition): - drop = sge.Drop( - kind="VIEW", exists=True, this=sg.table(name), cascade=True - ).sql(self.name) - create = super()._get_temp_view_definition(name, definition) - return f"{drop}; {create}" - def create_schema( self, name: str, database: str | None = None, force: bool = False ) -> None: diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index 5ea17b2d3ae8..de074a93bf2c 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -1,6 +1,5 @@ from __future__ import annotations -import atexit import contextlib import os from pathlib import Path @@ -156,7 +155,6 @@ def do_connect(self, session: SparkSession) -> None: # https://spark.apache.org/docs/latest/sql-pyspark-pandas-with-arrow.html#timestamp-with-time-zone-semantics self._session.conf.set("spark.sql.session.timeZone", "UTC") self._session.conf.set("spark.sql.mapKeyDedupPolicy", "LAST_WIN") - self._temp_views = set() def _metadata(self, query: str): cursor = self.raw_sql(query) @@ -235,13 +233,6 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: df = self._session.createDataFrame(data=op.data.to_frame(), schema=schema) df.createOrReplaceTempView(op.name) - def _register_temp_view_cleanup(self, name: str) -> None: - def drop(self, name: str): - self._session.catalog.dropTempView(name) - self._temp_views.discard(name) - - atexit.register(drop, self, name=name) - def _fetch_from_cursor(self, cursor, schema): df = cursor.query.toPandas() # blocks until finished return 
PySparkPandasData.convert_table(df, schema) diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/bigquery/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/bigquery/out.sql new file mode 100644 index 000000000000..499888a1d390 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/bigquery/out.sql @@ -0,0 +1,8 @@ +WITH foo AS ( + SELECT + * + FROM `ibis-gbq`.ibis_gbq_testing.test_bigquery_temp_mem_t_for_cte AS t0 +) +SELECT + COUNT(*) AS `x` +FROM `foo` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/clickhouse/out.sql new file mode 100644 index 000000000000..65c261d51aed --- /dev/null +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/clickhouse/out.sql @@ -0,0 +1,8 @@ +WITH foo AS ( + SELECT + * + FROM test_clickhouse_temp_mem_t_for_cte AS t0 +) +SELECT + COUNT(*) AS "x" +FROM "foo" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/datafusion/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/datafusion/out.sql new file mode 100644 index 000000000000..32d2166bb0c6 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/datafusion/out.sql @@ -0,0 +1,8 @@ +WITH "foo" AS ( + SELECT + * + FROM "test_datafusion_temp_mem_t_for_cte" AS "t0" +) +SELECT + COUNT(*) AS "x" +FROM "foo" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/duckdb/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/duckdb/out.sql new file mode 100644 index 000000000000..274da8b2928b --- /dev/null +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/duckdb/out.sql @@ -0,0 +1,8 @@ +WITH foo AS ( + SELECT + * + FROM test_duckdb_temp_mem_t_for_cte AS t0 +) +SELECT + COUNT(*) AS "x" +FROM "foo" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/exasol/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/exasol/out.sql new file mode 100644 index 000000000000..046cea74dd44 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/exasol/out.sql @@ -0,0 +1,8 @@ +WITH "foo" AS ( + SELECT + * + FROM "test_exasol_temp_mem_t_for_cte" AS "t0" +) +SELECT + COUNT(*) AS "x" +FROM "foo" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/impala/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/impala/out.sql new file mode 100644 index 000000000000..4fe2512bbd0a --- /dev/null +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/impala/out.sql @@ -0,0 +1,8 @@ +WITH `foo` AS ( + SELECT + * + FROM `ibis_testing`.`test_impala_temp_mem_t_for_cte` AS `t0` +) +SELECT + COUNT(*) AS `x` +FROM `foo` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/mssql/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/mssql/out.sql new file mode 100644 index 000000000000..2591a1d1c5ff --- /dev/null +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/mssql/out.sql @@ -0,0 +1,8 @@ +WITH [foo] AS ( + SELECT + * + FROM [test_mssql_temp_mem_t_for_cte] AS [t0] +) +SELECT + COUNT(*) AS [x] +FROM [foo] \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/mysql/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/mysql/out.sql new file mode 100644 index 000000000000..106ae1a29154 --- /dev/null +++ 
b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/mysql/out.sql @@ -0,0 +1,8 @@ +WITH `foo` AS ( + SELECT + * + FROM `test_mysql_temp_mem_t_for_cte` AS `t0` +) +SELECT + COUNT(*) AS `x` +FROM `foo` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/oracle/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/oracle/out.sql new file mode 100644 index 000000000000..c176f302613e --- /dev/null +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/oracle/out.sql @@ -0,0 +1,8 @@ +WITH "foo" AS ( + SELECT + * + FROM "test_oracle_temp_mem_t_for_cte" "t0" +) +SELECT + COUNT(*) AS "x" +FROM "foo" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/postgres/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/postgres/out.sql new file mode 100644 index 000000000000..8e555d69b4e4 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/postgres/out.sql @@ -0,0 +1,8 @@ +WITH "foo" AS ( + SELECT + * + FROM "test_postgres_temp_mem_t_for_cte" AS "t0" +) +SELECT + COUNT(*) AS "x" +FROM "foo" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/pyspark/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/pyspark/out.sql new file mode 100644 index 000000000000..912ac91c2984 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/pyspark/out.sql @@ -0,0 +1,8 @@ +WITH `foo` AS ( + SELECT + * + FROM `test_pyspark_temp_mem_t_for_cte` AS `t0` +) +SELECT + COUNT(*) AS `x` +FROM `foo` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/snowflake/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/snowflake/out.sql new file mode 100644 index 000000000000..f4b46be58430 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/snowflake/out.sql @@ -0,0 +1,8 @@ +WITH "foo" AS ( + SELECT + * + FROM "test_snowflake_temp_mem_t_for_cte" AS "t0" +) +SELECT + COUNT(*) AS "x" +FROM "foo" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/sqlite/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/sqlite/out.sql new file mode 100644 index 000000000000..98d42659f8bc --- /dev/null +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/sqlite/out.sql @@ -0,0 +1,8 @@ +WITH "foo" AS ( + SELECT + * + FROM "test_sqlite_temp_mem_t_for_cte" AS "t0" +) +SELECT + COUNT(*) AS "x" +FROM "foo" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/trino/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/trino/out.sql new file mode 100644 index 000000000000..397949f6634b --- /dev/null +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/trino/out.sql @@ -0,0 +1,8 @@ +WITH "foo" AS ( + SELECT + * + FROM "test_trino_temp_mem_t_for_cte" AS "t0" +) +SELECT + COUNT(*) AS "x" +FROM "foo" \ No newline at end of file diff --git a/ibis/backends/tests/test_dot_sql.py b/ibis/backends/tests/test_dot_sql.py index 4f6fe5799272..62d73bd57040 100644 --- a/ibis/backends/tests/test_dot_sql.py +++ b/ibis/backends/tests/test_dot_sql.py @@ -1,21 +1,23 @@ from __future__ import annotations +import contextlib + import pandas as pd import pytest import sqlglot as sg from pytest import param import ibis +import ibis.common.exceptions as com from ibis import _ from ibis.backends.base import _IBIS_TO_SQLGLOT_DIALECT, _get_backend_names -from ibis.backends.tests.errors import PolarsComputeError - 
-table_dot_sql_notimpl = pytest.mark.notimpl(["bigquery", "impala", "druid"]) -dot_sql_notimpl = pytest.mark.notimpl(["exasol", "flink"]) -dot_sql_notyet = pytest.mark.notyet( - ["snowflake", "oracle"], - reason="snowflake and oracle column names are case insensitive", +from ibis.backends.tests.errors import ( + GoogleBadRequest, + OracleDatabaseError, + PolarsComputeError, ) + +dot_sql_notimpl = pytest.mark.notimpl(["flink"]) dot_sql_never = pytest.mark.never( ["dask", "pandas"], reason="dask and pandas do not accept SQL" ) @@ -42,7 +44,7 @@ def test_con_dot_sql(backend, con, schema): alltypes = backend.functional_alltypes # pull out the quoted name name = _NAMES.get(con.name, "functional_alltypes") - quoted = getattr(getattr(con, "compiler", None), "quoted", True) + quoted = True dialect = _IBIS_TO_SQLGLOT_DIALECT.get(con.name, con.name) cols = [ sg.column("string_col", quoted=quoted).as_("s", quoted=quoted).sql(dialect), @@ -75,25 +77,31 @@ def test_con_dot_sql(backend, con, schema): backend.assert_series_equal(result.astype(expected.dtype), expected) -@table_dot_sql_notimpl +@pytest.mark.notyet(["polars"], raises=PolarsComputeError) +@pytest.mark.notyet( + ["bigquery"], raises=GoogleBadRequest, reason="requires a qualified name" +) +@pytest.mark.notyet( + ["druid"], raises=com.IbisTypeError, reason="druid does not preserve case" +) @dot_sql_notimpl -@dot_sql_notyet @dot_sql_never -def test_table_dot_sql(backend, con): - alltypes = con.table("functional_alltypes") +def test_table_dot_sql(backend): + alltypes = backend.functional_alltypes t = ( alltypes.sql( """ SELECT - string_col as s, - double_col + 1.0 AS new_col - FROM functional_alltypes - """ + "string_col" AS "s", + "double_col" + CAST(1.0 AS DOUBLE) AS "new_col" + FROM "functional_alltypes" + """, + dialect="duckdb", ) .group_by("s") # group by a column from SQL .aggregate(fancy_af=lambda t: t.new_col.mean()) .alias("awesome_t") # create a name for the aggregate - .sql("SELECT fancy_af AS yas FROM awesome_t") + .sql('SELECT "fancy_af" AS "yas" FROM "awesome_t"', dialect="duckdb") .order_by(_.yas) ) @@ -109,23 +117,34 @@ def test_table_dot_sql(backend, con): .reset_index() .yas ) - backend.assert_series_equal(result, expected) + assert pytest.approx(result) == expected -@table_dot_sql_notimpl +@pytest.mark.notyet(["polars"], raises=PolarsComputeError) +@pytest.mark.notyet( + ["bigquery"], raises=GoogleBadRequest, reason="requires a qualified name" +) +@pytest.mark.notyet( + ["druid"], raises=com.IbisTypeError, reason="druid does not preserve case" +) +@pytest.mark.notimpl( + ["oracle"], + OracleDatabaseError, + reason="oracle doesn't know which of the tables in the join to sort from", +) @dot_sql_notimpl -@dot_sql_notyet @dot_sql_never -def test_table_dot_sql_with_join(backend, con): - alltypes = con.table("functional_alltypes") +def test_table_dot_sql_with_join(backend): + alltypes = backend.functional_alltypes t = ( alltypes.sql( """ SELECT - string_col as s, - double_col + 1.0 AS new_col - FROM functional_alltypes - """ + "string_col" AS "s", + "double_col" + CAST(1.0 AS DOUBLE) AS "new_col" + FROM "functional_alltypes" + """, + dialect="duckdb", ) .alias("ft") .group_by("s") # group by a column from SQL @@ -134,12 +153,13 @@ def test_table_dot_sql_with_join(backend, con): .sql( """ SELECT - l.fancy_af AS yas, - r.s AS s - FROM awesome_t AS l - LEFT JOIN ft AS r - ON l.s = r.s - """ # clickhouse needs the r.s AS s, otherwise the column name is returned as r.s + "l"."fancy_af" AS "yas", + "r"."s" AS "s" + FROM "awesome_t" AS 
"l" + LEFT JOIN "ft" AS "r" + ON "l"."s" = "r"."s" + """, # clickhouse needs the r.s AS s, otherwise the column name is returned as r.s + dialect="duckdb", ) .order_by(["s", "yas"]) ) @@ -159,47 +179,39 @@ def test_table_dot_sql_with_join(backend, con): backend.assert_frame_equal(result, expected) -@table_dot_sql_notimpl +@pytest.mark.notyet(["polars"], raises=PolarsComputeError) +@pytest.mark.notyet(["druid"], reason="druid doesn't respect column name case") +@pytest.mark.notyet( + ["bigquery"], raises=GoogleBadRequest, reason="requires a qualified name" +) @dot_sql_notimpl -@dot_sql_notyet @dot_sql_never -def test_table_dot_sql_repr(con): - alltypes = con.table("functional_alltypes") +def test_table_dot_sql_repr(backend): + alltypes = backend.functional_alltypes t = ( alltypes.sql( """ SELECT - string_col as s, - double_col + 1.0 AS new_col - FROM functional_alltypes - """ + "string_col" AS "s", + "double_col" + CAST(1.0 AS DOUBLE) AS "new_col" + FROM "functional_alltypes" + """, + dialect="duckdb", ) .group_by("s") # group by a column from SQL .aggregate(fancy_af=lambda t: t.new_col.mean()) .alias("awesome_t") # create a name for the aggregate - .sql("SELECT fancy_af AS yas FROM awesome_t ORDER BY fancy_af") + .sql( + 'SELECT "fancy_af" AS "yas" FROM "awesome_t" ORDER BY "fancy_af"', + dialect="duckdb", + ) ) assert repr(t) -@table_dot_sql_notimpl @dot_sql_notimpl @dot_sql_never -@pytest.mark.notimpl(["oracle"]) -@pytest.mark.notyet(["polars"], raises=PolarsComputeError) -@pytest.mark.notimpl(["exasol"], strict=False) -def test_table_dot_sql_does_not_clobber_existing_tables(con, temp_table): - t = con.create_table(temp_table, schema=ibis.schema(dict(a="string"))) - expr = t.sql("SELECT 1 as x FROM functional_alltypes") - with pytest.raises(ValueError): - expr.alias(temp_table) - - -@table_dot_sql_notimpl -@dot_sql_notimpl -@dot_sql_never -@pytest.mark.notimpl(["oracle"]) def test_dot_sql_alias_with_params(backend, alltypes, df): t = alltypes x = t.select(x=t.string_col + " abc").alias("foo") @@ -208,10 +220,8 @@ def test_dot_sql_alias_with_params(backend, alltypes, df): backend.assert_series_equal(result.x, expected) -@table_dot_sql_notimpl @dot_sql_notimpl @dot_sql_never -@pytest.mark.notimpl(["oracle"]) def test_dot_sql_reuse_alias_with_different_types(backend, alltypes, df): foo1 = alltypes.select(x=alltypes.string_col).alias("foo") foo2 = alltypes.select(x=alltypes.bigint_col).alias("foo") @@ -239,14 +249,14 @@ def test_dot_sql_reuse_alias_with_different_types(backend, alltypes, df): raises=ValueError, reason="risingwave doesn't support sqlglot.dialects.dialect.Dialect", ) -@table_dot_sql_notimpl +@pytest.mark.notyet(["polars"], raises=PolarsComputeError) @dot_sql_notimpl -@dot_sql_notyet @dot_sql_never +@pytest.mark.notyet(["druid"], reason="druid doesn't respect column name case") def test_table_dot_sql_transpile(backend, alltypes, dialect, df): name = "foo2" foo = alltypes.select(x=_.bigint_col + 1).alias(name) - expr = sg.select("x").from_(sg.table(name, quoted=True)) + expr = sg.select(sg.column("x", quoted=True)).from_(sg.table(name, quoted=True)) dialect = _IBIS_TO_SQLGLOT_DIALECT.get(dialect, dialect) sqlstr = expr.sql(dialect=dialect, pretty=True) dot_sql_expr = foo.sql(sqlstr, dialect=dialect) @@ -267,9 +277,6 @@ def test_table_dot_sql_transpile(backend, alltypes, dialect, df): ["druid"], raises=AttributeError, reason="druid doesn't respect column names" ) @pytest.mark.notyet(["snowflake", "bigquery"]) -@pytest.mark.notyet( - ["oracle"], strict=False, reason="only works 
with backends that quote everything" -) @pytest.mark.notyet( ["risingwave"], raises=ValueError, @@ -278,8 +285,10 @@ def test_table_dot_sql_transpile(backend, alltypes, dialect, df): @dot_sql_notimpl @dot_sql_never def test_con_dot_sql_transpile(backend, con, dialect, df): - t = sg.table("functional_alltypes") - foo = sg.select(sg.alias(sg.column("bigint_col") + 1, "x")).from_(t) + t = sg.table("functional_alltypes", quoted=True) + foo = sg.select( + sg.alias(sg.column("bigint_col", quoted=True) + 1, "x", quoted=True) + ).from_(t) dialect = _IBIS_TO_SQLGLOT_DIALECT.get(dialect, dialect) sqlstr = foo.sql(dialect=dialect, pretty=True) expr = con.sql(sqlstr, dialect=dialect) @@ -290,7 +299,7 @@ def test_con_dot_sql_transpile(backend, con, dialect, df): @dot_sql_notimpl @dot_sql_never -@pytest.mark.notimpl(["druid", "flink", "polars"]) +@pytest.mark.notimpl(["druid", "flink", "polars", "exasol"]) @pytest.mark.notyet(["snowflake"], reason="snowflake column names are case insensitive") @pytest.mark.notyet( ["risingwave"], @@ -299,9 +308,8 @@ def test_con_dot_sql_transpile(backend, con, dialect, df): ) def test_order_by_no_projection(backend): con = backend.connection - astronauts = con.table("astronauts") expr = ( - astronauts.group_by("name") + backend.astronauts.group_by("name") .agg(nbr_missions=_.count()) .order_by(_.nbr_missions.desc()) ) @@ -311,9 +319,38 @@ def test_order_by_no_projection(backend): @dot_sql_notimpl -@dot_sql_notyet @dot_sql_never @pytest.mark.notyet(["polars"], raises=PolarsComputeError) def test_dot_sql_limit(con): - expr = con.sql("SELECT * FROM (SELECT 'abc' ts) _").limit(1) - assert expr.execute().equals(pd.DataFrame({"ts": ["abc"]})) + expr = con.sql('SELECT * FROM (SELECT \'abc\' "ts") "x"', dialect="duckdb").limit(1) + result = expr.execute() + + assert len(result) == 1 + assert len(result.columns) == 1 + assert result.columns[0].lower() == "ts" + assert result.iat[0, 0] == "abc" + + +@pytest.fixture(scope="module") +def mem_t(con): + if con.name == "druid": + pytest.xfail("druid does not support create_table") + name = f"test_{con.name}_temp_mem_t_for_cte" + t = con.create_table(name, ibis.memtable({"a": list("def")})) + yield t + with contextlib.suppress(NotImplementedError): + con.drop_table(name, force=True) + + +@dot_sql_notimpl +@dot_sql_never +@pytest.mark.notyet(["polars"], raises=PolarsComputeError) +def test_cte(con, snapshot, mem_t): + t = mem_t + foo = t.alias("foo") + assert foo.schema() == t.schema() + assert foo.count().execute() == t.count().execute() + + expr = foo.sql('SELECT count(*) "x" FROM "foo"', dialect="duckdb") + sql = con.compile(expr) + snapshot.assert_match(sql, "out.sql") diff --git a/ibis/backends/trino/__init__.py b/ibis/backends/trino/__init__.py index 72ee1b48edc0..0d790fc25948 100644 --- a/ibis/backends/trino/__init__.py +++ b/ibis/backends/trino/__init__.py @@ -2,7 +2,6 @@ from __future__ import annotations -import atexit import contextlib from functools import cached_property from operator import itemgetter @@ -295,7 +294,6 @@ def do_connect( timezone=timezone, **connect_args, ) - self._temp_views = set() @contextlib.contextmanager def _prepare_metadata(self, query: str) -> Iterator[dict[str, str]]: @@ -496,22 +494,6 @@ def create_table( return self.table(orig_table_ref.name) - def _get_temp_view_definition(self, name: str, definition: str) -> str: - return sge.Create( - this=sg.to_identifier(name, quoted=self.compiler.quoted), - kind="VIEW", - expression=definition, - replace=True, - ) - - def _register_temp_view_cleanup(self, 
name: str) -> None: - def drop(self, name: str, query: str): - self.raw_sql(query) - self._temp_views.discard(name) - - query = sge.Drop(this=sg.table(name), kind="VIEW", exists=True) - atexit.register(drop, self, name=name, query=query) - def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: import pandas as pd diff --git a/ibis/expr/format.py b/ibis/expr/format.py index 4a904a99462b..feb348402922 100644 --- a/ibis/expr/format.py +++ b/ibis/expr/format.py @@ -223,8 +223,8 @@ def _sql_query_result(op, query, **kwargs): clsname = op.__class__.__name__ if isinstance(op, ops.SQLStringView): - child, name = kwargs["child"], kwargs["name"] - top = f"{clsname}[{child}]: {name}\n" + child = kwargs["child"] + top = f"{clsname}[{child}]\n" else: top = f"{clsname}\n" diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index 15dcad52cfea..d520a4527634 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -195,6 +195,7 @@ def schema(self): return self.parent.schema +# TODO(kszucs): remove in favor of View @public class SelfReference(Simple): _uid_counter = itertools.count() @@ -408,29 +409,25 @@ class SQLQueryResult(Relation): @public -class SQLStringView(PhysicalTable): - """A view created from a SQL string.""" +class View(PhysicalTable): + """A view created from an expression.""" + # TODO(kszucs): rename it to parent child: Relation - query: str @attribute def schema(self): - # TODO(kszucs): avoid converting to expression - backend = self.child.to_expr()._find_backend() - return backend._get_schema_using_query(self.query) + return self.child.schema @public -class View(PhysicalTable): - """A view created from an expression.""" +class SQLStringView(Relation): + """A view created from a SQL string.""" child: Relation - name: str - - @attribute - def schema(self): - return self.child.schema + query: str + schema: Schema + values = FrozenDict() @public diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index 1a4c263970bd..7db999d5aeca 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -27,7 +27,6 @@ import ibis.expr.operations as ops import ibis.expr.schema as sch from ibis import util -from ibis.common.annotations import annotated from ibis.common.deferred import Deferred from ibis.expr.types.core import Expr, _FixedTextJupyterMixin @@ -49,8 +48,6 @@ from ibis.selectors import IfAnyAll, Selector from ibis.formats.pyarrow import PyArrowData -_ALIASES = (f"_ibis_view_{n:d}" for n in itertools.count()) - def _regular_join_method( name: str, @@ -3185,11 +3182,6 @@ def alias(self, alias: str) -> ir.Table: └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴───┘ """ expr = ops.View(child=self, name=alias).to_expr() - - # NB: calling compile is necessary so that any temporary views are - # created so that we can infer the schema without executing the entire - # query - expr.compile() return expr def sql(self, query: str, dialect: str | None = None) -> ir.Table: @@ -3277,13 +3269,23 @@ def sql(self, query: str, dialect: str | None = None) -> ir.Table: -------- [`Table.alias`](#ibis.expr.types.relations.Table.alias) ''' + op = self.op() + backend = self._find_backend() - # only transpile if dialect was passed if dialect is not None: - backend = self._find_backend() + # only transpile if dialect was passed query = backend._transpile_sql(query, dialect=dialect) - op = ops.SQLStringView(child=self, name=next(_ALIASES), query=query) - return op.to_expr() + 
+ if isinstance(op, ops.View): + name = op.name + expr = op.child.to_expr() + else: + name = util.gen_name("sql_query") + expr = self + + schema = backend._get_sql_string_view_schema(name, expr, query) + node = ops.SQLStringView(child=self.op(), query=query, schema=schema) + return node.to_expr() def to_pandas(self, **kwargs) -> pd.DataFrame: """Convert a table expression to a pandas DataFrame. diff --git a/ibis/tests/expr/mocks.py b/ibis/tests/expr/mocks.py index bab44651beef..d6344621893b 100644 --- a/ibis/tests/expr/mocks.py +++ b/ibis/tests/expr/mocks.py @@ -89,6 +89,9 @@ def _clean_up_cached_table(self, _): def _get_schema_using_query(self, query): return self.sql_query_schemas[query] + def _get_sql_string_view_schema(self, name, table, query): + return self.sql_query_schemas[query] + @contextlib.contextmanager def set_query_schema(self, query, schema): self.sql_query_schemas[query] = schema diff --git a/ibis/tests/expr/snapshots/test_format_sql_operations/test_format_sql_query_result/repr.txt b/ibis/tests/expr/snapshots/test_format_sql_operations/test_format_sql_query_result/repr.txt index 9589a01e618b..917551327ad2 100644 --- a/ibis/tests/expr/snapshots/test_format_sql_operations/test_format_sql_query_result/repr.txt +++ b/ibis/tests/expr/snapshots/test_format_sql_operations/test_format_sql_query_result/repr.txt @@ -29,14 +29,45 @@ r0 := DatabaseTable: airlines security_delay int32 late_aircraft_delay int32 -r1 := SQLStringView[r0]: foo +r1 := View: foo + year int32 + month int32 + day int32 + dayofweek int32 + dep_time int32 + crs_dep_time int32 + arr_time int32 + crs_arr_time int32 + carrier string + flight_num int32 + tail_num int32 + actual_elapsed_time int32 + crs_elapsed_time int32 + airtime int32 + arrdelay int32 + depdelay int32 + origin string + dest string + distance int32 + taxi_in int32 + taxi_out int32 + cancelled int32 + cancellation_code string + diverted int32 + carrier_delay int32 + weather_delay int32 + nas_delay int32 + security_delay int32 + late_aircraft_delay int32 + +r2 := SQLStringView[r1] query: SELECT carrier, mean(arrdelay) AS avg_arrdelay FROM airlines GROUP BY 1 ORDER … schema: carrier string avg_arrdelay float64 -Project[r1] - carrier: r1.carrier - avg_arrdelay: Round(r1.avg_arrdelay, digits=1) - island: Lowercase(r1.carrier) \ No newline at end of file +Project[r2] + carrier: r2.carrier + avg_arrdelay: Round(r2.avg_arrdelay, digits=1) + island: Lowercase(r2.carrier) \ No newline at end of file diff --git a/ibis/tests/expr/test_format_sql_operations.py b/ibis/tests/expr/test_format_sql_operations.py index fc1deeabccac..4025aa11cb52 100644 --- a/ibis/tests/expr/test_format_sql_operations.py +++ b/ibis/tests/expr/test_format_sql_operations.py @@ -16,10 +16,7 @@ def test_format_sql_query_result(con, snapshot): schema = ibis.schema({"carrier": "string", "avg_arrdelay": "double"}) with con.set_query_schema(query, schema): - expr = t.sql(query) - # name is autoincremented so we need to set it manually to make the - # snapshot stable - expr = expr.op().copy(name="foo").to_expr() + expr = t.alias("foo").sql(query) expr = expr.mutate( island=_.carrier.lower(), From b5fecc62d3769c52eecb21c6e11764f975abe57c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Sun, 28 Jan 2024 15:05:12 +0100 Subject: [PATCH 126/161] refactor(sql): reorganize sqlglot rewrites --- ibis/backends/base/sqlglot/compiler.py | 39 ++++--- ibis/backends/base/sqlglot/rewrites.py | 137 +++++++++++++++++++------ ibis/backends/pandas/rewrites.py | 6 +- 
ibis/expr/rewrites.py | 64 ++---------- 4 files changed, 132 insertions(+), 114 deletions(-) diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index 6c7e5ddb3b7b..0f5b3e738b6f 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -4,7 +4,6 @@ import calendar import itertools import math -import operator import string from collections.abc import Iterator, Mapping from functools import partial, reduce, singledispatchmethod @@ -19,17 +18,18 @@ import ibis.common.exceptions as com import ibis.expr.datatypes as dt import ibis.expr.operations as ops -from ibis.backends.base.sqlglot.rewrites import CTE, Select, Window, sqlize -from ibis.expr.operations.udf import InputType -from ibis.expr.rewrites import ( +from ibis.backends.base.sqlglot.rewrites import ( + CTE, + Select, + Window, add_one_to_nth_value_input, add_order_by_to_empty_ranking_window_functions, empty_in_values_right_side, one_to_zero_index, - replace_bucket, - replace_scalar_parameter, - unwrap_scalar_parameter, + sqlize, ) +from ibis.expr.operations.udf import InputType +from ibis.expr.rewrites import replace_bucket if TYPE_CHECKING: from collections.abc import Iterable @@ -222,6 +222,15 @@ def if_(self, condition, true, false: sge.Expression | None = None) -> sge.If: def cast(self, arg, to: dt.DataType) -> sge.Cast: return sg.cast(sge.convert(arg), to=self.type_mapper.from_ibis(to)) + def _prepare_params(self, params): + result = {} + for param, value in params.items(): + node = param.op() + if isinstance(node, ops.Alias): + node = node.arg + result[node] = value + return result + def translate(self, op, *, params: Mapping[ir.Value, Any]) -> sge.Expression: """Translate an ibis operation to a sqlglot expression. 
@@ -245,20 +254,8 @@ def translate(self, op, *, params: Mapping[ir.Value, Any]) -> sge.Expression: """ # substitute parameters immediately to avoid having to define a # ScalarParameter translation rule - # - # this lets us avoid threading `params` through every `translate_val` - # call only to be used in the one place it would be needed: the - # ScalarParameter `translate_val` rule - params = { - # remove aliases from scalar parameters - param.op().replace(unwrap_scalar_parameter): value - for param, value in (params or {}).items() - } - - op = op.replace( - replace_scalar_parameter(params) | reduce(operator.or_, self.rewrites) - ) - op, ctes = sqlize(op) + params = self._prepare_params(params) + op, ctes = sqlize(op, params=params, rewrites=self.rewrites) aliases = {} counter = itertools.count() diff --git a/ibis/backends/base/sqlglot/rewrites.py b/ibis/backends/base/sqlglot/rewrites.py index 9474959c53e7..56836962cb0c 100644 --- a/ibis/backends/base/sqlglot/rewrites.py +++ b/ibis/backends/base/sqlglot/rewrites.py @@ -3,7 +3,9 @@ from __future__ import annotations -from typing import Literal, Optional +import operator +from functools import reduce +from typing import TYPE_CHECKING, Any, Literal, Optional import toolz from public import public @@ -16,11 +18,14 @@ from ibis.common.collections import FrozenDict # noqa: TCH001 from ibis.common.deferred import var from ibis.common.graph import Graph -from ibis.common.patterns import Object, replace +from ibis.common.patterns import Object, Pattern, _, replace from ibis.common.typing import VarTuple # noqa: TCH001 -from ibis.expr.rewrites import p +from ibis.expr.rewrites import d, p, replace_parameter from ibis.expr.schema import Schema +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + x = var("x") y = var("y") @@ -77,25 +82,25 @@ def dtype(self): @replace(p.Project) -def project_to_select(_): +def project_to_select(_, **kwargs): """Convert a Project node to a Select node.""" return Select(_.parent, selections=_.values) @replace(p.Filter) -def filter_to_select(_): +def filter_to_select(_, **kwargs): """Convert a Filter node to a Select node.""" return Select(_.parent, selections=_.values, predicates=_.predicates) @replace(p.Sort) -def sort_to_select(_): +def sort_to_select(_, **kwargs): """Convert a Sort node to a Select node.""" return Select(_.parent, selections=_.values, sort_keys=_.keys) @replace(p.WindowFunction) -def window_function_to_window(_): +def window_function_to_window(_, **kwargs): """Convert a WindowFunction node to a Window node.""" if isinstance(_.frame, ops.RowsWindowFrame) and _.frame.max_lookback is not None: raise NotImplementedError("max_lookback is not supported for SQL backends") @@ -109,18 +114,8 @@ def window_function_to_window(_): ) -@replace(p.Log2) -def replace_log2(_): - return ops.Log(_.arg, base=2) - - -@replace(p.Log10) -def replace_log10(_): - return ops.Log(_.arg, base=10) - - @replace(Object(Select, Object(Select))) -def merge_select_select(_): +def merge_select_select(_, **kwargs): """Merge subsequent Select relations into one. This rewrites eliminates `_.parent` by merging the outer and the inner @@ -165,27 +160,105 @@ def extract_ctes(node): return result -def sqlize(node): - """Lower the ibis expression graph to a SQL-like relational algebra.""" +def sqlize( + node: ops.Node, + params: Mapping[ops.ScalarParameter, Any], + rewrites: Sequence[Pattern] = (), +) -> tuple[ops.Node, list[ops.Node]]: + """Lower the ibis expression graph to a SQL-like relational algebra. 
+ + Parameters + ---------- + node + The root node of the expression graph. + params + A mapping of scalar parameters to their values. + rewrites + Supplementary rewrites to apply to the expression graph. + + Returns + ------- + Tuple of the rewritten expression graph and a list of CTEs. + """ assert isinstance(node, ops.Relation) - step1 = node.replace( - window_function_to_window + # apply the backend specific rewrites + node = node.replace(reduce(operator.or_, rewrites)) + + # lower the expression graph to a SQL-like relational algebra + context = {"params": params} + sqlized = node.replace( + replace_parameter | project_to_select | filter_to_select | sort_to_select + | window_function_to_window, + context=context, ) - step2 = step1.replace(merge_select_select) - ctes = extract_ctes(step2) + # squash subsequent Select nodes into one + simplified = sqlized.replace(merge_select_select) + + # extract common table expressions while wrapping them in a CTE node + ctes = extract_ctes(simplified) subs = {cte: CTE(cte) for cte in ctes} - step3 = step2.replace(subs) + result = simplified.replace(subs) + + return result, ctes + + +# supplemental rewrites selectively used on a per-backend basis - return step3, ctes +"""Replace `log2` and `log10` with `log`.""" +replace_log2 = p.Log2 >> d.Log(_.arg, base=2) +replace_log10 = p.Log10 >> d.Log(_.arg, base=10) + + +"""Add an ORDER BY clause to rank window functions that don't have one.""" +add_order_by_to_empty_ranking_window_functions = p.WindowFunction( + func=p.NTile(y), + frame=p.WindowFrame(order_by=()) >> _.copy(order_by=(y,)), +) + +"""Replace checks against an empty right side with `False`.""" +empty_in_values_right_side = p.InValues(options=()) >> d.Literal(False, dtype=dt.bool) + + +@replace( + p.WindowFunction(p.RankBase | p.NTile) + | p.StringFind + | p.FindInSet + | p.ArrayPosition +) +def one_to_zero_index(_, **kwargs): + """Subtract one from one-index functions.""" + return ops.Subtract(_, 1) + + +@replace(ops.NthValue) +def add_one_to_nth_value_input(_, **kwargs): + if isinstance(_.nth, ops.Literal): + nth = ops.Literal(_.nth.value + 1, dtype=_.nth.dtype) + else: + nth = ops.Add(_.nth, 1) + return _.copy(nth=nth) + + +@replace(p.Sample) +def rewrite_sample_as_filter(_, **kwargs): + """Rewrite Sample as `t.filter(random() <= fraction)`. + + Errors as unsupported if a `seed` is specified. 
+ """ + if _.seed is not None: + raise com.UnsupportedOperationError( + "`Table.sample` with a random seed is unsupported" + ) + return ops.Filter(_.parent, (ops.LessEqual(ops.RandomScalar(), _.fraction),)) -@replace(p.WindowFunction(p.First(x, y))) -def rewrite_first_to_first_value(_, x, y): +@replace(p.WindowFunction(p.First(x, where=y))) +def rewrite_first_to_first_value(_, x, y, **kwargs): """Rewrite Ibis's first to first_value when used in a window function.""" if y is not None: raise com.UnsupportedOperationError( @@ -194,8 +267,8 @@ def rewrite_first_to_first_value(_, x, y): return _.copy(func=ops.FirstValue(x)) -@replace(p.WindowFunction(p.Last(x, y))) -def rewrite_last_to_last_value(_, x, y): +@replace(p.WindowFunction(p.Last(x, where=y))) +def rewrite_last_to_last_value(_, x, y, **kwargs): """Rewrite Ibis's last to last_value when used in a window function.""" if y is not None: raise com.UnsupportedOperationError( @@ -205,7 +278,7 @@ def rewrite_last_to_last_value(_, x, y): @replace(p.WindowFunction(frame=y @ p.WindowFrame(order_by=()))) -def rewrite_empty_order_by_window(_, y, **__): +def rewrite_empty_order_by_window(_, y, **kwargs): return _.copy(frame=y.copy(order_by=(ops.NULL,))) @@ -220,5 +293,5 @@ def exclude_unsupported_window_frame_from_row_number(_, y): y @ p.WindowFrame(start=None), ) ) -def exclude_unsupported_window_frame_from_ops(_, y): +def exclude_unsupported_window_frame_from_ops(_, y, **kwargs): return _.copy(frame=y.copy(start=None, end=0, order_by=y.order_by or (ops.NULL,))) diff --git a/ibis/backends/pandas/rewrites.py b/ibis/backends/pandas/rewrites.py index 18cf53813af5..435c931277b7 100644 --- a/ibis/backends/pandas/rewrites.py +++ b/ibis/backends/pandas/rewrites.py @@ -10,6 +10,7 @@ from ibis.common.collections import FrozenDict from ibis.common.patterns import replace from ibis.common.typing import VarTuple # noqa: TCH001 +from ibis.expr.rewrites import replace_parameter from ibis.expr.schema import Schema from ibis.util import gen_name @@ -298,11 +299,6 @@ def rewrite_scalar_subquery(_, **kwargs): return PandasScalarSubquery(_.rel) -@replace(ops.ScalarParameter) -def replace_parameter(_, params, **kwargs): - return ops.Literal(value=params[_], dtype=_.dtype) - - @replace(ops.UnboundTable) def bind_unbound_table(_, backend, **kwargs): return ops.DatabaseTable(name=_.name, schema=_.schema, source=backend) diff --git a/ibis/expr/rewrites.py b/ibis/expr/rewrites.py index f0352fefa8bd..8b1ea87de95d 100644 --- a/ibis/expr/rewrites.py +++ b/ibis/expr/rewrites.py @@ -28,20 +28,10 @@ def peel_join_field(_): return _.rel.values[_.name] -@replace(p.Alias(p.ScalarParameter)) -def unwrap_scalar_parameter(_): - """Replace aliased scalar parameters with the parameter itself.""" - return _.arg - - -def replace_scalar_parameter(params): +@replace(p.ScalarParameter) +def replace_parameter(_, params, **kwargs): """Replace scalar parameters with their values.""" - - @replace(p.ScalarParameter) - def repl(_): - return ops.Literal(value=params[_], dtype=_.dtype) - - return repl + return ops.Literal(value=params[_], dtype=_.dtype) @replace(p.FillNa) @@ -98,22 +88,21 @@ def rewrite_sample(_): Errors as unsupported if a `seed` is specified. 
""" - if _.seed is not None: raise com.UnsupportedOperationError( "`Table.sample` with a random seed is unsupported" ) - - return ops.Filter(_.parent, (ops.LessEqual(ops.RandomScalar(), _.fraction),)) + pred = ops.LessEqual(ops.RandomScalar(), _.fraction) + return ops.Filter(_.parent, (pred,)) -@replace(ops.Analytic) +@replace(p.Analytic) def project_wrap_analytic(_, rel): # Wrap analytic functions in a window function return ops.WindowFunction(_, ops.RowsWindowFrame(rel)) -@replace(ops.Reduction) +@replace(p.Reduction) def project_wrap_reduction(_, rel): # Query all the tables that the reduction depends on if _.relations == {rel}: @@ -203,44 +192,7 @@ def rewrite_window_input(value, frame): return node.replace(window_merge_frames, filter=p.Value, context=context) -@replace(p.InValues(..., ())) -def empty_in_values_right_side(_): - """Replace checks against an empty right side with `False`.""" - return ops.Literal(False, dtype=dt.bool) - - -@replace( - p.WindowFunction( - p.PercentRank(y) | p.RankBase(y) | p.CumeDist(y) | p.NTile(y), - p.WindowFrame(..., order_by=()) >> _.copy(order_by=(y,)), - ) -) -def add_order_by_to_empty_ranking_window_functions(_): - """Add an ORDER BY clause to rank window functions that don't have one.""" - return _ - - -@replace( - p.WindowFunction(p.RankBase | p.NTile) - | p.StringFind - | p.FindInSet - | p.ArrayPosition -) -def one_to_zero_index(_, **__): - """Subtract one from one-index functions.""" - return ops.Subtract(_, 1) - - -@replace(ops.NthValue) -def add_one_to_nth_value_input(_, **__): - if isinstance(_.nth, ops.Literal): - nth = ops.Literal(_.nth.value + 1, dtype=_.nth.dtype) - else: - nth = ops.Add(_.nth, 1) - return _.copy(nth=nth) - - -@replace(ops.Bucket) +@replace(p.Bucket) def replace_bucket(_): cases = [] results = [] From 3c8eb56168de489b10f9087eaa17d77cc9a78ba8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 1 Feb 2024 01:42:11 +0100 Subject: [PATCH 127/161] chore(deps): relock --- poetry.lock | 1577 +++++++++++++++++++++--------------------- requirements-dev.txt | 78 +-- 2 files changed, 839 insertions(+), 816 deletions(-) diff --git a/poetry.lock b/poetry.lock index 5527cd33729a..1120abb6e22e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2,87 +2,87 @@ [[package]] name = "aiohttp" -version = "3.9.3" +version = "3.9.1" description = "Async http client/server framework (asyncio)" optional = true python-versions = ">=3.8" files = [ - {file = "aiohttp-3.9.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:939677b61f9d72a4fa2a042a5eee2a99a24001a67c13da113b2e30396567db54"}, - {file = "aiohttp-3.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f5cd333fcf7590a18334c90f8c9147c837a6ec8a178e88d90a9b96ea03194cc"}, - {file = "aiohttp-3.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82e6aa28dd46374f72093eda8bcd142f7771ee1eb9d1e223ff0fa7177a96b4a5"}, - {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f56455b0c2c7cc3b0c584815264461d07b177f903a04481dfc33e08a89f0c26b"}, - {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bca77a198bb6e69795ef2f09a5f4c12758487f83f33d63acde5f0d4919815768"}, - {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e083c285857b78ee21a96ba1eb1b5339733c3563f72980728ca2b08b53826ca5"}, - {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ab40e6251c3873d86ea9b30a1ac6d7478c09277b32e14745d0d3c6e76e3c7e29"}, - {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df822ee7feaaeffb99c1a9e5e608800bd8eda6e5f18f5cfb0dc7eeb2eaa6bbec"}, - {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:acef0899fea7492145d2bbaaaec7b345c87753168589cc7faf0afec9afe9b747"}, - {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:cd73265a9e5ea618014802ab01babf1940cecb90c9762d8b9e7d2cc1e1969ec6"}, - {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a78ed8a53a1221393d9637c01870248a6f4ea5b214a59a92a36f18151739452c"}, - {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:6b0e029353361f1746bac2e4cc19b32f972ec03f0f943b390c4ab3371840aabf"}, - {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7cf5c9458e1e90e3c390c2639f1017a0379a99a94fdfad3a1fd966a2874bba52"}, - {file = "aiohttp-3.9.3-cp310-cp310-win32.whl", hash = "sha256:3e59c23c52765951b69ec45ddbbc9403a8761ee6f57253250c6e1536cacc758b"}, - {file = "aiohttp-3.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:055ce4f74b82551678291473f66dc9fb9048a50d8324278751926ff0ae7715e5"}, - {file = "aiohttp-3.9.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6b88f9386ff1ad91ace19d2a1c0225896e28815ee09fc6a8932fded8cda97c3d"}, - {file = "aiohttp-3.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c46956ed82961e31557b6857a5ca153c67e5476972e5f7190015018760938da2"}, - {file = "aiohttp-3.9.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:07b837ef0d2f252f96009e9b8435ec1fef68ef8b1461933253d318748ec1acdc"}, - {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad46e6f620574b3b4801c68255492e0159d1712271cc99d8bdf35f2043ec266"}, - {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ed3e046ea7b14938112ccd53d91c1539af3e6679b222f9469981e3dac7ba1ce"}, - {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:039df344b45ae0b34ac885ab5b53940b174530d4dd8a14ed8b0e2155b9dddccb"}, - {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7943c414d3a8d9235f5f15c22ace69787c140c80b718dcd57caaade95f7cd93b"}, - {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84871a243359bb42c12728f04d181a389718710129b36b6aad0fc4655a7647d4"}, - {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5eafe2c065df5401ba06821b9a054d9cb2848867f3c59801b5d07a0be3a380ae"}, - {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9d3c9b50f19704552f23b4eaea1fc082fdd82c63429a6506446cbd8737823da3"}, - {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:f033d80bc6283092613882dfe40419c6a6a1527e04fc69350e87a9df02bbc283"}, - {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:2c895a656dd7e061b2fd6bb77d971cc38f2afc277229ce7dd3552de8313a483e"}, - {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1f5a71d25cd8106eab05f8704cd9167b6e5187bcdf8f090a66c6d88b634802b4"}, - {file = "aiohttp-3.9.3-cp311-cp311-win32.whl", hash = "sha256:50fca156d718f8ced687a373f9e140c1bb765ca16e3d6f4fe116e3df7c05b2c5"}, - {file = "aiohttp-3.9.3-cp311-cp311-win_amd64.whl", hash = 
"sha256:5fe9ce6c09668063b8447f85d43b8d1c4e5d3d7e92c63173e6180b2ac5d46dd8"}, - {file = "aiohttp-3.9.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:38a19bc3b686ad55804ae931012f78f7a534cce165d089a2059f658f6c91fa60"}, - {file = "aiohttp-3.9.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:770d015888c2a598b377bd2f663adfd947d78c0124cfe7b959e1ef39f5b13869"}, - {file = "aiohttp-3.9.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ee43080e75fc92bf36219926c8e6de497f9b247301bbf88c5c7593d931426679"}, - {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52df73f14ed99cee84865b95a3d9e044f226320a87af208f068ecc33e0c35b96"}, - {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc9b311743a78043b26ffaeeb9715dc360335e5517832f5a8e339f8a43581e4d"}, - {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b955ed993491f1a5da7f92e98d5dad3c1e14dc175f74517c4e610b1f2456fb11"}, - {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:504b6981675ace64c28bf4a05a508af5cde526e36492c98916127f5a02354d53"}, - {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6fe5571784af92b6bc2fda8d1925cccdf24642d49546d3144948a6a1ed58ca5"}, - {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ba39e9c8627edc56544c8628cc180d88605df3892beeb2b94c9bc857774848ca"}, - {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e5e46b578c0e9db71d04c4b506a2121c0cb371dd89af17a0586ff6769d4c58c1"}, - {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:938a9653e1e0c592053f815f7028e41a3062e902095e5a7dc84617c87267ebd5"}, - {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:c3452ea726c76e92f3b9fae4b34a151981a9ec0a4847a627c43d71a15ac32aa6"}, - {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ff30218887e62209942f91ac1be902cc80cddb86bf00fbc6783b7a43b2bea26f"}, - {file = "aiohttp-3.9.3-cp312-cp312-win32.whl", hash = "sha256:38f307b41e0bea3294a9a2a87833191e4bcf89bb0365e83a8be3a58b31fb7f38"}, - {file = "aiohttp-3.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:b791a3143681a520c0a17e26ae7465f1b6f99461a28019d1a2f425236e6eedb5"}, - {file = "aiohttp-3.9.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0ed621426d961df79aa3b963ac7af0d40392956ffa9be022024cd16297b30c8c"}, - {file = "aiohttp-3.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7f46acd6a194287b7e41e87957bfe2ad1ad88318d447caf5b090012f2c5bb528"}, - {file = "aiohttp-3.9.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:feeb18a801aacb098220e2c3eea59a512362eb408d4afd0c242044c33ad6d542"}, - {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f734e38fd8666f53da904c52a23ce517f1b07722118d750405af7e4123933511"}, - {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b40670ec7e2156d8e57f70aec34a7216407848dfe6c693ef131ddf6e76feb672"}, - {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fdd215b7b7fd4a53994f238d0f46b7ba4ac4c0adb12452beee724ddd0743ae5d"}, - {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:017a21b0df49039c8f46ca0971b3a7fdc1f56741ab1240cb90ca408049766168"}, - {file = 
"aiohttp-3.9.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e99abf0bba688259a496f966211c49a514e65afa9b3073a1fcee08856e04425b"}, - {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:648056db9a9fa565d3fa851880f99f45e3f9a771dd3ff3bb0c048ea83fb28194"}, - {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8aacb477dc26797ee089721536a292a664846489c49d3ef9725f992449eda5a8"}, - {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:522a11c934ea660ff8953eda090dcd2154d367dec1ae3c540aff9f8a5c109ab4"}, - {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:5bce0dc147ca85caa5d33debc4f4d65e8e8b5c97c7f9f660f215fa74fc49a321"}, - {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b4af9f25b49a7be47c0972139e59ec0e8285c371049df1a63b6ca81fdd216a2"}, - {file = "aiohttp-3.9.3-cp38-cp38-win32.whl", hash = "sha256:298abd678033b8571995650ccee753d9458dfa0377be4dba91e4491da3f2be63"}, - {file = "aiohttp-3.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:69361bfdca5468c0488d7017b9b1e5ce769d40b46a9f4a2eed26b78619e9396c"}, - {file = "aiohttp-3.9.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0fa43c32d1643f518491d9d3a730f85f5bbaedcbd7fbcae27435bb8b7a061b29"}, - {file = "aiohttp-3.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:835a55b7ca49468aaaac0b217092dfdff370e6c215c9224c52f30daaa735c1c1"}, - {file = "aiohttp-3.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:06a9b2c8837d9a94fae16c6223acc14b4dfdff216ab9b7202e07a9a09541168f"}, - {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abf151955990d23f84205286938796c55ff11bbfb4ccfada8c9c83ae6b3c89a3"}, - {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59c26c95975f26e662ca78fdf543d4eeaef70e533a672b4113dd888bd2423caa"}, - {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f95511dd5d0e05fd9728bac4096319f80615aaef4acbecb35a990afebe953b0e"}, - {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:595f105710293e76b9dc09f52e0dd896bd064a79346234b521f6b968ffdd8e58"}, - {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7c8b816c2b5af5c8a436df44ca08258fc1a13b449393a91484225fcb7545533"}, - {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f1088fa100bf46e7b398ffd9904f4808a0612e1d966b4aa43baa535d1b6341eb"}, - {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f59dfe57bb1ec82ac0698ebfcdb7bcd0e99c255bd637ff613760d5f33e7c81b3"}, - {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:361a1026c9dd4aba0109e4040e2aecf9884f5cfe1b1b1bd3d09419c205e2e53d"}, - {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:363afe77cfcbe3a36353d8ea133e904b108feea505aa4792dad6585a8192c55a"}, - {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e2c45c208c62e955e8256949eb225bd8b66a4c9b6865729a786f2aa79b72e9d"}, - {file = "aiohttp-3.9.3-cp39-cp39-win32.whl", hash = "sha256:f7217af2e14da0856e082e96ff637f14ae45c10a5714b63c77f26d8884cf1051"}, - {file = "aiohttp-3.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:27468897f628c627230dba07ec65dc8d0db566923c48f29e084ce382119802bc"}, - {file = "aiohttp-3.9.3.tar.gz", hash = 
"sha256:90842933e5d1ff760fae6caca4b2b3edba53ba8f4b71e95dacf2818a2aca06f7"}, + {file = "aiohttp-3.9.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e1f80197f8b0b846a8d5cf7b7ec6084493950d0882cc5537fb7b96a69e3c8590"}, + {file = "aiohttp-3.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c72444d17777865734aa1a4d167794c34b63e5883abb90356a0364a28904e6c0"}, + {file = "aiohttp-3.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9b05d5cbe9dafcdc733262c3a99ccf63d2f7ce02543620d2bd8db4d4f7a22f83"}, + {file = "aiohttp-3.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c4fa235d534b3547184831c624c0b7c1e262cd1de847d95085ec94c16fddcd5"}, + {file = "aiohttp-3.9.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:289ba9ae8e88d0ba16062ecf02dd730b34186ea3b1e7489046fc338bdc3361c4"}, + {file = "aiohttp-3.9.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bff7e2811814fa2271be95ab6e84c9436d027a0e59665de60edf44e529a42c1f"}, + {file = "aiohttp-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81b77f868814346662c96ab36b875d7814ebf82340d3284a31681085c051320f"}, + {file = "aiohttp-3.9.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b9c7426923bb7bd66d409da46c41e3fb40f5caf679da624439b9eba92043fa6"}, + {file = "aiohttp-3.9.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:8d44e7bf06b0c0a70a20f9100af9fcfd7f6d9d3913e37754c12d424179b4e48f"}, + {file = "aiohttp-3.9.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:22698f01ff5653fe66d16ffb7658f582a0ac084d7da1323e39fd9eab326a1f26"}, + {file = "aiohttp-3.9.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ca7ca5abfbfe8d39e653870fbe8d7710be7a857f8a8386fc9de1aae2e02ce7e4"}, + {file = "aiohttp-3.9.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:8d7f98fde213f74561be1d6d3fa353656197f75d4edfbb3d94c9eb9b0fc47f5d"}, + {file = "aiohttp-3.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5216b6082c624b55cfe79af5d538e499cd5f5b976820eac31951fb4325974501"}, + {file = "aiohttp-3.9.1-cp310-cp310-win32.whl", hash = "sha256:0e7ba7ff228c0d9a2cd66194e90f2bca6e0abca810b786901a569c0de082f489"}, + {file = "aiohttp-3.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:c7e939f1ae428a86e4abbb9a7c4732bf4706048818dfd979e5e2839ce0159f23"}, + {file = "aiohttp-3.9.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:df9cf74b9bc03d586fc53ba470828d7b77ce51b0582d1d0b5b2fb673c0baa32d"}, + {file = "aiohttp-3.9.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecca113f19d5e74048c001934045a2b9368d77b0b17691d905af18bd1c21275e"}, + {file = "aiohttp-3.9.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8cef8710fb849d97c533f259103f09bac167a008d7131d7b2b0e3a33269185c0"}, + {file = "aiohttp-3.9.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bea94403a21eb94c93386d559bce297381609153e418a3ffc7d6bf772f59cc35"}, + {file = "aiohttp-3.9.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91c742ca59045dce7ba76cab6e223e41d2c70d79e82c284a96411f8645e2afff"}, + {file = "aiohttp-3.9.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6c93b7c2e52061f0925c3382d5cb8980e40f91c989563d3d32ca280069fd6a87"}, + {file = "aiohttp-3.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee2527134f95e106cc1653e9ac78846f3a2ec1004cf20ef4e02038035a74544d"}, + {file = 
"aiohttp-3.9.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11ff168d752cb41e8492817e10fb4f85828f6a0142b9726a30c27c35a1835f01"}, + {file = "aiohttp-3.9.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b8c3a67eb87394386847d188996920f33b01b32155f0a94f36ca0e0c635bf3e3"}, + {file = "aiohttp-3.9.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c7b5d5d64e2a14e35a9240b33b89389e0035e6de8dbb7ffa50d10d8b65c57449"}, + {file = "aiohttp-3.9.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:69985d50a2b6f709412d944ffb2e97d0be154ea90600b7a921f95a87d6f108a2"}, + {file = "aiohttp-3.9.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:c9110c06eaaac7e1f5562caf481f18ccf8f6fdf4c3323feab28a93d34cc646bd"}, + {file = "aiohttp-3.9.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d737e69d193dac7296365a6dcb73bbbf53bb760ab25a3727716bbd42022e8d7a"}, + {file = "aiohttp-3.9.1-cp311-cp311-win32.whl", hash = "sha256:4ee8caa925aebc1e64e98432d78ea8de67b2272252b0a931d2ac3bd876ad5544"}, + {file = "aiohttp-3.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:a34086c5cc285be878622e0a6ab897a986a6e8bf5b67ecb377015f06ed316587"}, + {file = "aiohttp-3.9.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f800164276eec54e0af5c99feb9494c295118fc10a11b997bbb1348ba1a52065"}, + {file = "aiohttp-3.9.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:500f1c59906cd142d452074f3811614be04819a38ae2b3239a48b82649c08821"}, + {file = "aiohttp-3.9.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0b0a6a36ed7e164c6df1e18ee47afbd1990ce47cb428739d6c99aaabfaf1b3af"}, + {file = "aiohttp-3.9.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69da0f3ed3496808e8cbc5123a866c41c12c15baaaead96d256477edf168eb57"}, + {file = "aiohttp-3.9.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:176df045597e674fa950bf5ae536be85699e04cea68fa3a616cf75e413737eb5"}, + {file = "aiohttp-3.9.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b796b44111f0cab6bbf66214186e44734b5baab949cb5fb56154142a92989aeb"}, + {file = "aiohttp-3.9.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f27fdaadce22f2ef950fc10dcdf8048407c3b42b73779e48a4e76b3c35bca26c"}, + {file = "aiohttp-3.9.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcb6532b9814ea7c5a6a3299747c49de30e84472fa72821b07f5a9818bce0f66"}, + {file = "aiohttp-3.9.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:54631fb69a6e44b2ba522f7c22a6fb2667a02fd97d636048478db2fd8c4e98fe"}, + {file = "aiohttp-3.9.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4b4c452d0190c5a820d3f5c0f3cd8a28ace48c54053e24da9d6041bf81113183"}, + {file = "aiohttp-3.9.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:cae4c0c2ca800c793cae07ef3d40794625471040a87e1ba392039639ad61ab5b"}, + {file = "aiohttp-3.9.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:565760d6812b8d78d416c3c7cfdf5362fbe0d0d25b82fed75d0d29e18d7fc30f"}, + {file = "aiohttp-3.9.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:54311eb54f3a0c45efb9ed0d0a8f43d1bc6060d773f6973efd90037a51cd0a3f"}, + {file = "aiohttp-3.9.1-cp312-cp312-win32.whl", hash = "sha256:85c3e3c9cb1d480e0b9a64c658cd66b3cfb8e721636ab8b0e746e2d79a7a9eed"}, + {file = "aiohttp-3.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:11cb254e397a82efb1805d12561e80124928e04e9c4483587ce7390b3866d213"}, + {file = 
"aiohttp-3.9.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:8a22a34bc594d9d24621091d1b91511001a7eea91d6652ea495ce06e27381f70"}, + {file = "aiohttp-3.9.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:598db66eaf2e04aa0c8900a63b0101fdc5e6b8a7ddd805c56d86efb54eb66672"}, + {file = "aiohttp-3.9.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2c9376e2b09895c8ca8b95362283365eb5c03bdc8428ade80a864160605715f1"}, + {file = "aiohttp-3.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41473de252e1797c2d2293804e389a6d6986ef37cbb4a25208de537ae32141dd"}, + {file = "aiohttp-3.9.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c5857612c9813796960c00767645cb5da815af16dafb32d70c72a8390bbf690"}, + {file = "aiohttp-3.9.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ffcd828e37dc219a72c9012ec44ad2e7e3066bec6ff3aaa19e7d435dbf4032ca"}, + {file = "aiohttp-3.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:219a16763dc0294842188ac8a12262b5671817042b35d45e44fd0a697d8c8361"}, + {file = "aiohttp-3.9.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f694dc8a6a3112059258a725a4ebe9acac5fe62f11c77ac4dcf896edfa78ca28"}, + {file = "aiohttp-3.9.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:bcc0ea8d5b74a41b621ad4a13d96c36079c81628ccc0b30cfb1603e3dfa3a014"}, + {file = "aiohttp-3.9.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:90ec72d231169b4b8d6085be13023ece8fa9b1bb495e4398d847e25218e0f431"}, + {file = "aiohttp-3.9.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:cf2a0ac0615842b849f40c4d7f304986a242f1e68286dbf3bd7a835e4f83acfd"}, + {file = "aiohttp-3.9.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:0e49b08eafa4f5707ecfb321ab9592717a319e37938e301d462f79b4e860c32a"}, + {file = "aiohttp-3.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2c59e0076ea31c08553e868cec02d22191c086f00b44610f8ab7363a11a5d9d8"}, + {file = "aiohttp-3.9.1-cp38-cp38-win32.whl", hash = "sha256:4831df72b053b1eed31eb00a2e1aff6896fb4485301d4ccb208cac264b648db4"}, + {file = "aiohttp-3.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:3135713c5562731ee18f58d3ad1bf41e1d8883eb68b363f2ffde5b2ea4b84cc7"}, + {file = "aiohttp-3.9.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cfeadf42840c1e870dc2042a232a8748e75a36b52d78968cda6736de55582766"}, + {file = "aiohttp-3.9.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:70907533db712f7aa791effb38efa96f044ce3d4e850e2d7691abd759f4f0ae0"}, + {file = "aiohttp-3.9.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cdefe289681507187e375a5064c7599f52c40343a8701761c802c1853a504558"}, + {file = "aiohttp-3.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7481f581251bb5558ba9f635db70908819caa221fc79ee52a7f58392778c636"}, + {file = "aiohttp-3.9.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:49f0c1b3c2842556e5de35f122fc0f0b721334ceb6e78c3719693364d4af8499"}, + {file = "aiohttp-3.9.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0d406b01a9f5a7e232d1b0d161b40c05275ffbcbd772dc18c1d5a570961a1ca4"}, + {file = "aiohttp-3.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d8e4450e7fe24d86e86b23cc209e0023177b6d59502e33807b732d2deb6975f"}, + {file = "aiohttp-3.9.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:3c0266cd6f005e99f3f51e583012de2778e65af6b73860038b968a0a8888487a"}, + {file = "aiohttp-3.9.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab221850108a4a063c5b8a70f00dd7a1975e5a1713f87f4ab26a46e5feac5a0e"}, + {file = "aiohttp-3.9.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c88a15f272a0ad3d7773cf3a37cc7b7d077cbfc8e331675cf1346e849d97a4e5"}, + {file = "aiohttp-3.9.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:237533179d9747080bcaad4d02083ce295c0d2eab3e9e8ce103411a4312991a0"}, + {file = "aiohttp-3.9.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:02ab6006ec3c3463b528374c4cdce86434e7b89ad355e7bf29e2f16b46c7dd6f"}, + {file = "aiohttp-3.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04fa38875e53eb7e354ece1607b1d2fdee2d175ea4e4d745f6ec9f751fe20c7c"}, + {file = "aiohttp-3.9.1-cp39-cp39-win32.whl", hash = "sha256:82eefaf1a996060602f3cc1112d93ba8b201dbf5d8fd9611227de2003dddb3b7"}, + {file = "aiohttp-3.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:9b05d33ff8e6b269e30a7957bd3244ffbce2a7a35a81b81c382629b80af1a8bf"}, + {file = "aiohttp-3.9.1.tar.gz", hash = "sha256:8fc49a87ac269d4529da45871e2ffb6874e87779c3d0e2ccd813c0899221239d"}, ] [package.dependencies] @@ -259,40 +259,37 @@ tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "p [[package]] name = "beartype" -version = "0.17.0" +version = "0.16.4" description = "Unbearably fast runtime type checking in pure Python." optional = false python-versions = ">=3.8.0" files = [ - {file = "beartype-0.17.0-py3-none-any.whl", hash = "sha256:fa84b77a8d037f2a39c4aa2f3dc71854afc7d79312e55a66b338da68fdd48c60"}, - {file = "beartype-0.17.0.tar.gz", hash = "sha256:3226fbba8c53b4e698acdb47dcaf3c0640151c4d405618c281e6631f4112947d"}, + {file = "beartype-0.16.4-py3-none-any.whl", hash = "sha256:64865952f9dff1e17f22684b3c7286fc79754553b47eaefeb1286224ae8c1bd9"}, + {file = "beartype-0.16.4.tar.gz", hash = "sha256:1ada89cf2d6eb30eb6e156eed2eb5493357782937910d74380918e53c2eae0bf"}, ] [package.extras] all = ["typing-extensions (>=3.10.0.0)"] -dev = ["autoapi (>=0.9.0)", "coverage (>=5.5)", "equinox", "mypy (>=0.800)", "numpy", "pandera", "pydata-sphinx-theme (<=0.7.2)", "pytest (>=4.0.0)", "sphinx", "sphinx (>=4.2.0,<6.0.0)", "sphinxext-opengraph (>=0.7.5)", "torch", "tox (>=3.20.1)", "typing-extensions (>=3.10.0.0)"] +dev = ["autoapi (>=0.9.0)", "coverage (>=5.5)", "mypy (>=0.800)", "numpy", "pandera", "pydata-sphinx-theme (<=0.7.2)", "pytest (>=4.0.0)", "sphinx", "sphinx (>=4.2.0,<6.0.0)", "sphinxext-opengraph (>=0.7.5)", "tox (>=3.20.1)", "typing-extensions (>=3.10.0.0)"] doc-rtd = ["autoapi (>=0.9.0)", "pydata-sphinx-theme (<=0.7.2)", "sphinx (>=4.2.0,<6.0.0)", "sphinxext-opengraph (>=0.7.5)"] -test-tox = ["equinox", "mypy (>=0.800)", "numpy", "pandera", "pytest (>=4.0.0)", "sphinx", "torch", "typing-extensions (>=3.10.0.0)"] +test-tox = ["mypy (>=0.800)", "numpy", "pandera", "pytest (>=4.0.0)", "sphinx", "typing-extensions (>=3.10.0.0)"] test-tox-coverage = ["coverage (>=5.5)"] [[package]] name = "beautifulsoup4" -version = "4.12.3" +version = "4.12.2" description = "Screen-scraping library" optional = false python-versions = ">=3.6.0" files = [ - {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, - {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, + {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = 
"sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, + {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, ] [package.dependencies] soupsieve = ">1.2" [package.extras] -cchardet = ["cchardet"] -chardet = ["chardet"] -charset-normalizer = ["charset-normalizer"] html5lib = ["html5lib"] lxml = ["lxml"] @@ -445,33 +442,33 @@ files = [ [[package]] name = "black" -version = "24.1.1" +version = "23.12.1" description = "The uncompromising code formatter." optional = true python-versions = ">=3.8" files = [ - {file = "black-24.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2588021038bd5ada078de606f2a804cadd0a3cc6a79cb3e9bb3a8bf581325a4c"}, - {file = "black-24.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a95915c98d6e32ca43809d46d932e2abc5f1f7d582ffbe65a5b4d1588af7445"}, - {file = "black-24.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fa6a0e965779c8f2afb286f9ef798df770ba2b6cee063c650b96adec22c056a"}, - {file = "black-24.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:5242ecd9e990aeb995b6d03dc3b2d112d4a78f2083e5a8e86d566340ae80fec4"}, - {file = "black-24.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fc1ec9aa6f4d98d022101e015261c056ddebe3da6a8ccfc2c792cbe0349d48b7"}, - {file = "black-24.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0269dfdea12442022e88043d2910429bed717b2d04523867a85dacce535916b8"}, - {file = "black-24.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3d64db762eae4a5ce04b6e3dd745dcca0fb9560eb931a5be97472e38652a161"}, - {file = "black-24.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:5d7b06ea8816cbd4becfe5f70accae953c53c0e53aa98730ceccb0395520ee5d"}, - {file = "black-24.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e2c8dfa14677f90d976f68e0c923947ae68fa3961d61ee30976c388adc0b02c8"}, - {file = "black-24.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a21725862d0e855ae05da1dd25e3825ed712eaaccef6b03017fe0853a01aa45e"}, - {file = "black-24.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07204d078e25327aad9ed2c64790d681238686bce254c910de640c7cc4fc3aa6"}, - {file = "black-24.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:a83fe522d9698d8f9a101b860b1ee154c1d25f8a82ceb807d319f085b2627c5b"}, - {file = "black-24.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08b34e85170d368c37ca7bf81cf67ac863c9d1963b2c1780c39102187ec8dd62"}, - {file = "black-24.1.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7258c27115c1e3b5de9ac6c4f9957e3ee2c02c0b39222a24dc7aa03ba0e986f5"}, - {file = "black-24.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40657e1b78212d582a0edecafef133cf1dd02e6677f539b669db4746150d38f6"}, - {file = "black-24.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:e298d588744efda02379521a19639ebcd314fba7a49be22136204d7ed1782717"}, - {file = "black-24.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:34afe9da5056aa123b8bfda1664bfe6fb4e9c6f311d8e4a6eb089da9a9173bf9"}, - {file = "black-24.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:854c06fb86fd854140f37fb24dbf10621f5dab9e3b0c29a690ba595e3d543024"}, - {file = "black-24.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3897ae5a21ca132efa219c029cce5e6bfc9c3d34ed7e892113d199c0b1b444a2"}, - {file = "black-24.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:ecba2a15dfb2d97105be74bbfe5128bc5e9fa8477d8c46766505c1dda5883aac"}, - {file = 
"black-24.1.1-py3-none-any.whl", hash = "sha256:5cdc2e2195212208fbcae579b931407c1fa9997584f0a415421748aeafff1168"}, - {file = "black-24.1.1.tar.gz", hash = "sha256:48b5760dcbfe5cf97fd4fba23946681f3a81514c6ab8a45b50da67ac8fbc6c7b"}, + {file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"}, + {file = "black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"}, + {file = "black-23.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a920b569dc6b3472513ba6ddea21f440d4b4c699494d2e972a1753cdc25df7b0"}, + {file = "black-23.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:3fa4be75ef2a6b96ea8d92b1587dd8cb3a35c7e3d51f0738ced0781c3aa3a5a3"}, + {file = "black-23.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8d4df77958a622f9b5a4c96edb4b8c0034f8434032ab11077ec6c56ae9f384ba"}, + {file = "black-23.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:602cfb1196dc692424c70b6507593a2b29aac0547c1be9a1d1365f0d964c353b"}, + {file = "black-23.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c4352800f14be5b4864016882cdba10755bd50805c95f728011bcb47a4afd59"}, + {file = "black-23.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:0808494f2b2df923ffc5723ed3c7b096bd76341f6213989759287611e9837d50"}, + {file = "black-23.12.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:25e57fd232a6d6ff3f4478a6fd0580838e47c93c83eaf1ccc92d4faf27112c4e"}, + {file = "black-23.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2d9e13db441c509a3763a7a3d9a49ccc1b4e974a47be4e08ade2a228876500ec"}, + {file = "black-23.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1bd9c210f8b109b1762ec9fd36592fdd528485aadb3f5849b2740ef17e674e"}, + {file = "black-23.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:ae76c22bde5cbb6bfd211ec343ded2163bba7883c7bc77f6b756a1049436fbb9"}, + {file = "black-23.12.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1fa88a0f74e50e4487477bc0bb900c6781dbddfdfa32691e780bf854c3b4a47f"}, + {file = "black-23.12.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a4d6a9668e45ad99d2f8ec70d5c8c04ef4f32f648ef39048d010b0689832ec6d"}, + {file = "black-23.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b18fb2ae6c4bb63eebe5be6bd869ba2f14fd0259bda7d18a46b764d8fb86298a"}, + {file = "black-23.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:c04b6d9d20e9c13f43eee8ea87d44156b8505ca8a3c878773f68b4e4812a421e"}, + {file = "black-23.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3e1b38b3135fd4c025c28c55ddfc236b05af657828a8a6abe5deec419a0b7055"}, + {file = "black-23.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4f0031eaa7b921db76decd73636ef3a12c942ed367d8c3841a0739412b260a54"}, + {file = "black-23.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97e56155c6b737854e60a9ab1c598ff2533d57e7506d97af5481141671abf3ea"}, + {file = "black-23.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:dd15245c8b68fe2b6bd0f32c1556509d11bb33aec9b5d0866dd8e2ed3dba09c2"}, + {file = "black-23.12.1-py3-none-any.whl", hash = "sha256:78baad24af0f033958cad29731e27363183e140962595def56423e626f4bee3e"}, + {file = "black-23.12.1.tar.gz", hash = "sha256:4ce3ef14ebe8d9509188014d96af1c456a910d5b5cbf434a09fef7e024b3d0d5"}, ] [package.dependencies] @@ -509,17 +506,17 @@ traittypes = ">=0.0.6" [[package]] name = "branca" -version = "0.7.1" 
+version = "0.7.0" description = "Generate complex HTML+JS pages with Python" optional = false python-versions = ">=3.7" files = [ - {file = "branca-0.7.1-py3-none-any.whl", hash = "sha256:70515944ed2d1ed2784c552508df58037ca19402a8a1069d57f9113e3e012f51"}, - {file = "branca-0.7.1.tar.gz", hash = "sha256:e6b6f37a37bc0abffd960c68c045a7fe025d628eff87fedf6ab6ca814812110c"}, + {file = "branca-0.7.0-py3-none-any.whl", hash = "sha256:c653d9a3fef1e6cd203757c77d3eb44810f11998506451f9a27d52b983500c16"}, + {file = "branca-0.7.0.tar.gz", hash = "sha256:503ccb589a9ee9464cb7b5b17e5ffd8d5082c5c28624197f58f20d4d377a68bb"}, ] [package.dependencies] -jinja2 = ">=3" +jinja2 = "*" [[package]] name = "build" @@ -821,77 +818,91 @@ dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] [[package]] name = "clickhouse-connect" -version = "0.7.0" +version = "0.6.23" description = "ClickHouse Database Core Driver for Python, Pandas, and Superset" optional = true -python-versions = "~=3.8" -files = [ - {file = "clickhouse-connect-0.7.0.tar.gz", hash = "sha256:4fc0c7c58632237d91a26691507ab37dc28233461f6bbe42699a4f36bba86181"}, - {file = "clickhouse_connect-0.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0ca2e06e033afe8bbf5bad97b5766501f11886414b2f5b504a15cf703ad2d774"}, - {file = "clickhouse_connect-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:96480e2c36b265ec1b619e610e3d691be33327a0accb8ba4b4e9b3e6e0437e6f"}, - {file = "clickhouse_connect-0.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8f990b247caa6560f7b5d266d86364c68dbace9d44515c77b62bbd9bdeb8011"}, - {file = "clickhouse_connect-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6de3043b32f40d3300a0931ef91d783eba2d67e12437747a59dfda72b796987d"}, - {file = "clickhouse_connect-0.7.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80545c875038830bb57f28c37c0d0924f99d149cea8c603528abb37022b66ac5"}, - {file = "clickhouse_connect-0.7.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:01f8a33949d42085207638ed21d7e5442794680f276f9b3903511f6abe08bdce"}, - {file = "clickhouse_connect-0.7.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:860e117edfca7b4bdc89aa5f8dd89fc1625f90ec0ced0eccf5572bec205fb2c0"}, - {file = "clickhouse_connect-0.7.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:32a048eab8d415318c5983db7dfeb73dc431f1a53e2e253fffe795906bed1cc6"}, - {file = "clickhouse_connect-0.7.0-cp310-cp310-win32.whl", hash = "sha256:f26d9bc7a25193e4e27e636a8b3162ffd67c29c49945e0087ff27a0fbc87a980"}, - {file = "clickhouse_connect-0.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:ac6a5bc0fb93e003291a22c74802560dc7b47ac8e17c400014728072f3296ce4"}, - {file = "clickhouse_connect-0.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d5a037afd82a3ea21106f0de0b556e2ec619b2d836af5268381f939f8a78c2d"}, - {file = "clickhouse_connect-0.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8f4560a6eed525ce02aaa42891876e6566a59427a5c8709533fca3fabd49b308"}, - {file = "clickhouse_connect-0.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f03e79bce8535936a938eb5c6bb5d83ae51d70f2f8ecc09c9b6b56c55141b36"}, - {file = "clickhouse_connect-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7aac5cc6f769ba2b5fc9da6e331cdfe6d1586e3a2af59b28ff9b0408ddd4bf61"}, - {file = 
"clickhouse_connect-0.7.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dedf464abe72e1807b5fc86761760e5e736730c2ca2185ef2931b6d2fac860c"}, - {file = "clickhouse_connect-0.7.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3c3af22a296caec7680a1e6a94eccb78e2aabccd33dd5d5f37187566f6072cb2"}, - {file = "clickhouse_connect-0.7.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9c0a1e26584bce35642632c28aef99e9a19502ce1148ca78974f0e84fdfe2676"}, - {file = "clickhouse_connect-0.7.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a37ba5fe6d9dfde5299b6a04e2c4086ebe6c3b6652e5841de2b12fea51694d99"}, - {file = "clickhouse_connect-0.7.0-cp311-cp311-win32.whl", hash = "sha256:739779d942f2103991d85f0d0297a05e17f0ee46dbc370d1420590eb836a44b1"}, - {file = "clickhouse_connect-0.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:b9f2a19a2e53463694046e835dea78dfb1ab1891115148020568dc3d18f40e64"}, - {file = "clickhouse_connect-0.7.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6b9e1a818dcb2287c327f2ae5e103094bbeb50656b21f7e1536551f668a6348a"}, - {file = "clickhouse_connect-0.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5cba0f75c07d8ee8b8acc974134b04184a9d971511a0cd0cc794d4de0edd4786"}, - {file = "clickhouse_connect-0.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab38f3cd100c1f97f24d12b41a97f18117f4e77e2b00d35e92898a115a328bef"}, - {file = "clickhouse_connect-0.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73a95a3a32f036aff1ce4616082bcb1b2246de36aad13dc60641fa592f7bbcee"}, - {file = "clickhouse_connect-0.7.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:71e54b434cf7905957338b8db8e2a9981a6d9bb0a8fa8ee32d6ce30a8f2e7996"}, - {file = "clickhouse_connect-0.7.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:dd3b4aec4cb695eacbcbbe6a3d88aef7b72e4829d5b1003edf87a4b0bebb17a1"}, - {file = "clickhouse_connect-0.7.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:ca0eb351d1b9e913887a340878bc444cfd1c4ded337446bf014c281a7254c089"}, - {file = "clickhouse_connect-0.7.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0625fe98c746f3d66baf30630863f61c1decd2e86dba8d024c7bc3175728794c"}, - {file = "clickhouse_connect-0.7.0-cp312-cp312-win32.whl", hash = "sha256:9b9b83e8e630564c4045ebf9ff38d6d5cef5fd9fb669ab7357dd981cd58959b4"}, - {file = "clickhouse_connect-0.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:ca0e3b7ece52cb24bee459b42eb2bc9d2460b53c5de47e99f89454f197509f3e"}, - {file = "clickhouse_connect-0.7.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:46558c4f54149fb82d06977e536ca19ee5d343ea77cdffbdd1398f534cb5b9a9"}, - {file = "clickhouse_connect-0.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6267326bf1a4e4f6803bead7a28fc148c499e5e4aec5aff9a142bde7a4b269b6"}, - {file = "clickhouse_connect-0.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31019259fdc38759884efaf7e5b5ea6b3612767ac52934f1f4e79913e66ddc09"}, - {file = "clickhouse_connect-0.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be688d9a885035c1604f846ea44d400af7d7e14c49b72ec04ee932216860755d"}, - {file = "clickhouse_connect-0.7.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b99319b8b08e4696e4011f8c8e3e5a5041a9f98920e8e2abf8c444e9e2d1aae2"}, - {file = 
"clickhouse_connect-0.7.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1ffc7465c509bb10c6d8f8d66b31298a203b6a85c137d2cd21195e86243eaf70"}, - {file = "clickhouse_connect-0.7.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0d3a2e600b50360ac36bb6b97ac44d4851ef2144a3c055df19fff2f48e84ab3e"}, - {file = "clickhouse_connect-0.7.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:85fedc194b35b7676660bbe510b6eb0fd8ae6c78dca4038dec09a93a227168e8"}, - {file = "clickhouse_connect-0.7.0-cp38-cp38-win32.whl", hash = "sha256:61b140694779843f6c2110f1068fb4acbcb3601599d9a721c897605e5939e3ac"}, - {file = "clickhouse_connect-0.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:29afdd5edc77dec17db140df4f1fde66ccffd384011627ce96cd73f0c67ed75f"}, - {file = "clickhouse_connect-0.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d8f14faeafcf9add0eeeb4781d4a5aa8e3b83648db401c5e76237b7a2c631141"}, - {file = "clickhouse_connect-0.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:85a12697c0c3ebcc24bc2c4f5636f986a36f040b28e079b90c7974e12db3424f"}, - {file = "clickhouse_connect-0.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db497029d455e07278b4f714d63936d4462e63554d68c3285f3e0a01e5f7aaa1"}, - {file = "clickhouse_connect-0.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b5462bbd9d776c899a16d17ec49ca4c43793565f5a6956fd64272eecb6bfc55"}, - {file = "clickhouse_connect-0.7.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d641717b0f675c6cd7c93dc452863a1eac6cf91d637b483a9c42d23b5617ec23"}, - {file = "clickhouse_connect-0.7.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a9531260d1ef35119ed9d909fda51578279270e38ecf54ff5f1d9d6b6a2b39f8"}, - {file = "clickhouse_connect-0.7.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:fa88a35cb47e38f4ce3d1c3dbc61656537de22c84a5d751f775b974a4efd2e32"}, - {file = "clickhouse_connect-0.7.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3eb7e0dc1c87e4e9126b2bc614e312022fa741f53f003d98b2f240e6ce8c1e1c"}, - {file = "clickhouse_connect-0.7.0-cp39-cp39-win32.whl", hash = "sha256:f479e6422578bc4fb7828f22b882e5294fe9ac9f9af8c660d216c24746bb2be0"}, - {file = "clickhouse_connect-0.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:b1c81338664b2457fae97c1334182e81b77ec057ea9ec3a47d682d14a03b6573"}, - {file = "clickhouse_connect-0.7.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7f167de0f3639aaa0807d011e175ff33be86e2727a4644da65a019306ff3f021"}, - {file = "clickhouse_connect-0.7.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:185ee65eab42bdd545e00c8244a72c797d1961173b78e37b0ea7b130ef0d9c73"}, - {file = "clickhouse_connect-0.7.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48dde9b8cd9daf7ec0e4baa72e94e40cdd749ae7aef1dfbe7c7d22af53dae8f4"}, - {file = "clickhouse_connect-0.7.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3d75b1a01770c04650a7dd3ebbee21369939b00125fbb70c02067ac779f523c8"}, - {file = "clickhouse_connect-0.7.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9f895290de6fa8347114a361cc944ade1ddeba895f936752533b85984d4d183e"}, - {file = "clickhouse_connect-0.7.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:359eab438b3d6f20090b592084493e04dac369e65d0cf4e1da3ecc84750b52c4"}, - {file = "clickhouse_connect-0.7.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:efc6b0af171681844fbb39d7b756235aea5b416ce7fca163834e068afbd3f833"}, - {file = "clickhouse_connect-0.7.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cc1a64bc8fb91e594efbd5b4560e6c661ebf75a11d37e08d48c45f3f4e439f7"}, - {file = "clickhouse_connect-0.7.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99c2d25ceb1fbf187c7b9815373dbcdfc04d1b233dafb3547b56dfeca6904584"}, - {file = "clickhouse_connect-0.7.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:95a1e292b4cf230c2944bdc31c19c4e8fcbcd5609e24322ff5211af357beb26a"}, - {file = "clickhouse_connect-0.7.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e94de62c3dacfb7b6874f6593ad4268d38c17a1117847acdc1ad0c7b34c4e373"}, - {file = "clickhouse_connect-0.7.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a34a3f713f0148e30ddaf431af671ed16baf732eff7437ff2c7519adeda2f9c9"}, - {file = "clickhouse_connect-0.7.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9d08af8c5383d377a12e576106d7c3e0de0d03a3cbc6b9de89932e4b40f550d"}, - {file = "clickhouse_connect-0.7.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e06b70e4a4a54810ee293875febf71562c346688e2bc517c141958ef1c2af710"}, - {file = "clickhouse_connect-0.7.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:5c1bc46af3e0eca5a580aaecffd7dc47d541173d3189f250c59ffdd9d1cb0dd1"}, +python-versions = "~=3.7" +files = [ + {file = "clickhouse-connect-0.6.23.tar.gz", hash = "sha256:a74d01349390c0e2713603539927f105c1b9772cd81f850deb1d1ec57e4d9cfc"}, + {file = "clickhouse_connect-0.6.23-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cfc8e1cd68f7824e90a3492bf64f66934ad95529fac282cf96bc5a50255a5932"}, + {file = "clickhouse_connect-0.6.23-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f0726b5f19343dde5b337e8495713a28e0449f42504ea47a691a5a39768ccd79"}, + {file = "clickhouse_connect-0.6.23-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e35c767497b22f4c9069f99c24f965ae266b2448e7d2c69407d82f632616bbf7"}, + {file = "clickhouse_connect-0.6.23-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c677748c2fb6087fce8fb185980cb539887db2253d0f81900c4a21ef38d7cb89"}, + {file = "clickhouse_connect-0.6.23-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:106c680114649d5bfacf76e26bcc87df8d07141b1a3c944099ba0ce297694c7e"}, + {file = "clickhouse_connect-0.6.23-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4cd72d85d246fe000377035b1858720b12045f3df1042cc03a5fad4dba6b7f78"}, + {file = "clickhouse_connect-0.6.23-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:4f7c68297ac933603768f5dd4f49c94f88dacd9344e099b0221ead6b9914643e"}, + {file = "clickhouse_connect-0.6.23-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cff31e4fd52517ad79c6d50037c1da7fcaf1270d4c1c70a52852701ff415d86b"}, + {file = "clickhouse_connect-0.6.23-cp310-cp310-win32.whl", hash = "sha256:931d9bf3ecd212107e43dd8ed735a79b840b8577d4508b2cf866b1503350b415"}, + {file = "clickhouse_connect-0.6.23-cp310-cp310-win_amd64.whl", hash = "sha256:a9e55a50fb165a7be30d335da84e04ec025b2783999312917df86815dc8be3af"}, + {file = "clickhouse_connect-0.6.23-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4d3a7734e48f0494764ef481c694e02bc78415df60a49d5119c032e75b5e1f8b"}, + {file = 
"clickhouse_connect-0.6.23-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5a66bee81dcbdea969f39a7f75b11225e985cfa752dccd148f54bacac800f72"}, + {file = "clickhouse_connect-0.6.23-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d62335114e5792fa95548465d831bb33a1b226c85b87b075c7e6c692a5edc77"}, + {file = "clickhouse_connect-0.6.23-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24b9fa3eb7d8cbc87f635f7942cb6817a38c6491c8b40cfb6a7c0a6a8e0d59e4"}, + {file = "clickhouse_connect-0.6.23-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ac7c459641dd99fa7e48921d2c4148d8c0cb171697aa487b55364b0b9081bf07"}, + {file = "clickhouse_connect-0.6.23-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:746be95964105fa2eca05ab2dab02b1319e9c94f4a9599b4d3c2894f9090c9bc"}, + {file = "clickhouse_connect-0.6.23-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f2593096fc0af049dcf55e03aaf3858bbc94bedddd1fd504087127ec48b68c7b"}, + {file = "clickhouse_connect-0.6.23-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:38f480f264b648333f90a9f715f2357bf6ec284a9152d3a4a249dea87c797a60"}, + {file = "clickhouse_connect-0.6.23-cp311-cp311-win32.whl", hash = "sha256:f9793b8ae15ca93f7ae5d2c96c7de79ab7f6cf311b0c115d9e7948f0887086a0"}, + {file = "clickhouse_connect-0.6.23-cp311-cp311-win_amd64.whl", hash = "sha256:e6301202780893d5e3f2f62874670790a450bcbf8009d48ca360d04197205efa"}, + {file = "clickhouse_connect-0.6.23-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:66883e21a1598688c2a32f46a3ab9a858eca609bcd6fa6e4e0e758993027f356"}, + {file = "clickhouse_connect-0.6.23-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:257482716a9563cd2c964543e46af01848779fcb3665063c30b49e13f82ad27a"}, + {file = "clickhouse_connect-0.6.23-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7489202b7eec462e40b52066393f5ec62c82f1e514013a4e9e5f5eab962ad61"}, + {file = "clickhouse_connect-0.6.23-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e297da4ab46a405ce3555b89573cd256c80efc505130e08bac673095d731c6d"}, + {file = "clickhouse_connect-0.6.23-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f5d0097ae7ef1ff13afb2d56e5b93dfca515480d491f280315b0c16ce58c93c"}, + {file = "clickhouse_connect-0.6.23-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1dbd63e6bd2189259a5a9506e8efe5d8117f3d8b114c8d76bb4397eed4751927"}, + {file = "clickhouse_connect-0.6.23-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1d861a3b7b877227fe136e6648f3aca070a69ed407fd77c49722ad6b8f3a5aa2"}, + {file = "clickhouse_connect-0.6.23-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e5912503717237e0d4cafee19fbe579442484bfe6923063053e21e42c952a043"}, + {file = "clickhouse_connect-0.6.23-cp312-cp312-win32.whl", hash = "sha256:d288cf60ef846720fa8b2d2758b72dc488072121d331d7c4b27547e935129472"}, + {file = "clickhouse_connect-0.6.23-cp312-cp312-win_amd64.whl", hash = "sha256:4948ca8f292984d1d0cd7ea3bd9aa909101bf62e7d0d7902546d19b3f965f3be"}, + {file = "clickhouse_connect-0.6.23-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ad213ef12b0c5a474e38a13b95113aa3aafe1a35d7e81035b4c1bcc2d8d50e93"}, + {file = "clickhouse_connect-0.6.23-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed9ea8f2be2dcc4cfda1d33ce07d61467c93f1dbb7a98f09d69464b991dcaba0"}, + {file = 
"clickhouse_connect-0.6.23-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3fd4dcdefcfa2a7175c3c97c53bf66c38544ef84a276932fae4ffcb4c273998"}, + {file = "clickhouse_connect-0.6.23-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:102a04bf1eb7612e0924031c751e31dd4bad58e79543c13e8805fddcecbbfe45"}, + {file = "clickhouse_connect-0.6.23-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ac3943d4d522bcb1a4becb8850cdb3bfba07713178e84e4cadcd955b9002e28c"}, + {file = "clickhouse_connect-0.6.23-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:7fe4d55c6f8b72eeedce439ed1287ea1971e30b54879df6059dc87f73bf472d2"}, + {file = "clickhouse_connect-0.6.23-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e2dc8127159d5415859af6ac8b42bc70b71709d834477a1dd82e5b147de66e82"}, + {file = "clickhouse_connect-0.6.23-cp37-cp37m-win32.whl", hash = "sha256:854fcd6cbf7919aa2b9e9f92c52cb5b2d1c976c4e2145193910662237a8d879c"}, + {file = "clickhouse_connect-0.6.23-cp37-cp37m-win_amd64.whl", hash = "sha256:24c17054e395908eb483fad3dd899748a4d919e5b4e0db2a31c56df503f0921d"}, + {file = "clickhouse_connect-0.6.23-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3af001ef95f8dcb572f5cb4518d367b449fa6baf2b8fccc0e6112e04f1932b2b"}, + {file = "clickhouse_connect-0.6.23-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9ad33e0949dd8842beb34e6c5f01fac293bfa7701a2697f64d400d30b2858fe0"}, + {file = "clickhouse_connect-0.6.23-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b73130d09bb6eddf57d94d9c3bca4953af03231cc14a6757fddd9d3839720c3"}, + {file = "clickhouse_connect-0.6.23-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f27d725f054c54d6d8340b5545c8713ca6af640c75ade9eb0eef8b441ec37d66"}, + {file = "clickhouse_connect-0.6.23-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ac6bcbf730538106c376d029a0e9aa3e155490cae326e7256a51804d9576345"}, + {file = "clickhouse_connect-0.6.23-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9a7b35ccc8526456ad2794ab6af014506cb4472eed7f864d4d7d58bc4acf3b83"}, + {file = "clickhouse_connect-0.6.23-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3ac5fe6ac94ca77eed1ba440df81b5f4ff99fa85120afe46676e185f3f6f840d"}, + {file = "clickhouse_connect-0.6.23-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6eb8576ab497f9425e1dc3595e0fbe6c97615ae5dc7d184f2f65df874bb31539"}, + {file = "clickhouse_connect-0.6.23-cp38-cp38-win32.whl", hash = "sha256:c936e1adf8257acfc459282477ad65e2ef38eba26f72d58187154e8cbeaa1641"}, + {file = "clickhouse_connect-0.6.23-cp38-cp38-win_amd64.whl", hash = "sha256:07756dd0c610765000e9d7e202557cb6a06d1e0fd007234458d752998cd8c6da"}, + {file = "clickhouse_connect-0.6.23-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bcc1e103b1af2ae8b0485d1c09a55f8e9cb80e02fdaf8a039b813d07950a9586"}, + {file = "clickhouse_connect-0.6.23-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:10e6d20b61e5342fed752fb5221c10517af92182072fc792c5252541850d7340"}, + {file = "clickhouse_connect-0.6.23-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ce7caa2ceff666aaa86da11b9f42ddd09ae7ffe727db1617f352991f7a67667"}, + {file = "clickhouse_connect-0.6.23-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7cafc6ed2214321f3363fe5f23cf9880544ba05dc1820a994f0975bdd7e31d9"}, + {file = 
"clickhouse_connect-0.6.23-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f7e5ba4c78ef49354fac3d91eb951611430c8be2298f1dc2a1516be3149a41c"}, + {file = "clickhouse_connect-0.6.23-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a3691ed25e971bbf62c8bed843e80eecc0e4cb9b990e662e0916e8f2b05cd4ec"}, + {file = "clickhouse_connect-0.6.23-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8b43948da665bbcb5c60e3871e878e068384cd9d2f845bc02fc4c22c934831cd"}, + {file = "clickhouse_connect-0.6.23-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9d1b7fb932e45482199f4abce61c95ead8e8628cf1fb882c2b28dc11318742da"}, + {file = "clickhouse_connect-0.6.23-cp39-cp39-win32.whl", hash = "sha256:4315c7718a9cc8eedc4b40b53a954e2d5a5dfb705c4a659e3a167537889268da"}, + {file = "clickhouse_connect-0.6.23-cp39-cp39-win_amd64.whl", hash = "sha256:040307cad9c11f503290e75d14c0b402454502fa7ab3c742ad8dac1a419219eb"}, + {file = "clickhouse_connect-0.6.23-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:086599316e5a532c34dadcf9fa2ea19a923d0acdcc891a829b3cc5cc061fd26a"}, + {file = "clickhouse_connect-0.6.23-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bb73a0ee0a0161fce7c38b7f8f66e3c5f0598b8d1f3c30e24ccd17ba1c117b3"}, + {file = "clickhouse_connect-0.6.23-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e4b0111ed72058836a44313369dd05e7c550da8e8ca486834c599ae81c7cd6b"}, + {file = "clickhouse_connect-0.6.23-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e70e97eb15e89995a220fdec19b448b48f8ea65a014e71bc1cc17763a7d8fd0e"}, + {file = "clickhouse_connect-0.6.23-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:8e541c2f8921cd48dc57551391441b35af5991ae49f1d221ba77950ad195b807"}, + {file = "clickhouse_connect-0.6.23-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c7301862b6eb87aeb48f257932f60b3141777cae317217185279b7a036840e07"}, + {file = "clickhouse_connect-0.6.23-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f06348ecd72036d22d65d528221010c86559bdfc048f1633c5cd009993219a0c"}, + {file = "clickhouse_connect-0.6.23-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b40d1ec596f7c3ecf1e0d07916ab8c4f7ee52eb867758977335b285c4916e585"}, + {file = "clickhouse_connect-0.6.23-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09753a2ce1dfbe0a54fe8a7a464f67b2e0f01c1731f06d68a3ec821a00985d88"}, + {file = "clickhouse_connect-0.6.23-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:a8ff9600c9721a574c7716a2ad6b436fd043eb05a544ed08d895504d18fb6d5d"}, + {file = "clickhouse_connect-0.6.23-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7d5ec217ae361c8c18c605030f9d601d861e23dc23af502d9608e769f3e467b4"}, + {file = "clickhouse_connect-0.6.23-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de6bc3e4ac94545a6f80c49f49ad9a9945efe1084ecd89804ebbb048b022699"}, + {file = "clickhouse_connect-0.6.23-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d6277c7ec66585e1f03da95617de172aeb38232d8da8a3e69f67b4f7149017"}, + {file = "clickhouse_connect-0.6.23-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee1cb7130f0d5e03066e9d4b94ae2052dd96329466c20a3f8688eeebc6f16033"}, + {file = 
"clickhouse_connect-0.6.23-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:aa7009a68df2f464861c694a15f06221f308ee9758a6848c4541936d0b5a9448"}, + {file = "clickhouse_connect-0.6.23-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:019bf068b38cb0b94fda3cb51d776f556911777197d1a3d0e73eb41259449879"}, + {file = "clickhouse_connect-0.6.23-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a378b6c6b0c092b18e0169b0b94a1f709b80d192e8e6275cfe6eff9c3cb26df0"}, + {file = "clickhouse_connect-0.6.23-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd2c4356a7a496d8920c756b0ddac916d7a9a902e6183fe4da67c86a6bf19b34"}, + {file = "clickhouse_connect-0.6.23-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:075acc6085c8ece277ce91688a739cbfd54c48de2c839d554045457defdbb81c"}, + {file = "clickhouse_connect-0.6.23-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7fca7189195785f5ff2a0805e52dd68f684bd5e4f475ba5ade06c5c6769d562e"}, ] [package.dependencies] @@ -1067,63 +1078,63 @@ test-no-images = ["pytest", "pytest-cov", "pytest-xdist", "wurlitzer"] [[package]] name = "coverage" -version = "7.4.1" +version = "7.4.0" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:077d366e724f24fc02dbfe9d946534357fda71af9764ff99d73c3c596001bbd7"}, - {file = "coverage-7.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0193657651f5399d433c92f8ae264aff31fc1d066deee4b831549526433f3f61"}, - {file = "coverage-7.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d17bbc946f52ca67adf72a5ee783cd7cd3477f8f8796f59b4974a9b59cacc9ee"}, - {file = "coverage-7.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3277f5fa7483c927fe3a7b017b39351610265308f5267ac6d4c2b64cc1d8d25"}, - {file = "coverage-7.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dceb61d40cbfcf45f51e59933c784a50846dc03211054bd76b421a713dcdf19"}, - {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6008adeca04a445ea6ef31b2cbaf1d01d02986047606f7da266629afee982630"}, - {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c61f66d93d712f6e03369b6a7769233bfda880b12f417eefdd4f16d1deb2fc4c"}, - {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b9bb62fac84d5f2ff523304e59e5c439955fb3b7f44e3d7b2085184db74d733b"}, - {file = "coverage-7.4.1-cp310-cp310-win32.whl", hash = "sha256:f86f368e1c7ce897bf2457b9eb61169a44e2ef797099fb5728482b8d69f3f016"}, - {file = "coverage-7.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:869b5046d41abfea3e381dd143407b0d29b8282a904a19cb908fa24d090cc018"}, - {file = "coverage-7.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8ffb498a83d7e0305968289441914154fb0ef5d8b3157df02a90c6695978295"}, - {file = "coverage-7.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3cacfaefe6089d477264001f90f55b7881ba615953414999c46cc9713ff93c8c"}, - {file = "coverage-7.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d6850e6e36e332d5511a48a251790ddc545e16e8beaf046c03985c69ccb2676"}, - {file = "coverage-7.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:18e961aa13b6d47f758cc5879383d27b5b3f3dcd9ce8cdbfdc2571fe86feb4dd"}, - {file = "coverage-7.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfd1e1b9f0898817babf840b77ce9fe655ecbe8b1b327983df485b30df8cc011"}, - {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6b00e21f86598b6330f0019b40fb397e705135040dbedc2ca9a93c7441178e74"}, - {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:536d609c6963c50055bab766d9951b6c394759190d03311f3e9fcf194ca909e1"}, - {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7ac8f8eb153724f84885a1374999b7e45734bf93a87d8df1e7ce2146860edef6"}, - {file = "coverage-7.4.1-cp311-cp311-win32.whl", hash = "sha256:f3771b23bb3675a06f5d885c3630b1d01ea6cac9e84a01aaf5508706dba546c5"}, - {file = "coverage-7.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:9d2f9d4cc2a53b38cabc2d6d80f7f9b7e3da26b2f53d48f05876fef7956b6968"}, - {file = "coverage-7.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f68ef3660677e6624c8cace943e4765545f8191313a07288a53d3da188bd8581"}, - {file = "coverage-7.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23b27b8a698e749b61809fb637eb98ebf0e505710ec46a8aa6f1be7dc0dc43a6"}, - {file = "coverage-7.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e3424c554391dc9ef4a92ad28665756566a28fecf47308f91841f6c49288e66"}, - {file = "coverage-7.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0860a348bf7004c812c8368d1fc7f77fe8e4c095d661a579196a9533778e156"}, - {file = "coverage-7.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe558371c1bdf3b8fa03e097c523fb9645b8730399c14fe7721ee9c9e2a545d3"}, - {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3468cc8720402af37b6c6e7e2a9cdb9f6c16c728638a2ebc768ba1ef6f26c3a1"}, - {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:02f2edb575d62172aa28fe00efe821ae31f25dc3d589055b3fb64d51e52e4ab1"}, - {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ca6e61dc52f601d1d224526360cdeab0d0712ec104a2ce6cc5ccef6ed9a233bc"}, - {file = "coverage-7.4.1-cp312-cp312-win32.whl", hash = "sha256:ca7b26a5e456a843b9b6683eada193fc1f65c761b3a473941efe5a291f604c74"}, - {file = "coverage-7.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:85ccc5fa54c2ed64bd91ed3b4a627b9cce04646a659512a051fa82a92c04a448"}, - {file = "coverage-7.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8bdb0285a0202888d19ec6b6d23d5990410decb932b709f2b0dfe216d031d218"}, - {file = "coverage-7.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:918440dea04521f499721c039863ef95433314b1db00ff826a02580c1f503e45"}, - {file = "coverage-7.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:379d4c7abad5afbe9d88cc31ea8ca262296480a86af945b08214eb1a556a3e4d"}, - {file = "coverage-7.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b094116f0b6155e36a304ff912f89bbb5067157aff5f94060ff20bbabdc8da06"}, - {file = "coverage-7.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2f5968608b1fe2a1d00d01ad1017ee27efd99b3437e08b83ded9b7af3f6f766"}, - {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:10e88e7f41e6197ea0429ae18f21ff521d4f4490aa33048f6c6f94c6045a6a75"}, - {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a4a3907011d39dbc3e37bdc5df0a8c93853c369039b59efa33a7b6669de04c60"}, - {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6d224f0c4c9c98290a6990259073f496fcec1b5cc613eecbd22786d398ded3ad"}, - {file = "coverage-7.4.1-cp38-cp38-win32.whl", hash = "sha256:23f5881362dcb0e1a92b84b3c2809bdc90db892332daab81ad8f642d8ed55042"}, - {file = "coverage-7.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:a07f61fc452c43cd5328b392e52555f7d1952400a1ad09086c4a8addccbd138d"}, - {file = "coverage-7.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8e738a492b6221f8dcf281b67129510835461132b03024830ac0e554311a5c54"}, - {file = "coverage-7.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:46342fed0fff72efcda77040b14728049200cbba1279e0bf1188f1f2078c1d70"}, - {file = "coverage-7.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9641e21670c68c7e57d2053ddf6c443e4f0a6e18e547e86af3fad0795414a628"}, - {file = "coverage-7.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aeb2c2688ed93b027eb0d26aa188ada34acb22dceea256d76390eea135083950"}, - {file = "coverage-7.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d12c923757de24e4e2110cf8832d83a886a4cf215c6e61ed506006872b43a6d1"}, - {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0491275c3b9971cdbd28a4595c2cb5838f08036bca31765bad5e17edf900b2c7"}, - {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8dfc5e195bbef80aabd81596ef52a1277ee7143fe419efc3c4d8ba2754671756"}, - {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1a78b656a4d12b0490ca72651fe4d9f5e07e3c6461063a9b6265ee45eb2bdd35"}, - {file = "coverage-7.4.1-cp39-cp39-win32.whl", hash = "sha256:f90515974b39f4dea2f27c0959688621b46d96d5a626cf9c53dbc653a895c05c"}, - {file = "coverage-7.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:64e723ca82a84053dd7bfcc986bdb34af8d9da83c521c19d6b472bc6880e191a"}, - {file = "coverage-7.4.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:32a8d985462e37cfdab611a6f95b09d7c091d07668fdc26e47a725ee575fe166"}, - {file = "coverage-7.4.1.tar.gz", hash = "sha256:1ed4b95480952b1a26d863e546fa5094564aa0065e1e5f0d4d0041f293251d04"}, + {file = "coverage-7.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:36b0ea8ab20d6a7564e89cb6135920bc9188fb5f1f7152e94e8300b7b189441a"}, + {file = "coverage-7.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0676cd0ba581e514b7f726495ea75aba3eb20899d824636c6f59b0ed2f88c471"}, + {file = "coverage-7.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0ca5c71a5a1765a0f8f88022c52b6b8be740e512980362f7fdbb03725a0d6b9"}, + {file = "coverage-7.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7c97726520f784239f6c62506bc70e48d01ae71e9da128259d61ca5e9788516"}, + {file = "coverage-7.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:815ac2d0f3398a14286dc2cea223a6f338109f9ecf39a71160cd1628786bc6f5"}, + {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:80b5ee39b7f0131ebec7968baa9b2309eddb35b8403d1869e08f024efd883566"}, + {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:5b2ccb7548a0b65974860a78c9ffe1173cfb5877460e5a229238d985565574ae"}, + {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:995ea5c48c4ebfd898eacb098164b3cc826ba273b3049e4a889658548e321b43"}, + {file = "coverage-7.4.0-cp310-cp310-win32.whl", hash = "sha256:79287fd95585ed36e83182794a57a46aeae0b64ca53929d1176db56aacc83451"}, + {file = "coverage-7.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:5b14b4f8760006bfdb6e08667af7bc2d8d9bfdb648351915315ea17645347137"}, + {file = "coverage-7.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:04387a4a6ecb330c1878907ce0dc04078ea72a869263e53c72a1ba5bbdf380ca"}, + {file = "coverage-7.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea81d8f9691bb53f4fb4db603203029643caffc82bf998ab5b59ca05560f4c06"}, + {file = "coverage-7.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74775198b702868ec2d058cb92720a3c5a9177296f75bd97317c787daf711505"}, + {file = "coverage-7.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76f03940f9973bfaee8cfba70ac991825611b9aac047e5c80d499a44079ec0bc"}, + {file = "coverage-7.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:485e9f897cf4856a65a57c7f6ea3dc0d4e6c076c87311d4bc003f82cfe199d25"}, + {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6ae8c9d301207e6856865867d762a4b6fd379c714fcc0607a84b92ee63feff70"}, + {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bf477c355274a72435ceb140dc42de0dc1e1e0bf6e97195be30487d8eaaf1a09"}, + {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:83c2dda2666fe32332f8e87481eed056c8b4d163fe18ecc690b02802d36a4d26"}, + {file = "coverage-7.4.0-cp311-cp311-win32.whl", hash = "sha256:697d1317e5290a313ef0d369650cfee1a114abb6021fa239ca12b4849ebbd614"}, + {file = "coverage-7.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:26776ff6c711d9d835557ee453082025d871e30b3fd6c27fcef14733f67f0590"}, + {file = "coverage-7.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:13eaf476ec3e883fe3e5fe3707caeb88268a06284484a3daf8250259ef1ba143"}, + {file = "coverage-7.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846f52f46e212affb5bcf131c952fb4075b55aae6b61adc9856222df89cbe3e2"}, + {file = "coverage-7.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26f66da8695719ccf90e794ed567a1549bb2644a706b41e9f6eae6816b398c4a"}, + {file = "coverage-7.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:164fdcc3246c69a6526a59b744b62e303039a81e42cfbbdc171c91a8cc2f9446"}, + {file = "coverage-7.4.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:316543f71025a6565677d84bc4df2114e9b6a615aa39fb165d697dba06a54af9"}, + {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bb1de682da0b824411e00a0d4da5a784ec6496b6850fdf8c865c1d68c0e318dd"}, + {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:0e8d06778e8fbffccfe96331a3946237f87b1e1d359d7fbe8b06b96c95a5407a"}, + {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a56de34db7b7ff77056a37aedded01b2b98b508227d2d0979d373a9b5d353daa"}, + {file = "coverage-7.4.0-cp312-cp312-win32.whl", hash = "sha256:51456e6fa099a8d9d91497202d9563a320513fcf59f33991b0661a4a6f2ad450"}, + {file = 
"coverage-7.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:cd3c1e4cb2ff0083758f09be0f77402e1bdf704adb7f89108007300a6da587d0"}, + {file = "coverage-7.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e9d1bf53c4c8de58d22e0e956a79a5b37f754ed1ffdbf1a260d9dcfa2d8a325e"}, + {file = "coverage-7.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:109f5985182b6b81fe33323ab4707011875198c41964f014579cf82cebf2bb85"}, + {file = "coverage-7.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cc9d4bc55de8003663ec94c2f215d12d42ceea128da8f0f4036235a119c88ac"}, + {file = "coverage-7.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc6d65b21c219ec2072c1293c505cf36e4e913a3f936d80028993dd73c7906b1"}, + {file = "coverage-7.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a10a4920def78bbfff4eff8a05c51be03e42f1c3735be42d851f199144897ba"}, + {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b8e99f06160602bc64da35158bb76c73522a4010f0649be44a4e167ff8555952"}, + {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7d360587e64d006402b7116623cebf9d48893329ef035278969fa3bbf75b697e"}, + {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:29f3abe810930311c0b5d1a7140f6395369c3db1be68345638c33eec07535105"}, + {file = "coverage-7.4.0-cp38-cp38-win32.whl", hash = "sha256:5040148f4ec43644702e7b16ca864c5314ccb8ee0751ef617d49aa0e2d6bf4f2"}, + {file = "coverage-7.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:9864463c1c2f9cb3b5db2cf1ff475eed2f0b4285c2aaf4d357b69959941aa555"}, + {file = "coverage-7.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:936d38794044b26c99d3dd004d8af0035ac535b92090f7f2bb5aa9c8e2f5cd42"}, + {file = "coverage-7.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:799c8f873794a08cdf216aa5d0531c6a3747793b70c53f70e98259720a6fe2d7"}, + {file = "coverage-7.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7defbb9737274023e2d7af02cac77043c86ce88a907c58f42b580a97d5bcca9"}, + {file = "coverage-7.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a1526d265743fb49363974b7aa8d5899ff64ee07df47dd8d3e37dcc0818f09ed"}, + {file = "coverage-7.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf635a52fc1ea401baf88843ae8708591aa4adff875e5c23220de43b1ccf575c"}, + {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:756ded44f47f330666843b5781be126ab57bb57c22adbb07d83f6b519783b870"}, + {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0eb3c2f32dabe3a4aaf6441dde94f35687224dfd7eb2a7f47f3fd9428e421058"}, + {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bfd5db349d15c08311702611f3dccbef4b4e2ec148fcc636cf8739519b4a5c0f"}, + {file = "coverage-7.4.0-cp39-cp39-win32.whl", hash = "sha256:53d7d9158ee03956e0eadac38dfa1ec8068431ef8058fe6447043db1fb40d932"}, + {file = "coverage-7.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:cfd2a8b6b0d8e66e944d47cdec2f47c48fef2ba2f2dff5a9a75757f64172857e"}, + {file = "coverage-7.4.0-pp38.pp39.pp310-none-any.whl", hash = "sha256:c530833afc4707fe48524a44844493f36d8727f04dcce91fb978c414a8556cc6"}, + {file = "coverage-7.4.0.tar.gz", hash = "sha256:707c0f58cb1712b8809ece32b68996ee1e609f71bd14615bd8f87a1293cb610e"}, ] [package.dependencies] @@ -1205,13 
+1216,13 @@ tests = ["pytest", "pytest-cov", "pytest-xdist"] [[package]] name = "dask" -version = "2024.1.1" +version = "2024.1.0" description = "Parallel PyData with Task Scheduling" optional = false python-versions = ">=3.9" files = [ - {file = "dask-2024.1.1-py3-none-any.whl", hash = "sha256:860ce2797905095beff0187c214840b80c77d752dcb9098a8283e3655a762bf5"}, - {file = "dask-2024.1.1.tar.gz", hash = "sha256:d0dc92e81ce68594a0a0ce23ba33f4d648f2c2f4217ab9b79068b7ecfb0416c7"}, + {file = "dask-2024.1.0-py3-none-any.whl", hash = "sha256:717102ef7c309297291095a0061d374f3b72e11ce4e1115ab9faff940e274b4b"}, + {file = "dask-2024.1.0.tar.gz", hash = "sha256:f24fdc7a07e59a1403bf6903e6d8dc15ed6f8607d3311b4f00f88d8a2ac63e49"}, ] [package.dependencies] @@ -1231,7 +1242,7 @@ array = ["numpy (>=1.21)"] complete = ["dask[array,dataframe,diagnostics,distributed]", "lz4 (>=4.3.2)", "pyarrow (>=7.0)", "pyarrow-hotfix"] dataframe = ["dask[array]", "pandas (>=1.3)"] diagnostics = ["bokeh (>=2.4.2)", "jinja2 (>=2.10.3)"] -distributed = ["distributed (==2024.1.1)"] +distributed = ["distributed (==2024.1.0)"] test = ["pandas[test]", "pre-commit", "pytest", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytest-xdist"] [[package]] @@ -1344,19 +1355,19 @@ files = [ [[package]] name = "distributed" -version = "2024.1.1" +version = "2024.1.0" description = "Distributed scheduler for Dask" optional = false python-versions = ">=3.9" files = [ - {file = "distributed-2024.1.1-py3-none-any.whl", hash = "sha256:cf05d3b38e1700339b3e36395729ab62110e723efefaecc21a8260fdc7555cf9"}, - {file = "distributed-2024.1.1.tar.gz", hash = "sha256:28cf5e9f4f07197b03ea8e5272e374ce2b9e9dc6742f6c9b525fd81645213c67"}, + {file = "distributed-2024.1.0-py3-none-any.whl", hash = "sha256:b552c9331350ba0e7cb8eccb1da8942b44997ccb680338f61c43fe9843c69988"}, + {file = "distributed-2024.1.0.tar.gz", hash = "sha256:f1d0e2dd5249085e32c6ff5c0ce0521c7e844dd52337683a69363a6bb1799a30"}, ] [package.dependencies] click = ">=8.0" cloudpickle = ">=1.5.0" -dask = "2024.1.1" +dask = "2024.1.0" jinja2 = ">=2.10.3" locket = ">=1.0.0" msgpack = ">=1.0.0" @@ -1873,24 +1884,22 @@ gcsfuse = ["fusepy"] [[package]] name = "gdown" -version = "5.0.1" -description = "Google Drive Public File/Folder Downloader" +version = "4.7.1" +description = "Google Drive direct download of big files." 
optional = false -python-versions = ">=3.8" +python-versions = "*" files = [ - {file = "gdown-5.0.1-py3-none-any.whl", hash = "sha256:3f595fcfd4b1bccd5cf73453f60984c5fa1c18eed499277a52b23337238c2670"}, - {file = "gdown-5.0.1.tar.gz", hash = "sha256:173557b4d33aad9f7dc75ce2ff963d8b313f36371e15da4b5ebb35ac6c7d5af6"}, + {file = "gdown-4.7.1-py3-none-any.whl", hash = "sha256:65d495699e7c2c61af0d0e9c32748fb4f79abaf80d747a87456c7be14aac2560"}, + {file = "gdown-4.7.1.tar.gz", hash = "sha256:347f23769679aaf7efa73e5655270fcda8ca56be65eb84a4a21d143989541045"}, ] [package.dependencies] beautifulsoup4 = "*" filelock = "*" requests = {version = "*", extras = ["socks"]} +six = "*" tqdm = "*" -[package.extras] -test = ["build", "mypy", "pytest", "ruff", "twine", "types-requests"] - [[package]] name = "geojson" version = "3.1.0" @@ -1904,13 +1913,13 @@ files = [ [[package]] name = "geopandas" -version = "0.14.3" +version = "0.14.2" description = "Geographic pandas extensions" optional = false python-versions = ">=3.9" files = [ - {file = "geopandas-0.14.3-py3-none-any.whl", hash = "sha256:41b31ad39e21bc9e8c4254f78f8dc4ce3d33d144e22e630a00bb336c83160204"}, - {file = "geopandas-0.14.3.tar.gz", hash = "sha256:748af035d4a068a4ae00cab384acb61d387685c833b0022e0729aa45216b23ac"}, + {file = "geopandas-0.14.2-py3-none-any.whl", hash = "sha256:0efa61235a68862c1c6be89fc3707cdeba67667d5676bb19e24f3c57a8c2f723"}, + {file = "geopandas-0.14.2.tar.gz", hash = "sha256:6e71d57b8376f9fdc9f1c3aa3170e7e420e91778de854f51013ae66fd371ccdb"}, ] [package.dependencies] @@ -1922,13 +1931,13 @@ shapely = ">=1.8.0" [[package]] name = "google-api-core" -version = "2.16.1" +version = "2.15.0" description = "Google API client core library" optional = false python-versions = ">=3.7" files = [ - {file = "google-api-core-2.16.1.tar.gz", hash = "sha256:7f668ffa3d5b9f3c6930407e5f5d691c05a376050a5a5fd772b9dc32e70a0c30"}, - {file = "google_api_core-2.16.1-py3-none-any.whl", hash = "sha256:257e9e152cd18da0c6701113c122ade04dca04731e179fc5c7dca48e1396ec4c"}, + {file = "google-api-core-2.15.0.tar.gz", hash = "sha256:abc978a72658f14a2df1e5e12532effe40f94f868f6e23d95133bd6abcca35ca"}, + {file = "google_api_core-2.15.0-py3-none-any.whl", hash = "sha256:2aa56d2be495551e66bbff7f729b790546f87d5c90e74781aa77233bcb395a8a"}, ] [package.dependencies] @@ -1952,13 +1961,13 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] name = "google-auth" -version = "2.27.0" +version = "2.26.2" description = "Google Authentication Library" optional = false python-versions = ">=3.7" files = [ - {file = "google-auth-2.27.0.tar.gz", hash = "sha256:e863a56ccc2d8efa83df7a80272601e43487fa9a728a376205c86c26aaefa821"}, - {file = "google_auth-2.27.0-py2.py3-none-any.whl", hash = "sha256:8e4bad367015430ff253fe49d500fdc3396c1a434db5740828c728e45bcce245"}, + {file = "google-auth-2.26.2.tar.gz", hash = "sha256:97327dbbf58cccb58fc5a1712bba403ae76668e64814eb30f7316f7e27126b81"}, + {file = "google_auth-2.26.2-py2.py3-none-any.whl", hash = "sha256:3f445c8ce9b61ed6459aad86d8ccdba4a9afed841b2d1451a11ef4db08957424"}, ] [package.dependencies] @@ -1993,13 +2002,13 @@ tool = ["click (>=6.0.0)"] [[package]] name = "google-cloud-bigquery" -version = "3.17.1" +version = "3.16.0" description = "Google BigQuery API client library" optional = true python-versions = ">=3.7" files = [ - {file = "google-cloud-bigquery-3.17.1.tar.gz", hash = "sha256:0ae07b90d5052ba3a296a2210a2144c28469300d71f6f455881f94c2df543057"}, - {file = "google_cloud_bigquery-3.17.1-py2.py3-none-any.whl", hash = 
"sha256:7a9a92c7b1f6a6bf8b4c05c150e49f4ad1a03dd591dbd4522381b3f23bf07c73"}, + {file = "google-cloud-bigquery-3.16.0.tar.gz", hash = "sha256:1d6abf4b1d740df17cb43a078789872af8059a0b1dd999f32ea69ebc6f7ba7ef"}, + {file = "google_cloud_bigquery-3.16.0-py2.py3-none-any.whl", hash = "sha256:8bac7754f92bf87ee81f38deabb7554d82bb9591fbe06a5c82f33e46e5a482f9"}, ] [package.dependencies] @@ -2289,13 +2298,13 @@ test = ["objgraph", "psutil"] [[package]] name = "griffe" -version = "0.40.0" +version = "0.38.1" description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." optional = false python-versions = ">=3.8" files = [ - {file = "griffe-0.40.0-py3-none-any.whl", hash = "sha256:db1da6d1d8e08cbb20f1a7dee8c09da940540c2d4c1bfa26a9091cf6fc36a9ec"}, - {file = "griffe-0.40.0.tar.gz", hash = "sha256:76c4439eaa2737af46ae003c331ab6ca79c5365b552f7b5aed263a3b4125735b"}, + {file = "griffe-0.38.1-py3-none-any.whl", hash = "sha256:334c79d3b5964ade65c05dfcaf53518c576dedd387aaba5c9fd71212f34f1483"}, + {file = "griffe-0.38.1.tar.gz", hash = "sha256:bd68d7da7f3d87bc57eb9962b250db123efd9bbcc06c11c1a91b6e583b2a9361"}, ] [package.dependencies] @@ -2399,13 +2408,13 @@ tests = ["freezegun", "pytest", "pytest-cov"] [[package]] name = "hypothesis" -version = "6.97.4" +version = "6.93.0" description = "A library for property-based testing" optional = false python-versions = ">=3.8" files = [ - {file = "hypothesis-6.97.4-py3-none-any.whl", hash = "sha256:9069fe3fb18d9b7dd218bd69ab50bbc66426819dfac7cc7168ba85034d98a4df"}, - {file = "hypothesis-6.97.4.tar.gz", hash = "sha256:28ff724fa81ccc55f64f0f1eb06e4a75db6a195fe0857e9b3184cf4ff613a103"}, + {file = "hypothesis-6.93.0-py3-none-any.whl", hash = "sha256:bfe6173e36c8cf0779a79de757a8a7151568b2703cb14dcbc186517c7a79144b"}, + {file = "hypothesis-6.93.0.tar.gz", hash = "sha256:e9ceaa5bbd244471fa1c28272fb2b0c68bb6ee014473394d63519ed02bd2d4de"}, ] [package.dependencies] @@ -2567,13 +2576,13 @@ ipywidgets = "*" [[package]] name = "ipykernel" -version = "6.29.0" +version = "6.28.0" description = "IPython Kernel for Jupyter" optional = false python-versions = ">=3.8" files = [ - {file = "ipykernel-6.29.0-py3-none-any.whl", hash = "sha256:076663ca68492576f051e4af7720d33f34383e655f2be0d544c8b1c9de915b2f"}, - {file = "ipykernel-6.29.0.tar.gz", hash = "sha256:b5dd3013cab7b330df712891c96cd1ab868c27a7159e606f762015e9bf8ceb3f"}, + {file = "ipykernel-6.28.0-py3-none-any.whl", hash = "sha256:c6e9a9c63a7f4095c0a22a79f765f079f9ec7be4f2430a898ddea889e8665661"}, + {file = "ipykernel-6.28.0.tar.gz", hash = "sha256:69c11403d26de69df02225916f916b37ea4b9af417da0a8c827f84328d88e5f3"}, ] [package.dependencies] @@ -2596,17 +2605,17 @@ cov = ["coverage[toml]", "curio", "matplotlib", "pytest-cov", "trio"] docs = ["myst-parser", "pydata-sphinx-theme", "sphinx", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "trio"] pyqt5 = ["pyqt5"] pyside6 = ["pyside6"] -test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio (==0.23.2)", "pytest-cov", "pytest-timeout"] +test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio", "pytest-cov", "pytest-timeout"] [[package]] name = "ipyleaflet" -version = "0.18.2" +version = "0.18.1" description = "A Jupyter widget for dynamic Leaflet maps" optional = false python-versions = ">=3.7" files = [ - {file = "ipyleaflet-0.18.2-py3-none-any.whl", hash = 
"sha256:dc5bed1bad3ba3244fe97aac9d4ed8f8096ae3d5e6ac0c5fdfbe7f1d2a01d3f8"}, - {file = "ipyleaflet-0.18.2.tar.gz", hash = "sha256:8f166529ec7784de08822b253b8cc593fa81af8a8f967d70cbc53e45a6d3755f"}, + {file = "ipyleaflet-0.18.1-py3-none-any.whl", hash = "sha256:c941429945248fb0fb8a7b30cc4f248d3194e4a409066a068495a633c97eb6c6"}, + {file = "ipyleaflet-0.18.1.tar.gz", hash = "sha256:f35d70ad0e0bb2c0c160b499ab8c788333fc54576596e33b974f0dfeee941d12"}, ] [package.dependencies] @@ -2769,13 +2778,13 @@ files = [ [[package]] name = "jsonschema" -version = "4.21.1" +version = "4.20.0" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.8" files = [ - {file = "jsonschema-4.21.1-py3-none-any.whl", hash = "sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f"}, - {file = "jsonschema-4.21.1.tar.gz", hash = "sha256:85727c00279f5fa6bedbe6238d2aa6403bedd8b4864ab11207d07df3cc1b2ee5"}, + {file = "jsonschema-4.20.0-py3-none-any.whl", hash = "sha256:ed6231f0429ecf966f5bc8dfef245998220549cbbcf140f913b7464c52c3b6b3"}, + {file = "jsonschema-4.20.0.tar.gz", hash = "sha256:4f614fd46d8d61258610998997743ec5492a648b33cf478c1ddc23ed4598a5fa"}, ] [package.dependencies] @@ -3142,71 +3151,71 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] [[package]] name = "markupsafe" -version = "2.1.4" +version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." optional = false python-versions = ">=3.7" files = [ - {file = "MarkupSafe-2.1.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:de8153a7aae3835484ac168a9a9bdaa0c5eee4e0bc595503c95d53b942879c84"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e888ff76ceb39601c59e219f281466c6d7e66bd375b4ec1ce83bcdc68306796b"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0b838c37ba596fcbfca71651a104a611543077156cb0a26fe0c475e1f152ee8"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac1ebf6983148b45b5fa48593950f90ed6d1d26300604f321c74a9ca1609f8e"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0fbad3d346df8f9d72622ac71b69565e621ada2ce6572f37c2eae8dacd60385d"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d5291d98cd3ad9a562883468c690a2a238c4a6388ab3bd155b0c75dd55ece858"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a7cc49ef48a3c7a0005a949f3c04f8baa5409d3f663a1b36f0eba9bfe2a0396e"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b83041cda633871572f0d3c41dddd5582ad7d22f65a72eacd8d3d6d00291df26"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-win32.whl", hash = "sha256:0c26f67b3fe27302d3a412b85ef696792c4a2386293c53ba683a89562f9399b0"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-win_amd64.whl", hash = "sha256:a76055d5cb1c23485d7ddae533229039b850db711c554a12ea64a0fd8a0129e2"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9e9e3c4020aa2dc62d5dd6743a69e399ce3de58320522948af6140ac959ab863"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0042d6a9880b38e1dd9ff83146cc3c9c18a059b9360ceae207805567aacccc69"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:55d03fea4c4e9fd0ad75dc2e7e2b6757b80c152c032ea1d1de487461d8140efc"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ab3a886a237f6e9c9f4f7d272067e712cdb4efa774bef494dccad08f39d8ae6"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abf5ebbec056817057bfafc0445916bb688a255a5146f900445d081db08cbabb"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e1a0d1924a5013d4f294087e00024ad25668234569289650929ab871231668e7"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e7902211afd0af05fbadcc9a312e4cf10f27b779cf1323e78d52377ae4b72bea"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c669391319973e49a7c6230c218a1e3044710bc1ce4c8e6eb71f7e6d43a2c131"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-win32.whl", hash = "sha256:31f57d64c336b8ccb1966d156932f3daa4fee74176b0fdc48ef580be774aae74"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-win_amd64.whl", hash = "sha256:54a7e1380dfece8847c71bf7e33da5d084e9b889c75eca19100ef98027bd9f56"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a76cd37d229fc385738bd1ce4cba2a121cf26b53864c1772694ad0ad348e509e"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:987d13fe1d23e12a66ca2073b8d2e2a75cec2ecb8eab43ff5624ba0ad42764bc"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5244324676254697fe5c181fc762284e2c5fceeb1c4e3e7f6aca2b6f107e60dc"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78bc995e004681246e85e28e068111a4c3f35f34e6c62da1471e844ee1446250"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4d176cfdfde84f732c4a53109b293d05883e952bbba68b857ae446fa3119b4f"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f9917691f410a2e0897d1ef99619fd3f7dd503647c8ff2475bf90c3cf222ad74"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f06e5a9e99b7df44640767842f414ed5d7bedaaa78cd817ce04bbd6fd86e2dd6"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:396549cea79e8ca4ba65525470d534e8a41070e6b3500ce2414921099cb73e8d"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-win32.whl", hash = "sha256:f6be2d708a9d0e9b0054856f07ac7070fbe1754be40ca8525d5adccdbda8f475"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:5045e892cfdaecc5b4c01822f353cf2c8feb88a6ec1c0adef2a2e705eef0f656"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7a07f40ef8f0fbc5ef1000d0c78771f4d5ca03b4953fc162749772916b298fc4"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d18b66fe626ac412d96c2ab536306c736c66cf2a31c243a45025156cc190dc8a"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:698e84142f3f884114ea8cf83e7a67ca8f4ace8454e78fe960646c6c91c63bfa"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49a3b78a5af63ec10d8604180380c13dcd870aba7928c1fe04e881d5c792dc4e"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = 
"sha256:15866d7f2dc60cfdde12ebb4e75e41be862348b4728300c36cdf405e258415ec"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:6aa5e2e7fc9bc042ae82d8b79d795b9a62bd8f15ba1e7594e3db243f158b5565"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:54635102ba3cf5da26eb6f96c4b8c53af8a9c0d97b64bdcb592596a6255d8518"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-win32.whl", hash = "sha256:3583a3a3ab7958e354dc1d25be74aee6228938312ee875a22330c4dc2e41beb0"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-win_amd64.whl", hash = "sha256:d6e427c7378c7f1b2bef6a344c925b8b63623d3321c09a237b7cc0e77dd98ceb"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:bf1196dcc239e608605b716e7b166eb5faf4bc192f8a44b81e85251e62584bd2"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4df98d4a9cd6a88d6a585852f56f2155c9cdb6aec78361a19f938810aa020954"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b835aba863195269ea358cecc21b400276747cc977492319fd7682b8cd2c253d"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23984d1bdae01bee794267424af55eef4dfc038dc5d1272860669b2aa025c9e3"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c98c33ffe20e9a489145d97070a435ea0679fddaabcafe19982fe9c971987d5"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9896fca4a8eb246defc8b2a7ac77ef7553b638e04fbf170bff78a40fa8a91474"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b0fe73bac2fed83839dbdbe6da84ae2a31c11cfc1c777a40dbd8ac8a6ed1560f"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c7556bafeaa0a50e2fe7dc86e0382dea349ebcad8f010d5a7dc6ba568eaaa789"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-win32.whl", hash = "sha256:fc1a75aa8f11b87910ffd98de62b29d6520b6d6e8a3de69a70ca34dea85d2a8a"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-win_amd64.whl", hash = "sha256:3a66c36a3864df95e4f62f9167c734b3b1192cb0851b43d7cc08040c074c6279"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:765f036a3d00395a326df2835d8f86b637dbaf9832f90f5d196c3b8a7a5080cb"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:21e7af8091007bf4bebf4521184f4880a6acab8df0df52ef9e513d8e5db23411"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5c31fe855c77cad679b302aabc42d724ed87c043b1432d457f4976add1c2c3e"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7653fa39578957bc42e5ebc15cf4361d9e0ee4b702d7d5ec96cdac860953c5b4"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47bb5f0142b8b64ed1399b6b60f700a580335c8e1c57f2f15587bd072012decc"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:fe8512ed897d5daf089e5bd010c3dc03bb1bdae00b35588c49b98268d4a01e00"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:36d7626a8cca4d34216875aee5a1d3d654bb3dac201c1c003d182283e3205949"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b6f14a9cd50c3cb100eb94b3273131c80d102e19bb20253ac7bd7336118a673a"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-win32.whl", hash = 
"sha256:c8f253a84dbd2c63c19590fa86a032ef3d8cc18923b8049d91bcdeeb2581fbf6"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-win_amd64.whl", hash = "sha256:8b570a1537367b52396e53325769608f2a687ec9a4363647af1cded8928af959"}, - {file = "MarkupSafe-2.1.4.tar.gz", hash = "sha256:3aae9af4cac263007fd6309c64c6ab4506dd2b79382d9d19a1994f9240b8db4f"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = 
"sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, + {file = 
"MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, + {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, ] [[package]] @@ -3534,13 +3543,13 @@ test = ["pep440", "pre-commit", "pytest", "testpath"] [[package]] name = "nest-asyncio" -version = "1.6.0" +version = "1.5.8" description = "Patch asyncio to allow nested event loops" optional = false python-versions = ">=3.5" files = [ - {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, - {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, + {file = "nest_asyncio-1.5.8-py3-none-any.whl", hash = "sha256:accda7a339a70599cb08f9dd09a67e0c2ef8d8d6f4c07f96ab203f2ae254e48d"}, + {file = "nest_asyncio-1.5.8.tar.gz", hash = "sha256:25aa2ca0d2a5b5531956b9e273b45cf664cae2b145101d73b86b199978d48fdb"}, ] [[package]] @@ -3685,40 +3694,36 @@ files = [ [[package]] name = "pandas" -version = "2.2.0" +version = "2.1.4" description = "Powerful data structures for data analysis, time series, and 
statistics" optional = false python-versions = ">=3.9" files = [ - {file = "pandas-2.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8108ee1712bb4fa2c16981fba7e68b3f6ea330277f5ca34fa8d557e986a11670"}, - {file = "pandas-2.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:736da9ad4033aeab51d067fc3bd69a0ba36f5a60f66a527b3d72e2030e63280a"}, - {file = "pandas-2.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38e0b4fc3ddceb56ec8a287313bc22abe17ab0eb184069f08fc6a9352a769b18"}, - {file = "pandas-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20404d2adefe92aed3b38da41d0847a143a09be982a31b85bc7dd565bdba0f4e"}, - {file = "pandas-2.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7ea3ee3f125032bfcade3a4cf85131ed064b4f8dd23e5ce6fa16473e48ebcaf5"}, - {file = "pandas-2.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f9670b3ac00a387620489dfc1bca66db47a787f4e55911f1293063a78b108df1"}, - {file = "pandas-2.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:5a946f210383c7e6d16312d30b238fd508d80d927014f3b33fb5b15c2f895430"}, - {file = "pandas-2.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a1b438fa26b208005c997e78672f1aa8138f67002e833312e6230f3e57fa87d5"}, - {file = "pandas-2.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8ce2fbc8d9bf303ce54a476116165220a1fedf15985b09656b4b4275300e920b"}, - {file = "pandas-2.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2707514a7bec41a4ab81f2ccce8b382961a29fbe9492eab1305bb075b2b1ff4f"}, - {file = "pandas-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85793cbdc2d5bc32620dc8ffa715423f0c680dacacf55056ba13454a5be5de88"}, - {file = "pandas-2.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:cfd6c2491dc821b10c716ad6776e7ab311f7df5d16038d0b7458bc0b67dc10f3"}, - {file = "pandas-2.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a146b9dcacc3123aa2b399df1a284de5f46287a4ab4fbfc237eac98a92ebcb71"}, - {file = "pandas-2.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbc1b53c0e1fdf16388c33c3cca160f798d38aea2978004dd3f4d3dec56454c9"}, - {file = "pandas-2.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a41d06f308a024981dcaa6c41f2f2be46a6b186b902c94c2674e8cb5c42985bc"}, - {file = "pandas-2.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:159205c99d7a5ce89ecfc37cb08ed179de7783737cea403b295b5eda8e9c56d1"}, - {file = "pandas-2.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb1e1f3861ea9132b32f2133788f3b14911b68102d562715d71bd0013bc45440"}, - {file = "pandas-2.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:761cb99b42a69005dec2b08854fb1d4888fdf7b05db23a8c5a099e4b886a2106"}, - {file = "pandas-2.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a20628faaf444da122b2a64b1e5360cde100ee6283ae8effa0d8745153809a2e"}, - {file = "pandas-2.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f5be5d03ea2073627e7111f61b9f1f0d9625dc3c4d8dda72cc827b0c58a1d042"}, - {file = "pandas-2.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:a626795722d893ed6aacb64d2401d017ddc8a2341b49e0384ab9bf7112bdec30"}, - {file = "pandas-2.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9f66419d4a41132eb7e9a73dcec9486cf5019f52d90dd35547af11bc58f8637d"}, - {file = "pandas-2.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:57abcaeda83fb80d447f28ab0cc7b32b13978f6f733875ebd1ed14f8fbc0f4ab"}, - {file = 
"pandas-2.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e60f1f7dba3c2d5ca159e18c46a34e7ca7247a73b5dd1a22b6d59707ed6b899a"}, - {file = "pandas-2.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb61dc8567b798b969bcc1fc964788f5a68214d333cade8319c7ab33e2b5d88a"}, - {file = "pandas-2.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:52826b5f4ed658fa2b729264d63f6732b8b29949c7fd234510d57c61dbeadfcd"}, - {file = "pandas-2.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bde2bc699dbd80d7bc7f9cab1e23a95c4375de615860ca089f34e7c64f4a8de7"}, - {file = "pandas-2.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:3de918a754bbf2da2381e8a3dcc45eede8cd7775b047b923f9006d5f876802ae"}, - {file = "pandas-2.2.0.tar.gz", hash = "sha256:30b83f7c3eb217fb4d1b494a57a2fda5444f17834f5df2de6b2ffff68dc3c8e2"}, + {file = "pandas-2.1.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bdec823dc6ec53f7a6339a0e34c68b144a7a1fd28d80c260534c39c62c5bf8c9"}, + {file = "pandas-2.1.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:294d96cfaf28d688f30c918a765ea2ae2e0e71d3536754f4b6de0ea4a496d034"}, + {file = "pandas-2.1.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b728fb8deba8905b319f96447a27033969f3ea1fea09d07d296c9030ab2ed1d"}, + {file = "pandas-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00028e6737c594feac3c2df15636d73ace46b8314d236100b57ed7e4b9ebe8d9"}, + {file = "pandas-2.1.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:426dc0f1b187523c4db06f96fb5c8d1a845e259c99bda74f7de97bd8a3bb3139"}, + {file = "pandas-2.1.4-cp310-cp310-win_amd64.whl", hash = "sha256:f237e6ca6421265643608813ce9793610ad09b40154a3344a088159590469e46"}, + {file = "pandas-2.1.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b7d852d16c270e4331f6f59b3e9aa23f935f5c4b0ed2d0bc77637a8890a5d092"}, + {file = "pandas-2.1.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7d5f2f54f78164b3d7a40f33bf79a74cdee72c31affec86bfcabe7e0789821"}, + {file = "pandas-2.1.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0aa6e92e639da0d6e2017d9ccff563222f4eb31e4b2c3cf32a2a392fc3103c0d"}, + {file = "pandas-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d797591b6846b9db79e65dc2d0d48e61f7db8d10b2a9480b4e3faaddc421a171"}, + {file = "pandas-2.1.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d2d3e7b00f703aea3945995ee63375c61b2e6aa5aa7871c5d622870e5e137623"}, + {file = "pandas-2.1.4-cp311-cp311-win_amd64.whl", hash = "sha256:dc9bf7ade01143cddc0074aa6995edd05323974e6e40d9dbde081021ded8510e"}, + {file = "pandas-2.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:482d5076e1791777e1571f2e2d789e940dedd927325cc3cb6d0800c6304082f6"}, + {file = "pandas-2.1.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8a706cfe7955c4ca59af8c7a0517370eafbd98593155b48f10f9811da440248b"}, + {file = "pandas-2.1.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0513a132a15977b4a5b89aabd304647919bc2169eac4c8536afb29c07c23540"}, + {file = "pandas-2.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9f17f2b6fc076b2a0078862547595d66244db0f41bf79fc5f64a5c4d635bead"}, + {file = "pandas-2.1.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:45d63d2a9b1b37fa6c84a68ba2422dc9ed018bdaa668c7f47566a01188ceeec1"}, + {file = "pandas-2.1.4-cp312-cp312-win_amd64.whl", hash = 
"sha256:f69b0c9bb174a2342818d3e2778584e18c740d56857fc5cdb944ec8bbe4082cf"}, + {file = "pandas-2.1.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3f06bda01a143020bad20f7a85dd5f4a1600112145f126bc9e3e42077c24ef34"}, + {file = "pandas-2.1.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ab5796839eb1fd62a39eec2916d3e979ec3130509930fea17fe6f81e18108f6a"}, + {file = "pandas-2.1.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edbaf9e8d3a63a9276d707b4d25930a262341bca9874fcb22eff5e3da5394732"}, + {file = "pandas-2.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ebfd771110b50055712b3b711b51bee5d50135429364d0498e1213a7adc2be8"}, + {file = "pandas-2.1.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8ea107e0be2aba1da619cc6ba3f999b2bfc9669a83554b1904ce3dd9507f0860"}, + {file = "pandas-2.1.4-cp39-cp39-win_amd64.whl", hash = "sha256:d65148b14788b3758daf57bf42725caa536575da2b64df9964c563b015230984"}, + {file = "pandas-2.1.4.tar.gz", hash = "sha256:fcb68203c833cc735321512e13861358079a96c174a61f5116a1de89c58c0ef7"}, ] [package.dependencies] @@ -3729,31 +3734,31 @@ numpy = [ ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" -tzdata = ">=2022.7" +tzdata = ">=2022.1" [package.extras] -all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] -aws = ["s3fs (>=2022.11.0)"] -clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] -compression = ["zstandard (>=0.19.0)"] -computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"] +aws = ["s3fs (>=2022.05.0)"] +clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"] +compression = ["zstandard (>=0.17.0)"] +computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"] consortium-standard = ["dataframe-api-compat (>=0.1.7)"] -excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] -feather = ["pyarrow (>=10.0.1)"] -fss = ["fsspec (>=2022.11.0)"] -gcp = ["gcsfs 
(>=2022.11.0)", "pandas-gbq (>=0.19.0)"] -hdf5 = ["tables (>=3.8.0)"] -html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] -mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] -output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] -parquet = ["pyarrow (>=10.0.1)"] -performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] -plot = ["matplotlib (>=3.6.3)"] -postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] -spss = ["pyreadstat (>=1.2.0)"] -sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"] +feather = ["pyarrow (>=7.0.0)"] +fss = ["fsspec (>=2022.05.0)"] +gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"] +hdf5 = ["tables (>=3.7.0)"] +html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"] +mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"] +parquet = ["pyarrow (>=7.0.0)"] +performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"] +plot = ["matplotlib (>=3.6.1)"] +postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"] +spss = ["pyreadstat (>=1.1.5)"] +sql-other = ["SQLAlchemy (>=1.4.36)"] test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] -xml = ["lxml (>=4.9.2)"] +xml = ["lxml (>=4.8.0)"] [[package]] name = "parso" @@ -4035,13 +4040,13 @@ typing = ["ipython", "pandas-stubs", "pyright"] [[package]] name = "pluggy" -version = "1.4.0" +version = "1.3.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" files = [ - {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"}, - {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, + {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, + {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, ] [package.extras] @@ -4050,22 +4055,21 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "plum-dispatch" -version = "2.3.2" +version = "2.2.2" description = "Multiple dispatch in Python" optional = false python-versions = ">=3.8" files = [ - {file = "plum_dispatch-2.3.2-py3-none-any.whl", hash = "sha256:96f519d416accf9a009117682f689114eb23e867bb6f977eed74ef85ef7fef9d"}, - {file = "plum_dispatch-2.3.2.tar.gz", hash = "sha256:f49f00dfdf7ab0f16c9b85cc27cc5241ffb59aee02218bac671ec7c1ac65e139"}, + {file = "plum_dispatch-2.2.2-py3-none-any.whl", hash = "sha256:d7ee415bd166ffa90eaa4b24d7c9dc7ca6f8875750586001e7c9baff706223bd"}, + {file = "plum_dispatch-2.2.2.tar.gz", hash = "sha256:d5d180225c9fbf0277375bb558b649d97d0b651a91037bb7155cedbe9f52764b"}, ] [package.dependencies] beartype = ">=0.16.2" -rich = ">=10.0" typing-extensions = {version = "*", markers = "python_version <= \"3.10\""} [package.extras] -dev = ["black (==23.9.0)", "build", "coveralls", "ghp-import", "ipython", "jupyter-book", "mypy", "numpy", "pre-commit", "pyright (>=1.1.331)", "pytest (>=6)", "pytest-cov", "ruff (==0.1.0)", "tox", "wheel"] +dev = ["black (==23.9.0)", "build", "coveralls", "ghp-import", "ipython", "jupyter-book", "mypy", "numpy", "pre-commit", "pyright", 
"pytest (>=6)", "pytest-cov", "tox", "wheel"] [[package]] name = "poetry" @@ -4151,17 +4155,17 @@ poetry-core = ">=1.7.0,<2.0.0" [[package]] name = "polars" -version = "0.20.6" +version = "0.20.4" description = "Blazingly fast DataFrame library" optional = true python-versions = ">=3.8" files = [ - {file = "polars-0.20.6-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:59845bae0b614b3291baa889cfc2a251e1024129696bb655596f2b5556e9f9a1"}, - {file = "polars-0.20.6-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:9e86736f68440bf97a9100fa0a79ae7ce616d1af6fd4669fff1345f03aab14c0"}, - {file = "polars-0.20.6-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4f4e3335fdcc863f6aac0616510b1baa5e13d5e818ebbfcb980ad534bd6edc2"}, - {file = "polars-0.20.6-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:7c7b494beea914a54bcae8868dee3988a88ecb48525df948e07aacf2fb83e711"}, - {file = "polars-0.20.6-cp38-abi3-win_amd64.whl", hash = "sha256:a96b157d68697c8d6ef2f7c2cc1734d498c3c6cc0c9c18d4fff7283ccfabdd1d"}, - {file = "polars-0.20.6.tar.gz", hash = "sha256:b53553308bc7e2b4f841b18f1949b61ed7f2cf155c5c64712298efa5af67a997"}, + {file = "polars-0.20.4-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:5fa84f74fc2274e3926d083ccd084c81b3e04debdc66fd917cafe7026d1df19c"}, + {file = "polars-0.20.4-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:4c5a9f981708f3d090dd5513d83806bcb8a1725653d80bcf63bb738a097b1162"}, + {file = "polars-0.20.4-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfdc2672423c9c73e34161b7b4833c40d042b9d36e899866dc858e8a221b0849"}, + {file = "polars-0.20.4-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:76f270fb17955c97958c2e301c5c2fa10015ccf3048697964ad9c2198e4c6fe6"}, + {file = "polars-0.20.4-cp38-abi3-win_amd64.whl", hash = "sha256:96a067be35745942d3fe6cd3ad1513f9ab7f4249d2b2502484ee64b30d221f96"}, + {file = "polars-0.20.4.tar.gz", hash = "sha256:21a90aa0c7401c80fc814b4db371dced780df6bd5ac81a329307e796b5821190"}, ] [package.extras] @@ -4268,27 +4272,27 @@ files = [ [[package]] name = "psutil" -version = "5.9.8" +version = "5.9.7" description = "Cross-platform lib for process and system monitoring in Python." 
optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ - {file = "psutil-5.9.8-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:26bd09967ae00920df88e0352a91cff1a78f8d69b3ecabbfe733610c0af486c8"}, - {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:05806de88103b25903dff19bb6692bd2e714ccf9e668d050d144012055cbca73"}, - {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:611052c4bc70432ec770d5d54f64206aa7203a101ec273a0cd82418c86503bb7"}, - {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:50187900d73c1381ba1454cf40308c2bf6f34268518b3f36a9b663ca87e65e36"}, - {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:02615ed8c5ea222323408ceba16c60e99c3f91639b07da6373fb7e6539abc56d"}, - {file = "psutil-5.9.8-cp27-none-win32.whl", hash = "sha256:36f435891adb138ed3c9e58c6af3e2e6ca9ac2f365efe1f9cfef2794e6c93b4e"}, - {file = "psutil-5.9.8-cp27-none-win_amd64.whl", hash = "sha256:bd1184ceb3f87651a67b2708d4c3338e9b10c5df903f2e3776b62303b26cb631"}, - {file = "psutil-5.9.8-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81"}, - {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421"}, - {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4"}, - {file = "psutil-5.9.8-cp36-cp36m-win32.whl", hash = "sha256:7d79560ad97af658a0f6adfef8b834b53f64746d45b403f225b85c5c2c140eee"}, - {file = "psutil-5.9.8-cp36-cp36m-win_amd64.whl", hash = "sha256:27cc40c3493bb10de1be4b3f07cae4c010ce715290a5be22b98493509c6299e2"}, - {file = "psutil-5.9.8-cp37-abi3-win32.whl", hash = "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0"}, - {file = "psutil-5.9.8-cp37-abi3-win_amd64.whl", hash = "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf"}, - {file = "psutil-5.9.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8"}, - {file = "psutil-5.9.8.tar.gz", hash = "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c"}, + {file = "psutil-5.9.7-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:0bd41bf2d1463dfa535942b2a8f0e958acf6607ac0be52265ab31f7923bcd5e6"}, + {file = "psutil-5.9.7-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:5794944462509e49d4d458f4dbfb92c47539e7d8d15c796f141f474010084056"}, + {file = "psutil-5.9.7-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:fe361f743cb3389b8efda21980d93eb55c1f1e3898269bc9a2a1d0bb7b1f6508"}, + {file = "psutil-5.9.7-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:e469990e28f1ad738f65a42dcfc17adaed9d0f325d55047593cb9033a0ab63df"}, + {file = "psutil-5.9.7-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:3c4747a3e2ead1589e647e64aad601981f01b68f9398ddf94d01e3dc0d1e57c7"}, + {file = "psutil-5.9.7-cp27-none-win32.whl", hash = "sha256:1d4bc4a0148fdd7fd8f38e0498639ae128e64538faa507df25a20f8f7fb2341c"}, + {file = "psutil-5.9.7-cp27-none-win_amd64.whl", hash = "sha256:4c03362e280d06bbbfcd52f29acd79c733e0af33d707c54255d21029b8b32ba6"}, + {file = "psutil-5.9.7-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ea36cc62e69a13ec52b2f625c27527f6e4479bca2b340b7a452af55b34fcbe2e"}, + 
{file = "psutil-5.9.7-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1132704b876e58d277168cd729d64750633d5ff0183acf5b3c986b8466cd0284"}, + {file = "psutil-5.9.7-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe8b7f07948f1304497ce4f4684881250cd859b16d06a1dc4d7941eeb6233bfe"}, + {file = "psutil-5.9.7-cp36-cp36m-win32.whl", hash = "sha256:b27f8fdb190c8c03914f908a4555159327d7481dac2f01008d483137ef3311a9"}, + {file = "psutil-5.9.7-cp36-cp36m-win_amd64.whl", hash = "sha256:44969859757f4d8f2a9bd5b76eba8c3099a2c8cf3992ff62144061e39ba8568e"}, + {file = "psutil-5.9.7-cp37-abi3-win32.whl", hash = "sha256:c727ca5a9b2dd5193b8644b9f0c883d54f1248310023b5ad3e92036c5e2ada68"}, + {file = "psutil-5.9.7-cp37-abi3-win_amd64.whl", hash = "sha256:f37f87e4d73b79e6c5e749440c3113b81d1ee7d26f21c19c47371ddea834f414"}, + {file = "psutil-5.9.7-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:032f4f2c909818c86cea4fe2cc407f1c0f0cde8e6c6d702b28b8ce0c0d143340"}, + {file = "psutil-5.9.7.tar.gz", hash = "sha256:3f02134e82cfb5d089fddf20bb2e03fd5cd52395321d1c8458a9e58500ff417c"}, ] [package.extras] @@ -4426,51 +4430,51 @@ files = [ [[package]] name = "pyarrow" -version = "15.0.0" +version = "14.0.2" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.8" files = [ - {file = "pyarrow-15.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:0a524532fd6dd482edaa563b686d754c70417c2f72742a8c990b322d4c03a15d"}, - {file = "pyarrow-15.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a6bdb314affa9c2e0d5dddf3d9cbb9ef4a8dddaa68669975287d47ece67642"}, - {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:66958fd1771a4d4b754cd385835e66a3ef6b12611e001d4e5edfcef5f30391e2"}, - {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f500956a49aadd907eaa21d4fff75f73954605eaa41f61cb94fb008cf2e00c6"}, - {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6f87d9c4f09e049c2cade559643424da84c43a35068f2a1c4653dc5b1408a929"}, - {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:85239b9f93278e130d86c0e6bb455dcb66fc3fd891398b9d45ace8799a871a1e"}, - {file = "pyarrow-15.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5b8d43e31ca16aa6e12402fcb1e14352d0d809de70edd185c7650fe80e0769e3"}, - {file = "pyarrow-15.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:fa7cd198280dbd0c988df525e50e35b5d16873e2cdae2aaaa6363cdb64e3eec5"}, - {file = "pyarrow-15.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8780b1a29d3c8b21ba6b191305a2a607de2e30dab399776ff0aa09131e266340"}, - {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0ec198ccc680f6c92723fadcb97b74f07c45ff3fdec9dd765deb04955ccf19"}, - {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036a7209c235588c2f07477fe75c07e6caced9b7b61bb897c8d4e52c4b5f9555"}, - {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2bd8a0e5296797faf9a3294e9fa2dc67aa7f10ae2207920dbebb785c77e9dbe5"}, - {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e8ebed6053dbe76883a822d4e8da36860f479d55a762bd9e70d8494aed87113e"}, - {file = "pyarrow-15.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:17d53a9d1b2b5bd7d5e4cd84d018e2a45bc9baaa68f7e6e3ebed45649900ba99"}, 
- {file = "pyarrow-15.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9950a9c9df24090d3d558b43b97753b8f5867fb8e521f29876aa021c52fda351"}, - {file = "pyarrow-15.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:003d680b5e422d0204e7287bb3fa775b332b3fce2996aa69e9adea23f5c8f970"}, - {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f75fce89dad10c95f4bf590b765e3ae98bcc5ba9f6ce75adb828a334e26a3d40"}, - {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca9cb0039923bec49b4fe23803807e4ef39576a2bec59c32b11296464623dc2"}, - {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ed5a78ed29d171d0acc26a305a4b7f83c122d54ff5270810ac23c75813585e4"}, - {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6eda9e117f0402dfcd3cd6ec9bfee89ac5071c48fc83a84f3075b60efa96747f"}, - {file = "pyarrow-15.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a3a6180c0e8f2727e6f1b1c87c72d3254cac909e609f35f22532e4115461177"}, - {file = "pyarrow-15.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:19a8918045993349b207de72d4576af0191beef03ea655d8bdb13762f0cd6eac"}, - {file = "pyarrow-15.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d0ec076b32bacb6666e8813a22e6e5a7ef1314c8069d4ff345efa6246bc38593"}, - {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5db1769e5d0a77eb92344c7382d6543bea1164cca3704f84aa44e26c67e320fb"}, - {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2617e3bf9df2a00020dd1c1c6dce5cc343d979efe10bc401c0632b0eef6ef5b"}, - {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:d31c1d45060180131caf10f0f698e3a782db333a422038bf7fe01dace18b3a31"}, - {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:c8c287d1d479de8269398b34282e206844abb3208224dbdd7166d580804674b7"}, - {file = "pyarrow-15.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:07eb7f07dc9ecbb8dace0f58f009d3a29ee58682fcdc91337dfeb51ea618a75b"}, - {file = "pyarrow-15.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:47af7036f64fce990bb8a5948c04722e4e3ea3e13b1007ef52dfe0aa8f23cf7f"}, - {file = "pyarrow-15.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93768ccfff85cf044c418bfeeafce9a8bb0cee091bd8fd19011aff91e58de540"}, - {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6ee87fd6892700960d90abb7b17a72a5abb3b64ee0fe8db6c782bcc2d0dc0b4"}, - {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:001fca027738c5f6be0b7a3159cc7ba16a5c52486db18160909a0831b063c4e4"}, - {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:d1c48648f64aec09accf44140dccb92f4f94394b8d79976c426a5b79b11d4fa7"}, - {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:972a0141be402bb18e3201448c8ae62958c9c7923dfaa3b3d4530c835ac81aed"}, - {file = "pyarrow-15.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:f01fc5cf49081426429127aa2d427d9d98e1cb94a32cb961d583a70b7c4504e6"}, - {file = "pyarrow-15.0.0.tar.gz", hash = "sha256:876858f549d540898f927eba4ef77cd549ad8d24baa3207cf1b72e5788b50e83"}, -] - -[package.dependencies] -numpy = ">=1.16.6,<2" + {file = "pyarrow-14.0.2-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:ba9fe808596c5dbd08b3aeffe901e5f81095baaa28e7d5118e01354c64f22807"}, + {file = 
"pyarrow-14.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:22a768987a16bb46220cef490c56c671993fbee8fd0475febac0b3e16b00a10e"}, + {file = "pyarrow-14.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2dbba05e98f247f17e64303eb876f4a80fcd32f73c7e9ad975a83834d81f3fda"}, + {file = "pyarrow-14.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a898d134d00b1eca04998e9d286e19653f9d0fcb99587310cd10270907452a6b"}, + {file = "pyarrow-14.0.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:87e879323f256cb04267bb365add7208f302df942eb943c93a9dfeb8f44840b1"}, + {file = "pyarrow-14.0.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:76fc257559404ea5f1306ea9a3ff0541bf996ff3f7b9209fc517b5e83811fa8e"}, + {file = "pyarrow-14.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:b0c4a18e00f3a32398a7f31da47fefcd7a927545b396e1f15d0c85c2f2c778cd"}, + {file = "pyarrow-14.0.2-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:87482af32e5a0c0cce2d12eb3c039dd1d853bd905b04f3f953f147c7a196915b"}, + {file = "pyarrow-14.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:059bd8f12a70519e46cd64e1ba40e97eae55e0cbe1695edd95384653d7626b23"}, + {file = "pyarrow-14.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f16111f9ab27e60b391c5f6d197510e3ad6654e73857b4e394861fc79c37200"}, + {file = "pyarrow-14.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06ff1264fe4448e8d02073f5ce45a9f934c0f3db0a04460d0b01ff28befc3696"}, + {file = "pyarrow-14.0.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6dd4f4b472ccf4042f1eab77e6c8bce574543f54d2135c7e396f413046397d5a"}, + {file = "pyarrow-14.0.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:32356bfb58b36059773f49e4e214996888eeea3a08893e7dbde44753799b2a02"}, + {file = "pyarrow-14.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:52809ee69d4dbf2241c0e4366d949ba035cbcf48409bf404f071f624ed313a2b"}, + {file = "pyarrow-14.0.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:c87824a5ac52be210d32906c715f4ed7053d0180c1060ae3ff9b7e560f53f944"}, + {file = "pyarrow-14.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a25eb2421a58e861f6ca91f43339d215476f4fe159eca603c55950c14f378cc5"}, + {file = "pyarrow-14.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c1da70d668af5620b8ba0a23f229030a4cd6c5f24a616a146f30d2386fec422"}, + {file = "pyarrow-14.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2cc61593c8e66194c7cdfae594503e91b926a228fba40b5cf25cc593563bcd07"}, + {file = "pyarrow-14.0.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:78ea56f62fb7c0ae8ecb9afdd7893e3a7dbeb0b04106f5c08dbb23f9c0157591"}, + {file = "pyarrow-14.0.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:37c233ddbce0c67a76c0985612fef27c0c92aef9413cf5aa56952f359fcb7379"}, + {file = "pyarrow-14.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:e4b123ad0f6add92de898214d404e488167b87b5dd86e9a434126bc2b7a5578d"}, + {file = "pyarrow-14.0.2-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:e354fba8490de258be7687f341bc04aba181fc8aa1f71e4584f9890d9cb2dec2"}, + {file = "pyarrow-14.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:20e003a23a13da963f43e2b432483fdd8c38dc8882cd145f09f21792e1cf22a1"}, + {file = "pyarrow-14.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc0de7575e841f1595ac07e5bc631084fd06ca8b03c0f2ecece733d23cd5102a"}, + {file = 
"pyarrow-14.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66e986dc859712acb0bd45601229021f3ffcdfc49044b64c6d071aaf4fa49e98"}, + {file = "pyarrow-14.0.2-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:f7d029f20ef56673a9730766023459ece397a05001f4e4d13805111d7c2108c0"}, + {file = "pyarrow-14.0.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:209bac546942b0d8edc8debda248364f7f668e4aad4741bae58e67d40e5fcf75"}, + {file = "pyarrow-14.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:1e6987c5274fb87d66bb36816afb6f65707546b3c45c44c28e3c4133c010a881"}, + {file = "pyarrow-14.0.2-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:a01d0052d2a294a5f56cc1862933014e696aa08cc7b620e8c0cce5a5d362e976"}, + {file = "pyarrow-14.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a51fee3a7db4d37f8cda3ea96f32530620d43b0489d169b285d774da48ca9785"}, + {file = "pyarrow-14.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64df2bf1ef2ef14cee531e2dfe03dd924017650ffaa6f9513d7a1bb291e59c15"}, + {file = "pyarrow-14.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c0fa3bfdb0305ffe09810f9d3e2e50a2787e3a07063001dcd7adae0cee3601a"}, + {file = "pyarrow-14.0.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:c65bf4fd06584f058420238bc47a316e80dda01ec0dfb3044594128a6c2db794"}, + {file = "pyarrow-14.0.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:63ac901baec9369d6aae1cbe6cca11178fb018a8d45068aaf5bb54f94804a866"}, + {file = "pyarrow-14.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:75ee0efe7a87a687ae303d63037d08a48ef9ea0127064df18267252cfe2e9541"}, + {file = "pyarrow-14.0.2.tar.gz", hash = "sha256:36cef6ba12b499d864d1def3e990f97949e0b79400d08b7cf74504ffbd3eb025"}, +] + +[package.dependencies] +numpy = ">=1.16.6" [[package]] name = "pyarrow-hotfix" @@ -4521,18 +4525,18 @@ files = [ [[package]] name = "pydantic" -version = "2.6.0" +version = "2.5.3" description = "Data validation using Python type hints" optional = false -python-versions = ">=3.8" +python-versions = ">=3.7" files = [ - {file = "pydantic-2.6.0-py3-none-any.whl", hash = "sha256:1440966574e1b5b99cf75a13bec7b20e3512e8a61b894ae252f56275e2c465ae"}, - {file = "pydantic-2.6.0.tar.gz", hash = "sha256:ae887bd94eb404b09d86e4d12f93893bdca79d766e738528c6fa1c849f3c6bcf"}, + {file = "pydantic-2.5.3-py3-none-any.whl", hash = "sha256:d0caf5954bee831b6bfe7e338c32b9e30c85dfe080c843680783ac2b631673b4"}, + {file = "pydantic-2.5.3.tar.gz", hash = "sha256:b3ef57c62535b0941697cce638c08900d87fcb67e29cfa99e8a68f747f393f7a"}, ] [package.dependencies] annotated-types = ">=0.4.0" -pydantic-core = "2.16.1" +pydantic-core = "2.14.6" typing-extensions = ">=4.6.1" [package.extras] @@ -4540,90 +4544,116 @@ email = ["email-validator (>=2.0.0)"] [[package]] name = "pydantic-core" -version = "2.16.1" +version = "2.14.6" description = "" optional = false -python-versions = ">=3.8" +python-versions = ">=3.7" files = [ - {file = "pydantic_core-2.16.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:300616102fb71241ff477a2cbbc847321dbec49428434a2f17f37528721c4948"}, - {file = "pydantic_core-2.16.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5511f962dd1b9b553e9534c3b9c6a4b0c9ded3d8c2be96e61d56f933feef9e1f"}, - {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98f0edee7ee9cc7f9221af2e1b95bd02810e1c7a6d115cfd82698803d385b28f"}, - {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:9795f56aa6b2296f05ac79d8a424e94056730c0b860a62b0fdcfe6340b658cc8"}, - {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c45f62e4107ebd05166717ac58f6feb44471ed450d07fecd90e5f69d9bf03c48"}, - {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:462d599299c5971f03c676e2b63aa80fec5ebc572d89ce766cd11ca8bcb56f3f"}, - {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21ebaa4bf6386a3b22eec518da7d679c8363fb7fb70cf6972161e5542f470798"}, - {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:99f9a50b56713a598d33bc23a9912224fc5d7f9f292444e6664236ae471ddf17"}, - {file = "pydantic_core-2.16.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:8ec364e280db4235389b5e1e6ee924723c693cbc98e9d28dc1767041ff9bc388"}, - {file = "pydantic_core-2.16.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:653a5dfd00f601a0ed6654a8b877b18d65ac32c9d9997456e0ab240807be6cf7"}, - {file = "pydantic_core-2.16.1-cp310-none-win32.whl", hash = "sha256:1661c668c1bb67b7cec96914329d9ab66755911d093bb9063c4c8914188af6d4"}, - {file = "pydantic_core-2.16.1-cp310-none-win_amd64.whl", hash = "sha256:561be4e3e952c2f9056fba5267b99be4ec2afadc27261505d4992c50b33c513c"}, - {file = "pydantic_core-2.16.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:102569d371fadc40d8f8598a59379c37ec60164315884467052830b28cc4e9da"}, - {file = "pydantic_core-2.16.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:735dceec50fa907a3c314b84ed609dec54b76a814aa14eb90da31d1d36873a5e"}, - {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e83ebbf020be727d6e0991c1b192a5c2e7113eb66e3def0cd0c62f9f266247e4"}, - {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:30a8259569fbeec49cfac7fda3ec8123486ef1b729225222f0d41d5f840b476f"}, - {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:920c4897e55e2881db6a6da151198e5001552c3777cd42b8a4c2f72eedc2ee91"}, - {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f5247a3d74355f8b1d780d0f3b32a23dd9f6d3ff43ef2037c6dcd249f35ecf4c"}, - {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d5bea8012df5bb6dda1e67d0563ac50b7f64a5d5858348b5c8cb5043811c19d"}, - {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ed3025a8a7e5a59817b7494686d449ebfbe301f3e757b852c8d0d1961d6be864"}, - {file = "pydantic_core-2.16.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:06f0d5a1d9e1b7932477c172cc720b3b23c18762ed7a8efa8398298a59d177c7"}, - {file = "pydantic_core-2.16.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:150ba5c86f502c040b822777e2e519b5625b47813bd05f9273a8ed169c97d9ae"}, - {file = "pydantic_core-2.16.1-cp311-none-win32.whl", hash = "sha256:d6cbdf12ef967a6aa401cf5cdf47850559e59eedad10e781471c960583f25aa1"}, - {file = "pydantic_core-2.16.1-cp311-none-win_amd64.whl", hash = "sha256:afa01d25769af33a8dac0d905d5c7bb2d73c7c3d5161b2dd6f8b5b5eea6a3c4c"}, - {file = "pydantic_core-2.16.1-cp311-none-win_arm64.whl", hash = "sha256:1a2fe7b00a49b51047334d84aafd7e39f80b7675cad0083678c58983662da89b"}, - {file = "pydantic_core-2.16.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = 
"sha256:0f478ec204772a5c8218e30eb813ca43e34005dff2eafa03931b3d8caef87d51"}, - {file = "pydantic_core-2.16.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f1936ef138bed2165dd8573aa65e3095ef7c2b6247faccd0e15186aabdda7f66"}, - {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99d3a433ef5dc3021c9534a58a3686c88363c591974c16c54a01af7efd741f13"}, - {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd88f40f2294440d3f3c6308e50d96a0d3d0973d6f1a5732875d10f569acef49"}, - {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fac641bbfa43d5a1bed99d28aa1fded1984d31c670a95aac1bf1d36ac6ce137"}, - {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:72bf9308a82b75039b8c8edd2be2924c352eda5da14a920551a8b65d5ee89253"}, - {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb4363e6c9fc87365c2bc777a1f585a22f2f56642501885ffc7942138499bf54"}, - {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:20f724a023042588d0f4396bbbcf4cffd0ddd0ad3ed4f0d8e6d4ac4264bae81e"}, - {file = "pydantic_core-2.16.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:fb4370b15111905bf8b5ba2129b926af9470f014cb0493a67d23e9d7a48348e8"}, - {file = "pydantic_core-2.16.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:23632132f1fd608034f1a56cc3e484be00854db845b3a4a508834be5a6435a6f"}, - {file = "pydantic_core-2.16.1-cp312-none-win32.whl", hash = "sha256:b9f3e0bffad6e238f7acc20c393c1ed8fab4371e3b3bc311020dfa6020d99212"}, - {file = "pydantic_core-2.16.1-cp312-none-win_amd64.whl", hash = "sha256:a0b4cfe408cd84c53bab7d83e4209458de676a6ec5e9c623ae914ce1cb79b96f"}, - {file = "pydantic_core-2.16.1-cp312-none-win_arm64.whl", hash = "sha256:d195add190abccefc70ad0f9a0141ad7da53e16183048380e688b466702195dd"}, - {file = "pydantic_core-2.16.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:502c062a18d84452858f8aea1e520e12a4d5228fc3621ea5061409d666ea1706"}, - {file = "pydantic_core-2.16.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d8c032ccee90b37b44e05948b449a2d6baed7e614df3d3f47fe432c952c21b60"}, - {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:920f4633bee43d7a2818e1a1a788906df5a17b7ab6fe411220ed92b42940f818"}, - {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9f5d37ff01edcbace53a402e80793640c25798fb7208f105d87a25e6fcc9ea06"}, - {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:399166f24c33a0c5759ecc4801f040dbc87d412c1a6d6292b2349b4c505effc9"}, - {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ac89ccc39cd1d556cc72d6752f252dc869dde41c7c936e86beac5eb555041b66"}, - {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73802194f10c394c2bedce7a135ba1d8ba6cff23adf4217612bfc5cf060de34c"}, - {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8fa00fa24ffd8c31fac081bf7be7eb495be6d248db127f8776575a746fa55c95"}, - {file = "pydantic_core-2.16.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:601d3e42452cd4f2891c13fa8c70366d71851c1593ed42f57bf37f40f7dca3c8"}, - {file = 
"pydantic_core-2.16.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:07982b82d121ed3fc1c51faf6e8f57ff09b1325d2efccaa257dd8c0dd937acca"}, - {file = "pydantic_core-2.16.1-cp38-none-win32.whl", hash = "sha256:d0bf6f93a55d3fa7a079d811b29100b019784e2ee6bc06b0bb839538272a5610"}, - {file = "pydantic_core-2.16.1-cp38-none-win_amd64.whl", hash = "sha256:fbec2af0ebafa57eb82c18c304b37c86a8abddf7022955d1742b3d5471a6339e"}, - {file = "pydantic_core-2.16.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a497be217818c318d93f07e14502ef93d44e6a20c72b04c530611e45e54c2196"}, - {file = "pydantic_core-2.16.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:694a5e9f1f2c124a17ff2d0be613fd53ba0c26de588eb4bdab8bca855e550d95"}, - {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d4dfc66abea3ec6d9f83e837a8f8a7d9d3a76d25c9911735c76d6745950e62c"}, - {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8655f55fe68c4685673265a650ef71beb2d31871c049c8b80262026f23605ee3"}, - {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:21e3298486c4ea4e4d5cc6fb69e06fb02a4e22089304308817035ac006a7f506"}, - {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:71b4a48a7427f14679f0015b13c712863d28bb1ab700bd11776a5368135c7d60"}, - {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10dca874e35bb60ce4f9f6665bfbfad050dd7573596608aeb9e098621ac331dc"}, - {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fa496cd45cda0165d597e9d6f01e36c33c9508f75cf03c0a650018c5048f578e"}, - {file = "pydantic_core-2.16.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5317c04349472e683803da262c781c42c5628a9be73f4750ac7d13040efb5d2d"}, - {file = "pydantic_core-2.16.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:42c29d54ed4501a30cd71015bf982fa95e4a60117b44e1a200290ce687d3e640"}, - {file = "pydantic_core-2.16.1-cp39-none-win32.whl", hash = "sha256:ba07646f35e4e49376c9831130039d1b478fbfa1215ae62ad62d2ee63cf9c18f"}, - {file = "pydantic_core-2.16.1-cp39-none-win_amd64.whl", hash = "sha256:2133b0e412a47868a358713287ff9f9a328879da547dc88be67481cdac529118"}, - {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:d25ef0c33f22649b7a088035fd65ac1ce6464fa2876578df1adad9472f918a76"}, - {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:99c095457eea8550c9fa9a7a992e842aeae1429dab6b6b378710f62bfb70b394"}, - {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b49c604ace7a7aa8af31196abbf8f2193be605db6739ed905ecaf62af31ccae0"}, - {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c56da23034fe66221f2208c813d8aa509eea34d97328ce2add56e219c3a9f41c"}, - {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cebf8d56fee3b08ad40d332a807ecccd4153d3f1ba8231e111d9759f02edfd05"}, - {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:1ae8048cba95f382dba56766525abca438328455e35c283bb202964f41a780b0"}, - {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:780daad9e35b18d10d7219d24bfb30148ca2afc309928e1d4d53de86822593dc"}, - {file = 
"pydantic_core-2.16.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c94b5537bf6ce66e4d7830c6993152940a188600f6ae044435287753044a8fe2"}, - {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:adf28099d061a25fbcc6531febb7a091e027605385de9fe14dd6a97319d614cf"}, - {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:644904600c15816a1f9a1bafa6aab0d21db2788abcdf4e2a77951280473f33e1"}, - {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87bce04f09f0552b66fca0c4e10da78d17cb0e71c205864bab4e9595122cb9d9"}, - {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:877045a7969ace04d59516d5d6a7dee13106822f99a5d8df5e6822941f7bedc8"}, - {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9c46e556ee266ed3fb7b7a882b53df3c76b45e872fdab8d9cf49ae5e91147fd7"}, - {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:4eebbd049008eb800f519578e944b8dc8e0f7d59a5abb5924cc2d4ed3a1834ff"}, - {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:c0be58529d43d38ae849a91932391eb93275a06b93b79a8ab828b012e916a206"}, - {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:b1fc07896fc1851558f532dffc8987e526b682ec73140886c831d773cef44b76"}, - {file = "pydantic_core-2.16.1.tar.gz", hash = "sha256:daff04257b49ab7f4b3f73f98283d3dbb1a65bf3500d55c7beac3c66c310fe34"}, + {file = "pydantic_core-2.14.6-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:72f9a942d739f09cd42fffe5dc759928217649f070056f03c70df14f5770acf9"}, + {file = "pydantic_core-2.14.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6a31d98c0d69776c2576dda4b77b8e0c69ad08e8b539c25c7d0ca0dc19a50d6c"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5aa90562bc079c6c290f0512b21768967f9968e4cfea84ea4ff5af5d917016e4"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:370ffecb5316ed23b667d99ce4debe53ea664b99cc37bfa2af47bc769056d534"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f85f3843bdb1fe80e8c206fe6eed7a1caeae897e496542cee499c374a85c6e08"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9862bf828112e19685b76ca499b379338fd4c5c269d897e218b2ae8fcb80139d"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036137b5ad0cb0004c75b579445a1efccd072387a36c7f217bb8efd1afbe5245"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92879bce89f91f4b2416eba4429c7b5ca22c45ef4a499c39f0c5c69257522c7c"}, + {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0c08de15d50fa190d577e8591f0329a643eeaed696d7771760295998aca6bc66"}, + {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:36099c69f6b14fc2c49d7996cbf4f87ec4f0e66d1c74aa05228583225a07b590"}, + {file = "pydantic_core-2.14.6-cp310-none-win32.whl", hash = "sha256:7be719e4d2ae6c314f72844ba9d69e38dff342bc360379f7c8537c48e23034b7"}, + {file = "pydantic_core-2.14.6-cp310-none-win_amd64.whl", hash = "sha256:36fa402dcdc8ea7f1b0ddcf0df4254cc6b2e08f8cd80e7010d4c4ae6e86b2a87"}, + {file = 
"pydantic_core-2.14.6-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:dea7fcd62915fb150cdc373212141a30037e11b761fbced340e9db3379b892d4"}, + {file = "pydantic_core-2.14.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ffff855100bc066ff2cd3aa4a60bc9534661816b110f0243e59503ec2df38421"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b027c86c66b8627eb90e57aee1f526df77dc6d8b354ec498be9a757d513b92b"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00b1087dabcee0b0ffd104f9f53d7d3eaddfaa314cdd6726143af6bc713aa27e"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75ec284328b60a4e91010c1acade0c30584f28a1f345bc8f72fe8b9e46ec6a96"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e1f4744eea1501404b20b0ac059ff7e3f96a97d3e3f48ce27a139e053bb370b"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2602177668f89b38b9f84b7b3435d0a72511ddef45dc14446811759b82235a1"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6c8edaea3089bf908dd27da8f5d9e395c5b4dc092dbcce9b65e7156099b4b937"}, + {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:478e9e7b360dfec451daafe286998d4a1eeaecf6d69c427b834ae771cad4b622"}, + {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b6ca36c12a5120bad343eef193cc0122928c5c7466121da7c20f41160ba00ba2"}, + {file = "pydantic_core-2.14.6-cp311-none-win32.whl", hash = "sha256:2b8719037e570639e6b665a4050add43134d80b687288ba3ade18b22bbb29dd2"}, + {file = "pydantic_core-2.14.6-cp311-none-win_amd64.whl", hash = "sha256:78ee52ecc088c61cce32b2d30a826f929e1708f7b9247dc3b921aec367dc1b23"}, + {file = "pydantic_core-2.14.6-cp311-none-win_arm64.whl", hash = "sha256:a19b794f8fe6569472ff77602437ec4430f9b2b9ec7a1105cfd2232f9ba355e6"}, + {file = "pydantic_core-2.14.6-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:667aa2eac9cd0700af1ddb38b7b1ef246d8cf94c85637cbb03d7757ca4c3fdec"}, + {file = "pydantic_core-2.14.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cdee837710ef6b56ebd20245b83799fce40b265b3b406e51e8ccc5b85b9099b7"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c5bcf3414367e29f83fd66f7de64509a8fd2368b1edf4351e862910727d3e51"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:26a92ae76f75d1915806b77cf459811e772d8f71fd1e4339c99750f0e7f6324f"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a983cca5ed1dd9a35e9e42ebf9f278d344603bfcb174ff99a5815f953925140a"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cb92f9061657287eded380d7dc455bbf115430b3aa4741bdc662d02977e7d0af"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ace1e220b078c8e48e82c081e35002038657e4b37d403ce940fa679e57113b"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ef633add81832f4b56d3b4c9408b43d530dfca29e68fb1b797dcb861a2c734cd"}, + {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:7e90d6cc4aad2cc1f5e16ed56e46cebf4877c62403a311af20459c15da76fd91"}, + {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e8a5ac97ea521d7bde7621d86c30e86b798cdecd985723c4ed737a2aa9e77d0c"}, + {file = "pydantic_core-2.14.6-cp312-none-win32.whl", hash = "sha256:f27207e8ca3e5e021e2402ba942e5b4c629718e665c81b8b306f3c8b1ddbb786"}, + {file = "pydantic_core-2.14.6-cp312-none-win_amd64.whl", hash = "sha256:b3e5fe4538001bb82e2295b8d2a39356a84694c97cb73a566dc36328b9f83b40"}, + {file = "pydantic_core-2.14.6-cp312-none-win_arm64.whl", hash = "sha256:64634ccf9d671c6be242a664a33c4acf12882670b09b3f163cd00a24cffbd74e"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:24368e31be2c88bd69340fbfe741b405302993242ccb476c5c3ff48aeee1afe0"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:e33b0834f1cf779aa839975f9d8755a7c2420510c0fa1e9fa0497de77cd35d2c"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6af4b3f52cc65f8a0bc8b1cd9676f8c21ef3e9132f21fed250f6958bd7223bed"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d15687d7d7f40333bd8266f3814c591c2e2cd263fa2116e314f60d82086e353a"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:095b707bb287bfd534044166ab767bec70a9bba3175dcdc3371782175c14e43c"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94fc0e6621e07d1e91c44e016cc0b189b48db053061cc22d6298a611de8071bb"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ce830e480f6774608dedfd4a90c42aac4a7af0a711f1b52f807130c2e434c06"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a306cdd2ad3a7d795d8e617a58c3a2ed0f76c8496fb7621b6cd514eb1532cae8"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2f5fa187bde8524b1e37ba894db13aadd64faa884657473b03a019f625cee9a8"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:438027a975cc213a47c5d70672e0d29776082155cfae540c4e225716586be75e"}, + {file = "pydantic_core-2.14.6-cp37-none-win32.whl", hash = "sha256:f96ae96a060a8072ceff4cfde89d261837b4294a4f28b84a28765470d502ccc6"}, + {file = "pydantic_core-2.14.6-cp37-none-win_amd64.whl", hash = "sha256:e646c0e282e960345314f42f2cea5e0b5f56938c093541ea6dbf11aec2862391"}, + {file = "pydantic_core-2.14.6-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:db453f2da3f59a348f514cfbfeb042393b68720787bbef2b4c6068ea362c8149"}, + {file = "pydantic_core-2.14.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3860c62057acd95cc84044e758e47b18dcd8871a328ebc8ccdefd18b0d26a21b"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36026d8f99c58d7044413e1b819a67ca0e0b8ebe0f25e775e6c3d1fabb3c38fb"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8ed1af8692bd8d2a29d702f1a2e6065416d76897d726e45a1775b1444f5928a7"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:314ccc4264ce7d854941231cf71b592e30d8d368a71e50197c905874feacc8a8"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:982487f8931067a32e72d40ab6b47b1628a9c5d344be7f1a4e668fb462d2da42"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dbe357bc4ddda078f79d2a36fc1dd0494a7f2fad83a0a684465b6f24b46fe80"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2f6ffc6701a0eb28648c845f4945a194dc7ab3c651f535b81793251e1185ac3d"}, + {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7f5025db12fc6de7bc1104d826d5aee1d172f9ba6ca936bf6474c2148ac336c1"}, + {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dab03ed811ed1c71d700ed08bde8431cf429bbe59e423394f0f4055f1ca0ea60"}, + {file = "pydantic_core-2.14.6-cp38-none-win32.whl", hash = "sha256:dfcbebdb3c4b6f739a91769aea5ed615023f3c88cb70df812849aef634c25fbe"}, + {file = "pydantic_core-2.14.6-cp38-none-win_amd64.whl", hash = "sha256:99b14dbea2fdb563d8b5a57c9badfcd72083f6006caf8e126b491519c7d64ca8"}, + {file = "pydantic_core-2.14.6-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:4ce8299b481bcb68e5c82002b96e411796b844d72b3e92a3fbedfe8e19813eab"}, + {file = "pydantic_core-2.14.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b9a9d92f10772d2a181b5ca339dee066ab7d1c9a34ae2421b2a52556e719756f"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd9e98b408384989ea4ab60206b8e100d8687da18b5c813c11e92fd8212a98e0"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4f86f1f318e56f5cbb282fe61eb84767aee743ebe32c7c0834690ebea50c0a6b"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86ce5fcfc3accf3a07a729779d0b86c5d0309a4764c897d86c11089be61da160"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dcf1978be02153c6a31692d4fbcc2a3f1db9da36039ead23173bc256ee3b91b"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eedf97be7bc3dbc8addcef4142f4b4164066df0c6f36397ae4aaed3eb187d8ab"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5f916acf8afbcab6bacbb376ba7dc61f845367901ecd5e328fc4d4aef2fcab0"}, + {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8a14c192c1d724c3acbfb3f10a958c55a2638391319ce8078cb36c02283959b9"}, + {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0348b1dc6b76041516e8a854ff95b21c55f5a411c3297d2ca52f5528e49d8411"}, + {file = "pydantic_core-2.14.6-cp39-none-win32.whl", hash = "sha256:de2a0645a923ba57c5527497daf8ec5df69c6eadf869e9cd46e86349146e5975"}, + {file = "pydantic_core-2.14.6-cp39-none-win_amd64.whl", hash = "sha256:aca48506a9c20f68ee61c87f2008f81f8ee99f8d7f0104bff3c47e2d148f89d9"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d5c28525c19f5bb1e09511669bb57353d22b94cf8b65f3a8d141c389a55dec95"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:78d0768ee59baa3de0f4adac9e3748b4b1fffc52143caebddfd5ea2961595277"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b93785eadaef932e4fe9c6e12ba67beb1b3f1e5495631419c784ab87e975670"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a874f21f87c485310944b2b2734cd6d318765bcbb7515eead33af9641816506e"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89f4477d915ea43b4ceea6756f63f0288941b6443a2b28c69004fe07fde0d0d"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:172de779e2a153d36ee690dbc49c6db568d7b33b18dc56b69a7514aecbcf380d"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:dfcebb950aa7e667ec226a442722134539e77c575f6cfaa423f24371bb8d2e94"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:55a23dcd98c858c0db44fc5c04fc7ed81c4b4d33c653a7c45ddaebf6563a2f66"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4241204e4b36ab5ae466ecec5c4c16527a054c69f99bba20f6f75232a6a534e2"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e574de99d735b3fc8364cba9912c2bec2da78775eba95cbb225ef7dda6acea24"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1302a54f87b5cd8528e4d6d1bf2133b6aa7c6122ff8e9dc5220fbc1e07bffebd"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8e81e4b55930e5ffab4a68db1af431629cf2e4066dbdbfef65348b8ab804ea8"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c99462ffc538717b3e60151dfaf91125f637e801f5ab008f81c402f1dff0cd0f"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e4cf2d5829f6963a5483ec01578ee76d329eb5caf330ecd05b3edd697e7d768a"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:cf10b7d58ae4a1f07fccbf4a0a956d705356fea05fb4c70608bb6fa81d103cda"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:399ac0891c284fa8eb998bcfa323f2234858f5d2efca3950ae58c8f88830f145"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c6a5c79b28003543db3ba67d1df336f253a87d3112dac3a51b94f7d48e4c0e1"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:599c87d79cab2a6a2a9df4aefe0455e61e7d2aeede2f8577c1b7c0aec643ee8e"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43e166ad47ba900f2542a80d83f9fc65fe99eb63ceec4debec160ae729824052"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3a0b5db001b98e1c649dd55afa928e75aa4087e587b9524a4992316fa23c9fba"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:747265448cb57a9f37572a488a57d873fd96bf51e5bb7edb52cfb37124516da4"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:7ebe3416785f65c28f4f9441e916bfc8a54179c8dea73c23023f7086fa601c5d"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:86c963186ca5e50d5c8287b1d1c9d3f8f024cbe343d048c5bd282aec2d8641f2"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e0641b506486f0b4cd1500a2a65740243e8670a2549bb02bc4556a83af84ae03"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71d72ca5eaaa8d38c8df16b7deb1a2da4f650c41b58bb142f3fb75d5ad4a611f"}, + 
{file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27e524624eace5c59af499cd97dc18bb201dc6a7a2da24bfc66ef151c69a5f2a"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a3dde6cac75e0b0902778978d3b1646ca9f438654395a362cb21d9ad34b24acf"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:00646784f6cd993b1e1c0e7b0fdcbccc375d539db95555477771c27555e3c556"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:23598acb8ccaa3d1d875ef3b35cb6376535095e9405d91a3d57a8c7db5d29341"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7f41533d7e3cf9520065f610b41ac1c76bc2161415955fbcead4981b22c7611e"}, + {file = "pydantic_core-2.14.6.tar.gz", hash = "sha256:1fd0c1d395372843fba13a51c28e3bb9d59bd7aebfeb17358ffaaa1e4dbbe948"}, ] [package.dependencies] @@ -4647,13 +4677,13 @@ setuptools = "*" [[package]] name = "pydeps" -version = "1.12.18" +version = "1.12.17" description = "Display module dependencies" optional = false python-versions = "*" files = [ - {file = "pydeps-1.12.18-py3-none-any.whl", hash = "sha256:fc57f56a6eaf92ea6b9b503dc43d55f098661e253a868bbb52fccfbbcc8e79de"}, - {file = "pydeps-1.12.18.tar.gz", hash = "sha256:15c5d023b5053308e19a69591da06d9f3ff038e7a47111c40c9986b6a2929a4b"}, + {file = "pydeps-1.12.17-py3-none-any.whl", hash = "sha256:4fb2e86071c78c1b85a1c63745a267d100e91daf6bab2f14331b3c77433b58b4"}, + {file = "pydeps-1.12.17.tar.gz", hash = "sha256:c308e8355a1e77ff0af899d6f9f1665d4eb07019692dba9fb1dc1cab05df36a4"}, ] [package.dependencies] @@ -4720,71 +4750,71 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pyinstrument" -version = "4.6.2" +version = "4.6.1" description = "Call stack profiler for Python. Shows you why your code is slow!" 
optional = false python-versions = ">=3.7" files = [ - {file = "pyinstrument-4.6.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7a1b1cd768ea7ea9ab6f5490f7e74431321bcc463e9441dbc2f769617252d9e2"}, - {file = "pyinstrument-4.6.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8a386b9d09d167451fb2111eaf86aabf6e094fed42c15f62ec51d6980bce7d96"}, - {file = "pyinstrument-4.6.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23c3e3ca8553b9aac09bd978c73d21b9032c707ac6d803bae6a20ecc048df4a8"}, - {file = "pyinstrument-4.6.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5f329f5534ca069420246f5ce57270d975229bcb92a3a3fd6b2ca086527d9764"}, - {file = "pyinstrument-4.6.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4dcdcc7ba224a0c5edfbd00b0f530f5aed2b26da5aaa2f9af5519d4aa8c7e41"}, - {file = "pyinstrument-4.6.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73db0c2c99119c65b075feee76e903b4ed82e59440fe8b5724acf5c7cb24721f"}, - {file = "pyinstrument-4.6.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:da58f265326f3cf3975366ccb8b39014f1e69ff8327958a089858d71c633d654"}, - {file = "pyinstrument-4.6.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:feebcf860f955401df30d029ec8de7a0c5515d24ea809736430fd1219686fe14"}, - {file = "pyinstrument-4.6.2-cp310-cp310-win32.whl", hash = "sha256:b2b66ff0b16c8ecf1ec22de001cfff46872b2c163c62429055105564eef50b2e"}, - {file = "pyinstrument-4.6.2-cp310-cp310-win_amd64.whl", hash = "sha256:8d104b7a7899d5fa4c5bf1ceb0c1a070615a72c5dc17bc321b612467ad5c5d88"}, - {file = "pyinstrument-4.6.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:62f6014d2b928b181a52483e7c7b82f2c27e22c577417d1681153e5518f03317"}, - {file = "pyinstrument-4.6.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dcb5c8d763c5df55131670ba2a01a8aebd0d490a789904a55eb6a8b8d497f110"}, - {file = "pyinstrument-4.6.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ed4e8c6c84e0e6429ba7008a66e435ede2d8cb027794c20923c55669d9c5633"}, - {file = "pyinstrument-4.6.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c0f0e1d8f8c70faa90ff57f78ac0dda774b52ea0bfb2d9f0f41ce6f3e7c869e"}, - {file = "pyinstrument-4.6.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b3c44cb037ad0d6e9d9a48c14d856254ada641fbd0ae9de40da045fc2226a2a"}, - {file = "pyinstrument-4.6.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:be9901f17ac2f527c352f2fdca3d717c1d7f2ce8a70bad5a490fc8cc5d2a6007"}, - {file = "pyinstrument-4.6.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:8a9791bf8916c1cf439c202fded32de93354b0f57328f303d71950b0027c7811"}, - {file = "pyinstrument-4.6.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d6162615e783c59e36f2d7caf903a7e3ecb6b32d4a4ae8907f2760b2ef395bf6"}, - {file = "pyinstrument-4.6.2-cp311-cp311-win32.whl", hash = "sha256:28af084aa84bbfd3620ebe71d5f9a0deca4451267f363738ca824f733de55056"}, - {file = "pyinstrument-4.6.2-cp311-cp311-win_amd64.whl", hash = "sha256:dd6007d3c2e318e09e582435dd8d111cccf30d342af66886b783208813caf3d7"}, - {file = "pyinstrument-4.6.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e3813c8ecfab9d7d855c5f0f71f11793cf1507f40401aa33575c7fd613577c23"}, - {file = "pyinstrument-4.6.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:6c761372945e60fc1396b7a49f30592e8474e70a558f1a87346d27c8c4ce50f7"}, - {file = "pyinstrument-4.6.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fba3244e94c117bf4d9b30b8852bbdcd510e7329fdd5c7c8b3799e00a9215a8"}, - {file = "pyinstrument-4.6.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:803ac64e526473d64283f504df3b0d5c2c203ea9603cab428641538ffdc753a7"}, - {file = "pyinstrument-4.6.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2e554b1bb0df78f5ce8a92df75b664912ca93aa94208386102af454ec31b647"}, - {file = "pyinstrument-4.6.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7c671057fad22ee3ded897a6a361204ea2538e44c1233cad0e8e30f6d27f33db"}, - {file = "pyinstrument-4.6.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:d02f31fa13a9e8dc702a113878419deba859563a32474c9f68e04619d43d6f01"}, - {file = "pyinstrument-4.6.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b55983a884f083f93f0fc6d12ff8df0acd1e2fb0580d2f4c7bfe6def33a84b58"}, - {file = "pyinstrument-4.6.2-cp312-cp312-win32.whl", hash = "sha256:fdc0a53b27e5d8e47147489c7dab596ddd1756b1e053217ef5bc6718567099ff"}, - {file = "pyinstrument-4.6.2-cp312-cp312-win_amd64.whl", hash = "sha256:dd5c53a0159126b5ce7cbc4994433c9c671e057c85297ff32645166a06ad2c50"}, - {file = "pyinstrument-4.6.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b082df0bbf71251a7f4880a12ed28421dba84ea7110bb376e0533067a4eaff40"}, - {file = "pyinstrument-4.6.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90350533396071cb2543affe01e40bf534c35cb0d4b8fa9fdb0f052f9ca2cfe3"}, - {file = "pyinstrument-4.6.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67268bb0d579330cff40fd1c90b8510363ca1a0e7204225840614068658dab77"}, - {file = "pyinstrument-4.6.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20e15b4e1d29ba0b7fc81aac50351e0dc0d7e911e93771ebc3f408e864a2c93b"}, - {file = "pyinstrument-4.6.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2e625fc6ffcd4fd420493edd8276179c3f784df207bef4c2192725c1b310534c"}, - {file = "pyinstrument-4.6.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:113d2fc534c9ca7b6b5661d6ada05515bf318f6eb34e8d05860fe49eb7cfe17e"}, - {file = "pyinstrument-4.6.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3098cd72b71a322a72dafeb4ba5c566465e193d2030adad4c09566bd2f89bf4f"}, - {file = "pyinstrument-4.6.2-cp37-cp37m-win32.whl", hash = "sha256:08fdc7f88c989316fa47805234c37a40fafe7b614afd8ae863f0afa9d1707b37"}, - {file = "pyinstrument-4.6.2-cp37-cp37m-win_amd64.whl", hash = "sha256:5ebeba952c0056dcc9b9355328c78c4b5c2a33b4b4276a9157a3ab589f3d1bac"}, - {file = "pyinstrument-4.6.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:34e59e91c88ec9ad5630c0964eca823949005e97736bfa838beb4789e94912a2"}, - {file = "pyinstrument-4.6.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cd0320c39e99e3c0a3129d1ed010ac41e5a7eb96fb79900d270080a97962e995"}, - {file = "pyinstrument-4.6.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46992e855d630575ec635eeca0068a8ddf423d4fd32ea0875a94e9f8688f0b95"}, - {file = "pyinstrument-4.6.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e474c56da636253dfdca7cd1998b240d6b39f7ed34777362db69224fcf053b1"}, - 
{file = "pyinstrument-4.6.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4b559322f30509ad8f082561792352d0805b3edfa508e492a36041fdc009259"}, - {file = "pyinstrument-4.6.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:06a8578b2943eb1dbbf281e1e59e44246acfefd79e1b06d4950f01b693de12af"}, - {file = "pyinstrument-4.6.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7bd3da31c46f1c1cb7ae89031725f6a1d1015c2041d9c753fe23980f5f9fd86c"}, - {file = "pyinstrument-4.6.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e63f4916001aa9c625976a50779282e0a5b5e9b17c52a50ef4c651e468ed5b88"}, - {file = "pyinstrument-4.6.2-cp38-cp38-win32.whl", hash = "sha256:32ec8db6896b94af790a530e1e0edad4d0f941a0ab8dd9073e5993e7ea46af7d"}, - {file = "pyinstrument-4.6.2-cp38-cp38-win_amd64.whl", hash = "sha256:a59fc4f7db738a094823afe6422509fa5816a7bf74e768ce5a7a2ddd91af40ac"}, - {file = "pyinstrument-4.6.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3a165e0d2deb212d4cf439383982a831682009e1b08733c568cac88c89784e62"}, - {file = "pyinstrument-4.6.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7ba858b3d6f6e5597c641edcc0e7e464f85aba86d71bc3b3592cb89897bf43f6"}, - {file = "pyinstrument-4.6.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fd8e547cf3df5f0ec6e4dffbe2e857f6b28eda51b71c3c0b5a2fc0646527835"}, - {file = "pyinstrument-4.6.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0de2c1714a37a820033b19cf134ead43299a02662f1379140974a9ab733c5f3a"}, - {file = "pyinstrument-4.6.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01fc45dedceec3df81668d702bca6d400d956c8b8494abc206638c167c78dfd9"}, - {file = "pyinstrument-4.6.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5b6e161ef268d43ee6bbfae7fd2cdd0a52c099ddd21001c126ca1805dc906539"}, - {file = "pyinstrument-4.6.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6ba8e368d0421f15ba6366dfd60ec131c1b46505d021477e0f865d26cf35a605"}, - {file = "pyinstrument-4.6.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edca46f04a573ac2fb11a84b937844e6a109f38f80f4b422222fb5be8ecad8cb"}, - {file = "pyinstrument-4.6.2-cp39-cp39-win32.whl", hash = "sha256:baf375953b02fe94d00e716f060e60211ede73f49512b96687335f7071adb153"}, - {file = "pyinstrument-4.6.2-cp39-cp39-win_amd64.whl", hash = "sha256:af1a953bce9fd530040895d01ff3de485e25e1576dccb014f76ba9131376fcad"}, - {file = "pyinstrument-4.6.2.tar.gz", hash = "sha256:0002ee517ed8502bbda6eb2bb1ba8f95a55492fcdf03811ba13d4806e50dd7f6"}, + {file = "pyinstrument-4.6.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:73476e4bc6e467ac1b2c3c0dd1f0b71c9061d4de14626676adfdfbb14aa342b4"}, + {file = "pyinstrument-4.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4d1da8efd974cf9df52ee03edaee2d3875105ddd00de35aa542760f7c612bdf7"}, + {file = "pyinstrument-4.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:507be1ee2f2b0c9fba74d622a272640dd6d1b0c9ec3388b2cdeb97ad1e77125f"}, + {file = "pyinstrument-4.6.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:95cee6de08eb45754ef4f602ce52b640d1c535d934a6a8733a974daa095def37"}, + {file = "pyinstrument-4.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c7873e8cec92321251fdf894a72b3c78f4c5c20afdd1fef0baf9042ec843bb04"}, + {file = "pyinstrument-4.6.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a242f6cac40bc83e1f3002b6b53681846dfba007f366971db0bf21e02dbb1903"}, + {file = "pyinstrument-4.6.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:97c9660cdb4bd2a43cf4f3ab52cffd22f3ac9a748d913b750178fb34e5e39e64"}, + {file = "pyinstrument-4.6.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e304cd0723e2b18ada5e63c187abf6d777949454c734f5974d64a0865859f0f4"}, + {file = "pyinstrument-4.6.1-cp310-cp310-win32.whl", hash = "sha256:cee21a2d78187dd8a80f72f5d0f1ddb767b2d9800f8bb4d94b6d11f217c22cdb"}, + {file = "pyinstrument-4.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:2000712f71d693fed2f8a1c1638d37b7919124f367b37976d07128d49f1445eb"}, + {file = "pyinstrument-4.6.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a366c6f3dfb11f1739bdc1dee75a01c1563ad0bf4047071e5e77598087df457f"}, + {file = "pyinstrument-4.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c6be327be65d934796558aa9cb0f75ce62ebd207d49ad1854610c97b0579ad47"}, + {file = "pyinstrument-4.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9e160d9c5d20d3e4ef82269e4e8b246ff09bdf37af5fb8cb8ccca97936d95ad6"}, + {file = "pyinstrument-4.6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ffbf56605ef21c2fcb60de2fa74ff81f417d8be0c5002a407e414d6ef6dee43"}, + {file = "pyinstrument-4.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c92cc4924596d6e8f30a16182bbe90893b1572d847ae12652f72b34a9a17c24a"}, + {file = "pyinstrument-4.6.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f4b48a94d938cae981f6948d9ec603bab2087b178d2095d042d5a48aabaecaab"}, + {file = "pyinstrument-4.6.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e7a386392275bdef4a1849712dc5b74f0023483fca14ef93d0ca27d453548982"}, + {file = "pyinstrument-4.6.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:871b131b83e9b1122f2325061c68ed1e861eebcb568c934d2fb193652f077f77"}, + {file = "pyinstrument-4.6.1-cp311-cp311-win32.whl", hash = "sha256:8d8515156dd91f5652d13b5fcc87e634f8fe1c07b68d1d0840348cdd50bf5ace"}, + {file = "pyinstrument-4.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:fb868fbe089036e9f32525a249f4c78b8dc46967612393f204b8234f439c9cc4"}, + {file = "pyinstrument-4.6.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a18cd234cce4f230f1733807f17a134e64a1f1acabf74a14d27f583cf2b183df"}, + {file = "pyinstrument-4.6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:574cfca69150be4ce4461fb224712fbc0722a49b0dc02fa204d02807adf6b5a0"}, + {file = "pyinstrument-4.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e02cf505e932eb8ccf561b7527550a67ec14fcae1fe0e25319b09c9c166e914"}, + {file = "pyinstrument-4.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:832fb2acef9d53701c1ab546564c45fb70a8770c816374f8dd11420d399103c9"}, + {file = "pyinstrument-4.6.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13cb57e9607545623ebe462345b3d0c4caee0125d2d02267043ece8aca8f4ea0"}, + {file = "pyinstrument-4.6.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9be89e7419bcfe8dd6abb0d959d6d9c439c613a4a873514c43d16b48dae697c9"}, + {file = 
"pyinstrument-4.6.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:476785cfbc44e8e1b1ad447398aa3deae81a8df4d37eb2d8bbb0c404eff979cd"}, + {file = "pyinstrument-4.6.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e9cebd90128a3d2fee36d3ccb665c1b9dce75261061b2046203e45c4a8012d54"}, + {file = "pyinstrument-4.6.1-cp312-cp312-win32.whl", hash = "sha256:1d0b76683df2ad5c40eff73607dc5c13828c92fbca36aff1ddf869a3c5a55fa6"}, + {file = "pyinstrument-4.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:c4b7af1d9d6a523cfbfedebcb69202242d5bd0cb89c4e094cc73d5d6e38279bd"}, + {file = "pyinstrument-4.6.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:79ae152f8c6a680a188fb3be5e0f360ac05db5bbf410169a6c40851dfaebcce9"}, + {file = "pyinstrument-4.6.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07cad2745964c174c65aa75f1bf68a4394d1b4d28f33894837cfd315d1e836f0"}, + {file = "pyinstrument-4.6.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb81f66f7f94045d723069cf317453d42375de9ff3c69089cf6466b078ac1db4"}, + {file = "pyinstrument-4.6.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab30ae75969da99e9a529e21ff497c18fdf958e822753db4ae7ed1e67094040"}, + {file = "pyinstrument-4.6.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f36cb5b644762fb3c86289324bbef17e95f91cd710603ac19444a47f638e8e96"}, + {file = "pyinstrument-4.6.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:8b45075d9dbbc977dbc7007fb22bb0054c6990fbe91bf48dd80c0b96c6307ba7"}, + {file = "pyinstrument-4.6.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:475ac31477f6302e092463896d6a2055f3e6abcd293bad16ff94fc9185308a88"}, + {file = "pyinstrument-4.6.1-cp37-cp37m-win32.whl", hash = "sha256:29172ab3d8609fdf821c3f2562dc61e14f1a8ff5306607c32ca743582d3a760e"}, + {file = "pyinstrument-4.6.1-cp37-cp37m-win_amd64.whl", hash = "sha256:bd176f297c99035127b264369d2bb97a65255f65f8d4e843836baf55ebb3cee4"}, + {file = "pyinstrument-4.6.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:23e9b4526978432e9999021da9a545992cf2ac3df5ee82db7beb6908fc4c978c"}, + {file = "pyinstrument-4.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2dbcaccc9f456ef95557ec501caeb292119c24446d768cb4fb43578b0f3d572c"}, + {file = "pyinstrument-4.6.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2097f63c66c2bc9678c826b9ff0c25acde3ed455590d9dcac21220673fe74fbf"}, + {file = "pyinstrument-4.6.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:205ac2e76bd65d61b9611a9ce03d5f6393e34ec5b41dd38808f25d54e6b3e067"}, + {file = "pyinstrument-4.6.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f414ddf1161976a40fc0a333000e6a4ad612719eac0b8c9bb73f47153187148"}, + {file = "pyinstrument-4.6.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:65e62ebfa2cd8fb57eda90006f4505ac4c70da00fc2f05b6d8337d776ea76d41"}, + {file = "pyinstrument-4.6.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d96309df4df10be7b4885797c5f69bb3a89414680ebaec0722d8156fde5268c3"}, + {file = "pyinstrument-4.6.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f3d1ad3bc8ebb4db925afa706aa865c4bfb40d52509f143491ac0df2440ee5d2"}, + {file = "pyinstrument-4.6.1-cp38-cp38-win32.whl", hash = "sha256:dc37cb988c8854eb42bda2e438aaf553536566657d157c4473cc8aad5692a779"}, + {file = 
"pyinstrument-4.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:2cd4ce750c34a0318fc2d6c727cc255e9658d12a5cf3f2d0473f1c27157bdaeb"}, + {file = "pyinstrument-4.6.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6ca95b21f022e995e062b371d1f42d901452bcbedd2c02f036de677119503355"}, + {file = "pyinstrument-4.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ac1e1d7e1f1b64054c4eb04eb4869a7a5eef2261440e73943cc1b1bc3c828c18"}, + {file = "pyinstrument-4.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0711845e953fce6ab781221aacffa2a66dbc3289f8343e5babd7b2ea34da6c90"}, + {file = "pyinstrument-4.6.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b7d28582017de35cb64eb4e4fa603e753095108ca03745f5d17295970ee631f"}, + {file = "pyinstrument-4.6.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7be57db08bd366a37db3aa3a6187941ee21196e8b14975db337ddc7d1490649d"}, + {file = "pyinstrument-4.6.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9a0ac0f56860398d2628ce389826ce83fb3a557d0c9a2351e8a2eac6eb869983"}, + {file = "pyinstrument-4.6.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a9045186ff13bc826fef16be53736a85029aae3c6adfe52e666cad00d7ca623b"}, + {file = "pyinstrument-4.6.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6c4c56b6eab9004e92ad8a48bb54913fdd71fc8a748ae42a27b9e26041646f8b"}, + {file = "pyinstrument-4.6.1-cp39-cp39-win32.whl", hash = "sha256:37e989c44b51839d0c97466fa2b623638b9470d56d79e329f359f0e8fa6d83db"}, + {file = "pyinstrument-4.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:5494c5a84fee4309d7d973366ca6b8b9f8ba1d6b254e93b7c506264ef74f2cef"}, + {file = "pyinstrument-4.6.1.tar.gz", hash = "sha256:f4731b27121350f5a983d358d2272fe3df2f538aed058f57217eef7801a89288"}, ] [package.extras] @@ -5040,13 +5070,13 @@ docs = ["Sphinx (>=6.2,<7.0)", "boto3 (>=1.26,<2.0)", "cartopy (>=0.21,<1.0)", " [[package]] name = "pytest" -version = "8.0.0" +version = "7.4.4" description = "pytest: simple powerful testing with Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.7" files = [ - {file = "pytest-8.0.0-py3-none-any.whl", hash = "sha256:50fb9cbe836c3f20f0dfa99c565201fb75dc54c8d76373cd1bde06b06657bdb6"}, - {file = "pytest-8.0.0.tar.gz", hash = "sha256:249b1b0864530ba251b7438274c4d251c58d868edaaec8762893ad4a0d71c36c"}, + {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, + {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, ] [package.dependencies] @@ -5054,7 +5084,7 @@ colorama = {version = "*", markers = "sys_platform == \"win32\""} exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" -pluggy = ">=1.3.0,<2.0" +pluggy = ">=0.12,<2.0" tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] @@ -5255,13 +5285,13 @@ six = ">=1.5" [[package]] name = "pytz" -version = "2023.4" +version = "2023.3.post1" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" files = [ - {file = "pytz-2023.4-py2.py3-none-any.whl", hash = "sha256:f90ef520d95e7c46951105338d918664ebfd6f1d995bd7d153127ce90efafa6a"}, - {file = "pytz-2023.4.tar.gz", hash = "sha256:31d4583c4ed539cd037956140d695e42c033a19e984bfce9964a3f7d59bc2b40"}, + {file = 
"pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, + {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, ] [[package]] @@ -5323,7 +5353,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -5594,13 +5623,13 @@ full = ["numpy"] [[package]] name = "referencing" -version = "0.33.0" +version = "0.32.1" description = "JSON Referencing + Python" optional = false python-versions = ">=3.8" files = [ - {file = "referencing-0.33.0-py3-none-any.whl", hash = "sha256:39240f2ecc770258f28b642dd47fd74bc8b02484de54e1882b74b35ebd779bd5"}, - {file = "referencing-0.33.0.tar.gz", hash = "sha256:c775fedf74bc0f9189c2a3be1c12fd03e8c23f4d371dce795df44e06c5b412f7"}, + {file = "referencing-0.32.1-py3-none-any.whl", hash = "sha256:7e4dc12271d8e15612bfe35792f5ea1c40970dadf8624602e33db2758f7ee554"}, + {file = "referencing-0.32.1.tar.gz", hash = "sha256:3c57da0513e9563eb7e203ebe9bb3a1b509b042016433bd1e45a2853466c3dd3"}, ] [package.dependencies] @@ -5905,133 +5934,118 @@ pyasn1 = ">=0.1.3" [[package]] name = "ruff" -version = "0.1.15" +version = "0.1.13" description = "An extremely fast Python linter and code formatter, written in Rust." 
optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:5fe8d54df166ecc24106db7dd6a68d44852d14eb0729ea4672bb4d96c320b7df"}, - {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6f0bfbb53c4b4de117ac4d6ddfd33aa5fc31beeaa21d23c45c6dd249faf9126f"}, - {file = "ruff-0.1.15-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0d432aec35bfc0d800d4f70eba26e23a352386be3a6cf157083d18f6f5881c8"}, - {file = "ruff-0.1.15-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9405fa9ac0e97f35aaddf185a1be194a589424b8713e3b97b762336ec79ff807"}, - {file = "ruff-0.1.15-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c66ec24fe36841636e814b8f90f572a8c0cb0e54d8b5c2d0e300d28a0d7bffec"}, - {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:6f8ad828f01e8dd32cc58bc28375150171d198491fc901f6f98d2a39ba8e3ff5"}, - {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86811954eec63e9ea162af0ffa9f8d09088bab51b7438e8b6488b9401863c25e"}, - {file = "ruff-0.1.15-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd4025ac5e87d9b80e1f300207eb2fd099ff8200fa2320d7dc066a3f4622dc6b"}, - {file = "ruff-0.1.15-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b17b93c02cdb6aeb696effecea1095ac93f3884a49a554a9afa76bb125c114c1"}, - {file = "ruff-0.1.15-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ddb87643be40f034e97e97f5bc2ef7ce39de20e34608f3f829db727a93fb82c5"}, - {file = "ruff-0.1.15-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:abf4822129ed3a5ce54383d5f0e964e7fef74a41e48eb1dfad404151efc130a2"}, - {file = "ruff-0.1.15-py3-none-musllinux_1_2_i686.whl", hash = "sha256:6c629cf64bacfd136c07c78ac10a54578ec9d1bd2a9d395efbee0935868bf852"}, - {file = "ruff-0.1.15-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1bab866aafb53da39c2cadfb8e1c4550ac5340bb40300083eb8967ba25481447"}, - {file = "ruff-0.1.15-py3-none-win32.whl", hash = "sha256:2417e1cb6e2068389b07e6fa74c306b2810fe3ee3476d5b8a96616633f40d14f"}, - {file = "ruff-0.1.15-py3-none-win_amd64.whl", hash = "sha256:3837ac73d869efc4182d9036b1405ef4c73d9b1f88da2413875e34e0d6919587"}, - {file = "ruff-0.1.15-py3-none-win_arm64.whl", hash = "sha256:9a933dfb1c14ec7a33cceb1e49ec4a16b51ce3c20fd42663198746efc0427360"}, - {file = "ruff-0.1.15.tar.gz", hash = "sha256:f6dfa8c1b21c913c326919056c390966648b680966febcb796cc9d1aaab8564e"}, + {file = "ruff-0.1.13-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:e3fd36e0d48aeac672aa850045e784673449ce619afc12823ea7868fcc41d8ba"}, + {file = "ruff-0.1.13-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:9fb6b3b86450d4ec6a6732f9f60c4406061b6851c4b29f944f8c9d91c3611c7a"}, + {file = "ruff-0.1.13-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b13ba5d7156daaf3fd08b6b993360a96060500aca7e307d95ecbc5bb47a69296"}, + {file = "ruff-0.1.13-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9ebb40442f7b531e136d334ef0851412410061e65d61ca8ce90d894a094feb22"}, + {file = "ruff-0.1.13-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:226b517f42d59a543d6383cfe03cccf0091e3e0ed1b856c6824be03d2a75d3b6"}, + {file = "ruff-0.1.13-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = 
"sha256:5f0312ba1061e9b8c724e9a702d3c8621e3c6e6c2c9bd862550ab2951ac75c16"}, + {file = "ruff-0.1.13-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2f59bcf5217c661254bd6bc42d65a6fd1a8b80c48763cb5c2293295babd945dd"}, + {file = "ruff-0.1.13-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6894b00495e00c27b6ba61af1fc666f17de6140345e5ef27dd6e08fb987259d"}, + {file = "ruff-0.1.13-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a1600942485c6e66119da294c6294856b5c86fd6df591ce293e4a4cc8e72989"}, + {file = "ruff-0.1.13-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ee3febce7863e231a467f90e681d3d89210b900d49ce88723ce052c8761be8c7"}, + {file = "ruff-0.1.13-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dcaab50e278ff497ee4d1fe69b29ca0a9a47cd954bb17963628fa417933c6eb1"}, + {file = "ruff-0.1.13-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f57de973de4edef3ad3044d6a50c02ad9fc2dff0d88587f25f1a48e3f72edf5e"}, + {file = "ruff-0.1.13-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:7a36fa90eb12208272a858475ec43ac811ac37e91ef868759770b71bdabe27b6"}, + {file = "ruff-0.1.13-py3-none-win32.whl", hash = "sha256:a623349a505ff768dad6bd57087e2461be8db58305ebd5577bd0e98631f9ae69"}, + {file = "ruff-0.1.13-py3-none-win_amd64.whl", hash = "sha256:f988746e3c3982bea7f824c8fa318ce7f538c4dfefec99cd09c8770bd33e6539"}, + {file = "ruff-0.1.13-py3-none-win_arm64.whl", hash = "sha256:6bbbc3042075871ec17f28864808540a26f0f79a4478c357d3e3d2284e832998"}, + {file = "ruff-0.1.13.tar.gz", hash = "sha256:e261f1baed6291f434ffb1d5c6bd8051d1c2a26958072d38dfbec39b3dda7352"}, ] [[package]] name = "scikit-learn" -version = "1.4.0" +version = "1.3.2" description = "A set of python modules for machine learning and data mining" optional = false -python-versions = ">=3.9" +python-versions = ">=3.8" files = [ - {file = "scikit-learn-1.4.0.tar.gz", hash = "sha256:d4373c984eba20e393216edd51a3e3eede56cbe93d4247516d205643c3b93121"}, - {file = "scikit_learn-1.4.0-1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fce93a7473e2f4ee4cc280210968288d6a7d7ad8dc6fa7bb7892145e407085f9"}, - {file = "scikit_learn-1.4.0-1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d77df3d1e15fc37a9329999979fa7868ba8655dbab21fe97fc7ddabac9e08cc7"}, - {file = "scikit_learn-1.4.0-1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2404659fedec40eeafa310cd14d613e564d13dbf8f3c752d31c095195ec05de6"}, - {file = "scikit_learn-1.4.0-1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e98632da8f6410e6fb6bf66937712c949b4010600ccd3f22a5388a83e610cc3c"}, - {file = "scikit_learn-1.4.0-1-cp310-cp310-win_amd64.whl", hash = "sha256:11b3b140f70fbc9f6a08884631ae8dd60a4bb2d7d6d1de92738ea42b740d8992"}, - {file = "scikit_learn-1.4.0-1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a8341eabdc754d5ab91641a7763243845e96b6d68e03e472531e88a4f1b09f21"}, - {file = "scikit_learn-1.4.0-1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d1f6bce875ac2bb6b52514f67c185c564ccd299a05b65b7bab091a4c13dde12d"}, - {file = "scikit_learn-1.4.0-1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c408b46b2fd61952d519ea1af2f8f0a7a703e1433923ab1704c4131520b2083b"}, - {file = "scikit_learn-1.4.0-1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b465dd1dcd237b7b1dcd1a9048ccbf70a98c659474324fa708464c3a2533fad"}, - {file = "scikit_learn-1.4.0-1-cp311-cp311-win_amd64.whl", hash = 
"sha256:0db8e22c42f7980fe5eb22069b1f84c48966f3e0d23a01afde5999e3987a2501"}, - {file = "scikit_learn-1.4.0-1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e7eef6ea2ed289af40e88c0be9f7704ca8b5de18508a06897c3fe21e0905efdf"}, - {file = "scikit_learn-1.4.0-1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:349669b01435bc4dbf25c6410b0892073befdaec52637d1a1d1ff53865dc8db3"}, - {file = "scikit_learn-1.4.0-1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d439c584e58434d0350701bd33f6c10b309e851fccaf41c121aed55f6851d8cf"}, - {file = "scikit_learn-1.4.0-1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0e2427d9ef46477625ab9b55c1882844fe6fc500f418c3f8e650200182457bc"}, - {file = "scikit_learn-1.4.0-1-cp312-cp312-win_amd64.whl", hash = "sha256:d3d75343940e7bf9b85c830c93d34039fa015eeb341c5c0b4cd7a90dadfe00d4"}, - {file = "scikit_learn-1.4.0-1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:76986d22e884ab062b1beecdd92379656e9d3789ecc1f9870923c178de55f9fe"}, - {file = "scikit_learn-1.4.0-1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:e22446ad89f1cb7657f0d849dcdc345b48e2d10afa3daf2925fdb740f85b714c"}, - {file = "scikit_learn-1.4.0-1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74812c9eabb265be69d738a8ea8d4884917a59637fcbf88a5f0e9020498bc6b3"}, - {file = "scikit_learn-1.4.0-1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad2a63e0dd386b92da3270887a29b308af4d7c750d8c4995dfd9a4798691bcc"}, - {file = "scikit_learn-1.4.0-1-cp39-cp39-win_amd64.whl", hash = "sha256:53b9e29177897c37e2ff9d4ba6ca12fdb156e22523e463db05def303f5c72b5c"}, - {file = "scikit_learn-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cb8f044a8f5962613ce1feb4351d66f8d784bd072d36393582f351859b065f7d"}, - {file = "scikit_learn-1.4.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:a6372c90bbf302387792108379f1ec77719c1618d88496d0df30cb8e370b4661"}, - {file = "scikit_learn-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:785ce3c352bf697adfda357c3922c94517a9376002971bc5ea50896144bc8916"}, - {file = "scikit_learn-1.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0aba2a20d89936d6e72d95d05e3bf1db55bca5c5920926ad7b92c34f5e7d3bbe"}, - {file = "scikit_learn-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:2bac5d56b992f8f06816f2cd321eb86071c6f6d44bb4b1cb3d626525820d754b"}, - {file = "scikit_learn-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:27ae4b0f1b2c77107c096a7e05b33458354107b47775428d1f11b23e30a73e8a"}, - {file = "scikit_learn-1.4.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5c5c62ffb52c3ffb755eb21fa74cc2cbf2c521bd53f5c04eaa10011dbecf5f80"}, - {file = "scikit_learn-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f0d2018ac6fa055dab65fe8a485967990d33c672d55bc254c56c35287b02fab"}, - {file = "scikit_learn-1.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91a8918c415c4b4bf1d60c38d32958849a9191c2428ab35d30b78354085c7c7a"}, - {file = "scikit_learn-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:80a21de63275f8bcd7877b3e781679d2ff1eddfed515a599f95b2502a3283d42"}, - {file = "scikit_learn-1.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0f33bbafb310c26b81c4d41ecaebdbc1f63498a3f13461d50ed9a2e8f24d28e4"}, - {file = "scikit_learn-1.4.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:8b6ac1442ec714b4911e5aef8afd82c691b5c88b525ea58299d455acc4e8dcec"}, - {file = 
"scikit_learn-1.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05fc5915b716c6cc60a438c250108e9a9445b522975ed37e416d5ea4f9a63381"}, - {file = "scikit_learn-1.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:842b7d6989f3c574685e18da6f91223eb32301d0f93903dd399894250835a6f7"}, - {file = "scikit_learn-1.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:88bcb586fdff865372df1bc6be88bb7e6f9e0aa080dab9f54f5cac7eca8e2b6b"}, - {file = "scikit_learn-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f77674647dd31f56cb12ed13ed25b6ed43a056fffef051715022d2ebffd7a7d1"}, - {file = "scikit_learn-1.4.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:833999872e2920ce00f3a50839946bdac7539454e200eb6db54898a41f4bfd43"}, - {file = "scikit_learn-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:970ec697accaef10fb4f51763f3a7b1250f9f0553cf05514d0e94905322a0172"}, - {file = "scikit_learn-1.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:923d778f378ebacca2c672ab1740e5a413e437fb45ab45ab02578f8b689e5d43"}, - {file = "scikit_learn-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:1d041bc95006b545b59e458399e3175ab11ca7a03dc9a74a573ac891f5df1489"}, -] - -[package.dependencies] -joblib = ">=1.2.0" -numpy = ">=1.19.5" -scipy = ">=1.6.0" + {file = "scikit-learn-1.3.2.tar.gz", hash = "sha256:a2f54c76accc15a34bfb9066e6c7a56c1e7235dda5762b990792330b52ccfb05"}, + {file = "scikit_learn-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e326c0eb5cf4d6ba40f93776a20e9a7a69524c4db0757e7ce24ba222471ee8a1"}, + {file = "scikit_learn-1.3.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:535805c2a01ccb40ca4ab7d081d771aea67e535153e35a1fd99418fcedd1648a"}, + {file = "scikit_learn-1.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1215e5e58e9880b554b01187b8c9390bf4dc4692eedeaf542d3273f4785e342c"}, + {file = "scikit_learn-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ee107923a623b9f517754ea2f69ea3b62fc898a3641766cb7deb2f2ce450161"}, + {file = "scikit_learn-1.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:35a22e8015048c628ad099da9df5ab3004cdbf81edc75b396fd0cff8699ac58c"}, + {file = "scikit_learn-1.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6fb6bc98f234fda43163ddbe36df8bcde1d13ee176c6dc9b92bb7d3fc842eb66"}, + {file = "scikit_learn-1.3.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:18424efee518a1cde7b0b53a422cde2f6625197de6af36da0b57ec502f126157"}, + {file = "scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3271552a5eb16f208a6f7f617b8cc6d1f137b52c8a1ef8edf547db0259b2c9fb"}, + {file = "scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc4144a5004a676d5022b798d9e573b05139e77f271253a4703eed295bde0433"}, + {file = "scikit_learn-1.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:67f37d708f042a9b8d59551cf94d30431e01374e00dc2645fa186059c6c5d78b"}, + {file = "scikit_learn-1.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8db94cd8a2e038b37a80a04df8783e09caac77cbe052146432e67800e430c028"}, + {file = "scikit_learn-1.3.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:61a6efd384258789aa89415a410dcdb39a50e19d3d8410bd29be365bcdd512d5"}, + {file = "scikit_learn-1.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb06f8dce3f5ddc5dee1715a9b9f19f20d295bed8e3cd4fa51e1d050347de525"}, + {file = 
"scikit_learn-1.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b2de18d86f630d68fe1f87af690d451388bb186480afc719e5f770590c2ef6c"}, + {file = "scikit_learn-1.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:0402638c9a7c219ee52c94cbebc8fcb5eb9fe9c773717965c1f4185588ad3107"}, + {file = "scikit_learn-1.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a19f90f95ba93c1a7f7924906d0576a84da7f3b2282ac3bfb7a08a32801add93"}, + {file = "scikit_learn-1.3.2-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:b8692e395a03a60cd927125eef3a8e3424d86dde9b2370d544f0ea35f78a8073"}, + {file = "scikit_learn-1.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15e1e94cc23d04d39da797ee34236ce2375ddea158b10bee3c343647d615581d"}, + {file = "scikit_learn-1.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:785a2213086b7b1abf037aeadbbd6d67159feb3e30263434139c98425e3dcfcf"}, + {file = "scikit_learn-1.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:64381066f8aa63c2710e6b56edc9f0894cc7bf59bd71b8ce5613a4559b6145e0"}, + {file = "scikit_learn-1.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6c43290337f7a4b969d207e620658372ba3c1ffb611f8bc2b6f031dc5c6d1d03"}, + {file = "scikit_learn-1.3.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:dc9002fc200bed597d5d34e90c752b74df516d592db162f756cc52836b38fe0e"}, + {file = "scikit_learn-1.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d08ada33e955c54355d909b9c06a4789a729977f165b8bae6f225ff0a60ec4a"}, + {file = "scikit_learn-1.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:763f0ae4b79b0ff9cca0bf3716bcc9915bdacff3cebea15ec79652d1cc4fa5c9"}, + {file = "scikit_learn-1.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:ed932ea780517b00dae7431e031faae6b49b20eb6950918eb83bd043237950e0"}, +] + +[package.dependencies] +joblib = ">=1.1.1" +numpy = ">=1.17.3,<2.0" +scipy = ">=1.5.0" threadpoolctl = ">=2.0.0" [package.extras] -benchmark = ["matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)", "pandas (>=1.1.5)"] -docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.15.0)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] -examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] -tests = ["black (>=23.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.19.12)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.17.2)"] +benchmark = ["matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "pandas (>=1.0.5)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.10.1)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] +examples = ["matplotlib (>=3.1.3)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)"] +tests = ["black (>=23.3.0)", "matplotlib (>=3.1.3)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", 
"pandas (>=1.0.5)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.16.2)"] [[package]] name = "scipy" -version = "1.12.0" +version = "1.11.4" description = "Fundamental algorithms for scientific computing in Python" optional = false python-versions = ">=3.9" files = [ - {file = "scipy-1.12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:78e4402e140879387187f7f25d91cc592b3501a2e51dfb320f48dfb73565f10b"}, - {file = "scipy-1.12.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f5f00ebaf8de24d14b8449981a2842d404152774c1a1d880c901bf454cb8e2a1"}, - {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e53958531a7c695ff66c2e7bb7b79560ffdc562e2051644c5576c39ff8efb563"}, - {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e32847e08da8d895ce09d108a494d9eb78974cf6de23063f93306a3e419960c"}, - {file = "scipy-1.12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4c1020cad92772bf44b8e4cdabc1df5d87376cb219742549ef69fc9fd86282dd"}, - {file = "scipy-1.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:75ea2a144096b5e39402e2ff53a36fecfd3b960d786b7efd3c180e29c39e53f2"}, - {file = "scipy-1.12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:408c68423f9de16cb9e602528be4ce0d6312b05001f3de61fe9ec8b1263cad08"}, - {file = "scipy-1.12.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5adfad5dbf0163397beb4aca679187d24aec085343755fcdbdeb32b3679f254c"}, - {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3003652496f6e7c387b1cf63f4bb720951cfa18907e998ea551e6de51a04467"}, - {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b8066bce124ee5531d12a74b617d9ac0ea59245246410e19bca549656d9a40a"}, - {file = "scipy-1.12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8bee4993817e204d761dba10dbab0774ba5a8612e57e81319ea04d84945375ba"}, - {file = "scipy-1.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:a24024d45ce9a675c1fb8494e8e5244efea1c7a09c60beb1eeb80373d0fecc70"}, - {file = "scipy-1.12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e7e76cc48638228212c747ada851ef355c2bb5e7f939e10952bc504c11f4e372"}, - {file = "scipy-1.12.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f7ce148dffcd64ade37b2df9315541f9adad6efcaa86866ee7dd5db0c8f041c3"}, - {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c39f92041f490422924dfdb782527a4abddf4707616e07b021de33467f917bc"}, - {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7ebda398f86e56178c2fa94cad15bf457a218a54a35c2a7b4490b9f9cb2676c"}, - {file = "scipy-1.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:95e5c750d55cf518c398a8240571b0e0782c2d5a703250872f36eaf737751338"}, - {file = "scipy-1.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e646d8571804a304e1da01040d21577685ce8e2db08ac58e543eaca063453e1c"}, - {file = "scipy-1.12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:913d6e7956c3a671de3b05ccb66b11bc293f56bfdef040583a7221d9e22a2e35"}, - {file = "scipy-1.12.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba1b0c7256ad75401c73e4b3cf09d1f176e9bd4248f0d3112170fb2ec4db067"}, - {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:730badef9b827b368f351eacae2e82da414e13cf8bd5051b4bdfd720271a5371"}, - {file = 
"scipy-1.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6546dc2c11a9df6926afcbdd8a3edec28566e4e785b915e849348c6dd9f3f490"}, - {file = "scipy-1.12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:196ebad3a4882081f62a5bf4aeb7326aa34b110e533aab23e4374fcccb0890dc"}, - {file = "scipy-1.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:b360f1b6b2f742781299514e99ff560d1fe9bd1bff2712894b52abe528d1fd1e"}, - {file = "scipy-1.12.0.tar.gz", hash = "sha256:4bf5abab8a36d20193c698b0f1fc282c1d083c94723902c447e5d2f1780936a3"}, -] - -[package.dependencies] -numpy = ">=1.22.4,<1.29.0" + {file = "scipy-1.11.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc9a714581f561af0848e6b69947fda0614915f072dfd14142ed1bfe1b806710"}, + {file = "scipy-1.11.4-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:cf00bd2b1b0211888d4dc75656c0412213a8b25e80d73898083f402b50f47e41"}, + {file = "scipy-1.11.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9999c008ccf00e8fbcce1236f85ade5c569d13144f77a1946bef8863e8f6eb4"}, + {file = "scipy-1.11.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:933baf588daa8dc9a92c20a0be32f56d43faf3d1a60ab11b3f08c356430f6e56"}, + {file = "scipy-1.11.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8fce70f39076a5aa62e92e69a7f62349f9574d8405c0a5de6ed3ef72de07f446"}, + {file = "scipy-1.11.4-cp310-cp310-win_amd64.whl", hash = "sha256:6550466fbeec7453d7465e74d4f4b19f905642c89a7525571ee91dd7adabb5a3"}, + {file = "scipy-1.11.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f313b39a7e94f296025e3cffc2c567618174c0b1dde173960cf23808f9fae4be"}, + {file = "scipy-1.11.4-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1b7c3dca977f30a739e0409fb001056484661cb2541a01aba0bb0029f7b68db8"}, + {file = "scipy-1.11.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00150c5eae7b610c32589dda259eacc7c4f1665aedf25d921907f4d08a951b1c"}, + {file = "scipy-1.11.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:530f9ad26440e85766509dbf78edcfe13ffd0ab7fec2560ee5c36ff74d6269ff"}, + {file = "scipy-1.11.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5e347b14fe01003d3b78e196e84bd3f48ffe4c8a7b8a1afbcb8f5505cb710993"}, + {file = "scipy-1.11.4-cp311-cp311-win_amd64.whl", hash = "sha256:acf8ed278cc03f5aff035e69cb511741e0418681d25fbbb86ca65429c4f4d9cd"}, + {file = "scipy-1.11.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:028eccd22e654b3ea01ee63705681ee79933652b2d8f873e7949898dda6d11b6"}, + {file = "scipy-1.11.4-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2c6ff6ef9cc27f9b3db93a6f8b38f97387e6e0591600369a297a50a8e96e835d"}, + {file = "scipy-1.11.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b030c6674b9230d37c5c60ab456e2cf12f6784596d15ce8da9365e70896effc4"}, + {file = "scipy-1.11.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad669df80528aeca5f557712102538f4f37e503f0c5b9541655016dd0932ca79"}, + {file = "scipy-1.11.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ce7fff2e23ab2cc81ff452a9444c215c28e6305f396b2ba88343a567feec9660"}, + {file = "scipy-1.11.4-cp312-cp312-win_amd64.whl", hash = "sha256:36750b7733d960d7994888f0d148d31ea3017ac15eef664194b4ef68d36a4a97"}, + {file = "scipy-1.11.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6e619aba2df228a9b34718efb023966da781e89dd3d21637b27f2e54db0410d7"}, + {file = "scipy-1.11.4-cp39-cp39-macosx_12_0_arm64.whl", hash = 
"sha256:f3cd9e7b3c2c1ec26364856f9fbe78695fe631150f94cd1c22228456404cf1ec"}, + {file = "scipy-1.11.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d10e45a6c50211fe256da61a11c34927c68f277e03138777bdebedd933712fea"}, + {file = "scipy-1.11.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91af76a68eeae0064887a48e25c4e616fa519fa0d38602eda7e0f97d65d57937"}, + {file = "scipy-1.11.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6df1468153a31cf55ed5ed39647279beb9cfb5d3f84369453b49e4b8502394fd"}, + {file = "scipy-1.11.4-cp39-cp39-win_amd64.whl", hash = "sha256:ee410e6de8f88fd5cf6eadd73c135020bfbbbdfcd0f6162c36a7638a1ea8cc65"}, + {file = "scipy-1.11.4.tar.gz", hash = "sha256:90a2b78e7f5733b9de748f589f09225013685f9b218275257f8a8168ededaeaa"}, +] + +[package.dependencies] +numpy = ">=1.21.6,<1.28.0" [package.extras] dev = ["click", "cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] doc = ["jupytext", "matplotlib (>2)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"] -test = ["asv", "gmpy2", "hypothesis", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] +test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] [[package]] name = "scooby" @@ -6049,13 +6063,13 @@ cpu = ["mkl", "psutil"] [[package]] name = "seaborn" -version = "0.13.2" +version = "0.13.1" description = "Statistical data visualization" optional = false python-versions = ">=3.8" files = [ - {file = "seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987"}, - {file = "seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7"}, + {file = "seaborn-0.13.1-py3-none-any.whl", hash = "sha256:6baa69b6d1169ae59037971491c450c0b73332b42bd4b23570b62a546bc61cb8"}, + {file = "seaborn-0.13.1.tar.gz", hash = "sha256:bfad65e9c5989e5e1897e61bdbd2f22e62455940ca76fd49eca3ed69345b9179"}, ] [package.dependencies] @@ -6180,37 +6194,32 @@ files = [ [[package]] name = "snowflake-connector-python" -version = "3.7.0" +version = "3.6.0" description = "Snowflake Connector for Python" optional = true python-versions = ">=3.8" files = [ - {file = "snowflake-connector-python-3.7.0.tar.gz", hash = "sha256:b2bfaec64059307b08caadad40214d488fefb4a23fcd7553ac75f5ea758a9169"}, - {file = "snowflake_connector_python-3.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f38070af24d15cd103d565b63b08c5eac3bdf72ad06ad27cd98c46359cb4bee2"}, - {file = "snowflake_connector_python-3.7.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:f8f3912699030291fd82d75321cda44205c9f8fb27841ffbaaf6d3dc4065b798"}, - {file = "snowflake_connector_python-3.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7ac1190c6ca48297429f0fb6515b54e3fd3bceb1b72fce7b59097044a9e98e0"}, - {file = "snowflake_connector_python-3.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57deaa28baa601b64c1ae5a5c75260ab1c6a22bd07a8d8c7ac785c8deb1c556e"}, - {file = "snowflake_connector_python-3.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:715635ed5b6e5ef8de659fc336c1b89296fe72fdec180c40915c10df885c8082"}, - {file = "snowflake_connector_python-3.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:d51f3a8912fcc5169731d2b42262087e8a6da20f7344dd001ed97fbdf6ff972c"}, - {file = "snowflake_connector_python-3.7.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:46bfa243875eff9c6dfe1afc26f2034b00ac6eb9f77010b2949a174c38a59722"}, - {file = "snowflake_connector_python-3.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7004ccfe3c16075d33b0440b4d5241a50156bbc5dcbf11dec61674d0ac830f46"}, - {file = "snowflake_connector_python-3.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ee9e6a0a94e0ac1f15fa93c0f61f6e930240280bd043f61216d942e837beb7f"}, - {file = "snowflake_connector_python-3.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:b545fd11c9bd200681e182cf46bb4cbc8250ca6acc41fbea749799a2b23f574f"}, - {file = "snowflake_connector_python-3.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:258541df8ba90201ce6f7c4ae9f59e3a9f585ed30fbbaafd207e0774104cf6dc"}, - {file = "snowflake_connector_python-3.7.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:e548642913f7d0ef9d5a35c69c7a8308cbab8fe255fdc3c9f7e18c71e52a0c2e"}, - {file = "snowflake_connector_python-3.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:639d09de40c014c7ab0308f622bd1d29a9b9dd05c0ced2d858da31323fa16bda"}, - {file = "snowflake_connector_python-3.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da9cf62665ee47c7ec8c18ae554a31c72cacf1cef4b42d55cfbdbae4b5ddb3f2"}, - {file = "snowflake_connector_python-3.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:ad1d0e339cadb5ba79d24783c39ba21a63e2159f0d3d9540da0168f97043904c"}, - {file = "snowflake_connector_python-3.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3d8e4d0fad8b00b55bc99035ad2c54d9aa3ca8495f7dfcce736a961b5dbd1d9f"}, - {file = "snowflake_connector_python-3.7.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:fc3e95d4c99472444ffda35b9bbfe4cd4c775279c7eca579f1eee9d8d2ec1e2a"}, - {file = "snowflake_connector_python-3.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f93a5861333c2f87ecd1fea34a0fae35c12c196e86fa75c2dd89741e83f2d82"}, - {file = "snowflake_connector_python-3.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdf0fe7d77e02949a8a2a7d365217b822bcaf2fc9541095a241116576458568"}, - {file = "snowflake_connector_python-3.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:1ec29946b224d8089070477f60ffe58923433d8c2308b6403684500e85c37699"}, - {file = "snowflake_connector_python-3.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4f945c512383a8b5f1d2404c40d20e0c915ba3f0ac01983f2e43987d6eecda02"}, - {file = "snowflake_connector_python-3.7.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:574cf5be3c61a6ea421ac9710ac791a80f6dfcc53986ab81e68d1085dad79dab"}, - {file = "snowflake_connector_python-3.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb8168458e5d23a0ba4d4e0a276bbd477ddd26d35c554f2c3c64cfe29622499a"}, - {file = "snowflake_connector_python-3.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecf8f520692653775f51307140d326b53a51e338d67dc522b1d376b51b12d14e"}, - {file = "snowflake_connector_python-3.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:5ed928055ed40da22b2d6bdde62eee5068c352f66415e1c9aee7c45eb67d39cb"}, + {file = "snowflake-connector-python-3.6.0.tar.gz", hash = "sha256:15667a918780d79da755e6a60bbf6918051854951e8f56ccdf5692283e9a8479"}, + {file = "snowflake_connector_python-3.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash 
= "sha256:4093b38cf9abf95c38119f0b23b07e23dc7a8689b956cd5d34975e1875741f20"}, + {file = "snowflake_connector_python-3.6.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:cf5a964fe01b177063f8c44d14df3a72715580bcd195788ec2822090f37330a5"}, + {file = "snowflake_connector_python-3.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55a6418cec585b050e6f05404f25e62b075a3bbea587dc1f903de15640565c58"}, + {file = "snowflake_connector_python-3.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7c76aea92b87f6ecd604e9c934aac8a779f2e20f3be1d990d53bb5b6d87b009"}, + {file = "snowflake_connector_python-3.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:9dfcf178271e892e64e4092b9e011239a066ce5de848afd2efe3f13197a9f8b3"}, + {file = "snowflake_connector_python-3.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4916f9b4a0efd7c96d1fa50a157e05907b6935f91492cca7f200b43cc178a25e"}, + {file = "snowflake_connector_python-3.6.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:f15024c66db5e87d359216ec733a2974d7562aa38f3f18c8b6e65489839e00d7"}, + {file = "snowflake_connector_python-3.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bcbd3102f807ebbbae52b1b5683d45cd7b3dcb0eaec131233ba6b156e8d70fa4"}, + {file = "snowflake_connector_python-3.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7662e2de25b885abe08ab866cf7c7b026ad1af9faa39c25e2c25015ef807abe3"}, + {file = "snowflake_connector_python-3.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:d1fa102f55ee166cc766aeee3f9333b17b4bede6fb088eee1e1f022df15b6d81"}, + {file = "snowflake_connector_python-3.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:fde1e0727e2f23c2a07b49b30e1bc0f49977f965d08ddfda10015b24a2beeb76"}, + {file = "snowflake_connector_python-3.6.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:1b51fe000c8cf6372d30b73c7136275e52788e6af47010cd1984c9fb03378e86"}, + {file = "snowflake_connector_python-3.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7a11699689a19916e65794ce58dca72b8a40fe6a7eea06764931ede10b47bcc"}, + {file = "snowflake_connector_python-3.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d810be5b180c6f47ce9b6f989fe64b9984383e4b77e30b284a83e33f229a3a82"}, + {file = "snowflake_connector_python-3.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:b5db47d4164d6b7a07c413a46f9edc4a1d687e3df44fd9d5fa89a89aecb94a8e"}, + {file = "snowflake_connector_python-3.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bf8c1ad5aab5304fefa2a4178061a24c96da45e3e3db9d901621e9953e005402"}, + {file = "snowflake_connector_python-3.6.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:1058ab5c98cc62fde8b3f021f0a5076cb7865b5cdab8a9bccde0df88b9e91334"}, + {file = "snowflake_connector_python-3.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b93f55989f80d69278e0f40a7a1c0e737806b7c0ddb0351513a752b837243e8"}, + {file = "snowflake_connector_python-3.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50dd954ea5918d3242ded69225b72f701963cd9c043ee7d9ab35dc22211611c8"}, + {file = "snowflake_connector_python-3.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:4ad42613b87f31441d07a8ea242f4c28ed5eb7b6e05986f9e94a7e44b96d3d1e"}, ] [package.dependencies] @@ -6354,6 +6363,20 @@ postgresql-psycopg2cffi = ["psycopg2cffi"] pymysql = ["pymysql", "pymysql (<1)"] sqlcipher = ["sqlcipher3_binary"] +[[package]] +name = "sqlalchemy-risingwave" +version = 
"1.0.0" +description = "RisingWave dialect for SQLAlchemy" +optional = true +python-versions = "*" +files = [ + {file = "sqlalchemy-risingwave-1.0.0.tar.gz", hash = "sha256:856a3c44b98cba34d399c3cc9785a74896caca152b3685d87553e4210e3e07a4"}, + {file = "sqlalchemy_risingwave-1.0.0-py3-none-any.whl", hash = "sha256:c733365abc38e88f4d23d83713cfc3f21c0b0d3c81210cbc2f569b49a912ba08"}, +] + +[package.dependencies] +SQLAlchemy = ">=1.4,<2" + [[package]] name = "sqlalchemy-views" version = "0.3.2" @@ -6370,17 +6393,17 @@ sqlalchemy = ">=1.0.0" [[package]] name = "sqlglot" -version = "20.11.0" +version = "20.8.0" description = "An easily customizable SQL parser and transpiler" optional = false python-versions = ">=3.7" files = [ - {file = "sqlglot-20.11.0-py3-none-any.whl", hash = "sha256:658509272da15e90dd1c59d9ca5281d7bff2e87121f87e6f9e6541067a057c9c"}, - {file = "sqlglot-20.11.0.tar.gz", hash = "sha256:79a1510ffad1f1e4c5915751f0ed978c099e7e83cd4010ecbd471c00331b6902"}, + {file = "sqlglot-20.8.0-py3-none-any.whl", hash = "sha256:cb73b81a26da462c34b12b98cf193d679d4b5693703d309db236d9784cef60bb"}, + {file = "sqlglot-20.8.0.tar.gz", hash = "sha256:5636e97fab9efdb4a8690c0e32bbd2d657fe91eb650f10e913a56b4bd979faef"}, ] [package.extras] -dev = ["autoflake", "black", "duckdb (>=0.6)", "isort", "maturin (>=1.4,<2.0)", "mypy (>=0.990)", "pandas", "pdoc", "pre-commit", "pyspark", "python-dateutil", "types-python-dateutil", "typing-extensions"] +dev = ["autoflake", "black", "duckdb (>=0.6)", "isort", "maturin (>=1.4,<2.0)", "mypy (>=0.990)", "pandas", "pdoc", "pre-commit", "pyspark", "python-dateutil", "types-python-dateutil"] rs = ["sqlglotrs (==0.1.0)"] [[package]] @@ -6581,13 +6604,13 @@ files = [ [[package]] name = "toolz" -version = "0.12.1" +version = "0.12.0" description = "List processing tools and functional utilities" optional = false -python-versions = ">=3.7" +python-versions = ">=3.5" files = [ - {file = "toolz-0.12.1-py3-none-any.whl", hash = "sha256:d22731364c07d72eea0a0ad45bafb2c2937ab6fd38a3507bf55eae8744aa7d85"}, - {file = "toolz-0.12.1.tar.gz", hash = "sha256:ecca342664893f177a13dac0e6b41cbd8ac25a358e5f215316d43e2100224f4d"}, + {file = "toolz-0.12.0-py3-none-any.whl", hash = "sha256:2059bd4148deb1884bb0eb770a3cde70e7f954cfbbdc2285f1f2de01fd21eb6f"}, + {file = "toolz-0.12.0.tar.gz", hash = "sha256:88c570861c440ee3f2f6037c4654613228ff40c93a6c25e0eba70d17282c6194"}, ] [[package]] @@ -6688,13 +6711,13 @@ tests = ["black", "httpretty (<1.1)", "isort", "pre-commit", "pytest", "pytest-r [[package]] name = "trove-classifiers" -version = "2024.1.31" +version = "2024.1.8" description = "Canonical source for classifiers on PyPI (pypi.org)." 
optional = false python-versions = "*" files = [ - {file = "trove-classifiers-2024.1.31.tar.gz", hash = "sha256:bfdfe60bbf64985c524416afb637ecc79c558e0beb4b7f52b0039e01044b0229"}, - {file = "trove_classifiers-2024.1.31-py3-none-any.whl", hash = "sha256:854aba3358f3cf10e5c0916aa533f5a39e27aadd8ade26a54cdc2a93257e39c4"}, + {file = "trove-classifiers-2024.1.8.tar.gz", hash = "sha256:6e36caf430ff6485c4b57a4c6b364a13f6a898d16b9417c6c37467e59c14b05a"}, + {file = "trove_classifiers-2024.1.8-py3-none-any.whl", hash = "sha256:3c1ff4deb10149c7e39ede6e5bbc107def64362ef1ee7590ec98d71fb92f1b6a"}, ] [[package]] @@ -7300,7 +7323,7 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\ cffi = ["cffi (>=1.11)"] [extras] -all = ["black", "clickhouse-connect", "dask", "datafusion", "db-dtypes", "deltalake", "duckdb", "geopandas", "google-cloud-bigquery", "google-cloud-bigquery-storage", "graphviz", "impyla", "oracledb", "packaging", "pins", "polars", "psycopg2", "pydata-google-auth", "pydruid", "pyexasol", "pymysql", "pyodbc", "pyspark", "regex", "shapely", "snowflake-connector-python", "trino"] +all = ["black", "clickhouse-connect", "dask", "datafusion", "db-dtypes", "deltalake", "duckdb", "geopandas", "google-cloud-bigquery", "google-cloud-bigquery-storage", "graphviz", "impyla", "oracledb", "packaging", "pins", "polars", "psycopg2", "pydata-google-auth", "pydruid", "pyexasol", "pymysql", "pyodbc", "pyspark", "regex", "shapely", "snowflake-connector-python", "sqlalchemy", "trino"] bigquery = ["db-dtypes", "google-cloud-bigquery", "google-cloud-bigquery-storage", "pydata-google-auth"] clickhouse = ["clickhouse-connect"] dask = ["dask", "regex"] @@ -7320,8 +7343,8 @@ oracle = ["oracledb", "packaging"] pandas = ["regex"] polars = ["packaging", "polars"] postgres = ["psycopg2"] -risingwave = ["psycopg2"] pyspark = ["packaging", "pyspark"] +risingwave = ["psycopg2"] snowflake = ["packaging", "snowflake-connector-python"] sqlite = ["regex"] trino = ["trino"] @@ -7330,4 +7353,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "46f6575d9e668129872ccb5c2fd5de6c3e2fc808b8620e1c0082b18239b36639" +content-hash = "39f5035cdd3ae4bcc67b642e3f6b733f3c8956e903897c946624f39362a0b2cb" diff --git a/requirements-dev.txt b/requirements-dev.txt index ce0a3ff53b12..8307b0d22465 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ -aiohttp==3.9.3 ; python_version >= "3.9" and python_version < "4.0" +aiohttp==3.9.1 ; python_version >= "3.9" and python_version < "4.0" aiosignal==1.3.1 ; python_version >= "3.9" and python_version < "4.0" altair==5.2.0 ; python_version >= "3.10" and python_version < "3.13" annotated-types==0.6.0 ; python_version >= "3.10" and python_version < "3.13" @@ -10,13 +10,13 @@ asttokens==2.4.1 ; python_version >= "3.9" and python_version < "4.0" async-timeout==4.0.3 ; python_version >= "3.9" and python_version < "3.11" atpublic==4.0 ; python_version >= "3.9" and python_version < "4.0" attrs==23.2.0 ; python_version >= "3.9" and python_version < "4.0" -beartype==0.17.0 ; python_version >= "3.10" and python_version < "3.13" -beautifulsoup4==4.12.3 ; python_version >= "3.10" and python_version < "3.13" +beartype==0.16.4 ; python_version >= "3.10" and python_version < "3.13" +beautifulsoup4==4.12.2 ; python_version >= "3.10" and python_version < "3.13" bidict==0.22.1 ; python_version >= "3.9" and python_version < "4.0" bitarray==2.9.2 ; python_version >= "3.9" and python_version < "4.0" -black==24.1.1 ; 
python_version >= "3.9" and python_version < "4.0" +black==23.12.1 ; python_version >= "3.9" and python_version < "4.0" bqplot==0.12.42 ; python_version >= "3.10" and python_version < "3.13" -branca==0.7.1 ; python_version >= "3.10" and python_version < "3.13" +branca==0.7.0 ; python_version >= "3.10" and python_version < "3.13" build==1.0.3 ; python_version >= "3.9" and python_version < "4.0" cachecontrol[filecache]==0.13.1 ; python_version >= "3.9" and python_version < "4.0" cachetools==5.3.2 ; python_version >= "3.9" and python_version < "4.0" @@ -28,7 +28,7 @@ charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "4.0" cleo==2.1.0 ; python_version >= "3.9" and python_version < "4.0" click-plugins==1.1.1 ; python_version >= "3.9" and python_version < "4.0" click==8.1.7 ; python_version >= "3.9" and python_version < "4.0" -clickhouse-connect[arrow,numpy,pandas]==0.7.0 ; python_version >= "3.9" and python_version < "4.0" +clickhouse-connect[arrow,numpy,pandas]==0.6.23 ; python_version >= "3.9" and python_version < "4.0" cligj==0.7.2 ; python_version >= "3.9" and python_version < "4" cloudpickle==3.0.0 ; python_version >= "3.9" and python_version < "4.0" codespell[hard-encoding-detection,toml]==2.2.6 ; python_version >= "3.9" and python_version < "4.0" @@ -36,12 +36,12 @@ colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0" and (sys_pl colour==0.1.5 ; python_version >= "3.10" and python_version < "3.13" comm==0.2.1 ; python_version >= "3.10" and python_version < "3.13" contourpy==1.2.0 ; python_version >= "3.10" and python_version < "3.13" -coverage[toml]==7.4.1 ; python_version >= "3.9" and python_version < "4.0" +coverage[toml]==7.4.0 ; python_version >= "3.9" and python_version < "4.0" crashtest==0.4.1 ; python_version >= "3.9" and python_version < "4.0" cryptography==41.0.7 ; python_version >= "3.9" and python_version < "4.0" cycler==0.12.1 ; python_version >= "3.10" and python_version < "3.13" -dask==2024.1.1 ; python_version >= "3.10" and python_version < "3.13" -dask[array,dataframe]==2024.1.1 ; python_version >= "3.9" and python_version < "4.0" +dask==2024.1.0 ; python_version >= "3.10" and python_version < "3.13" +dask[array,dataframe]==2024.1.0 ; python_version >= "3.9" and python_version < "4.0" datafusion==34.0.0 ; python_version >= "3.9" and python_version < "4.0" db-dtypes==1.2.0 ; python_version >= "3.9" and python_version < "4.0" debugpy==1.8.0 ; python_version >= "3.10" and python_version < "3.13" @@ -65,13 +65,13 @@ fsspec==2023.12.2 ; python_version >= "3.9" and python_version < "4.0" gcsfs==2023.12.2.post1 ; python_version >= "3.9" and python_version < "4.0" gdown==4.7.1 ; python_version >= "3.10" and python_version < "3.13" geojson==3.1.0 ; python_version >= "3.10" and python_version < "3.13" -geopandas==0.14.3 ; python_version >= "3.9" and python_version < "4.0" -google-api-core==2.16.1 ; python_version >= "3.9" and python_version < "4.0" -google-api-core[grpc]==2.16.1 ; python_version >= "3.9" and python_version < "4.0" +geopandas==0.14.2 ; python_version >= "3.9" and python_version < "4.0" +google-api-core==2.15.0 ; python_version >= "3.9" and python_version < "4.0" +google-api-core[grpc]==2.15.0 ; python_version >= "3.9" and python_version < "4.0" google-auth-oauthlib==1.2.0 ; python_version >= "3.9" and python_version < "4.0" -google-auth==2.27.0 ; python_version >= "3.9" and python_version < "4.0" +google-auth==2.26.2 ; python_version >= "3.9" and python_version < "4.0" google-cloud-bigquery-storage==2.24.0 ; python_version >= 
"3.9" and python_version < "4.0" -google-cloud-bigquery==3.17.1 ; python_version >= "3.9" and python_version < "4.0" +google-cloud-bigquery==3.16.0 ; python_version >= "3.9" and python_version < "4.0" google-cloud-core==2.4.1 ; python_version >= "3.9" and python_version < "4.0" google-cloud-storage==2.14.0 ; python_version >= "3.9" and python_version < "4.0" google-crc32c==1.5.0 ; python_version >= "3.9" and python_version < "4.0" @@ -79,11 +79,11 @@ google-resumable-media==2.7.0 ; python_version >= "3.9" and python_version < "4. googleapis-common-protos==1.62.0 ; python_version >= "3.9" and python_version < "4.0" graphviz==0.20.1 ; python_version >= "3.9" and python_version < "4.0" greenlet==3.0.3 ; python_version >= "3.9" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") and python_version < "4.0" -griffe==0.40.0 ; python_version >= "3.10" and python_version < "3.13" +griffe==0.38.1 ; python_version >= "3.10" and python_version < "3.13" grpcio-status==1.60.0 ; python_version >= "3.9" and python_version < "4.0" grpcio==1.60.0 ; python_version >= "3.9" and python_version < "4.0" humanize==4.9.0 ; python_version >= "3.9" and python_version < "4.0" -hypothesis==6.97.4 ; python_version >= "3.9" and python_version < "4.0" +hypothesis==6.93.0 ; python_version >= "3.9" and python_version < "4.0" identify==2.5.33 ; python_version >= "3.9" and python_version < "4.0" idna==3.6 ; python_version >= "3.9" and python_version < "4.0" importlib-metadata==7.0.1 ; python_version >= "3.9" and python_version < "4.0" @@ -93,8 +93,8 @@ iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "4.0" installer==0.7.0 ; python_version >= "3.9" and python_version < "4.0" ipyevents==2.0.2 ; python_version >= "3.10" and python_version < "3.13" ipyfilechooser==0.6.0 ; python_version >= "3.10" and python_version < "3.13" -ipykernel==6.29.0 ; python_version >= "3.10" and python_version < "3.13" -ipyleaflet==0.18.2 ; python_version >= "3.10" and python_version < "3.13" +ipykernel==6.28.0 ; python_version >= "3.10" and python_version < "3.13" +ipyleaflet==0.18.1 ; python_version >= "3.10" and python_version < "3.13" ipython==8.18.1 ; python_version >= "3.9" and python_version < "4.0" ipytree==0.2.2 ; python_version >= "3.10" and python_version < "3.13" ipywidgets==8.1.1 ; python_version >= "3.10" and python_version < "3.13" @@ -104,7 +104,7 @@ jeepney==0.8.0 ; python_version >= "3.9" and python_version < "4.0" and sys_plat jinja2==3.1.3 ; python_version >= "3.9" and python_version < "4.0" joblib==1.3.2 ; python_version >= "3.9" and python_version < "4.0" jsonschema-specifications==2023.12.1 ; python_version >= "3.10" and python_version < "3.13" -jsonschema==4.21.1 ; python_version >= "3.10" and python_version < "3.13" +jsonschema==4.20.0 ; python_version >= "3.10" and python_version < "3.13" jupyter-client==8.6.0 ; python_version >= "3.10" and python_version < "3.13" jupyter-core==5.7.1 ; python_version >= "3.10" and python_version < "3.13" jupyterlab-widgets==3.0.9 ; python_version >= "3.10" and python_version < "3.13" @@ -115,7 +115,7 @@ locket==1.0.0 ; python_version >= "3.9" and python_version < "4.0" lonboard==0.5.0 ; python_version >= "3.10" and python_version < "3.13" lz4==4.3.3 ; python_version >= "3.9" and python_version < "4.0" markdown-it-py==3.0.0 ; python_version >= "3.9" and python_version < "4.0" -markupsafe==2.1.4 ; 
python_version >= "3.9" and python_version < "4.0" +markupsafe==2.1.3 ; python_version >= "3.9" and python_version < "4.0" matplotlib-inline==0.1.6 ; python_version >= "3.9" and python_version < "4.0" matplotlib==3.8.2 ; python_version >= "3.10" and python_version < "3.13" mdurl==0.1.2 ; python_version >= "3.9" and python_version < "4.0" @@ -127,14 +127,14 @@ multipledispatch==1.0.0 ; python_version >= "3.9" and python_version < "4.0" mypy-extensions==1.0.0 ; python_version >= "3.9" and python_version < "4.0" nbclient==0.9.0 ; python_version >= "3.10" and python_version < "3.13" nbformat==5.9.2 ; python_version >= "3.10" and python_version < "3.13" -nest-asyncio==1.6.0 ; python_version >= "3.10" and python_version < "3.13" +nest-asyncio==1.5.8 ; python_version >= "3.10" and python_version < "3.13" nodeenv==1.8.0 ; python_version >= "3.9" and python_version < "4.0" numpy==1.26.3 ; python_version >= "3.9" and python_version < "4.0" oauthlib==3.2.2 ; python_version >= "3.9" and python_version < "4.0" oracledb==2.0.1 ; python_version >= "3.9" and python_version < "4.0" packaging==23.2 ; python_version >= "3.9" and python_version < "4.0" palettable==3.3.3 ; python_version >= "3.10" and python_version < "3.13" -pandas==2.2.0 ; python_version >= "3.9" and python_version < "4.0" +pandas==2.1.4 ; python_version >= "3.9" and python_version < "4.0" parso==0.8.3 ; python_version >= "3.9" and python_version < "4.0" parsy==2.1 ; python_version >= "3.9" and python_version < "4.0" partd==1.4.1 ; python_version >= "3.9" and python_version < "4.0" @@ -147,19 +147,19 @@ pkginfo==1.9.6 ; python_version >= "3.9" and python_version < "4.0" platformdirs==3.11.0 ; python_version >= "3.9" and python_version < "4.0" plotly==5.18.0 ; python_version >= "3.10" and python_version < "3.13" plotnine==0.12.4 ; python_version >= "3.10" and python_version < "3.13" -pluggy==1.4.0 ; python_version >= "3.9" and python_version < "4.0" -plum-dispatch==2.3.2 ; python_version >= "3.10" and python_version < "3.13" +pluggy==1.3.0 ; python_version >= "3.9" and python_version < "4.0" +plum-dispatch==2.2.2 ; python_version >= "3.10" and python_version < "3.13" poetry-core==1.8.1 ; python_version >= "3.9" and python_version < "4.0" poetry-dynamic-versioning==1.2.0 ; python_version >= "3.9" and python_version < "4.0" poetry-plugin-export==1.6.0 ; python_version >= "3.9" and python_version < "4.0" poetry==1.7.1 ; python_version >= "3.9" and python_version < "4.0" -polars==0.20.6 ; python_version >= "3.9" and python_version < "4.0" +polars==0.20.4 ; python_version >= "3.9" and python_version < "4.0" pprintpp==0.4.0 ; python_version >= "3.9" and python_version < "4.0" pre-commit==3.6.0 ; python_version >= "3.9" and python_version < "4.0" prompt-toolkit==3.0.43 ; python_version >= "3.9" and python_version < "4.0" proto-plus==1.23.0 ; python_version >= "3.9" and python_version < "4.0" protobuf==4.25.2 ; python_version >= "3.9" and python_version < "4.0" -psutil==5.9.8 ; python_version >= "3.10" and python_version < "3.13" +psutil==5.9.7 ; python_version >= "3.10" and python_version < "3.13" psycopg2==2.9.9 ; python_version >= "3.9" and python_version < "4.0" psygnal==0.9.5 ; python_version >= "3.10" and python_version < "3.13" ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "4.0" @@ -168,18 +168,18 @@ pure-sasl==0.6.2 ; python_version >= "3.9" and python_version < "4.0" py-cpuinfo==9.0.0 ; python_version >= "3.9" and python_version < "4.0" py4j==0.10.9.7 ; python_version >= "3.9" and python_version < "4.0" pyarrow-hotfix==0.6 
; python_version >= "3.9" and python_version < "4.0" -pyarrow==15.0.0 ; python_version >= "3.9" and python_version < "4.0" +pyarrow==14.0.2 ; python_version >= "3.9" and python_version < "4.0" pyasn1-modules==0.3.0 ; python_version >= "3.9" and python_version < "4.0" pyasn1==0.5.1 ; python_version >= "3.9" and python_version < "4" pycparser==2.21 ; python_version >= "3.9" and python_version < "4.0" -pydantic-core==2.16.1 ; python_version >= "3.10" and python_version < "3.13" -pydantic==2.6.0 ; python_version >= "3.10" and python_version < "3.13" +pydantic-core==2.14.6 ; python_version >= "3.10" and python_version < "3.13" +pydantic==2.5.3 ; python_version >= "3.10" and python_version < "3.13" pydata-google-auth==1.8.2 ; python_version >= "3.9" and python_version < "4.0" pydeps==1.12.17 ; python_version >= "3.9" and python_version < "4.0" pydruid==0.6.6 ; python_version >= "3.9" and python_version < "4.0" pyexasol[pandas]==0.25.2 ; python_version >= "3.9" and python_version < "4.0" pygments==2.17.2 ; python_version >= "3.9" and python_version < "4.0" -pyinstrument==4.6.2 ; python_version >= "3.9" and python_version < "4.0" +pyinstrument==4.6.1 ; python_version >= "3.9" and python_version < "4.0" pyjwt==2.8.0 ; python_version >= "3.9" and python_version < "4.0" pymysql==1.1.0 ; python_version >= "3.9" and python_version < "4.0" pyodbc==5.0.1 ; python_version >= "3.9" and python_version < "4.0" @@ -201,17 +201,17 @@ pytest-randomly==3.15.0 ; python_version >= "3.9" and python_version < "4.0" pytest-repeat==0.9.3 ; python_version >= "3.9" and python_version < "4.0" pytest-snapshot==0.9.0 ; python_version >= "3.9" and python_version < "4.0" pytest-xdist==3.5.0 ; python_version >= "3.9" and python_version < "4.0" -pytest==8.0.0 ; python_version >= "3.9" and python_version < "4.0" +pytest==7.4.4 ; python_version >= "3.9" and python_version < "4.0" python-box==7.1.1 ; python_version >= "3.10" and python_version < "3.13" python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0" -pytz==2023.4 ; python_version >= "3.9" and python_version < "4.0" +pytz==2023.3.post1 ; python_version >= "3.9" and python_version < "4.0" pywin32-ctypes==0.2.2 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "win32" pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.10" and python_version < "3.13" pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "4.0" pyzmq==25.1.2 ; python_version >= "3.10" and python_version < "3.13" quartodoc==0.7.2 ; python_version >= "3.10" and python_version < "3.13" rapidfuzz==3.6.1 ; python_version >= "3.9" and python_version < "4.0" -referencing==0.33.0 ; python_version >= "3.10" and python_version < "3.13" +referencing==0.32.1 ; python_version >= "3.10" and python_version < "3.13" regex==2023.12.25 ; python_version >= "3.9" and python_version < "4.0" requests-oauthlib==1.3.1 ; python_version >= "3.9" and python_version < "4.0" requests-toolbelt==1.0.0 ; python_version >= "3.9" and python_version < "4.0" @@ -220,11 +220,11 @@ requests[socks]==2.31.0 ; python_version >= "3.10" and python_version < "3.13" rich==13.7.0 ; python_version >= "3.9" and python_version < "4.0" rpds-py==0.17.1 ; python_version >= "3.10" and python_version < "3.13" rsa==4.9 ; python_version >= "3.9" and python_version < "4" -ruff==0.1.15 ; python_version >= "3.9" and python_version < "4.0" -scikit-learn==1.4.0 ; python_version >= "3.10" and python_version < "3.13" -scipy==1.12.0 ; python_version >= "3.10" and 
python_version < "3.13" +ruff==0.1.13 ; python_version >= "3.9" and python_version < "4.0" +scikit-learn==1.3.2 ; python_version >= "3.10" and python_version < "3.13" +scipy==1.11.4 ; python_version >= "3.10" and python_version < "3.13" scooby==0.9.2 ; python_version >= "3.10" and python_version < "3.13" -seaborn==0.13.2 ; python_version >= "3.10" and python_version < "3.13" +seaborn==0.13.1 ; python_version >= "3.10" and python_version < "3.13" secretstorage==3.3.3 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "linux" setuptools==69.0.3 ; python_version >= "3.9" and python_version < "4.0" shapely==2.0.2 ; python_version >= "3.9" and python_version < "4.0" @@ -235,7 +235,7 @@ sortedcontainers==2.4.0 ; python_version >= "3.9" and python_version < "4.0" soupsieve==2.5 ; python_version >= "3.10" and python_version < "3.13" sphobjinv==2.3.1 ; python_version >= "3.10" and python_version < "3.13" sqlalchemy==1.4.51 ; python_version >= "3.9" and python_version < "4.0" -sqlglot==20.11.0 ; python_version >= "3.9" and python_version < "4.0" +sqlglot==20.8.0 ; python_version >= "3.9" and python_version < "4.0" stack-data==0.6.3 ; python_version >= "3.9" and python_version < "4.0" statsmodels==0.14.1 ; python_version >= "3.10" and python_version < "3.13" stdlib-list==0.10.0 ; python_version >= "3.9" and python_version < "4.0" @@ -247,7 +247,7 @@ thrift-sasl==0.4.3 ; python_version >= "3.9" and python_version < "4.0" thrift==0.16.0 ; python_version >= "3.9" and python_version < "4.0" tomli==2.0.1 ; python_version >= "3.9" and python_full_version <= "3.11.0a6" tomlkit==0.12.3 ; python_version >= "3.9" and python_version < "4.0" -toolz==0.12.1 ; python_version >= "3.9" and python_version < "4.0" +toolz==0.12.0 ; python_version >= "3.9" and python_version < "4.0" tornado==6.4 ; python_version >= "3.10" and python_version < "3.13" tqdm==4.66.1 ; python_version >= "3.9" and python_version < "4.0" traitlets==5.14.1 ; python_version >= "3.9" and python_version < "4.0" From 52c0d845bfe003129597f3d29b6d72c50157e7ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 1 Feb 2024 01:47:56 +0100 Subject: [PATCH 128/161] chore(oracle): fix ops.ArrayColum reference to be ops.Array --- ibis/backends/oracle/compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/backends/oracle/compiler.py b/ibis/backends/oracle/compiler.py index e0b3e711ebc1..029a865e1f0f 100644 --- a/ibis/backends/oracle/compiler.py +++ b/ibis/backends/oracle/compiler.py @@ -518,7 +518,7 @@ def visit_Window(self, op, *, how, func, start, end, group_by, order_by): @visit_node.register(ops.ArgMax) @visit_node.register(ops.ArgMin) @visit_node.register(ops.ArrayCollect) - @visit_node.register(ops.ArrayColumn) + @visit_node.register(ops.Array) @visit_node.register(ops.ArrayFlatten) @visit_node.register(ops.ArrayMap) @visit_node.register(ops.ArrayStringJoin) From 2cefd16e738440d0f19c71e3c46b8b452986ce54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 1 Feb 2024 01:50:01 +0100 Subject: [PATCH 129/161] chore(polars): enable xpassing .sql test --- ibis/backends/tests/test_dot_sql.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ibis/backends/tests/test_dot_sql.py b/ibis/backends/tests/test_dot_sql.py index 62d73bd57040..f2621791442a 100644 --- a/ibis/backends/tests/test_dot_sql.py +++ b/ibis/backends/tests/test_dot_sql.py @@ -272,7 +272,6 @@ def test_table_dot_sql_transpile(backend, alltypes, dialect, df): *no_sqlglot_dialect, ], ) 
-@pytest.mark.notyet(["polars"], raises=PolarsComputeError) @pytest.mark.notyet( ["druid"], raises=AttributeError, reason="druid doesn't respect column names" ) From 61e39a3480313d2024af007ea5718bf5e70902b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 1 Feb 2024 01:56:31 +0100 Subject: [PATCH 130/161] chore(sqlite): fix expected errors for array test cases --- ibis/backends/tests/test_array.py | 15 ++++++++++++--- ibis/backends/tests/test_string.py | 7 ++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index d50d46c57fd2..5b182d4f9489 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -8,9 +8,9 @@ import pandas.testing as tm import pytest import pytz +import sqlalchemy as sa import toolz from pytest import param -import sqlalchemy as sa import ibis import ibis.common.exceptions as com @@ -43,7 +43,15 @@ AssertionError, ), ), - pytest.mark.never(["mysql"], reason="No array support", raises=(com.UnsupportedBackendType, com.OperationNotDefinedError, MySQLOperationalError)), + pytest.mark.never( + ["mysql"], + reason="No array support", + raises=( + com.UnsupportedBackendType, + com.OperationNotDefinedError, + MySQLOperationalError, + ), + ), pytest.mark.notyet( ["impala"], reason="No array support", @@ -1115,10 +1123,10 @@ def test_array_map_with_conflicting_names(backend, con): "sqlite", "dask", "pandas", + "sqlite", ], raises=com.OperationNotDefinedError, ) -@pytest.mark.notimpl(["sqlite"], raises=com.UnsupportedBackendType) def test_complex_array_map(con): def upper(token): return token.upper() @@ -1301,6 +1309,7 @@ def test_repr_timestamp_array(con, monkeypatch): expr = ibis.array(pd.date_range("2010-01-01", "2010-01-03", freq="D").tolist()) assert "No translation rule" not in repr(expr) + assert "OperationNotDefinedError" not in repr(expr) @pytest.mark.notyet( diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index c38be0c47cb9..f75028f4f197 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -5,8 +5,8 @@ import numpy as np import pandas as pd import pytest -from pytest import param import sqlalchemy as sa +from pytest import param import ibis import ibis.common.exceptions as com @@ -931,13 +931,14 @@ def test_capitalize(con): @pytest.mark.notimpl( - ["dask", "pandas", "polars", "oracle", "flink"], raises=com.OperationNotDefinedError + ["dask", "pandas", "polars", "oracle", "flink", "sqlite"], + raises=com.OperationNotDefinedError, ) @pytest.mark.never( ["mysql"], raises=com.OperationNotDefinedError, reason="no array support" ) @pytest.mark.notimpl( - ["mssql", "exasol", "impala", "sqlite"], + ["mssql", "exasol", "impala"], raises=com.UnsupportedBackendType, reason="no array support", ) From 2942069faa20ac1a9bd54624ad2154501d7118bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 1 Feb 2024 02:07:45 +0100 Subject: [PATCH 131/161] chore(duckdb): generate snapshots for geospatial tests --- .../expr0-POINT_1_0/out.sql | 6 ++++++ .../test_literal_geospatial_explicit/expr0/out.sql | 6 ++++++ .../test_literal_geospatial_explicit/expr1/out.sql | 6 ++++++ .../shp0-0_0/out.sql | 6 ++++++ .../shp1-1_1/out.sql | 6 ++++++ .../shp2-2_2/out.sql | 6 ++++++ .../shp3-0_0_1_1_2_2/out.sql | 6 ++++++ .../shp4-2_2_1_1_0_0/out.sql | 6 ++++++ .../shp5-0_0_1_1_2_2_0_0/out.sql | 6 ++++++ .../shp6-0_0_1_1_2_2_0_0/out.sql | 6 ++++++ 
.../shp7-0_0_1_1_2_2_2_2_1_1_0_0/out.sql | 6 ++++++ .../shp8-0_0_1_1_2_2/out.sql | 6 ++++++ ibis/backends/duckdb/tests/test_datatypes.py | 7 ------- ibis/backends/duckdb/tests/test_geospatial.py | 12 +++--------- 14 files changed, 75 insertions(+), 16 deletions(-) create mode 100644 ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr0-POINT_1_0/out.sql create mode 100644 ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr0/out.sql create mode 100644 ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr1/out.sql create mode 100644 ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp0-0_0/out.sql create mode 100644 ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp1-1_1/out.sql create mode 100644 ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp2-2_2/out.sql create mode 100644 ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp3-0_0_1_1_2_2/out.sql create mode 100644 ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp4-2_2_1_1_0_0/out.sql create mode 100644 ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp5-0_0_1_1_2_2_0_0/out.sql create mode 100644 ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp6-0_0_1_1_2_2_0_0/out.sql create mode 100644 ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp7-0_0_1_1_2_2_2_2_1_1_0_0/out.sql create mode 100644 ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp8-0_0_1_1_2_2/out.sql diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr0-POINT_1_0/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr0-POINT_1_0/out.sql new file mode 100644 index 000000000000..96cbdcc32ba4 --- /dev/null +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr0-POINT_1_0/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASWKB(p) AS p +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (1 0)') AS p +) \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr0/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr0/out.sql new file mode 100644 index 000000000000..96cbdcc32ba4 --- /dev/null +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr0/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASWKB(p) AS p +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (1 0)') AS p +) \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr1/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr1/out.sql new file mode 100644 index 000000000000..96cbdcc32ba4 --- /dev/null +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr1/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASWKB(p) AS p +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (1 0)') AS p +) \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp0-0_0/out.sql 
b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp0-0_0/out.sql new file mode 100644 index 000000000000..571188823775 --- /dev/null +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp0-0_0/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASWKB(result) AS result +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (0 0)') AS result +) \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp1-1_1/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp1-1_1/out.sql new file mode 100644 index 000000000000..63b01c1ee412 --- /dev/null +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp1-1_1/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASWKB(result) AS result +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (1 1)') AS result +) \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp2-2_2/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp2-2_2/out.sql new file mode 100644 index 000000000000..f53ceacdc377 --- /dev/null +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp2-2_2/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASWKB(result) AS result +FROM ( + SELECT + ST_GEOMFROMTEXT('POINT (2 2)') AS result +) \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp3-0_0_1_1_2_2/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp3-0_0_1_1_2_2/out.sql new file mode 100644 index 000000000000..ce17eb20e559 --- /dev/null +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp3-0_0_1_1_2_2/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASWKB(result) AS result +FROM ( + SELECT + ST_GEOMFROMTEXT('LINESTRING (0 0, 1 1, 2 2)') AS result +) \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp4-2_2_1_1_0_0/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp4-2_2_1_1_0_0/out.sql new file mode 100644 index 000000000000..e72e71b8f0c7 --- /dev/null +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp4-2_2_1_1_0_0/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASWKB(result) AS result +FROM ( + SELECT + ST_GEOMFROMTEXT('LINESTRING (2 2, 1 1, 0 0)') AS result +) \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp5-0_0_1_1_2_2_0_0/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp5-0_0_1_1_2_2_0_0/out.sql new file mode 100644 index 000000000000..566c99750f51 --- /dev/null +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp5-0_0_1_1_2_2_0_0/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASWKB(result) AS result +FROM ( + SELECT + ST_GEOMFROMTEXT('POLYGON ((0 0, 1 1, 2 2, 0 0))') AS result +) \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp6-0_0_1_1_2_2_0_0/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp6-0_0_1_1_2_2_0_0/out.sql new file mode 100644 index 
000000000000..f17d69601359 --- /dev/null +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp6-0_0_1_1_2_2_0_0/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASWKB(result) AS result +FROM ( + SELECT + ST_GEOMFROMTEXT('MULTIPOLYGON (((0 0, 1 1, 2 2, 0 0)))') AS result +) \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp7-0_0_1_1_2_2_2_2_1_1_0_0/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp7-0_0_1_1_2_2_2_2_1_1_0_0/out.sql new file mode 100644 index 000000000000..831463379abf --- /dev/null +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp7-0_0_1_1_2_2_2_2_1_1_0_0/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASWKB(result) AS result +FROM ( + SELECT + ST_GEOMFROMTEXT('MULTILINESTRING ((0 0, 1 1, 2 2), (2 2, 1 1, 0 0))') AS result +) \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp8-0_0_1_1_2_2/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp8-0_0_1_1_2_2/out.sql new file mode 100644 index 000000000000..831532c342ae --- /dev/null +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_inferred/shp8-0_0_1_1_2_2/out.sql @@ -0,0 +1,6 @@ +SELECT + ST_ASWKB(result) AS result +FROM ( + SELECT + ST_GEOMFROMTEXT('MULTIPOINT (0 0, 1 1, 2 2)') AS result +) \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/test_datatypes.py b/ibis/backends/duckdb/tests/test_datatypes.py index 7bd989209895..b40cc0233e92 100644 --- a/ibis/backends/duckdb/tests/test_datatypes.py +++ b/ibis/backends/duckdb/tests/test_datatypes.py @@ -1,9 +1,7 @@ from __future__ import annotations -import duckdb_engine import numpy as np import pytest -from packaging.version import parse as vparse from pytest import param import ibis @@ -91,11 +89,6 @@ def test_parse_quoted_struct_field(): ) -@pytest.mark.xfail( - condition=vparse(duckdb_engine.__version__) < vparse("0.9.2"), - raises=AssertionError, - reason="mapping from UINTEGER query metadata fixed in 0.9.2", -) def test_read_uint8_from_parquet(tmp_path): con = ibis.duckdb.connect() diff --git a/ibis/backends/duckdb/tests/test_geospatial.py b/ibis/backends/duckdb/tests/test_geospatial.py index bbe01d51954a..dd7f9979670a 100644 --- a/ibis/backends/duckdb/tests/test_geospatial.py +++ b/ibis/backends/duckdb/tests/test_geospatial.py @@ -251,16 +251,10 @@ def test_create_table_geospatial_types(geotable, con): point_geom = ibis.literal((1, 0), type="point:geometry").name("p") -@pytest.mark.parametrize( - ("expr", "expected"), - [ - (point, "'POINT (1.0 0.0)'"), - (point_geom, "'POINT (1.0 0.0)'::geometry"), - ], -) -def test_literal_geospatial_explicit(con, expr, expected): +@pytest.mark.parametrize("expr", [point, point_geom]) +def test_literal_geospatial_explicit(con, expr, snapshot): result = str(con.compile(expr)) - assert result == f"SELECT {expected} AS p" + snapshot.assert_match(result, "out.sql") # test input data with shapely geometries From ab8e3f2a0aef4b0c44d30becf285d26ad807fc53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 1 Feb 2024 02:17:19 +0100 Subject: [PATCH 132/161] chore(pandas): xfail test_unnest_range() --- ibis/backends/tests/test_array.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ibis/backends/tests/test_array.py 
b/ibis/backends/tests/test_array.py index 5b182d4f9489..c87169af3dbd 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -1321,6 +1321,11 @@ def test_repr_timestamp_array(con, monkeypatch): raises=sa.exc.OperationalError, reason="Refer to https://github.com/risingwavelabs/risingwave/issues/14734", ) +@pytest.mark.broken( + ["pandas"], + raises=ValueError, + reason="cannot reindex on an axis with duplicate labels", +) def test_unnest_range(con): expr = ibis.range(2).unnest().name("x").as_table().mutate({"y": 1.0}) result = con.execute(expr) From d7feef1e3631cbd90eed570abbf5fba65a60911f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 1 Feb 2024 11:17:20 +0100 Subject: [PATCH 133/161] chore(impala): rename ops.ArrayColumn to ops.Array --- ibis/backends/impala/compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/backends/impala/compiler.py b/ibis/backends/impala/compiler.py index 2f3386ece696..1593a634f51f 100644 --- a/ibis/backends/impala/compiler.py +++ b/ibis/backends/impala/compiler.py @@ -365,7 +365,7 @@ def visit_Sign(self, op, *, arg): @visit_node.register(ops.ArgMax) @visit_node.register(ops.ArgMin) @visit_node.register(ops.ArrayCollect) - @visit_node.register(ops.ArrayColumn) + @visit_node.register(ops.Array) @visit_node.register(ops.Covariance) @visit_node.register(ops.DateDelta) @visit_node.register(ops.ExtractDayOfYear) From 5c233ecccebbc9fec978932f5ca0e607ea0afa24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 1 Feb 2024 11:17:32 +0100 Subject: [PATCH 134/161] chore(mssql): rename ops.ArrayColumn to ops.Array --- ibis/backends/mssql/compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/backends/mssql/compiler.py b/ibis/backends/mssql/compiler.py index 294874e5606f..4609d241610d 100644 --- a/ibis/backends/mssql/compiler.py +++ b/ibis/backends/mssql/compiler.py @@ -383,7 +383,7 @@ def visit_Not(self, op, *, arg): @visit_node.register(ops.ArgMax) @visit_node.register(ops.ArgMin) @visit_node.register(ops.ArrayCollect) - @visit_node.register(ops.ArrayColumn) + @visit_node.register(ops.Array) @visit_node.register(ops.ArrayDistinct) @visit_node.register(ops.ArrayFlatten) @visit_node.register(ops.ArrayMap) From e6e4081d2e3d08ddfc1efa0d5d5b3aadd7c86782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 1 Feb 2024 11:26:46 +0100 Subject: [PATCH 135/161] chore(pyspark): enable xpassing tests --- ibis/backends/tests/test_generic.py | 7 +++---- ibis/backends/tests/test_temporal.py | 23 +++-------------------- 2 files changed, 6 insertions(+), 24 deletions(-) diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index c4d41f4546d1..59c31fb5c1dc 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -9,9 +9,9 @@ import numpy as np import pandas as pd import pytest +import sqlalchemy as sa import toolz from pytest import param -import sqlalchemy as sa import ibis import ibis.common.exceptions as com @@ -24,15 +24,14 @@ ExaQueryError, GoogleBadRequest, ImpalaHiveServer2Error, - Py4JJavaError, MySQLProgrammingError, OracleDatabaseError, + Py4JJavaError, PyDruidProgrammingError, PyODBCDataError, PyODBCProgrammingError, SnowflakeProgrammingError, TrinoUserError, - PsycoPg2InvalidTextRepresentation ) from ibis.common.annotations import ValidationError @@ -1322,6 +1321,7 @@ def hash_256(col): "risingwave", "snowflake", "trino", + 
"pyspark", ] ) @pytest.mark.notyet( @@ -1442,7 +1442,6 @@ def test_try_cast_null(con, from_val, to_type): "oracle", "postgres", "risingwave", - "pyspark", "snowflake", "sqlite", "exasol", diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 3b637968c227..68a9306c26d5 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -9,9 +9,9 @@ import numpy as np import pandas as pd import pytest +import sqlalchemy as sa import sqlglot as sg from pytest import param -import sqlalchemy as sa import ibis import ibis.common.exceptions as com @@ -831,14 +831,7 @@ def convert_to_offset(x): id="timestamp-add-interval-binop", marks=[ pytest.mark.notimpl( - [ - "dask", - "risingwave", - "snowflake", - "sqlite", - "bigquery", - "exasol" - ], + ["dask", "risingwave", "snowflake", "sqlite", "bigquery", "exasol"], raises=com.OperationNotDefinedError, ), pytest.mark.notimpl(["impala"], raises=com.UnsupportedOperationError), @@ -864,7 +857,7 @@ def convert_to_offset(x): "polars", "snowflake", "bigquery", - "exasol" + "exasol", ], raises=com.OperationNotDefinedError, ), @@ -1435,13 +1428,6 @@ def test_interval_add_cast_column(backend, alltypes, df): ), "%Y%m%d", marks=[ - pytest.mark.notimpl( - [ - "pyspark", - ], - raises=AttributeError, - reason="'StringConcat' object has no attribute 'value'", - ), pytest.mark.notimpl( [ "risingwave", @@ -1804,10 +1790,7 @@ def test_now_from_projection(alltypes): "snowflake": "DATE", "sqlite": "text", "trino": "date", - "duckdb": "DATE", - "postgres": "date", "risingwave": "date", - "flink": "DATE NOT NULL", } From 445110924a857a70ab56959aec1306dd2f3f8902 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 1 Feb 2024 11:31:34 +0100 Subject: [PATCH 136/161] chore(mssql): disable test cases for unsupported ops.Array, ops.HexDigest and ops.HashBytes --- ibis/backends/tests/test_array.py | 6 ++---- ibis/backends/tests/test_generic.py | 2 ++ ibis/backends/tests/test_string.py | 7 ++----- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index c87169af3dbd..ef6320d5d893 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -577,6 +577,7 @@ def test_array_contains(backend, con): assert frozenset(result.values) == frozenset(expected.values) +@builtin_array @pytest.mark.parametrize( ("a", "expected_array"), [ @@ -609,10 +610,7 @@ def test_array_contains(backend, con): ), ], ) -@builtin_array -@pytest.mark.notimpl( - ["dask", "impala", "mssql", "polars"], raises=com.OperationNotDefinedError -) +@pytest.mark.notimpl(["dask", "impala", "polars"], raises=com.OperationNotDefinedError) def test_array_position(backend, con, a, expected_array): t = ibis.memtable({"a": a}) expr = t.a.index(42) diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 59c31fb5c1dc..f4065eb9058f 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -1288,6 +1288,7 @@ def test_hash_consistent(backend, alltypes): "risingwave", "sqlite", "clickhouse", + "mssql", ] ) def test_hashbytes(backend, alltypes): @@ -1322,6 +1323,7 @@ def hash_256(col): "snowflake", "trino", "pyspark", + "mssql", ] ) @pytest.mark.notyet( diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index f75028f4f197..a9d9dff36156 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ 
-931,14 +931,11 @@ def test_capitalize(con): @pytest.mark.notimpl( - ["dask", "pandas", "polars", "oracle", "flink", "sqlite"], + ["dask", "pandas", "polars", "oracle", "flink", "sqlite", "mssql", "mysql"], raises=com.OperationNotDefinedError, ) -@pytest.mark.never( - ["mysql"], raises=com.OperationNotDefinedError, reason="no array support" -) @pytest.mark.notimpl( - ["mssql", "exasol", "impala"], + ["exasol", "impala"], raises=com.UnsupportedBackendType, reason="no array support", ) From eb345efc52f28d2f14fadfae07086a9650f2ec53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 1 Feb 2024 11:34:39 +0100 Subject: [PATCH 137/161] chore(exasol): enable xpassing tests --- ibis/backends/tests/test_numeric.py | 14 ++------------ ibis/backends/tests/test_sql.py | 2 +- ibis/backends/tests/test_string.py | 14 ++++++++++++-- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 05f94c9d3eb5..76ffe94aae62 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -781,12 +781,7 @@ def test_isnan_isinf( pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], ), - param( - L(5.556).log10(), - math.log10(5.556), - marks=pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), - id="log10", - ), + param(L(5.556).log10(), math.log10(5.556), id="log10"), param( L(5.556).radians(), math.radians(5.556), @@ -797,12 +792,7 @@ def test_isnan_isinf( math.degrees(5.556), id="degrees", ), - param( - L(11) % 3, - 11 % 3, - marks=pytest.mark.notimpl(["exasol"], raises=ExaQueryError), - id="mod", - ), + param(L(11) % 3, 11 % 3, id="mod"), param(L(5.556).log10(), math.log10(5.556), id="log10"), param(L(5.556).radians(), math.radians(5.556), id="radians"), param(L(5.556).degrees(), math.degrees(5.556), id="degrees"), diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index e0d306766730..4cd7d0d8ffa2 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -18,7 +18,7 @@ ibis.array([1]), marks=[ pytest.mark.never( - ["mysql", "mssql", "oracle", "impala", "sqlite", "exasol"], + ["mysql", "mssql", "oracle", "impala", "sqlite"], raises=(exc.OperationNotDefinedError, exc.UnsupportedBackendType), reason="arrays not supported in the backend", ), diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index a9d9dff36156..3a832c5d7d40 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -931,11 +931,21 @@ def test_capitalize(con): @pytest.mark.notimpl( - ["dask", "pandas", "polars", "oracle", "flink", "sqlite", "mssql", "mysql"], + [ + "dask", + "pandas", + "polars", + "oracle", + "flink", + "sqlite", + "mssql", + "mysql", + "exasol", + ], raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( - ["exasol", "impala"], + ["impala"], raises=com.UnsupportedBackendType, reason="no array support", ) From f0930477e9af41c2ccd7e98d641859caa31b4cde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 1 Feb 2024 11:50:51 +0100 Subject: [PATCH 138/161] style(tests): reformat the pytest marker blocks --- ibis/backends/conftest.py | 6 +----- ibis/backends/tests/test_export.py | 2 +- ibis/backends/tests/test_param.py | 2 +- ibis/backends/tests/test_set_ops.py | 2 +- ibis/backends/tests/test_uuid.py | 5 +---- ibis/backends/tests/test_window.py | 2 +- 6 files changed, 6 insertions(+), 13 deletions(-) diff 
--git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index 5c835ae0ba22..949ae7752b94 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -567,11 +567,7 @@ def ddl_con(ddl_backend): @pytest.fixture( - params=_get_backends_to_test( - keep=( - "risingwave", - ) - ), + params=_get_backends_to_test(keep=("risingwave",)), scope="session", ) def alchemy_backend(request, data_dir, tmp_path_factory, worker_id): diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index cfbc1b2d0360..b3ce4d4cfaf0 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -4,8 +4,8 @@ import pyarrow as pa import pyarrow.csv as pcsv import pytest -from pytest import param import sqlalchemy as sa +from pytest import param import ibis import ibis.expr.datatypes as dt diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index b97401a9d47e..a72b7c140b22 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -6,8 +6,8 @@ import numpy as np import pandas as pd import pytest -from pytest import param import sqlalchemy as sa +from pytest import param import ibis import ibis.expr.datatypes as dt diff --git a/ibis/backends/tests/test_set_ops.py b/ibis/backends/tests/test_set_ops.py index 3b8b78d4f7ec..41102559ad9c 100644 --- a/ibis/backends/tests/test_set_ops.py +++ b/ibis/backends/tests/test_set_ops.py @@ -4,8 +4,8 @@ import pandas as pd import pytest -from pytest import param import sqlalchemy as sa +from pytest import param import ibis import ibis.common.exceptions as com diff --git a/ibis/backends/tests/test_uuid.py b/ibis/backends/tests/test_uuid.py index 1cb3acbf0df8..5b1eadb790d8 100644 --- a/ibis/backends/tests/test_uuid.py +++ b/ibis/backends/tests/test_uuid.py @@ -10,7 +10,6 @@ import ibis.common.exceptions as com import ibis.expr.datatypes as dt - RAW_TEST_UUID = "08f48812-7948-4718-96c7-27fa6a398db6" TEST_UUID = uuid.UUID(RAW_TEST_UUID) @@ -28,9 +27,7 @@ } -@pytest.mark.notimpl( - ["datafusion", "polars"], raises=NotImplementedError -) +@pytest.mark.notimpl(["datafusion", "polars"], raises=NotImplementedError) @pytest.mark.notimpl( ["risingwave"], raises=sqlalchemy.exc.InternalError, diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 20001f5bac9e..e68e70773ad7 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -6,8 +6,8 @@ import numpy as np import pandas as pd import pytest -from pytest import param import sqlalchemy as sa +from pytest import param import ibis import ibis.common.exceptions as com From c3e3da0067132c407462c9924cd9595dbf3f4b21 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 1 Feb 2024 07:38:19 -0500 Subject: [PATCH 139/161] chore(impala): add early failure in the type system for complex datatypes --- ibis/backends/base/sqlglot/datatypes.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index dc70f7b6e3ba..370afc7a06b9 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -658,6 +658,18 @@ class ImpalaType(SqlglotType): default_decimal_precision = 9 default_decimal_scale = 0 + @classmethod + def _from_ibis_Array(cls, dtype: dt.Array) -> NoReturn: + raise com.UnsupportedBackendType("Array types aren't supported in Impala") + + @classmethod + def _from_ibis_Map(cls, 
dtype: dt.Map) -> NoReturn: + raise com.UnsupportedBackendType("Map types aren't supported in Impala") + + @classmethod + def _from_ibis_Struct(cls, dtype: dt.Struct) -> sge.DataType: + raise com.UnsupportedBackendType("Struct types aren't supported in Impala") + class PySparkType(SqlglotType): dialect = "spark" From f7a904ef6a736e264579c0390bbc5eb51d64a2be Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 1 Feb 2024 07:38:32 -0500 Subject: [PATCH 140/161] chore(impala): disable array position --- ibis/backends/impala/compiler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ibis/backends/impala/compiler.py b/ibis/backends/impala/compiler.py index 1593a634f51f..1afba29a20ea 100644 --- a/ibis/backends/impala/compiler.py +++ b/ibis/backends/impala/compiler.py @@ -365,6 +365,7 @@ def visit_Sign(self, op, *, arg): @visit_node.register(ops.ArgMax) @visit_node.register(ops.ArgMin) @visit_node.register(ops.ArrayCollect) + @visit_node.register(ops.ArrayPosition) @visit_node.register(ops.Array) @visit_node.register(ops.Covariance) @visit_node.register(ops.DateDelta) From bdc58db2e0cfd9981133c3c9287b6c8259be3eed Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 1 Feb 2024 07:39:21 -0500 Subject: [PATCH 141/161] test: fix array, string and uuid tests --- ibis/backends/tests/test_array.py | 12 ++++-------- ibis/backends/tests/test_string.py | 6 +----- ibis/backends/tests/test_uuid.py | 2 +- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index ef6320d5d893..97125fc97a24 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -584,7 +584,7 @@ def test_array_contains(backend, con): param( [[1], [], [42, 42], []], [-1, -1, 0, -1], - id="including-empty-array", + id="some-empty", marks=[ pytest.mark.notyet( ["flink"], @@ -601,16 +601,12 @@ def test_array_contains(backend, con): param( [[1], [1], [42, 42], [1]], [-1, -1, 0, -1], - id="all-non-empty-arrays", - ), - param( - [[1], [1, 42], [42, 42, 42], [42, 1]], - [-1, 1, 0, 0], - id="all-non-empty-arrays-2", + id="none-empty", ), ], ) -@pytest.mark.notimpl(["dask", "impala", "polars"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["dask", "polars"], raises=com.OperationNotDefinedError) +@pytest.mark.notyet(["impala"], raises=com.UnsupportedBackendType) def test_array_position(backend, con, a, expected_array): t = ibis.memtable({"a": a}) expr = t.a.index(42) diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index 3a832c5d7d40..e25ab5f62d13 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -941,14 +941,10 @@ def test_capitalize(con): "mssql", "mysql", "exasol", + "impala", ], raises=com.OperationNotDefinedError, ) -@pytest.mark.notimpl( - ["impala"], - raises=com.UnsupportedBackendType, - reason="no array support", -) def test_array_string_join(con): s = ibis.array(["a", "b", "c"]) expected = "a,b,c" diff --git a/ibis/backends/tests/test_uuid.py b/ibis/backends/tests/test_uuid.py index 5b1eadb790d8..5802727f205d 100644 --- a/ibis/backends/tests/test_uuid.py +++ b/ibis/backends/tests/test_uuid.py @@ -33,7 +33,7 @@ raises=sqlalchemy.exc.InternalError, reason="Feature is not yet implemented: unsupported data type: UUID", ) -@pytest.mark.notimpl(["impala", "polars"], raises=NotImplementedError) +@pytest.mark.notimpl(["polars"], raises=NotImplementedError) 
@pytest.mark.notimpl(["datafusion"], raises=Exception) def test_uuid_literal(con, backend): backend_name = backend.name() From 4ca96b298ef34ab782bd5ab3ab85a9761ae2d2c1 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 1 Feb 2024 07:45:34 -0500 Subject: [PATCH 142/161] test: fix more array tests --- ibis/backends/tests/test_array.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 97125fc97a24..2586782954ba 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -612,7 +612,6 @@ def test_array_position(backend, con, a, expected_array): expr = t.a.index(42) result = con.execute(expr) expected = pd.Series(expected_array, dtype="object") - backend.assert_series_equal(result, expected, check_names=False, check_dtype=False) assert frozenset(result.values) == frozenset(expected.values) From 7313cc093d729d62d63333315c9fc979c83d9ad1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Sun, 28 Jan 2024 15:40:20 +0100 Subject: [PATCH 143/161] fix(common): don't match an `Object` pattern with more positional arguments defined than `__match_args__` has --- .../base/sqlglot/tests/test_compiler.py | 18 +++++++++++++++ ibis/common/patterns.py | 17 ++++++++++---- ibis/common/tests/test_patterns.py | 23 +++++++++++++++++++ 3 files changed, 54 insertions(+), 4 deletions(-) create mode 100644 ibis/backends/base/sqlglot/tests/test_compiler.py diff --git a/ibis/backends/base/sqlglot/tests/test_compiler.py b/ibis/backends/base/sqlglot/tests/test_compiler.py new file mode 100644 index 000000000000..95db51e76de3 --- /dev/null +++ b/ibis/backends/base/sqlglot/tests/test_compiler.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +import ibis +from ibis import _ + + +def test_window_with_row_number_compiles(): + # GH #8058: the add_order_by_to_empty_ranking_window_functions rule was + # matching on `RankBase` subclasses with a pattern expecting an `arg` + # attribute, which is not present on `RowNumber` + expr = ( + ibis.memtable({"a": range(30)}) + .mutate(id=ibis.row_number()) + .sample(fraction=0.25, seed=0) + .mutate(is_test=_.id.isin(_.id)) + .filter(~_.is_test) + ) + assert ibis.to_sql(expr) diff --git a/ibis/common/patterns.py b/ibis/common/patterns.py index 3b4085868aa6..77b97ed88834 100644 --- a/ibis/common/patterns.py +++ b/ibis/common/patterns.py @@ -1264,17 +1264,26 @@ def __create__(cls, type, *args, **kwargs): return InstanceOf(type) return super().__create__(type, *args, **kwargs) - def __init__(self, type, *args, **kwargs): - type = pattern(type) + def __init__(self, typ, *args, **kwargs): + if isinstance(typ, type) and len(typ.__match_args__) < len(args): + raise ValueError( + "The type to match has fewer `__match_args__` than the number " + "of positional arguments in the pattern" + ) + typ = pattern(typ) args = tuple(map(pattern, args)) kwargs = frozendict(toolz.valmap(pattern, kwargs)) - super().__init__(type=type, args=args, kwargs=kwargs) + super().__init__(type=typ, args=args, kwargs=kwargs) def match(self, value, context): if self.type.match(value, context) is NoMatch: return NoMatch - patterns = {**dict(zip(value.__match_args__, self.args)), **self.kwargs} + # the pattern requirest more positional arguments than the object has + if len(value.__match_args__) < len(self.args): + return NoMatch + patterns = dict(zip(value.__match_args__, self.args)) + patterns.update(self.kwargs) fields = {} changed = False 
diff --git a/ibis/common/tests/test_patterns.py b/ibis/common/tests/test_patterns.py index aa770da9e79c..32b761599671 100644 --- a/ibis/common/tests/test_patterns.py +++ b/ibis/common/tests/test_patterns.py @@ -580,6 +580,8 @@ def test_object_pattern_complex_type(): def test_object_pattern_from_instance_of(): class MyType: + __match_args__ = ("a", "b") + def __init__(self, a, b): self.a = a self.b = b @@ -593,6 +595,8 @@ def __init__(self, a, b): def test_object_pattern_from_coerced_to(): class MyCoercibleType(Coercible): + __match_args__ = ("a", "b") + def __init__(self, a, b): self.a = a self.b = b @@ -651,6 +655,25 @@ def test_object_pattern_matching_dictionary_field(): assert match(pattern, d) is d +def test_object_pattern_requires_its_arguments_to_match(): + class Empty: + __match_args__ = () + + msg = "The type to match has fewer `__match_args__`" + with pytest.raises(ValueError, match=msg): + Object(Empty, 1) + + # if the type matcher (first argument of Object) receives a generic pattern + # instead of an explicit type, the validation above cannot occur, so test + # the the pattern still doesn't match when it requires more positional + # arguments than the object `__match_args__` has + pattern = Object(InstanceOf(Empty), var("a")) + assert match(pattern, Empty()) is NoMatch + + pattern = Object(InstanceOf(Empty), a=var("a")) + assert match(pattern, Empty()) is NoMatch + + def test_callable_with(): def func(a, b): return str(a) + b From 954228f08580c0f08a5f46f25595108dc768f336 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Thu, 1 Feb 2024 14:29:43 -0500 Subject: [PATCH 144/161] refactor(risingwave): port to sqlglot (#8171) Co-authored-by: Kexiang Wang Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Co-authored-by: Jim Crist-Harif --- .github/workflows/ibis-backends.yml | 14 + ci/schema/risingwave.sql | 128 +- ibis/backends/base/__init__.py | 44 + ibis/backends/base/sqlglot/compiler.py | 7 +- ibis/backends/base/sqlglot/datatypes.py | 18 + ibis/backends/clickhouse/compiler.py | 34 +- ibis/backends/conftest.py | 1 + ibis/backends/duckdb/compiler.py | 11 +- ibis/backends/mssql/compiler.py | 30 + ibis/backends/postgres/__init__.py | 42 +- ibis/backends/pyspark/compiler.py | 11 + ibis/backends/risingwave/__init__.py | 380 ++-- ibis/backends/risingwave/compiler.py | 112 +- ibis/backends/risingwave/dialect.py | 35 + ibis/backends/risingwave/tests/conftest.py | 51 +- .../test_client/test_compile_toplevel/out.sql | 5 +- .../test_union_cte/False/out.sql | 2 +- .../test_union_cte/True/out.sql | 2 +- ibis/backends/risingwave/tests/test_client.py | 66 +- .../risingwave/tests/test_functions.py | 180 +- ibis/backends/tests/errors.py | 6 +- .../test_dot_sql/test_cte/risingwave/out.sql | 8 + .../test_default_limit/risingwave/out.sql | 5 + .../risingwave/out.sql | 5 + .../risingwave/out.sql | 3 + .../test_respect_set_limit/risingwave/out.sql | 10 + .../risingwave/out.sql | 22 + .../test_sql/test_isin_bug/risingwave/out.sql | 9 + .../test_union_aliasing/risingwave/out.sql | 60 + ibis/backends/tests/test_aggregation.py | 62 +- ibis/backends/tests/test_array.py | 149 +- ibis/backends/tests/test_asof_join.py | 2 + ibis/backends/tests/test_benchmarks.py | 900 -------- ibis/backends/tests/test_client.py | 45 +- ibis/backends/tests/test_dot_sql.py | 17 +- ibis/backends/tests/test_export.py | 34 +- ibis/backends/tests/test_generic.py | 88 +- ibis/backends/tests/test_join.py | 2 +- ibis/backends/tests/test_map.py | 23 +- ibis/backends/tests/test_numeric.py | 87 +- 
ibis/backends/tests/test_param.py | 19 +- ibis/backends/tests/test_register.py | 48 +- ibis/backends/tests/test_set_ops.py | 9 +- ibis/backends/tests/test_sql.py | 8 +- ibis/backends/tests/test_string.py | 53 +- ibis/backends/tests/test_struct.py | 2 +- ibis/backends/tests/test_temporal.py | 83 +- ibis/backends/tests/test_uuid.py | 7 +- ibis/backends/tests/test_window.py | 76 +- poetry.lock | 1851 ++++++++--------- pyproject.toml | 4 +- 51 files changed, 1951 insertions(+), 2919 deletions(-) create mode 100644 ibis/backends/risingwave/dialect.py create mode 100644 ibis/backends/tests/snapshots/test_dot_sql/test_cte/risingwave/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/risingwave/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/risingwave/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/risingwave/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/risingwave/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/risingwave/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_isin_bug/risingwave/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_union_aliasing/risingwave/out.sql delete mode 100644 ibis/backends/tests/test_benchmarks.py diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index 1de52e0b4a15..873a3994dfeb 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -123,6 +123,12 @@ jobs: - postgres sys-deps: - libgeos-dev + - name: risingwave + title: Risingwave + services: + - risingwave + extras: + - risingwave - name: impala title: Impala serial: true @@ -218,6 +224,14 @@ jobs: - postgres sys-deps: - libgeos-dev + - os: windows-latest + backend: + name: risingwave + title: Risingwave + services: + - risingwave + extras: + - risingwave - os: windows-latest backend: name: postgres diff --git a/ci/schema/risingwave.sql b/ci/schema/risingwave.sql index cedfa8449d60..251b689ada0d 100644 --- a/ci/schema/risingwave.sql +++ b/ci/schema/risingwave.sql @@ -1,27 +1,27 @@ SET RW_IMPLICIT_FLUSH=true; -DROP TABLE IF EXISTS diamonds CASCADE; - -CREATE TABLE diamonds ( - carat FLOAT, - cut TEXT, - color TEXT, - clarity TEXT, - depth FLOAT, +DROP TABLE IF EXISTS "diamonds" CASCADE; + +CREATE TABLE "diamonds" ( + "carat" FLOAT, + "cut" TEXT, + "color" TEXT, + "clarity" TEXT, + "depth" FLOAT, "table" FLOAT, - price BIGINT, - x FLOAT, - y FLOAT, - z FLOAT + "price" BIGINT, + "x" FLOAT, + "y" FLOAT, + "z" FLOAT ) WITH ( connector = 'posix_fs', match_pattern = 'diamonds.csv', posix_fs.root = '/data', ) FORMAT PLAIN ENCODE CSV ( without_header = 'false', delimiter = ',' ); -DROP TABLE IF EXISTS astronauts CASCADE; +DROP TABLE IF EXISTS "astronauts" CASCADE; -CREATE TABLE astronauts ( +CREATE TABLE "astronauts" ( "id" BIGINT, "number" BIGINT, "nationwide_number" BIGINT, @@ -52,12 +52,12 @@ CREATE TABLE astronauts ( posix_fs.root = '/data', ) FORMAT PLAIN ENCODE CSV ( without_header = 'false', delimiter = ',' ); -DROP TABLE IF EXISTS batting CASCADE; +DROP TABLE IF EXISTS "batting" CASCADE; -CREATE TABLE batting ( +CREATE TABLE "batting" ( "playerID" TEXT, "yearID" BIGINT, - stint BIGINT, + "stint" BIGINT, "teamID" TEXT, "lgID" TEXT, "G" BIGINT, @@ -83,71 +83,71 @@ CREATE TABLE batting ( posix_fs.root = '/data', ) FORMAT PLAIN ENCODE CSV ( without_header = 
'false', delimiter = ',' ); -DROP TABLE IF EXISTS awards_players CASCADE; +DROP TABLE IF EXISTS "awards_players" CASCADE; -CREATE TABLE awards_players ( +CREATE TABLE "awards_players" ( "playerID" TEXT, "awardID" TEXT, "yearID" BIGINT, "lgID" TEXT, - tie TEXT, - notes TEXT + "tie" TEXT, + "notes" TEXT ) WITH ( connector = 'posix_fs', match_pattern = 'awards_players.csv', posix_fs.root = '/data', ) FORMAT PLAIN ENCODE CSV ( without_header = 'false', delimiter = ',' ); -DROP TABLE IF EXISTS functional_alltypes CASCADE; - -CREATE TABLE functional_alltypes ( - id INTEGER, - bool_col BOOLEAN, - tinyint_col SMALLINT, - smallint_col SMALLINT, - int_col INTEGER, - bigint_col BIGINT, - float_col REAL, - double_col DOUBLE PRECISION, - date_string_col TEXT, - string_col TEXT, - timestamp_col TIMESTAMP WITHOUT TIME ZONE, - year INTEGER, - month INTEGER +DROP TABLE IF EXISTS "functional_alltypes" CASCADE; + +CREATE TABLE "functional_alltypes" ( + "id" INTEGER, + "bool_col" BOOLEAN, + "tinyint_col" SMALLINT, + "smallint_col" SMALLINT, + "int_col" INTEGER, + "bigint_col" BIGINT, + "float_col" REAL, + "double_col" DOUBLE PRECISION, + "date_string_col" TEXT, + "string_col" TEXT, + "timestamp_col" TIMESTAMP WITHOUT TIME ZONE, + "year" INTEGER, + "month" INTEGER ) WITH ( connector = 'posix_fs', match_pattern = 'functional_alltypes.csv', posix_fs.root = '/data', ) FORMAT PLAIN ENCODE CSV ( without_header = 'false', delimiter = ',' ); -DROP TABLE IF EXISTS tzone CASCADE; +DROP TABLE IF EXISTS "tzone" CASCADE; -CREATE TABLE tzone ( - ts TIMESTAMP WITH TIME ZONE, - key TEXT, - value DOUBLE PRECISION +CREATE TABLE "tzone" ( + "ts" TIMESTAMP WITH TIME ZONE, + "key" TEXT, + "value" DOUBLE PRECISION ); -INSERT INTO tzone +INSERT INTO "tzone" SELECT CAST('2017-05-28 11:01:31.000400' AS TIMESTAMP WITH TIME ZONE) + - t * INTERVAL '1 day 1 second' AS ts, - CHR(97 + t) AS key, - t + t / 10.0 AS value - FROM generate_series(0, 9) AS t; - -DROP TABLE IF EXISTS array_types CASCADE; - -CREATE TABLE IF NOT EXISTS array_types ( - x BIGINT[], - y TEXT[], - z DOUBLE PRECISION[], - grouper TEXT, - scalar_column DOUBLE PRECISION, - multi_dim BIGINT[][] + t * INTERVAL '1 day 1 second' AS "ts", + CHR(97 + t) AS "key", + t + t / 10.0 AS "value" + FROM generate_series(0, 9) AS "t"; + +DROP TABLE IF EXISTS "array_types" CASCADE; + +CREATE TABLE IF NOT EXISTS "array_types" ( + "x" BIGINT[], + "y" TEXT[], + "z" DOUBLE PRECISION[], + "grouper" TEXT, + "scalar_column" DOUBLE PRECISION, + "multi_dim" BIGINT[][] ); -INSERT INTO array_types VALUES +INSERT INTO "array_types" VALUES (ARRAY[1, 2, 3], ARRAY['a', 'b', 'c'], ARRAY[1.0, 2.0, 3.0], 'a', 1.0, ARRAY[ARRAY[NULL::BIGINT, NULL, NULL], ARRAY[1, 2, 3]]), (ARRAY[4, 5], ARRAY['d', 'e'], ARRAY[4.0, 5.0], 'a', 2.0, ARRAY[]::BIGINT[][]), (ARRAY[6, NULL], ARRAY['f', NULL], ARRAY[6.0, NULL], 'a', 3.0, ARRAY[NULL, ARRAY[]::BIGINT[], NULL]), @@ -155,11 +155,11 @@ INSERT INTO array_types VALUES (ARRAY[2, NULL, 3], ARRAY['b', NULL, 'c'], NULL, 'b', 5.0, NULL), (ARRAY[4, NULL, NULL, 5], ARRAY['d', NULL, NULL, 'e'], ARRAY[4.0, NULL, NULL, 5.0], 'c', 6.0, ARRAY[ARRAY[1, 2, 3]]); -DROP TABLE IF EXISTS json_t CASCADE; +DROP TABLE IF EXISTS "json_t" CASCADE; -CREATE TABLE IF NOT EXISTS json_t (js JSONB); +CREATE TABLE IF NOT EXISTS "json_t" ("js" JSONB); -INSERT INTO json_t VALUES +INSERT INTO "json_t" VALUES ('{"a": [1,2,3,4], "b": 1}'), ('{"a":null,"b":2}'), ('{"a":"foo", "c":null}'), @@ -167,9 +167,9 @@ INSERT INTO json_t VALUES ('[42,47,55]'), ('[]'); -DROP TABLE IF EXISTS win CASCADE; -CREATE TABLE win 
(g TEXT, x BIGINT, y BIGINT); -INSERT INTO win VALUES +DROP TABLE IF EXISTS "win" CASCADE; +CREATE TABLE "win" ("g" TEXT, "x" BIGINT, "y" BIGINT); +INSERT INTO "win" VALUES ('a', 0, 3), ('a', 1, 2), ('a', 2, 0), diff --git a/ibis/backends/base/__init__.py b/ibis/backends/base/__init__.py index 7411077514d1..445997da3b09 100644 --- a/ibis/backends/base/__init__.py +++ b/ibis/backends/base/__init__.py @@ -41,6 +41,7 @@ "datafusion": "postgres", # closest match see https://github.com/ibis-project/ibis/pull/7303#discussion_r1350223901 "exasol": "oracle", + "risingwave": "postgres", } _SQLALCHEMY_TO_SQLGLOT_DIALECT = { @@ -75,6 +76,7 @@ def __dir__(self) -> list[str]: ------- list[str] A list of the attributes and tables available in the database. + """ attrs = dir(type(self)) unqualified_tables = [self._unqualify(x) for x in self.tables] @@ -92,6 +94,7 @@ def __contains__(self, table: str) -> bool: ------- bool True if the given table is available in the current database. + """ return table in self.tables @@ -103,6 +106,7 @@ def tables(self) -> list[str]: ------- list[str] The list of tables in the database + """ return self.list_tables() @@ -118,6 +122,7 @@ def __getitem__(self, table: str) -> ir.Table: ------- Table Table expression + """ return self.table(table) @@ -133,6 +138,7 @@ def __getattr__(self, table: str) -> ir.Table: ------- Table Table expression + """ return self.table(table) @@ -150,6 +156,7 @@ def drop(self, force: bool = False) -> None: force If `True`, drop any objects that exist, and do not fail if the database does not exist. + """ self.client.drop_database(self.name, force=force) @@ -165,6 +172,7 @@ def table(self, name: str) -> ir.Table: ------- Table Table expression + """ qualified_name = self._qualify(name) return self.client.table(qualified_name, self.name) @@ -178,6 +186,7 @@ def list_tables(self, like=None, database=None): A pattern to use for listing tables. database The database to perform the list against + """ return self.client.list_tables(like, database=database or self.name) @@ -192,6 +201,7 @@ class TablesAccessor(collections.abc.Mapping): >>> con = ibis.sqlite.connect("example.db") >>> people = con.tables["people"] # access via index >>> people = con.tables.people # access via attribute + """ def __init__(self, backend: BaseBackend): @@ -276,6 +286,7 @@ def to_pandas( "no limit". The default is in `ibis/config.py`. kwargs Keyword arguments + """ return self.execute(expr, params=params, limit=limit, **kwargs) @@ -309,6 +320,7 @@ def to_pandas_batches( ------- Iterator[pd.DataFrame] An iterator of pandas `DataFrame`s. + """ from ibis.formats.pandas import PandasData @@ -354,6 +366,7 @@ def to_pyarrow( ------- Table A pyarrow table holding the results of the executed expression. + """ pa = self._import_pyarrow() self._run_pre_execute_hooks(expr) @@ -403,6 +416,7 @@ def to_pyarrow_batches( ------- results RecordBatchReader + """ raise NotImplementedError @@ -432,6 +446,7 @@ def to_torch( ------- dict[str, torch.Tensor] A dictionary of torch tensors, keyed by column name. + """ import torch @@ -463,6 +478,7 @@ def read_parquet( ------- ir.Table The just-registered table + """ raise NotImplementedError( f"{self.name} does not support direct registration of parquet data." @@ -487,6 +503,7 @@ def read_csv( ------- ir.Table The just-registered table + """ raise NotImplementedError( f"{self.name} does not support direct registration of CSV data." 
@@ -511,6 +528,7 @@ def read_json( ------- ir.Table The just-registered table + """ raise NotImplementedError( f"{self.name} does not support direct registration of JSON data." @@ -536,6 +554,7 @@ def read_delta( ------- ir.Table The just-registered table. + """ raise NotImplementedError( f"{self.name} does not support direct registration of DeltaLake tables." @@ -567,6 +586,7 @@ def to_parquet( Additional keyword arguments passed to pyarrow.parquet.ParquetWriter https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html + """ self._import_pyarrow() import pyarrow.parquet as pq @@ -602,6 +622,7 @@ def to_csv( Additional keyword arguments passed to pyarrow.csv.CSVWriter https://arrow.apache.org/docs/python/generated/pyarrow.csv.CSVWriter.html + """ self._import_pyarrow() import pyarrow.csv as pcsv @@ -666,6 +687,7 @@ def list_databases(self, like: str | None = None) -> list[str]: list[str] The database names that exist in the current connection, that match the `like` pattern if provided. + """ @property @@ -685,6 +707,7 @@ def create_database(self, name: str, force: bool = False) -> None: Name of the new database. force If `False`, an exception is raised if the database already exists. + """ @abc.abstractmethod @@ -697,6 +720,7 @@ def drop_database(self, name: str, force: bool = False) -> None: Database to drop. force If `False`, an exception is raised if the database does not exist. + """ @@ -716,6 +740,7 @@ def create_schema( current database is used. force If `False`, an exception is raised if the schema exists. + """ @abc.abstractmethod @@ -733,6 +758,7 @@ def drop_schema( current database is used. force If `False`, an exception is raised if the schema does not exist. + """ @abc.abstractmethod @@ -755,6 +781,7 @@ def list_schemas( list[str] The schema names that exist in the current connection, that match the `like` pattern if provided. + """ @property @@ -814,6 +841,7 @@ def db_identity(self) -> str: ------- Hashable Database identity + """ parts = [self.__class__] parts.extend(self._con_args) @@ -844,6 +872,7 @@ def connect(self, *args, **kwargs) -> BaseBackend: ------- BaseBackend An instance of the backend + """ new_backend = self.__class__(*args, **kwargs) new_backend.reconnect() @@ -880,6 +909,7 @@ def database(self, name: str | None = None) -> Database: ------- Database A database object for the specified database. + """ return Database(name=name or self.current_database, client=self) @@ -905,6 +935,7 @@ def _filter_with_like(values: Iterable[str], like: str | None = None) -> list[st ------- list[str] Names filtered by the `like` pattern. + """ if like is None: return sorted(values) @@ -933,6 +964,7 @@ def list_tables( ------- list[str] The list of the table names that match the pattern `like`. + """ @abc.abstractmethod @@ -950,6 +982,7 @@ def table(self, name: str, database: str | None = None) -> ir.Table: ------- Table Table expression + """ @functools.cached_property @@ -963,6 +996,7 @@ def tables(self): >>> con = ibis.sqlite.connect("example.db") >>> people = con.tables["people"] # access via index >>> people = con.tables.people # access via attribute + """ return TablesAccessor(self) @@ -980,6 +1014,7 @@ def version(self) -> str: ------- str The backend version + """ @classmethod @@ -1088,6 +1123,7 @@ def create_table( ------- Table The table that was created. + """ @abc.abstractmethod @@ -1108,6 +1144,7 @@ def drop_table( Name of the database where the table exists, if not the default. 
force If `False`, an exception is raised if the table does not exist. + """ raise NotImplementedError( f'Backend "{self.name}" does not implement "drop_table"' @@ -1122,6 +1159,7 @@ def rename_table(self, old_name: str, new_name: str) -> None: The old name of the table. new_name The new name of the table. + """ raise NotImplementedError( f'Backend "{self.name}" does not implement "rename_table"' @@ -1154,6 +1192,7 @@ def create_view( ------- Table The view that was created. + """ @abc.abstractmethod @@ -1170,6 +1209,7 @@ def drop_view( Name of the database where the view exists, if not the default. force If `False`, an exception is raised if the view does not exist. + """ @classmethod @@ -1194,6 +1234,7 @@ def has_operation(cls, operation: type[ops.Value]) -> bool: False >>> ibis.postgres.has_operation(ops.ArrayIndex) True + """ raise NotImplementedError( f"{cls.name} backend has not implemented `has_operation` API" @@ -1228,6 +1269,7 @@ def _release_cached(self, expr: ir.CachedTable) -> None: ---------- expr Cached expression to release + """ del self._query_cache[expr.op()] @@ -1268,6 +1310,7 @@ def _get_backend_names() -> frozenset[str]: If a `set` is used, then any in-place modifications to the set are visible to every caller of this function. + """ if sys.version_info < (3, 10): @@ -1325,6 +1368,7 @@ def connect(resource: Path | str, **kwargs: Any) -> BaseBackend: >>> con = ibis.connect( ... "bigquery://my-project/my-dataset" ... ) # quartodoc: +SKIP # doctest: +SKIP + """ url = resource = str(resource) diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index 0f5b3e738b6f..db90f5ea8061 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -141,9 +141,9 @@ def parenthesize(op, arg): C = ColGen() F = FuncGen() -NULL = sge.NULL -FALSE = sge.FALSE -TRUE = sge.TRUE +NULL = sge.Null() +FALSE = sge.false() +TRUE = sge.true() STAR = sge.Star() @@ -251,6 +251,7 @@ def translate(self, op, *, params: Mapping[ir.Value, Any]) -> sge.Expression: ------- sqlglot.expressions.Expression A sqlglot expression + """ # substitute parameters immediately to avoid having to define a # ScalarParameter translation rule diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index 370afc7a06b9..db1bae762c9a 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -394,6 +394,24 @@ def _from_ibis_Map(cls, dtype: dt.Map) -> sge.DataType: return sge.DataType(this=typecode.HSTORE) +class RisingWaveType(PostgresType): + dialect = "risingwave" + + @classmethod + def _from_ibis_Timestamp(cls, dtype: dt.Timestamp) -> sge.DataType: + if dtype.timezone is not None: + return sge.DataType(this=typecode.TIMESTAMPTZ) + return sge.DataType(this=typecode.TIMESTAMP) + + @classmethod + def _from_ibis_Decimal(cls, dtype: dt.Decimal) -> sge.DataType: + return sge.DataType(this=typecode.DECIMAL) + + @classmethod + def _from_ibis_UUID(cls, dtype: dt.UUID) -> sge.DataType: + return sge.DataType(this=typecode.VARCHAR) + + class DataFusionType(PostgresType): unknown_type_strings = { "utf8": dt.string, diff --git a/ibis/backends/clickhouse/compiler.py b/ibis/backends/clickhouse/compiler.py index e4121a2ef9b2..6439fb99a5e2 100644 --- a/ibis/backends/clickhouse/compiler.py +++ b/ibis/backends/clickhouse/compiler.py @@ -209,24 +209,26 @@ def visit_Hash(self, op, *, arg): @visit_node.register(ops.HashBytes) def visit_HashBytes(self, op, *, arg, how): - 
supported_algorithms = frozenset( - ( - "MD5", - "halfMD5", - "SHA1", - "SHA224", - "SHA256", - "intHash32", - "intHash64", - "cityHash64", - "sipHash64", - "sipHash128", - ) - ) - if how not in supported_algorithms: + supported_algorithms = { + "md5": "MD5", + "MD5": "MD5", + "halfMD5": "halfMD5", + "SHA1": "SHA1", + "sha1": "SHA1", + "SHA224": "SHA224", + "sha224": "SHA224", + "SHA256": "SHA256", + "sha256": "SHA256", + "intHash32": "intHash32", + "intHash64": "intHash64", + "cityHash64": "cityHash64", + "sipHash64": "sipHash64", + "sipHash128": "sipHash128", + } + if (funcname := supported_algorithms.get(how)) is None: raise com.UnsupportedOperationError(f"Unsupported hash algorithm {how}") - return self.f[how](arg) + return self.f[funcname](arg) @visit_node.register(ops.IntervalFromInteger) def visit_IntervalFromInteger(self, op, *, arg, unit): diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index 949ae7752b94..5fe94de29418 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -26,6 +26,7 @@ if TYPE_CHECKING: from collections.abc import Iterable + from ibis.backends.tests.base import BackendTest diff --git a/ibis/backends/duckdb/compiler.py b/ibis/backends/duckdb/compiler.py index e42bb5e733b0..a21735474c6a 100644 --- a/ibis/backends/duckdb/compiler.py +++ b/ibis/backends/duckdb/compiler.py @@ -329,10 +329,6 @@ def visit_GeoConvert(self, op, *, arg, source, target): # matches the behavior of the equivalent geopandas functionality return self.f.st_transform(arg, source, target, True) - @visit_node.register(ops.HexDigest) - def visit_HexDigest(self, op, *, arg, how): - return self.f[how](arg) - @visit_node.register(ops.TimestampNow) def visit_TimestampNow(self, op): """DuckDB current timestamp defaults to timestamp + tz.""" @@ -349,6 +345,13 @@ def visit_Quantile(self, op, *, arg, quantile, where): funcname = f"percentile_{suffix}" return self.agg[funcname](arg, quantile, where=where) + @visit_node.register(ops.HexDigest) + def visit_HexDigest(self, op, *, arg, how): + if how in ("md5", "sha256"): + return getattr(self.f, how)(arg) + else: + raise NotImplementedError(f"No available hashing function for {how}") + _SIMPLE_OPS = { ops.ArrayPosition: "list_indexof", diff --git a/ibis/backends/mssql/compiler.py b/ibis/backends/mssql/compiler.py index 4609d241610d..d0c4470d7489 100644 --- a/ibis/backends/mssql/compiler.py +++ b/ibis/backends/mssql/compiler.py @@ -376,6 +376,36 @@ def visit_Not(self, op, *, arg): return sge.FALSE if arg == sge.TRUE else sge.TRUE return self.if_(arg, 1, 0).eq(0) + @visit_node.register(ops.HashBytes) + def visit_HashBytes(self, op, *, arg, how): + if how in ("md5", "sha1"): + return self.f.hashbytes(how, arg) + elif how == "sha256": + return self.f.hashbytes("sha2_256", arg) + elif how == "sha512": + return self.f.hashbytes("sha2_512", arg) + else: + raise NotImplementedError(how) + + @visit_node.register(ops.HexDigest) + def visit_HexDigest(self, op, *, arg, how): + if how in ("md5", "sha1"): + hashbinary = self.f.hashbytes(how, arg) + elif how == "sha256": + hashbinary = self.f.hashbytes("sha2_256", arg) + elif how == "sha512": + hashbinary = self.f.hashbytes("sha2_512", arg) + else: + raise NotImplementedError(how) + + # mssql uppercases the hexdigest which is inconsistent with several other + # implementations and inconsistent with Python, so lowercase it. 
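
The ClickHouse, DuckDB, and MSSQL hunks above converge on the same pattern for HashBytes/HexDigest: map the user-supplied algorithm name through an explicit table, fail loudly on anything unmapped, and keep hex output lowercase. A backend-agnostic sketch of that dispatch using only the standard library; the mapping below is illustrative, not any compiler's actual table:

    import hashlib

    # Illustrative name table; real backends map to engine-specific functions
    # (e.g. "sha256" becomes HASHBYTES('sha2_256', ...) on MSSQL).
    _SUPPORTED = {"md5": "md5", "sha1": "sha1", "sha256": "sha256", "sha512": "sha512"}

    def hexdigest(data: bytes, how: str) -> str:
        if (func := _SUPPORTED.get(how.lower())) is None:
            raise NotImplementedError(f"No available hashing function for {how}")
        # hashlib already returns lowercase hex, the convention the MSSQL hunk
        # enforces with LOWER(CONVERT(VARCHAR(MAX), ..., 2)).
        return hashlib.new(func, data).hexdigest()

    assert hexdigest(b"ibis", "SHA256") == hashlib.sha256(b"ibis").hexdigest()
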
+ return self.f.lower( + self.f.convert( + sge.Literal(this="VARCHAR(MAX)", is_string=False), hashbinary, 2 + ) + ) + @visit_node.register(ops.Any) @visit_node.register(ops.All) @visit_node.register(ops.ApproxMedian) diff --git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py index 0f51218c7f7c..10365658518b 100644 --- a/ibis/backends/postgres/__init__.py +++ b/ibis/backends/postgres/__init__.py @@ -44,6 +44,7 @@ def _verify_source_line(func_name: str, line: str): class Backend(SQLGlotBackend): name = "postgres" + dialect = "postgres" compiler = PostgresCompiler() supports_python_udfs = True @@ -61,6 +62,7 @@ def _from_url(self, url: str, **kwargs): ------- BaseBackend A backend instance + """ url = urlparse(url) @@ -106,7 +108,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: schema = op.schema if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]: raise exc.IbisTypeError( - "Postgres cannot yet reliably handle `null` typed columns; " + f"{self.name} cannot yet reliably handle `null` typed columns; " f"got null typed columns: {null_columns}" ) @@ -137,18 +139,18 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: ), properties=sg.exp.Properties(expressions=[sge.TemporaryProperty()]), ) - create_stmt_sql = create_stmt.sql(self.name) + create_stmt_sql = create_stmt.sql(self.dialect) columns = schema.keys() df = op.data.to_frame() data = df.itertuples(index=False) cols = ", ".join( - ident.sql(self.name) + ident.sql(self.dialect) for ident in map(partial(sg.to_identifier, quoted=quoted), columns) ) specs = ", ".join(repeat("%s", len(columns))) table = sg.table(name, quoted=quoted) - sql = f"INSERT INTO {table.sql(self.name)} ({cols}) VALUES ({specs})" + sql = f"INSERT INTO {table.sql(self.dialect)} ({cols}) VALUES ({specs})" with self.begin() as cur: cur.execute(create_stmt_sql) extras.execute_batch(cur, sql, data, 128) @@ -254,6 +256,7 @@ def do_connect( timestamp_col : timestamp year : int32 month : int32 + """ self.con = psycopg2.connect( @@ -291,6 +294,7 @@ def list_tables( The `schema` parameter does **not** refer to the column names and types of `table`. 
::: + """ if database is not None: util.warn_deprecated( @@ -314,7 +318,7 @@ def list_tables( .from_(sg.table("tables", db="information_schema")) .distinct() .where(*conditions) - .sql(self.name) + .sql(self.dialect) ) with self._safe_raw_sql(sql) as cur: @@ -447,10 +451,10 @@ def _compile_builtin_udf(self, udf_node: ops.ScalarUDF) -> None: """No op.""" def _compile_pyarrow_udf(self, udf_node: ops.ScalarUDF) -> None: - raise NotImplementedError("pyarrow UDFs are not supported in Postgres") + raise NotImplementedError(f"pyarrow UDFs are not supported in {self.name}") def _compile_pandas_udf(self, udf_node: ops.ScalarUDF) -> str: - raise NotImplementedError("pandas UDFs are not supported in Postgres") + raise NotImplementedError(f"pandas UDFs are not supported in {self.name}") def _define_udf_translation_rules(self, expr: ir.Expr) -> None: """No-op, these are defined in the compiler.""" @@ -535,11 +539,11 @@ def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]: create_stmt = sge.Create( kind="VIEW", this=sg.table(name), - expression=sg.parse_one(query, read=self.name), + expression=sg.parse_one(query, read=self.dialect), properties=sge.Properties(expressions=[sge.TemporaryProperty()]), ) drop_stmt = sge.Drop(kind="VIEW", this=sg.table(name), exists=True).sql( - self.name + self.dialect ) with self._safe_raw_sql(create_stmt): @@ -555,7 +559,7 @@ def create_schema( ) -> None: if database is not None and database != self.current_database: raise exc.UnsupportedOperationError( - "Postgres does not support creating a schema in a different database" + f"{self.name} does not support creating a schema in a different database" ) sql = sge.Create( kind="SCHEMA", this=sg.table(name, catalog=database), exists=force @@ -572,7 +576,7 @@ def drop_schema( ) -> None: if database is not None and database != self.current_database: raise exc.UnsupportedOperationError( - "Postgres does not support dropping a schema in a different database" + f"{self.name} does not support dropping a schema in a different database" ) sql = sge.Drop( @@ -614,13 +618,14 @@ def create_table( overwrite If `True`, replace the table if it already exists, otherwise fail if the table exists + """ if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") if database is not None and database != self.current_database: raise com.UnsupportedOperationError( - "Creating tables in other databases is not supported by Postgres" + f"Creating tables in other databases is not supported by {self.name}" ) else: database = None @@ -672,15 +677,15 @@ def create_table( this = sg.table(name, catalog=database, quoted=self.compiler.quoted) with self._safe_raw_sql(create_stmt) as cur: if query is not None: - insert_stmt = sge.Insert(this=table, expression=query).sql(self.name) + insert_stmt = sge.Insert(this=table, expression=query).sql(self.dialect) cur.execute(insert_stmt) if overwrite: cur.execute( - sge.Drop(kind="TABLE", this=this, exists=True).sql(self.name) + sge.Drop(kind="TABLE", this=this, exists=True).sql(self.dialect) ) cur.execute( - f"ALTER TABLE IF EXISTS {table.sql(self.name)} RENAME TO {this.sql(self.name)}" + f"ALTER TABLE IF EXISTS {table.sql(self.dialect)} RENAME TO {this.sql(self.dialect)}" ) if schema is None: @@ -700,7 +705,7 @@ def drop_table( ) -> None: if database is not None and database != self.current_database: raise com.UnsupportedOperationError( - "Droppping tables in other databases is not supported by Postgres" + f"Droppping tables in other databases is not supported by 
{self.name}" ) else: database = None @@ -721,7 +726,7 @@ def _safe_raw_sql(self, *args, **kwargs): def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: with contextlib.suppress(AttributeError): - query = query.sql(dialect=self.name) + query = query.sql(dialect=self.dialect) con = self.con cursor = con.cursor() @@ -771,7 +776,8 @@ def truncate_table(self, name: str, database: str | None = None) -> None: Table name database Schema name + """ - ident = sg.table(name, db=database).sql(self.name) + ident = sg.table(name, db=database).sql(self.dialect) with self._safe_raw_sql(f"TRUNCATE TABLE {ident}"): pass diff --git a/ibis/backends/pyspark/compiler.py b/ibis/backends/pyspark/compiler.py index bc2bbf2b7584..b4e75c959735 100644 --- a/ibis/backends/pyspark/compiler.py +++ b/ibis/backends/pyspark/compiler.py @@ -457,6 +457,17 @@ def visit_JoinLink(self, op, **kwargs): def visit_Undefined(self, op, **_): raise com.OperationNotDefinedError(type(op).__name__) + @visit_node.register(ops.HexDigest) + def visit_HexDigest(self, op, *, arg, how): + if how == "md5": + return self.f.md5(arg) + elif how == "sha1": + return self.f.sha1(arg) + elif how in ("sha256", "sha512"): + return self.f.sha2(arg, int(how[-3:])) + else: + raise NotImplementedError(f"No available hashing function for {how}") + _SIMPLE_OPS = { ops.ArrayDistinct: "array_distinct", diff --git a/ibis/backends/risingwave/__init__.py b/ibis/backends/risingwave/__init__.py index 04de491f6dfe..996f776fd12e 100644 --- a/ibis/backends/risingwave/__init__.py +++ b/ibis/backends/risingwave/__init__.py @@ -2,36 +2,40 @@ from __future__ import annotations -import inspect -from typing import TYPE_CHECKING, Callable, Literal - -import sqlalchemy as sa - -import ibis.common.exceptions as exc +import atexit +from functools import partial +from itertools import repeat +from typing import TYPE_CHECKING + +import psycopg2 +import sqlglot as sg +import sqlglot.expressions as sge +from psycopg2 import extras + +import ibis +import ibis.common.exceptions as com import ibis.expr.operations as ops +import ibis.expr.types as ir from ibis import util -from ibis.backends.base.sql.alchemy import AlchemyCanCreateSchema, BaseAlchemyBackend +from ibis.backends.postgres import Backend as PostgresBackend from ibis.backends.risingwave.compiler import RisingwaveCompiler -from ibis.backends.risingwave.datatypes import RisingwaveType -from ibis.common.exceptions import InvalidDecoratorError +from ibis.backends.risingwave.dialect import RisingWave as RisingWaveDialect if TYPE_CHECKING: - from collections.abc import Iterable - - import ibis.expr.datatypes as dt + import pandas as pd + import pyarrow as pa def _verify_source_line(func_name: str, line: str): if line.startswith("@"): - raise InvalidDecoratorError(func_name, line) + raise com.InvalidDecoratorError(func_name, line) return line -class Backend(BaseAlchemyBackend, AlchemyCanCreateSchema): +class Backend(PostgresBackend): name = "risingwave" - compiler = RisingwaveCompiler - supports_temporary_tables = False - supports_create_or_replace = False + dialect = RisingWaveDialect + compiler = RisingwaveCompiler() supports_python_udfs = False def do_connect( @@ -42,10 +46,8 @@ def do_connect( port: int = 5432, database: str | None = None, schema: str | None = None, - url: str | None = None, - driver: Literal["psycopg2"] = "psycopg2", ) -> None: - """Create an Ibis client connected to Risingwave database. + """Create an Ibis client connected to RisingWave database. 
Parameters ---------- @@ -60,13 +62,7 @@ def do_connect( database Database to connect to schema - Risingwave schema to use. If `None`, use the default `search_path`. - url - SQLAlchemy connection string. - - If passed, the other connection arguments are ignored. - driver - Database driver + RisingWave schema to use. If `None`, use the default `search_path`. Examples -------- @@ -98,185 +94,199 @@ def do_connect( timestamp_col : timestamp year : int32 month : int32 + """ - if driver != "psycopg2": - raise NotImplementedError("psycopg2 is currently the only supported driver") - alchemy_url = self._build_alchemy_url( - url=url, + self.con = psycopg2.connect( host=host, port=port, user=user, password=password, database=database, - driver=f"risingwave+{driver}", - ) - - connect_args = {} - if schema is not None: - connect_args["options"] = f"-csearch_path={schema}" - - engine = sa.create_engine( - alchemy_url, connect_args=connect_args, poolclass=sa.pool.StaticPool + options=(f"-csearch_path={schema}" * (schema is not None)) or None, ) - @sa.event.listens_for(engine, "connect") - def connect(dbapi_connection, connection_record): - with dbapi_connection.cursor() as cur: - cur.execute("SET TIMEZONE = UTC") + with self.begin() as cur: + cur.execute("SET TIMEZONE = UTC") - super().do_connect(engine) + self._temp_views = set() - def list_tables(self, like=None, schema=None): - """List the tables in the database. + def create_table( + self, + name: str, + obj: pd.DataFrame | pa.Table | ir.Table | None = None, + *, + schema: ibis.Schema | None = None, + database: str | None = None, + temp: bool = False, + overwrite: bool = False, + ): + """Create a table in Risingwave. Parameters ---------- - like - A pattern to use for listing tables. + name + Name of the table to create + obj + The data with which to populate the table; optional, but at least + one of `obj` or `schema` must be specified schema - The schema to perform the list against. - - ::: {.callout-warning} - ## `schema` refers to database hierarchy + The schema of the table to create; optional, but at least one of + `obj` or `schema` must be specified + database + The name of the database in which to create the table; if not + passed, the current database is used. + temp + Create a temporary table + overwrite + If `True`, replace the table if it already exists, otherwise fail + if the table exists - The `schema` parameter does **not** refer to the column names and - types of `table`. 
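
A quick usage sketch for the method documented above. The connection values are hypothetical (a stock RisingWave frontend listens on 4566, while this do_connect defaults to 5432), and the ibis.risingwave entry point is assumed to be registered the same way as the other backends:

    import ibis

    con = ibis.risingwave.connect(
        host="localhost", port=4566, user="root", database="dev"
    )
    events = con.create_table(
        "events", schema=ibis.schema({"user_id": "int64", "payload": "string"})
    )
    assert "events" in con.list_tables()
    con.drop_table("events")
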
- ::: """ - tables = self.inspector.get_table_names(schema=schema) - views = self.inspector.get_view_names(schema=schema) - return self._filter_with_like(tables + views, like) - - def list_databases(self, like=None) -> list[str]: - # http://dba.stackexchange.com/a/1304/58517 - dbs = sa.table( - "pg_database", - sa.column("datname", sa.TEXT()), - sa.column("datistemplate", sa.BOOLEAN()), - schema="pg_catalog", + if obj is None and schema is None: + raise ValueError("Either `obj` or `schema` must be specified") + + if database is not None and database != self.current_database: + raise com.UnsupportedOperationError( + f"Creating tables in other databases is not supported by {self.name}" + ) + else: + database = None + + properties = [] + + if temp: + properties.append(sge.TemporaryProperty()) + + if obj is not None: + if not isinstance(obj, ir.Expr): + table = ibis.memtable(obj) + else: + table = obj + + self._run_pre_execute_hooks(table) + + query = self._to_sqlglot(table) + else: + query = None + + column_defs = [ + sge.ColumnDef( + this=sg.to_identifier(colname, quoted=self.compiler.quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [sge.ColumnConstraint(kind=sge.NotNullColumnConstraint())] + ), + ) + for colname, typ in (schema or table.schema()).items() + ] + + if overwrite: + temp_name = util.gen_name(f"{self.name}_table") + else: + temp_name = name + + table = sg.table(temp_name, catalog=database, quoted=self.compiler.quoted) + target = sge.Schema(this=table, expressions=column_defs) + + create_stmt = sge.Create( + kind="TABLE", + this=target, + properties=sge.Properties(expressions=properties), ) - query = sa.select(dbs.c.datname).where(sa.not_(dbs.c.datistemplate)) - with self.begin() as con: - databases = list(con.execute(query).scalars()) - - return self._filter_with_like(databases, like) - - @property - def current_database(self) -> str: - return self._scalar_query(sa.select(sa.func.current_database())) - - @property - def current_schema(self) -> str: - return self._scalar_query(sa.select(sa.func.current_schema())) - - def function(self, name: str, *, schema: str | None = None) -> Callable: - query = sa.text( - """ -SELECT - n.nspname as schema, - pg_catalog.pg_get_function_result(p.oid) as return_type, - string_to_array(pg_catalog.pg_get_function_arguments(p.oid), ', ') as signature, - CASE p.prokind - WHEN 'a' THEN 'agg' - WHEN 'w' THEN 'window' - WHEN 'p' THEN 'proc' - ELSE 'func' - END as "Type" -FROM pg_catalog.pg_proc p -LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace -WHERE p.proname = :name -""" - + "AND n.nspname OPERATOR(pg_catalog.~) :schema COLLATE pg_catalog.default" - * (schema is not None) - ).bindparams(name=name, schema=f"^({schema})$") - - def split_name_type(arg: str) -> tuple[str, dt.DataType]: - name, typ = arg.split(" ", 1) - return name, RisingwaveType.from_string(typ) - - with self.begin() as con: - rows = con.execute(query).mappings().fetchall() - - if not rows: - name = f"{schema}.{name}" if schema else name - raise exc.MissingUDFError(name) - elif len(rows) > 1: - raise exc.AmbiguousUDFError(name) - - [row] = rows - return_type = RisingwaveType.from_string(row["return_type"]) - signature = list(map(split_name_type, row["signature"])) - - # dummy callable - def fake_func(*args, **kwargs): - ... 
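
The function() machinery being deleted here now comes from the inherited Postgres backend instead of being duplicated, but the trick it relies on is worth noting: build a stub callable and graft a synthetic inspect.Signature onto it so a database built-in can be described to ops.udf.scalar.builtin without any Python implementation. A self-contained sketch of that trick, independent of any backend:

    import inspect

    def make_stub(name, params, return_type):
        # The stub is never called; only its metadata matters.
        def stub(*args, **kwargs):
            raise NotImplementedError(name)

        stub.__name__ = name
        stub.__signature__ = inspect.Signature(
            [
                inspect.Parameter(
                    pname, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=typ
                )
                for pname, typ in params
            ],
            return_annotation=return_type,
        )
        stub.__annotations__ = {"return": return_type, **dict(params)}
        return stub

    length = make_stub("length", [("s", str)], int)
    print(inspect.signature(length))  # (s: str) -> int
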
- - fake_func.__name__ = name - fake_func.__signature__ = inspect.Signature( - [ - inspect.Parameter( - name, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=typ + + this = sg.table(name, catalog=database, quoted=self.compiler.quoted) + with self._safe_raw_sql(create_stmt) as cur: + if query is not None: + insert_stmt = sge.Insert(this=table, expression=query).sql(self.dialect) + cur.execute(insert_stmt) + + if overwrite: + cur.execute( + sge.Drop(kind="TABLE", this=this, exists=True).sql(self.dialect) ) - for name, typ in signature - ], - return_annotation=return_type, - ) - fake_func.__annotations__ = {"return": return_type, **dict(signature)} - op = ops.udf.scalar.builtin(fake_func, schema=schema) - return op - - def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]: - name = util.gen_name("risingwave_metadata") - type_info_sql = """\ - SELECT - attname, - format_type(atttypid, atttypmod) AS type - FROM pg_attribute - WHERE attrelid = CAST(:name AS regclass) - AND attnum > 0 - AND NOT attisdropped - ORDER BY attnum""" - if self.inspector.has_table(query): - query = f"TABLE {query}" - - text = sa.text(type_info_sql).bindparams(name=name) - with self.begin() as con: - con.exec_driver_sql(f"CREATE VIEW IF NOT EXISTS {name} AS {query}") - try: - yield from ( - (col, RisingwaveType.from_string(typestr)) - for col, typestr in con.execute(text) + cur.execute( + f"ALTER TABLE {table.sql(self.dialect)} RENAME TO {this.sql(self.dialect)}" ) - finally: - con.exec_driver_sql(f"DROP VIEW IF EXISTS {name}") - def _get_temp_view_definition( - self, name: str, definition: sa.sql.compiler.Compiled - ) -> str: - yield f"DROP VIEW IF EXISTS {name}" - yield f"CREATE TEMPORARY VIEW {name} AS {definition}" - - def create_schema( - self, name: str, database: str | None = None, force: bool = False - ) -> None: - if database is not None and database != self.current_database: - raise exc.UnsupportedOperationError( - "Risingwave does not support creating a schema in a different database" + if schema is None: + return self.table(name, schema=database) + + # preserve the input schema if it was provided + return ops.DatabaseTable( + name, schema=schema, source=self, namespace=ops.Namespace(database=database) + ).to_expr() + + def _get_temp_view_definition(self, name: str, definition): + drop = sge.Drop( + kind="VIEW", exists=True, this=sg.table(name), cascade=True + ).sql(self.dialect) + + create = sge.Create( + this=sg.to_identifier(name, quoted=self.compiler.quoted), + kind="VIEW", + expression=definition, + replace=False, + ).sql(self.dialect) + + atexit.register(self._clean_up_tmp_view, name) + return f"{drop}; {create}" + + def _clean_up_tmp_view(self, name: str) -> None: + drop = sge.Drop( + kind="VIEW", exists=True, this=sg.table(name), cascade=True + ).sql(self.dialect) + with self.begin() as bind: + bind.execute(drop) + + def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: + schema = op.schema + if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]: + raise com.IbisTypeError( + f"{self.name} cannot yet reliably handle `null` typed columns; " + f"got null typed columns: {null_columns}" ) - if_not_exists = "IF NOT EXISTS " * force - name = self._quote(name) - with self.begin() as con: - con.exec_driver_sql(f"CREATE SCHEMA {if_not_exists}{name}") - def drop_schema( - self, name: str, database: str | None = None, force: bool = False - ) -> None: - if database is not None and database != self.current_database: - raise exc.UnsupportedOperationError( - 
"Risingwave does not support dropping a schema in a different database" + # only register if we haven't already done so + if (name := op.name) not in self.list_tables(): + quoted = self.compiler.quoted + column_defs = [ + sg.exp.ColumnDef( + this=sg.to_identifier(colname, quoted=quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [ + sg.exp.ColumnConstraint( + kind=sg.exp.NotNullColumnConstraint() + ) + ] + ), + ) + for colname, typ in schema.items() + ] + + create_stmt = sg.exp.Create( + kind="TABLE", + this=sg.exp.Schema( + this=sg.to_identifier(name, quoted=quoted), expressions=column_defs + ), + ) + create_stmt_sql = create_stmt.sql(self.dialect) + + columns = schema.keys() + df = op.data.to_frame() + data = df.itertuples(index=False) + cols = ", ".join( + ident.sql(self.dialect) + for ident in map(partial(sg.to_identifier, quoted=quoted), columns) ) - name = self._quote(name) - if_exists = "IF EXISTS " * force - with self.begin() as con: - con.exec_driver_sql(f"DROP SCHEMA {if_exists}{name}") + specs = ", ".join(repeat("%s", len(columns))) + table = sg.table(name, quoted=quoted) + sql = f"INSERT INTO {table.sql(self.dialect)} ({cols}) VALUES ({specs})" + with self.begin() as cur: + cur.execute(create_stmt_sql) + extras.execute_batch(cur, sql, data, 128) diff --git a/ibis/backends/risingwave/compiler.py b/ibis/backends/risingwave/compiler.py index b4bcd9c0b9d5..5bc7bfef2f5b 100644 --- a/ibis/backends/risingwave/compiler.py +++ b/ibis/backends/risingwave/compiler.py @@ -1,34 +1,104 @@ from __future__ import annotations +from functools import singledispatchmethod + +import sqlglot.expressions as sge +from public import public + +import ibis.common.exceptions as com +import ibis.expr.datashape as ds +import ibis.expr.datatypes as dt import ibis.expr.operations as ops -from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator -from ibis.backends.risingwave.datatypes import RisingwaveType -from ibis.backends.risingwave.registry import operation_registry -from ibis.expr.rewrites import rewrite_sample +from ibis.backends.base.sqlglot.datatypes import RisingWaveType +from ibis.backends.postgres.compiler import PostgresCompiler +from ibis.backends.risingwave.dialect import RisingWave # noqa: F401 + + +@public +class RisingwaveCompiler(PostgresCompiler): + __slots__ = () + + dialect = "risingwave" + name = "risingwave" + type_mapper = RisingWaveType + + @singledispatchmethod + def visit_node(self, op, **kwargs): + return super().visit_node(op, **kwargs) + + @visit_node.register(ops.Correlation) + def visit_Correlation(self, op, *, left, right, how, where): + if how == "sample": + raise com.UnsupportedOperationError( + f"{self.name} only implements `pop` correlation coefficient" + ) + return super().visit_Correlation( + op, left=left, right=right, how=how, where=where + ) + + @visit_node.register(ops.TimestampTruncate) + @visit_node.register(ops.DateTruncate) + @visit_node.register(ops.TimeTruncate) + def visit_TimestampTruncate(self, op, *, arg, unit): + unit_mapping = { + "Y": "year", + "Q": "quarter", + "M": "month", + "W": "week", + "D": "day", + "h": "hour", + "m": "minute", + "s": "second", + "ms": "milliseconds", + "us": "microseconds", + } + + if (unit := unit_mapping.get(unit.short)) is None: + raise com.UnsupportedOperationError(f"Unsupported truncate unit {unit}") + + return self.f.date_trunc(unit, arg) + @visit_node.register(ops.IntervalFromInteger) + def visit_IntervalFromInteger(self, op, *, arg, unit): + 
if op.arg.shape == ds.scalar: + return sge.Interval(this=arg, unit=self.v[unit.name]) + elif op.arg.shape == ds.columnar: + return arg * sge.Interval(this=sge.convert(1), unit=self.v[unit.name]) + else: + raise ValueError("Invalid shape for converting to interval") -class RisingwaveExprTranslator(AlchemyExprTranslator): - _registry = operation_registry.copy() - _rewrites = AlchemyExprTranslator._rewrites.copy() - _has_reduction_filter_syntax = True - _supports_tuple_syntax = True - _dialect_name = "risingwave" + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_binary(): + return self.cast("".join(map(r"\x{:0>2x}".format, value)), dt.binary) + elif dtype.is_date(): + return self.cast(value.isoformat(), dtype) + elif dtype.is_json(): + return sge.convert(str(value)) + return None - # it does support it, but we can't use it because of support for pivot - supports_unnest_in_select = False + @visit_node.register(ops.DateFromYMD) + @visit_node.register(ops.Mode) + def visit_Undefined(self, op, **_): + raise com.OperationNotDefinedError(type(op).__name__) - type_mapper = RisingwaveType +_SIMPLE_OPS = { + ops.First: "first_value", + ops.Last: "last_value", +} -rewrites = RisingwaveExprTranslator.rewrites +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + @RisingwaveCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) -@rewrites(ops.Any) -@rewrites(ops.All) -def _any_all_no_op(expr): - return expr + else: + @RisingwaveCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) -class RisingwaveCompiler(AlchemyCompiler): - translator_class = RisingwaveExprTranslator - rewrites = AlchemyCompiler.rewrites | rewrite_sample + setattr(RisingwaveCompiler, f"visit_{_op.__name__}", _fmt) diff --git a/ibis/backends/risingwave/dialect.py b/ibis/backends/risingwave/dialect.py new file mode 100644 index 000000000000..2237c2a4d188 --- /dev/null +++ b/ibis/backends/risingwave/dialect.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +import sqlglot.expressions as sge +from sqlglot import generator +from sqlglot.dialects import Postgres + + +class RisingWave(Postgres): + # Need to disable timestamp precision + # No "or replace" allowed in create statements + # no "not null" clause for column constraints + + class Generator(generator.Generator): + SINGLE_STRING_INTERVAL = True + RENAME_TABLE_WITH_DB = False + LOCKING_READS_SUPPORTED = True + JOIN_HINTS = False + TABLE_HINTS = False + QUERY_HINTS = False + NVL2_SUPPORTED = False + PARAMETER_TOKEN = "$" + TABLESAMPLE_SIZE_IS_ROWS = False + TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" + SUPPORTS_SELECT_INTO = True + JSON_TYPE_REQUIRED_FOR_EXTRACTION = True + SUPPORTS_UNLOGGED_TABLES = True + + TYPE_MAPPING = { + **Postgres.Generator.TYPE_MAPPING, + sge.DataType.Type.TIMESTAMPTZ: "TIMESTAMPTZ", + } + + TRANSFORMS = { + **Postgres.Generator.TRANSFORMS, + } diff --git a/ibis/backends/risingwave/tests/conftest.py b/ibis/backends/risingwave/tests/conftest.py index 35cfe6b8e1db..4ffb2ab85722 100644 --- a/ibis/backends/risingwave/tests/conftest.py +++ b/ibis/backends/risingwave/tests/conftest.py @@ -4,10 +4,8 @@ from typing import TYPE_CHECKING, Any import pytest -import sqlalchemy as sa import ibis -from ibis.backends.conftest import init_database from ibis.backends.tests.base import ServiceBackendTest if TYPE_CHECKING: @@ 
-35,23 +33,14 @@ class TestConf(ServiceBackendTest): supports_structs = False rounding_method = "half_to_even" service_name = "risingwave" - deps = "psycopg2", "sqlalchemy" + deps = ("psycopg2",) @property def test_files(self) -> Iterable[Path]: return self.data_dir.joinpath("csv").glob("*.csv") - def _load_data( - self, - *, - user: str = PG_USER, - password: str = PG_PASS, - host: str = PG_HOST, - port: int = PG_PORT, - database: str = IBIS_TEST_RISINGWAVE_DB, - **_: Any, - ) -> None: - """Load test data into a Risingwave backend instance. + def _load_data(self, **_: Any) -> None: + """Load test data into a PostgreSQL backend instance. Parameters ---------- @@ -60,15 +49,8 @@ def _load_data( script_dir Location of scripts defining schemas """ - init_database( - url=sa.engine.make_url( - f"risingwave://{user}:{password}@{host}:{port:d}/{database}" - ), - database=database, - schema=self.ddl_script, - isolation_level="AUTOCOMMIT", - recreate=False, - ) + with self.connection._safe_raw_sql(";".join(self.ddl_script)): + pass @staticmethod def connect(*, tmpdir, worker_id, port: int | None = None, **kw): @@ -91,13 +73,8 @@ def con(tmp_path_factory, data_dir, worker_id): @pytest.fixture(scope="module") -def db(con): - return con.database() - - -@pytest.fixture(scope="module") -def alltypes(db): - return db.functional_alltypes +def alltypes(con): + return con.tables.functional_alltypes @pytest.fixture(scope="module") @@ -105,20 +82,6 @@ def df(alltypes): return alltypes.execute() -@pytest.fixture(scope="module") -def alltypes_sqla(con, alltypes): - name = alltypes.op().name - return con._get_sqla_table(name) - - @pytest.fixture(scope="module") def intervals(con): return con.table("intervals") - - -@pytest.fixture -def translate(): - from ibis.backends.risingwave import Backend - - context = Backend.compiler.make_context() - return lambda expr: Backend.compiler.translator_class(expr, context).get_result() diff --git a/ibis/backends/risingwave/tests/snapshots/test_client/test_compile_toplevel/out.sql b/ibis/backends/risingwave/tests/snapshots/test_client/test_compile_toplevel/out.sql index cfbcf133a863..c0b4a0b83304 100644 --- a/ibis/backends/risingwave/tests/snapshots/test_client/test_compile_toplevel/out.sql +++ b/ibis/backends/risingwave/tests/snapshots/test_client/test_compile_toplevel/out.sql @@ -1,2 +1,3 @@ -SELECT sum(t0.foo) AS "Sum(foo)" -FROM t0 AS t0 \ No newline at end of file +SELECT + SUM("t0"."foo") AS "Sum(foo)" +FROM "t0" AS "t0" \ No newline at end of file diff --git a/ibis/backends/risingwave/tests/snapshots/test_functions/test_union_cte/False/out.sql b/ibis/backends/risingwave/tests/snapshots/test_functions/test_union_cte/False/out.sql index 34761d9a76e0..f0366d83444d 100644 --- a/ibis/backends/risingwave/tests/snapshots/test_functions/test_union_cte/False/out.sql +++ b/ibis/backends/risingwave/tests/snapshots/test_functions/test_union_cte/False/out.sql @@ -1 +1 @@ -WITH anon_2 AS (SELECT t2.string_col AS string_col, sum(t2.double_col) AS metric FROM functional_alltypes AS t2 GROUP BY 1), anon_3 AS (SELECT t3.string_col AS string_col, sum(t3.double_col) AS metric FROM functional_alltypes AS t3 GROUP BY 1), anon_1 AS (SELECT t2.string_col AS string_col, t2.metric AS metric FROM (SELECT anon_2.string_col AS string_col, anon_2.metric AS metric FROM anon_2 UNION ALL SELECT anon_3.string_col AS string_col, anon_3.metric AS metric FROM anon_3) AS t2), anon_4 AS (SELECT t3.string_col AS string_col, sum(t3.double_col) AS metric FROM functional_alltypes AS t3 GROUP BY 1) SELECT 
t1.string_col, t1.metric FROM (SELECT anon_1.string_col AS string_col, anon_1.metric AS metric FROM anon_1 UNION ALL SELECT anon_4.string_col AS string_col, anon_4.metric AS metric FROM anon_4) AS t1 \ No newline at end of file +WITH "t1" AS ( SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 ) SELECT "t7"."string_col", "t7"."metric" FROM ( SELECT "t5"."string_col", "t5"."metric" FROM ( SELECT * FROM "t1" AS "t2" UNION ALL SELECT * FROM "t1" AS "t4" ) AS "t5" UNION ALL SELECT * FROM "t1" AS "t3" ) AS "t7" \ No newline at end of file diff --git a/ibis/backends/risingwave/tests/snapshots/test_functions/test_union_cte/True/out.sql b/ibis/backends/risingwave/tests/snapshots/test_functions/test_union_cte/True/out.sql index 6ce31e7468bb..5a873785e92b 100644 --- a/ibis/backends/risingwave/tests/snapshots/test_functions/test_union_cte/True/out.sql +++ b/ibis/backends/risingwave/tests/snapshots/test_functions/test_union_cte/True/out.sql @@ -1 +1 @@ -WITH anon_2 AS (SELECT t2.string_col AS string_col, sum(t2.double_col) AS metric FROM functional_alltypes AS t2 GROUP BY 1), anon_3 AS (SELECT t3.string_col AS string_col, sum(t3.double_col) AS metric FROM functional_alltypes AS t3 GROUP BY 1), anon_1 AS (SELECT t2.string_col AS string_col, t2.metric AS metric FROM (SELECT anon_2.string_col AS string_col, anon_2.metric AS metric FROM anon_2 UNION SELECT anon_3.string_col AS string_col, anon_3.metric AS metric FROM anon_3) AS t2), anon_4 AS (SELECT t3.string_col AS string_col, sum(t3.double_col) AS metric FROM functional_alltypes AS t3 GROUP BY 1) SELECT t1.string_col, t1.metric FROM (SELECT anon_1.string_col AS string_col, anon_1.metric AS metric FROM anon_1 UNION SELECT anon_4.string_col AS string_col, anon_4.metric AS metric FROM anon_4) AS t1 \ No newline at end of file +WITH "t1" AS ( SELECT "t0"."string_col", SUM("t0"."double_col") AS "metric" FROM "functional_alltypes" AS "t0" GROUP BY 1 ) SELECT "t7"."string_col", "t7"."metric" FROM ( SELECT "t5"."string_col", "t5"."metric" FROM ( SELECT * FROM "t1" AS "t2" UNION SELECT * FROM "t1" AS "t4" ) AS "t5" UNION SELECT * FROM "t1" AS "t3" ) AS "t7" \ No newline at end of file diff --git a/ibis/backends/risingwave/tests/test_client.py b/ibis/backends/risingwave/tests/test_client.py index b5c7cfa98560..918b648b7bc8 100644 --- a/ibis/backends/risingwave/tests/test_client.py +++ b/ibis/backends/risingwave/tests/test_client.py @@ -4,17 +4,15 @@ import pandas as pd import pytest +import sqlglot as sg from pytest import param import ibis import ibis.expr.datatypes as dt import ibis.expr.types as ir -from ibis.tests.util import assert_equal +from ibis.util import gen_name pytest.importorskip("psycopg2") -sa = pytest.importorskip("sqlalchemy") - -from sqlalchemy.dialects import postgresql # noqa: E402 RISINGWAVE_TEST_DB = os.environ.get("IBIS_TEST_RISINGWAVE_DATABASE", "dev") IBIS_RISINGWAVE_HOST = os.environ.get("IBIS_TEST_RISINGWAVE_HOST", "localhost") @@ -64,47 +62,15 @@ def test_list_databases(con): assert RISINGWAVE_TEST_DB in con.list_databases() -def test_schema_type_conversion(con): - typespec = [ - # name, type, nullable - ("jsonb", postgresql.JSONB, True, dt.JSON), - ] - - sqla_types = [] - ibis_types = [] - for name, t, nullable, ibis_type in typespec: - sqla_types.append(sa.Column(name, t, nullable=nullable)) - ibis_types.append((name, ibis_type(nullable=nullable))) - - # Create a table with placeholder stubs for JSON, JSONB, and UUID. 
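
The schema-reflection test removed here depended on SQLAlchemy table objects; under the sqlglot port, type conversion goes through the RisingWaveType mapper added earlier in this patch. A rough sketch of exercising that mapper directly, assuming the from_ibis classmethod used elsewhere in these hunks; the commented output is an expectation, not a captured result:

    import ibis.expr.datatypes as dt
    from ibis.backends.base.sqlglot.datatypes import RisingWaveType

    # UUIDs are declared as VARCHAR and decimals drop their precision/scale,
    # per _from_ibis_UUID and _from_ibis_Decimal above.
    print(RisingWaveType.from_ibis(dt.uuid).sql("postgres"))            # VARCHAR
    print(RisingWaveType.from_ibis(dt.Decimal(38, 9)).sql("postgres"))  # DECIMAL
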
- table = sa.Table("tname", sa.MetaData(), *sqla_types) - - # Check that we can correctly create a schema with dt.any for the - # missing types. - schema = con._schema_from_sqla_table(table) - expected = ibis.schema(ibis_types) - - assert_equal(schema, expected) +def test_create_and_drop_table(con, temp_table): + sch = ibis.schema([("first_name", "string")]) + con.create_table(temp_table, schema=sch) + assert con.table(temp_table) is not None -@pytest.mark.parametrize("params", [{}, {"database": RISINGWAVE_TEST_DB}]) -def test_create_and_drop_table(con, temp_table, params): - sch = ibis.schema( - [ - ("first_name", "string"), - ("last_name", "string"), - ("department_name", "string"), - ("salary", "float64"), - ] - ) - - con.create_table(temp_table, schema=sch, **params) - assert con.table(temp_table, **params) is not None - - con.drop_table(temp_table, **params) + con.drop_table(temp_table) - with pytest.raises(sa.exc.NoSuchTableError): - con.table(temp_table, **params) + assert temp_table not in con.list_tables() @pytest.mark.parametrize( @@ -124,8 +90,8 @@ def test_create_and_drop_table(con, temp_table, params): ("date", dt.date), ("time", dt.time), ("time without time zone", dt.time), - ("timestamp without time zone", dt.timestamp), - ("timestamp with time zone", dt.Timestamp("UTC")), + ("timestamp without time zone", dt.Timestamp(scale=6)), + ("timestamp with time zone", dt.Timestamp("UTC", scale=6)), ("interval", dt.Interval("s")), ("numeric", dt.decimal), ("jsonb", dt.json), @@ -133,17 +99,16 @@ def test_create_and_drop_table(con, temp_table, params): ], ) def test_get_schema_from_query(con, pg_type, expected_type): - name = con._quote(ibis.util.guid()) + name = sg.table(gen_name("risingwave_temp_table"), quoted=True) with con.begin() as c: - c.exec_driver_sql(f"CREATE TABLE {name} (x {pg_type}, y {pg_type}[])") + c.execute(f"CREATE TABLE {name} (x {pg_type}, y {pg_type}[])") expected_schema = ibis.schema(dict(x=expected_type, y=dt.Array(expected_type))) result_schema = con._get_schema_using_query(f"SELECT x, y FROM {name}") assert result_schema == expected_schema with con.begin() as c: - c.exec_driver_sql(f"DROP TABLE {name}") + c.execute(f"DROP TABLE {name}") -@pytest.mark.xfail(reason="unsupported insert with CTEs") def test_insert_with_cte(con): X = con.create_table("X", schema=ibis.schema(dict(id="int")), temp=False) expr = X.join(X.mutate(a=X["id"] + 1), ["id"]) @@ -151,8 +116,3 @@ def test_insert_with_cte(con): assert Y.execute().empty con.drop_table("Y") con.drop_table("X") - - -def test_connect_url_with_empty_host(): - con = ibis.connect("risingwave:///dev") - assert con.con.url.host is None diff --git a/ibis/backends/risingwave/tests/test_functions.py b/ibis/backends/risingwave/tests/test_functions.py index c8874e390c60..d680fb3190f9 100644 --- a/ibis/backends/risingwave/tests/test_functions.py +++ b/ibis/backends/risingwave/tests/test_functions.py @@ -1,7 +1,6 @@ from __future__ import annotations import operator -import string import warnings from datetime import datetime @@ -13,104 +12,9 @@ import ibis import ibis.expr.datatypes as dt -import ibis.expr.types as ir -from ibis import config from ibis import literal as L pytest.importorskip("psycopg2") -sa = pytest.importorskip("sqlalchemy") - -from sqlalchemy.dialects import postgresql # noqa: E402 - - -@pytest.mark.parametrize( - ("left_func", "right_func"), - [ - param( - lambda t: t.double_col.cast("int8"), - lambda at: sa.cast(at.c.double_col, sa.SMALLINT), - id="double_to_int8", - ), - param( - lambda t: 
t.double_col.cast("int16"), - lambda at: sa.cast(at.c.double_col, sa.SMALLINT), - id="double_to_int16", - ), - param( - lambda t: t.string_col.cast("double"), - lambda at: sa.cast(at.c.string_col, postgresql.DOUBLE_PRECISION), - id="string_to_double", - ), - param( - lambda t: t.string_col.cast("float32"), - lambda at: sa.cast(at.c.string_col, postgresql.REAL), - id="string_to_float", - ), - param( - lambda t: t.string_col.cast("decimal"), - lambda at: sa.cast(at.c.string_col, sa.NUMERIC()), - id="string_to_decimal_no_params", - ), - param( - lambda t: t.string_col.cast("decimal(9, 3)"), - lambda at: sa.cast(at.c.string_col, sa.NUMERIC(9, 3)), - id="string_to_decimal_params", - ), - ], -) -def test_cast(alltypes, alltypes_sqla, translate, left_func, right_func): - left = left_func(alltypes) - right = right_func(alltypes_sqla.alias("t0")) - assert str(translate(left.op()).compile()) == str(right.compile()) - - -def test_date_cast(alltypes, alltypes_sqla, translate): - result = alltypes.date_string_col.cast("date") - expected = sa.cast(alltypes_sqla.alias("t0").c.date_string_col, sa.DATE) - assert str(translate(result.op())) == str(expected) - - -@pytest.mark.parametrize( - "column", - [ - "id", - "bool_col", - "tinyint_col", - "smallint_col", - "int_col", - "bigint_col", - "float_col", - "double_col", - "date_string_col", - "string_col", - "timestamp_col", - "year", - "month", - ], -) -def test_noop_cast(alltypes, alltypes_sqla, translate, column): - col = alltypes[column] - result = col.cast(col.type()) - expected = alltypes_sqla.alias("t0").c[column] - assert result.equals(col) - assert str(translate(result.op())) == str(expected) - - -def test_timestamp_cast_noop(alltypes, alltypes_sqla, translate): - # See GH #592 - result1 = alltypes.timestamp_col.cast("timestamp") - result2 = alltypes.int_col.cast("timestamp") - - assert isinstance(result1, ir.TimestampColumn) - assert isinstance(result2, ir.TimestampColumn) - - expected1 = alltypes_sqla.alias("t0").c.timestamp_col - expected2 = sa.cast( - sa.func.to_timestamp(alltypes_sqla.alias("t0").c.int_col), sa.TIMESTAMP() - ) - - assert str(translate(result1.op())) == str(expected1) - assert str(translate(result2.op())) == str(expected2) @pytest.mark.parametrize(("value", "expected"), [(0, None), (5.5, 5.5)]) @@ -427,12 +331,7 @@ def test_union_cte(alltypes, distinct, snapshot): expr2 = expr1.view() expr3 = expr1.view() expr = expr1.union(expr2, distinct=distinct).union(expr3, distinct=distinct) - result = " ".join( - line.strip() - for line in str( - expr.compile().compile(compile_kwargs={"literal_binds": True}) - ).splitlines() - ) + result = " ".join(line.strip() for line in expr.compile().splitlines()) snapshot.assert_match(result, "out.sql") @@ -568,18 +467,6 @@ def test_not_exists(alltypes, df): tm.assert_frame_equal(result, expected, check_index_type=False, check_dtype=False) -def test_interactive_repr_shows_error(alltypes): - # #591. 
Doing this in Postgres because so many built-in functions are - # not available - - expr = alltypes.int_col.convert_base(10, 2) - - with config.option_context("interactive", True): - result = repr(expr) - - assert "no translation rule" in result.lower() - - def test_subquery(alltypes, df): t = alltypes @@ -758,9 +645,6 @@ def array_types(con): return con.table("array_types") -@pytest.mark.xfail( - reason="Do not nest ARRAY types; ARRAY(basetype) handles multi-dimensional arrays of basetype" -) def test_array_length(array_types): expr = array_types.select( array_types.x.length().name("x_length"), @@ -861,60 +745,6 @@ def test_timestamp_with_timezone(con): assert str(result.dtype.tz) -@pytest.fixture( - params=[ - None, - "UTC", - "America/New_York", - "America/Los_Angeles", - "Europe/Paris", - "Chile/Continental", - "Asia/Tel_Aviv", - "Asia/Tokyo", - "Africa/Nairobi", - "Australia/Sydney", - ] -) -def tz(request): - return request.param - - -@pytest.fixture -def tzone_compute(con, temp_table, tz): - schema = ibis.schema([("ts", dt.Timestamp(tz)), ("b", "double"), ("c", "string")]) - con.create_table(temp_table, schema=schema, temp=False) - t = con.table(temp_table) - - n = 10 - df = pd.DataFrame( - { - "ts": pd.date_range("2017-04-01", periods=n, tz=tz).values, - "b": np.arange(n).astype("float64"), - "c": list(string.ascii_lowercase[:n]), - } - ) - - df.to_sql( - temp_table, - con.con, - index=False, - if_exists="append", - dtype={"ts": sa.TIMESTAMP(timezone=True), "b": sa.FLOAT, "c": sa.TEXT}, - ) - - yield t - con.drop_table(temp_table) - - -def test_ts_timezone_is_preserved(tzone_compute, tz): - assert dt.Timestamp(tz).equals(tzone_compute.ts.type()) - - -def test_timestamp_with_timezone_select(tzone_compute, tz): - ts = tzone_compute.ts.execute() - assert str(getattr(ts.dtype, "tz", None)) == str(tz) - - @pytest.mark.parametrize( ("left", "right", "type"), [ @@ -1010,8 +840,8 @@ def test_string_to_binary_cast(con): "FROM functional_alltypes LIMIT 10" ) with con.begin() as c: - cur = c.exec_driver_sql(sql_string) - raw_data = [row[0][0] for row in cur] + c.execute(sql_string) + raw_data = [row[0][0] for row in c.fetchall()] expected = pd.Series(raw_data, name=name) tm.assert_series_equal(result, expected) @@ -1027,6 +857,6 @@ def test_string_to_binary_round_trip(con): "FROM functional_alltypes LIMIT 10" ) with con.begin() as c: - cur = c.exec_driver_sql(sql_string) - expected = pd.Series([row[0][0] for row in cur], name=name) + c.execute(sql_string) + expected = pd.Series([row[0][0] for row in c.fetchall()], name=name) tm.assert_series_equal(result, expected) diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py index a314b4f7543c..e9a8347ab094 100644 --- a/ibis/backends/tests/errors.py +++ b/ibis/backends/tests/errors.py @@ -92,14 +92,18 @@ try: from psycopg2.errors import DivisionByZero as PsycoPg2DivisionByZero from psycopg2.errors import IndeterminateDatatype as PsycoPg2IndeterminateDatatype + from psycopg2.errors import InternalError_ as PsycoPg2InternalError from psycopg2.errors import ( InvalidTextRepresentation as PsycoPg2InvalidTextRepresentation, ) + from psycopg2.errors import ProgrammingError as PsycoPg2ProgrammingError from psycopg2.errors import SyntaxError as PsycoPg2SyntaxError except ImportError: PsycoPg2SyntaxError = ( PsycoPg2IndeterminateDatatype - ) = PsycoPg2InvalidTextRepresentation = PsycoPg2DivisionByZero = None + ) = ( + PsycoPg2InvalidTextRepresentation + ) = PsycoPg2DivisionByZero = PsycoPg2InternalError = PsycoPg2ProgrammingError = None 
try: from pymysql.err import NotSupportedError as MySQLNotSupportedError diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/risingwave/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/risingwave/out.sql new file mode 100644 index 000000000000..efc0daaef0d6 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/risingwave/out.sql @@ -0,0 +1,8 @@ +WITH "foo" AS ( + SELECT + * + FROM "test_risingwave_temp_mem_t_for_cte" AS "t0" +) +SELECT + COUNT(*) AS "x" +FROM "foo" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/risingwave/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/risingwave/out.sql new file mode 100644 index 000000000000..b309cd65374d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/risingwave/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/risingwave/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/risingwave/out.sql new file mode 100644 index 000000000000..b309cd65374d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/risingwave/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/risingwave/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/risingwave/out.sql new file mode 100644 index 000000000000..6bd0ba8c995d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/risingwave/out.sql @@ -0,0 +1,3 @@ +SELECT + SUM("t0"."bigint_col") AS "Sum(bigint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/risingwave/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/risingwave/out.sql new file mode 100644 index 000000000000..97338646649f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/risingwave/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + "t0"."id", + "t0"."bool_col" + FROM "functional_alltypes" AS "t0" + LIMIT 10 +) AS "t2" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/risingwave/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/risingwave/out.sql new file mode 100644 index 000000000000..d3969647c9ea --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/risingwave/out.sql @@ -0,0 +1,22 @@ +SELECT + CASE "t0"."continent" + WHEN 'NA' + THEN 'North America' + WHEN 'SA' + THEN 'South America' + WHEN 'EU' + THEN 'Europe' + WHEN 'AF' + THEN 'Africa' + WHEN 'AS' + THEN 'Asia' + WHEN 'OC' + THEN 'Oceania' + WHEN 'AN' + THEN 'Antarctica' + ELSE 'Unknown continent' + END AS "cont", + SUM("t0"."population") AS "total_pop" +FROM "countries" AS "t0" +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/risingwave/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/risingwave/out.sql new file mode 100644 index 000000000000..c1611d8cecc3 --- /dev/null +++ 
b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/risingwave/out.sql @@ -0,0 +1,9 @@ +SELECT + "t0"."x" IN ( + SELECT + "t0"."x" + FROM "t" AS "t0" + WHERE + "t0"."x" > 2 + ) AS "InSubquery(x)" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/risingwave/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/risingwave/out.sql new file mode 100644 index 000000000000..b7508b9ef535 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/risingwave/out.sql @@ -0,0 +1,60 @@ +WITH "t5" AS ( + SELECT + "t4"."field_of_study", + FIRST("t4"."diff") AS "diff" + FROM ( + SELECT + "t3"."field_of_study", + "t3"."years", + "t3"."degrees", + "t3"."earliest_degrees", + "t3"."latest_degrees", + "t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff" + FROM ( + SELECT + "t2"."field_of_study", + "t2"."years", + "t2"."degrees", + FIRST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees", + LAST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees" + FROM ( + SELECT + "t1"."field_of_study", + CAST(TO_JSONB("t1"."__pivoted__") -> 'f1' AS VARCHAR) AS "years", + CAST(TO_JSONB("t1"."__pivoted__") -> 'f2' AS BIGINT) AS "degrees" + FROM ( + SELECT + "t0"."field_of_study", + UNNEST( + ARRAY[ROW(CAST('1970-71' AS VARCHAR), CAST("t0"."1970-71" AS BIGINT)), ROW(CAST('1975-76' AS VARCHAR), CAST("t0"."1975-76" AS BIGINT)), ROW(CAST('1980-81' AS VARCHAR), CAST("t0"."1980-81" AS BIGINT)), ROW(CAST('1985-86' AS VARCHAR), CAST("t0"."1985-86" AS BIGINT)), ROW(CAST('1990-91' AS VARCHAR), CAST("t0"."1990-91" AS BIGINT)), ROW(CAST('1995-96' AS VARCHAR), CAST("t0"."1995-96" AS BIGINT)), ROW(CAST('2000-01' AS VARCHAR), CAST("t0"."2000-01" AS BIGINT)), ROW(CAST('2005-06' AS VARCHAR), CAST("t0"."2005-06" AS BIGINT)), ROW(CAST('2010-11' AS VARCHAR), CAST("t0"."2010-11" AS BIGINT)), ROW(CAST('2011-12' AS VARCHAR), CAST("t0"."2011-12" AS BIGINT)), ROW(CAST('2012-13' AS VARCHAR), CAST("t0"."2012-13" AS BIGINT)), ROW(CAST('2013-14' AS VARCHAR), CAST("t0"."2013-14" AS BIGINT)), ROW(CAST('2014-15' AS VARCHAR), CAST("t0"."2014-15" AS BIGINT)), ROW(CAST('2015-16' AS VARCHAR), CAST("t0"."2015-16" AS BIGINT)), ROW(CAST('2016-17' AS VARCHAR), CAST("t0"."2016-17" AS BIGINT)), ROW(CAST('2017-18' AS VARCHAR), CAST("t0"."2017-18" AS BIGINT)), ROW(CAST('2018-19' AS VARCHAR), CAST("t0"."2018-19" AS BIGINT)), ROW(CAST('2019-20' AS VARCHAR), CAST("t0"."2019-20" AS BIGINT))] + ) AS "__pivoted__" + FROM "humanities" AS "t0" + ) AS "t1" + ) AS "t2" + ) AS "t3" + ) AS "t4" + GROUP BY + 1 +) +SELECT + "t11"."field_of_study", + "t11"."diff" +FROM ( + SELECT + "t6"."field_of_study", + "t6"."diff" + FROM "t5" AS "t6" + ORDER BY + "t6"."diff" DESC NULLS LAST + LIMIT 10 + UNION ALL + SELECT + "t6"."field_of_study", + "t6"."diff" + FROM "t5" AS "t6" + WHERE + "t6"."diff" < 0 + ORDER BY + "t6"."diff" ASC + LIMIT 10 +) AS "t11" \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 2ae1aa8b4be4..63c40a7a24a7 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -22,6 +22,7 @@ MySQLNotSupportedError, OracleDatabaseError, PolarsInvalidOperationError, + PsycoPg2InternalError, Py4JError, PyDruidProgrammingError, 
PyODBCProgrammingError, @@ -92,6 +93,7 @@ def mean_udf(s): "druid", "oracle", "flink", + "risingwave", "exasol", ], raises=com.OperationNotDefinedError, @@ -439,6 +441,7 @@ def mean_and_std(v): "oracle", "exasol", "flink", + "risingwave", ], raises=com.OperationNotDefinedError, ), @@ -537,7 +540,7 @@ def mean_and_std(v): ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, ), ], ), @@ -562,7 +565,7 @@ def mean_and_std(v): ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, ), ], ), @@ -592,7 +595,7 @@ def mean_and_std(v): ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, ), ], ), @@ -649,7 +652,7 @@ def mean_and_std(v): ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, ), ], ), @@ -664,7 +667,7 @@ def mean_and_std(v): ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, ), ], ), @@ -771,21 +774,25 @@ def mean_and_std(v): param( lambda t: t.string_col.isin(["1", "7"]), lambda t: t.string_col.isin(["1", "7"]), - marks=pytest.mark.notimpl( - ["exasol"], - raises=(com.OperationNotDefinedError, ExaQueryError), - strict=False, - ), + marks=[ + pytest.mark.notimpl( + ["exasol"], + raises=(com.OperationNotDefinedError, ExaQueryError), + strict=False, + ), + ], id="is_in", ), param( lambda _: ibis._.string_col.isin(["1", "7"]), lambda t: t.string_col.isin(["1", "7"]), - marks=pytest.mark.notimpl( - ["exasol"], - raises=(com.OperationNotDefinedError, ExaQueryError), - strict=False, - ), + marks=[ + pytest.mark.notimpl( + ["exasol"], + raises=(com.OperationNotDefinedError, ExaQueryError), + strict=False, + ), + ], id="is_in_deferred", ), ], @@ -939,7 +946,7 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): pytest.mark.broken( ["risingwave"], reason="Invalid input syntax: direct arg in `percentile_cont` must be castable to float64", - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, ), ], ), @@ -954,7 +961,14 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): lambda t: t.string_col.isin(["1", "7"]), id="is_in", marks=[ - pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) + pytest.mark.notimpl( + ["datafusion"], raises=com.OperationNotDefinedError + ), + pytest.mark.notimpl( + "risingwave", + raises=PsycoPg2InternalError, + reason="probably incorrect filter syntax but not sure", + ), ], ), ], @@ -991,7 +1005,7 @@ def test_quantile( ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function covar_pop(integer, integer) does not exist", ), ], @@ -1011,7 +1025,7 @@ def test_quantile( ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function covar_pop(integer, integer) does not exist", ), ], @@ -1036,7 +1050,7 @@ def test_quantile( ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function covar_pop(integer, integer) does not exist", ), ], @@ -1051,7 +1065,7 @@ def test_quantile( raises=com.OperationNotDefinedError, ), pytest.mark.notyet( - ["postgres", "duckdb", "snowflake"], + ["postgres", "duckdb", "snowflake", "risingwave"], raises=com.UnsupportedOperationError, reason="backend only implements population correlation coefficient", ), @@ -1095,7 +1109,7 @@ def test_quantile( ), pytest.mark.notimpl( 
["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function covar_pop(integer, integer) does not exist", ), ], @@ -1124,7 +1138,7 @@ def test_quantile( ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function covar_pop(integer, integer) does not exist", ), ], @@ -1608,7 +1622,9 @@ def test_grouped_case(backend, con): @pytest.mark.notyet(["oracle"], raises=OracleDatabaseError) @pytest.mark.notyet(["pyspark"], raises=PySparkAnalysisException) @pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError) +@pytest.mark.notyet(["risingwave"], raises=AssertionError, strict=False) def test_group_concat_over_window(backend, con): + # TODO: this test is flaky on risingwave and I DO NOT LIKE IT input_df = pd.DataFrame( { "s": ["a|b|c", "b|a|c", "b|b|b|c|a"], diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 2586782954ba..08485208bd3a 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -8,7 +8,6 @@ import pandas.testing as tm import pytest import pytz -import sqlalchemy as sa import toolz from pytest import param @@ -24,6 +23,8 @@ MySQLOperationalError, PolarsComputeError, PsycoPg2IndeterminateDatatype, + PsycoPg2InternalError, + PsycoPg2ProgrammingError, PsycoPg2SyntaxError, Py4JJavaError, PySparkAnalysisException, @@ -83,7 +84,19 @@ def test_array_column(backend, alltypes, df): backend.assert_series_equal(result, expected, check_names=False) -def test_array_scalar(con): +ARRAY_BACKEND_TYPES = { + "clickhouse": "Array(Float64)", + "snowflake": "ARRAY", + "trino": "array(double)", + "bigquery": "ARRAY", + "duckdb": "DOUBLE[]", + "postgres": "numeric[]", + "risingwave": "numeric[]", + "flink": "ARRAY NOT NULL", +} + + +def test_array_scalar(con, backend): expr = ibis.array([1.0, 2.0, 3.0]) assert isinstance(expr, ir.ArrayScalar) @@ -126,11 +139,6 @@ def test_array_concat_variadic(con): # Issues #2370 @pytest.mark.notimpl(["flink"], raises=com.OperationNotDefinedError) -@pytest.mark.notyet( - ["risingwave"], - raises=sa.exc.InternalError, - reason="Bind error: cannot determine type of empty array", -) @pytest.mark.notyet(["trino"], raises=TrinoUserError) def test_array_concat_some_empty(con): left = ibis.literal([]) @@ -210,7 +218,7 @@ def test_array_index(con, idx): ) @pytest.mark.notimpl( ["risingwave"], - raises=ValueError, + raises=AssertionError, reason="Do not nest ARRAY types; ARRAY(basetype) handles multi-dimensional arrays of basetype", ) @pytest.mark.never( @@ -243,10 +251,11 @@ def test_array_discovery(backend): raises=GoogleBadRequest, ) @pytest.mark.notimpl(["dask"], raises=ValueError) -@pytest.mark.notimpl(["datafusion", "flink"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["risingwave"], - raises=ValueError, + # TODO: valueerror -> assertion error + raises=AssertionError, reason="Do not nest ARRAY types; ARRAY(basetype) handles multi-dimensional arrays of basetype", ) def test_unnest_simple(backend): @@ -266,11 +275,6 @@ def test_unnest_simple(backend): @builtin_array @pytest.mark.notimpl("dask", raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["datafusion", "flink"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl( - ["risingwave"], - raises=ValueError, - reason="ValueError: Do not nest ARRAY types; ARRAY(basetype) handles multi-dimensional arrays of basetype", -) def test_unnest_complex(backend): 
array_types = backend.array_types df = array_types.execute() @@ -309,11 +313,6 @@ def test_unnest_complex(backend): ) @pytest.mark.notimpl(["dask"], raises=ValueError) @pytest.mark.notimpl(["datafusion", "flink"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl( - ["risingwave"], - raises=ValueError, - reason="Do not nest ARRAY types; ARRAY(basetype) handles multi-dimensional arrays of basetype", -) def test_unnest_idempotent(backend): array_types = backend.array_types df = array_types.execute() @@ -335,11 +334,6 @@ def test_unnest_idempotent(backend): @builtin_array @pytest.mark.notimpl("dask", raises=ValueError) @pytest.mark.notimpl(["datafusion", "flink"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl( - ["risingwave"], - raises=ValueError, - reason="ValueError: Do not nest ARRAY types; ARRAY(basetype) handles multi-dimensional arrays of basetype", -) def test_unnest_no_nulls(backend): array_types = backend.array_types df = array_types.execute() @@ -366,17 +360,8 @@ def test_unnest_no_nulls(backend): @builtin_array @pytest.mark.notimpl("dask", raises=ValueError) -@pytest.mark.notimpl(["datafusion", "flink"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl( - ["risingwave"], - raises=ValueError, - reason="ValueError: Do not nest ARRAY types; ARRAY(basetype) handles multi-dimensional arrays of basetype", -) -@pytest.mark.broken( - ["pandas"], - raises=ValueError, - reason="all the input arrays must have same number of dimensions", -) +@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) +@pytest.mark.broken(["risingwave"], raises=AssertionError) def test_unnest_default_name(backend): array_types = backend.array_types df = array_types.execute() @@ -426,10 +411,11 @@ def test_unnest_default_name(backend): ["datafusion"], raises=Exception, reason="array_types table isn't defined" ) @pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl( +@pytest.mark.broken( ["risingwave"], - raises=ValueError, - reason="ValueError: Do not nest ARRAY types; ARRAY(basetype) handles multi-dimensional arrays of basetype", + raises=AssertionError, + reason="not broken; row ordering is not guaranteed and sometimes this test will pass", + strict=False, ) def test_array_slice(backend, start, stop): array_types = backend.array_types @@ -452,6 +438,11 @@ def test_array_slice(backend, start, stop): ], raises=com.OperationNotDefinedError, ) +@pytest.mark.broken( + ["risingwave"], + raises=PsycoPg2InternalError, + reason="TODO(Kexiang): seems a bug", +) @pytest.mark.notimpl( ["dask", "pandas"], raises=com.OperationNotDefinedError, @@ -480,7 +471,7 @@ def test_array_slice(backend, start, stop): ) @pytest.mark.broken( ["risingwave"], - raises=AssertionError, + raises=PsycoPg2InternalError, reason="TODO(Kexiang): seems a bug", ) def test_array_map(con, input, output): @@ -539,6 +530,11 @@ def test_array_map(con, input, output): param({"a": [[1, 2], [4]]}, {"a": [[2], [4]]}, id="no_nulls"), ], ) +@pytest.mark.notyet( + "risingwave", + raises=PsycoPg2InternalError, + reason="no support for not null column constraint", +) def test_array_filter(con, input, output): t = ibis.memtable(input, schema=ibis.schema(dict(a="!array"))) expected = pd.Series(output["a"]) @@ -559,15 +555,11 @@ def test_array_filter(con, input, output): @builtin_array @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl( - ["risingwave"], - raises=ValueError, - 
reason="ValueError: Do not nest ARRAY types; ARRAY(basetype) handles multi-dimensional arrays of basetype", -) @pytest.mark.broken( - ["flink"], - raises=Py4JJavaError, - reason="Caused by: java.lang.NullPointerException", + ["risingwave"], + raises=AssertionError, + reason="not broken; row ordering is not guaranteed and sometimes this test will pass", + strict=False, ) def test_array_contains(backend, con): t = backend.array_types @@ -617,11 +609,6 @@ def test_array_position(backend, con, a, expected_array): @builtin_array @pytest.mark.notimpl(["dask", "polars"], raises=com.OperationNotDefinedError) -@pytest.mark.broken( - ["risingwave"], - raises=AssertionError, - reason="TODO(Kexiang): seems a bug", -) @pytest.mark.parametrize( ("a"), [ @@ -712,13 +699,13 @@ def test_array_unique(con, input, expected): raises=AssertionError, reason="Refer to https://github.com/risingwavelabs/risingwave/issues/14735", ) -def test_array_sort(con): - t = ibis.memtable({"a": [[3, 2], [], [42, 42], []]}) - expr = t.a.sort() +def test_array_sort(backend, con): + t = ibis.memtable({"a": [[3, 2], [], [42, 42], []], "id": range(4)}) + expr = t.mutate(a=t.a.sort()).order_by("id") result = con.execute(expr) expected = pd.Series([[2, 3], [], [42, 42], []], dtype="object") - assert frozenset(map(tuple, result.values)) == frozenset( + assert frozenset(map(tuple, result["a"].values)) == frozenset( map(tuple, expected.values) ) @@ -818,9 +805,9 @@ def test_array_intersect(con, data): raises=ClickHouseDatabaseError, reason="ClickHouse won't accept dicts for struct type values", ) -@pytest.mark.notimpl(["risingwave"], raises=sa.exc.ProgrammingError) @pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError) -@pytest.mark.notimpl(["datafusion", "flink"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["risingwave"], raises=PsycoPg2InternalError) +@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError ) @@ -841,7 +828,6 @@ def test_unnest_struct(con): "dask", "datafusion", "druid", - "flink", "oracle", "pandas", "polars", @@ -852,7 +838,7 @@ def test_unnest_struct(con): ) @pytest.mark.notimpl( ["risingwave"], - raises=ValueError, + raises=com.OperationNotDefinedError, reason="Do not nest ARRAY types; ARRAY(basetype) handles multi-dimensional arrays of basetype", ) def test_zip(backend): @@ -879,9 +865,9 @@ def test_zip(backend): raises=ClickHouseDatabaseError, reason="https://github.com/ClickHouse/ClickHouse/issues/41112", ) -@pytest.mark.notimpl(["risingwave"], raises=sa.exc.ProgrammingError) @pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError) -@pytest.mark.notimpl(["datafusion", "flink"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["risingwave"], raises=PsycoPg2ProgrammingError) +@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["polars"], raises=com.OperationNotDefinedError, @@ -940,7 +926,11 @@ def flatten_data(): @pytest.mark.notyet( ["postgres", "risingwave"], reason="Postgres doesn't truly support arrays of arrays", - raises=(com.OperationNotDefinedError, PsycoPg2IndeterminateDatatype), + raises=( + com.OperationNotDefinedError, + PsycoPg2IndeterminateDatatype, + PsycoPg2InternalError, + ), ) @pytest.mark.parametrize( ("column", "expected"), @@ -1057,7 +1047,7 @@ def test_range_start_stop_step(con, start, stop, step): @pytest.mark.notimpl(["flink", "dask"], raises=com.OperationNotDefinedError) 
@pytest.mark.never( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Invalid parameter step: step size cannot equal zero", ) def test_range_start_stop_step_zero(con, start, stop): @@ -1096,6 +1086,11 @@ def test_unnest_empty_array(con): raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl(["sqlite"], raises=com.UnsupportedBackendType) +@pytest.mark.notyet( + "risingwave", + raises=PsycoPg2InternalError, + reason="no support for not null column constraint", +) def test_array_map_with_conflicting_names(backend, con): t = ibis.memtable({"x": [[1, 2]]}, schema=ibis.schema(dict(x="!array"))) expr = t.select(a=t.x.map(lambda x: x + 1)).select( @@ -1184,7 +1179,7 @@ def swap(token): id="pos", marks=pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function make_interval() does not exist", ), ), @@ -1200,7 +1195,7 @@ def swap(token): ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function neg(interval) does not exist", ), ], @@ -1220,7 +1215,7 @@ def swap(token): ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function neg(interval) does not exist", ), ], @@ -1252,7 +1247,7 @@ def test_timestamp_range(con, start, stop, step, freq, tzinfo): pytest.mark.notyet(["polars"], raises=PolarsComputeError), pytest.mark.notyet( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function make_interval() does not exist", ), ], @@ -1271,7 +1266,7 @@ def test_timestamp_range(con, start, stop, step, freq, tzinfo): ), pytest.mark.notyet( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function neg(interval) does not exist", ), ], @@ -1301,24 +1296,14 @@ def test_repr_timestamp_array(con, monkeypatch): assert ibis.options.default_backend is con expr = ibis.array(pd.date_range("2010-01-01", "2010-01-03", freq="D").tolist()) - assert "No translation rule" not in repr(expr) - assert "OperationNotDefinedError" not in repr(expr) + assert "Translation to backend failed" not in repr(expr) @pytest.mark.notyet( ["dask", "datafusion", "flink", "polars"], raises=com.OperationNotDefinedError, ) -@pytest.mark.broken( - ["risingwave"], - raises=sa.exc.OperationalError, - reason="Refer to https://github.com/risingwavelabs/risingwave/issues/14734", -) -@pytest.mark.broken( - ["pandas"], - raises=ValueError, - reason="cannot reindex on an axis with duplicate labels", -) +@pytest.mark.broken(["pandas"], raises=ValueError, reason="reindex on duplicate values") def test_unnest_range(con): expr = ibis.range(2).unnest().name("x").as_table().mutate({"y": 1.0}) result = con.execute(expr) diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index ffe86146b65f..2a1901efc520 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -95,6 +95,7 @@ def time_keyed_right(time_keyed_df2): "oracle", "mssql", "sqlite", + "risingwave", ] ) def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): @@ -135,6 +136,7 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op "oracle", "mssql", "sqlite", + "risingwave", ] ) def test_keyed_asof_join_with_tolerance( diff --git a/ibis/backends/tests/test_benchmarks.py b/ibis/backends/tests/test_benchmarks.py deleted file mode 100644 index 3234d3c8693f..000000000000 --- 
a/ibis/backends/tests/test_benchmarks.py +++ /dev/null @@ -1,900 +0,0 @@ -from __future__ import annotations - -import copy -import functools -import inspect -import itertools -import os -import string - -import numpy as np -import pandas as pd -import pytest -import sqlalchemy as sa -from packaging.version import parse as vparse - -import ibis -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -import ibis.expr.types as ir -from ibis.backends.base import _get_backend_names - -# from ibis.backends.pandas.udf import udf - -# FIXME(kszucs): pytestmark = pytest.mark.benchmark -pytestmark = pytest.mark.skip(reason="the backends must be rewritten first") - - -def make_t(): - return ibis.table( - [ - ("_timestamp", "int32"), - ("dim1", "int32"), - ("dim2", "int32"), - ("valid_seconds", "int32"), - ("meas1", "int32"), - ("meas2", "int32"), - ("year", "int32"), - ("month", "int32"), - ("day", "int32"), - ("hour", "int32"), - ("minute", "int32"), - ], - name="t", - ) - - -@pytest.fixture(scope="module") -def t(): - return make_t() - - -def make_base(t): - return t[ - ( - (t.year > 2016) - | ((t.year == 2016) & (t.month > 6)) - | ((t.year == 2016) & (t.month == 6) & (t.day > 6)) - | ((t.year == 2016) & (t.month == 6) & (t.day == 6) & (t.hour > 6)) - | ( - (t.year == 2016) - & (t.month == 6) - & (t.day == 6) - & (t.hour == 6) - & (t.minute >= 5) - ) - ) - & ( - (t.year < 2016) - | ((t.year == 2016) & (t.month < 6)) - | ((t.year == 2016) & (t.month == 6) & (t.day < 6)) - | ((t.year == 2016) & (t.month == 6) & (t.day == 6) & (t.hour < 6)) - | ( - (t.year == 2016) - & (t.month == 6) - & (t.day == 6) - & (t.hour == 6) - & (t.minute <= 5) - ) - ) - ] - - -@pytest.fixture(scope="module") -def base(t): - return make_base(t) - - -def make_large_expr(base): - src_table = base - src_table = src_table.mutate( - _timestamp=(src_table["_timestamp"] - src_table["_timestamp"] % 3600) - .cast("int32") - .name("_timestamp"), - valid_seconds=300, - ) - - aggs = [] - for meas in ["meas1", "meas2"]: - aggs.append(src_table[meas].sum().cast("float").name(meas)) - src_table = src_table.aggregate( - aggs, by=["_timestamp", "dim1", "dim2", "valid_seconds"] - ) - - part_keys = ["year", "month", "day", "hour", "minute"] - ts_col = src_table["_timestamp"].cast("timestamp") - new_cols = {} - for part_key in part_keys: - part_col = getattr(ts_col, part_key)() - new_cols[part_key] = part_col - src_table = src_table.mutate(**new_cols) - return src_table[ - [ - "_timestamp", - "dim1", - "dim2", - "meas1", - "meas2", - "year", - "month", - "day", - "hour", - "minute", - ] - ] - - -@pytest.fixture(scope="module") -def large_expr(base): - return make_large_expr(base) - - -@pytest.mark.benchmark(group="construction") -@pytest.mark.parametrize( - "construction_fn", - [ - pytest.param(lambda *_: make_t(), id="small"), - pytest.param(lambda t, *_: make_base(t), id="medium"), - pytest.param(lambda _, base: make_large_expr(base), id="large"), - ], -) -def test_construction(benchmark, construction_fn, t, base): - benchmark(construction_fn, t, base) - - -@pytest.mark.benchmark(group="builtins") -@pytest.mark.parametrize( - "expr_fn", - [ - pytest.param(lambda t, _base, _large_expr: t, id="small"), - pytest.param(lambda _t, base, _large_expr: base, id="medium"), - pytest.param(lambda _t, _base, large_expr: large_expr, id="large"), - ], -) -@pytest.mark.parametrize("builtin", [hash, str]) -def test_builtins(benchmark, expr_fn, builtin, t, base, large_expr): - expr = expr_fn(t, base, large_expr) - benchmark(builtin, expr) - - 
-_backends = set(_get_backend_names()) -# compile is a no-op -_backends.remove("pandas") - -_XFAIL_COMPILE_BACKENDS = {"dask", "pyspark", "polars", "risingwave"} - - -@pytest.mark.benchmark(group="compilation") -@pytest.mark.parametrize( - "module", - [ - pytest.param( - mod, - marks=pytest.mark.xfail( - condition=mod in _XFAIL_COMPILE_BACKENDS, - reason=f"{mod} backend doesn't support compiling UnboundTable", - ), - ) - for mod in _backends - ], -) -@pytest.mark.parametrize( - "expr_fn", - [ - pytest.param(lambda t, _base, _large_expr: t, id="small"), - pytest.param(lambda _t, base, _large_expr: base, id="medium"), - pytest.param(lambda _t, _base, large_expr: large_expr, id="large"), - ], -) -def test_compile(benchmark, module, expr_fn, t, base, large_expr): - try: - mod = getattr(ibis, module) - except (AttributeError, ImportError) as e: - pytest.skip(str(e)) - else: - expr = expr_fn(t, base, large_expr) - try: - benchmark(mod.compile, expr) - except (sa.exc.NoSuchModuleError, ImportError) as e: # delayed imports - pytest.skip(str(e)) - - -@pytest.fixture(scope="module") -def pt(): - n = 60_000 - data = pd.DataFrame( - { - "key": np.random.choice(16000, size=n), - "low_card_key": np.random.choice(30, size=n), - "value": np.random.rand(n), - "timestamps": pd.date_range( - start="2023-05-05 16:37:57", periods=n, freq="s" - ).values, - "timestamp_strings": pd.date_range( - start="2023-05-05 16:37:39", periods=n, freq="s" - ).values.astype(str), - "repeated_timestamps": pd.date_range(start="2018-09-01", periods=30).repeat( - int(n / 30) - ), - } - ) - - return ibis.pandas.connect(dict(df=data)).table("df") - - -def high_card_group_by(t): - return t.group_by(t.key).aggregate(avg_value=t.value.mean()) - - -def cast_to_dates(t): - return t.timestamps.cast(dt.date) - - -def cast_to_dates_from_strings(t): - return t.timestamp_strings.cast(dt.date) - - -def multikey_group_by_with_mutate(t): - return ( - t.mutate(dates=t.timestamps.cast("date")) - .group_by(["low_card_key", "dates"]) - .aggregate(avg_value=lambda t: t.value.mean()) - ) - - -def simple_sort(t): - return t.order_by([t.key]) - - -def simple_sort_projection(t): - return t[["key", "value"]].order_by(["key"]) - - -def multikey_sort(t): - return t.order_by(["low_card_key", "key"]) - - -def multikey_sort_projection(t): - return t[["low_card_key", "key", "value"]].order_by(["low_card_key", "key"]) - - -def low_card_rolling_window(t): - return ibis.trailing_range_window( - ibis.interval(days=2), - order_by=t.repeated_timestamps, - group_by=t.low_card_key, - ) - - -def low_card_grouped_rolling(t): - return t.value.mean().over(low_card_rolling_window(t)) - - -def high_card_rolling_window(t): - return ibis.trailing_range_window( - ibis.interval(days=2), - order_by=t.repeated_timestamps, - group_by=t.key, - ) - - -def high_card_grouped_rolling(t): - return t.value.mean().over(high_card_rolling_window(t)) - - -# @udf.reduction(["double"], "double") -# def my_mean(series): -# return series.mean() - - -def low_card_grouped_rolling_udf_mean(t): - return my_mean(t.value).over(low_card_rolling_window(t)) - - -def high_card_grouped_rolling_udf_mean(t): - return my_mean(t.value).over(high_card_rolling_window(t)) - - -# @udf.analytic(["double"], "double") -# def my_zscore(series): -# return (series - series.mean()) / series.std() - - -def low_card_window(t): - return ibis.window(group_by=t.low_card_key) - - -def high_card_window(t): - return ibis.window(group_by=t.key) - - -def low_card_window_analytics_udf(t): - return 
my_zscore(t.value).over(low_card_window(t)) - - -def high_card_window_analytics_udf(t): - return my_zscore(t.value).over(high_card_window(t)) - - -# @udf.reduction(["double", "double"], "double") -# def my_wm(v, w): -# return np.average(v, weights=w) - - -def low_card_grouped_rolling_udf_wm(t): - return my_wm(t.value, t.value).over(low_card_rolling_window(t)) - - -def high_card_grouped_rolling_udf_wm(t): - return my_wm(t.value, t.value).over(low_card_rolling_window(t)) - - -broken_pandas_grouped_rolling = pytest.mark.xfail( - condition=vparse("1.4") <= vparse(pd.__version__) < vparse("1.4.2"), - raises=ValueError, - reason="https://github.com/pandas-dev/pandas/pull/44068", -) - - -@pytest.mark.benchmark(group="execution") -@pytest.mark.parametrize( - "expression_fn", - [ - pytest.param(high_card_group_by, id="high_card_group_by"), - pytest.param(cast_to_dates, id="cast_to_dates"), - pytest.param(cast_to_dates_from_strings, id="cast_to_dates_from_strings"), - pytest.param(multikey_group_by_with_mutate, id="multikey_group_by_with_mutate"), - pytest.param(simple_sort, id="simple_sort"), - pytest.param(simple_sort_projection, id="simple_sort_projection"), - pytest.param(multikey_sort, id="multikey_sort"), - pytest.param(multikey_sort_projection, id="multikey_sort_projection"), - pytest.param( - low_card_grouped_rolling, - id="low_card_grouped_rolling", - marks=[broken_pandas_grouped_rolling], - ), - pytest.param( - high_card_grouped_rolling, - id="high_card_grouped_rolling", - marks=[broken_pandas_grouped_rolling], - ), - pytest.param( - low_card_grouped_rolling_udf_mean, - id="low_card_grouped_rolling_udf_mean", - marks=[broken_pandas_grouped_rolling], - ), - pytest.param( - high_card_grouped_rolling_udf_mean, - id="high_card_grouped_rolling_udf_mean", - marks=[broken_pandas_grouped_rolling], - ), - pytest.param(low_card_window_analytics_udf, id="low_card_window_analytics_udf"), - pytest.param( - high_card_window_analytics_udf, id="high_card_window_analytics_udf" - ), - pytest.param( - low_card_grouped_rolling_udf_wm, - id="low_card_grouped_rolling_udf_wm", - marks=[broken_pandas_grouped_rolling], - ), - pytest.param( - high_card_grouped_rolling_udf_wm, - id="high_card_grouped_rolling_udf_wm", - marks=[broken_pandas_grouped_rolling], - ), - ], -) -def test_execute(benchmark, expression_fn, pt): - expr = expression_fn(pt) - benchmark(expr.execute) - - -@pytest.fixture(scope="module") -def part(): - return ibis.table( - dict( - p_partkey="int64", - p_size="int64", - p_type="string", - p_mfgr="string", - ), - name="part", - ) - - -@pytest.fixture(scope="module") -def supplier(): - return ibis.table( - dict( - s_suppkey="int64", - s_nationkey="int64", - s_name="string", - s_acctbal="decimal(15, 3)", - s_address="string", - s_phone="string", - s_comment="string", - ), - name="supplier", - ) - - -@pytest.fixture(scope="module") -def partsupp(): - return ibis.table( - dict( - ps_partkey="int64", - ps_suppkey="int64", - ps_supplycost="decimal(15, 3)", - ), - name="partsupp", - ) - - -@pytest.fixture(scope="module") -def nation(): - return ibis.table( - dict(n_nationkey="int64", n_regionkey="int64", n_name="string"), - name="nation", - ) - - -@pytest.fixture(scope="module") -def region(): - return ibis.table(dict(r_regionkey="int64", r_name="string"), name="region") - - -@pytest.fixture(scope="module") -def tpc_h02(part, supplier, partsupp, nation, region): - REGION = "EUROPE" - SIZE = 25 - TYPE = "BRASS" - - expr = ( - part.join(partsupp, part.p_partkey == partsupp.ps_partkey) - .join(supplier, 
supplier.s_suppkey == partsupp.ps_suppkey) - .join(nation, supplier.s_nationkey == nation.n_nationkey) - .join(region, nation.n_regionkey == region.r_regionkey) - ) - - subexpr = ( - partsupp.join(supplier, supplier.s_suppkey == partsupp.ps_suppkey) - .join(nation, supplier.s_nationkey == nation.n_nationkey) - .join(region, nation.n_regionkey == region.r_regionkey) - ) - - subexpr = subexpr[ - (subexpr.r_name == REGION) & (expr.p_partkey == subexpr.ps_partkey) - ] - - filters = [ - expr.p_size == SIZE, - expr.p_type.like(f"%{TYPE}"), - expr.r_name == REGION, - expr.ps_supplycost == subexpr.ps_supplycost.min(), - ] - q = expr.filter(filters) - - q = q.select( - [ - q.s_acctbal, - q.s_name, - q.n_name, - q.p_partkey, - q.p_mfgr, - q.s_address, - q.s_phone, - q.s_comment, - ] - ) - - return q.order_by( - [ - ibis.desc(q.s_acctbal), - q.n_name, - q.s_name, - q.p_partkey, - ] - ).limit(100) - - -@pytest.mark.benchmark(group="repr") -def test_repr_tpc_h02(benchmark, tpc_h02): - benchmark(repr, tpc_h02) - - -@pytest.mark.benchmark(group="repr") -def test_repr_huge_union(benchmark): - n = 10 - raw_types = [ - "int64", - "float64", - "string", - "array, b: map>>>", - ] - tables = [ - ibis.table( - list(zip(string.ascii_letters, itertools.cycle(raw_types))), - name=f"t{i:d}", - ) - for i in range(n) - ] - expr = functools.reduce(ir.Table.union, tables) - benchmark(repr, expr) - - -@pytest.mark.benchmark(group="node_args") -def test_op_argnames(benchmark): - t = ibis.table([("a", "int64")]) - expr = t[["a"]] - benchmark(lambda op: op.argnames, expr.op()) - - -@pytest.mark.benchmark(group="node_args") -def test_op_args(benchmark): - t = ibis.table([("a", "int64")]) - expr = t[["a"]] - benchmark(lambda op: op.args, expr.op()) - - -@pytest.mark.benchmark(group="datatype") -def test_complex_datatype_parse(benchmark): - type_str = "array, b: map>>>" - expected = dt.Array( - dt.Struct(dict(a=dt.Array(dt.string), b=dt.Map(dt.string, dt.Array(dt.int64)))) - ) - assert dt.parse(type_str) == expected - benchmark(dt.parse, type_str) - - -@pytest.mark.benchmark(group="datatype") -@pytest.mark.parametrize("func", [str, hash]) -def test_complex_datatype_builtins(benchmark, func): - datatype = dt.Array( - dt.Struct(dict(a=dt.Array(dt.string), b=dt.Map(dt.string, dt.Array(dt.int64)))) - ) - benchmark(func, datatype) - - -@pytest.mark.benchmark(group="equality") -def test_large_expr_equals(benchmark, tpc_h02): - benchmark(ir.Expr.equals, tpc_h02, copy.deepcopy(tpc_h02)) - - -@pytest.mark.benchmark(group="datatype") -@pytest.mark.parametrize( - "dtypes", - [ - pytest.param( - [ - obj - for _, obj in inspect.getmembers( - dt, - lambda obj: isinstance(obj, dt.DataType), - ) - ], - id="singletons", - ), - pytest.param( - dt.Array( - dt.Struct( - dict( - a=dt.Array(dt.string), - b=dt.Map(dt.string, dt.Array(dt.int64)), - ) - ) - ), - id="complex", - ), - ], -) -def test_eq_datatypes(benchmark, dtypes): - def eq(a, b): - assert a == b - - benchmark(eq, dtypes, copy.deepcopy(dtypes)) - - -def multiple_joins(table, num_joins): - for _ in range(num_joins): - table = table.mutate(dummy=ibis.literal("")) - table = table.left_join(table, ["dummy"])[[table]] - - -@pytest.mark.parametrize("num_joins", [1, 10]) -@pytest.mark.parametrize("num_columns", [1, 10, 100]) -def test_multiple_joins(benchmark, num_joins, num_columns): - table = ibis.table( - {f"col_{i:d}": "string" for i in range(num_columns)}, - name="t", - ) - benchmark(multiple_joins, table, num_joins) - - -@pytest.fixture -def customers(): - return ibis.table( - dict( - 
customerid="int32", - name="string", - address="string", - citystatezip="string", - birthdate="date", - phone="string", - timezone="string", - lat="float64", - long="float64", - ), - name="customers", - ) - - -@pytest.fixture -def orders(): - return ibis.table( - dict( - orderid="int32", - customerid="int32", - ordered="timestamp", - shipped="timestamp", - items="string", - total="float64", - ), - name="orders", - ) - - -@pytest.fixture -def orders_items(): - return ibis.table( - dict(orderid="int32", sku="string", qty="int32", unit_price="float64"), - name="orders_items", - ) - - -@pytest.fixture -def products(): - return ibis.table( - dict( - sku="string", - desc="string", - weight_kg="float64", - cost="float64", - dims_cm="string", - ), - name="products", - ) - - -@pytest.mark.benchmark(group="compilation") -@pytest.mark.parametrize( - "module", - [ - pytest.param( - mod, - marks=pytest.mark.xfail( - condition=mod in _XFAIL_COMPILE_BACKENDS, - reason=f"{mod} backend doesn't support compiling UnboundTable", - ), - ) - for mod in _backends - ], -) -def test_compile_with_drops( - benchmark, module, customers, orders, orders_items, products -): - expr = ( - customers.join(orders, "customerid") - .join(orders_items, "orderid") - .join(products, "sku") - .drop("customerid", "qty", "total", "items") - .drop("dims_cm", "cost") - .mutate(o_date=lambda t: t.shipped.date()) - .filter(lambda t: t.ordered == t.shipped) - ) - - try: - mod = getattr(ibis, module) - except (AttributeError, ImportError) as e: - pytest.skip(str(e)) - else: - try: - benchmark(mod.compile, expr) - except sa.exc.NoSuchModuleError as e: - pytest.skip(str(e)) - - -def test_repr_join(benchmark, customers, orders, orders_items, products): - expr = ( - customers.join(orders, "customerid") - .join(orders_items, "orderid") - .join(products, "sku") - .drop("customerid", "qty", "total", "items") - ) - op = expr.op() - benchmark(repr, op) - - -@pytest.mark.parametrize("overwrite", [True, False], ids=["overwrite", "no_overwrite"]) -def test_insert_duckdb(benchmark, overwrite, tmp_path): - pytest.importorskip("duckdb") - - n_rows = int(1e4) - table_name = "t" - schema = ibis.schema(dict(a="int64", b="int64", c="int64")) - t = ibis.memtable(dict.fromkeys(list("abc"), range(n_rows)), schema=schema) - - con = ibis.duckdb.connect(tmp_path / "test_insert.ddb") - con.create_table(table_name, schema=schema) - benchmark(con.insert, table_name, t, overwrite=overwrite) - - -def test_snowflake_medium_sized_to_pandas(benchmark): - pytest.importorskip("snowflake.connector") - - if (url := os.environ.get("SNOWFLAKE_URL")) is None: - pytest.skip("SNOWFLAKE_URL environment variable not set") - - con = ibis.connect(url) - - # LINEITEM at scale factor 1 is around 6MM rows, but we limit to 1,000,000 - # to make the benchmark fast enough for development, yet large enough to show a - # difference if there's a performance hit - lineitem = con.table("LINEITEM", schema="SNOWFLAKE_SAMPLE_DATA.TPCH_SF1").limit( - 1_000_000 - ) - - benchmark.pedantic(lineitem.to_pandas, rounds=5, iterations=1, warmup_rounds=1) - - -def test_parse_many_duckdb_types(benchmark): - parse = pytest.importorskip("ibis.backends.duckdb.datatypes").DuckDBType.from_string - - def parse_many(types): - list(map(parse, types)) - - types = ["VARCHAR", "INTEGER", "DOUBLE", "BIGINT"] * 1000 - benchmark(parse_many, types) - - -@pytest.fixture(scope="session") -def sql() -> str: - return """ - SELECT t1.id as t1_id, x, t2.id as t2_id, y - FROM t1 INNER JOIN t2 - ON t1.id = t2.id - """ - - 
-@pytest.fixture(scope="session") -def ddb(tmp_path_factory): - duckdb = pytest.importorskip("duckdb") - - N = 20_000_000 - - con = duckdb.connect() - - path = str(tmp_path_factory.mktemp("duckdb") / "data.ddb") - sql = ( - lambda var, table, n=N: f""" - CREATE TABLE {table} AS - SELECT ROW_NUMBER() OVER () AS id, {var} - FROM ( - SELECT {var} - FROM RANGE({n}) _ ({var}) - ORDER BY RANDOM() - ) - """ - ) - - with duckdb.connect(path) as con: - con.execute(sql("x", table="t1")) - con.execute(sql("y", table="t2")) - return path - - -def test_duckdb_to_pyarrow(benchmark, sql, ddb) -> None: - # yes, we're benchmarking duckdb here, not ibis - # - # we do this to get a baseline for comparison - duckdb = pytest.importorskip("duckdb") - con = duckdb.connect(ddb, read_only=True) - - benchmark(lambda sql: con.sql(sql).to_arrow_table(), sql) - - -def test_ibis_duckdb_to_pyarrow(benchmark, sql, ddb) -> None: - pytest.importorskip("duckdb") - - con = ibis.duckdb.connect(ddb, read_only=True) - - expr = con.sql(sql) - benchmark(expr.to_pyarrow) - - -@pytest.fixture -def diffs(): - return ibis.table( - { - "id": "int64", - "validation_name": "string", - "difference": "float64", - "pct_difference": "float64", - "pct_threshold": "float64", - "validation_status": "string", - }, - name="diffs", - ) - - -@pytest.fixture -def srcs(): - return ibis.table( - { - "id": "int64", - "validation_name": "string", - "validation_type": "string", - "aggregation_type": "string", - "table_name": "string", - "column_name": "string", - "primary_keys": "string", - "num_random_rows": "string", - "agg_value": "float64", - }, - name="srcs", - ) - - -@pytest.fixture -def nrels(): - return 300 - - -def make_big_union(t, nrels): - return ibis.union(*[t] * nrels) - - -@pytest.fixture -def src(srcs, nrels): - return make_big_union(srcs, nrels) - - -@pytest.fixture -def diff(diffs, nrels): - return make_big_union(diffs, nrels) - - -def test_big_eq_expr(benchmark, src, diff): - benchmark(ops.core.Node.equals, src.op(), diff.op()) - - -def test_big_join_expr(benchmark, src, diff): - benchmark(ir.Table.join, src, diff, ["validation_name"], how="outer") - - -def test_big_join_execute(benchmark, nrels): - pytest.importorskip("duckdb") - - con = ibis.duckdb.connect() - - # cache to avoid a request-per-union operand - src = make_big_union( - con.read_csv( - "https://github.com/ibis-project/ibis/files/12580336/source_pivot.csv" - ) - .rename(id="column0") - .cache(), - nrels, - ) - - diff = make_big_union( - con.read_csv( - "https://github.com/ibis-project/ibis/files/12580340/differences_pivot.csv" - ) - .rename(id="column0") - .cache(), - nrels, - ) - - expr = src.join(diff, ["validation_name"], how="outer") - t = benchmark.pedantic(expr.to_pyarrow, rounds=1, iterations=1, warmup_rounds=1) - assert len(t) diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 19868964eb27..8bad125da763 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -25,7 +25,11 @@ import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis.backends.conftest import ALL_BACKENDS -from ibis.backends.tests.errors import Py4JJavaError, PyDruidProgrammingError +from ibis.backends.tests.errors import ( + PsycoPg2InternalError, + Py4JJavaError, + PyDruidProgrammingError, +) from ibis.util import gen_name, guid if TYPE_CHECKING: @@ -115,7 +119,8 @@ def test_create_table(backend, con, temp_table, lamduh, sch): marks=[ pytest.mark.notyet(["clickhouse"], reason="Can't specify both"), 
pytest.mark.notyet( - ["pyspark", "trino", "exasol"], reason="No support for temp tables" + ["pyspark", "trino", "exasol", "risingwave"], + reason="No support for temp tables", ), pytest.mark.never(["polars"], reason="Everything in-memory is temp"), pytest.mark.broken(["mssql"], reason="Incorrect temp table syntax"), @@ -132,7 +137,8 @@ def test_create_table(backend, con, temp_table, lamduh, sch): id="temp, no overwrite", marks=[ pytest.mark.notyet( - ["pyspark", "trino", "exasol"], reason="No support for temp tables" + ["pyspark", "trino", "exasol", "risingwave"], + reason="No support for temp tables", ), pytest.mark.never(["polars"], reason="Everything in-memory is temp"), pytest.mark.broken(["mssql"], reason="Incorrect temp table syntax"), @@ -308,7 +314,7 @@ def tmpcon(alchemy_con): @mark.notimpl(["exasol"], reason="Exasol does not support temporary tables") @pytest.mark.never( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: CREATE TEMPORARY TABLE", ) def test_create_temporary_table_from_schema(tmpcon, new_schema): @@ -375,7 +381,7 @@ def test_rename_table(con, temp_table, temp_table_orig): ) @pytest.mark.never( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason='Feature is not yet implemented: column constraints "NOT NULL"', ) def test_nullable_input_output(con, temp_table): @@ -719,11 +725,6 @@ def test_unsigned_integer_type(alchemy_con, alchemy_temp_table): marks=mark.postgres, id="postgresql", ), - param( - "postgresql://root:@localhost:4566/dev", - marks=mark.risingwave, - id="risingwave", - ), param( "pyspark://?spark.app.name=test-pyspark", marks=[ @@ -1120,11 +1121,6 @@ def test_set_backend_name(name, monkeypatch): marks=mark.postgres, id="postgres", ), - param( - "postgres://root:@localhost:4566/dev", - marks=mark.risingwave, - id="risingwave", - ), ], ) def test_set_backend_url(url, monkeypatch): @@ -1188,7 +1184,7 @@ def test_create_table_timestamp(con, temp_table): @mark.notimpl(["exasol"], reason="Exasol does not support temporary tables") @pytest.mark.never( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: CREATE TEMPORARY TABLE", ) def test_persist_expression_ref_count(backend, con, alltypes): @@ -1213,7 +1209,7 @@ def test_persist_expression_ref_count(backend, con, alltypes): @mark.notimpl(["exasol"], reason="Exasol does not support temporary tables") @pytest.mark.never( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: CREATE TEMPORARY TABLE", ) def test_persist_expression(backend, alltypes): @@ -1232,7 +1228,7 @@ def test_persist_expression(backend, alltypes): @mark.notimpl(["exasol"], reason="Exasol does not support temporary tables") @pytest.mark.never( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: CREATE TEMPORARY TABLE", ) def test_persist_expression_contextmanager(backend, alltypes): @@ -1253,7 +1249,7 @@ def test_persist_expression_contextmanager(backend, alltypes): @mark.notimpl(["exasol"], reason="Exasol does not support temporary tables") @pytest.mark.never( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: CREATE TEMPORARY TABLE", ) def test_persist_expression_contextmanager_ref_count(backend, con, alltypes): @@ -1276,7 +1272,7 @@ def 
test_persist_expression_contextmanager_ref_count(backend, con, alltypes): ) @pytest.mark.never( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: CREATE TEMPORARY TABLE", ) @mark.notimpl(["exasol"], reason="Exasol does not support temporary tables") @@ -1318,7 +1314,7 @@ def test_persist_expression_multiple_refs(backend, con, alltypes): @mark.notimpl(["exasol"], reason="Exasol does not support temporary tables") @pytest.mark.never( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: CREATE TEMPORARY TABLE", ) def test_persist_expression_repeated_cache(alltypes): @@ -1345,6 +1341,11 @@ def test_persist_expression_repeated_cache(alltypes): ["oracle"], reason="Oracle error message for a missing table/view doesn't include the name of the table", ) +@pytest.mark.never( + ["risingwave"], + raises=PsycoPg2InternalError, + reason="Feature is not yet implemented: CREATE TEMPORARY TABLE", +) def test_persist_expression_release(con, alltypes): non_cached_table = alltypes.mutate( test_column="calculation", other_column="big calc 3" @@ -1431,7 +1432,7 @@ def test_create_schema(con_create_schema): @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: information_schema.schemata is not supported,", ) def test_list_schemas(con_create_schema): diff --git a/ibis/backends/tests/test_dot_sql.py b/ibis/backends/tests/test_dot_sql.py index f2621791442a..f938ea0143b7 100644 --- a/ibis/backends/tests/test_dot_sql.py +++ b/ibis/backends/tests/test_dot_sql.py @@ -231,7 +231,7 @@ def test_dot_sql_reuse_alias_with_different_types(backend, alltypes, df): backend.assert_series_equal(foo2.x.execute(), expected2) -_NO_SQLGLOT_DIALECT = {"pandas", "dask", "druid", "flink", "risingwave"} +_NO_SQLGLOT_DIALECT = {"pandas", "dask", "druid", "flink"} no_sqlglot_dialect = sorted( # TODO(cpcloud): remove the strict=False hack once backends are ported to # sqlglot @@ -244,11 +244,6 @@ def test_dot_sql_reuse_alias_with_different_types(backend, alltypes, df): "dialect", [*sorted(_get_backend_names() - _NO_SQLGLOT_DIALECT), *no_sqlglot_dialect], ) -@pytest.mark.notyet( - ["risingwave"], - raises=ValueError, - reason="risingwave doesn't support sqlglot.dialects.dialect.Dialect", -) @pytest.mark.notyet(["polars"], raises=PolarsComputeError) @dot_sql_notimpl @dot_sql_never @@ -276,11 +271,6 @@ def test_table_dot_sql_transpile(backend, alltypes, dialect, df): ["druid"], raises=AttributeError, reason="druid doesn't respect column names" ) @pytest.mark.notyet(["snowflake", "bigquery"]) -@pytest.mark.notyet( - ["risingwave"], - raises=ValueError, - reason="risingwave doesn't support sqlglot.dialects.dialect.Dialect", -) @dot_sql_notimpl @dot_sql_never def test_con_dot_sql_transpile(backend, con, dialect, df): @@ -300,11 +290,6 @@ def test_con_dot_sql_transpile(backend, con, dialect, df): @dot_sql_never @pytest.mark.notimpl(["druid", "flink", "polars", "exasol"]) @pytest.mark.notyet(["snowflake"], reason="snowflake column names are case insensitive") -@pytest.mark.notyet( - ["risingwave"], - raises=ValueError, - reason="risingwave doesn't support sqlglot.dialects.dialect.Dialect", -) def test_order_by_no_projection(backend): con = backend.connection expr = ( diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index b3ce4d4cfaf0..02eefb296c3a 100644 --- a/ibis/backends/tests/test_export.py 
+++ b/ibis/backends/tests/test_export.py @@ -4,7 +4,6 @@ import pyarrow as pa import pyarrow.csv as pcsv import pytest -import sqlalchemy as sa from pytest import param import ibis @@ -342,11 +341,6 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): id="decimal128", marks=[ pytest.mark.notyet(["flink"], raises=NotImplementedError), - pytest.mark.notyet( - ["risingwave"], - raises=sa.exc.DBAPIError, - reason="Feature is not yet implemented: unsupported data type: NUMERIC(38,9)", - ), pytest.mark.notyet(["exasol"], raises=ExaQueryError), ], ), @@ -367,11 +361,6 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): raises=(PySparkParseException, PySparkArithmeticException), reason="precision is out of range", ), - pytest.mark.notyet( - ["risingwave"], - raises=sa.exc.DBAPIError, - reason="Feature is not yet implemented: unsupported data type: NUMERIC(76,38)", - ), pytest.mark.notyet(["flink"], raises=NotImplementedError), pytest.mark.notyet(["exasol"], raises=ExaQueryError), ], @@ -495,16 +484,7 @@ def test_to_pandas_batches_empty_table(backend, con): @pytest.mark.parametrize( "n", [ - param( - None, - marks=[ - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="risingwave doesn't support limit null", - ), - ], - ), + None, 1, ], ) @@ -516,19 +496,11 @@ def test_to_pandas_batches_nonempty_table(backend, con, n): assert sum(map(len, t.to_pandas_batches())) == n +@pytest.mark.notimpl(["flink"]) @pytest.mark.parametrize( "n", [ - param( - None, - marks=[ - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="risingwave doesn't support limit null", - ), - ], - ), + None, 0, 1, 2, diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index f4065eb9058f..8ffd569a71a9 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -9,7 +9,6 @@ import numpy as np import pandas as pd import pytest -import sqlalchemy as sa import toolz from pytest import param @@ -26,6 +25,7 @@ ImpalaHiveServer2Error, MySQLProgrammingError, OracleDatabaseError, + PsycoPg2InternalError, Py4JJavaError, PyDruidProgrammingError, PyODBCDataError, @@ -548,7 +548,7 @@ def test_order_by(backend, alltypes, df, key, df_kwargs): @pytest.mark.notimpl(["dask", "pandas", "polars", "mssql", "druid"]) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function random() does not exist", ) def test_order_by_random(alltypes): @@ -852,12 +852,12 @@ def test_typeof(con): @pytest.mark.notimpl(["datafusion", "druid"]) @pytest.mark.notimpl(["pyspark"], condition=is_older_than("pyspark", "3.5.0")) @pytest.mark.notyet(["dask"], reason="not supported by the backend") +@pytest.mark.notyet(["exasol"], raises=ExaQueryError, reason="not supported by exasol") @pytest.mark.broken( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="https://github.com/risingwavelabs/risingwave/issues/1343", ) -@pytest.mark.notyet(["exasol"], raises=ExaQueryError, reason="not supported by exasol") def test_isin_uncorrelated( backend, batting, awards_players, batting_df, awards_players_df ): @@ -1037,11 +1037,6 @@ def query(t, group_cols): reason="backend doesn't support arrays and we don't implement pivot_longer with unions yet", raises=com.OperationNotDefinedError, ) -@pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason='sql parser error: Expected ), found: TEXT at line:3, column:219 
Near "))]) AS anon_1(f1"', -) @pytest.mark.broken( ["trino"], reason="invalid code generated for unnesting a struct", @@ -1163,7 +1158,7 @@ def test_pivot_wider(backend): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function last(double precision) does not exist, do you mean left or least", ) def test_distinct_on_keep(backend, on, keep): @@ -1233,7 +1228,7 @@ def test_distinct_on_keep(backend, on, keep): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function first(double precision) does not exist", ) def test_distinct_on_keep_is_none(backend, on): @@ -1287,8 +1282,6 @@ def test_hash_consistent(backend, alltypes): "pyspark", "risingwave", "sqlite", - "clickhouse", - "mssql", ] ) def test_hashbytes(backend, alltypes): @@ -1322,8 +1315,6 @@ def hash_256(col): "risingwave", "snowflake", "trino", - "pyspark", - "mssql", ] ) @pytest.mark.notyet( @@ -1352,7 +1343,6 @@ def hash_256(col): "pandas", "dask", "oracle", - "risingwave", "snowflake", "sqlite", ] @@ -1513,26 +1503,12 @@ def test_try_cast_func(con, from_val, to_type, func): param( slice(None, None), lambda t: t.count().to_pandas(), - marks=[ - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="risingwave doesn't support limit/offset", - ), - ], id="[:]", ), param(slice(0, 0), lambda _: 0, id="[0:0]"), param( slice(0, None), lambda t: t.count().to_pandas(), - marks=[ - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="risingwave doesn't support limit/offset", - ), - ], id="[0:]", ), # positive stop @@ -1588,11 +1564,6 @@ def test_try_cast_func(con, from_val, to_type, func): raises=ImpalaHiveServer2Error, reason="impala doesn't support OFFSET without ORDER BY", ), - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="risingwave doesn't support limit/offset", - ), pytest.mark.notyet(["oracle"], raises=com.UnsupportedArgumentError), ], ), @@ -1680,16 +1651,16 @@ def test_static_table_slice(backend, slc, expected_count_fn): raises=com.UnsupportedArgumentError, reason="Removed half-baked dynamic offset functionality for now", ) +@pytest.mark.notimpl( + ["risingwave"], + raises=PsycoPg2InternalError, + reason="risingwave doesn't support limit/offset", +) @pytest.mark.notyet( ["trino"], raises=TrinoUserError, reason="backend doesn't support dynamic limit/offset", ) -@pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="risingwave doesn't support limit/offset", -) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.notyet( ["clickhouse"], @@ -1770,16 +1741,16 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): ) @pytest.mark.notyet(["pyspark"], reason="pyspark doesn't support dynamic limit/offset") @pytest.mark.notyet(["flink"], reason="flink doesn't support dynamic limit/offset") -@pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="risingwave doesn't support limit/offset", -) @pytest.mark.notyet( ["mssql"], reason="doesn't support dynamic limit/offset; compiles incorrectly in sqlglot", raises=AssertionError, ) +@pytest.mark.notimpl( + ["risingwave"], + raises=PsycoPg2InternalError, + reason="risingwave doesn't support limit/offset", +) def test_dynamic_table_slice_with_computed_offset(backend): t = backend.functional_alltypes @@ -1798,17 +1769,10 @@ def test_dynamic_table_slice_with_computed_offset(backend): backend.assert_frame_equal(result, 
expected) -@pytest.mark.notimpl( - [ - "druid", - "flink", - "polars", - "snowflake", - ] -) +@pytest.mark.notimpl(["druid", "flink", "polars", "snowflake"]) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function random() does not exist", ) def test_sample(backend): @@ -1826,17 +1790,10 @@ def test_sample(backend): backend.assert_frame_equal(empty, df.iloc[:0]) -@pytest.mark.notimpl( - [ - "druid", - "flink", - "polars", - "snowflake", - ] -) +@pytest.mark.notimpl(["druid", "flink", "polars", "snowflake"]) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function random() does not exist", ) def test_sample_memtable(con, backend): @@ -1895,11 +1852,6 @@ def test_substitute(backend): ["dask", "pandas", "polars"], raises=NotImplementedError, reason="not a SQL backend" ) @pytest.mark.notimpl(["flink"], reason="no sqlglot dialect", raises=ValueError) -@pytest.mark.notimpl( - ["risingwave"], - raises=ValueError, - reason="risingwave doesn't support sqlglot.dialects.dialect.Dialect", -) def test_simple_memtable_construct(con): t = ibis.memtable({"a": [1, 2]}) expr = t.a diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index a20cd61a98d3..887c10547b6d 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -198,7 +198,7 @@ def test_semi_join_topk(con, batting, awards_players, func): @pytest.mark.notimpl(["dask", "druid", "exasol", "oracle"]) @pytest.mark.notimpl( - ["postgres", "mssql"], + ["postgres", "mssql", "risingwave"], raises=com.IbisTypeError, reason="postgres can't handle null types columns", ) diff --git a/ibis/backends/tests/test_map.py b/ibis/backends/tests/test_map.py index 4aa30d079620..74d62db9a8b5 100644 --- a/ibis/backends/tests/test_map.py +++ b/ibis/backends/tests/test_map.py @@ -3,13 +3,12 @@ import numpy as np import pandas as pd import pytest -import sqlalchemy as sa from pytest import param import ibis import ibis.common.exceptions as exc import ibis.expr.datatypes as dt -from ibis.backends.tests.errors import Py4JJavaError +from ibis.backends.tests.errors import PsycoPg2InternalError, Py4JJavaError pytestmark = [ pytest.mark.never( @@ -38,7 +37,7 @@ def test_map_table(backend): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function hstore(character varying[], character varying[]) does not exist", ) def test_column_map_values(backend): @@ -73,7 +72,7 @@ def test_column_map_merge(backend): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function hstore(character varying[], character varying[]) does not exist", ) def test_literal_map_keys(con): @@ -93,7 +92,7 @@ def test_literal_map_keys(con): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function hstore(character varying[], character varying[]) does not exist", ) def test_literal_map_values(con): @@ -145,7 +144,7 @@ def test_map_scalar_contains_key_scalar(con): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function hstore(character varying[], character varying[]) does not exist", ) def test_map_scalar_contains_key_column(backend, alltypes, df): @@ -215,7 +214,7 @@ def test_literal_map_merge(con): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + 
raises=PsycoPg2InternalError, reason="function hstore(character varying[], character varying[]) does not exist", ) def test_literal_map_getitem_broadcast(backend, alltypes, df): @@ -237,7 +236,7 @@ def test_literal_map_getitem_broadcast(backend, alltypes, df): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function hstore(character varying[], character varying[]) does not exist", ) def test_literal_map_get_broadcast(backend, alltypes, df): @@ -269,7 +268,7 @@ def test_literal_map_get_broadcast(backend, alltypes, df): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function hstore(character varying[], character varying[]) does not exist", ) def test_map_construct_dict(con, keys, values): @@ -361,7 +360,7 @@ def test_map_get_with_null_on_not_nullable(con, null_value): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function hstore(character varying[], character varying[]) does not exist", ) def test_map_get_with_null_on_null_type_with_null(con, null_value): @@ -392,7 +391,7 @@ def test_map_get_with_null_on_null_type_with_non_null(con): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function hstore(character varying[], character varying[]) does not exist", ) def test_map_create_table(con, temp_table): @@ -410,7 +409,7 @@ def test_map_create_table(con, temp_table): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function hstore(character varying[], character varying[]) does not exist", ) def test_map_length(con): diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 76ffe94aae62..b92abea1470c 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -9,7 +9,6 @@ import numpy as np import pandas as pd import pytest -import sqlalchemy as sa from pytest import param import ibis @@ -25,6 +24,7 @@ MySQLOperationalError, OracleDatabaseError, PsycoPg2DivisionByZero, + PsycoPg2InternalError, Py4JError, PyDruidProgrammingError, PyODBCDataError, @@ -254,9 +254,9 @@ def test_numeric_literal(con, backend, expr, expected_types): "dask": decimal.Decimal("1.1"), "exasol": decimal.Decimal("1"), "duckdb": decimal.Decimal("1.1"), - "risingwave": 1.1, "impala": decimal.Decimal("1"), "postgres": decimal.Decimal("1.1"), + "risingwave": decimal.Decimal("1.1"), "pandas": decimal.Decimal("1.1"), "pyspark": decimal.Decimal("1.1"), "mysql": decimal.Decimal("1"), @@ -296,9 +296,9 @@ def test_numeric_literal(con, backend, expr, expected_types): "sqlite": decimal.Decimal("1.1"), "trino": decimal.Decimal("1.1"), "duckdb": decimal.Decimal("1.100000000"), - "risingwave": 1.1, "impala": decimal.Decimal("1.1"), "postgres": decimal.Decimal("1.1"), + "risingwave": decimal.Decimal("1.1"), "pandas": decimal.Decimal("1.1"), "pyspark": decimal.Decimal("1.1"), "mysql": decimal.Decimal("1.1"), @@ -332,8 +332,8 @@ def test_numeric_literal(con, backend, expr, expected_types): "bigquery": decimal.Decimal("1.1"), "sqlite": decimal.Decimal("1.1"), "dask": decimal.Decimal("1.1"), - "risingwave": 1.1, "postgres": decimal.Decimal("1.1"), + "risingwave": decimal.Decimal("1.1"), "pandas": decimal.Decimal("1.1"), "pyspark": decimal.Decimal("1.1"), "clickhouse": decimal.Decimal( @@ -384,10 +384,10 @@ def test_numeric_literal(con, backend, expr, expected_types): 
ibis.literal(decimal.Decimal("Infinity"), type=dt.decimal), # TODO(krzysztof-kwitt): Should we unify it? { - "risingwave": float("nan"), "bigquery": float("inf"), "sqlite": decimal.Decimal("Infinity"), "postgres": decimal.Decimal("Infinity"), + "risingwave": decimal.Decimal("Infinity"), "pandas": decimal.Decimal("Infinity"), "dask": decimal.Decimal("Infinity"), "pyspark": decimal.Decimal("Infinity"), @@ -406,13 +406,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "Unsupported precision. Supported values: [1 : 76]. Current value: None", raises=NotImplementedError, ), - pytest.mark.broken( - ["trino"], - "(trino.exceptions.TrinoUserError) TrinoUserError(type=USER_ERROR, name=INVALID_LITERAL, " - "message=\"line 1:51: 'Infinity' is not a valid decimal literal\", " - "query_id=20230128_024107_01084_y8zm3)", - raises=sa.exc.ProgrammingError, - ), pytest.mark.notyet( ["mysql", "impala"], raises=com.UnsupportedOperationError ), @@ -455,10 +448,10 @@ def test_numeric_literal(con, backend, expr, expected_types): ibis.literal(decimal.Decimal("-Infinity"), type=dt.decimal), # TODO(krzysztof-kwitt): Should we unify it? { - "risingwave": float("nan"), "bigquery": float("-inf"), "sqlite": decimal.Decimal("-Infinity"), "postgres": decimal.Decimal("-Infinity"), + "risingwave": decimal.Decimal("-Infinity"), "pandas": decimal.Decimal("-Infinity"), "dask": decimal.Decimal("-Infinity"), "pyspark": decimal.Decimal("-Infinity"), @@ -477,13 +470,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "Unsupported precision. Supported values: [1 : 76]. Current value: None", raises=NotImplementedError, ), - pytest.mark.broken( - ["trino"], - "(trino.exceptions.TrinoUserError) TrinoUserError(type=USER_ERROR, name=INVALID_LITERAL, " - "message=\"line 1:51: '-Infinity' is not a valid decimal literal\", " - "query_id=20230128_024107_01084_y8zm3)", - raises=sa.exc.ProgrammingError, - ), pytest.mark.notyet( ["mysql", "impala"], raises=com.UnsupportedOperationError ), @@ -551,13 +537,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "Unsupported precision. Supported values: [1 : 76]. 
Current value: None", raises=NotImplementedError, ), - pytest.mark.broken( - ["trino"], - "(trino.exceptions.TrinoUserError) TrinoUserError(type=USER_ERROR, name=INVALID_LITERAL, " - "message=\"line 1:51: 'NaN' is not a valid decimal literal\", " - "query_id=20230128_024107_01084_y8zm3)", - raises=sa.exc.ProgrammingError, - ), pytest.mark.notyet( ["mysql", "impala"], raises=com.UnsupportedOperationError ), @@ -754,12 +733,12 @@ def test_isnan_isinf( math.log(5.556, 2), id="log-base", marks=[ + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function log10(numeric, numeric) does not exist", ), - pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -773,14 +752,34 @@ def test_isnan_isinf( math.log(5.556, 2), id="log2", marks=[ + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function log10(numeric, numeric) does not exist", ), - pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], ), + param( + L(5.556).log10(), + math.log10(5.556), + id="log10", + ), + param( + L(5.556).radians(), + math.radians(5.556), + id="radians", + ), + param( + L(5.556).degrees(), + math.degrees(5.556), + id="degrees", + ), + param( + L(11) % 3, + 11 % 3, + id="mod", + ), param(L(5.556).log10(), math.log10(5.556), id="log10"), param( L(5.556).radians(), @@ -929,12 +928,12 @@ def test_simple_math_functions_columns( lambda t: t.double_col.add(1).log(2), lambda t: np.log2(t.double_col + 1), marks=[ + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function log10(numeric, numeric) does not exist", ), - pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], id="log2", ), @@ -971,7 +970,7 @@ def test_simple_math_functions_columns( pytest.mark.notimpl(["polars"], raises=com.UnsupportedArgumentError), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function log10(numeric, numeric) does not exist", ), ], @@ -1197,7 +1196,6 @@ def test_floating_mod(backend, alltypes, df): reason="Oracle doesn't do integer division by zero", ), pytest.mark.never(["impala"], reason="doesn't allow divide by zero"), - pytest.mark.notyet(["risingwave"], raises=sa.exc.InternalError), ], ), param( @@ -1210,7 +1208,6 @@ def test_floating_mod(backend, alltypes, df): reason="Oracle doesn't do integer division by zero", ), pytest.mark.never(["impala"], reason="doesn't allow divide by zero"), - pytest.mark.notyet(["risingwave"], raises=sa.exc.InternalError), ], ), param( @@ -1223,7 +1220,6 @@ def test_floating_mod(backend, alltypes, df): reason="Oracle doesn't do integer division by zero", ), pytest.mark.never(["impala"], reason="doesn't allow divide by zero"), - pytest.mark.notyet(["risingwave"], raises=sa.exc.InternalError), ], ), param( @@ -1236,7 +1232,6 @@ def test_floating_mod(backend, alltypes, df): reason="Oracle doesn't do integer division by zero", ), pytest.mark.never(["impala"], reason="doesn't allow divide by zero"), - pytest.mark.notyet(["risingwave"], raises=sa.exc.InternalError), ], ), param( @@ -1319,6 +1314,7 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "snowflake", "trino", "postgres", + "risingwave", "mysql", "druid", "mssql", @@ 
-1326,11 +1322,6 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): ], reason="Not SQLAlchemy backends", ) -@pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="Feature is not yet implemented: unsupported data type: NUMERIC(5)", -) def test_sa_default_numeric_precision_and_scale( con, backend, default_precisions, default_scales, temp_table ): @@ -1364,13 +1355,13 @@ def test_sa_default_numeric_precision_and_scale( assert_equal(schema, expected) +@pytest.mark.notimpl(["dask", "pandas", "polars"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function random() does not exist", ) -@pytest.mark.notimpl(["dask", "pandas", "polars"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError) def test_random(con): expr = ibis.random() result = con.execute(expr) @@ -1481,7 +1472,7 @@ def test_constants(con, const): param(lambda t: t.int_col, lambda _: 3, id="col_scalar"), ], ) -@pytest.mark.notimpl(["exasol"], raises=(sa.exc.DBAPIError, ExaQueryError)) +@pytest.mark.notimpl(["exasol"], raises=(ExaQueryError)) @flink_no_bitwise def test_bitwise_columns(backend, con, alltypes, df, op, left_fn, right_fn): expr = op(left_fn(alltypes), right_fn(alltypes)).name("tmp") @@ -1518,7 +1509,7 @@ def test_bitwise_columns(backend, con, alltypes, df, op, left_fn, right_fn): ], ) @pytest.mark.notimpl(["oracle"], raises=OracleDatabaseError) -@pytest.mark.notimpl(["exasol"], raises=(sa.exc.DBAPIError, ExaQueryError)) +@pytest.mark.notimpl(["exasol"], raises=(ExaQueryError)) @flink_no_bitwise def test_bitwise_shift(backend, alltypes, df, op, left_fn, right_fn): expr = op(left_fn(alltypes), right_fn(alltypes)).name("tmp") diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index a72b7c140b22..67c7b5123281 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -6,13 +6,16 @@ import numpy as np import pandas as pd import pytest -import sqlalchemy as sa from pytest import param import ibis import ibis.expr.datatypes as dt from ibis import _ -from ibis.backends.tests.errors import OracleDatabaseError, Py4JJavaError +from ibis.backends.tests.errors import ( + OracleDatabaseError, + PsycoPg2InternalError, + Py4JJavaError, +) @pytest.mark.parametrize( @@ -38,11 +41,6 @@ def test_floating_scalar_parameter(backend, alltypes, df, column, raw_value): [("2009-03-01", "2010-07-03"), ("2014-12-01", "2017-01-05")], ) @pytest.mark.notimpl(["trino", "druid"]) -@pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="function make_date(integer, integer, integer) does not exist", -) @pytest.mark.broken(["oracle"], raises=OracleDatabaseError) def test_date_scalar_parameter(backend, alltypes, start_string, end_string): start, end = ibis.param(dt.date), ibis.param(dt.date) @@ -116,7 +114,7 @@ def test_scalar_param_struct(con): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function make_date(integer, integer, integer) does not exist", ) def test_scalar_param_map(con): @@ -179,11 +177,6 @@ def test_scalar_param(backend, alltypes, df, value, dtype, col): ids=["string", "date", "datetime"], ) @pytest.mark.notimpl(["druid", "oracle"]) -@pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="function 
make_date(integer, integer, integer) does not exist", -) def test_scalar_param_date(backend, alltypes, value): param = ibis.param("date") ds_col = alltypes.date_string_col diff --git a/ibis/backends/tests/test_register.py b/ibis/backends/tests/test_register.py index f1ecdd6bdab6..64824b612462 100644 --- a/ibis/backends/tests/test_register.py +++ b/ibis/backends/tests/test_register.py @@ -409,12 +409,7 @@ def test_register_garbage(con, monkeypatch): ], ) @pytest.mark.notyet( - ["impala", "mssql", "mysql", "postgres", "risingwave", "sqlite", "trino"] -) -@pytest.mark.notimpl( - ["flink"], - raises=ValueError, - reason="read_parquet() missing required argument: 'schema'", + ["flink", "impala", "mssql", "mysql", "postgres", "risingwave", "sqlite", "trino"] ) def test_read_parquet(con, tmp_path, data_dir, fname, in_table_name): pq = pytest.importorskip("pyarrow.parquet") @@ -445,12 +440,17 @@ def ft_data(data_dir): @pytest.mark.notyet( - ["impala", "mssql", "mysql", "pandas", "postgres", "risingwave", "sqlite", "trino"] -) -@pytest.mark.notimpl( - ["flink"], - raises=ValueError, - reason="read_parquet() missing required argument: 'schema'", + [ + "flink", + "impala", + "mssql", + "mysql", + "pandas", + "postgres", + "risingwave", + "sqlite", + "trino", + ] ) def test_read_parquet_glob(con, tmp_path, ft_data): pq = pytest.importorskip("pyarrow.parquet") @@ -469,12 +469,17 @@ def test_read_parquet_glob(con, tmp_path, ft_data): @pytest.mark.notyet( - ["impala", "mssql", "mysql", "pandas", "postgres", "risingwave", "sqlite", "trino"] -) -@pytest.mark.notimpl( - ["flink"], - raises=ValueError, - reason="read_csv() missing required argument: 'schema'", + [ + "flink", + "impala", + "mssql", + "mysql", + "pandas", + "postgres", + "risingwave", + "sqlite", + "trino", + ] ) def test_read_csv_glob(con, tmp_path, ft_data): pc = pytest.importorskip("pyarrow.csv") @@ -556,12 +561,7 @@ def num_diamonds(data_dir): [param(None, id="default"), param("fancy_stones", id="file_name")], ) @pytest.mark.notyet( - ["impala", "mssql", "mysql", "postgres", "risingwave", "sqlite", "trino"] -) -@pytest.mark.notimpl( - ["flink"], - raises=ValueError, - reason="read_csv() missing required argument: 'schema'", + ["flink", "impala", "mssql", "mysql", "postgres", "risingwave", "sqlite", "trino"] ) def test_read_csv(con, data_dir, in_table_name, num_diamonds): fname = "diamonds.csv" diff --git a/ibis/backends/tests/test_set_ops.py b/ibis/backends/tests/test_set_ops.py index 41102559ad9c..4df076da7f97 100644 --- a/ibis/backends/tests/test_set_ops.py +++ b/ibis/backends/tests/test_set_ops.py @@ -4,14 +4,13 @@ import pandas as pd import pytest -import sqlalchemy as sa from pytest import param import ibis import ibis.common.exceptions as com import ibis.expr.types as ir from ibis import _ -from ibis.backends.tests.errors import PyDruidProgrammingError +from ibis.backends.tests.errors import PsycoPg2InternalError, PyDruidProgrammingError @pytest.fixture @@ -84,7 +83,7 @@ def test_union_mixed_distinct(backend, union_subsets): ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: INTERSECT all", ), ], @@ -138,7 +137,7 @@ def test_intersect(backend, alltypes, df, distinct): ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: EXCEPT all", ), ], @@ -223,7 +222,7 @@ def test_top_level_union(backend, con, alltypes, distinct): ), pytest.mark.notimpl( 
["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: INTERSECT all", ), ], diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 4cd7d0d8ffa2..0db31eb5662d 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -8,10 +8,9 @@ from ibis import _ from ibis.backends.conftest import _get_backends_to_test -sa = pytest.importorskip("sqlalchemy") sg = pytest.importorskip("sqlglot") -pytestmark = pytest.mark.notimpl(["flink", "risingwave"]) +pytestmark = pytest.mark.notimpl(["flink"]) simple_literal = param(ibis.literal(1), id="simple_literal") array_literal = param( @@ -27,7 +26,7 @@ ) no_structs = pytest.mark.never( ["impala", "mysql", "sqlite", "mssql", "exasol"], - raises=(NotImplementedError, sa.exc.CompileError, exc.UnsupportedBackendType), + raises=(NotImplementedError, exc.UnsupportedBackendType), reason="structs not supported in the backend", ) no_struct_literals = pytest.mark.notimpl( @@ -62,9 +61,6 @@ def test_literal(backend, expr): @pytest.mark.never(["pandas", "dask", "polars"], reason="not SQL") -@pytest.mark.xfail_version( - mssql=["sqlalchemy>=2"], reason="sqlalchemy 2 prefixes literals with `N`" -) def test_group_by_has_index(backend, snapshot): countries = ibis.table( dict(continent="string", population="int64"), name="countries" diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index e25ab5f62d13..8e4e8d5fc0c4 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd import pytest -import sqlalchemy as sa from pytest import param import ibis @@ -14,6 +13,7 @@ from ibis.backends.tests.errors import ( ClickHouseDatabaseError, OracleDatabaseError, + PsycoPg2InternalError, PyDruidProgrammingError, PyODBCProgrammingError, ) @@ -62,7 +62,7 @@ ), pytest.mark.broken( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason='sql parser error: Expected end of statement, found: "NG\'" at line:1, column:31 Near "SELECT \'STRI"NG\' AS "\'STRI""', ), ], @@ -90,7 +90,7 @@ ), pytest.mark.broken( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason='sql parser error: Expected end of statement, found: "NG\'" at line:1, column:31 Near "SELECT \'STRI"NG\' AS "\'STRI""', ), ], @@ -233,11 +233,6 @@ def uses_java_re(t): pytest.mark.notimpl( ["mssql", "exasol"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="function textregexeq(character varying, character varying) does not exist", - ), ], ), param( @@ -248,11 +243,6 @@ def uses_java_re(t): pytest.mark.notimpl( ["mssql", "exasol"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="function textregexeq(character varying, character varying) does not exist", - ), ], ), param( @@ -268,11 +258,6 @@ def uses_java_re(t): pytest.mark.notimpl( ["druid"], reason="No posix support", raises=AssertionError ), - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="function textregexeq(character varying, character varying) does not exist", - ), ], ), param( @@ -283,11 +268,6 @@ def uses_java_re(t): pytest.mark.notimpl( ["mssql", "exasol"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="function textregexeq(character 
varying, character varying) does not exist", - ), ], ), param( @@ -300,11 +280,6 @@ def uses_java_re(t): pytest.mark.notimpl( ["mssql", "exasol"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="function textregexeq(character varying, character varying) does not exist", - ), ], ), param( @@ -317,11 +292,6 @@ def uses_java_re(t): pytest.mark.notimpl( ["mssql", "exasol"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="function textregexeq(character varying, character varying) does not exist", - ), ], ), param( @@ -334,11 +304,6 @@ def uses_java_re(t): pytest.mark.notimpl( ["mssql", "exasol"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="function textregexeq(character varying, character varying) does not exist", - ), ], ), param( @@ -349,11 +314,6 @@ def uses_java_re(t): pytest.mark.notimpl( ["mssql", "exasol"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="function textregexeq(character varying, character varying) does not exist", - ), ], ), param( @@ -364,11 +324,6 @@ def uses_java_re(t): pytest.mark.notimpl( ["mssql", "exasol"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="function textregexeq(character varying, character varying) does not exist", - ), ], ), param( @@ -990,7 +945,7 @@ def test_multiple_subs(con): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function levenshtein(character varying, character varying) does not exist", ) @pytest.mark.parametrize( diff --git a/ibis/backends/tests/test_struct.py b/ibis/backends/tests/test_struct.py index 95f7df9f4ea5..f2b8c99fc73b 100644 --- a/ibis/backends/tests/test_struct.py +++ b/ibis/backends/tests/test_struct.py @@ -79,7 +79,7 @@ def test_literal(backend, con, field): backend.assert_series_equal(result, expected.astype(dtype)) -@pytest.mark.notimpl(["postgres", "risingwave"]) +@pytest.mark.notimpl(["postgres"]) @pytest.mark.parametrize("field", ["a", "b", "c"]) @pytest.mark.notyet( ["clickhouse"], reason="clickhouse doesn't support nullable nested types" diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 68a9306c26d5..3573a598ddf6 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -9,7 +9,6 @@ import numpy as np import pandas as pd import pytest -import sqlalchemy as sa import sqlglot as sg from pytest import param @@ -30,6 +29,7 @@ OracleDatabaseError, PolarsComputeError, PolarsPanicException, + PsycoPg2InternalError, Py4JJavaError, PyDruidProgrammingError, PyODBCProgrammingError, @@ -152,6 +152,11 @@ def test_timestamp_extract(backend, alltypes, df, attr): raises=AssertionError, reason="Refer to https://github.com/risingwavelabs/risingwave/issues/14670", ), + pytest.mark.broken( + ["risingwave"], + raises=AssertionError, + reason="Refer to https://github.com/risingwavelabs/risingwave/issues/14670", + ), ], ), ], @@ -634,7 +639,7 @@ def test_date_truncate(backend, alltypes, df, unit): ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Bind error: Invalid unit: week", ), ], @@ -657,7 +662,7 @@ def test_date_truncate(backend, alltypes, df, unit): ), 
pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Bind error: Invalid unit: millisecond", ), ], @@ -681,7 +686,7 @@ def test_date_truncate(backend, alltypes, df, unit): ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Bind error: Invalid unit: microsecond", ), ], @@ -738,7 +743,7 @@ def convert_to_offset(offset, displacement_type=displacement_type): pytest.mark.notyet(["trino"], raises=com.UnsupportedOperationError), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Bind error: Invalid unit: week", ), ], @@ -831,7 +836,7 @@ def convert_to_offset(x): id="timestamp-add-interval-binop", marks=[ pytest.mark.notimpl( - ["dask", "risingwave", "snowflake", "sqlite", "bigquery", "exasol"], + ["dask", "snowflake", "sqlite", "bigquery", "exasol"], raises=com.OperationNotDefinedError, ), pytest.mark.notimpl(["impala"], raises=com.UnsupportedOperationError), @@ -851,14 +856,7 @@ def convert_to_offset(x): id="timestamp-add-interval-binop-different-units", marks=[ pytest.mark.notimpl( - [ - "sqlite", - "risingwave", - "polars", - "snowflake", - "bigquery", - "exasol", - ], + ["sqlite", "polars", "snowflake", "bigquery", "exasol"], raises=com.OperationNotDefinedError, ), pytest.mark.notimpl(["impala"], raises=com.UnsupportedOperationError), @@ -982,11 +980,6 @@ def convert_to_offset(x): raises=AttributeError, reason="'StringColumn' object has no attribute 'date'", ), - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="function make_date(integer, integer, integer) does not exist", - ), pytest.mark.broken( ["flink"], raises=com.UnsupportedOperationError, @@ -1428,13 +1421,6 @@ def test_interval_add_cast_column(backend, alltypes, df): ), "%Y%m%d", marks=[ - pytest.mark.notimpl( - [ - "risingwave", - ], - raises=AttributeError, - reason="Neither 'concat' object nor 'Comparator' object has an attribute 'value'", - ), pytest.mark.notimpl( [ "polars", @@ -1617,7 +1603,6 @@ def test_integer_to_timestamp(backend, con, unit): [ "dask", "pandas", - "risingwave", "clickhouse", "sqlite", "datafusion", @@ -1723,6 +1708,11 @@ def test_day_of_week_column(backend, alltypes, df): "Ref: https://nightlies.apache.org/flink/flink-docs-release-1.13/docs/dev/table/functions/systemfunctions/#temporal-functions" ), ), + pytest.mark.broken( + ["risingwave"], + raises=AssertionError, + reason="Refer to https://github.com/risingwavelabs/risingwave/issues/14670", + ), ], ), ], @@ -1803,7 +1793,7 @@ def test_now_from_projection(alltypes): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=com.OperationNotDefinedError, reason="function make_date(integer, integer, integer) does not exist", ) def test_date_literal(con, backend): @@ -1837,7 +1827,7 @@ def test_date_literal(con, backend): @pytest.mark.notyet(["impala"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function make_timestamp(integer, integer, integer, integer, integer, integer) does not exist", ) def test_timestamp_literal(con, backend): @@ -1895,7 +1885,7 @@ def test_timestamp_literal(con, backend): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function make_timestamp(integer, integer, integer, integer, integer, integer) does not exist", ) def 
test_timestamp_with_timezone_literal(con, timezone, expected): @@ -1928,7 +1918,7 @@ def test_timestamp_with_timezone_literal(con, timezone, expected): @pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function make_time(integer, integer, integer) does not exist", ) def test_time_literal(con, backend): @@ -2078,7 +2068,7 @@ def test_interval_literal(con, backend): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=com.OperationNotDefinedError, reason="function make_date(integer, integer, integer) does not exist", ) def test_date_column_from_ymd(backend, con, alltypes, df): @@ -2100,12 +2090,12 @@ def test_date_column_from_ymd(backend, con, alltypes, df): raises=AttributeError, reason="StringColumn' object has no attribute 'year'", ) +@pytest.mark.notyet(["impala", "oracle"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function make_timestamp(smallint, smallint, smallint, smallint, smallint, smallint) does not exist", ) -@pytest.mark.notyet(["impala", "oracle"], raises=com.OperationNotDefinedError) def test_timestamp_column_from_ymdhms(backend, con, alltypes, df): c = alltypes.timestamp_col expr = ibis.timestamp( @@ -2234,11 +2224,6 @@ def build_date_col(t): param(lambda _: DATE, build_date_col, id="date_column"), ], ) -@pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="function make_date(integer, integer, integer) does not exist", -) def test_timestamp_date_comparison(backend, alltypes, df, left_fn, right_fn): left = left_fn(alltypes) right = right_fn(alltypes) @@ -2360,12 +2345,12 @@ def test_large_timestamp(con): reason="assert Timestamp('2023-01-07 13:20:05.561000') == Timestamp('2023-01-07 13:20:05.561000231')", raises=AssertionError, ), + pytest.mark.notimpl(["exasol"], raises=AssertionError), pytest.mark.notyet( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Parse error: timestamp without time zone Can't cast string to timestamp (expected format is YYYY-MM-DD HH:MM:SS[.D+{up to 6 digits}] or YYYY-MM-DD HH:MM or YYYY-MM-DD or ISO 8601 format)", ), - pytest.mark.notimpl(["exasol"], raises=AssertionError), ], ), ], @@ -2395,11 +2380,6 @@ def test_timestamp_precision_output(con, ts, scale, unit): ], raises=com.OperationNotDefinedError, ) -@pytest.mark.notyet( - ["risingwave"], - reason="risingwave doesn't have any easy way to accurately compute the delta in specific units", - raises=com.OperationNotDefinedError, -) @pytest.mark.parametrize( ("start", "end", "unit", "expected"), [ @@ -2416,7 +2396,7 @@ def test_timestamp_precision_output(con, ts, scale, unit): reason="time types not yet implemented in ibis for the clickhouse backend", ), pytest.mark.notyet( - ["postgres"], + ["postgres", "risingwave"], reason="postgres doesn't have any easy way to accurately compute the delta in specific units", raises=com.OperationNotDefinedError, ), @@ -2565,7 +2545,7 @@ def test_delta(con, start, end, unit, expected): @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function date_bin(interval, timestamp without time zone, timestamp without time zone) does not exist", ) def test_timestamp_bucket(backend, kws, pd_freq): @@ -2604,7 +2584,7 @@ def 
test_timestamp_bucket(backend, kws, pd_freq): @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="function date_bin(interval, timestamp without time zone, timestamp without time zone) does not exist", ) def test_timestamp_bucket_offset(backend, offset_mins): @@ -2717,11 +2697,6 @@ def test_time_literal_sql(dialect, snapshot, micros): param(datetime.date.fromisoformat, id="fromstring"), ], ) -@pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="function make_date(integer, integer, integer) does not exist", -) def test_date_scalar(con, value, func): expr = ibis.date(func(value)).name("tmp") diff --git a/ibis/backends/tests/test_uuid.py b/ibis/backends/tests/test_uuid.py index 5802727f205d..7b427bea9173 100644 --- a/ibis/backends/tests/test_uuid.py +++ b/ibis/backends/tests/test_uuid.py @@ -4,7 +4,6 @@ import uuid import pytest -import sqlalchemy.exc import ibis import ibis.common.exceptions as com @@ -21,6 +20,7 @@ "flink": "CHAR(36) NOT NULL", "impala": "STRING", "postgres": "uuid", + "risingwave": "character varying", "snowflake": "VARCHAR", "sqlite": "text", "trino": "uuid", @@ -28,11 +28,6 @@ @pytest.mark.notimpl(["datafusion", "polars"], raises=NotImplementedError) -@pytest.mark.notimpl( - ["risingwave"], - raises=sqlalchemy.exc.InternalError, - reason="Feature is not yet implemented: unsupported data type: UUID", -) @pytest.mark.notimpl(["polars"], raises=NotImplementedError) @pytest.mark.notimpl(["datafusion"], raises=Exception) def test_uuid_literal(con, backend): diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index e68e70773ad7..1841a1ac0287 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -6,7 +6,6 @@ import numpy as np import pandas as pd import pytest -import sqlalchemy as sa from pytest import param import ibis @@ -19,6 +18,7 @@ ImpalaHiveServer2Error, MySQLOperationalError, OracleDatabaseError, + PsycoPg2InternalError, Py4JJavaError, PyDruidProgrammingError, PyODBCProgrammingError, @@ -148,7 +148,7 @@ def calc_zscore(s): pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Unrecognized window function: percent_rank", ), ], @@ -165,7 +165,7 @@ def calc_zscore(s): pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Unrecognized window function: cume_dist", ), ], @@ -196,7 +196,7 @@ def calc_zscore(s): ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Unrecognized window function: ntile", ), ], @@ -236,12 +236,8 @@ def calc_zscore(s): ["impala", "mssql"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl(["dask"], raises=NotImplementedError), - pytest.mark.notimpl( - ["flink"], - raises=com.OperationNotDefinedError, - reason="No translation rule for ", - ), - pytest.mark.notimpl(["risingwave"], raises=sa.exc.InternalError), + pytest.mark.notimpl(["flink"], raises=com.OperationNotDefinedError), + pytest.mark.notimpl(["risingwave"], raises=PsycoPg2InternalError), ], ), param( @@ -407,7 +403,7 @@ def test_grouped_bounded_expanding_window( 
pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Window function with empty PARTITION BY is not supported yet", ), ], @@ -667,14 +663,10 @@ def test_grouped_unbounded_window( @pytest.mark.broken(["dask"], raises=AssertionError) @pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError) @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl( - ["flink"], - raises=com.UnsupportedOperationError, - reason="OVER RANGE FOLLOWING windows are not supported in Flink yet", -) +@pytest.mark.notimpl(["flink"], raises=com.UnsupportedOperationError) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Window function with empty PARTITION BY is not supported yet", ) def test_simple_ungrouped_unbound_following_window( @@ -706,7 +698,7 @@ def test_simple_ungrouped_unbound_following_window( @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Window function with empty PARTITION BY is not supported yet", ) def test_simple_ungrouped_window_with_scalar_order_by(alltypes): @@ -739,7 +731,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Window function with empty PARTITION BY is not supported yet", ), ], @@ -771,16 +763,16 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): pytest.mark.notimpl( ["pandas", "dask"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="Feature is not yet implemented: Unrecognized window function: ntile", - ), pytest.mark.notimpl( ["flink"], raises=Py4JJavaError, reason="CalciteContextException: Argument to function 'NTILE' must be a literal", ), + pytest.mark.notimpl( + ["risingwave"], + raises=PsycoPg2InternalError, + reason="Feature is not yet implemented: Unrecognized window function: ntile", + ), ], ), param( @@ -857,7 +849,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): marks=[ pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Window function with empty PARTITION BY is not supported yet", ), ], @@ -892,7 +884,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Window function with empty PARTITION BY is not supported yet", ), ], @@ -905,7 +897,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): marks=[ pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Window function with empty PARTITION BY is not supported yet", ), ], @@ -943,7 +935,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): ), pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Window function with empty PARTITION BY is not supported yet", ), ], @@ -1060,17 +1052,17 
@@ def test_ungrouped_unbounded_window( reason="RANGE OFFSET frame for 'DB::ColumnNullable' ORDER BY column is not implemented", raises=ClickHouseDatabaseError, ) -@pytest.mark.notimpl( - ["risingwave"], - raises=sa.exc.InternalError, - reason="Feature is not yet implemented: window frame in `RANGE` mode is not supported yet", -) @pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError) @pytest.mark.broken( ["mysql"], raises=MySQLOperationalError, reason="https://github.com/tobymao/sqlglot/issues/2779", ) +@pytest.mark.notimpl( + ["risingwave"], + raises=PsycoPg2InternalError, + reason="Feature is not yet implemented: window frame in `RANGE` mode is not supported yet", +) def test_grouped_bounded_range_window(backend, alltypes, df): # Explanation of the range window spec below: # @@ -1128,7 +1120,7 @@ def gb_fn(df): ) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Unrecognized window function: percent_rank", ) def test_percent_rank_whole_table_no_order_by(backend, alltypes, df): @@ -1179,7 +1171,7 @@ def agg(df): @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Window function with empty PARTITION BY is not supported yet", ) def test_mutate_window_filter(backend, alltypes): @@ -1256,16 +1248,16 @@ def test_first_last(backend): ["mssql"], raises=PyODBCProgrammingError, reason="not support by the backend" ) @pytest.mark.broken(["flink"], raises=Py4JJavaError, reason="bug in Flink") -@pytest.mark.broken( - ["risingwave"], - raises=sa.exc.InternalError, - reason="sql parser error: Expected literal int, found: INTERVAL at line:1, column:99", -) @pytest.mark.broken( ["exasol"], raises=ExaQueryError, reason="database can't handle UTC timestamps in DataFrames", ) +@pytest.mark.broken( + ["risingwave"], + raises=PsycoPg2InternalError, + reason="sql parser error: Expected literal int, found: INTERVAL at line:1, column:99", +) def test_range_expression_bounds(backend): t = ibis.memtable( { @@ -1312,7 +1304,7 @@ def test_range_expression_bounds(backend): ) @pytest.mark.broken( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Unrecognized window function: percent_rank", ) def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df): @@ -1347,7 +1339,7 @@ def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df): @pytest.mark.notyet(["flink"], raises=com.UnsupportedOperationError) @pytest.mark.notimpl( ["risingwave"], - raises=sa.exc.InternalError, + raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Window function with empty PARTITION BY is not supported yet", ) def test_windowed_order_by_sequence_is_preserved(con): diff --git a/poetry.lock b/poetry.lock index 1120abb6e22e..83cf246a8e5c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2,87 +2,87 @@ [[package]] name = "aiohttp" -version = "3.9.1" +version = "3.9.3" description = "Async http client/server framework (asyncio)" optional = true python-versions = ">=3.8" files = [ - {file = "aiohttp-3.9.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e1f80197f8b0b846a8d5cf7b7ec6084493950d0882cc5537fb7b96a69e3c8590"}, - {file = "aiohttp-3.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c72444d17777865734aa1a4d167794c34b63e5883abb90356a0364a28904e6c0"}, - {file = 
"aiohttp-3.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9b05d5cbe9dafcdc733262c3a99ccf63d2f7ce02543620d2bd8db4d4f7a22f83"}, - {file = "aiohttp-3.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c4fa235d534b3547184831c624c0b7c1e262cd1de847d95085ec94c16fddcd5"}, - {file = "aiohttp-3.9.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:289ba9ae8e88d0ba16062ecf02dd730b34186ea3b1e7489046fc338bdc3361c4"}, - {file = "aiohttp-3.9.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bff7e2811814fa2271be95ab6e84c9436d027a0e59665de60edf44e529a42c1f"}, - {file = "aiohttp-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81b77f868814346662c96ab36b875d7814ebf82340d3284a31681085c051320f"}, - {file = "aiohttp-3.9.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b9c7426923bb7bd66d409da46c41e3fb40f5caf679da624439b9eba92043fa6"}, - {file = "aiohttp-3.9.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:8d44e7bf06b0c0a70a20f9100af9fcfd7f6d9d3913e37754c12d424179b4e48f"}, - {file = "aiohttp-3.9.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:22698f01ff5653fe66d16ffb7658f582a0ac084d7da1323e39fd9eab326a1f26"}, - {file = "aiohttp-3.9.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ca7ca5abfbfe8d39e653870fbe8d7710be7a857f8a8386fc9de1aae2e02ce7e4"}, - {file = "aiohttp-3.9.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:8d7f98fde213f74561be1d6d3fa353656197f75d4edfbb3d94c9eb9b0fc47f5d"}, - {file = "aiohttp-3.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5216b6082c624b55cfe79af5d538e499cd5f5b976820eac31951fb4325974501"}, - {file = "aiohttp-3.9.1-cp310-cp310-win32.whl", hash = "sha256:0e7ba7ff228c0d9a2cd66194e90f2bca6e0abca810b786901a569c0de082f489"}, - {file = "aiohttp-3.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:c7e939f1ae428a86e4abbb9a7c4732bf4706048818dfd979e5e2839ce0159f23"}, - {file = "aiohttp-3.9.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:df9cf74b9bc03d586fc53ba470828d7b77ce51b0582d1d0b5b2fb673c0baa32d"}, - {file = "aiohttp-3.9.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecca113f19d5e74048c001934045a2b9368d77b0b17691d905af18bd1c21275e"}, - {file = "aiohttp-3.9.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8cef8710fb849d97c533f259103f09bac167a008d7131d7b2b0e3a33269185c0"}, - {file = "aiohttp-3.9.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bea94403a21eb94c93386d559bce297381609153e418a3ffc7d6bf772f59cc35"}, - {file = "aiohttp-3.9.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91c742ca59045dce7ba76cab6e223e41d2c70d79e82c284a96411f8645e2afff"}, - {file = "aiohttp-3.9.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6c93b7c2e52061f0925c3382d5cb8980e40f91c989563d3d32ca280069fd6a87"}, - {file = "aiohttp-3.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee2527134f95e106cc1653e9ac78846f3a2ec1004cf20ef4e02038035a74544d"}, - {file = "aiohttp-3.9.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11ff168d752cb41e8492817e10fb4f85828f6a0142b9726a30c27c35a1835f01"}, - {file = "aiohttp-3.9.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b8c3a67eb87394386847d188996920f33b01b32155f0a94f36ca0e0c635bf3e3"}, - {file = "aiohttp-3.9.1-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:c7b5d5d64e2a14e35a9240b33b89389e0035e6de8dbb7ffa50d10d8b65c57449"}, - {file = "aiohttp-3.9.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:69985d50a2b6f709412d944ffb2e97d0be154ea90600b7a921f95a87d6f108a2"}, - {file = "aiohttp-3.9.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:c9110c06eaaac7e1f5562caf481f18ccf8f6fdf4c3323feab28a93d34cc646bd"}, - {file = "aiohttp-3.9.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d737e69d193dac7296365a6dcb73bbbf53bb760ab25a3727716bbd42022e8d7a"}, - {file = "aiohttp-3.9.1-cp311-cp311-win32.whl", hash = "sha256:4ee8caa925aebc1e64e98432d78ea8de67b2272252b0a931d2ac3bd876ad5544"}, - {file = "aiohttp-3.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:a34086c5cc285be878622e0a6ab897a986a6e8bf5b67ecb377015f06ed316587"}, - {file = "aiohttp-3.9.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f800164276eec54e0af5c99feb9494c295118fc10a11b997bbb1348ba1a52065"}, - {file = "aiohttp-3.9.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:500f1c59906cd142d452074f3811614be04819a38ae2b3239a48b82649c08821"}, - {file = "aiohttp-3.9.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0b0a6a36ed7e164c6df1e18ee47afbd1990ce47cb428739d6c99aaabfaf1b3af"}, - {file = "aiohttp-3.9.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69da0f3ed3496808e8cbc5123a866c41c12c15baaaead96d256477edf168eb57"}, - {file = "aiohttp-3.9.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:176df045597e674fa950bf5ae536be85699e04cea68fa3a616cf75e413737eb5"}, - {file = "aiohttp-3.9.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b796b44111f0cab6bbf66214186e44734b5baab949cb5fb56154142a92989aeb"}, - {file = "aiohttp-3.9.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f27fdaadce22f2ef950fc10dcdf8048407c3b42b73779e48a4e76b3c35bca26c"}, - {file = "aiohttp-3.9.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcb6532b9814ea7c5a6a3299747c49de30e84472fa72821b07f5a9818bce0f66"}, - {file = "aiohttp-3.9.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:54631fb69a6e44b2ba522f7c22a6fb2667a02fd97d636048478db2fd8c4e98fe"}, - {file = "aiohttp-3.9.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4b4c452d0190c5a820d3f5c0f3cd8a28ace48c54053e24da9d6041bf81113183"}, - {file = "aiohttp-3.9.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:cae4c0c2ca800c793cae07ef3d40794625471040a87e1ba392039639ad61ab5b"}, - {file = "aiohttp-3.9.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:565760d6812b8d78d416c3c7cfdf5362fbe0d0d25b82fed75d0d29e18d7fc30f"}, - {file = "aiohttp-3.9.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:54311eb54f3a0c45efb9ed0d0a8f43d1bc6060d773f6973efd90037a51cd0a3f"}, - {file = "aiohttp-3.9.1-cp312-cp312-win32.whl", hash = "sha256:85c3e3c9cb1d480e0b9a64c658cd66b3cfb8e721636ab8b0e746e2d79a7a9eed"}, - {file = "aiohttp-3.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:11cb254e397a82efb1805d12561e80124928e04e9c4483587ce7390b3866d213"}, - {file = "aiohttp-3.9.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:8a22a34bc594d9d24621091d1b91511001a7eea91d6652ea495ce06e27381f70"}, - {file = "aiohttp-3.9.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:598db66eaf2e04aa0c8900a63b0101fdc5e6b8a7ddd805c56d86efb54eb66672"}, - {file = "aiohttp-3.9.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2c9376e2b09895c8ca8b95362283365eb5c03bdc8428ade80a864160605715f1"}, - {file = 
"aiohttp-3.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41473de252e1797c2d2293804e389a6d6986ef37cbb4a25208de537ae32141dd"}, - {file = "aiohttp-3.9.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c5857612c9813796960c00767645cb5da815af16dafb32d70c72a8390bbf690"}, - {file = "aiohttp-3.9.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ffcd828e37dc219a72c9012ec44ad2e7e3066bec6ff3aaa19e7d435dbf4032ca"}, - {file = "aiohttp-3.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:219a16763dc0294842188ac8a12262b5671817042b35d45e44fd0a697d8c8361"}, - {file = "aiohttp-3.9.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f694dc8a6a3112059258a725a4ebe9acac5fe62f11c77ac4dcf896edfa78ca28"}, - {file = "aiohttp-3.9.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:bcc0ea8d5b74a41b621ad4a13d96c36079c81628ccc0b30cfb1603e3dfa3a014"}, - {file = "aiohttp-3.9.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:90ec72d231169b4b8d6085be13023ece8fa9b1bb495e4398d847e25218e0f431"}, - {file = "aiohttp-3.9.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:cf2a0ac0615842b849f40c4d7f304986a242f1e68286dbf3bd7a835e4f83acfd"}, - {file = "aiohttp-3.9.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:0e49b08eafa4f5707ecfb321ab9592717a319e37938e301d462f79b4e860c32a"}, - {file = "aiohttp-3.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2c59e0076ea31c08553e868cec02d22191c086f00b44610f8ab7363a11a5d9d8"}, - {file = "aiohttp-3.9.1-cp38-cp38-win32.whl", hash = "sha256:4831df72b053b1eed31eb00a2e1aff6896fb4485301d4ccb208cac264b648db4"}, - {file = "aiohttp-3.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:3135713c5562731ee18f58d3ad1bf41e1d8883eb68b363f2ffde5b2ea4b84cc7"}, - {file = "aiohttp-3.9.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cfeadf42840c1e870dc2042a232a8748e75a36b52d78968cda6736de55582766"}, - {file = "aiohttp-3.9.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:70907533db712f7aa791effb38efa96f044ce3d4e850e2d7691abd759f4f0ae0"}, - {file = "aiohttp-3.9.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cdefe289681507187e375a5064c7599f52c40343a8701761c802c1853a504558"}, - {file = "aiohttp-3.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7481f581251bb5558ba9f635db70908819caa221fc79ee52a7f58392778c636"}, - {file = "aiohttp-3.9.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:49f0c1b3c2842556e5de35f122fc0f0b721334ceb6e78c3719693364d4af8499"}, - {file = "aiohttp-3.9.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0d406b01a9f5a7e232d1b0d161b40c05275ffbcbd772dc18c1d5a570961a1ca4"}, - {file = "aiohttp-3.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d8e4450e7fe24d86e86b23cc209e0023177b6d59502e33807b732d2deb6975f"}, - {file = "aiohttp-3.9.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c0266cd6f005e99f3f51e583012de2778e65af6b73860038b968a0a8888487a"}, - {file = "aiohttp-3.9.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab221850108a4a063c5b8a70f00dd7a1975e5a1713f87f4ab26a46e5feac5a0e"}, - {file = "aiohttp-3.9.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c88a15f272a0ad3d7773cf3a37cc7b7d077cbfc8e331675cf1346e849d97a4e5"}, - {file = "aiohttp-3.9.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:237533179d9747080bcaad4d02083ce295c0d2eab3e9e8ce103411a4312991a0"}, - {file = "aiohttp-3.9.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:02ab6006ec3c3463b528374c4cdce86434e7b89ad355e7bf29e2f16b46c7dd6f"}, - {file = "aiohttp-3.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04fa38875e53eb7e354ece1607b1d2fdee2d175ea4e4d745f6ec9f751fe20c7c"}, - {file = "aiohttp-3.9.1-cp39-cp39-win32.whl", hash = "sha256:82eefaf1a996060602f3cc1112d93ba8b201dbf5d8fd9611227de2003dddb3b7"}, - {file = "aiohttp-3.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:9b05d33ff8e6b269e30a7957bd3244ffbce2a7a35a81b81c382629b80af1a8bf"}, - {file = "aiohttp-3.9.1.tar.gz", hash = "sha256:8fc49a87ac269d4529da45871e2ffb6874e87779c3d0e2ccd813c0899221239d"}, + {file = "aiohttp-3.9.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:939677b61f9d72a4fa2a042a5eee2a99a24001a67c13da113b2e30396567db54"}, + {file = "aiohttp-3.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f5cd333fcf7590a18334c90f8c9147c837a6ec8a178e88d90a9b96ea03194cc"}, + {file = "aiohttp-3.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82e6aa28dd46374f72093eda8bcd142f7771ee1eb9d1e223ff0fa7177a96b4a5"}, + {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f56455b0c2c7cc3b0c584815264461d07b177f903a04481dfc33e08a89f0c26b"}, + {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bca77a198bb6e69795ef2f09a5f4c12758487f83f33d63acde5f0d4919815768"}, + {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e083c285857b78ee21a96ba1eb1b5339733c3563f72980728ca2b08b53826ca5"}, + {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab40e6251c3873d86ea9b30a1ac6d7478c09277b32e14745d0d3c6e76e3c7e29"}, + {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df822ee7feaaeffb99c1a9e5e608800bd8eda6e5f18f5cfb0dc7eeb2eaa6bbec"}, + {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:acef0899fea7492145d2bbaaaec7b345c87753168589cc7faf0afec9afe9b747"}, + {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:cd73265a9e5ea618014802ab01babf1940cecb90c9762d8b9e7d2cc1e1969ec6"}, + {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a78ed8a53a1221393d9637c01870248a6f4ea5b214a59a92a36f18151739452c"}, + {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:6b0e029353361f1746bac2e4cc19b32f972ec03f0f943b390c4ab3371840aabf"}, + {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7cf5c9458e1e90e3c390c2639f1017a0379a99a94fdfad3a1fd966a2874bba52"}, + {file = "aiohttp-3.9.3-cp310-cp310-win32.whl", hash = "sha256:3e59c23c52765951b69ec45ddbbc9403a8761ee6f57253250c6e1536cacc758b"}, + {file = "aiohttp-3.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:055ce4f74b82551678291473f66dc9fb9048a50d8324278751926ff0ae7715e5"}, + {file = "aiohttp-3.9.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6b88f9386ff1ad91ace19d2a1c0225896e28815ee09fc6a8932fded8cda97c3d"}, + {file = "aiohttp-3.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c46956ed82961e31557b6857a5ca153c67e5476972e5f7190015018760938da2"}, + {file = "aiohttp-3.9.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:07b837ef0d2f252f96009e9b8435ec1fef68ef8b1461933253d318748ec1acdc"}, + {file = 
"aiohttp-3.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad46e6f620574b3b4801c68255492e0159d1712271cc99d8bdf35f2043ec266"}, + {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ed3e046ea7b14938112ccd53d91c1539af3e6679b222f9469981e3dac7ba1ce"}, + {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:039df344b45ae0b34ac885ab5b53940b174530d4dd8a14ed8b0e2155b9dddccb"}, + {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7943c414d3a8d9235f5f15c22ace69787c140c80b718dcd57caaade95f7cd93b"}, + {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84871a243359bb42c12728f04d181a389718710129b36b6aad0fc4655a7647d4"}, + {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5eafe2c065df5401ba06821b9a054d9cb2848867f3c59801b5d07a0be3a380ae"}, + {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9d3c9b50f19704552f23b4eaea1fc082fdd82c63429a6506446cbd8737823da3"}, + {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:f033d80bc6283092613882dfe40419c6a6a1527e04fc69350e87a9df02bbc283"}, + {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:2c895a656dd7e061b2fd6bb77d971cc38f2afc277229ce7dd3552de8313a483e"}, + {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1f5a71d25cd8106eab05f8704cd9167b6e5187bcdf8f090a66c6d88b634802b4"}, + {file = "aiohttp-3.9.3-cp311-cp311-win32.whl", hash = "sha256:50fca156d718f8ced687a373f9e140c1bb765ca16e3d6f4fe116e3df7c05b2c5"}, + {file = "aiohttp-3.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:5fe9ce6c09668063b8447f85d43b8d1c4e5d3d7e92c63173e6180b2ac5d46dd8"}, + {file = "aiohttp-3.9.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:38a19bc3b686ad55804ae931012f78f7a534cce165d089a2059f658f6c91fa60"}, + {file = "aiohttp-3.9.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:770d015888c2a598b377bd2f663adfd947d78c0124cfe7b959e1ef39f5b13869"}, + {file = "aiohttp-3.9.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ee43080e75fc92bf36219926c8e6de497f9b247301bbf88c5c7593d931426679"}, + {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52df73f14ed99cee84865b95a3d9e044f226320a87af208f068ecc33e0c35b96"}, + {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc9b311743a78043b26ffaeeb9715dc360335e5517832f5a8e339f8a43581e4d"}, + {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b955ed993491f1a5da7f92e98d5dad3c1e14dc175f74517c4e610b1f2456fb11"}, + {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:504b6981675ace64c28bf4a05a508af5cde526e36492c98916127f5a02354d53"}, + {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6fe5571784af92b6bc2fda8d1925cccdf24642d49546d3144948a6a1ed58ca5"}, + {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ba39e9c8627edc56544c8628cc180d88605df3892beeb2b94c9bc857774848ca"}, + {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e5e46b578c0e9db71d04c4b506a2121c0cb371dd89af17a0586ff6769d4c58c1"}, + {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = 
"sha256:938a9653e1e0c592053f815f7028e41a3062e902095e5a7dc84617c87267ebd5"}, + {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:c3452ea726c76e92f3b9fae4b34a151981a9ec0a4847a627c43d71a15ac32aa6"}, + {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ff30218887e62209942f91ac1be902cc80cddb86bf00fbc6783b7a43b2bea26f"}, + {file = "aiohttp-3.9.3-cp312-cp312-win32.whl", hash = "sha256:38f307b41e0bea3294a9a2a87833191e4bcf89bb0365e83a8be3a58b31fb7f38"}, + {file = "aiohttp-3.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:b791a3143681a520c0a17e26ae7465f1b6f99461a28019d1a2f425236e6eedb5"}, + {file = "aiohttp-3.9.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0ed621426d961df79aa3b963ac7af0d40392956ffa9be022024cd16297b30c8c"}, + {file = "aiohttp-3.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7f46acd6a194287b7e41e87957bfe2ad1ad88318d447caf5b090012f2c5bb528"}, + {file = "aiohttp-3.9.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:feeb18a801aacb098220e2c3eea59a512362eb408d4afd0c242044c33ad6d542"}, + {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f734e38fd8666f53da904c52a23ce517f1b07722118d750405af7e4123933511"}, + {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b40670ec7e2156d8e57f70aec34a7216407848dfe6c693ef131ddf6e76feb672"}, + {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fdd215b7b7fd4a53994f238d0f46b7ba4ac4c0adb12452beee724ddd0743ae5d"}, + {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:017a21b0df49039c8f46ca0971b3a7fdc1f56741ab1240cb90ca408049766168"}, + {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e99abf0bba688259a496f966211c49a514e65afa9b3073a1fcee08856e04425b"}, + {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:648056db9a9fa565d3fa851880f99f45e3f9a771dd3ff3bb0c048ea83fb28194"}, + {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8aacb477dc26797ee089721536a292a664846489c49d3ef9725f992449eda5a8"}, + {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:522a11c934ea660ff8953eda090dcd2154d367dec1ae3c540aff9f8a5c109ab4"}, + {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:5bce0dc147ca85caa5d33debc4f4d65e8e8b5c97c7f9f660f215fa74fc49a321"}, + {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b4af9f25b49a7be47c0972139e59ec0e8285c371049df1a63b6ca81fdd216a2"}, + {file = "aiohttp-3.9.3-cp38-cp38-win32.whl", hash = "sha256:298abd678033b8571995650ccee753d9458dfa0377be4dba91e4491da3f2be63"}, + {file = "aiohttp-3.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:69361bfdca5468c0488d7017b9b1e5ce769d40b46a9f4a2eed26b78619e9396c"}, + {file = "aiohttp-3.9.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0fa43c32d1643f518491d9d3a730f85f5bbaedcbd7fbcae27435bb8b7a061b29"}, + {file = "aiohttp-3.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:835a55b7ca49468aaaac0b217092dfdff370e6c215c9224c52f30daaa735c1c1"}, + {file = "aiohttp-3.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:06a9b2c8837d9a94fae16c6223acc14b4dfdff216ab9b7202e07a9a09541168f"}, + {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abf151955990d23f84205286938796c55ff11bbfb4ccfada8c9c83ae6b3c89a3"}, + {file = 
"aiohttp-3.9.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59c26c95975f26e662ca78fdf543d4eeaef70e533a672b4113dd888bd2423caa"}, + {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f95511dd5d0e05fd9728bac4096319f80615aaef4acbecb35a990afebe953b0e"}, + {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:595f105710293e76b9dc09f52e0dd896bd064a79346234b521f6b968ffdd8e58"}, + {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7c8b816c2b5af5c8a436df44ca08258fc1a13b449393a91484225fcb7545533"}, + {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f1088fa100bf46e7b398ffd9904f4808a0612e1d966b4aa43baa535d1b6341eb"}, + {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f59dfe57bb1ec82ac0698ebfcdb7bcd0e99c255bd637ff613760d5f33e7c81b3"}, + {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:361a1026c9dd4aba0109e4040e2aecf9884f5cfe1b1b1bd3d09419c205e2e53d"}, + {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:363afe77cfcbe3a36353d8ea133e904b108feea505aa4792dad6585a8192c55a"}, + {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e2c45c208c62e955e8256949eb225bd8b66a4c9b6865729a786f2aa79b72e9d"}, + {file = "aiohttp-3.9.3-cp39-cp39-win32.whl", hash = "sha256:f7217af2e14da0856e082e96ff637f14ae45c10a5714b63c77f26d8884cf1051"}, + {file = "aiohttp-3.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:27468897f628c627230dba07ec65dc8d0db566923c48f29e084ce382119802bc"}, + {file = "aiohttp-3.9.3.tar.gz", hash = "sha256:90842933e5d1ff760fae6caca4b2b3edba53ba8f4b71e95dacf2818a2aca06f7"}, ] [package.dependencies] @@ -259,37 +259,40 @@ tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "p [[package]] name = "beartype" -version = "0.16.4" +version = "0.17.0" description = "Unbearably fast runtime type checking in pure Python." 
optional = false python-versions = ">=3.8.0" files = [ - {file = "beartype-0.16.4-py3-none-any.whl", hash = "sha256:64865952f9dff1e17f22684b3c7286fc79754553b47eaefeb1286224ae8c1bd9"}, - {file = "beartype-0.16.4.tar.gz", hash = "sha256:1ada89cf2d6eb30eb6e156eed2eb5493357782937910d74380918e53c2eae0bf"}, + {file = "beartype-0.17.0-py3-none-any.whl", hash = "sha256:fa84b77a8d037f2a39c4aa2f3dc71854afc7d79312e55a66b338da68fdd48c60"}, + {file = "beartype-0.17.0.tar.gz", hash = "sha256:3226fbba8c53b4e698acdb47dcaf3c0640151c4d405618c281e6631f4112947d"}, ] [package.extras] all = ["typing-extensions (>=3.10.0.0)"] -dev = ["autoapi (>=0.9.0)", "coverage (>=5.5)", "mypy (>=0.800)", "numpy", "pandera", "pydata-sphinx-theme (<=0.7.2)", "pytest (>=4.0.0)", "sphinx", "sphinx (>=4.2.0,<6.0.0)", "sphinxext-opengraph (>=0.7.5)", "tox (>=3.20.1)", "typing-extensions (>=3.10.0.0)"] +dev = ["autoapi (>=0.9.0)", "coverage (>=5.5)", "equinox", "mypy (>=0.800)", "numpy", "pandera", "pydata-sphinx-theme (<=0.7.2)", "pytest (>=4.0.0)", "sphinx", "sphinx (>=4.2.0,<6.0.0)", "sphinxext-opengraph (>=0.7.5)", "torch", "tox (>=3.20.1)", "typing-extensions (>=3.10.0.0)"] doc-rtd = ["autoapi (>=0.9.0)", "pydata-sphinx-theme (<=0.7.2)", "sphinx (>=4.2.0,<6.0.0)", "sphinxext-opengraph (>=0.7.5)"] -test-tox = ["mypy (>=0.800)", "numpy", "pandera", "pytest (>=4.0.0)", "sphinx", "typing-extensions (>=3.10.0.0)"] +test-tox = ["equinox", "mypy (>=0.800)", "numpy", "pandera", "pytest (>=4.0.0)", "sphinx", "torch", "typing-extensions (>=3.10.0.0)"] test-tox-coverage = ["coverage (>=5.5)"] [[package]] name = "beautifulsoup4" -version = "4.12.2" +version = "4.12.3" description = "Screen-scraping library" optional = false python-versions = ">=3.6.0" files = [ - {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, - {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, + {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, + {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, ] [package.dependencies] soupsieve = ">1.2" [package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] html5lib = ["html5lib"] lxml = ["lxml"] @@ -442,33 +445,33 @@ files = [ [[package]] name = "black" -version = "23.12.1" +version = "24.1.1" description = "The uncompromising code formatter." 
optional = true python-versions = ">=3.8" files = [ - {file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"}, - {file = "black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"}, - {file = "black-23.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a920b569dc6b3472513ba6ddea21f440d4b4c699494d2e972a1753cdc25df7b0"}, - {file = "black-23.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:3fa4be75ef2a6b96ea8d92b1587dd8cb3a35c7e3d51f0738ced0781c3aa3a5a3"}, - {file = "black-23.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8d4df77958a622f9b5a4c96edb4b8c0034f8434032ab11077ec6c56ae9f384ba"}, - {file = "black-23.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:602cfb1196dc692424c70b6507593a2b29aac0547c1be9a1d1365f0d964c353b"}, - {file = "black-23.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c4352800f14be5b4864016882cdba10755bd50805c95f728011bcb47a4afd59"}, - {file = "black-23.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:0808494f2b2df923ffc5723ed3c7b096bd76341f6213989759287611e9837d50"}, - {file = "black-23.12.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:25e57fd232a6d6ff3f4478a6fd0580838e47c93c83eaf1ccc92d4faf27112c4e"}, - {file = "black-23.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2d9e13db441c509a3763a7a3d9a49ccc1b4e974a47be4e08ade2a228876500ec"}, - {file = "black-23.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1bd9c210f8b109b1762ec9fd36592fdd528485aadb3f5849b2740ef17e674e"}, - {file = "black-23.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:ae76c22bde5cbb6bfd211ec343ded2163bba7883c7bc77f6b756a1049436fbb9"}, - {file = "black-23.12.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1fa88a0f74e50e4487477bc0bb900c6781dbddfdfa32691e780bf854c3b4a47f"}, - {file = "black-23.12.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a4d6a9668e45ad99d2f8ec70d5c8c04ef4f32f648ef39048d010b0689832ec6d"}, - {file = "black-23.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b18fb2ae6c4bb63eebe5be6bd869ba2f14fd0259bda7d18a46b764d8fb86298a"}, - {file = "black-23.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:c04b6d9d20e9c13f43eee8ea87d44156b8505ca8a3c878773f68b4e4812a421e"}, - {file = "black-23.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3e1b38b3135fd4c025c28c55ddfc236b05af657828a8a6abe5deec419a0b7055"}, - {file = "black-23.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4f0031eaa7b921db76decd73636ef3a12c942ed367d8c3841a0739412b260a54"}, - {file = "black-23.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97e56155c6b737854e60a9ab1c598ff2533d57e7506d97af5481141671abf3ea"}, - {file = "black-23.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:dd15245c8b68fe2b6bd0f32c1556509d11bb33aec9b5d0866dd8e2ed3dba09c2"}, - {file = "black-23.12.1-py3-none-any.whl", hash = "sha256:78baad24af0f033958cad29731e27363183e140962595def56423e626f4bee3e"}, - {file = "black-23.12.1.tar.gz", hash = "sha256:4ce3ef14ebe8d9509188014d96af1c456a910d5b5cbf434a09fef7e024b3d0d5"}, + {file = "black-24.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2588021038bd5ada078de606f2a804cadd0a3cc6a79cb3e9bb3a8bf581325a4c"}, + {file = "black-24.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a95915c98d6e32ca43809d46d932e2abc5f1f7d582ffbe65a5b4d1588af7445"}, + {file = 
"black-24.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fa6a0e965779c8f2afb286f9ef798df770ba2b6cee063c650b96adec22c056a"}, + {file = "black-24.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:5242ecd9e990aeb995b6d03dc3b2d112d4a78f2083e5a8e86d566340ae80fec4"}, + {file = "black-24.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fc1ec9aa6f4d98d022101e015261c056ddebe3da6a8ccfc2c792cbe0349d48b7"}, + {file = "black-24.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0269dfdea12442022e88043d2910429bed717b2d04523867a85dacce535916b8"}, + {file = "black-24.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3d64db762eae4a5ce04b6e3dd745dcca0fb9560eb931a5be97472e38652a161"}, + {file = "black-24.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:5d7b06ea8816cbd4becfe5f70accae953c53c0e53aa98730ceccb0395520ee5d"}, + {file = "black-24.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e2c8dfa14677f90d976f68e0c923947ae68fa3961d61ee30976c388adc0b02c8"}, + {file = "black-24.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a21725862d0e855ae05da1dd25e3825ed712eaaccef6b03017fe0853a01aa45e"}, + {file = "black-24.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07204d078e25327aad9ed2c64790d681238686bce254c910de640c7cc4fc3aa6"}, + {file = "black-24.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:a83fe522d9698d8f9a101b860b1ee154c1d25f8a82ceb807d319f085b2627c5b"}, + {file = "black-24.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08b34e85170d368c37ca7bf81cf67ac863c9d1963b2c1780c39102187ec8dd62"}, + {file = "black-24.1.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7258c27115c1e3b5de9ac6c4f9957e3ee2c02c0b39222a24dc7aa03ba0e986f5"}, + {file = "black-24.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40657e1b78212d582a0edecafef133cf1dd02e6677f539b669db4746150d38f6"}, + {file = "black-24.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:e298d588744efda02379521a19639ebcd314fba7a49be22136204d7ed1782717"}, + {file = "black-24.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:34afe9da5056aa123b8bfda1664bfe6fb4e9c6f311d8e4a6eb089da9a9173bf9"}, + {file = "black-24.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:854c06fb86fd854140f37fb24dbf10621f5dab9e3b0c29a690ba595e3d543024"}, + {file = "black-24.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3897ae5a21ca132efa219c029cce5e6bfc9c3d34ed7e892113d199c0b1b444a2"}, + {file = "black-24.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:ecba2a15dfb2d97105be74bbfe5128bc5e9fa8477d8c46766505c1dda5883aac"}, + {file = "black-24.1.1-py3-none-any.whl", hash = "sha256:5cdc2e2195212208fbcae579b931407c1fa9997584f0a415421748aeafff1168"}, + {file = "black-24.1.1.tar.gz", hash = "sha256:48b5760dcbfe5cf97fd4fba23946681f3a81514c6ab8a45b50da67ac8fbc6c7b"}, ] [package.dependencies] @@ -506,17 +509,17 @@ traittypes = ">=0.0.6" [[package]] name = "branca" -version = "0.7.0" +version = "0.7.1" description = "Generate complex HTML+JS pages with Python" optional = false python-versions = ">=3.7" files = [ - {file = "branca-0.7.0-py3-none-any.whl", hash = "sha256:c653d9a3fef1e6cd203757c77d3eb44810f11998506451f9a27d52b983500c16"}, - {file = "branca-0.7.0.tar.gz", hash = "sha256:503ccb589a9ee9464cb7b5b17e5ffd8d5082c5c28624197f58f20d4d377a68bb"}, + {file = "branca-0.7.1-py3-none-any.whl", hash = "sha256:70515944ed2d1ed2784c552508df58037ca19402a8a1069d57f9113e3e012f51"}, + {file = "branca-0.7.1.tar.gz", hash = 
"sha256:e6b6f37a37bc0abffd960c68c045a7fe025d628eff87fedf6ab6ca814812110c"}, ] [package.dependencies] -jinja2 = "*" +jinja2 = ">=3" [[package]] name = "build" @@ -818,91 +821,77 @@ dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] [[package]] name = "clickhouse-connect" -version = "0.6.23" +version = "0.7.0" description = "ClickHouse Database Core Driver for Python, Pandas, and Superset" optional = true -python-versions = "~=3.7" -files = [ - {file = "clickhouse-connect-0.6.23.tar.gz", hash = "sha256:a74d01349390c0e2713603539927f105c1b9772cd81f850deb1d1ec57e4d9cfc"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cfc8e1cd68f7824e90a3492bf64f66934ad95529fac282cf96bc5a50255a5932"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f0726b5f19343dde5b337e8495713a28e0449f42504ea47a691a5a39768ccd79"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e35c767497b22f4c9069f99c24f965ae266b2448e7d2c69407d82f632616bbf7"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c677748c2fb6087fce8fb185980cb539887db2253d0f81900c4a21ef38d7cb89"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:106c680114649d5bfacf76e26bcc87df8d07141b1a3c944099ba0ce297694c7e"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4cd72d85d246fe000377035b1858720b12045f3df1042cc03a5fad4dba6b7f78"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:4f7c68297ac933603768f5dd4f49c94f88dacd9344e099b0221ead6b9914643e"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cff31e4fd52517ad79c6d50037c1da7fcaf1270d4c1c70a52852701ff415d86b"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-win32.whl", hash = "sha256:931d9bf3ecd212107e43dd8ed735a79b840b8577d4508b2cf866b1503350b415"}, - {file = "clickhouse_connect-0.6.23-cp310-cp310-win_amd64.whl", hash = "sha256:a9e55a50fb165a7be30d335da84e04ec025b2783999312917df86815dc8be3af"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4d3a7734e48f0494764ef481c694e02bc78415df60a49d5119c032e75b5e1f8b"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5a66bee81dcbdea969f39a7f75b11225e985cfa752dccd148f54bacac800f72"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d62335114e5792fa95548465d831bb33a1b226c85b87b075c7e6c692a5edc77"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24b9fa3eb7d8cbc87f635f7942cb6817a38c6491c8b40cfb6a7c0a6a8e0d59e4"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ac7c459641dd99fa7e48921d2c4148d8c0cb171697aa487b55364b0b9081bf07"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:746be95964105fa2eca05ab2dab02b1319e9c94f4a9599b4d3c2894f9090c9bc"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f2593096fc0af049dcf55e03aaf3858bbc94bedddd1fd504087127ec48b68c7b"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:38f480f264b648333f90a9f715f2357bf6ec284a9152d3a4a249dea87c797a60"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-win32.whl", hash = "sha256:f9793b8ae15ca93f7ae5d2c96c7de79ab7f6cf311b0c115d9e7948f0887086a0"}, - {file = "clickhouse_connect-0.6.23-cp311-cp311-win_amd64.whl", hash = "sha256:e6301202780893d5e3f2f62874670790a450bcbf8009d48ca360d04197205efa"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:66883e21a1598688c2a32f46a3ab9a858eca609bcd6fa6e4e0e758993027f356"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:257482716a9563cd2c964543e46af01848779fcb3665063c30b49e13f82ad27a"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7489202b7eec462e40b52066393f5ec62c82f1e514013a4e9e5f5eab962ad61"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e297da4ab46a405ce3555b89573cd256c80efc505130e08bac673095d731c6d"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f5d0097ae7ef1ff13afb2d56e5b93dfca515480d491f280315b0c16ce58c93c"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1dbd63e6bd2189259a5a9506e8efe5d8117f3d8b114c8d76bb4397eed4751927"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1d861a3b7b877227fe136e6648f3aca070a69ed407fd77c49722ad6b8f3a5aa2"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e5912503717237e0d4cafee19fbe579442484bfe6923063053e21e42c952a043"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-win32.whl", hash = "sha256:d288cf60ef846720fa8b2d2758b72dc488072121d331d7c4b27547e935129472"}, - {file = "clickhouse_connect-0.6.23-cp312-cp312-win_amd64.whl", hash = "sha256:4948ca8f292984d1d0cd7ea3bd9aa909101bf62e7d0d7902546d19b3f965f3be"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ad213ef12b0c5a474e38a13b95113aa3aafe1a35d7e81035b4c1bcc2d8d50e93"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed9ea8f2be2dcc4cfda1d33ce07d61467c93f1dbb7a98f09d69464b991dcaba0"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3fd4dcdefcfa2a7175c3c97c53bf66c38544ef84a276932fae4ffcb4c273998"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:102a04bf1eb7612e0924031c751e31dd4bad58e79543c13e8805fddcecbbfe45"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ac3943d4d522bcb1a4becb8850cdb3bfba07713178e84e4cadcd955b9002e28c"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:7fe4d55c6f8b72eeedce439ed1287ea1971e30b54879df6059dc87f73bf472d2"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e2dc8127159d5415859af6ac8b42bc70b71709d834477a1dd82e5b147de66e82"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-win32.whl", hash = "sha256:854fcd6cbf7919aa2b9e9f92c52cb5b2d1c976c4e2145193910662237a8d879c"}, - {file = "clickhouse_connect-0.6.23-cp37-cp37m-win_amd64.whl", hash = "sha256:24c17054e395908eb483fad3dd899748a4d919e5b4e0db2a31c56df503f0921d"}, - {file = 
"clickhouse_connect-0.6.23-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3af001ef95f8dcb572f5cb4518d367b449fa6baf2b8fccc0e6112e04f1932b2b"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9ad33e0949dd8842beb34e6c5f01fac293bfa7701a2697f64d400d30b2858fe0"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b73130d09bb6eddf57d94d9c3bca4953af03231cc14a6757fddd9d3839720c3"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f27d725f054c54d6d8340b5545c8713ca6af640c75ade9eb0eef8b441ec37d66"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ac6bcbf730538106c376d029a0e9aa3e155490cae326e7256a51804d9576345"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9a7b35ccc8526456ad2794ab6af014506cb4472eed7f864d4d7d58bc4acf3b83"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3ac5fe6ac94ca77eed1ba440df81b5f4ff99fa85120afe46676e185f3f6f840d"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6eb8576ab497f9425e1dc3595e0fbe6c97615ae5dc7d184f2f65df874bb31539"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-win32.whl", hash = "sha256:c936e1adf8257acfc459282477ad65e2ef38eba26f72d58187154e8cbeaa1641"}, - {file = "clickhouse_connect-0.6.23-cp38-cp38-win_amd64.whl", hash = "sha256:07756dd0c610765000e9d7e202557cb6a06d1e0fd007234458d752998cd8c6da"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bcc1e103b1af2ae8b0485d1c09a55f8e9cb80e02fdaf8a039b813d07950a9586"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:10e6d20b61e5342fed752fb5221c10517af92182072fc792c5252541850d7340"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ce7caa2ceff666aaa86da11b9f42ddd09ae7ffe727db1617f352991f7a67667"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7cafc6ed2214321f3363fe5f23cf9880544ba05dc1820a994f0975bdd7e31d9"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f7e5ba4c78ef49354fac3d91eb951611430c8be2298f1dc2a1516be3149a41c"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a3691ed25e971bbf62c8bed843e80eecc0e4cb9b990e662e0916e8f2b05cd4ec"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8b43948da665bbcb5c60e3871e878e068384cd9d2f845bc02fc4c22c934831cd"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9d1b7fb932e45482199f4abce61c95ead8e8628cf1fb882c2b28dc11318742da"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-win32.whl", hash = "sha256:4315c7718a9cc8eedc4b40b53a954e2d5a5dfb705c4a659e3a167537889268da"}, - {file = "clickhouse_connect-0.6.23-cp39-cp39-win_amd64.whl", hash = "sha256:040307cad9c11f503290e75d14c0b402454502fa7ab3c742ad8dac1a419219eb"}, - {file = "clickhouse_connect-0.6.23-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:086599316e5a532c34dadcf9fa2ea19a923d0acdcc891a829b3cc5cc061fd26a"}, - {file = "clickhouse_connect-0.6.23-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:6bb73a0ee0a0161fce7c38b7f8f66e3c5f0598b8d1f3c30e24ccd17ba1c117b3"}, - {file = "clickhouse_connect-0.6.23-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e4b0111ed72058836a44313369dd05e7c550da8e8ca486834c599ae81c7cd6b"}, - {file = "clickhouse_connect-0.6.23-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e70e97eb15e89995a220fdec19b448b48f8ea65a014e71bc1cc17763a7d8fd0e"}, - {file = "clickhouse_connect-0.6.23-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:8e541c2f8921cd48dc57551391441b35af5991ae49f1d221ba77950ad195b807"}, - {file = "clickhouse_connect-0.6.23-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c7301862b6eb87aeb48f257932f60b3141777cae317217185279b7a036840e07"}, - {file = "clickhouse_connect-0.6.23-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f06348ecd72036d22d65d528221010c86559bdfc048f1633c5cd009993219a0c"}, - {file = "clickhouse_connect-0.6.23-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b40d1ec596f7c3ecf1e0d07916ab8c4f7ee52eb867758977335b285c4916e585"}, - {file = "clickhouse_connect-0.6.23-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09753a2ce1dfbe0a54fe8a7a464f67b2e0f01c1731f06d68a3ec821a00985d88"}, - {file = "clickhouse_connect-0.6.23-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:a8ff9600c9721a574c7716a2ad6b436fd043eb05a544ed08d895504d18fb6d5d"}, - {file = "clickhouse_connect-0.6.23-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7d5ec217ae361c8c18c605030f9d601d861e23dc23af502d9608e769f3e467b4"}, - {file = "clickhouse_connect-0.6.23-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de6bc3e4ac94545a6f80c49f49ad9a9945efe1084ecd89804ebbb048b022699"}, - {file = "clickhouse_connect-0.6.23-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d6277c7ec66585e1f03da95617de172aeb38232d8da8a3e69f67b4f7149017"}, - {file = "clickhouse_connect-0.6.23-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee1cb7130f0d5e03066e9d4b94ae2052dd96329466c20a3f8688eeebc6f16033"}, - {file = "clickhouse_connect-0.6.23-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:aa7009a68df2f464861c694a15f06221f308ee9758a6848c4541936d0b5a9448"}, - {file = "clickhouse_connect-0.6.23-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:019bf068b38cb0b94fda3cb51d776f556911777197d1a3d0e73eb41259449879"}, - {file = "clickhouse_connect-0.6.23-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a378b6c6b0c092b18e0169b0b94a1f709b80d192e8e6275cfe6eff9c3cb26df0"}, - {file = "clickhouse_connect-0.6.23-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd2c4356a7a496d8920c756b0ddac916d7a9a902e6183fe4da67c86a6bf19b34"}, - {file = "clickhouse_connect-0.6.23-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:075acc6085c8ece277ce91688a739cbfd54c48de2c839d554045457defdbb81c"}, - {file = "clickhouse_connect-0.6.23-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7fca7189195785f5ff2a0805e52dd68f684bd5e4f475ba5ade06c5c6769d562e"}, +python-versions = "~=3.8" +files = [ + {file = "clickhouse-connect-0.7.0.tar.gz", hash = "sha256:4fc0c7c58632237d91a26691507ab37dc28233461f6bbe42699a4f36bba86181"}, + {file = 
"clickhouse_connect-0.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0ca2e06e033afe8bbf5bad97b5766501f11886414b2f5b504a15cf703ad2d774"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:96480e2c36b265ec1b619e610e3d691be33327a0accb8ba4b4e9b3e6e0437e6f"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8f990b247caa6560f7b5d266d86364c68dbace9d44515c77b62bbd9bdeb8011"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6de3043b32f40d3300a0931ef91d783eba2d67e12437747a59dfda72b796987d"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80545c875038830bb57f28c37c0d0924f99d149cea8c603528abb37022b66ac5"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:01f8a33949d42085207638ed21d7e5442794680f276f9b3903511f6abe08bdce"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:860e117edfca7b4bdc89aa5f8dd89fc1625f90ec0ced0eccf5572bec205fb2c0"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:32a048eab8d415318c5983db7dfeb73dc431f1a53e2e253fffe795906bed1cc6"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-win32.whl", hash = "sha256:f26d9bc7a25193e4e27e636a8b3162ffd67c29c49945e0087ff27a0fbc87a980"}, + {file = "clickhouse_connect-0.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:ac6a5bc0fb93e003291a22c74802560dc7b47ac8e17c400014728072f3296ce4"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d5a037afd82a3ea21106f0de0b556e2ec619b2d836af5268381f939f8a78c2d"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8f4560a6eed525ce02aaa42891876e6566a59427a5c8709533fca3fabd49b308"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f03e79bce8535936a938eb5c6bb5d83ae51d70f2f8ecc09c9b6b56c55141b36"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7aac5cc6f769ba2b5fc9da6e331cdfe6d1586e3a2af59b28ff9b0408ddd4bf61"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dedf464abe72e1807b5fc86761760e5e736730c2ca2185ef2931b6d2fac860c"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3c3af22a296caec7680a1e6a94eccb78e2aabccd33dd5d5f37187566f6072cb2"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9c0a1e26584bce35642632c28aef99e9a19502ce1148ca78974f0e84fdfe2676"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a37ba5fe6d9dfde5299b6a04e2c4086ebe6c3b6652e5841de2b12fea51694d99"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-win32.whl", hash = "sha256:739779d942f2103991d85f0d0297a05e17f0ee46dbc370d1420590eb836a44b1"}, + {file = "clickhouse_connect-0.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:b9f2a19a2e53463694046e835dea78dfb1ab1891115148020568dc3d18f40e64"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6b9e1a818dcb2287c327f2ae5e103094bbeb50656b21f7e1536551f668a6348a"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:5cba0f75c07d8ee8b8acc974134b04184a9d971511a0cd0cc794d4de0edd4786"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab38f3cd100c1f97f24d12b41a97f18117f4e77e2b00d35e92898a115a328bef"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73a95a3a32f036aff1ce4616082bcb1b2246de36aad13dc60641fa592f7bbcee"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:71e54b434cf7905957338b8db8e2a9981a6d9bb0a8fa8ee32d6ce30a8f2e7996"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:dd3b4aec4cb695eacbcbbe6a3d88aef7b72e4829d5b1003edf87a4b0bebb17a1"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:ca0eb351d1b9e913887a340878bc444cfd1c4ded337446bf014c281a7254c089"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0625fe98c746f3d66baf30630863f61c1decd2e86dba8d024c7bc3175728794c"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-win32.whl", hash = "sha256:9b9b83e8e630564c4045ebf9ff38d6d5cef5fd9fb669ab7357dd981cd58959b4"}, + {file = "clickhouse_connect-0.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:ca0e3b7ece52cb24bee459b42eb2bc9d2460b53c5de47e99f89454f197509f3e"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:46558c4f54149fb82d06977e536ca19ee5d343ea77cdffbdd1398f534cb5b9a9"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6267326bf1a4e4f6803bead7a28fc148c499e5e4aec5aff9a142bde7a4b269b6"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31019259fdc38759884efaf7e5b5ea6b3612767ac52934f1f4e79913e66ddc09"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be688d9a885035c1604f846ea44d400af7d7e14c49b72ec04ee932216860755d"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b99319b8b08e4696e4011f8c8e3e5a5041a9f98920e8e2abf8c444e9e2d1aae2"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1ffc7465c509bb10c6d8f8d66b31298a203b6a85c137d2cd21195e86243eaf70"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0d3a2e600b50360ac36bb6b97ac44d4851ef2144a3c055df19fff2f48e84ab3e"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:85fedc194b35b7676660bbe510b6eb0fd8ae6c78dca4038dec09a93a227168e8"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-win32.whl", hash = "sha256:61b140694779843f6c2110f1068fb4acbcb3601599d9a721c897605e5939e3ac"}, + {file = "clickhouse_connect-0.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:29afdd5edc77dec17db140df4f1fde66ccffd384011627ce96cd73f0c67ed75f"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d8f14faeafcf9add0eeeb4781d4a5aa8e3b83648db401c5e76237b7a2c631141"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:85a12697c0c3ebcc24bc2c4f5636f986a36f040b28e079b90c7974e12db3424f"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db497029d455e07278b4f714d63936d4462e63554d68c3285f3e0a01e5f7aaa1"}, + {file = 
"clickhouse_connect-0.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b5462bbd9d776c899a16d17ec49ca4c43793565f5a6956fd64272eecb6bfc55"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d641717b0f675c6cd7c93dc452863a1eac6cf91d637b483a9c42d23b5617ec23"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a9531260d1ef35119ed9d909fda51578279270e38ecf54ff5f1d9d6b6a2b39f8"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:fa88a35cb47e38f4ce3d1c3dbc61656537de22c84a5d751f775b974a4efd2e32"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3eb7e0dc1c87e4e9126b2bc614e312022fa741f53f003d98b2f240e6ce8c1e1c"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-win32.whl", hash = "sha256:f479e6422578bc4fb7828f22b882e5294fe9ac9f9af8c660d216c24746bb2be0"}, + {file = "clickhouse_connect-0.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:b1c81338664b2457fae97c1334182e81b77ec057ea9ec3a47d682d14a03b6573"}, + {file = "clickhouse_connect-0.7.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7f167de0f3639aaa0807d011e175ff33be86e2727a4644da65a019306ff3f021"}, + {file = "clickhouse_connect-0.7.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:185ee65eab42bdd545e00c8244a72c797d1961173b78e37b0ea7b130ef0d9c73"}, + {file = "clickhouse_connect-0.7.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48dde9b8cd9daf7ec0e4baa72e94e40cdd749ae7aef1dfbe7c7d22af53dae8f4"}, + {file = "clickhouse_connect-0.7.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3d75b1a01770c04650a7dd3ebbee21369939b00125fbb70c02067ac779f523c8"}, + {file = "clickhouse_connect-0.7.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9f895290de6fa8347114a361cc944ade1ddeba895f936752533b85984d4d183e"}, + {file = "clickhouse_connect-0.7.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:359eab438b3d6f20090b592084493e04dac369e65d0cf4e1da3ecc84750b52c4"}, + {file = "clickhouse_connect-0.7.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efc6b0af171681844fbb39d7b756235aea5b416ce7fca163834e068afbd3f833"}, + {file = "clickhouse_connect-0.7.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cc1a64bc8fb91e594efbd5b4560e6c661ebf75a11d37e08d48c45f3f4e439f7"}, + {file = "clickhouse_connect-0.7.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99c2d25ceb1fbf187c7b9815373dbcdfc04d1b233dafb3547b56dfeca6904584"}, + {file = "clickhouse_connect-0.7.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:95a1e292b4cf230c2944bdc31c19c4e8fcbcd5609e24322ff5211af357beb26a"}, + {file = "clickhouse_connect-0.7.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e94de62c3dacfb7b6874f6593ad4268d38c17a1117847acdc1ad0c7b34c4e373"}, + {file = "clickhouse_connect-0.7.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a34a3f713f0148e30ddaf431af671ed16baf732eff7437ff2c7519adeda2f9c9"}, + {file = "clickhouse_connect-0.7.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9d08af8c5383d377a12e576106d7c3e0de0d03a3cbc6b9de89932e4b40f550d"}, + {file = 
"clickhouse_connect-0.7.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e06b70e4a4a54810ee293875febf71562c346688e2bc517c141958ef1c2af710"}, + {file = "clickhouse_connect-0.7.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:5c1bc46af3e0eca5a580aaecffd7dc47d541173d3189f250c59ffdd9d1cb0dd1"}, ] [package.dependencies] @@ -1078,63 +1067,63 @@ test-no-images = ["pytest", "pytest-cov", "pytest-xdist", "wurlitzer"] [[package]] name = "coverage" -version = "7.4.0" +version = "7.4.1" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:36b0ea8ab20d6a7564e89cb6135920bc9188fb5f1f7152e94e8300b7b189441a"}, - {file = "coverage-7.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0676cd0ba581e514b7f726495ea75aba3eb20899d824636c6f59b0ed2f88c471"}, - {file = "coverage-7.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0ca5c71a5a1765a0f8f88022c52b6b8be740e512980362f7fdbb03725a0d6b9"}, - {file = "coverage-7.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7c97726520f784239f6c62506bc70e48d01ae71e9da128259d61ca5e9788516"}, - {file = "coverage-7.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:815ac2d0f3398a14286dc2cea223a6f338109f9ecf39a71160cd1628786bc6f5"}, - {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:80b5ee39b7f0131ebec7968baa9b2309eddb35b8403d1869e08f024efd883566"}, - {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5b2ccb7548a0b65974860a78c9ffe1173cfb5877460e5a229238d985565574ae"}, - {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:995ea5c48c4ebfd898eacb098164b3cc826ba273b3049e4a889658548e321b43"}, - {file = "coverage-7.4.0-cp310-cp310-win32.whl", hash = "sha256:79287fd95585ed36e83182794a57a46aeae0b64ca53929d1176db56aacc83451"}, - {file = "coverage-7.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:5b14b4f8760006bfdb6e08667af7bc2d8d9bfdb648351915315ea17645347137"}, - {file = "coverage-7.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:04387a4a6ecb330c1878907ce0dc04078ea72a869263e53c72a1ba5bbdf380ca"}, - {file = "coverage-7.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea81d8f9691bb53f4fb4db603203029643caffc82bf998ab5b59ca05560f4c06"}, - {file = "coverage-7.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74775198b702868ec2d058cb92720a3c5a9177296f75bd97317c787daf711505"}, - {file = "coverage-7.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76f03940f9973bfaee8cfba70ac991825611b9aac047e5c80d499a44079ec0bc"}, - {file = "coverage-7.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:485e9f897cf4856a65a57c7f6ea3dc0d4e6c076c87311d4bc003f82cfe199d25"}, - {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6ae8c9d301207e6856865867d762a4b6fd379c714fcc0607a84b92ee63feff70"}, - {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bf477c355274a72435ceb140dc42de0dc1e1e0bf6e97195be30487d8eaaf1a09"}, - {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:83c2dda2666fe32332f8e87481eed056c8b4d163fe18ecc690b02802d36a4d26"}, 
- {file = "coverage-7.4.0-cp311-cp311-win32.whl", hash = "sha256:697d1317e5290a313ef0d369650cfee1a114abb6021fa239ca12b4849ebbd614"}, - {file = "coverage-7.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:26776ff6c711d9d835557ee453082025d871e30b3fd6c27fcef14733f67f0590"}, - {file = "coverage-7.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:13eaf476ec3e883fe3e5fe3707caeb88268a06284484a3daf8250259ef1ba143"}, - {file = "coverage-7.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846f52f46e212affb5bcf131c952fb4075b55aae6b61adc9856222df89cbe3e2"}, - {file = "coverage-7.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26f66da8695719ccf90e794ed567a1549bb2644a706b41e9f6eae6816b398c4a"}, - {file = "coverage-7.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:164fdcc3246c69a6526a59b744b62e303039a81e42cfbbdc171c91a8cc2f9446"}, - {file = "coverage-7.4.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:316543f71025a6565677d84bc4df2114e9b6a615aa39fb165d697dba06a54af9"}, - {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bb1de682da0b824411e00a0d4da5a784ec6496b6850fdf8c865c1d68c0e318dd"}, - {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:0e8d06778e8fbffccfe96331a3946237f87b1e1d359d7fbe8b06b96c95a5407a"}, - {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a56de34db7b7ff77056a37aedded01b2b98b508227d2d0979d373a9b5d353daa"}, - {file = "coverage-7.4.0-cp312-cp312-win32.whl", hash = "sha256:51456e6fa099a8d9d91497202d9563a320513fcf59f33991b0661a4a6f2ad450"}, - {file = "coverage-7.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:cd3c1e4cb2ff0083758f09be0f77402e1bdf704adb7f89108007300a6da587d0"}, - {file = "coverage-7.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e9d1bf53c4c8de58d22e0e956a79a5b37f754ed1ffdbf1a260d9dcfa2d8a325e"}, - {file = "coverage-7.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:109f5985182b6b81fe33323ab4707011875198c41964f014579cf82cebf2bb85"}, - {file = "coverage-7.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cc9d4bc55de8003663ec94c2f215d12d42ceea128da8f0f4036235a119c88ac"}, - {file = "coverage-7.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc6d65b21c219ec2072c1293c505cf36e4e913a3f936d80028993dd73c7906b1"}, - {file = "coverage-7.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a10a4920def78bbfff4eff8a05c51be03e42f1c3735be42d851f199144897ba"}, - {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b8e99f06160602bc64da35158bb76c73522a4010f0649be44a4e167ff8555952"}, - {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7d360587e64d006402b7116623cebf9d48893329ef035278969fa3bbf75b697e"}, - {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:29f3abe810930311c0b5d1a7140f6395369c3db1be68345638c33eec07535105"}, - {file = "coverage-7.4.0-cp38-cp38-win32.whl", hash = "sha256:5040148f4ec43644702e7b16ca864c5314ccb8ee0751ef617d49aa0e2d6bf4f2"}, - {file = "coverage-7.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:9864463c1c2f9cb3b5db2cf1ff475eed2f0b4285c2aaf4d357b69959941aa555"}, - {file = "coverage-7.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:936d38794044b26c99d3dd004d8af0035ac535b92090f7f2bb5aa9c8e2f5cd42"}, - {file = "coverage-7.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:799c8f873794a08cdf216aa5d0531c6a3747793b70c53f70e98259720a6fe2d7"}, - {file = "coverage-7.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7defbb9737274023e2d7af02cac77043c86ce88a907c58f42b580a97d5bcca9"}, - {file = "coverage-7.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a1526d265743fb49363974b7aa8d5899ff64ee07df47dd8d3e37dcc0818f09ed"}, - {file = "coverage-7.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf635a52fc1ea401baf88843ae8708591aa4adff875e5c23220de43b1ccf575c"}, - {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:756ded44f47f330666843b5781be126ab57bb57c22adbb07d83f6b519783b870"}, - {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0eb3c2f32dabe3a4aaf6441dde94f35687224dfd7eb2a7f47f3fd9428e421058"}, - {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bfd5db349d15c08311702611f3dccbef4b4e2ec148fcc636cf8739519b4a5c0f"}, - {file = "coverage-7.4.0-cp39-cp39-win32.whl", hash = "sha256:53d7d9158ee03956e0eadac38dfa1ec8068431ef8058fe6447043db1fb40d932"}, - {file = "coverage-7.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:cfd2a8b6b0d8e66e944d47cdec2f47c48fef2ba2f2dff5a9a75757f64172857e"}, - {file = "coverage-7.4.0-pp38.pp39.pp310-none-any.whl", hash = "sha256:c530833afc4707fe48524a44844493f36d8727f04dcce91fb978c414a8556cc6"}, - {file = "coverage-7.4.0.tar.gz", hash = "sha256:707c0f58cb1712b8809ece32b68996ee1e609f71bd14615bd8f87a1293cb610e"}, + {file = "coverage-7.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:077d366e724f24fc02dbfe9d946534357fda71af9764ff99d73c3c596001bbd7"}, + {file = "coverage-7.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0193657651f5399d433c92f8ae264aff31fc1d066deee4b831549526433f3f61"}, + {file = "coverage-7.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d17bbc946f52ca67adf72a5ee783cd7cd3477f8f8796f59b4974a9b59cacc9ee"}, + {file = "coverage-7.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3277f5fa7483c927fe3a7b017b39351610265308f5267ac6d4c2b64cc1d8d25"}, + {file = "coverage-7.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dceb61d40cbfcf45f51e59933c784a50846dc03211054bd76b421a713dcdf19"}, + {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6008adeca04a445ea6ef31b2cbaf1d01d02986047606f7da266629afee982630"}, + {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c61f66d93d712f6e03369b6a7769233bfda880b12f417eefdd4f16d1deb2fc4c"}, + {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b9bb62fac84d5f2ff523304e59e5c439955fb3b7f44e3d7b2085184db74d733b"}, + {file = "coverage-7.4.1-cp310-cp310-win32.whl", hash = "sha256:f86f368e1c7ce897bf2457b9eb61169a44e2ef797099fb5728482b8d69f3f016"}, + {file = "coverage-7.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:869b5046d41abfea3e381dd143407b0d29b8282a904a19cb908fa24d090cc018"}, + {file = "coverage-7.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8ffb498a83d7e0305968289441914154fb0ef5d8b3157df02a90c6695978295"}, + {file = "coverage-7.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash 
= "sha256:3cacfaefe6089d477264001f90f55b7881ba615953414999c46cc9713ff93c8c"}, + {file = "coverage-7.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d6850e6e36e332d5511a48a251790ddc545e16e8beaf046c03985c69ccb2676"}, + {file = "coverage-7.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18e961aa13b6d47f758cc5879383d27b5b3f3dcd9ce8cdbfdc2571fe86feb4dd"}, + {file = "coverage-7.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfd1e1b9f0898817babf840b77ce9fe655ecbe8b1b327983df485b30df8cc011"}, + {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6b00e21f86598b6330f0019b40fb397e705135040dbedc2ca9a93c7441178e74"}, + {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:536d609c6963c50055bab766d9951b6c394759190d03311f3e9fcf194ca909e1"}, + {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7ac8f8eb153724f84885a1374999b7e45734bf93a87d8df1e7ce2146860edef6"}, + {file = "coverage-7.4.1-cp311-cp311-win32.whl", hash = "sha256:f3771b23bb3675a06f5d885c3630b1d01ea6cac9e84a01aaf5508706dba546c5"}, + {file = "coverage-7.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:9d2f9d4cc2a53b38cabc2d6d80f7f9b7e3da26b2f53d48f05876fef7956b6968"}, + {file = "coverage-7.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f68ef3660677e6624c8cace943e4765545f8191313a07288a53d3da188bd8581"}, + {file = "coverage-7.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23b27b8a698e749b61809fb637eb98ebf0e505710ec46a8aa6f1be7dc0dc43a6"}, + {file = "coverage-7.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e3424c554391dc9ef4a92ad28665756566a28fecf47308f91841f6c49288e66"}, + {file = "coverage-7.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0860a348bf7004c812c8368d1fc7f77fe8e4c095d661a579196a9533778e156"}, + {file = "coverage-7.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe558371c1bdf3b8fa03e097c523fb9645b8730399c14fe7721ee9c9e2a545d3"}, + {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3468cc8720402af37b6c6e7e2a9cdb9f6c16c728638a2ebc768ba1ef6f26c3a1"}, + {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:02f2edb575d62172aa28fe00efe821ae31f25dc3d589055b3fb64d51e52e4ab1"}, + {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ca6e61dc52f601d1d224526360cdeab0d0712ec104a2ce6cc5ccef6ed9a233bc"}, + {file = "coverage-7.4.1-cp312-cp312-win32.whl", hash = "sha256:ca7b26a5e456a843b9b6683eada193fc1f65c761b3a473941efe5a291f604c74"}, + {file = "coverage-7.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:85ccc5fa54c2ed64bd91ed3b4a627b9cce04646a659512a051fa82a92c04a448"}, + {file = "coverage-7.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8bdb0285a0202888d19ec6b6d23d5990410decb932b709f2b0dfe216d031d218"}, + {file = "coverage-7.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:918440dea04521f499721c039863ef95433314b1db00ff826a02580c1f503e45"}, + {file = "coverage-7.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:379d4c7abad5afbe9d88cc31ea8ca262296480a86af945b08214eb1a556a3e4d"}, + {file = "coverage-7.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:b094116f0b6155e36a304ff912f89bbb5067157aff5f94060ff20bbabdc8da06"}, + {file = "coverage-7.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2f5968608b1fe2a1d00d01ad1017ee27efd99b3437e08b83ded9b7af3f6f766"}, + {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:10e88e7f41e6197ea0429ae18f21ff521d4f4490aa33048f6c6f94c6045a6a75"}, + {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a4a3907011d39dbc3e37bdc5df0a8c93853c369039b59efa33a7b6669de04c60"}, + {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6d224f0c4c9c98290a6990259073f496fcec1b5cc613eecbd22786d398ded3ad"}, + {file = "coverage-7.4.1-cp38-cp38-win32.whl", hash = "sha256:23f5881362dcb0e1a92b84b3c2809bdc90db892332daab81ad8f642d8ed55042"}, + {file = "coverage-7.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:a07f61fc452c43cd5328b392e52555f7d1952400a1ad09086c4a8addccbd138d"}, + {file = "coverage-7.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8e738a492b6221f8dcf281b67129510835461132b03024830ac0e554311a5c54"}, + {file = "coverage-7.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:46342fed0fff72efcda77040b14728049200cbba1279e0bf1188f1f2078c1d70"}, + {file = "coverage-7.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9641e21670c68c7e57d2053ddf6c443e4f0a6e18e547e86af3fad0795414a628"}, + {file = "coverage-7.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aeb2c2688ed93b027eb0d26aa188ada34acb22dceea256d76390eea135083950"}, + {file = "coverage-7.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d12c923757de24e4e2110cf8832d83a886a4cf215c6e61ed506006872b43a6d1"}, + {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0491275c3b9971cdbd28a4595c2cb5838f08036bca31765bad5e17edf900b2c7"}, + {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8dfc5e195bbef80aabd81596ef52a1277ee7143fe419efc3c4d8ba2754671756"}, + {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1a78b656a4d12b0490ca72651fe4d9f5e07e3c6461063a9b6265ee45eb2bdd35"}, + {file = "coverage-7.4.1-cp39-cp39-win32.whl", hash = "sha256:f90515974b39f4dea2f27c0959688621b46d96d5a626cf9c53dbc653a895c05c"}, + {file = "coverage-7.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:64e723ca82a84053dd7bfcc986bdb34af8d9da83c521c19d6b472bc6880e191a"}, + {file = "coverage-7.4.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:32a8d985462e37cfdab611a6f95b09d7c091d07668fdc26e47a725ee575fe166"}, + {file = "coverage-7.4.1.tar.gz", hash = "sha256:1ed4b95480952b1a26d863e546fa5094564aa0065e1e5f0d4d0041f293251d04"}, ] [package.dependencies] @@ -1216,13 +1205,13 @@ tests = ["pytest", "pytest-cov", "pytest-xdist"] [[package]] name = "dask" -version = "2024.1.0" +version = "2024.1.1" description = "Parallel PyData with Task Scheduling" optional = false python-versions = ">=3.9" files = [ - {file = "dask-2024.1.0-py3-none-any.whl", hash = "sha256:717102ef7c309297291095a0061d374f3b72e11ce4e1115ab9faff940e274b4b"}, - {file = "dask-2024.1.0.tar.gz", hash = "sha256:f24fdc7a07e59a1403bf6903e6d8dc15ed6f8607d3311b4f00f88d8a2ac63e49"}, + {file = "dask-2024.1.1-py3-none-any.whl", hash = "sha256:860ce2797905095beff0187c214840b80c77d752dcb9098a8283e3655a762bf5"}, + {file = "dask-2024.1.1.tar.gz", hash = 
"sha256:d0dc92e81ce68594a0a0ce23ba33f4d648f2c2f4217ab9b79068b7ecfb0416c7"}, ] [package.dependencies] @@ -1242,7 +1231,7 @@ array = ["numpy (>=1.21)"] complete = ["dask[array,dataframe,diagnostics,distributed]", "lz4 (>=4.3.2)", "pyarrow (>=7.0)", "pyarrow-hotfix"] dataframe = ["dask[array]", "pandas (>=1.3)"] diagnostics = ["bokeh (>=2.4.2)", "jinja2 (>=2.10.3)"] -distributed = ["distributed (==2024.1.0)"] +distributed = ["distributed (==2024.1.1)"] test = ["pandas[test]", "pre-commit", "pytest", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytest-xdist"] [[package]] @@ -1355,19 +1344,19 @@ files = [ [[package]] name = "distributed" -version = "2024.1.0" +version = "2024.1.1" description = "Distributed scheduler for Dask" optional = false python-versions = ">=3.9" files = [ - {file = "distributed-2024.1.0-py3-none-any.whl", hash = "sha256:b552c9331350ba0e7cb8eccb1da8942b44997ccb680338f61c43fe9843c69988"}, - {file = "distributed-2024.1.0.tar.gz", hash = "sha256:f1d0e2dd5249085e32c6ff5c0ce0521c7e844dd52337683a69363a6bb1799a30"}, + {file = "distributed-2024.1.1-py3-none-any.whl", hash = "sha256:cf05d3b38e1700339b3e36395729ab62110e723efefaecc21a8260fdc7555cf9"}, + {file = "distributed-2024.1.1.tar.gz", hash = "sha256:28cf5e9f4f07197b03ea8e5272e374ce2b9e9dc6742f6c9b525fd81645213c67"}, ] [package.dependencies] click = ">=8.0" cloudpickle = ">=1.5.0" -dask = "2024.1.0" +dask = "2024.1.1" jinja2 = ">=2.10.3" locket = ">=1.0.0" msgpack = ">=1.0.0" @@ -1884,22 +1873,24 @@ gcsfuse = ["fusepy"] [[package]] name = "gdown" -version = "4.7.1" -description = "Google Drive direct download of big files." +version = "5.0.1" +description = "Google Drive Public File/Folder Downloader" optional = false -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "gdown-4.7.1-py3-none-any.whl", hash = "sha256:65d495699e7c2c61af0d0e9c32748fb4f79abaf80d747a87456c7be14aac2560"}, - {file = "gdown-4.7.1.tar.gz", hash = "sha256:347f23769679aaf7efa73e5655270fcda8ca56be65eb84a4a21d143989541045"}, + {file = "gdown-5.0.1-py3-none-any.whl", hash = "sha256:3f595fcfd4b1bccd5cf73453f60984c5fa1c18eed499277a52b23337238c2670"}, + {file = "gdown-5.0.1.tar.gz", hash = "sha256:173557b4d33aad9f7dc75ce2ff963d8b313f36371e15da4b5ebb35ac6c7d5af6"}, ] [package.dependencies] beautifulsoup4 = "*" filelock = "*" requests = {version = "*", extras = ["socks"]} -six = "*" tqdm = "*" +[package.extras] +test = ["build", "mypy", "pytest", "ruff", "twine", "types-requests"] + [[package]] name = "geojson" version = "3.1.0" @@ -1913,13 +1904,13 @@ files = [ [[package]] name = "geopandas" -version = "0.14.2" +version = "0.14.3" description = "Geographic pandas extensions" optional = false python-versions = ">=3.9" files = [ - {file = "geopandas-0.14.2-py3-none-any.whl", hash = "sha256:0efa61235a68862c1c6be89fc3707cdeba67667d5676bb19e24f3c57a8c2f723"}, - {file = "geopandas-0.14.2.tar.gz", hash = "sha256:6e71d57b8376f9fdc9f1c3aa3170e7e420e91778de854f51013ae66fd371ccdb"}, + {file = "geopandas-0.14.3-py3-none-any.whl", hash = "sha256:41b31ad39e21bc9e8c4254f78f8dc4ce3d33d144e22e630a00bb336c83160204"}, + {file = "geopandas-0.14.3.tar.gz", hash = "sha256:748af035d4a068a4ae00cab384acb61d387685c833b0022e0729aa45216b23ac"}, ] [package.dependencies] @@ -1931,13 +1922,13 @@ shapely = ">=1.8.0" [[package]] name = "google-api-core" -version = "2.15.0" +version = "2.16.1" description = "Google API client core library" optional = false python-versions = ">=3.7" files = [ - {file = "google-api-core-2.15.0.tar.gz", hash = 
"sha256:abc978a72658f14a2df1e5e12532effe40f94f868f6e23d95133bd6abcca35ca"}, - {file = "google_api_core-2.15.0-py3-none-any.whl", hash = "sha256:2aa56d2be495551e66bbff7f729b790546f87d5c90e74781aa77233bcb395a8a"}, + {file = "google-api-core-2.16.1.tar.gz", hash = "sha256:7f668ffa3d5b9f3c6930407e5f5d691c05a376050a5a5fd772b9dc32e70a0c30"}, + {file = "google_api_core-2.16.1-py3-none-any.whl", hash = "sha256:257e9e152cd18da0c6701113c122ade04dca04731e179fc5c7dca48e1396ec4c"}, ] [package.dependencies] @@ -1961,13 +1952,13 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] name = "google-auth" -version = "2.26.2" +version = "2.27.0" description = "Google Authentication Library" optional = false python-versions = ">=3.7" files = [ - {file = "google-auth-2.26.2.tar.gz", hash = "sha256:97327dbbf58cccb58fc5a1712bba403ae76668e64814eb30f7316f7e27126b81"}, - {file = "google_auth-2.26.2-py2.py3-none-any.whl", hash = "sha256:3f445c8ce9b61ed6459aad86d8ccdba4a9afed841b2d1451a11ef4db08957424"}, + {file = "google-auth-2.27.0.tar.gz", hash = "sha256:e863a56ccc2d8efa83df7a80272601e43487fa9a728a376205c86c26aaefa821"}, + {file = "google_auth-2.27.0-py2.py3-none-any.whl", hash = "sha256:8e4bad367015430ff253fe49d500fdc3396c1a434db5740828c728e45bcce245"}, ] [package.dependencies] @@ -2002,13 +1993,13 @@ tool = ["click (>=6.0.0)"] [[package]] name = "google-cloud-bigquery" -version = "3.16.0" +version = "3.17.1" description = "Google BigQuery API client library" optional = true python-versions = ">=3.7" files = [ - {file = "google-cloud-bigquery-3.16.0.tar.gz", hash = "sha256:1d6abf4b1d740df17cb43a078789872af8059a0b1dd999f32ea69ebc6f7ba7ef"}, - {file = "google_cloud_bigquery-3.16.0-py2.py3-none-any.whl", hash = "sha256:8bac7754f92bf87ee81f38deabb7554d82bb9591fbe06a5c82f33e46e5a482f9"}, + {file = "google-cloud-bigquery-3.17.1.tar.gz", hash = "sha256:0ae07b90d5052ba3a296a2210a2144c28469300d71f6f455881f94c2df543057"}, + {file = "google_cloud_bigquery-3.17.1-py2.py3-none-any.whl", hash = "sha256:7a9a92c7b1f6a6bf8b4c05c150e49f4ad1a03dd591dbd4522381b3f23bf07c73"}, ] [package.dependencies] @@ -2298,13 +2289,13 @@ test = ["objgraph", "psutil"] [[package]] name = "griffe" -version = "0.38.1" +version = "0.40.0" description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." 
optional = false python-versions = ">=3.8" files = [ - {file = "griffe-0.38.1-py3-none-any.whl", hash = "sha256:334c79d3b5964ade65c05dfcaf53518c576dedd387aaba5c9fd71212f34f1483"}, - {file = "griffe-0.38.1.tar.gz", hash = "sha256:bd68d7da7f3d87bc57eb9962b250db123efd9bbcc06c11c1a91b6e583b2a9361"}, + {file = "griffe-0.40.0-py3-none-any.whl", hash = "sha256:db1da6d1d8e08cbb20f1a7dee8c09da940540c2d4c1bfa26a9091cf6fc36a9ec"}, + {file = "griffe-0.40.0.tar.gz", hash = "sha256:76c4439eaa2737af46ae003c331ab6ca79c5365b552f7b5aed263a3b4125735b"}, ] [package.dependencies] @@ -2312,84 +2303,84 @@ colorama = ">=0.4" [[package]] name = "grpcio" -version = "1.60.0" +version = "1.60.1" description = "HTTP/2-based RPC framework" optional = true python-versions = ">=3.7" files = [ - {file = "grpcio-1.60.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:d020cfa595d1f8f5c6b343530cd3ca16ae5aefdd1e832b777f9f0eb105f5b139"}, - {file = "grpcio-1.60.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b98f43fcdb16172dec5f4b49f2fece4b16a99fd284d81c6bbac1b3b69fcbe0ff"}, - {file = "grpcio-1.60.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:20e7a4f7ded59097c84059d28230907cd97130fa74f4a8bfd1d8e5ba18c81491"}, - {file = "grpcio-1.60.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:452ca5b4afed30e7274445dd9b441a35ece656ec1600b77fff8c216fdf07df43"}, - {file = "grpcio-1.60.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43e636dc2ce9ece583b3e2ca41df5c983f4302eabc6d5f9cd04f0562ee8ec1ae"}, - {file = "grpcio-1.60.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6e306b97966369b889985a562ede9d99180def39ad42c8014628dd3cc343f508"}, - {file = "grpcio-1.60.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f897c3b127532e6befdcf961c415c97f320d45614daf84deba0a54e64ea2457b"}, - {file = "grpcio-1.60.0-cp310-cp310-win32.whl", hash = "sha256:b87efe4a380887425bb15f220079aa8336276398dc33fce38c64d278164f963d"}, - {file = "grpcio-1.60.0-cp310-cp310-win_amd64.whl", hash = "sha256:a9c7b71211f066908e518a2ef7a5e211670761651039f0d6a80d8d40054047df"}, - {file = "grpcio-1.60.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:fb464479934778d7cc5baf463d959d361954d6533ad34c3a4f1d267e86ee25fd"}, - {file = "grpcio-1.60.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:4b44d7e39964e808b071714666a812049765b26b3ea48c4434a3b317bac82f14"}, - {file = "grpcio-1.60.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:90bdd76b3f04bdb21de5398b8a7c629676c81dfac290f5f19883857e9371d28c"}, - {file = "grpcio-1.60.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91229d7203f1ef0ab420c9b53fe2ca5c1fbeb34f69b3bc1b5089466237a4a134"}, - {file = "grpcio-1.60.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b36a2c6d4920ba88fa98075fdd58ff94ebeb8acc1215ae07d01a418af4c0253"}, - {file = "grpcio-1.60.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:297eef542156d6b15174a1231c2493ea9ea54af8d016b8ca7d5d9cc65cfcc444"}, - {file = "grpcio-1.60.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:87c9224acba0ad8bacddf427a1c2772e17ce50b3042a789547af27099c5f751d"}, - {file = "grpcio-1.60.0-cp311-cp311-win32.whl", hash = "sha256:95ae3e8e2c1b9bf671817f86f155c5da7d49a2289c5cf27a319458c3e025c320"}, - {file = "grpcio-1.60.0-cp311-cp311-win_amd64.whl", hash = "sha256:467a7d31554892eed2aa6c2d47ded1079fc40ea0b9601d9f79204afa8902274b"}, - {file = "grpcio-1.60.0-cp312-cp312-linux_armv7l.whl", hash = 
"sha256:a7152fa6e597c20cb97923407cf0934e14224af42c2b8d915f48bc3ad2d9ac18"}, - {file = "grpcio-1.60.0-cp312-cp312-macosx_10_10_universal2.whl", hash = "sha256:7db16dd4ea1b05ada504f08d0dca1cd9b926bed3770f50e715d087c6f00ad748"}, - {file = "grpcio-1.60.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:b0571a5aef36ba9177e262dc88a9240c866d903a62799e44fd4aae3f9a2ec17e"}, - {file = "grpcio-1.60.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6fd9584bf1bccdfff1512719316efa77be235469e1e3295dce64538c4773840b"}, - {file = "grpcio-1.60.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6a478581b1a1a8fdf3318ecb5f4d0cda41cacdffe2b527c23707c9c1b8fdb55"}, - {file = "grpcio-1.60.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:77c8a317f0fd5a0a2be8ed5cbe5341537d5c00bb79b3bb27ba7c5378ba77dbca"}, - {file = "grpcio-1.60.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1c30bb23a41df95109db130a6cc1b974844300ae2e5d68dd4947aacba5985aa5"}, - {file = "grpcio-1.60.0-cp312-cp312-win32.whl", hash = "sha256:2aef56e85901c2397bd557c5ba514f84de1f0ae5dd132f5d5fed042858115951"}, - {file = "grpcio-1.60.0-cp312-cp312-win_amd64.whl", hash = "sha256:e381fe0c2aa6c03b056ad8f52f8efca7be29fb4d9ae2f8873520843b6039612a"}, - {file = "grpcio-1.60.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:92f88ca1b956eb8427a11bb8b4a0c0b2b03377235fc5102cb05e533b8693a415"}, - {file = "grpcio-1.60.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:e278eafb406f7e1b1b637c2cf51d3ad45883bb5bd1ca56bc05e4fc135dfdaa65"}, - {file = "grpcio-1.60.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:a48edde788b99214613e440fce495bbe2b1e142a7f214cce9e0832146c41e324"}, - {file = "grpcio-1.60.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de2ad69c9a094bf37c1102b5744c9aec6cf74d2b635558b779085d0263166454"}, - {file = "grpcio-1.60.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:073f959c6f570797272f4ee9464a9997eaf1e98c27cb680225b82b53390d61e6"}, - {file = "grpcio-1.60.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c826f93050c73e7769806f92e601e0efdb83ec8d7c76ddf45d514fee54e8e619"}, - {file = "grpcio-1.60.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:9e30be89a75ee66aec7f9e60086fadb37ff8c0ba49a022887c28c134341f7179"}, - {file = "grpcio-1.60.0-cp37-cp37m-win_amd64.whl", hash = "sha256:b0fb2d4801546598ac5cd18e3ec79c1a9af8b8f2a86283c55a5337c5aeca4b1b"}, - {file = "grpcio-1.60.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:9073513ec380434eb8d21970e1ab3161041de121f4018bbed3146839451a6d8e"}, - {file = "grpcio-1.60.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:74d7d9fa97809c5b892449b28a65ec2bfa458a4735ddad46074f9f7d9550ad13"}, - {file = "grpcio-1.60.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:1434ca77d6fed4ea312901122dc8da6c4389738bf5788f43efb19a838ac03ead"}, - {file = "grpcio-1.60.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e61e76020e0c332a98290323ecfec721c9544f5b739fab925b6e8cbe1944cf19"}, - {file = "grpcio-1.60.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675997222f2e2f22928fbba640824aebd43791116034f62006e19730715166c0"}, - {file = "grpcio-1.60.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5208a57eae445ae84a219dfd8b56e04313445d146873117b5fa75f3245bc1390"}, - {file = "grpcio-1.60.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:428d699c8553c27e98f4d29fdc0f0edc50e9a8a7590bfd294d2edb0da7be3629"}, - {file = 
"grpcio-1.60.0-cp38-cp38-win32.whl", hash = "sha256:83f2292ae292ed5a47cdcb9821039ca8e88902923198f2193f13959360c01860"}, - {file = "grpcio-1.60.0-cp38-cp38-win_amd64.whl", hash = "sha256:705a68a973c4c76db5d369ed573fec3367d7d196673fa86614b33d8c8e9ebb08"}, - {file = "grpcio-1.60.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:c193109ca4070cdcaa6eff00fdb5a56233dc7610216d58fb81638f89f02e4968"}, - {file = "grpcio-1.60.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:676e4a44e740deaba0f4d95ba1d8c5c89a2fcc43d02c39f69450b1fa19d39590"}, - {file = "grpcio-1.60.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:5ff21e000ff2f658430bde5288cb1ac440ff15c0d7d18b5fb222f941b46cb0d2"}, - {file = "grpcio-1.60.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c86343cf9ff7b2514dd229bdd88ebba760bd8973dac192ae687ff75e39ebfab"}, - {file = "grpcio-1.60.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fd3b3968ffe7643144580f260f04d39d869fcc2cddb745deef078b09fd2b328"}, - {file = "grpcio-1.60.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:30943b9530fe3620e3b195c03130396cd0ee3a0d10a66c1bee715d1819001eaf"}, - {file = "grpcio-1.60.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b10241250cb77657ab315270b064a6c7f1add58af94befa20687e7c8d8603ae6"}, - {file = "grpcio-1.60.0-cp39-cp39-win32.whl", hash = "sha256:79a050889eb8d57a93ed21d9585bb63fca881666fc709f5d9f7f9372f5e7fd03"}, - {file = "grpcio-1.60.0-cp39-cp39-win_amd64.whl", hash = "sha256:8a97a681e82bc11a42d4372fe57898d270a2707f36c45c6676e49ce0d5c41353"}, - {file = "grpcio-1.60.0.tar.gz", hash = "sha256:2199165a1affb666aa24adf0c97436686d0a61bc5fc113c037701fb7c7fceb96"}, -] - -[package.extras] -protobuf = ["grpcio-tools (>=1.60.0)"] + {file = "grpcio-1.60.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:14e8f2c84c0832773fb3958240c69def72357bc11392571f87b2d7b91e0bb092"}, + {file = "grpcio-1.60.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:33aed0a431f5befeffd9d346b0fa44b2c01aa4aeae5ea5b2c03d3e25e0071216"}, + {file = "grpcio-1.60.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:fead980fbc68512dfd4e0c7b1f5754c2a8e5015a04dea454b9cada54a8423525"}, + {file = "grpcio-1.60.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:082081e6a36b6eb5cf0fd9a897fe777dbb3802176ffd08e3ec6567edd85bc104"}, + {file = "grpcio-1.60.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55ccb7db5a665079d68b5c7c86359ebd5ebf31a19bc1a91c982fd622f1e31ff2"}, + {file = "grpcio-1.60.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b54577032d4f235452f77a83169b6527bf4b77d73aeada97d45b2aaf1bf5ce0"}, + {file = "grpcio-1.60.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7d142bcd604166417929b071cd396aa13c565749a4c840d6c702727a59d835eb"}, + {file = "grpcio-1.60.1-cp310-cp310-win32.whl", hash = "sha256:2a6087f234cb570008a6041c8ffd1b7d657b397fdd6d26e83d72283dae3527b1"}, + {file = "grpcio-1.60.1-cp310-cp310-win_amd64.whl", hash = "sha256:f2212796593ad1d0235068c79836861f2201fc7137a99aa2fea7beeb3b101177"}, + {file = "grpcio-1.60.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:79ae0dc785504cb1e1788758c588c711f4e4a0195d70dff53db203c95a0bd303"}, + {file = "grpcio-1.60.1-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:4eec8b8c1c2c9b7125508ff7c89d5701bf933c99d3910e446ed531cd16ad5d87"}, + {file = "grpcio-1.60.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:8c9554ca8e26241dabe7951aa1fa03a1ba0856688ecd7e7bdbdd286ebc272e4c"}, + {file = 
"grpcio-1.60.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91422ba785a8e7a18725b1dc40fbd88f08a5bb4c7f1b3e8739cab24b04fa8a03"}, + {file = "grpcio-1.60.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cba6209c96828711cb7c8fcb45ecef8c8859238baf15119daa1bef0f6c84bfe7"}, + {file = "grpcio-1.60.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c71be3f86d67d8d1311c6076a4ba3b75ba5703c0b856b4e691c9097f9b1e8bd2"}, + {file = "grpcio-1.60.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:af5ef6cfaf0d023c00002ba25d0751e5995fa0e4c9eec6cd263c30352662cbce"}, + {file = "grpcio-1.60.1-cp311-cp311-win32.whl", hash = "sha256:a09506eb48fa5493c58f946c46754ef22f3ec0df64f2b5149373ff31fb67f3dd"}, + {file = "grpcio-1.60.1-cp311-cp311-win_amd64.whl", hash = "sha256:49c9b6a510e3ed8df5f6f4f3c34d7fbf2d2cae048ee90a45cd7415abab72912c"}, + {file = "grpcio-1.60.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:b58b855d0071575ea9c7bc0d84a06d2edfbfccec52e9657864386381a7ce1ae9"}, + {file = "grpcio-1.60.1-cp312-cp312-macosx_10_10_universal2.whl", hash = "sha256:a731ac5cffc34dac62053e0da90f0c0b8560396a19f69d9703e88240c8f05858"}, + {file = "grpcio-1.60.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:cf77f8cf2a651fbd869fbdcb4a1931464189cd210abc4cfad357f1cacc8642a6"}, + {file = "grpcio-1.60.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c557e94e91a983e5b1e9c60076a8fd79fea1e7e06848eb2e48d0ccfb30f6e073"}, + {file = "grpcio-1.60.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:069fe2aeee02dfd2135d562d0663fe70fbb69d5eed6eb3389042a7e963b54de8"}, + {file = "grpcio-1.60.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb0af13433dbbd1c806e671d81ec75bd324af6ef75171fd7815ca3074fe32bfe"}, + {file = "grpcio-1.60.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2f44c32aef186bbba254129cea1df08a20be414144ac3bdf0e84b24e3f3b2e05"}, + {file = "grpcio-1.60.1-cp312-cp312-win32.whl", hash = "sha256:a212e5dea1a4182e40cd3e4067ee46be9d10418092ce3627475e995cca95de21"}, + {file = "grpcio-1.60.1-cp312-cp312-win_amd64.whl", hash = "sha256:6e490fa5f7f5326222cb9f0b78f207a2b218a14edf39602e083d5f617354306f"}, + {file = "grpcio-1.60.1-cp37-cp37m-linux_armv7l.whl", hash = "sha256:4216e67ad9a4769117433814956031cb300f85edc855252a645a9a724b3b6594"}, + {file = "grpcio-1.60.1-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:73e14acd3d4247169955fae8fb103a2b900cfad21d0c35f0dcd0fdd54cd60367"}, + {file = "grpcio-1.60.1-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:6ecf21d20d02d1733e9c820fb5c114c749d888704a7ec824b545c12e78734d1c"}, + {file = "grpcio-1.60.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:33bdea30dcfd4f87b045d404388469eb48a48c33a6195a043d116ed1b9a0196c"}, + {file = "grpcio-1.60.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53b69e79d00f78c81eecfb38f4516080dc7f36a198b6b37b928f1c13b3c063e9"}, + {file = "grpcio-1.60.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:39aa848794b887120b1d35b1b994e445cc028ff602ef267f87c38122c1add50d"}, + {file = "grpcio-1.60.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:72153a0d2e425f45b884540a61c6639436ddafa1829a42056aa5764b84108b8e"}, + {file = "grpcio-1.60.1-cp37-cp37m-win_amd64.whl", hash = "sha256:50d56280b482875d1f9128ce596e59031a226a8b84bec88cb2bf76c289f5d0de"}, + {file = "grpcio-1.60.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:6d140bdeb26cad8b93c1455fa00573c05592793c32053d6e0016ce05ba267549"}, + {file = 
"grpcio-1.60.1-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:bc808924470643b82b14fe121923c30ec211d8c693e747eba8a7414bc4351a23"}, + {file = "grpcio-1.60.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:70c83bb530572917be20c21f3b6be92cd86b9aecb44b0c18b1d3b2cc3ae47df0"}, + {file = "grpcio-1.60.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9b106bc52e7f28170e624ba61cc7dc6829566e535a6ec68528f8e1afbed1c41f"}, + {file = "grpcio-1.60.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e980cd6db1088c144b92fe376747328d5554bc7960ce583ec7b7d81cd47287"}, + {file = "grpcio-1.60.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0c5807e9152eff15f1d48f6b9ad3749196f79a4a050469d99eecb679be592acc"}, + {file = "grpcio-1.60.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f1c3dc536b3ee124e8b24feb7533e5c70b9f2ef833e3b2e5513b2897fd46763a"}, + {file = "grpcio-1.60.1-cp38-cp38-win32.whl", hash = "sha256:d7404cebcdb11bb5bd40bf94131faf7e9a7c10a6c60358580fe83913f360f929"}, + {file = "grpcio-1.60.1-cp38-cp38-win_amd64.whl", hash = "sha256:c8754c75f55781515a3005063d9a05878b2cfb3cb7e41d5401ad0cf19de14872"}, + {file = "grpcio-1.60.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:0250a7a70b14000fa311de04b169cc7480be6c1a769b190769d347939d3232a8"}, + {file = "grpcio-1.60.1-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:660fc6b9c2a9ea3bb2a7e64ba878c98339abaf1811edca904ac85e9e662f1d73"}, + {file = "grpcio-1.60.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:76eaaba891083fcbe167aa0f03363311a9f12da975b025d30e94b93ac7a765fc"}, + {file = "grpcio-1.60.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5d97c65ea7e097056f3d1ead77040ebc236feaf7f71489383d20f3b4c28412a"}, + {file = "grpcio-1.60.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb2a2911b028f01c8c64d126f6b632fcd8a9ac975aa1b3855766c94e4107180"}, + {file = "grpcio-1.60.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:5a1ebbae7e2214f51b1f23b57bf98eeed2cf1ba84e4d523c48c36d5b2f8829ff"}, + {file = "grpcio-1.60.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9a66f4d2a005bc78e61d805ed95dedfcb35efa84b7bba0403c6d60d13a3de2d6"}, + {file = "grpcio-1.60.1-cp39-cp39-win32.whl", hash = "sha256:8d488fbdbf04283f0d20742b64968d44825617aa6717b07c006168ed16488804"}, + {file = "grpcio-1.60.1-cp39-cp39-win_amd64.whl", hash = "sha256:61b7199cd2a55e62e45bfb629a35b71fc2c0cb88f686a047f25b1112d3810904"}, + {file = "grpcio-1.60.1.tar.gz", hash = "sha256:dd1d3a8d1d2e50ad9b59e10aa7f07c7d1be2b367f3f2d33c5fade96ed5460962"}, +] + +[package.extras] +protobuf = ["grpcio-tools (>=1.60.1)"] [[package]] name = "grpcio-status" -version = "1.60.0" +version = "1.60.1" description = "Status proto mapping for gRPC" optional = true python-versions = ">=3.6" files = [ - {file = "grpcio-status-1.60.0.tar.gz", hash = "sha256:f10e0b6db3adc0fdc244b71962814ee982996ef06186446b5695b9fa635aa1ab"}, - {file = "grpcio_status-1.60.0-py3-none-any.whl", hash = "sha256:7d383fa36e59c1e61d380d91350badd4d12ac56e4de2c2b831b050362c3c572e"}, + {file = "grpcio-status-1.60.1.tar.gz", hash = "sha256:61b5aab8989498e8aa142c20b88829ea5d90d18c18c853b9f9e6d407d37bf8b4"}, + {file = "grpcio_status-1.60.1-py3-none-any.whl", hash = "sha256:3034fdb239185b6e0f3169d08c268c4507481e4b8a434c21311a03d9eb5889a0"}, ] [package.dependencies] googleapis-common-protos = ">=1.5.5" -grpcio = ">=1.60.0" +grpcio = ">=1.60.1" protobuf = ">=4.21.6" [[package]] @@ -2408,13 +2399,13 @@ tests = ["freezegun", "pytest", 
"pytest-cov"] [[package]] name = "hypothesis" -version = "6.93.0" +version = "6.97.4" description = "A library for property-based testing" optional = false python-versions = ">=3.8" files = [ - {file = "hypothesis-6.93.0-py3-none-any.whl", hash = "sha256:bfe6173e36c8cf0779a79de757a8a7151568b2703cb14dcbc186517c7a79144b"}, - {file = "hypothesis-6.93.0.tar.gz", hash = "sha256:e9ceaa5bbd244471fa1c28272fb2b0c68bb6ee014473394d63519ed02bd2d4de"}, + {file = "hypothesis-6.97.4-py3-none-any.whl", hash = "sha256:9069fe3fb18d9b7dd218bd69ab50bbc66426819dfac7cc7168ba85034d98a4df"}, + {file = "hypothesis-6.97.4.tar.gz", hash = "sha256:28ff724fa81ccc55f64f0f1eb06e4a75db6a195fe0857e9b3184cf4ff613a103"}, ] [package.dependencies] @@ -2576,13 +2567,13 @@ ipywidgets = "*" [[package]] name = "ipykernel" -version = "6.28.0" +version = "6.29.0" description = "IPython Kernel for Jupyter" optional = false python-versions = ">=3.8" files = [ - {file = "ipykernel-6.28.0-py3-none-any.whl", hash = "sha256:c6e9a9c63a7f4095c0a22a79f765f079f9ec7be4f2430a898ddea889e8665661"}, - {file = "ipykernel-6.28.0.tar.gz", hash = "sha256:69c11403d26de69df02225916f916b37ea4b9af417da0a8c827f84328d88e5f3"}, + {file = "ipykernel-6.29.0-py3-none-any.whl", hash = "sha256:076663ca68492576f051e4af7720d33f34383e655f2be0d544c8b1c9de915b2f"}, + {file = "ipykernel-6.29.0.tar.gz", hash = "sha256:b5dd3013cab7b330df712891c96cd1ab868c27a7159e606f762015e9bf8ceb3f"}, ] [package.dependencies] @@ -2605,17 +2596,17 @@ cov = ["coverage[toml]", "curio", "matplotlib", "pytest-cov", "trio"] docs = ["myst-parser", "pydata-sphinx-theme", "sphinx", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "trio"] pyqt5 = ["pyqt5"] pyside6 = ["pyside6"] -test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio", "pytest-cov", "pytest-timeout"] +test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio (==0.23.2)", "pytest-cov", "pytest-timeout"] [[package]] name = "ipyleaflet" -version = "0.18.1" +version = "0.18.2" description = "A Jupyter widget for dynamic Leaflet maps" optional = false python-versions = ">=3.7" files = [ - {file = "ipyleaflet-0.18.1-py3-none-any.whl", hash = "sha256:c941429945248fb0fb8a7b30cc4f248d3194e4a409066a068495a633c97eb6c6"}, - {file = "ipyleaflet-0.18.1.tar.gz", hash = "sha256:f35d70ad0e0bb2c0c160b499ab8c788333fc54576596e33b974f0dfeee941d12"}, + {file = "ipyleaflet-0.18.2-py3-none-any.whl", hash = "sha256:dc5bed1bad3ba3244fe97aac9d4ed8f8096ae3d5e6ac0c5fdfbe7f1d2a01d3f8"}, + {file = "ipyleaflet-0.18.2.tar.gz", hash = "sha256:8f166529ec7784de08822b253b8cc593fa81af8a8f967d70cbc53e45a6d3755f"}, ] [package.dependencies] @@ -2778,13 +2769,13 @@ files = [ [[package]] name = "jsonschema" -version = "4.20.0" +version = "4.21.1" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.8" files = [ - {file = "jsonschema-4.20.0-py3-none-any.whl", hash = "sha256:ed6231f0429ecf966f5bc8dfef245998220549cbbcf140f913b7464c52c3b6b3"}, - {file = "jsonschema-4.20.0.tar.gz", hash = "sha256:4f614fd46d8d61258610998997743ec5492a648b33cf478c1ddc23ed4598a5fa"}, + {file = "jsonschema-4.21.1-py3-none-any.whl", hash = "sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f"}, + {file = "jsonschema-4.21.1.tar.gz", hash = "sha256:85727c00279f5fa6bedbe6238d2aa6403bedd8b4864ab11207d07df3cc1b2ee5"}, ] [package.dependencies] @@ -3151,71 +3142,71 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] [[package]] 
name = "markupsafe" -version = "2.1.3" +version = "2.1.4" description = "Safely add untrusted strings to HTML/XML markup." optional = false python-versions = ">=3.7" files = [ - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, - {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:de8153a7aae3835484ac168a9a9bdaa0c5eee4e0bc595503c95d53b942879c84"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e888ff76ceb39601c59e219f281466c6d7e66bd375b4ec1ce83bcdc68306796b"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0b838c37ba596fcbfca71651a104a611543077156cb0a26fe0c475e1f152ee8"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac1ebf6983148b45b5fa48593950f90ed6d1d26300604f321c74a9ca1609f8e"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0fbad3d346df8f9d72622ac71b69565e621ada2ce6572f37c2eae8dacd60385d"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d5291d98cd3ad9a562883468c690a2a238c4a6388ab3bd155b0c75dd55ece858"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:a7cc49ef48a3c7a0005a949f3c04f8baa5409d3f663a1b36f0eba9bfe2a0396e"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b83041cda633871572f0d3c41dddd5582ad7d22f65a72eacd8d3d6d00291df26"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-win32.whl", hash = "sha256:0c26f67b3fe27302d3a412b85ef696792c4a2386293c53ba683a89562f9399b0"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-win_amd64.whl", hash = "sha256:a76055d5cb1c23485d7ddae533229039b850db711c554a12ea64a0fd8a0129e2"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9e9e3c4020aa2dc62d5dd6743a69e399ce3de58320522948af6140ac959ab863"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0042d6a9880b38e1dd9ff83146cc3c9c18a059b9360ceae207805567aacccc69"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d03fea4c4e9fd0ad75dc2e7e2b6757b80c152c032ea1d1de487461d8140efc"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ab3a886a237f6e9c9f4f7d272067e712cdb4efa774bef494dccad08f39d8ae6"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abf5ebbec056817057bfafc0445916bb688a255a5146f900445d081db08cbabb"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e1a0d1924a5013d4f294087e00024ad25668234569289650929ab871231668e7"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e7902211afd0af05fbadcc9a312e4cf10f27b779cf1323e78d52377ae4b72bea"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c669391319973e49a7c6230c218a1e3044710bc1ce4c8e6eb71f7e6d43a2c131"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-win32.whl", hash = "sha256:31f57d64c336b8ccb1966d156932f3daa4fee74176b0fdc48ef580be774aae74"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-win_amd64.whl", hash = "sha256:54a7e1380dfece8847c71bf7e33da5d084e9b889c75eca19100ef98027bd9f56"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a76cd37d229fc385738bd1ce4cba2a121cf26b53864c1772694ad0ad348e509e"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:987d13fe1d23e12a66ca2073b8d2e2a75cec2ecb8eab43ff5624ba0ad42764bc"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5244324676254697fe5c181fc762284e2c5fceeb1c4e3e7f6aca2b6f107e60dc"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78bc995e004681246e85e28e068111a4c3f35f34e6c62da1471e844ee1446250"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4d176cfdfde84f732c4a53109b293d05883e952bbba68b857ae446fa3119b4f"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f9917691f410a2e0897d1ef99619fd3f7dd503647c8ff2475bf90c3cf222ad74"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f06e5a9e99b7df44640767842f414ed5d7bedaaa78cd817ce04bbd6fd86e2dd6"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:396549cea79e8ca4ba65525470d534e8a41070e6b3500ce2414921099cb73e8d"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-win32.whl", hash = "sha256:f6be2d708a9d0e9b0054856f07ac7070fbe1754be40ca8525d5adccdbda8f475"}, + {file = 
"MarkupSafe-2.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:5045e892cfdaecc5b4c01822f353cf2c8feb88a6ec1c0adef2a2e705eef0f656"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7a07f40ef8f0fbc5ef1000d0c78771f4d5ca03b4953fc162749772916b298fc4"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d18b66fe626ac412d96c2ab536306c736c66cf2a31c243a45025156cc190dc8a"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:698e84142f3f884114ea8cf83e7a67ca8f4ace8454e78fe960646c6c91c63bfa"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49a3b78a5af63ec10d8604180380c13dcd870aba7928c1fe04e881d5c792dc4e"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:15866d7f2dc60cfdde12ebb4e75e41be862348b4728300c36cdf405e258415ec"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:6aa5e2e7fc9bc042ae82d8b79d795b9a62bd8f15ba1e7594e3db243f158b5565"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:54635102ba3cf5da26eb6f96c4b8c53af8a9c0d97b64bdcb592596a6255d8518"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-win32.whl", hash = "sha256:3583a3a3ab7958e354dc1d25be74aee6228938312ee875a22330c4dc2e41beb0"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-win_amd64.whl", hash = "sha256:d6e427c7378c7f1b2bef6a344c925b8b63623d3321c09a237b7cc0e77dd98ceb"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:bf1196dcc239e608605b716e7b166eb5faf4bc192f8a44b81e85251e62584bd2"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4df98d4a9cd6a88d6a585852f56f2155c9cdb6aec78361a19f938810aa020954"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b835aba863195269ea358cecc21b400276747cc977492319fd7682b8cd2c253d"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23984d1bdae01bee794267424af55eef4dfc038dc5d1272860669b2aa025c9e3"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c98c33ffe20e9a489145d97070a435ea0679fddaabcafe19982fe9c971987d5"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9896fca4a8eb246defc8b2a7ac77ef7553b638e04fbf170bff78a40fa8a91474"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b0fe73bac2fed83839dbdbe6da84ae2a31c11cfc1c777a40dbd8ac8a6ed1560f"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c7556bafeaa0a50e2fe7dc86e0382dea349ebcad8f010d5a7dc6ba568eaaa789"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-win32.whl", hash = "sha256:fc1a75aa8f11b87910ffd98de62b29d6520b6d6e8a3de69a70ca34dea85d2a8a"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-win_amd64.whl", hash = "sha256:3a66c36a3864df95e4f62f9167c734b3b1192cb0851b43d7cc08040c074c6279"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:765f036a3d00395a326df2835d8f86b637dbaf9832f90f5d196c3b8a7a5080cb"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:21e7af8091007bf4bebf4521184f4880a6acab8df0df52ef9e513d8e5db23411"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:d5c31fe855c77cad679b302aabc42d724ed87c043b1432d457f4976add1c2c3e"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7653fa39578957bc42e5ebc15cf4361d9e0ee4b702d7d5ec96cdac860953c5b4"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47bb5f0142b8b64ed1399b6b60f700a580335c8e1c57f2f15587bd072012decc"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:fe8512ed897d5daf089e5bd010c3dc03bb1bdae00b35588c49b98268d4a01e00"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:36d7626a8cca4d34216875aee5a1d3d654bb3dac201c1c003d182283e3205949"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b6f14a9cd50c3cb100eb94b3273131c80d102e19bb20253ac7bd7336118a673a"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-win32.whl", hash = "sha256:c8f253a84dbd2c63c19590fa86a032ef3d8cc18923b8049d91bcdeeb2581fbf6"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-win_amd64.whl", hash = "sha256:8b570a1537367b52396e53325769608f2a687ec9a4363647af1cded8928af959"}, + {file = "MarkupSafe-2.1.4.tar.gz", hash = "sha256:3aae9af4cac263007fd6309c64c6ab4506dd2b79382d9d19a1994f9240b8db4f"}, ] [[package]] @@ -3395,85 +3386,101 @@ files = [ [[package]] name = "multidict" -version = "6.0.4" +version = "6.0.5" description = "multidict implementation" optional = true python-versions = ">=3.7" files = [ - {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, - {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, - {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"}, - {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b"}, - {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547"}, - {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569"}, - {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93"}, - {file = "multidict-6.0.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98"}, - {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0"}, - {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988"}, - {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc"}, - {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0"}, - {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5"}, - {file = "multidict-6.0.4-cp310-cp310-win32.whl", hash = "sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8"}, - {file = "multidict-6.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc"}, - {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03"}, - {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3"}, - {file = "multidict-6.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba"}, - {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9"}, - {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982"}, - {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe"}, - {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710"}, - {file = "multidict-6.0.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c"}, - {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4"}, - {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a"}, - {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c"}, - {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed"}, - {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461"}, - {file = "multidict-6.0.4-cp311-cp311-win32.whl", hash = "sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636"}, - {file = "multidict-6.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0"}, - {file = "multidict-6.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78"}, - {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f"}, - {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603"}, - {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac"}, - {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9"}, - {file = 
"multidict-6.0.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2"}, - {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde"}, - {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe"}, - {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067"}, - {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87"}, - {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d"}, - {file = "multidict-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775"}, - {file = "multidict-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e"}, - {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c"}, - {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161"}, - {file = "multidict-6.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11"}, - {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e"}, - {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d"}, - {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2"}, - {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258"}, - {file = "multidict-6.0.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52"}, - {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660"}, - {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951"}, - {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60"}, - {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d"}, - {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1"}, - {file = "multidict-6.0.4-cp38-cp38-win32.whl", hash = "sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779"}, - {file = "multidict-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480"}, - {file = 
"multidict-6.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664"}, - {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35"}, - {file = "multidict-6.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60"}, - {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706"}, - {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d"}, - {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca"}, - {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1"}, - {file = "multidict-6.0.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449"}, - {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf"}, - {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063"}, - {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a"}, - {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176"}, - {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95"}, - {file = "multidict-6.0.4-cp39-cp39-win32.whl", hash = "sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313"}, - {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"}, - {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, + {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9"}, + {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604"}, + {file = "multidict-6.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:411bf8515f3be9813d06004cac41ccf7d1cd46dfe233705933dd163b60e37600"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d147090048129ce3c453f0292e7697d333db95e52616b3793922945804a433c"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:215ed703caf15f578dca76ee6f6b21b7603791ae090fbf1ef9d865571039ade5"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c6390cf87ff6234643428991b7359b5f59cc15155695deb4eda5c777d2b880f"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:21fd81c4ebdb4f214161be351eb5bcf385426bf023041da2fd9e60681f3cebae"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3cc2ad10255f903656017363cd59436f2111443a76f996584d1077e43ee51182"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6939c95381e003f54cd4c5516740faba40cf5ad3eeff460c3ad1d3e0ea2549bf"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:220dd781e3f7af2c2c1053da9fa96d9cf3072ca58f057f4c5adaaa1cab8fc442"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:766c8f7511df26d9f11cd3a8be623e59cca73d44643abab3f8c8c07620524e4a"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:fe5d7785250541f7f5019ab9cba2c71169dc7d74d0f45253f8313f436458a4ef"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c1c1496e73051918fcd4f58ff2e0f2f3066d1c76a0c6aeffd9b45d53243702cc"}, + {file = "multidict-6.0.5-cp310-cp310-win32.whl", hash = "sha256:7afcdd1fc07befad18ec4523a782cde4e93e0a2bf71239894b8d61ee578c1319"}, + {file = "multidict-6.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:99f60d34c048c5c2fabc766108c103612344c46e35d4ed9ae0673d33c8fb26e8"}, + {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f285e862d2f153a70586579c15c44656f888806ed0e5b56b64489afe4a2dbfba"}, + {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:53689bb4e102200a4fafa9de9c7c3c212ab40a7ab2c8e474491914d2305f187e"}, + {file = "multidict-6.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:612d1156111ae11d14afaf3a0669ebf6c170dbb735e510a7438ffe2369a847fd"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7be7047bd08accdb7487737631d25735c9a04327911de89ff1b26b81745bd4e3"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de170c7b4fe6859beb8926e84f7d7d6c693dfe8e27372ce3b76f01c46e489fcf"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04bde7a7b3de05732a4eb39c94574db1ec99abb56162d6c520ad26f83267de29"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85f67aed7bb647f93e7520633d8f51d3cbc6ab96957c71272b286b2f30dc70ed"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:425bf820055005bfc8aa9a0b99ccb52cc2f4070153e34b701acc98d201693733"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d3eb1ceec286eba8220c26f3b0096cf189aea7057b6e7b7a2e60ed36b373b77f"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7901c05ead4b3fb75113fb1dd33eb1253c6d3ee37ce93305acd9d38e0b5f21a4"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e0e79d91e71b9867c73323a3444724d496c037e578a0e1755ae159ba14f4f3d1"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:29bfeb0dff5cb5fdab2023a7a9947b3b4af63e9c47cae2a10ad58394b517fddc"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e030047e85cbcedbfc073f71836d62dd5dadfbe7531cae27789ff66bc551bd5e"}, + {file = "multidict-6.0.5-cp311-cp311-win32.whl", hash = "sha256:2f4848aa3baa109e6ab81fe2006c77ed4d3cd1e0ac2c1fbddb7b1277c168788c"}, + {file = 
"multidict-6.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:2faa5ae9376faba05f630d7e5e6be05be22913782b927b19d12b8145968a85ea"}, + {file = "multidict-6.0.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:51d035609b86722963404f711db441cf7134f1889107fb171a970c9701f92e1e"}, + {file = "multidict-6.0.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cbebcd5bcaf1eaf302617c114aa67569dd3f090dd0ce8ba9e35e9985b41ac35b"}, + {file = "multidict-6.0.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2ffc42c922dbfddb4a4c3b438eb056828719f07608af27d163191cb3e3aa6cc5"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ceb3b7e6a0135e092de86110c5a74e46bda4bd4fbfeeb3a3bcec79c0f861e450"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79660376075cfd4b2c80f295528aa6beb2058fd289f4c9252f986751a4cd0496"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4428b29611e989719874670fd152b6625500ad6c686d464e99f5aaeeaca175a"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d84a5c3a5f7ce6db1f999fb9438f686bc2e09d38143f2d93d8406ed2dd6b9226"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76c0de87358b192de7ea9649beb392f107dcad9ad27276324c24c91774ca5271"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:79a6d2ba910adb2cbafc95dad936f8b9386e77c84c35bc0add315b856d7c3abb"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:92d16a3e275e38293623ebf639c471d3e03bb20b8ebb845237e0d3664914caef"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:fb616be3538599e797a2017cccca78e354c767165e8858ab5116813146041a24"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:14c2976aa9038c2629efa2c148022ed5eb4cb939e15ec7aace7ca932f48f9ba6"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:435a0984199d81ca178b9ae2c26ec3d49692d20ee29bc4c11a2a8d4514c67eda"}, + {file = "multidict-6.0.5-cp312-cp312-win32.whl", hash = "sha256:9fe7b0653ba3d9d65cbe7698cca585bf0f8c83dbbcc710db9c90f478e175f2d5"}, + {file = "multidict-6.0.5-cp312-cp312-win_amd64.whl", hash = "sha256:01265f5e40f5a17f8241d52656ed27192be03bfa8764d88e8220141d1e4b3556"}, + {file = "multidict-6.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:19fe01cea168585ba0f678cad6f58133db2aa14eccaf22f88e4a6dccadfad8b3"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bf7a982604375a8d49b6cc1b781c1747f243d91b81035a9b43a2126c04766f5"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:107c0cdefe028703fb5dafe640a409cb146d44a6ae201e55b35a4af8e95457dd"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:403c0911cd5d5791605808b942c88a8155c2592e05332d2bf78f18697a5fa15e"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aeaf541ddbad8311a87dd695ed9642401131ea39ad7bc8cf3ef3967fd093b626"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e4972624066095e52b569e02b5ca97dbd7a7ddd4294bf4e7247d52635630dd83"}, + {file = 
"multidict-6.0.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d946b0a9eb8aaa590df1fe082cee553ceab173e6cb5b03239716338629c50c7a"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b55358304d7a73d7bdf5de62494aaf70bd33015831ffd98bc498b433dfe5b10c"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:a3145cb08d8625b2d3fee1b2d596a8766352979c9bffe5d7833e0503d0f0b5e5"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d65f25da8e248202bd47445cec78e0025c0fe7582b23ec69c3b27a640dd7a8e3"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c9bf56195c6bbd293340ea82eafd0071cb3d450c703d2c93afb89f93b8386ccc"}, + {file = "multidict-6.0.5-cp37-cp37m-win32.whl", hash = "sha256:69db76c09796b313331bb7048229e3bee7928eb62bab5e071e9f7fcc4879caee"}, + {file = "multidict-6.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:fce28b3c8a81b6b36dfac9feb1de115bab619b3c13905b419ec71d03a3fc1423"}, + {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:76f067f5121dcecf0d63a67f29080b26c43c71a98b10c701b0677e4a065fbd54"}, + {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b82cc8ace10ab5bd93235dfaab2021c70637005e1ac787031f4d1da63d493c1d"}, + {file = "multidict-6.0.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5cb241881eefd96b46f89b1a056187ea8e9ba14ab88ba632e68d7a2ecb7aadf7"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8e94e6912639a02ce173341ff62cc1201232ab86b8a8fcc05572741a5dc7d93"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09a892e4a9fb47331da06948690ae38eaa2426de97b4ccbfafbdcbe5c8f37ff8"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55205d03e8a598cfc688c71ca8ea5f66447164efff8869517f175ea632c7cb7b"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37b15024f864916b4951adb95d3a80c9431299080341ab9544ed148091b53f50"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2a1dee728b52b33eebff5072817176c172050d44d67befd681609b4746e1c2e"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:edd08e6f2f1a390bf137080507e44ccc086353c8e98c657e666c017718561b89"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:60d698e8179a42ec85172d12f50b1668254628425a6bd611aba022257cac1386"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3d25f19500588cbc47dc19081d78131c32637c25804df8414463ec908631e453"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4cc0ef8b962ac7a5e62b9e826bd0cd5040e7d401bc45a6835910ed699037a461"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:eca2e9d0cc5a889850e9bbd68e98314ada174ff6ccd1129500103df7a94a7a44"}, + {file = "multidict-6.0.5-cp38-cp38-win32.whl", hash = "sha256:4a6a4f196f08c58c59e0b8ef8ec441d12aee4125a7d4f4fef000ccb22f8d7241"}, + {file = "multidict-6.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:0275e35209c27a3f7951e1ce7aaf93ce0d163b28948444bec61dd7badc6d3f8c"}, + {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e7be68734bd8c9a513f2b0cfd508802d6609da068f40dc57d4e3494cefc92929"}, + {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:1d9ea7a7e779d7a3561aade7d596649fbecfa5c08a7674b11b423783217933f9"}, + {file = "multidict-6.0.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ea1456df2a27c73ce51120fa2f519f1bea2f4a03a917f4a43c8707cf4cbbae1a"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf590b134eb70629e350691ecca88eac3e3b8b3c86992042fb82e3cb1830d5e1"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5c0631926c4f58e9a5ccce555ad7747d9a9f8b10619621f22f9635f069f6233e"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dce1c6912ab9ff5f179eaf6efe7365c1f425ed690b03341911bf4939ef2f3046"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0868d64af83169e4d4152ec612637a543f7a336e4a307b119e98042e852ad9c"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:141b43360bfd3bdd75f15ed811850763555a251e38b2405967f8e25fb43f7d40"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7df704ca8cf4a073334e0427ae2345323613e4df18cc224f647f251e5e75a527"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6214c5a5571802c33f80e6c84713b2c79e024995b9c5897f794b43e714daeec9"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:cd6c8fca38178e12c00418de737aef1261576bd1b6e8c6134d3e729a4e858b38"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e02021f87a5b6932fa6ce916ca004c4d441509d33bbdbeca70d05dff5e9d2479"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ebd8d160f91a764652d3e51ce0d2956b38efe37c9231cd82cfc0bed2e40b581c"}, + {file = "multidict-6.0.5-cp39-cp39-win32.whl", hash = "sha256:04da1bb8c8dbadf2a18a452639771951c662c5ad03aefe4884775454be322c9b"}, + {file = "multidict-6.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:d6f6d4f185481c9669b9447bf9d9cf3b95a0e9df9d169bbc17e363b7d5487755"}, + {file = "multidict-6.0.5-py3-none-any.whl", hash = "sha256:0d63c74e3d7ab26de115c49bffc92cc77ed23395303d496eae515d4204a625e7"}, + {file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"}, ] [[package]] @@ -3543,13 +3550,13 @@ test = ["pep440", "pre-commit", "pytest", "testpath"] [[package]] name = "nest-asyncio" -version = "1.5.8" +version = "1.6.0" description = "Patch asyncio to allow nested event loops" optional = false python-versions = ">=3.5" files = [ - {file = "nest_asyncio-1.5.8-py3-none-any.whl", hash = "sha256:accda7a339a70599cb08f9dd09a67e0c2ef8d8d6f4c07f96ab203f2ae254e48d"}, - {file = "nest_asyncio-1.5.8.tar.gz", hash = "sha256:25aa2ca0d2a5b5531956b9e273b45cf664cae2b145101d73b86b199978d48fdb"}, + {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, + {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, ] [[package]] @@ -3694,71 +3701,70 @@ files = [ [[package]] name = "pandas" -version = "2.1.4" +version = "2.0.3" description = "Powerful data structures for data analysis, time series, and statistics" optional = false -python-versions = ">=3.9" +python-versions = ">=3.8" files = [ - {file = "pandas-2.1.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:bdec823dc6ec53f7a6339a0e34c68b144a7a1fd28d80c260534c39c62c5bf8c9"}, - {file = "pandas-2.1.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:294d96cfaf28d688f30c918a765ea2ae2e0e71d3536754f4b6de0ea4a496d034"}, - {file = "pandas-2.1.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b728fb8deba8905b319f96447a27033969f3ea1fea09d07d296c9030ab2ed1d"}, - {file = "pandas-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00028e6737c594feac3c2df15636d73ace46b8314d236100b57ed7e4b9ebe8d9"}, - {file = "pandas-2.1.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:426dc0f1b187523c4db06f96fb5c8d1a845e259c99bda74f7de97bd8a3bb3139"}, - {file = "pandas-2.1.4-cp310-cp310-win_amd64.whl", hash = "sha256:f237e6ca6421265643608813ce9793610ad09b40154a3344a088159590469e46"}, - {file = "pandas-2.1.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b7d852d16c270e4331f6f59b3e9aa23f935f5c4b0ed2d0bc77637a8890a5d092"}, - {file = "pandas-2.1.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7d5f2f54f78164b3d7a40f33bf79a74cdee72c31affec86bfcabe7e0789821"}, - {file = "pandas-2.1.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0aa6e92e639da0d6e2017d9ccff563222f4eb31e4b2c3cf32a2a392fc3103c0d"}, - {file = "pandas-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d797591b6846b9db79e65dc2d0d48e61f7db8d10b2a9480b4e3faaddc421a171"}, - {file = "pandas-2.1.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d2d3e7b00f703aea3945995ee63375c61b2e6aa5aa7871c5d622870e5e137623"}, - {file = "pandas-2.1.4-cp311-cp311-win_amd64.whl", hash = "sha256:dc9bf7ade01143cddc0074aa6995edd05323974e6e40d9dbde081021ded8510e"}, - {file = "pandas-2.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:482d5076e1791777e1571f2e2d789e940dedd927325cc3cb6d0800c6304082f6"}, - {file = "pandas-2.1.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8a706cfe7955c4ca59af8c7a0517370eafbd98593155b48f10f9811da440248b"}, - {file = "pandas-2.1.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0513a132a15977b4a5b89aabd304647919bc2169eac4c8536afb29c07c23540"}, - {file = "pandas-2.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9f17f2b6fc076b2a0078862547595d66244db0f41bf79fc5f64a5c4d635bead"}, - {file = "pandas-2.1.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:45d63d2a9b1b37fa6c84a68ba2422dc9ed018bdaa668c7f47566a01188ceeec1"}, - {file = "pandas-2.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:f69b0c9bb174a2342818d3e2778584e18c740d56857fc5cdb944ec8bbe4082cf"}, - {file = "pandas-2.1.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3f06bda01a143020bad20f7a85dd5f4a1600112145f126bc9e3e42077c24ef34"}, - {file = "pandas-2.1.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ab5796839eb1fd62a39eec2916d3e979ec3130509930fea17fe6f81e18108f6a"}, - {file = "pandas-2.1.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edbaf9e8d3a63a9276d707b4d25930a262341bca9874fcb22eff5e3da5394732"}, - {file = "pandas-2.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ebfd771110b50055712b3b711b51bee5d50135429364d0498e1213a7adc2be8"}, - {file = "pandas-2.1.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8ea107e0be2aba1da619cc6ba3f999b2bfc9669a83554b1904ce3dd9507f0860"}, - {file = "pandas-2.1.4-cp39-cp39-win_amd64.whl", hash = "sha256:d65148b14788b3758daf57bf42725caa536575da2b64df9964c563b015230984"}, - {file = 
"pandas-2.1.4.tar.gz", hash = "sha256:fcb68203c833cc735321512e13861358079a96c174a61f5116a1de89c58c0ef7"}, + {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"}, + {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"}, + {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183"}, + {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0"}, + {file = "pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210"}, + {file = "pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e"}, + {file = "pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8"}, + {file = "pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26"}, + {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d"}, + {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df"}, + {file = "pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd"}, + {file = "pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b"}, + {file = "pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061"}, + {file = "pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5"}, + {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089"}, + {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0"}, + {file = "pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02"}, + {file = "pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78"}, + {file = "pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b"}, + {file = "pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e"}, + {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b"}, + {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641"}, + {file = "pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682"}, + {file = 
"pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc"}, + {file = "pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c"}, ] [package.dependencies] numpy = [ - {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, - {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, - {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.20.3", markers = "python_version < \"3.10\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" tzdata = ">=2022.1" [package.extras] -all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"] -aws = ["s3fs (>=2022.05.0)"] -clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"] -compression = ["zstandard (>=0.17.0)"] -computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"] -consortium-standard = ["dataframe-api-compat (>=0.1.7)"] -excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"] +all = ["PyQt5 (>=5.15.1)", "SQLAlchemy (>=1.4.16)", "beautifulsoup4 (>=4.9.3)", "bottleneck (>=1.3.2)", "brotlipy (>=0.7.0)", "fastparquet (>=0.6.3)", "fsspec (>=2021.07.0)", "gcsfs (>=2021.07.0)", "html5lib (>=1.1)", "hypothesis (>=6.34.2)", "jinja2 (>=3.0.0)", "lxml (>=4.6.3)", "matplotlib (>=3.6.1)", "numba (>=0.53.1)", "numexpr (>=2.7.3)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pandas-gbq (>=0.15.0)", "psycopg2 (>=2.8.6)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "python-snappy (>=0.6.0)", "pyxlsb (>=1.0.8)", "qtpy (>=2.2.0)", "s3fs (>=2021.08.0)", "scipy (>=1.7.1)", "tables (>=3.6.1)", "tabulate (>=0.8.9)", "xarray (>=0.21.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)", "zstandard (>=0.15.2)"] +aws = ["s3fs (>=2021.08.0)"] +clipboard = ["PyQt5 (>=5.15.1)", "qtpy (>=2.2.0)"] +compression = ["brotlipy (>=0.7.0)", "python-snappy (>=0.6.0)", "zstandard (>=0.15.2)"] +computation = ["scipy (>=1.7.1)", "xarray (>=0.21.0)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pyxlsb (>=1.0.8)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)"] feather = ["pyarrow (>=7.0.0)"] -fss = ["fsspec (>=2022.05.0)"] -gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"] -hdf5 = ["tables (>=3.7.0)"] -html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"] -mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"] -output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"] +fss = ["fsspec (>=2021.07.0)"] +gcp = ["gcsfs (>=2021.07.0)", "pandas-gbq (>=0.15.0)"] +hdf5 = ["tables (>=3.6.1)"] +html = 
["beautifulsoup4 (>=4.9.3)", "html5lib (>=1.1)", "lxml (>=4.6.3)"] +mysql = ["SQLAlchemy (>=1.4.16)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.0.0)", "tabulate (>=0.8.9)"] parquet = ["pyarrow (>=7.0.0)"] -performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"] +performance = ["bottleneck (>=1.3.2)", "numba (>=0.53.1)", "numexpr (>=2.7.1)"] plot = ["matplotlib (>=3.6.1)"] -postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"] -spss = ["pyreadstat (>=1.1.5)"] -sql-other = ["SQLAlchemy (>=1.4.36)"] -test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] -xml = ["lxml (>=4.8.0)"] +postgresql = ["SQLAlchemy (>=1.4.16)", "psycopg2 (>=2.8.6)"] +spss = ["pyreadstat (>=1.1.2)"] +sql-other = ["SQLAlchemy (>=1.4.16)"] +test = ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.6.3)"] [[package]] name = "parso" @@ -4040,13 +4046,13 @@ typing = ["ipython", "pandas-stubs", "pyright"] [[package]] name = "pluggy" -version = "1.3.0" +version = "1.4.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" files = [ - {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, - {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, + {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"}, + {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, ] [package.extras] @@ -4055,21 +4061,22 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "plum-dispatch" -version = "2.2.2" +version = "2.3.2" description = "Multiple dispatch in Python" optional = false python-versions = ">=3.8" files = [ - {file = "plum_dispatch-2.2.2-py3-none-any.whl", hash = "sha256:d7ee415bd166ffa90eaa4b24d7c9dc7ca6f8875750586001e7c9baff706223bd"}, - {file = "plum_dispatch-2.2.2.tar.gz", hash = "sha256:d5d180225c9fbf0277375bb558b649d97d0b651a91037bb7155cedbe9f52764b"}, + {file = "plum_dispatch-2.3.2-py3-none-any.whl", hash = "sha256:96f519d416accf9a009117682f689114eb23e867bb6f977eed74ef85ef7fef9d"}, + {file = "plum_dispatch-2.3.2.tar.gz", hash = "sha256:f49f00dfdf7ab0f16c9b85cc27cc5241ffb59aee02218bac671ec7c1ac65e139"}, ] [package.dependencies] beartype = ">=0.16.2" +rich = ">=10.0" typing-extensions = {version = "*", markers = "python_version <= \"3.10\""} [package.extras] -dev = ["black (==23.9.0)", "build", "coveralls", "ghp-import", "ipython", "jupyter-book", "mypy", "numpy", "pre-commit", "pyright", "pytest (>=6)", "pytest-cov", "tox", "wheel"] +dev = ["black (==23.9.0)", "build", "coveralls", "ghp-import", "ipython", "jupyter-book", "mypy", "numpy", "pre-commit", "pyright (>=1.1.331)", "pytest (>=6)", "pytest-cov", "ruff (==0.1.0)", "tox", "wheel"] [[package]] name = "poetry" @@ -4155,17 +4162,17 @@ poetry-core = ">=1.7.0,<2.0.0" [[package]] name = "polars" -version = "0.20.4" +version = "0.20.6" description = "Blazingly fast DataFrame library" optional = true python-versions = ">=3.8" files = [ - {file = "polars-0.20.4-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:5fa84f74fc2274e3926d083ccd084c81b3e04debdc66fd917cafe7026d1df19c"}, - {file = "polars-0.20.4-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:4c5a9f981708f3d090dd5513d83806bcb8a1725653d80bcf63bb738a097b1162"}, - {file = 
"polars-0.20.4-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfdc2672423c9c73e34161b7b4833c40d042b9d36e899866dc858e8a221b0849"}, - {file = "polars-0.20.4-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:76f270fb17955c97958c2e301c5c2fa10015ccf3048697964ad9c2198e4c6fe6"}, - {file = "polars-0.20.4-cp38-abi3-win_amd64.whl", hash = "sha256:96a067be35745942d3fe6cd3ad1513f9ab7f4249d2b2502484ee64b30d221f96"}, - {file = "polars-0.20.4.tar.gz", hash = "sha256:21a90aa0c7401c80fc814b4db371dced780df6bd5ac81a329307e796b5821190"}, + {file = "polars-0.20.6-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:59845bae0b614b3291baa889cfc2a251e1024129696bb655596f2b5556e9f9a1"}, + {file = "polars-0.20.6-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:9e86736f68440bf97a9100fa0a79ae7ce616d1af6fd4669fff1345f03aab14c0"}, + {file = "polars-0.20.6-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4f4e3335fdcc863f6aac0616510b1baa5e13d5e818ebbfcb980ad534bd6edc2"}, + {file = "polars-0.20.6-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:7c7b494beea914a54bcae8868dee3988a88ecb48525df948e07aacf2fb83e711"}, + {file = "polars-0.20.6-cp38-abi3-win_amd64.whl", hash = "sha256:a96b157d68697c8d6ef2f7c2cc1734d498c3c6cc0c9c18d4fff7283ccfabdd1d"}, + {file = "polars-0.20.6.tar.gz", hash = "sha256:b53553308bc7e2b4f841b18f1949b61ed7f2cf155c5c64712298efa5af67a997"}, ] [package.extras] @@ -4272,27 +4279,27 @@ files = [ [[package]] name = "psutil" -version = "5.9.7" +version = "5.9.8" description = "Cross-platform lib for process and system monitoring in Python." optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ - {file = "psutil-5.9.7-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:0bd41bf2d1463dfa535942b2a8f0e958acf6607ac0be52265ab31f7923bcd5e6"}, - {file = "psutil-5.9.7-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:5794944462509e49d4d458f4dbfb92c47539e7d8d15c796f141f474010084056"}, - {file = "psutil-5.9.7-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:fe361f743cb3389b8efda21980d93eb55c1f1e3898269bc9a2a1d0bb7b1f6508"}, - {file = "psutil-5.9.7-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:e469990e28f1ad738f65a42dcfc17adaed9d0f325d55047593cb9033a0ab63df"}, - {file = "psutil-5.9.7-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:3c4747a3e2ead1589e647e64aad601981f01b68f9398ddf94d01e3dc0d1e57c7"}, - {file = "psutil-5.9.7-cp27-none-win32.whl", hash = "sha256:1d4bc4a0148fdd7fd8f38e0498639ae128e64538faa507df25a20f8f7fb2341c"}, - {file = "psutil-5.9.7-cp27-none-win_amd64.whl", hash = "sha256:4c03362e280d06bbbfcd52f29acd79c733e0af33d707c54255d21029b8b32ba6"}, - {file = "psutil-5.9.7-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ea36cc62e69a13ec52b2f625c27527f6e4479bca2b340b7a452af55b34fcbe2e"}, - {file = "psutil-5.9.7-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1132704b876e58d277168cd729d64750633d5ff0183acf5b3c986b8466cd0284"}, - {file = "psutil-5.9.7-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe8b7f07948f1304497ce4f4684881250cd859b16d06a1dc4d7941eeb6233bfe"}, - {file = "psutil-5.9.7-cp36-cp36m-win32.whl", hash = "sha256:b27f8fdb190c8c03914f908a4555159327d7481dac2f01008d483137ef3311a9"}, - {file = "psutil-5.9.7-cp36-cp36m-win_amd64.whl", hash = "sha256:44969859757f4d8f2a9bd5b76eba8c3099a2c8cf3992ff62144061e39ba8568e"}, - {file = 
"psutil-5.9.7-cp37-abi3-win32.whl", hash = "sha256:c727ca5a9b2dd5193b8644b9f0c883d54f1248310023b5ad3e92036c5e2ada68"}, - {file = "psutil-5.9.7-cp37-abi3-win_amd64.whl", hash = "sha256:f37f87e4d73b79e6c5e749440c3113b81d1ee7d26f21c19c47371ddea834f414"}, - {file = "psutil-5.9.7-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:032f4f2c909818c86cea4fe2cc407f1c0f0cde8e6c6d702b28b8ce0c0d143340"}, - {file = "psutil-5.9.7.tar.gz", hash = "sha256:3f02134e82cfb5d089fddf20bb2e03fd5cd52395321d1c8458a9e58500ff417c"}, + {file = "psutil-5.9.8-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:26bd09967ae00920df88e0352a91cff1a78f8d69b3ecabbfe733610c0af486c8"}, + {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:05806de88103b25903dff19bb6692bd2e714ccf9e668d050d144012055cbca73"}, + {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:611052c4bc70432ec770d5d54f64206aa7203a101ec273a0cd82418c86503bb7"}, + {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:50187900d73c1381ba1454cf40308c2bf6f34268518b3f36a9b663ca87e65e36"}, + {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:02615ed8c5ea222323408ceba16c60e99c3f91639b07da6373fb7e6539abc56d"}, + {file = "psutil-5.9.8-cp27-none-win32.whl", hash = "sha256:36f435891adb138ed3c9e58c6af3e2e6ca9ac2f365efe1f9cfef2794e6c93b4e"}, + {file = "psutil-5.9.8-cp27-none-win_amd64.whl", hash = "sha256:bd1184ceb3f87651a67b2708d4c3338e9b10c5df903f2e3776b62303b26cb631"}, + {file = "psutil-5.9.8-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81"}, + {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421"}, + {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4"}, + {file = "psutil-5.9.8-cp36-cp36m-win32.whl", hash = "sha256:7d79560ad97af658a0f6adfef8b834b53f64746d45b403f225b85c5c2c140eee"}, + {file = "psutil-5.9.8-cp36-cp36m-win_amd64.whl", hash = "sha256:27cc40c3493bb10de1be4b3f07cae4c010ce715290a5be22b98493509c6299e2"}, + {file = "psutil-5.9.8-cp37-abi3-win32.whl", hash = "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0"}, + {file = "psutil-5.9.8-cp37-abi3-win_amd64.whl", hash = "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf"}, + {file = "psutil-5.9.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8"}, + {file = "psutil-5.9.8.tar.gz", hash = "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c"}, ] [package.extras] @@ -4309,8 +4316,6 @@ files = [ {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, {file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, - {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, - {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, {file 
= "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, @@ -4430,51 +4435,51 @@ files = [ [[package]] name = "pyarrow" -version = "14.0.2" +version = "15.0.0" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.8" files = [ - {file = "pyarrow-14.0.2-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:ba9fe808596c5dbd08b3aeffe901e5f81095baaa28e7d5118e01354c64f22807"}, - {file = "pyarrow-14.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:22a768987a16bb46220cef490c56c671993fbee8fd0475febac0b3e16b00a10e"}, - {file = "pyarrow-14.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2dbba05e98f247f17e64303eb876f4a80fcd32f73c7e9ad975a83834d81f3fda"}, - {file = "pyarrow-14.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a898d134d00b1eca04998e9d286e19653f9d0fcb99587310cd10270907452a6b"}, - {file = "pyarrow-14.0.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:87e879323f256cb04267bb365add7208f302df942eb943c93a9dfeb8f44840b1"}, - {file = "pyarrow-14.0.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:76fc257559404ea5f1306ea9a3ff0541bf996ff3f7b9209fc517b5e83811fa8e"}, - {file = "pyarrow-14.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:b0c4a18e00f3a32398a7f31da47fefcd7a927545b396e1f15d0c85c2f2c778cd"}, - {file = "pyarrow-14.0.2-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:87482af32e5a0c0cce2d12eb3c039dd1d853bd905b04f3f953f147c7a196915b"}, - {file = "pyarrow-14.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:059bd8f12a70519e46cd64e1ba40e97eae55e0cbe1695edd95384653d7626b23"}, - {file = "pyarrow-14.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f16111f9ab27e60b391c5f6d197510e3ad6654e73857b4e394861fc79c37200"}, - {file = "pyarrow-14.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06ff1264fe4448e8d02073f5ce45a9f934c0f3db0a04460d0b01ff28befc3696"}, - {file = "pyarrow-14.0.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6dd4f4b472ccf4042f1eab77e6c8bce574543f54d2135c7e396f413046397d5a"}, - {file = "pyarrow-14.0.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:32356bfb58b36059773f49e4e214996888eeea3a08893e7dbde44753799b2a02"}, - {file = "pyarrow-14.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:52809ee69d4dbf2241c0e4366d949ba035cbcf48409bf404f071f624ed313a2b"}, - {file = "pyarrow-14.0.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:c87824a5ac52be210d32906c715f4ed7053d0180c1060ae3ff9b7e560f53f944"}, - {file = "pyarrow-14.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a25eb2421a58e861f6ca91f43339d215476f4fe159eca603c55950c14f378cc5"}, - {file = "pyarrow-14.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c1da70d668af5620b8ba0a23f229030a4cd6c5f24a616a146f30d2386fec422"}, - {file = "pyarrow-14.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2cc61593c8e66194c7cdfae594503e91b926a228fba40b5cf25cc593563bcd07"}, - {file = "pyarrow-14.0.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:78ea56f62fb7c0ae8ecb9afdd7893e3a7dbeb0b04106f5c08dbb23f9c0157591"}, - {file = 
"pyarrow-14.0.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:37c233ddbce0c67a76c0985612fef27c0c92aef9413cf5aa56952f359fcb7379"}, - {file = "pyarrow-14.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:e4b123ad0f6add92de898214d404e488167b87b5dd86e9a434126bc2b7a5578d"}, - {file = "pyarrow-14.0.2-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:e354fba8490de258be7687f341bc04aba181fc8aa1f71e4584f9890d9cb2dec2"}, - {file = "pyarrow-14.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:20e003a23a13da963f43e2b432483fdd8c38dc8882cd145f09f21792e1cf22a1"}, - {file = "pyarrow-14.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc0de7575e841f1595ac07e5bc631084fd06ca8b03c0f2ecece733d23cd5102a"}, - {file = "pyarrow-14.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66e986dc859712acb0bd45601229021f3ffcdfc49044b64c6d071aaf4fa49e98"}, - {file = "pyarrow-14.0.2-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:f7d029f20ef56673a9730766023459ece397a05001f4e4d13805111d7c2108c0"}, - {file = "pyarrow-14.0.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:209bac546942b0d8edc8debda248364f7f668e4aad4741bae58e67d40e5fcf75"}, - {file = "pyarrow-14.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:1e6987c5274fb87d66bb36816afb6f65707546b3c45c44c28e3c4133c010a881"}, - {file = "pyarrow-14.0.2-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:a01d0052d2a294a5f56cc1862933014e696aa08cc7b620e8c0cce5a5d362e976"}, - {file = "pyarrow-14.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a51fee3a7db4d37f8cda3ea96f32530620d43b0489d169b285d774da48ca9785"}, - {file = "pyarrow-14.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64df2bf1ef2ef14cee531e2dfe03dd924017650ffaa6f9513d7a1bb291e59c15"}, - {file = "pyarrow-14.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c0fa3bfdb0305ffe09810f9d3e2e50a2787e3a07063001dcd7adae0cee3601a"}, - {file = "pyarrow-14.0.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:c65bf4fd06584f058420238bc47a316e80dda01ec0dfb3044594128a6c2db794"}, - {file = "pyarrow-14.0.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:63ac901baec9369d6aae1cbe6cca11178fb018a8d45068aaf5bb54f94804a866"}, - {file = "pyarrow-14.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:75ee0efe7a87a687ae303d63037d08a48ef9ea0127064df18267252cfe2e9541"}, - {file = "pyarrow-14.0.2.tar.gz", hash = "sha256:36cef6ba12b499d864d1def3e990f97949e0b79400d08b7cf74504ffbd3eb025"}, -] - -[package.dependencies] -numpy = ">=1.16.6" + {file = "pyarrow-15.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:0a524532fd6dd482edaa563b686d754c70417c2f72742a8c990b322d4c03a15d"}, + {file = "pyarrow-15.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a6bdb314affa9c2e0d5dddf3d9cbb9ef4a8dddaa68669975287d47ece67642"}, + {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:66958fd1771a4d4b754cd385835e66a3ef6b12611e001d4e5edfcef5f30391e2"}, + {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f500956a49aadd907eaa21d4fff75f73954605eaa41f61cb94fb008cf2e00c6"}, + {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6f87d9c4f09e049c2cade559643424da84c43a35068f2a1c4653dc5b1408a929"}, + {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:85239b9f93278e130d86c0e6bb455dcb66fc3fd891398b9d45ace8799a871a1e"}, + {file = "pyarrow-15.0.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:5b8d43e31ca16aa6e12402fcb1e14352d0d809de70edd185c7650fe80e0769e3"}, + {file = "pyarrow-15.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:fa7cd198280dbd0c988df525e50e35b5d16873e2cdae2aaaa6363cdb64e3eec5"}, + {file = "pyarrow-15.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8780b1a29d3c8b21ba6b191305a2a607de2e30dab399776ff0aa09131e266340"}, + {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0ec198ccc680f6c92723fadcb97b74f07c45ff3fdec9dd765deb04955ccf19"}, + {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036a7209c235588c2f07477fe75c07e6caced9b7b61bb897c8d4e52c4b5f9555"}, + {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2bd8a0e5296797faf9a3294e9fa2dc67aa7f10ae2207920dbebb785c77e9dbe5"}, + {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e8ebed6053dbe76883a822d4e8da36860f479d55a762bd9e70d8494aed87113e"}, + {file = "pyarrow-15.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:17d53a9d1b2b5bd7d5e4cd84d018e2a45bc9baaa68f7e6e3ebed45649900ba99"}, + {file = "pyarrow-15.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9950a9c9df24090d3d558b43b97753b8f5867fb8e521f29876aa021c52fda351"}, + {file = "pyarrow-15.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:003d680b5e422d0204e7287bb3fa775b332b3fce2996aa69e9adea23f5c8f970"}, + {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f75fce89dad10c95f4bf590b765e3ae98bcc5ba9f6ce75adb828a334e26a3d40"}, + {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca9cb0039923bec49b4fe23803807e4ef39576a2bec59c32b11296464623dc2"}, + {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ed5a78ed29d171d0acc26a305a4b7f83c122d54ff5270810ac23c75813585e4"}, + {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6eda9e117f0402dfcd3cd6ec9bfee89ac5071c48fc83a84f3075b60efa96747f"}, + {file = "pyarrow-15.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a3a6180c0e8f2727e6f1b1c87c72d3254cac909e609f35f22532e4115461177"}, + {file = "pyarrow-15.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:19a8918045993349b207de72d4576af0191beef03ea655d8bdb13762f0cd6eac"}, + {file = "pyarrow-15.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d0ec076b32bacb6666e8813a22e6e5a7ef1314c8069d4ff345efa6246bc38593"}, + {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5db1769e5d0a77eb92344c7382d6543bea1164cca3704f84aa44e26c67e320fb"}, + {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2617e3bf9df2a00020dd1c1c6dce5cc343d979efe10bc401c0632b0eef6ef5b"}, + {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:d31c1d45060180131caf10f0f698e3a782db333a422038bf7fe01dace18b3a31"}, + {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:c8c287d1d479de8269398b34282e206844abb3208224dbdd7166d580804674b7"}, + {file = "pyarrow-15.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:07eb7f07dc9ecbb8dace0f58f009d3a29ee58682fcdc91337dfeb51ea618a75b"}, + {file = "pyarrow-15.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:47af7036f64fce990bb8a5948c04722e4e3ea3e13b1007ef52dfe0aa8f23cf7f"}, + {file = "pyarrow-15.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:93768ccfff85cf044c418bfeeafce9a8bb0cee091bd8fd19011aff91e58de540"}, + {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6ee87fd6892700960d90abb7b17a72a5abb3b64ee0fe8db6c782bcc2d0dc0b4"}, + {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:001fca027738c5f6be0b7a3159cc7ba16a5c52486db18160909a0831b063c4e4"}, + {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:d1c48648f64aec09accf44140dccb92f4f94394b8d79976c426a5b79b11d4fa7"}, + {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:972a0141be402bb18e3201448c8ae62958c9c7923dfaa3b3d4530c835ac81aed"}, + {file = "pyarrow-15.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:f01fc5cf49081426429127aa2d427d9d98e1cb94a32cb961d583a70b7c4504e6"}, + {file = "pyarrow-15.0.0.tar.gz", hash = "sha256:876858f549d540898f927eba4ef77cd549ad8d24baa3207cf1b72e5788b50e83"}, +] + +[package.dependencies] +numpy = ">=1.16.6,<2" [[package]] name = "pyarrow-hotfix" @@ -4525,18 +4530,18 @@ files = [ [[package]] name = "pydantic" -version = "2.5.3" +version = "2.6.0" description = "Data validation using Python type hints" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pydantic-2.5.3-py3-none-any.whl", hash = "sha256:d0caf5954bee831b6bfe7e338c32b9e30c85dfe080c843680783ac2b631673b4"}, - {file = "pydantic-2.5.3.tar.gz", hash = "sha256:b3ef57c62535b0941697cce638c08900d87fcb67e29cfa99e8a68f747f393f7a"}, + {file = "pydantic-2.6.0-py3-none-any.whl", hash = "sha256:1440966574e1b5b99cf75a13bec7b20e3512e8a61b894ae252f56275e2c465ae"}, + {file = "pydantic-2.6.0.tar.gz", hash = "sha256:ae887bd94eb404b09d86e4d12f93893bdca79d766e738528c6fa1c849f3c6bcf"}, ] [package.dependencies] annotated-types = ">=0.4.0" -pydantic-core = "2.14.6" +pydantic-core = "2.16.1" typing-extensions = ">=4.6.1" [package.extras] @@ -4544,116 +4549,90 @@ email = ["email-validator (>=2.0.0)"] [[package]] name = "pydantic-core" -version = "2.14.6" +version = "2.16.1" description = "" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.14.6-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:72f9a942d739f09cd42fffe5dc759928217649f070056f03c70df14f5770acf9"}, - {file = "pydantic_core-2.14.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6a31d98c0d69776c2576dda4b77b8e0c69ad08e8b539c25c7d0ca0dc19a50d6c"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5aa90562bc079c6c290f0512b21768967f9968e4cfea84ea4ff5af5d917016e4"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:370ffecb5316ed23b667d99ce4debe53ea664b99cc37bfa2af47bc769056d534"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f85f3843bdb1fe80e8c206fe6eed7a1caeae897e496542cee499c374a85c6e08"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9862bf828112e19685b76ca499b379338fd4c5c269d897e218b2ae8fcb80139d"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036137b5ad0cb0004c75b579445a1efccd072387a36c7f217bb8efd1afbe5245"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92879bce89f91f4b2416eba4429c7b5ca22c45ef4a499c39f0c5c69257522c7c"}, - {file = 
"pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0c08de15d50fa190d577e8591f0329a643eeaed696d7771760295998aca6bc66"}, - {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:36099c69f6b14fc2c49d7996cbf4f87ec4f0e66d1c74aa05228583225a07b590"}, - {file = "pydantic_core-2.14.6-cp310-none-win32.whl", hash = "sha256:7be719e4d2ae6c314f72844ba9d69e38dff342bc360379f7c8537c48e23034b7"}, - {file = "pydantic_core-2.14.6-cp310-none-win_amd64.whl", hash = "sha256:36fa402dcdc8ea7f1b0ddcf0df4254cc6b2e08f8cd80e7010d4c4ae6e86b2a87"}, - {file = "pydantic_core-2.14.6-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:dea7fcd62915fb150cdc373212141a30037e11b761fbced340e9db3379b892d4"}, - {file = "pydantic_core-2.14.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ffff855100bc066ff2cd3aa4a60bc9534661816b110f0243e59503ec2df38421"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b027c86c66b8627eb90e57aee1f526df77dc6d8b354ec498be9a757d513b92b"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00b1087dabcee0b0ffd104f9f53d7d3eaddfaa314cdd6726143af6bc713aa27e"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75ec284328b60a4e91010c1acade0c30584f28a1f345bc8f72fe8b9e46ec6a96"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e1f4744eea1501404b20b0ac059ff7e3f96a97d3e3f48ce27a139e053bb370b"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2602177668f89b38b9f84b7b3435d0a72511ddef45dc14446811759b82235a1"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6c8edaea3089bf908dd27da8f5d9e395c5b4dc092dbcce9b65e7156099b4b937"}, - {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:478e9e7b360dfec451daafe286998d4a1eeaecf6d69c427b834ae771cad4b622"}, - {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b6ca36c12a5120bad343eef193cc0122928c5c7466121da7c20f41160ba00ba2"}, - {file = "pydantic_core-2.14.6-cp311-none-win32.whl", hash = "sha256:2b8719037e570639e6b665a4050add43134d80b687288ba3ade18b22bbb29dd2"}, - {file = "pydantic_core-2.14.6-cp311-none-win_amd64.whl", hash = "sha256:78ee52ecc088c61cce32b2d30a826f929e1708f7b9247dc3b921aec367dc1b23"}, - {file = "pydantic_core-2.14.6-cp311-none-win_arm64.whl", hash = "sha256:a19b794f8fe6569472ff77602437ec4430f9b2b9ec7a1105cfd2232f9ba355e6"}, - {file = "pydantic_core-2.14.6-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:667aa2eac9cd0700af1ddb38b7b1ef246d8cf94c85637cbb03d7757ca4c3fdec"}, - {file = "pydantic_core-2.14.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cdee837710ef6b56ebd20245b83799fce40b265b3b406e51e8ccc5b85b9099b7"}, - {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c5bcf3414367e29f83fd66f7de64509a8fd2368b1edf4351e862910727d3e51"}, - {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:26a92ae76f75d1915806b77cf459811e772d8f71fd1e4339c99750f0e7f6324f"}, - {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a983cca5ed1dd9a35e9e42ebf9f278d344603bfcb174ff99a5815f953925140a"}, - {file = 
"pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cb92f9061657287eded380d7dc455bbf115430b3aa4741bdc662d02977e7d0af"}, - {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ace1e220b078c8e48e82c081e35002038657e4b37d403ce940fa679e57113b"}, - {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ef633add81832f4b56d3b4c9408b43d530dfca29e68fb1b797dcb861a2c734cd"}, - {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7e90d6cc4aad2cc1f5e16ed56e46cebf4877c62403a311af20459c15da76fd91"}, - {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e8a5ac97ea521d7bde7621d86c30e86b798cdecd985723c4ed737a2aa9e77d0c"}, - {file = "pydantic_core-2.14.6-cp312-none-win32.whl", hash = "sha256:f27207e8ca3e5e021e2402ba942e5b4c629718e665c81b8b306f3c8b1ddbb786"}, - {file = "pydantic_core-2.14.6-cp312-none-win_amd64.whl", hash = "sha256:b3e5fe4538001bb82e2295b8d2a39356a84694c97cb73a566dc36328b9f83b40"}, - {file = "pydantic_core-2.14.6-cp312-none-win_arm64.whl", hash = "sha256:64634ccf9d671c6be242a664a33c4acf12882670b09b3f163cd00a24cffbd74e"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:24368e31be2c88bd69340fbfe741b405302993242ccb476c5c3ff48aeee1afe0"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:e33b0834f1cf779aa839975f9d8755a7c2420510c0fa1e9fa0497de77cd35d2c"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6af4b3f52cc65f8a0bc8b1cd9676f8c21ef3e9132f21fed250f6958bd7223bed"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d15687d7d7f40333bd8266f3814c591c2e2cd263fa2116e314f60d82086e353a"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:095b707bb287bfd534044166ab767bec70a9bba3175dcdc3371782175c14e43c"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94fc0e6621e07d1e91c44e016cc0b189b48db053061cc22d6298a611de8071bb"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ce830e480f6774608dedfd4a90c42aac4a7af0a711f1b52f807130c2e434c06"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a306cdd2ad3a7d795d8e617a58c3a2ed0f76c8496fb7621b6cd514eb1532cae8"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2f5fa187bde8524b1e37ba894db13aadd64faa884657473b03a019f625cee9a8"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:438027a975cc213a47c5d70672e0d29776082155cfae540c4e225716586be75e"}, - {file = "pydantic_core-2.14.6-cp37-none-win32.whl", hash = "sha256:f96ae96a060a8072ceff4cfde89d261837b4294a4f28b84a28765470d502ccc6"}, - {file = "pydantic_core-2.14.6-cp37-none-win_amd64.whl", hash = "sha256:e646c0e282e960345314f42f2cea5e0b5f56938c093541ea6dbf11aec2862391"}, - {file = "pydantic_core-2.14.6-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:db453f2da3f59a348f514cfbfeb042393b68720787bbef2b4c6068ea362c8149"}, - {file = "pydantic_core-2.14.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3860c62057acd95cc84044e758e47b18dcd8871a328ebc8ccdefd18b0d26a21b"}, - {file = 
"pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36026d8f99c58d7044413e1b819a67ca0e0b8ebe0f25e775e6c3d1fabb3c38fb"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8ed1af8692bd8d2a29d702f1a2e6065416d76897d726e45a1775b1444f5928a7"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:314ccc4264ce7d854941231cf71b592e30d8d368a71e50197c905874feacc8a8"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:982487f8931067a32e72d40ab6b47b1628a9c5d344be7f1a4e668fb462d2da42"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dbe357bc4ddda078f79d2a36fc1dd0494a7f2fad83a0a684465b6f24b46fe80"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2f6ffc6701a0eb28648c845f4945a194dc7ab3c651f535b81793251e1185ac3d"}, - {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7f5025db12fc6de7bc1104d826d5aee1d172f9ba6ca936bf6474c2148ac336c1"}, - {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dab03ed811ed1c71d700ed08bde8431cf429bbe59e423394f0f4055f1ca0ea60"}, - {file = "pydantic_core-2.14.6-cp38-none-win32.whl", hash = "sha256:dfcbebdb3c4b6f739a91769aea5ed615023f3c88cb70df812849aef634c25fbe"}, - {file = "pydantic_core-2.14.6-cp38-none-win_amd64.whl", hash = "sha256:99b14dbea2fdb563d8b5a57c9badfcd72083f6006caf8e126b491519c7d64ca8"}, - {file = "pydantic_core-2.14.6-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:4ce8299b481bcb68e5c82002b96e411796b844d72b3e92a3fbedfe8e19813eab"}, - {file = "pydantic_core-2.14.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b9a9d92f10772d2a181b5ca339dee066ab7d1c9a34ae2421b2a52556e719756f"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd9e98b408384989ea4ab60206b8e100d8687da18b5c813c11e92fd8212a98e0"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4f86f1f318e56f5cbb282fe61eb84767aee743ebe32c7c0834690ebea50c0a6b"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86ce5fcfc3accf3a07a729779d0b86c5d0309a4764c897d86c11089be61da160"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dcf1978be02153c6a31692d4fbcc2a3f1db9da36039ead23173bc256ee3b91b"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eedf97be7bc3dbc8addcef4142f4b4164066df0c6f36397ae4aaed3eb187d8ab"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5f916acf8afbcab6bacbb376ba7dc61f845367901ecd5e328fc4d4aef2fcab0"}, - {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8a14c192c1d724c3acbfb3f10a958c55a2638391319ce8078cb36c02283959b9"}, - {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0348b1dc6b76041516e8a854ff95b21c55f5a411c3297d2ca52f5528e49d8411"}, - {file = "pydantic_core-2.14.6-cp39-none-win32.whl", hash = "sha256:de2a0645a923ba57c5527497daf8ec5df69c6eadf869e9cd46e86349146e5975"}, - {file = "pydantic_core-2.14.6-cp39-none-win_amd64.whl", hash = "sha256:aca48506a9c20f68ee61c87f2008f81f8ee99f8d7f0104bff3c47e2d148f89d9"}, - {file = 
"pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d5c28525c19f5bb1e09511669bb57353d22b94cf8b65f3a8d141c389a55dec95"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:78d0768ee59baa3de0f4adac9e3748b4b1fffc52143caebddfd5ea2961595277"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b93785eadaef932e4fe9c6e12ba67beb1b3f1e5495631419c784ab87e975670"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a874f21f87c485310944b2b2734cd6d318765bcbb7515eead33af9641816506e"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89f4477d915ea43b4ceea6756f63f0288941b6443a2b28c69004fe07fde0d0d"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:172de779e2a153d36ee690dbc49c6db568d7b33b18dc56b69a7514aecbcf380d"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:dfcebb950aa7e667ec226a442722134539e77c575f6cfaa423f24371bb8d2e94"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:55a23dcd98c858c0db44fc5c04fc7ed81c4b4d33c653a7c45ddaebf6563a2f66"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4241204e4b36ab5ae466ecec5c4c16527a054c69f99bba20f6f75232a6a534e2"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e574de99d735b3fc8364cba9912c2bec2da78775eba95cbb225ef7dda6acea24"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1302a54f87b5cd8528e4d6d1bf2133b6aa7c6122ff8e9dc5220fbc1e07bffebd"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8e81e4b55930e5ffab4a68db1af431629cf2e4066dbdbfef65348b8ab804ea8"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c99462ffc538717b3e60151dfaf91125f637e801f5ab008f81c402f1dff0cd0f"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e4cf2d5829f6963a5483ec01578ee76d329eb5caf330ecd05b3edd697e7d768a"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:cf10b7d58ae4a1f07fccbf4a0a956d705356fea05fb4c70608bb6fa81d103cda"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:399ac0891c284fa8eb998bcfa323f2234858f5d2efca3950ae58c8f88830f145"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c6a5c79b28003543db3ba67d1df336f253a87d3112dac3a51b94f7d48e4c0e1"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:599c87d79cab2a6a2a9df4aefe0455e61e7d2aeede2f8577c1b7c0aec643ee8e"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43e166ad47ba900f2542a80d83f9fc65fe99eb63ceec4debec160ae729824052"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3a0b5db001b98e1c649dd55afa928e75aa4087e587b9524a4992316fa23c9fba"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:747265448cb57a9f37572a488a57d873fd96bf51e5bb7edb52cfb37124516da4"}, - {file = 
"pydantic_core-2.14.6-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:7ebe3416785f65c28f4f9441e916bfc8a54179c8dea73c23023f7086fa601c5d"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:86c963186ca5e50d5c8287b1d1c9d3f8f024cbe343d048c5bd282aec2d8641f2"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e0641b506486f0b4cd1500a2a65740243e8670a2549bb02bc4556a83af84ae03"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71d72ca5eaaa8d38c8df16b7deb1a2da4f650c41b58bb142f3fb75d5ad4a611f"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27e524624eace5c59af499cd97dc18bb201dc6a7a2da24bfc66ef151c69a5f2a"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a3dde6cac75e0b0902778978d3b1646ca9f438654395a362cb21d9ad34b24acf"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:00646784f6cd993b1e1c0e7b0fdcbccc375d539db95555477771c27555e3c556"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:23598acb8ccaa3d1d875ef3b35cb6376535095e9405d91a3d57a8c7db5d29341"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7f41533d7e3cf9520065f610b41ac1c76bc2161415955fbcead4981b22c7611e"}, - {file = "pydantic_core-2.14.6.tar.gz", hash = "sha256:1fd0c1d395372843fba13a51c28e3bb9d59bd7aebfeb17358ffaaa1e4dbbe948"}, + {file = "pydantic_core-2.16.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:300616102fb71241ff477a2cbbc847321dbec49428434a2f17f37528721c4948"}, + {file = "pydantic_core-2.16.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5511f962dd1b9b553e9534c3b9c6a4b0c9ded3d8c2be96e61d56f933feef9e1f"}, + {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98f0edee7ee9cc7f9221af2e1b95bd02810e1c7a6d115cfd82698803d385b28f"}, + {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9795f56aa6b2296f05ac79d8a424e94056730c0b860a62b0fdcfe6340b658cc8"}, + {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c45f62e4107ebd05166717ac58f6feb44471ed450d07fecd90e5f69d9bf03c48"}, + {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:462d599299c5971f03c676e2b63aa80fec5ebc572d89ce766cd11ca8bcb56f3f"}, + {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21ebaa4bf6386a3b22eec518da7d679c8363fb7fb70cf6972161e5542f470798"}, + {file = "pydantic_core-2.16.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:99f9a50b56713a598d33bc23a9912224fc5d7f9f292444e6664236ae471ddf17"}, + {file = "pydantic_core-2.16.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:8ec364e280db4235389b5e1e6ee924723c693cbc98e9d28dc1767041ff9bc388"}, + {file = "pydantic_core-2.16.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:653a5dfd00f601a0ed6654a8b877b18d65ac32c9d9997456e0ab240807be6cf7"}, + {file = "pydantic_core-2.16.1-cp310-none-win32.whl", hash = "sha256:1661c668c1bb67b7cec96914329d9ab66755911d093bb9063c4c8914188af6d4"}, + {file = "pydantic_core-2.16.1-cp310-none-win_amd64.whl", hash = "sha256:561be4e3e952c2f9056fba5267b99be4ec2afadc27261505d4992c50b33c513c"}, + {file = 
"pydantic_core-2.16.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:102569d371fadc40d8f8598a59379c37ec60164315884467052830b28cc4e9da"}, + {file = "pydantic_core-2.16.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:735dceec50fa907a3c314b84ed609dec54b76a814aa14eb90da31d1d36873a5e"}, + {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e83ebbf020be727d6e0991c1b192a5c2e7113eb66e3def0cd0c62f9f266247e4"}, + {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:30a8259569fbeec49cfac7fda3ec8123486ef1b729225222f0d41d5f840b476f"}, + {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:920c4897e55e2881db6a6da151198e5001552c3777cd42b8a4c2f72eedc2ee91"}, + {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f5247a3d74355f8b1d780d0f3b32a23dd9f6d3ff43ef2037c6dcd249f35ecf4c"}, + {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d5bea8012df5bb6dda1e67d0563ac50b7f64a5d5858348b5c8cb5043811c19d"}, + {file = "pydantic_core-2.16.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ed3025a8a7e5a59817b7494686d449ebfbe301f3e757b852c8d0d1961d6be864"}, + {file = "pydantic_core-2.16.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:06f0d5a1d9e1b7932477c172cc720b3b23c18762ed7a8efa8398298a59d177c7"}, + {file = "pydantic_core-2.16.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:150ba5c86f502c040b822777e2e519b5625b47813bd05f9273a8ed169c97d9ae"}, + {file = "pydantic_core-2.16.1-cp311-none-win32.whl", hash = "sha256:d6cbdf12ef967a6aa401cf5cdf47850559e59eedad10e781471c960583f25aa1"}, + {file = "pydantic_core-2.16.1-cp311-none-win_amd64.whl", hash = "sha256:afa01d25769af33a8dac0d905d5c7bb2d73c7c3d5161b2dd6f8b5b5eea6a3c4c"}, + {file = "pydantic_core-2.16.1-cp311-none-win_arm64.whl", hash = "sha256:1a2fe7b00a49b51047334d84aafd7e39f80b7675cad0083678c58983662da89b"}, + {file = "pydantic_core-2.16.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0f478ec204772a5c8218e30eb813ca43e34005dff2eafa03931b3d8caef87d51"}, + {file = "pydantic_core-2.16.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f1936ef138bed2165dd8573aa65e3095ef7c2b6247faccd0e15186aabdda7f66"}, + {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99d3a433ef5dc3021c9534a58a3686c88363c591974c16c54a01af7efd741f13"}, + {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd88f40f2294440d3f3c6308e50d96a0d3d0973d6f1a5732875d10f569acef49"}, + {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fac641bbfa43d5a1bed99d28aa1fded1984d31c670a95aac1bf1d36ac6ce137"}, + {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:72bf9308a82b75039b8c8edd2be2924c352eda5da14a920551a8b65d5ee89253"}, + {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb4363e6c9fc87365c2bc777a1f585a22f2f56642501885ffc7942138499bf54"}, + {file = "pydantic_core-2.16.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:20f724a023042588d0f4396bbbcf4cffd0ddd0ad3ed4f0d8e6d4ac4264bae81e"}, + {file = "pydantic_core-2.16.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:fb4370b15111905bf8b5ba2129b926af9470f014cb0493a67d23e9d7a48348e8"}, + {file = "pydantic_core-2.16.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:23632132f1fd608034f1a56cc3e484be00854db845b3a4a508834be5a6435a6f"}, + {file = "pydantic_core-2.16.1-cp312-none-win32.whl", hash = "sha256:b9f3e0bffad6e238f7acc20c393c1ed8fab4371e3b3bc311020dfa6020d99212"}, + {file = "pydantic_core-2.16.1-cp312-none-win_amd64.whl", hash = "sha256:a0b4cfe408cd84c53bab7d83e4209458de676a6ec5e9c623ae914ce1cb79b96f"}, + {file = "pydantic_core-2.16.1-cp312-none-win_arm64.whl", hash = "sha256:d195add190abccefc70ad0f9a0141ad7da53e16183048380e688b466702195dd"}, + {file = "pydantic_core-2.16.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:502c062a18d84452858f8aea1e520e12a4d5228fc3621ea5061409d666ea1706"}, + {file = "pydantic_core-2.16.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d8c032ccee90b37b44e05948b449a2d6baed7e614df3d3f47fe432c952c21b60"}, + {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:920f4633bee43d7a2818e1a1a788906df5a17b7ab6fe411220ed92b42940f818"}, + {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9f5d37ff01edcbace53a402e80793640c25798fb7208f105d87a25e6fcc9ea06"}, + {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:399166f24c33a0c5759ecc4801f040dbc87d412c1a6d6292b2349b4c505effc9"}, + {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ac89ccc39cd1d556cc72d6752f252dc869dde41c7c936e86beac5eb555041b66"}, + {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73802194f10c394c2bedce7a135ba1d8ba6cff23adf4217612bfc5cf060de34c"}, + {file = "pydantic_core-2.16.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8fa00fa24ffd8c31fac081bf7be7eb495be6d248db127f8776575a746fa55c95"}, + {file = "pydantic_core-2.16.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:601d3e42452cd4f2891c13fa8c70366d71851c1593ed42f57bf37f40f7dca3c8"}, + {file = "pydantic_core-2.16.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:07982b82d121ed3fc1c51faf6e8f57ff09b1325d2efccaa257dd8c0dd937acca"}, + {file = "pydantic_core-2.16.1-cp38-none-win32.whl", hash = "sha256:d0bf6f93a55d3fa7a079d811b29100b019784e2ee6bc06b0bb839538272a5610"}, + {file = "pydantic_core-2.16.1-cp38-none-win_amd64.whl", hash = "sha256:fbec2af0ebafa57eb82c18c304b37c86a8abddf7022955d1742b3d5471a6339e"}, + {file = "pydantic_core-2.16.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a497be217818c318d93f07e14502ef93d44e6a20c72b04c530611e45e54c2196"}, + {file = "pydantic_core-2.16.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:694a5e9f1f2c124a17ff2d0be613fd53ba0c26de588eb4bdab8bca855e550d95"}, + {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d4dfc66abea3ec6d9f83e837a8f8a7d9d3a76d25c9911735c76d6745950e62c"}, + {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8655f55fe68c4685673265a650ef71beb2d31871c049c8b80262026f23605ee3"}, + {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:21e3298486c4ea4e4d5cc6fb69e06fb02a4e22089304308817035ac006a7f506"}, + {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:71b4a48a7427f14679f0015b13c712863d28bb1ab700bd11776a5368135c7d60"}, + {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10dca874e35bb60ce4f9f6665bfbfad050dd7573596608aeb9e098621ac331dc"}, + {file = "pydantic_core-2.16.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fa496cd45cda0165d597e9d6f01e36c33c9508f75cf03c0a650018c5048f578e"}, + {file = "pydantic_core-2.16.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5317c04349472e683803da262c781c42c5628a9be73f4750ac7d13040efb5d2d"}, + {file = "pydantic_core-2.16.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:42c29d54ed4501a30cd71015bf982fa95e4a60117b44e1a200290ce687d3e640"}, + {file = "pydantic_core-2.16.1-cp39-none-win32.whl", hash = "sha256:ba07646f35e4e49376c9831130039d1b478fbfa1215ae62ad62d2ee63cf9c18f"}, + {file = "pydantic_core-2.16.1-cp39-none-win_amd64.whl", hash = "sha256:2133b0e412a47868a358713287ff9f9a328879da547dc88be67481cdac529118"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:d25ef0c33f22649b7a088035fd65ac1ce6464fa2876578df1adad9472f918a76"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:99c095457eea8550c9fa9a7a992e842aeae1429dab6b6b378710f62bfb70b394"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b49c604ace7a7aa8af31196abbf8f2193be605db6739ed905ecaf62af31ccae0"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c56da23034fe66221f2208c813d8aa509eea34d97328ce2add56e219c3a9f41c"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cebf8d56fee3b08ad40d332a807ecccd4153d3f1ba8231e111d9759f02edfd05"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:1ae8048cba95f382dba56766525abca438328455e35c283bb202964f41a780b0"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:780daad9e35b18d10d7219d24bfb30148ca2afc309928e1d4d53de86822593dc"}, + {file = "pydantic_core-2.16.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c94b5537bf6ce66e4d7830c6993152940a188600f6ae044435287753044a8fe2"}, + {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:adf28099d061a25fbcc6531febb7a091e027605385de9fe14dd6a97319d614cf"}, + {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:644904600c15816a1f9a1bafa6aab0d21db2788abcdf4e2a77951280473f33e1"}, + {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87bce04f09f0552b66fca0c4e10da78d17cb0e71c205864bab4e9595122cb9d9"}, + {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:877045a7969ace04d59516d5d6a7dee13106822f99a5d8df5e6822941f7bedc8"}, + {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9c46e556ee266ed3fb7b7a882b53df3c76b45e872fdab8d9cf49ae5e91147fd7"}, + {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:4eebbd049008eb800f519578e944b8dc8e0f7d59a5abb5924cc2d4ed3a1834ff"}, + {file = "pydantic_core-2.16.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:c0be58529d43d38ae849a91932391eb93275a06b93b79a8ab828b012e916a206"}, + {file = 
"pydantic_core-2.16.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:b1fc07896fc1851558f532dffc8987e526b682ec73140886c831d773cef44b76"}, + {file = "pydantic_core-2.16.1.tar.gz", hash = "sha256:daff04257b49ab7f4b3f73f98283d3dbb1a65bf3500d55c7beac3c66c310fe34"}, ] [package.dependencies] @@ -4677,13 +4656,13 @@ setuptools = "*" [[package]] name = "pydeps" -version = "1.12.17" +version = "1.12.18" description = "Display module dependencies" optional = false python-versions = "*" files = [ - {file = "pydeps-1.12.17-py3-none-any.whl", hash = "sha256:4fb2e86071c78c1b85a1c63745a267d100e91daf6bab2f14331b3c77433b58b4"}, - {file = "pydeps-1.12.17.tar.gz", hash = "sha256:c308e8355a1e77ff0af899d6f9f1665d4eb07019692dba9fb1dc1cab05df36a4"}, + {file = "pydeps-1.12.18-py3-none-any.whl", hash = "sha256:fc57f56a6eaf92ea6b9b503dc43d55f098661e253a868bbb52fccfbbcc8e79de"}, + {file = "pydeps-1.12.18.tar.gz", hash = "sha256:15c5d023b5053308e19a69591da06d9f3ff038e7a47111c40c9986b6a2929a4b"}, ] [package.dependencies] @@ -4750,71 +4729,71 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pyinstrument" -version = "4.6.1" +version = "4.6.2" description = "Call stack profiler for Python. Shows you why your code is slow!" optional = false python-versions = ">=3.7" files = [ - {file = "pyinstrument-4.6.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:73476e4bc6e467ac1b2c3c0dd1f0b71c9061d4de14626676adfdfbb14aa342b4"}, - {file = "pyinstrument-4.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4d1da8efd974cf9df52ee03edaee2d3875105ddd00de35aa542760f7c612bdf7"}, - {file = "pyinstrument-4.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:507be1ee2f2b0c9fba74d622a272640dd6d1b0c9ec3388b2cdeb97ad1e77125f"}, - {file = "pyinstrument-4.6.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:95cee6de08eb45754ef4f602ce52b640d1c535d934a6a8733a974daa095def37"}, - {file = "pyinstrument-4.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7873e8cec92321251fdf894a72b3c78f4c5c20afdd1fef0baf9042ec843bb04"}, - {file = "pyinstrument-4.6.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a242f6cac40bc83e1f3002b6b53681846dfba007f366971db0bf21e02dbb1903"}, - {file = "pyinstrument-4.6.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:97c9660cdb4bd2a43cf4f3ab52cffd22f3ac9a748d913b750178fb34e5e39e64"}, - {file = "pyinstrument-4.6.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e304cd0723e2b18ada5e63c187abf6d777949454c734f5974d64a0865859f0f4"}, - {file = "pyinstrument-4.6.1-cp310-cp310-win32.whl", hash = "sha256:cee21a2d78187dd8a80f72f5d0f1ddb767b2d9800f8bb4d94b6d11f217c22cdb"}, - {file = "pyinstrument-4.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:2000712f71d693fed2f8a1c1638d37b7919124f367b37976d07128d49f1445eb"}, - {file = "pyinstrument-4.6.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a366c6f3dfb11f1739bdc1dee75a01c1563ad0bf4047071e5e77598087df457f"}, - {file = "pyinstrument-4.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c6be327be65d934796558aa9cb0f75ce62ebd207d49ad1854610c97b0579ad47"}, - {file = "pyinstrument-4.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9e160d9c5d20d3e4ef82269e4e8b246ff09bdf37af5fb8cb8ccca97936d95ad6"}, - {file = "pyinstrument-4.6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:6ffbf56605ef21c2fcb60de2fa74ff81f417d8be0c5002a407e414d6ef6dee43"}, - {file = "pyinstrument-4.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c92cc4924596d6e8f30a16182bbe90893b1572d847ae12652f72b34a9a17c24a"}, - {file = "pyinstrument-4.6.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f4b48a94d938cae981f6948d9ec603bab2087b178d2095d042d5a48aabaecaab"}, - {file = "pyinstrument-4.6.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e7a386392275bdef4a1849712dc5b74f0023483fca14ef93d0ca27d453548982"}, - {file = "pyinstrument-4.6.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:871b131b83e9b1122f2325061c68ed1e861eebcb568c934d2fb193652f077f77"}, - {file = "pyinstrument-4.6.1-cp311-cp311-win32.whl", hash = "sha256:8d8515156dd91f5652d13b5fcc87e634f8fe1c07b68d1d0840348cdd50bf5ace"}, - {file = "pyinstrument-4.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:fb868fbe089036e9f32525a249f4c78b8dc46967612393f204b8234f439c9cc4"}, - {file = "pyinstrument-4.6.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a18cd234cce4f230f1733807f17a134e64a1f1acabf74a14d27f583cf2b183df"}, - {file = "pyinstrument-4.6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:574cfca69150be4ce4461fb224712fbc0722a49b0dc02fa204d02807adf6b5a0"}, - {file = "pyinstrument-4.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e02cf505e932eb8ccf561b7527550a67ec14fcae1fe0e25319b09c9c166e914"}, - {file = "pyinstrument-4.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:832fb2acef9d53701c1ab546564c45fb70a8770c816374f8dd11420d399103c9"}, - {file = "pyinstrument-4.6.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13cb57e9607545623ebe462345b3d0c4caee0125d2d02267043ece8aca8f4ea0"}, - {file = "pyinstrument-4.6.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9be89e7419bcfe8dd6abb0d959d6d9c439c613a4a873514c43d16b48dae697c9"}, - {file = "pyinstrument-4.6.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:476785cfbc44e8e1b1ad447398aa3deae81a8df4d37eb2d8bbb0c404eff979cd"}, - {file = "pyinstrument-4.6.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e9cebd90128a3d2fee36d3ccb665c1b9dce75261061b2046203e45c4a8012d54"}, - {file = "pyinstrument-4.6.1-cp312-cp312-win32.whl", hash = "sha256:1d0b76683df2ad5c40eff73607dc5c13828c92fbca36aff1ddf869a3c5a55fa6"}, - {file = "pyinstrument-4.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:c4b7af1d9d6a523cfbfedebcb69202242d5bd0cb89c4e094cc73d5d6e38279bd"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:79ae152f8c6a680a188fb3be5e0f360ac05db5bbf410169a6c40851dfaebcce9"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07cad2745964c174c65aa75f1bf68a4394d1b4d28f33894837cfd315d1e836f0"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb81f66f7f94045d723069cf317453d42375de9ff3c69089cf6466b078ac1db4"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab30ae75969da99e9a529e21ff497c18fdf958e822753db4ae7ed1e67094040"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f36cb5b644762fb3c86289324bbef17e95f91cd710603ac19444a47f638e8e96"}, - {file = 
"pyinstrument-4.6.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:8b45075d9dbbc977dbc7007fb22bb0054c6990fbe91bf48dd80c0b96c6307ba7"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:475ac31477f6302e092463896d6a2055f3e6abcd293bad16ff94fc9185308a88"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-win32.whl", hash = "sha256:29172ab3d8609fdf821c3f2562dc61e14f1a8ff5306607c32ca743582d3a760e"}, - {file = "pyinstrument-4.6.1-cp37-cp37m-win_amd64.whl", hash = "sha256:bd176f297c99035127b264369d2bb97a65255f65f8d4e843836baf55ebb3cee4"}, - {file = "pyinstrument-4.6.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:23e9b4526978432e9999021da9a545992cf2ac3df5ee82db7beb6908fc4c978c"}, - {file = "pyinstrument-4.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2dbcaccc9f456ef95557ec501caeb292119c24446d768cb4fb43578b0f3d572c"}, - {file = "pyinstrument-4.6.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2097f63c66c2bc9678c826b9ff0c25acde3ed455590d9dcac21220673fe74fbf"}, - {file = "pyinstrument-4.6.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:205ac2e76bd65d61b9611a9ce03d5f6393e34ec5b41dd38808f25d54e6b3e067"}, - {file = "pyinstrument-4.6.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f414ddf1161976a40fc0a333000e6a4ad612719eac0b8c9bb73f47153187148"}, - {file = "pyinstrument-4.6.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:65e62ebfa2cd8fb57eda90006f4505ac4c70da00fc2f05b6d8337d776ea76d41"}, - {file = "pyinstrument-4.6.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d96309df4df10be7b4885797c5f69bb3a89414680ebaec0722d8156fde5268c3"}, - {file = "pyinstrument-4.6.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f3d1ad3bc8ebb4db925afa706aa865c4bfb40d52509f143491ac0df2440ee5d2"}, - {file = "pyinstrument-4.6.1-cp38-cp38-win32.whl", hash = "sha256:dc37cb988c8854eb42bda2e438aaf553536566657d157c4473cc8aad5692a779"}, - {file = "pyinstrument-4.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:2cd4ce750c34a0318fc2d6c727cc255e9658d12a5cf3f2d0473f1c27157bdaeb"}, - {file = "pyinstrument-4.6.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6ca95b21f022e995e062b371d1f42d901452bcbedd2c02f036de677119503355"}, - {file = "pyinstrument-4.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ac1e1d7e1f1b64054c4eb04eb4869a7a5eef2261440e73943cc1b1bc3c828c18"}, - {file = "pyinstrument-4.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0711845e953fce6ab781221aacffa2a66dbc3289f8343e5babd7b2ea34da6c90"}, - {file = "pyinstrument-4.6.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b7d28582017de35cb64eb4e4fa603e753095108ca03745f5d17295970ee631f"}, - {file = "pyinstrument-4.6.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7be57db08bd366a37db3aa3a6187941ee21196e8b14975db337ddc7d1490649d"}, - {file = "pyinstrument-4.6.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9a0ac0f56860398d2628ce389826ce83fb3a557d0c9a2351e8a2eac6eb869983"}, - {file = "pyinstrument-4.6.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a9045186ff13bc826fef16be53736a85029aae3c6adfe52e666cad00d7ca623b"}, - {file = "pyinstrument-4.6.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6c4c56b6eab9004e92ad8a48bb54913fdd71fc8a748ae42a27b9e26041646f8b"}, - {file = 
"pyinstrument-4.6.1-cp39-cp39-win32.whl", hash = "sha256:37e989c44b51839d0c97466fa2b623638b9470d56d79e329f359f0e8fa6d83db"}, - {file = "pyinstrument-4.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:5494c5a84fee4309d7d973366ca6b8b9f8ba1d6b254e93b7c506264ef74f2cef"}, - {file = "pyinstrument-4.6.1.tar.gz", hash = "sha256:f4731b27121350f5a983d358d2272fe3df2f538aed058f57217eef7801a89288"}, + {file = "pyinstrument-4.6.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7a1b1cd768ea7ea9ab6f5490f7e74431321bcc463e9441dbc2f769617252d9e2"}, + {file = "pyinstrument-4.6.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8a386b9d09d167451fb2111eaf86aabf6e094fed42c15f62ec51d6980bce7d96"}, + {file = "pyinstrument-4.6.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23c3e3ca8553b9aac09bd978c73d21b9032c707ac6d803bae6a20ecc048df4a8"}, + {file = "pyinstrument-4.6.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5f329f5534ca069420246f5ce57270d975229bcb92a3a3fd6b2ca086527d9764"}, + {file = "pyinstrument-4.6.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4dcdcc7ba224a0c5edfbd00b0f530f5aed2b26da5aaa2f9af5519d4aa8c7e41"}, + {file = "pyinstrument-4.6.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73db0c2c99119c65b075feee76e903b4ed82e59440fe8b5724acf5c7cb24721f"}, + {file = "pyinstrument-4.6.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:da58f265326f3cf3975366ccb8b39014f1e69ff8327958a089858d71c633d654"}, + {file = "pyinstrument-4.6.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:feebcf860f955401df30d029ec8de7a0c5515d24ea809736430fd1219686fe14"}, + {file = "pyinstrument-4.6.2-cp310-cp310-win32.whl", hash = "sha256:b2b66ff0b16c8ecf1ec22de001cfff46872b2c163c62429055105564eef50b2e"}, + {file = "pyinstrument-4.6.2-cp310-cp310-win_amd64.whl", hash = "sha256:8d104b7a7899d5fa4c5bf1ceb0c1a070615a72c5dc17bc321b612467ad5c5d88"}, + {file = "pyinstrument-4.6.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:62f6014d2b928b181a52483e7c7b82f2c27e22c577417d1681153e5518f03317"}, + {file = "pyinstrument-4.6.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dcb5c8d763c5df55131670ba2a01a8aebd0d490a789904a55eb6a8b8d497f110"}, + {file = "pyinstrument-4.6.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ed4e8c6c84e0e6429ba7008a66e435ede2d8cb027794c20923c55669d9c5633"}, + {file = "pyinstrument-4.6.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c0f0e1d8f8c70faa90ff57f78ac0dda774b52ea0bfb2d9f0f41ce6f3e7c869e"}, + {file = "pyinstrument-4.6.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b3c44cb037ad0d6e9d9a48c14d856254ada641fbd0ae9de40da045fc2226a2a"}, + {file = "pyinstrument-4.6.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:be9901f17ac2f527c352f2fdca3d717c1d7f2ce8a70bad5a490fc8cc5d2a6007"}, + {file = "pyinstrument-4.6.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:8a9791bf8916c1cf439c202fded32de93354b0f57328f303d71950b0027c7811"}, + {file = "pyinstrument-4.6.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d6162615e783c59e36f2d7caf903a7e3ecb6b32d4a4ae8907f2760b2ef395bf6"}, + {file = "pyinstrument-4.6.2-cp311-cp311-win32.whl", hash = "sha256:28af084aa84bbfd3620ebe71d5f9a0deca4451267f363738ca824f733de55056"}, + {file = 
"pyinstrument-4.6.2-cp311-cp311-win_amd64.whl", hash = "sha256:dd6007d3c2e318e09e582435dd8d111cccf30d342af66886b783208813caf3d7"}, + {file = "pyinstrument-4.6.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e3813c8ecfab9d7d855c5f0f71f11793cf1507f40401aa33575c7fd613577c23"}, + {file = "pyinstrument-4.6.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6c761372945e60fc1396b7a49f30592e8474e70a558f1a87346d27c8c4ce50f7"}, + {file = "pyinstrument-4.6.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fba3244e94c117bf4d9b30b8852bbdcd510e7329fdd5c7c8b3799e00a9215a8"}, + {file = "pyinstrument-4.6.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:803ac64e526473d64283f504df3b0d5c2c203ea9603cab428641538ffdc753a7"}, + {file = "pyinstrument-4.6.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2e554b1bb0df78f5ce8a92df75b664912ca93aa94208386102af454ec31b647"}, + {file = "pyinstrument-4.6.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7c671057fad22ee3ded897a6a361204ea2538e44c1233cad0e8e30f6d27f33db"}, + {file = "pyinstrument-4.6.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:d02f31fa13a9e8dc702a113878419deba859563a32474c9f68e04619d43d6f01"}, + {file = "pyinstrument-4.6.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b55983a884f083f93f0fc6d12ff8df0acd1e2fb0580d2f4c7bfe6def33a84b58"}, + {file = "pyinstrument-4.6.2-cp312-cp312-win32.whl", hash = "sha256:fdc0a53b27e5d8e47147489c7dab596ddd1756b1e053217ef5bc6718567099ff"}, + {file = "pyinstrument-4.6.2-cp312-cp312-win_amd64.whl", hash = "sha256:dd5c53a0159126b5ce7cbc4994433c9c671e057c85297ff32645166a06ad2c50"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b082df0bbf71251a7f4880a12ed28421dba84ea7110bb376e0533067a4eaff40"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90350533396071cb2543affe01e40bf534c35cb0d4b8fa9fdb0f052f9ca2cfe3"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67268bb0d579330cff40fd1c90b8510363ca1a0e7204225840614068658dab77"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20e15b4e1d29ba0b7fc81aac50351e0dc0d7e911e93771ebc3f408e864a2c93b"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2e625fc6ffcd4fd420493edd8276179c3f784df207bef4c2192725c1b310534c"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:113d2fc534c9ca7b6b5661d6ada05515bf318f6eb34e8d05860fe49eb7cfe17e"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3098cd72b71a322a72dafeb4ba5c566465e193d2030adad4c09566bd2f89bf4f"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-win32.whl", hash = "sha256:08fdc7f88c989316fa47805234c37a40fafe7b614afd8ae863f0afa9d1707b37"}, + {file = "pyinstrument-4.6.2-cp37-cp37m-win_amd64.whl", hash = "sha256:5ebeba952c0056dcc9b9355328c78c4b5c2a33b4b4276a9157a3ab589f3d1bac"}, + {file = "pyinstrument-4.6.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:34e59e91c88ec9ad5630c0964eca823949005e97736bfa838beb4789e94912a2"}, + {file = "pyinstrument-4.6.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cd0320c39e99e3c0a3129d1ed010ac41e5a7eb96fb79900d270080a97962e995"}, + {file = 
"pyinstrument-4.6.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46992e855d630575ec635eeca0068a8ddf423d4fd32ea0875a94e9f8688f0b95"}, + {file = "pyinstrument-4.6.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e474c56da636253dfdca7cd1998b240d6b39f7ed34777362db69224fcf053b1"}, + {file = "pyinstrument-4.6.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4b559322f30509ad8f082561792352d0805b3edfa508e492a36041fdc009259"}, + {file = "pyinstrument-4.6.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:06a8578b2943eb1dbbf281e1e59e44246acfefd79e1b06d4950f01b693de12af"}, + {file = "pyinstrument-4.6.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7bd3da31c46f1c1cb7ae89031725f6a1d1015c2041d9c753fe23980f5f9fd86c"}, + {file = "pyinstrument-4.6.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e63f4916001aa9c625976a50779282e0a5b5e9b17c52a50ef4c651e468ed5b88"}, + {file = "pyinstrument-4.6.2-cp38-cp38-win32.whl", hash = "sha256:32ec8db6896b94af790a530e1e0edad4d0f941a0ab8dd9073e5993e7ea46af7d"}, + {file = "pyinstrument-4.6.2-cp38-cp38-win_amd64.whl", hash = "sha256:a59fc4f7db738a094823afe6422509fa5816a7bf74e768ce5a7a2ddd91af40ac"}, + {file = "pyinstrument-4.6.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3a165e0d2deb212d4cf439383982a831682009e1b08733c568cac88c89784e62"}, + {file = "pyinstrument-4.6.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7ba858b3d6f6e5597c641edcc0e7e464f85aba86d71bc3b3592cb89897bf43f6"}, + {file = "pyinstrument-4.6.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fd8e547cf3df5f0ec6e4dffbe2e857f6b28eda51b71c3c0b5a2fc0646527835"}, + {file = "pyinstrument-4.6.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0de2c1714a37a820033b19cf134ead43299a02662f1379140974a9ab733c5f3a"}, + {file = "pyinstrument-4.6.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01fc45dedceec3df81668d702bca6d400d956c8b8494abc206638c167c78dfd9"}, + {file = "pyinstrument-4.6.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5b6e161ef268d43ee6bbfae7fd2cdd0a52c099ddd21001c126ca1805dc906539"}, + {file = "pyinstrument-4.6.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6ba8e368d0421f15ba6366dfd60ec131c1b46505d021477e0f865d26cf35a605"}, + {file = "pyinstrument-4.6.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edca46f04a573ac2fb11a84b937844e6a109f38f80f4b422222fb5be8ecad8cb"}, + {file = "pyinstrument-4.6.2-cp39-cp39-win32.whl", hash = "sha256:baf375953b02fe94d00e716f060e60211ede73f49512b96687335f7071adb153"}, + {file = "pyinstrument-4.6.2-cp39-cp39-win_amd64.whl", hash = "sha256:af1a953bce9fd530040895d01ff3de485e25e1576dccb014f76ba9131376fcad"}, + {file = "pyinstrument-4.6.2.tar.gz", hash = "sha256:0002ee517ed8502bbda6eb2bb1ba8f95a55492fcdf03811ba13d4806e50dd7f6"}, ] [package.extras] @@ -5070,13 +5049,13 @@ docs = ["Sphinx (>=6.2,<7.0)", "boto3 (>=1.26,<2.0)", "cartopy (>=0.21,<1.0)", " [[package]] name = "pytest" -version = "7.4.4" +version = "8.0.0" description = "pytest: simple powerful testing with Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, - {file = "pytest-7.4.4.tar.gz", hash = 
"sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, + {file = "pytest-8.0.0-py3-none-any.whl", hash = "sha256:50fb9cbe836c3f20f0dfa99c565201fb75dc54c8d76373cd1bde06b06657bdb6"}, + {file = "pytest-8.0.0.tar.gz", hash = "sha256:249b1b0864530ba251b7438274c4d251c58d868edaaec8762893ad4a0d71c36c"}, ] [package.dependencies] @@ -5084,7 +5063,7 @@ colorama = {version = "*", markers = "sys_platform == \"win32\""} exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" -pluggy = ">=0.12,<2.0" +pluggy = ">=1.3.0,<2.0" tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] @@ -5285,13 +5264,13 @@ six = ">=1.5" [[package]] name = "pytz" -version = "2023.3.post1" +version = "2023.4" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" files = [ - {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, - {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, + {file = "pytz-2023.4-py2.py3-none-any.whl", hash = "sha256:f90ef520d95e7c46951105338d918664ebfd6f1d995bd7d153127ce90efafa6a"}, + {file = "pytz-2023.4.tar.gz", hash = "sha256:31d4583c4ed539cd037956140d695e42c033a19e984bfce9964a3f7d59bc2b40"}, ] [[package]] @@ -5623,13 +5602,13 @@ full = ["numpy"] [[package]] name = "referencing" -version = "0.32.1" +version = "0.33.0" description = "JSON Referencing + Python" optional = false python-versions = ">=3.8" files = [ - {file = "referencing-0.32.1-py3-none-any.whl", hash = "sha256:7e4dc12271d8e15612bfe35792f5ea1c40970dadf8624602e33db2758f7ee554"}, - {file = "referencing-0.32.1.tar.gz", hash = "sha256:3c57da0513e9563eb7e203ebe9bb3a1b509b042016433bd1e45a2853466c3dd3"}, + {file = "referencing-0.33.0-py3-none-any.whl", hash = "sha256:39240f2ecc770258f28b642dd47fd74bc8b02484de54e1882b74b35ebd779bd5"}, + {file = "referencing-0.33.0.tar.gz", hash = "sha256:c775fedf74bc0f9189c2a3be1c12fd03e8c23f4d371dce795df44e06c5b412f7"}, ] [package.dependencies] @@ -5934,118 +5913,133 @@ pyasn1 = ">=0.1.3" [[package]] name = "ruff" -version = "0.1.13" +version = "0.1.15" description = "An extremely fast Python linter and code formatter, written in Rust." 
optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.1.13-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:e3fd36e0d48aeac672aa850045e784673449ce619afc12823ea7868fcc41d8ba"}, - {file = "ruff-0.1.13-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:9fb6b3b86450d4ec6a6732f9f60c4406061b6851c4b29f944f8c9d91c3611c7a"}, - {file = "ruff-0.1.13-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b13ba5d7156daaf3fd08b6b993360a96060500aca7e307d95ecbc5bb47a69296"}, - {file = "ruff-0.1.13-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9ebb40442f7b531e136d334ef0851412410061e65d61ca8ce90d894a094feb22"}, - {file = "ruff-0.1.13-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:226b517f42d59a543d6383cfe03cccf0091e3e0ed1b856c6824be03d2a75d3b6"}, - {file = "ruff-0.1.13-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5f0312ba1061e9b8c724e9a702d3c8621e3c6e6c2c9bd862550ab2951ac75c16"}, - {file = "ruff-0.1.13-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2f59bcf5217c661254bd6bc42d65a6fd1a8b80c48763cb5c2293295babd945dd"}, - {file = "ruff-0.1.13-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6894b00495e00c27b6ba61af1fc666f17de6140345e5ef27dd6e08fb987259d"}, - {file = "ruff-0.1.13-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a1600942485c6e66119da294c6294856b5c86fd6df591ce293e4a4cc8e72989"}, - {file = "ruff-0.1.13-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ee3febce7863e231a467f90e681d3d89210b900d49ce88723ce052c8761be8c7"}, - {file = "ruff-0.1.13-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dcaab50e278ff497ee4d1fe69b29ca0a9a47cd954bb17963628fa417933c6eb1"}, - {file = "ruff-0.1.13-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f57de973de4edef3ad3044d6a50c02ad9fc2dff0d88587f25f1a48e3f72edf5e"}, - {file = "ruff-0.1.13-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:7a36fa90eb12208272a858475ec43ac811ac37e91ef868759770b71bdabe27b6"}, - {file = "ruff-0.1.13-py3-none-win32.whl", hash = "sha256:a623349a505ff768dad6bd57087e2461be8db58305ebd5577bd0e98631f9ae69"}, - {file = "ruff-0.1.13-py3-none-win_amd64.whl", hash = "sha256:f988746e3c3982bea7f824c8fa318ce7f538c4dfefec99cd09c8770bd33e6539"}, - {file = "ruff-0.1.13-py3-none-win_arm64.whl", hash = "sha256:6bbbc3042075871ec17f28864808540a26f0f79a4478c357d3e3d2284e832998"}, - {file = "ruff-0.1.13.tar.gz", hash = "sha256:e261f1baed6291f434ffb1d5c6bd8051d1c2a26958072d38dfbec39b3dda7352"}, + {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:5fe8d54df166ecc24106db7dd6a68d44852d14eb0729ea4672bb4d96c320b7df"}, + {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6f0bfbb53c4b4de117ac4d6ddfd33aa5fc31beeaa21d23c45c6dd249faf9126f"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0d432aec35bfc0d800d4f70eba26e23a352386be3a6cf157083d18f6f5881c8"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9405fa9ac0e97f35aaddf185a1be194a589424b8713e3b97b762336ec79ff807"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c66ec24fe36841636e814b8f90f572a8c0cb0e54d8b5c2d0e300d28a0d7bffec"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = 
"sha256:6f8ad828f01e8dd32cc58bc28375150171d198491fc901f6f98d2a39ba8e3ff5"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86811954eec63e9ea162af0ffa9f8d09088bab51b7438e8b6488b9401863c25e"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd4025ac5e87d9b80e1f300207eb2fd099ff8200fa2320d7dc066a3f4622dc6b"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b17b93c02cdb6aeb696effecea1095ac93f3884a49a554a9afa76bb125c114c1"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ddb87643be40f034e97e97f5bc2ef7ce39de20e34608f3f829db727a93fb82c5"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:abf4822129ed3a5ce54383d5f0e964e7fef74a41e48eb1dfad404151efc130a2"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_i686.whl", hash = "sha256:6c629cf64bacfd136c07c78ac10a54578ec9d1bd2a9d395efbee0935868bf852"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1bab866aafb53da39c2cadfb8e1c4550ac5340bb40300083eb8967ba25481447"}, + {file = "ruff-0.1.15-py3-none-win32.whl", hash = "sha256:2417e1cb6e2068389b07e6fa74c306b2810fe3ee3476d5b8a96616633f40d14f"}, + {file = "ruff-0.1.15-py3-none-win_amd64.whl", hash = "sha256:3837ac73d869efc4182d9036b1405ef4c73d9b1f88da2413875e34e0d6919587"}, + {file = "ruff-0.1.15-py3-none-win_arm64.whl", hash = "sha256:9a933dfb1c14ec7a33cceb1e49ec4a16b51ce3c20fd42663198746efc0427360"}, + {file = "ruff-0.1.15.tar.gz", hash = "sha256:f6dfa8c1b21c913c326919056c390966648b680966febcb796cc9d1aaab8564e"}, ] [[package]] name = "scikit-learn" -version = "1.3.2" +version = "1.4.0" description = "A set of python modules for machine learning and data mining" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "scikit-learn-1.3.2.tar.gz", hash = "sha256:a2f54c76accc15a34bfb9066e6c7a56c1e7235dda5762b990792330b52ccfb05"}, - {file = "scikit_learn-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e326c0eb5cf4d6ba40f93776a20e9a7a69524c4db0757e7ce24ba222471ee8a1"}, - {file = "scikit_learn-1.3.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:535805c2a01ccb40ca4ab7d081d771aea67e535153e35a1fd99418fcedd1648a"}, - {file = "scikit_learn-1.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1215e5e58e9880b554b01187b8c9390bf4dc4692eedeaf542d3273f4785e342c"}, - {file = "scikit_learn-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ee107923a623b9f517754ea2f69ea3b62fc898a3641766cb7deb2f2ce450161"}, - {file = "scikit_learn-1.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:35a22e8015048c628ad099da9df5ab3004cdbf81edc75b396fd0cff8699ac58c"}, - {file = "scikit_learn-1.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6fb6bc98f234fda43163ddbe36df8bcde1d13ee176c6dc9b92bb7d3fc842eb66"}, - {file = "scikit_learn-1.3.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:18424efee518a1cde7b0b53a422cde2f6625197de6af36da0b57ec502f126157"}, - {file = "scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3271552a5eb16f208a6f7f617b8cc6d1f137b52c8a1ef8edf547db0259b2c9fb"}, - {file = "scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc4144a5004a676d5022b798d9e573b05139e77f271253a4703eed295bde0433"}, - {file = "scikit_learn-1.3.2-cp311-cp311-win_amd64.whl", hash = 
"sha256:67f37d708f042a9b8d59551cf94d30431e01374e00dc2645fa186059c6c5d78b"}, - {file = "scikit_learn-1.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8db94cd8a2e038b37a80a04df8783e09caac77cbe052146432e67800e430c028"}, - {file = "scikit_learn-1.3.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:61a6efd384258789aa89415a410dcdb39a50e19d3d8410bd29be365bcdd512d5"}, - {file = "scikit_learn-1.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb06f8dce3f5ddc5dee1715a9b9f19f20d295bed8e3cd4fa51e1d050347de525"}, - {file = "scikit_learn-1.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b2de18d86f630d68fe1f87af690d451388bb186480afc719e5f770590c2ef6c"}, - {file = "scikit_learn-1.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:0402638c9a7c219ee52c94cbebc8fcb5eb9fe9c773717965c1f4185588ad3107"}, - {file = "scikit_learn-1.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a19f90f95ba93c1a7f7924906d0576a84da7f3b2282ac3bfb7a08a32801add93"}, - {file = "scikit_learn-1.3.2-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:b8692e395a03a60cd927125eef3a8e3424d86dde9b2370d544f0ea35f78a8073"}, - {file = "scikit_learn-1.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15e1e94cc23d04d39da797ee34236ce2375ddea158b10bee3c343647d615581d"}, - {file = "scikit_learn-1.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:785a2213086b7b1abf037aeadbbd6d67159feb3e30263434139c98425e3dcfcf"}, - {file = "scikit_learn-1.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:64381066f8aa63c2710e6b56edc9f0894cc7bf59bd71b8ce5613a4559b6145e0"}, - {file = "scikit_learn-1.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6c43290337f7a4b969d207e620658372ba3c1ffb611f8bc2b6f031dc5c6d1d03"}, - {file = "scikit_learn-1.3.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:dc9002fc200bed597d5d34e90c752b74df516d592db162f756cc52836b38fe0e"}, - {file = "scikit_learn-1.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d08ada33e955c54355d909b9c06a4789a729977f165b8bae6f225ff0a60ec4a"}, - {file = "scikit_learn-1.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:763f0ae4b79b0ff9cca0bf3716bcc9915bdacff3cebea15ec79652d1cc4fa5c9"}, - {file = "scikit_learn-1.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:ed932ea780517b00dae7431e031faae6b49b20eb6950918eb83bd043237950e0"}, -] - -[package.dependencies] -joblib = ">=1.1.1" -numpy = ">=1.17.3,<2.0" -scipy = ">=1.5.0" + {file = "scikit-learn-1.4.0.tar.gz", hash = "sha256:d4373c984eba20e393216edd51a3e3eede56cbe93d4247516d205643c3b93121"}, + {file = "scikit_learn-1.4.0-1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fce93a7473e2f4ee4cc280210968288d6a7d7ad8dc6fa7bb7892145e407085f9"}, + {file = "scikit_learn-1.4.0-1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d77df3d1e15fc37a9329999979fa7868ba8655dbab21fe97fc7ddabac9e08cc7"}, + {file = "scikit_learn-1.4.0-1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2404659fedec40eeafa310cd14d613e564d13dbf8f3c752d31c095195ec05de6"}, + {file = "scikit_learn-1.4.0-1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e98632da8f6410e6fb6bf66937712c949b4010600ccd3f22a5388a83e610cc3c"}, + {file = "scikit_learn-1.4.0-1-cp310-cp310-win_amd64.whl", hash = "sha256:11b3b140f70fbc9f6a08884631ae8dd60a4bb2d7d6d1de92738ea42b740d8992"}, + {file = "scikit_learn-1.4.0-1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:a8341eabdc754d5ab91641a7763243845e96b6d68e03e472531e88a4f1b09f21"}, + {file = "scikit_learn-1.4.0-1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d1f6bce875ac2bb6b52514f67c185c564ccd299a05b65b7bab091a4c13dde12d"}, + {file = "scikit_learn-1.4.0-1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c408b46b2fd61952d519ea1af2f8f0a7a703e1433923ab1704c4131520b2083b"}, + {file = "scikit_learn-1.4.0-1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b465dd1dcd237b7b1dcd1a9048ccbf70a98c659474324fa708464c3a2533fad"}, + {file = "scikit_learn-1.4.0-1-cp311-cp311-win_amd64.whl", hash = "sha256:0db8e22c42f7980fe5eb22069b1f84c48966f3e0d23a01afde5999e3987a2501"}, + {file = "scikit_learn-1.4.0-1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e7eef6ea2ed289af40e88c0be9f7704ca8b5de18508a06897c3fe21e0905efdf"}, + {file = "scikit_learn-1.4.0-1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:349669b01435bc4dbf25c6410b0892073befdaec52637d1a1d1ff53865dc8db3"}, + {file = "scikit_learn-1.4.0-1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d439c584e58434d0350701bd33f6c10b309e851fccaf41c121aed55f6851d8cf"}, + {file = "scikit_learn-1.4.0-1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0e2427d9ef46477625ab9b55c1882844fe6fc500f418c3f8e650200182457bc"}, + {file = "scikit_learn-1.4.0-1-cp312-cp312-win_amd64.whl", hash = "sha256:d3d75343940e7bf9b85c830c93d34039fa015eeb341c5c0b4cd7a90dadfe00d4"}, + {file = "scikit_learn-1.4.0-1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:76986d22e884ab062b1beecdd92379656e9d3789ecc1f9870923c178de55f9fe"}, + {file = "scikit_learn-1.4.0-1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:e22446ad89f1cb7657f0d849dcdc345b48e2d10afa3daf2925fdb740f85b714c"}, + {file = "scikit_learn-1.4.0-1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74812c9eabb265be69d738a8ea8d4884917a59637fcbf88a5f0e9020498bc6b3"}, + {file = "scikit_learn-1.4.0-1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad2a63e0dd386b92da3270887a29b308af4d7c750d8c4995dfd9a4798691bcc"}, + {file = "scikit_learn-1.4.0-1-cp39-cp39-win_amd64.whl", hash = "sha256:53b9e29177897c37e2ff9d4ba6ca12fdb156e22523e463db05def303f5c72b5c"}, + {file = "scikit_learn-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cb8f044a8f5962613ce1feb4351d66f8d784bd072d36393582f351859b065f7d"}, + {file = "scikit_learn-1.4.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:a6372c90bbf302387792108379f1ec77719c1618d88496d0df30cb8e370b4661"}, + {file = "scikit_learn-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:785ce3c352bf697adfda357c3922c94517a9376002971bc5ea50896144bc8916"}, + {file = "scikit_learn-1.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0aba2a20d89936d6e72d95d05e3bf1db55bca5c5920926ad7b92c34f5e7d3bbe"}, + {file = "scikit_learn-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:2bac5d56b992f8f06816f2cd321eb86071c6f6d44bb4b1cb3d626525820d754b"}, + {file = "scikit_learn-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:27ae4b0f1b2c77107c096a7e05b33458354107b47775428d1f11b23e30a73e8a"}, + {file = "scikit_learn-1.4.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5c5c62ffb52c3ffb755eb21fa74cc2cbf2c521bd53f5c04eaa10011dbecf5f80"}, + {file = "scikit_learn-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:7f0d2018ac6fa055dab65fe8a485967990d33c672d55bc254c56c35287b02fab"}, + {file = "scikit_learn-1.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91a8918c415c4b4bf1d60c38d32958849a9191c2428ab35d30b78354085c7c7a"}, + {file = "scikit_learn-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:80a21de63275f8bcd7877b3e781679d2ff1eddfed515a599f95b2502a3283d42"}, + {file = "scikit_learn-1.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0f33bbafb310c26b81c4d41ecaebdbc1f63498a3f13461d50ed9a2e8f24d28e4"}, + {file = "scikit_learn-1.4.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:8b6ac1442ec714b4911e5aef8afd82c691b5c88b525ea58299d455acc4e8dcec"}, + {file = "scikit_learn-1.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05fc5915b716c6cc60a438c250108e9a9445b522975ed37e416d5ea4f9a63381"}, + {file = "scikit_learn-1.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:842b7d6989f3c574685e18da6f91223eb32301d0f93903dd399894250835a6f7"}, + {file = "scikit_learn-1.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:88bcb586fdff865372df1bc6be88bb7e6f9e0aa080dab9f54f5cac7eca8e2b6b"}, + {file = "scikit_learn-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f77674647dd31f56cb12ed13ed25b6ed43a056fffef051715022d2ebffd7a7d1"}, + {file = "scikit_learn-1.4.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:833999872e2920ce00f3a50839946bdac7539454e200eb6db54898a41f4bfd43"}, + {file = "scikit_learn-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:970ec697accaef10fb4f51763f3a7b1250f9f0553cf05514d0e94905322a0172"}, + {file = "scikit_learn-1.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:923d778f378ebacca2c672ab1740e5a413e437fb45ab45ab02578f8b689e5d43"}, + {file = "scikit_learn-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:1d041bc95006b545b59e458399e3175ab11ca7a03dc9a74a573ac891f5df1489"}, +] + +[package.dependencies] +joblib = ">=1.2.0" +numpy = ">=1.19.5" +scipy = ">=1.6.0" threadpoolctl = ">=2.0.0" [package.extras] -benchmark = ["matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "pandas (>=1.0.5)"] -docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.10.1)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] -examples = ["matplotlib (>=3.1.3)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)"] -tests = ["black (>=23.3.0)", "matplotlib (>=3.1.3)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.16.2)"] +benchmark = ["matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)", "pandas (>=1.1.5)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.15.0)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] +examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] +tests = ["black (>=23.3.0)", "matplotlib 
(>=3.3.4)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.19.12)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.17.2)"] [[package]] name = "scipy" -version = "1.11.4" +version = "1.12.0" description = "Fundamental algorithms for scientific computing in Python" optional = false python-versions = ">=3.9" files = [ - {file = "scipy-1.11.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc9a714581f561af0848e6b69947fda0614915f072dfd14142ed1bfe1b806710"}, - {file = "scipy-1.11.4-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:cf00bd2b1b0211888d4dc75656c0412213a8b25e80d73898083f402b50f47e41"}, - {file = "scipy-1.11.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9999c008ccf00e8fbcce1236f85ade5c569d13144f77a1946bef8863e8f6eb4"}, - {file = "scipy-1.11.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:933baf588daa8dc9a92c20a0be32f56d43faf3d1a60ab11b3f08c356430f6e56"}, - {file = "scipy-1.11.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8fce70f39076a5aa62e92e69a7f62349f9574d8405c0a5de6ed3ef72de07f446"}, - {file = "scipy-1.11.4-cp310-cp310-win_amd64.whl", hash = "sha256:6550466fbeec7453d7465e74d4f4b19f905642c89a7525571ee91dd7adabb5a3"}, - {file = "scipy-1.11.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f313b39a7e94f296025e3cffc2c567618174c0b1dde173960cf23808f9fae4be"}, - {file = "scipy-1.11.4-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1b7c3dca977f30a739e0409fb001056484661cb2541a01aba0bb0029f7b68db8"}, - {file = "scipy-1.11.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00150c5eae7b610c32589dda259eacc7c4f1665aedf25d921907f4d08a951b1c"}, - {file = "scipy-1.11.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:530f9ad26440e85766509dbf78edcfe13ffd0ab7fec2560ee5c36ff74d6269ff"}, - {file = "scipy-1.11.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5e347b14fe01003d3b78e196e84bd3f48ffe4c8a7b8a1afbcb8f5505cb710993"}, - {file = "scipy-1.11.4-cp311-cp311-win_amd64.whl", hash = "sha256:acf8ed278cc03f5aff035e69cb511741e0418681d25fbbb86ca65429c4f4d9cd"}, - {file = "scipy-1.11.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:028eccd22e654b3ea01ee63705681ee79933652b2d8f873e7949898dda6d11b6"}, - {file = "scipy-1.11.4-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2c6ff6ef9cc27f9b3db93a6f8b38f97387e6e0591600369a297a50a8e96e835d"}, - {file = "scipy-1.11.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b030c6674b9230d37c5c60ab456e2cf12f6784596d15ce8da9365e70896effc4"}, - {file = "scipy-1.11.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad669df80528aeca5f557712102538f4f37e503f0c5b9541655016dd0932ca79"}, - {file = "scipy-1.11.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ce7fff2e23ab2cc81ff452a9444c215c28e6305f396b2ba88343a567feec9660"}, - {file = "scipy-1.11.4-cp312-cp312-win_amd64.whl", hash = "sha256:36750b7733d960d7994888f0d148d31ea3017ac15eef664194b4ef68d36a4a97"}, - {file = "scipy-1.11.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6e619aba2df228a9b34718efb023966da781e89dd3d21637b27f2e54db0410d7"}, - {file = "scipy-1.11.4-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:f3cd9e7b3c2c1ec26364856f9fbe78695fe631150f94cd1c22228456404cf1ec"}, - {file = "scipy-1.11.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:d10e45a6c50211fe256da61a11c34927c68f277e03138777bdebedd933712fea"}, - {file = "scipy-1.11.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91af76a68eeae0064887a48e25c4e616fa519fa0d38602eda7e0f97d65d57937"}, - {file = "scipy-1.11.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6df1468153a31cf55ed5ed39647279beb9cfb5d3f84369453b49e4b8502394fd"}, - {file = "scipy-1.11.4-cp39-cp39-win_amd64.whl", hash = "sha256:ee410e6de8f88fd5cf6eadd73c135020bfbbbdfcd0f6162c36a7638a1ea8cc65"}, - {file = "scipy-1.11.4.tar.gz", hash = "sha256:90a2b78e7f5733b9de748f589f09225013685f9b218275257f8a8168ededaeaa"}, -] - -[package.dependencies] -numpy = ">=1.21.6,<1.28.0" + {file = "scipy-1.12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:78e4402e140879387187f7f25d91cc592b3501a2e51dfb320f48dfb73565f10b"}, + {file = "scipy-1.12.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f5f00ebaf8de24d14b8449981a2842d404152774c1a1d880c901bf454cb8e2a1"}, + {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e53958531a7c695ff66c2e7bb7b79560ffdc562e2051644c5576c39ff8efb563"}, + {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e32847e08da8d895ce09d108a494d9eb78974cf6de23063f93306a3e419960c"}, + {file = "scipy-1.12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4c1020cad92772bf44b8e4cdabc1df5d87376cb219742549ef69fc9fd86282dd"}, + {file = "scipy-1.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:75ea2a144096b5e39402e2ff53a36fecfd3b960d786b7efd3c180e29c39e53f2"}, + {file = "scipy-1.12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:408c68423f9de16cb9e602528be4ce0d6312b05001f3de61fe9ec8b1263cad08"}, + {file = "scipy-1.12.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5adfad5dbf0163397beb4aca679187d24aec085343755fcdbdeb32b3679f254c"}, + {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3003652496f6e7c387b1cf63f4bb720951cfa18907e998ea551e6de51a04467"}, + {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b8066bce124ee5531d12a74b617d9ac0ea59245246410e19bca549656d9a40a"}, + {file = "scipy-1.12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8bee4993817e204d761dba10dbab0774ba5a8612e57e81319ea04d84945375ba"}, + {file = "scipy-1.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:a24024d45ce9a675c1fb8494e8e5244efea1c7a09c60beb1eeb80373d0fecc70"}, + {file = "scipy-1.12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e7e76cc48638228212c747ada851ef355c2bb5e7f939e10952bc504c11f4e372"}, + {file = "scipy-1.12.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f7ce148dffcd64ade37b2df9315541f9adad6efcaa86866ee7dd5db0c8f041c3"}, + {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c39f92041f490422924dfdb782527a4abddf4707616e07b021de33467f917bc"}, + {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7ebda398f86e56178c2fa94cad15bf457a218a54a35c2a7b4490b9f9cb2676c"}, + {file = "scipy-1.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:95e5c750d55cf518c398a8240571b0e0782c2d5a703250872f36eaf737751338"}, + {file = "scipy-1.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e646d8571804a304e1da01040d21577685ce8e2db08ac58e543eaca063453e1c"}, + {file = "scipy-1.12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:913d6e7956c3a671de3b05ccb66b11bc293f56bfdef040583a7221d9e22a2e35"}, + 
{file = "scipy-1.12.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba1b0c7256ad75401c73e4b3cf09d1f176e9bd4248f0d3112170fb2ec4db067"}, + {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:730badef9b827b368f351eacae2e82da414e13cf8bd5051b4bdfd720271a5371"}, + {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6546dc2c11a9df6926afcbdd8a3edec28566e4e785b915e849348c6dd9f3f490"}, + {file = "scipy-1.12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:196ebad3a4882081f62a5bf4aeb7326aa34b110e533aab23e4374fcccb0890dc"}, + {file = "scipy-1.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:b360f1b6b2f742781299514e99ff560d1fe9bd1bff2712894b52abe528d1fd1e"}, + {file = "scipy-1.12.0.tar.gz", hash = "sha256:4bf5abab8a36d20193c698b0f1fc282c1d083c94723902c447e5d2f1780936a3"}, +] + +[package.dependencies] +numpy = ">=1.22.4,<1.29.0" [package.extras] dev = ["click", "cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] doc = ["jupytext", "matplotlib (>2)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"] -test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] +test = ["asv", "gmpy2", "hypothesis", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] [[package]] name = "scooby" @@ -6063,13 +6057,13 @@ cpu = ["mkl", "psutil"] [[package]] name = "seaborn" -version = "0.13.1" +version = "0.13.2" description = "Statistical data visualization" optional = false python-versions = ">=3.8" files = [ - {file = "seaborn-0.13.1-py3-none-any.whl", hash = "sha256:6baa69b6d1169ae59037971491c450c0b73332b42bd4b23570b62a546bc61cb8"}, - {file = "seaborn-0.13.1.tar.gz", hash = "sha256:bfad65e9c5989e5e1897e61bdbd2f22e62455940ca76fd49eca3ed69345b9179"}, + {file = "seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987"}, + {file = "seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7"}, ] [package.dependencies] @@ -6194,32 +6188,37 @@ files = [ [[package]] name = "snowflake-connector-python" -version = "3.6.0" +version = "3.7.0" description = "Snowflake Connector for Python" optional = true python-versions = ">=3.8" files = [ - {file = "snowflake-connector-python-3.6.0.tar.gz", hash = "sha256:15667a918780d79da755e6a60bbf6918051854951e8f56ccdf5692283e9a8479"}, - {file = "snowflake_connector_python-3.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4093b38cf9abf95c38119f0b23b07e23dc7a8689b956cd5d34975e1875741f20"}, - {file = "snowflake_connector_python-3.6.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:cf5a964fe01b177063f8c44d14df3a72715580bcd195788ec2822090f37330a5"}, - {file = "snowflake_connector_python-3.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55a6418cec585b050e6f05404f25e62b075a3bbea587dc1f903de15640565c58"}, - {file = "snowflake_connector_python-3.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7c76aea92b87f6ecd604e9c934aac8a779f2e20f3be1d990d53bb5b6d87b009"}, - {file = "snowflake_connector_python-3.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:9dfcf178271e892e64e4092b9e011239a066ce5de848afd2efe3f13197a9f8b3"}, - {file = 
"snowflake_connector_python-3.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4916f9b4a0efd7c96d1fa50a157e05907b6935f91492cca7f200b43cc178a25e"}, - {file = "snowflake_connector_python-3.6.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:f15024c66db5e87d359216ec733a2974d7562aa38f3f18c8b6e65489839e00d7"}, - {file = "snowflake_connector_python-3.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bcbd3102f807ebbbae52b1b5683d45cd7b3dcb0eaec131233ba6b156e8d70fa4"}, - {file = "snowflake_connector_python-3.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7662e2de25b885abe08ab866cf7c7b026ad1af9faa39c25e2c25015ef807abe3"}, - {file = "snowflake_connector_python-3.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:d1fa102f55ee166cc766aeee3f9333b17b4bede6fb088eee1e1f022df15b6d81"}, - {file = "snowflake_connector_python-3.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:fde1e0727e2f23c2a07b49b30e1bc0f49977f965d08ddfda10015b24a2beeb76"}, - {file = "snowflake_connector_python-3.6.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:1b51fe000c8cf6372d30b73c7136275e52788e6af47010cd1984c9fb03378e86"}, - {file = "snowflake_connector_python-3.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7a11699689a19916e65794ce58dca72b8a40fe6a7eea06764931ede10b47bcc"}, - {file = "snowflake_connector_python-3.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d810be5b180c6f47ce9b6f989fe64b9984383e4b77e30b284a83e33f229a3a82"}, - {file = "snowflake_connector_python-3.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:b5db47d4164d6b7a07c413a46f9edc4a1d687e3df44fd9d5fa89a89aecb94a8e"}, - {file = "snowflake_connector_python-3.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bf8c1ad5aab5304fefa2a4178061a24c96da45e3e3db9d901621e9953e005402"}, - {file = "snowflake_connector_python-3.6.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:1058ab5c98cc62fde8b3f021f0a5076cb7865b5cdab8a9bccde0df88b9e91334"}, - {file = "snowflake_connector_python-3.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b93f55989f80d69278e0f40a7a1c0e737806b7c0ddb0351513a752b837243e8"}, - {file = "snowflake_connector_python-3.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50dd954ea5918d3242ded69225b72f701963cd9c043ee7d9ab35dc22211611c8"}, - {file = "snowflake_connector_python-3.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:4ad42613b87f31441d07a8ea242f4c28ed5eb7b6e05986f9e94a7e44b96d3d1e"}, + {file = "snowflake-connector-python-3.7.0.tar.gz", hash = "sha256:b2bfaec64059307b08caadad40214d488fefb4a23fcd7553ac75f5ea758a9169"}, + {file = "snowflake_connector_python-3.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f38070af24d15cd103d565b63b08c5eac3bdf72ad06ad27cd98c46359cb4bee2"}, + {file = "snowflake_connector_python-3.7.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:f8f3912699030291fd82d75321cda44205c9f8fb27841ffbaaf6d3dc4065b798"}, + {file = "snowflake_connector_python-3.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7ac1190c6ca48297429f0fb6515b54e3fd3bceb1b72fce7b59097044a9e98e0"}, + {file = "snowflake_connector_python-3.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57deaa28baa601b64c1ae5a5c75260ab1c6a22bd07a8d8c7ac785c8deb1c556e"}, + {file = "snowflake_connector_python-3.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:715635ed5b6e5ef8de659fc336c1b89296fe72fdec180c40915c10df885c8082"}, + {file 
= "snowflake_connector_python-3.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d51f3a8912fcc5169731d2b42262087e8a6da20f7344dd001ed97fbdf6ff972c"}, + {file = "snowflake_connector_python-3.7.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:46bfa243875eff9c6dfe1afc26f2034b00ac6eb9f77010b2949a174c38a59722"}, + {file = "snowflake_connector_python-3.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7004ccfe3c16075d33b0440b4d5241a50156bbc5dcbf11dec61674d0ac830f46"}, + {file = "snowflake_connector_python-3.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ee9e6a0a94e0ac1f15fa93c0f61f6e930240280bd043f61216d942e837beb7f"}, + {file = "snowflake_connector_python-3.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:b545fd11c9bd200681e182cf46bb4cbc8250ca6acc41fbea749799a2b23f574f"}, + {file = "snowflake_connector_python-3.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:258541df8ba90201ce6f7c4ae9f59e3a9f585ed30fbbaafd207e0774104cf6dc"}, + {file = "snowflake_connector_python-3.7.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:e548642913f7d0ef9d5a35c69c7a8308cbab8fe255fdc3c9f7e18c71e52a0c2e"}, + {file = "snowflake_connector_python-3.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:639d09de40c014c7ab0308f622bd1d29a9b9dd05c0ced2d858da31323fa16bda"}, + {file = "snowflake_connector_python-3.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da9cf62665ee47c7ec8c18ae554a31c72cacf1cef4b42d55cfbdbae4b5ddb3f2"}, + {file = "snowflake_connector_python-3.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:ad1d0e339cadb5ba79d24783c39ba21a63e2159f0d3d9540da0168f97043904c"}, + {file = "snowflake_connector_python-3.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3d8e4d0fad8b00b55bc99035ad2c54d9aa3ca8495f7dfcce736a961b5dbd1d9f"}, + {file = "snowflake_connector_python-3.7.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:fc3e95d4c99472444ffda35b9bbfe4cd4c775279c7eca579f1eee9d8d2ec1e2a"}, + {file = "snowflake_connector_python-3.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f93a5861333c2f87ecd1fea34a0fae35c12c196e86fa75c2dd89741e83f2d82"}, + {file = "snowflake_connector_python-3.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdf0fe7d77e02949a8a2a7d365217b822bcaf2fc9541095a241116576458568"}, + {file = "snowflake_connector_python-3.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:1ec29946b224d8089070477f60ffe58923433d8c2308b6403684500e85c37699"}, + {file = "snowflake_connector_python-3.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4f945c512383a8b5f1d2404c40d20e0c915ba3f0ac01983f2e43987d6eecda02"}, + {file = "snowflake_connector_python-3.7.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:574cf5be3c61a6ea421ac9710ac791a80f6dfcc53986ab81e68d1085dad79dab"}, + {file = "snowflake_connector_python-3.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb8168458e5d23a0ba4d4e0a276bbd477ddd26d35c554f2c3c64cfe29622499a"}, + {file = "snowflake_connector_python-3.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecf8f520692653775f51307140d326b53a51e338d67dc522b1d376b51b12d14e"}, + {file = "snowflake_connector_python-3.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:5ed928055ed40da22b2d6bdde62eee5068c352f66415e1c9aee7c45eb67d39cb"}, ] [package.dependencies] @@ -6393,17 +6392,17 @@ sqlalchemy = ">=1.0.0" [[package]] name = "sqlglot" -version = "20.8.0" +version = "20.11.0" 
description = "An easily customizable SQL parser and transpiler" optional = false python-versions = ">=3.7" files = [ - {file = "sqlglot-20.8.0-py3-none-any.whl", hash = "sha256:cb73b81a26da462c34b12b98cf193d679d4b5693703d309db236d9784cef60bb"}, - {file = "sqlglot-20.8.0.tar.gz", hash = "sha256:5636e97fab9efdb4a8690c0e32bbd2d657fe91eb650f10e913a56b4bd979faef"}, + {file = "sqlglot-20.11.0-py3-none-any.whl", hash = "sha256:658509272da15e90dd1c59d9ca5281d7bff2e87121f87e6f9e6541067a057c9c"}, + {file = "sqlglot-20.11.0.tar.gz", hash = "sha256:79a1510ffad1f1e4c5915751f0ed978c099e7e83cd4010ecbd471c00331b6902"}, ] [package.extras] -dev = ["autoflake", "black", "duckdb (>=0.6)", "isort", "maturin (>=1.4,<2.0)", "mypy (>=0.990)", "pandas", "pdoc", "pre-commit", "pyspark", "python-dateutil", "types-python-dateutil"] +dev = ["autoflake", "black", "duckdb (>=0.6)", "isort", "maturin (>=1.4,<2.0)", "mypy (>=0.990)", "pandas", "pdoc", "pre-commit", "pyspark", "python-dateutil", "types-python-dateutil", "typing-extensions"] rs = ["sqlglotrs (==0.1.0)"] [[package]] @@ -6604,13 +6603,13 @@ files = [ [[package]] name = "toolz" -version = "0.12.0" +version = "0.12.1" description = "List processing tools and functional utilities" optional = false -python-versions = ">=3.5" +python-versions = ">=3.7" files = [ - {file = "toolz-0.12.0-py3-none-any.whl", hash = "sha256:2059bd4148deb1884bb0eb770a3cde70e7f954cfbbdc2285f1f2de01fd21eb6f"}, - {file = "toolz-0.12.0.tar.gz", hash = "sha256:88c570861c440ee3f2f6037c4654613228ff40c93a6c25e0eba70d17282c6194"}, + {file = "toolz-0.12.1-py3-none-any.whl", hash = "sha256:d22731364c07d72eea0a0ad45bafb2c2937ab6fd38a3507bf55eae8744aa7d85"}, + {file = "toolz-0.12.1.tar.gz", hash = "sha256:ecca342664893f177a13dac0e6b41cbd8ac25a358e5f215316d43e2100224f4d"}, ] [[package]] @@ -6711,13 +6710,13 @@ tests = ["black", "httpretty (<1.1)", "isort", "pre-commit", "pytest", "pytest-r [[package]] name = "trove-classifiers" -version = "2024.1.8" +version = "2024.1.31" description = "Canonical source for classifiers on PyPI (pypi.org)." 
optional = false python-versions = "*" files = [ - {file = "trove-classifiers-2024.1.8.tar.gz", hash = "sha256:6e36caf430ff6485c4b57a4c6b364a13f6a898d16b9417c6c37467e59c14b05a"}, - {file = "trove_classifiers-2024.1.8-py3-none-any.whl", hash = "sha256:3c1ff4deb10149c7e39ede6e5bbc107def64362ef1ee7590ec98d71fb92f1b6a"}, + {file = "trove-classifiers-2024.1.31.tar.gz", hash = "sha256:bfdfe60bbf64985c524416afb637ecc79c558e0beb4b7f52b0039e01044b0229"}, + {file = "trove_classifiers-2024.1.31-py3-none-any.whl", hash = "sha256:854aba3358f3cf10e5c0916aa533f5a39e27aadd8ade26a54cdc2a93257e39c4"}, ] [[package]] @@ -7353,4 +7352,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "39f5035cdd3ae4bcc67b642e3f6b733f3c8956e903897c946624f39362a0b2cb" +content-hash = "51b82d38f90ea4682385f0fbdafd5613f57483482a076dd4ff1799217cffb2e6" diff --git a/pyproject.toml b/pyproject.toml index cc74e67c1a0d..f90144a22823 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ atpublic = ">=2.3,<5" bidict = ">=0.22.1,<1" multipledispatch = ">=0.6,<2" numpy = ">=1,<2" -pandas = ">=1.2.5,<3" +pandas = ">=1.2.5,<2.1" parsy = ">=2,<3" pyarrow = ">=2,<16" pyarrow-hotfix = ">=0.4,<1" @@ -191,11 +191,11 @@ mysql = ["pymysql"] oracle = ["oracledb", "packaging"] pandas = ["regex"] polars = ["polars", "packaging"] -risingwave = ["psycopg2"] postgres = ["psycopg2"] pyspark = ["pyspark", "packaging"] snowflake = ["snowflake-connector-python", "packaging"] sqlite = ["regex"] +risingwave = ["psycopg2"] trino = ["trino"] # non-backend extras visualization = ["graphviz"] From 5915b05d4a8df19a0ffad265139ab67b949ced51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 1 Feb 2024 21:45:31 +0100 Subject: [PATCH 145/161] refactor(dask): port the dask backend to the new execution model (#8005) Reimplementation of the dask backend on top of the new pandas executor. I had to adjust the pandas backend to support extending. This way the new dask implementation turned out to be pretty tidy. There are a couple of features which are not implemented using proper dask constructs, but rather have a fallback to local execution using pandas. The most notable are the window functions. The previous dask implementation supported just a couple of window cases, but this way we have full coverage at least. Thanks to the new pandas base we have a wider feature coverage, see the removed xfails in the test suite. 
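
As an illustration of the surface described above, here is a minimal sketch (not part of this patch) of how the reworked dask backend is exercised end to end. The table name, column names, and data are hypothetical; the calls used (ibis.dask.connect, .table, .execute) are the ones appearing in the diff below.

    # Hypothetical data and names, for illustration only.
    import dask.dataframe as dd
    import pandas as pd

    import ibis

    df = dd.from_pandas(
        pd.DataFrame({"key": ["a", "a", "b"], "value": [1, 2, 3]}),
        npartitions=1,
    )
    con = ibis.dask.connect({"t": df})   # mapping of table name -> dask/pandas DataFrame
    t = con.table("t")                   # schema inferred via PandasData.infer_table
    expr = t.group_by("key").aggregate(total=t.value.sum())
    result = con.execute(expr)           # compiled and executed through DaskExecutor
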
--- .github/workflows/ibis-backends.yml | 22 +- ibis/backends/dask/__init__.py | 101 ++-- ibis/backends/dask/aggcontext.py | 214 ------- ibis/backends/dask/convert.py | 83 +++ ibis/backends/dask/core.py | 472 --------------- ibis/backends/dask/dispatch.py | 23 - ibis/backends/dask/execution/__init__.py | 16 - ibis/backends/dask/execution/aggregations.py | 131 ---- ibis/backends/dask/execution/arrays.py | 75 --- ibis/backends/dask/execution/decimal.py | 23 - ibis/backends/dask/execution/generic.py | 559 ------------------ ibis/backends/dask/execution/indexing.py | 31 - ibis/backends/dask/execution/join.py | 111 ---- ibis/backends/dask/execution/maps.py | 107 ---- ibis/backends/dask/execution/numeric.py | 153 ----- ibis/backends/dask/execution/reductions.py | 203 ------- ibis/backends/dask/execution/selection.py | 263 -------- ibis/backends/dask/execution/strings.py | 363 ------------ ibis/backends/dask/execution/structs.py | 35 -- ibis/backends/dask/execution/temporal.py | 177 ------ ibis/backends/dask/execution/util.py | 369 ------------ ibis/backends/dask/execution/window.py | 443 -------------- ibis/backends/dask/executor.py | 414 +++++++++++++ ibis/backends/dask/helpers.py | 186 ++++++ ibis/backends/dask/kernels.py | 56 ++ ibis/backends/dask/tests/conftest.py | 298 +++++++++- .../backends/dask/tests/execution/__init__.py | 0 .../backends/dask/tests/execution/conftest.py | 290 --------- .../dask/tests/execution/test_maps.py | 109 ---- .../dask/tests/execution/test_timecontext.py | 313 ---------- .../dask/tests/execution/test_util.py | 30 - .../dask/tests/{execution => }/test_arrays.py | 48 +- .../dask/tests/{execution => }/test_cast.py | 86 +-- ibis/backends/dask/tests/test_core.py | 90 +-- ibis/backends/dask/tests/test_dispatcher.py | 143 ----- .../tests/{execution => }/test_functions.py | 137 ++--- .../dask/tests/{execution => }/test_join.py | 206 +------ ibis/backends/dask/tests/test_maps.py | 90 +++ .../tests/{execution => }/test_operations.py | 337 +++++------ .../tests/{execution => }/test_strings.py | 10 +- .../tests/{execution => }/test_structs.py | 42 +- .../tests/{execution => }/test_temporal.py | 121 ++-- ibis/backends/dask/tests/test_udf.py | 131 +--- .../dask/tests/{execution => }/test_window.py | 278 +++------ ibis/backends/dask/trace.py | 159 ----- ibis/backends/dask/udf.py | 298 ---------- ibis/backends/pandas/__init__.py | 45 +- ibis/backends/pandas/convert.py | 9 +- ibis/backends/pandas/executor.py | 241 ++++---- ibis/backends/pandas/helpers.py | 186 +++--- ibis/backends/pandas/kernels.py | 136 ++--- ibis/backends/pandas/rewrites.py | 23 +- ibis/backends/pandas/tests/test_cast.py | 25 +- ibis/backends/pandas/tests/test_join.py | 31 +- ibis/backends/tests/test_aggregation.py | 57 +- ibis/backends/tests/test_array.py | 50 +- ibis/backends/tests/test_client.py | 4 +- ibis/backends/tests/test_examples.py | 2 +- ibis/backends/tests/test_generic.py | 14 +- ibis/backends/tests/test_interactive.py | 8 +- ibis/backends/tests/test_join.py | 8 +- ibis/backends/tests/test_param.py | 2 +- ibis/backends/tests/test_string.py | 37 +- ibis/backends/tests/test_temporal.py | 24 +- ibis/backends/tests/test_timecontext.py | 1 + ibis/backends/tests/test_vectorized_udf.py | 10 +- ibis/backends/tests/test_window.py | 85 +-- ibis/formats/pandas.py | 12 - ibis/formats/tests/test_dask.py | 201 ------- pyproject.toml | 2 + 70 files changed, 2096 insertions(+), 6963 deletions(-) delete mode 100644 ibis/backends/dask/aggcontext.py create mode 100644 ibis/backends/dask/convert.py delete mode 100644 
ibis/backends/dask/core.py delete mode 100644 ibis/backends/dask/dispatch.py delete mode 100644 ibis/backends/dask/execution/__init__.py delete mode 100644 ibis/backends/dask/execution/aggregations.py delete mode 100644 ibis/backends/dask/execution/arrays.py delete mode 100644 ibis/backends/dask/execution/decimal.py delete mode 100644 ibis/backends/dask/execution/generic.py delete mode 100644 ibis/backends/dask/execution/indexing.py delete mode 100644 ibis/backends/dask/execution/join.py delete mode 100644 ibis/backends/dask/execution/maps.py delete mode 100644 ibis/backends/dask/execution/numeric.py delete mode 100644 ibis/backends/dask/execution/reductions.py delete mode 100644 ibis/backends/dask/execution/selection.py delete mode 100644 ibis/backends/dask/execution/strings.py delete mode 100644 ibis/backends/dask/execution/structs.py delete mode 100644 ibis/backends/dask/execution/temporal.py delete mode 100644 ibis/backends/dask/execution/util.py delete mode 100644 ibis/backends/dask/execution/window.py create mode 100644 ibis/backends/dask/executor.py create mode 100644 ibis/backends/dask/helpers.py create mode 100644 ibis/backends/dask/kernels.py delete mode 100644 ibis/backends/dask/tests/execution/__init__.py delete mode 100644 ibis/backends/dask/tests/execution/conftest.py delete mode 100644 ibis/backends/dask/tests/execution/test_maps.py delete mode 100644 ibis/backends/dask/tests/execution/test_timecontext.py delete mode 100644 ibis/backends/dask/tests/execution/test_util.py rename ibis/backends/dask/tests/{execution => }/test_arrays.py (82%) rename ibis/backends/dask/tests/{execution => }/test_cast.py (62%) delete mode 100644 ibis/backends/dask/tests/test_dispatcher.py rename ibis/backends/dask/tests/{execution => }/test_functions.py (51%) rename ibis/backends/dask/tests/{execution => }/test_join.py (61%) create mode 100644 ibis/backends/dask/tests/test_maps.py rename ibis/backends/dask/tests/{execution => }/test_operations.py (72%) rename ibis/backends/dask/tests/{execution => }/test_strings.py (91%) rename ibis/backends/dask/tests/{execution => }/test_structs.py (66%) rename ibis/backends/dask/tests/{execution => }/test_temporal.py (64%) rename ibis/backends/dask/tests/{execution => }/test_window.py (63%) delete mode 100644 ibis/backends/dask/trace.py delete mode 100644 ibis/backends/dask/udf.py delete mode 100644 ibis/formats/tests/test_dask.py diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index 873a3994dfeb..3c75efccf953 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -73,10 +73,10 @@ jobs: extras: - clickhouse - examples - # - name: dask - # title: Dask - # extras: - # - dask + - name: dask + title: Dask + extras: + - dask - name: pandas title: Pandas extras: @@ -438,13 +438,13 @@ jobs: - "3.9" - "3.11" backend: - # - name: dask - # title: Dask - # deps: - # - "dask[array,dataframe]@2022.9.1" - # - "pandas@1.5.3" - # extras: - # - dask + - name: dask + title: Dask + deps: + - "dask[array,dataframe]@2022.9.1" + - "pandas@1.5.3" + extras: + - dask - name: postgres title: PostgreSQL deps: diff --git a/ibis/backends/dask/__init__.py b/ibis/backends/dask/__init__.py index 86a4f1055d2f..941e5a5864bf 100644 --- a/ibis/backends/dask/__init__.py +++ b/ibis/backends/dask/__init__.py @@ -5,32 +5,23 @@ import dask import dask.dataframe as dd import pandas as pd -from dask.base import DaskMethodsMixin + +import ibis.common.exceptions as com # import the pandas execution module to register dispatched 
implementations of # execute_node that the dask backend will later override -import ibis.backends.pandas.execution -import ibis.config import ibis.expr.operations as ops import ibis.expr.schema as sch import ibis.expr.types as ir from ibis import util -from ibis.backends.dask.core import execute_and_reset from ibis.backends.pandas import BasePandasBackend -from ibis.backends.pandas.core import _apply_schema -from ibis.formats.pandas import DaskData +from ibis.formats.pandas import PandasData if TYPE_CHECKING: import pathlib from collections.abc import Mapping, MutableMapping -raise RuntimeError("Temporarily make the dask backend dysfunctional") - -# Make sure that the pandas backend options have been loaded -ibis.pandas # noqa: B018 - - class Backend(BasePandasBackend): name = "dask" backend_table_type = dd.DataFrame @@ -57,9 +48,6 @@ def do_connect( ... } >>> ibis.dask.connect(data) """ - # register dispatchers - from ibis.backends.dask import udf # noqa: F401 - if dictionary is None: dictionary = {} @@ -75,49 +63,53 @@ def do_connect( def version(self): return dask.__version__ - def execute( - self, - query: ir.Expr, - params: Mapping[ir.Expr, object] | None = None, - limit: str = "default", - **kwargs, - ): + def _validate_args(self, expr, limit, timecontext): + if timecontext is not None: + raise com.UnsupportedArgumentError( + "The Dask backend does not support timecontext" + ) if limit != "default" and limit is not None: - raise ValueError( + raise com.UnsupportedArgumentError( "limit parameter to execute is not yet implemented in the " "dask backend" ) - - if not isinstance(query, ir.Expr): + if not isinstance(expr, ir.Expr): raise TypeError( - "`query` has type {!r}, expected ibis.expr.types.Expr".format( - type(query).__name__ + "`expr` has type {!r}, expected ibis.expr.types.Expr".format( + type(expr).__name__ ) ) - compiled = self.compile(query, params, **kwargs) - if isinstance(compiled, DaskMethodsMixin): - result = compiled.compute() - else: - result = compiled - return _apply_schema(query.op(), result) - def compile( - self, query: ir.Expr, params: Mapping[ir.Expr, object] | None = None, **kwargs + self, + expr: ir.Expr, + params: dict | None = None, + limit: int | None = None, + timecontext=None, ): - """Compile `expr`. + from ibis.backends.dask.executor import DaskExecutor - Returns - ------- - dask.dataframe.core.DataFrame | dask.dataframe.core.Series | dask.dataframe.core.Scalar - Dask graph. 
- """ - params = { - k.op() if isinstance(k, ir.Expr) else k: v - for k, v in ({} if params is None else params).items() - } + self._validate_args(expr, limit, timecontext) + params = params or {} + params = {k.op() if isinstance(k, ir.Expr) else k: v for k, v in params.items()} + + return DaskExecutor.compile(expr.op(), backend=self, params=params) + + def execute( + self, + expr: ir.Expr, + params: Mapping[ir.Expr, object] | None = None, + limit: str = "default", + timecontext=None, + **kwargs, + ): + from ibis.backends.dask.executor import DaskExecutor + + self._validate_args(expr, limit, timecontext) + params = params or {} + params = {k.op() if isinstance(k, ir.Expr) else k: v for k, v in params.items()} - return execute_and_reset(query.op(), params=params, **kwargs) + return DaskExecutor.execute(expr.op(), backend=self, params=params) def read_csv( self, source: str | pathlib.Path, table_name: str | None = None, **kwargs: Any @@ -178,20 +170,15 @@ def read_parquet( def table(self, name: str, schema: sch.Schema | None = None): df = self.dictionary[name] schema = schema or self.schemas.get(name, None) - schema = DaskData.infer_table(df, schema=schema) + schema = PandasData.infer_table(df.head(1), schema=schema) return ops.DatabaseTable(name, schema, self).to_expr() - @classmethod - def _supports_conversion(cls, obj: Any) -> bool: - return isinstance(obj, cls.backend_table_type) - - @staticmethod - def _from_pandas(df: pd.DataFrame, npartitions: int = 1) -> dd.DataFrame: - return dd.from_pandas(df, npartitions=npartitions) + def _convert_object(self, obj) -> dd.DataFrame: + if isinstance(obj, dd.DataFrame): + return obj - @classmethod - def _convert_object(cls, obj: dd.DataFrame) -> dd.DataFrame: - return obj + pandas_df = super()._convert_object(obj) + return dd.from_pandas(pandas_df, npartitions=1) def _load_into_cache(self, name, expr): self.create_table(name, self.compile(expr).persist()) diff --git a/ibis/backends/dask/aggcontext.py b/ibis/backends/dask/aggcontext.py deleted file mode 100644 index b0ea30ccd29d..000000000000 --- a/ibis/backends/dask/aggcontext.py +++ /dev/null @@ -1,214 +0,0 @@ -from __future__ import annotations - -import operator -from typing import TYPE_CHECKING, Any, Callable, Union - -import dask.dataframe as dd - -import ibis -from ibis.backends.pandas.aggcontext import ( - AggregationContext, - compute_window_spec, - construct_time_context_aware_series, - get_time_col, - window_agg_udf, - wrap_for_agg, -) -from ibis.backends.pandas.aggcontext import Transform as PandasTransform - -if TYPE_CHECKING: - from dask.dataframe.groupby import SeriesGroupBy - -# TODO Consolidate this logic with the pandas aggcontext. -# This file is almost a direct port of the pandas aggcontext. 
-# https://github.com/ibis-project/ibis/issues/5911 - - -class Summarize(AggregationContext): - __slots__ = () - - def agg(self, grouped_data, function, *args, **kwargs): - if isinstance(function, str): - return getattr(grouped_data, function)(*args, **kwargs) - - if not callable(function): - raise TypeError(f"Object {function} is not callable or a string") - - elif isinstance(grouped_data, dd.Series): - return grouped_data.reduction(wrap_for_agg(function, args, kwargs)) - else: - return grouped_data.agg(wrap_for_agg(function, args, kwargs)) - - -class Transform(PandasTransform): - def agg(self, grouped_data, function, *args, **kwargs): - res = super().agg(grouped_data, function, *args, **kwargs) - index_name = res.index.name if res.index.name is not None else "index" - res = res.reset_index().set_index(index_name).iloc[:, 0] - return res - - -def dask_window_agg_built_in( - frame: dd.DataFrame, - windowed: dd.rolling.Rolling, - function: str, - max_lookback: int, - *args: tuple[Any], - **kwargs: dict[str, Any], -) -> dd.Series: - """Apply window aggregation with built-in aggregators.""" - assert isinstance(function, str) - method = operator.methodcaller(function, *args, **kwargs) - - if max_lookback is not None: - agg_method = method - - def sliced_agg(s): - return agg_method(s.iloc[-max_lookback:]) - - method = operator.methodcaller("apply", sliced_agg, raw=False) - - result = method(windowed) - # No MultiIndex support in dask - result.index = frame.index - return result - - -class Window(AggregationContext): - __slots__ = ("construct_window",) - - def __init__(self, kind, *args, **kwargs): - super().__init__( - parent=kwargs.pop("parent", None), - group_by=kwargs.pop("group_by", None), - order_by=kwargs.pop("order_by", None), - output_type=kwargs.pop("output_type"), - max_lookback=kwargs.pop("max_lookback", None), - ) - self.construct_window = operator.methodcaller(kind, *args, **kwargs) - - def agg( - self, - grouped_data: Union[dd.Series, SeriesGroupBy], - function: Union[str, Callable], - *args: Any, - **kwargs: Any, - ) -> dd.Series: - # avoid a pandas warning about numpy arrays being passed through - # directly - group_by = self.group_by - order_by = self.order_by - - assert group_by or order_by - - # Get the DataFrame from which the operand originated - # (passed in when constructing this context object in - # execute_node(ops.Window)) - parent = self.parent - frame = getattr(parent, "obj", parent) - grouped_meta = getattr(grouped_data, "_meta_nonempty", grouped_data) - obj = getattr(grouped_meta, "obj", grouped_data) - name = obj.name - if frame[name] is not obj or name in group_by or name in order_by: - name = f"{name}_{ibis.util.guid()}" - frame = frame.assign(**{name: obj}) - - # set the index to our order_by keys and append it to the existing - # index - # TODO: see if we can do this in the caller, when the context - # is constructed rather than pulling out the data - columns = group_by + order_by + [name] - # Create a new frame to avoid mutating the original one - indexed_by_ordering = frame[columns].copy() - # placeholder column to compute window_sizes below - indexed_by_ordering["_placeholder"] = 0 - indexed_by_ordering = indexed_by_ordering.set_index(order_by) - - # regroup if needed - if group_by: - grouped_frame = indexed_by_ordering.groupby(group_by, group_keys=False) - else: - grouped_frame = indexed_by_ordering - grouped = grouped_frame[name] - - if callable(function): - # To compute the window_size, we need to construct a - # RollingGroupby and compute count using 
construct_window. - # However, if the RollingGroupby is not numeric, e.g., - # we are calling window UDF on a timestamp column, we - # cannot compute rolling count directly because: - # (1) windowed.count() will exclude NaN observations - # , which results in incorrect window sizes. - # (2) windowed.apply(len, raw=True) will include NaN - # observations, but doesn't work on non-numeric types. - # https://github.com/pandas-dev/pandas/issues/23002 - # To deal with this, we create a _placeholder column - windowed_frame = self.construct_window(grouped_frame) - window_sizes = windowed_frame["_placeholder"].count().reset_index(drop=True) - mask = ~(window_sizes.isna()) - window_upper_indices = dd.Series(range(len(window_sizes))) + 1 - window_lower_indices = window_upper_indices - window_sizes - # The result Series of udf may need to be trimmed by - # timecontext. In order to do so, 'time' must be added - # as an index to the Series, if present. Here We extract - # time column from the parent Dataframe `frame`. - if get_time_col() in frame: - result_index = construct_time_context_aware_series(obj, frame).index - else: - result_index = obj.index - result = window_agg_udf( - grouped_data, - function, - window_lower_indices, - window_upper_indices, - mask, - result_index, - self.dtype, - self.max_lookback, - *args, - **kwargs, - ) - else: - # perform the per-group rolling operation - windowed = self.construct_window(grouped) - result = dask_window_agg_built_in( - frame, - windowed, - function, - self.max_lookback, - *args, - **kwargs, - ) - try: - return result.astype(self.dtype, copy=False) - except (TypeError, ValueError): - # The dtypes in result could have been promoted during the agg - # computation. Trying to downcast the type back with self.dtype will - # fail but we want to result with the promoted types anyways. 
- return result - - -class Cumulative(Window): - __slots__ = () - - def __init__(self, window, *args, **kwargs): - super().__init__("rolling", *args, window=window, min_periods=1, **kwargs) - - -class Moving(Window): - __slots__ = () - - def __init__(self, start, max_lookback, *args, **kwargs): - start = compute_window_spec(start.dtype, start.value) - - super().__init__( - "rolling", - start, - *args, - max_lookback=max_lookback, - min_periods=1, - **kwargs, - ) - - def short_circuit_method(self, grouped_data, function): - raise AttributeError("No short circuit method for rolling operations") diff --git a/ibis/backends/dask/convert.py b/ibis/backends/dask/convert.py new file mode 100644 index 000000000000..270f4ffc6aaf --- /dev/null +++ b/ibis/backends/dask/convert.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +import dask.dataframe as dd +import pandas as pd +import pandas.api.types as pdt + +import ibis.expr.datatypes as dt +from ibis.backends.pandas.convert import PandasConverter +from ibis.formats.pandas import DataMapper, PandasType + + +class DaskConverter(DataMapper): + @classmethod + def convert_scalar(cls, obj, dtype): + return PandasConverter.convert_scalar(obj, dtype) + + @classmethod + def convert_column(cls, obj, dtype): + pandas_type = PandasType.from_ibis(dtype) + + method_name = f"convert_{dtype.__class__.__name__}" + convert_method = getattr(cls, method_name, cls.convert_default) + + return convert_method(obj, dtype, pandas_type) + + @classmethod + def convert_default(cls, s, dtype, pandas_type): + if pandas_type == object: + func = lambda x: x if x is pd.NA else dt.normalize(dtype, x) + meta = (s.name, pandas_type) + return s.map(func, na_action="ignore", meta=meta).astype(pandas_type) + else: + return s.astype(pandas_type) + + @classmethod + def convert_Integer(cls, s, dtype, pandas_type): + if pdt.is_datetime64_any_dtype(s.dtype): + return s.astype("int64").floordiv(int(1e9)).astype(pandas_type) + else: + return s.astype(pandas_type) + + convert_SignedInteger = convert_UnsignedInteger = convert_Integer + convert_Int64 = convert_Int32 = convert_Int16 = convert_Int8 = convert_SignedInteger + convert_UInt64 = ( + convert_UInt32 + ) = convert_UInt16 = convert_UInt8 = convert_UnsignedInteger + + @classmethod + def convert_Floating(cls, s, dtype, pandas_type): + if pdt.is_datetime64_any_dtype(s.dtype): + return s.astype("int64").floordiv(int(1e9)).astype(pandas_type) + else: + return s.astype(pandas_type) + + convert_Float64 = convert_Float32 = convert_Float16 = convert_Floating + + @classmethod + def convert_Timestamp(cls, s, dtype, pandas_type): + if isinstance(s.dtype, pd.DatetimeTZDtype): + return s.dt.tz_convert(dtype.timezone) + elif pdt.is_datetime64_dtype(s.dtype): + return s.dt.tz_localize(dtype.timezone) + elif pdt.is_numeric_dtype(s.dtype): + return dd.to_datetime(s, unit="s").dt.tz_localize(dtype.timezone) + else: + return dd.to_datetime(s, utc=True).dt.tz_localize(dtype.timezone) + + @classmethod + def convert_Date(cls, s, dtype, pandas_type): + if isinstance(s.dtype, pd.DatetimeTZDtype): + s = s.dt.tz_convert("UTC").dt.tz_localize(None) + elif pdt.is_numeric_dtype(s.dtype): + s = dd.to_datetime(s, unit="D") + else: + s = dd.to_datetime(s) + + return s.dt.normalize() + + @classmethod + def convert_String(cls, s, dtype, pandas_type): + # TODO(kszucs): should switch to the new pandas string type and convert + # object columns using s.convert_dtypes() method + return s.map(str, na_action="ignore").astype(object) diff --git a/ibis/backends/dask/core.py 
b/ibis/backends/dask/core.py deleted file mode 100644 index df5b04a1b44a..000000000000 --- a/ibis/backends/dask/core.py +++ /dev/null @@ -1,472 +0,0 @@ -"""The Dask backend. - -The dask backend is a very close port of the pandas backend, and thus has -the similar caveats. - -The dask backend is a departure from the typical ibis backend in that it -doesn't compile to anything and the execution of the ibis expression -is under the purview of ibis itself rather than executing SQL on a server. - -Design ------- -The dask backend uses a technique called `multiple dispatch -`_, implemented in a -third-party open source library called `multipledispatch -`_. - -Multiple dispatch is a generalization of standard single-dispatch runtime -polymorphism to multiple arguments. - -Compilation ------------ -The ibis backend uses the ibis dispatching machinery to "compile" a dask -TaskGraph you can run `.compute()` on to evaluate it. - -Execution ---------- -Execution is divided into different dispatched functions, each arising from -a different use case. - -A top level function `execute` exists to provide the API for executing an ibis -expression against in-memory data. - -The general flow of execution is: - -:: - If the current operation is in scope: - return it - Else: - execute the arguments of the current node - - execute the current node with its executed arguments - -Specifically, execute is comprised of a series of steps that happen at -different times during the loop. - -1. ``compute_time_context`` ---------------------------- -First, at the beginning of the main execution loop, ``compute_time_context`` is -called. This function computes time contexts, and pass them to all children of -the current node. These time contexts could be used in later steps to get data. -This is essential for time series Table, and related operations that adjust -time context, such as window, asof_join, etc. - -By default, this function simply pass the unchanged time context to all -children nodes. - - -2. ``pre_execute`` ------------------- -Second, ``pre_execute`` is called. -This function serves a similar purpose to ``data_preload``, the key difference -being that ``pre_execute`` is called *every time* there's a call to execute. - -By default this function does nothing. - -3. ``execute_node`` -------------------- - -Then, when an expression is ready to be evaluated we call -:func:`~ibis.dask.core.execute` on the expressions arguments and then -:func:`~ibis.dask.dispatch.execute_node` on the expression with its -now-materialized arguments. - -4. ``post_execute`` -------------------- -The final step--``post_execute``--is called immediately after the previous call -to ``execute_node`` and takes the instance of the -:class:`~ibis.expr.operations.Node` just computed and the result of the -computation. - -The purpose of this function is to allow additional computation to happen in -the context of the current level of the execution loop. You might be wondering -That may sound vague, so let's look at an example. - -Let's say you want to take a three day rolling average, and you want to include -3 days of data prior to the first date of the input. You don't want to see that -data in the result for a few reasons, one of which is that it would break the -contract of window functions: given N rows of input there are N rows of output. - -Defining a ``post_execute`` rule for :class:`~ibis.expr.operations.Window` -allows you to encode such logic. 
One might want to implement this using -:class:`~ibis.expr.operations.ScalarParameter`, in which case the ``scope`` -passed to ``post_execute`` would be the bound values passed in at the time the -``execute`` method was called. - - -Scope ------ -Scope is used across the execution phases, it iss a map that maps Ibis -operators to actual data. It is used to cache data for calculated ops. It is -an optimization to reused executed results. - -With time context included, the key is op associated with each expression; -And scope value is another key-value map: -- value: pd.DataFrame or pd.Series that is the result of executing key op -- timecontext: of type TimeContext, the time context associated with the data -stored in value - -See ibis.common.scope for details about the implementation. -""" -from __future__ import annotations - -import functools -from typing import TYPE_CHECKING, Any - -import dask.dataframe as dd -from multipledispatch import Dispatcher - -import ibis.common.exceptions as com -import ibis.expr.operations as ops -from ibis.backends.base.df.scope import Scope -from ibis.backends.base.df.timecontext import TimeContext, canonicalize_context -from ibis.backends.dask import aggcontext as agg_ctx -from ibis.backends.dask.dispatch import ( - execute_literal, - execute_node, - post_execute, - pre_execute, -) -from ibis.backends.dask.trace import trace -from ibis.backends.pandas.core import ( - compute_time_context, - get_node_arguments, - is_computable_input, - is_computable_input_arg, -) - -if TYPE_CHECKING: - from collections.abc import Mapping - -is_computable_input.register(dd.core.Scalar)(is_computable_input_arg) - - -# TODO(kszucs): should deduplicate with pandas code since it is an exact copy -# of pandas.execute_with_scope() -def execute_with_scope( - node: ops.Node, - scope: Scope, - timecontext: TimeContext | None = None, - aggcontext: agg_ctx.AggregationContext | None = None, - clients=None, - **kwargs, -): - """Execute an expression `expr`, with data provided in `scope`. - - Parameters - ---------- - node - The operation node to execute. - scope - A Scope class, with dictionary mapping `ops.Node` subclass instances to - concrete data such as a pandas DataFrame. - timecontext - A tuple of (begin, end) that is passed from parent Node to children - see [timecontext.py](ibis/backends/pandas/execution/timecontext.py) for - detailed usage for this time context. - aggcontext - Context used to compute an aggregation. - clients - Sequence of clients - kwargs - Keyword arguments - """ - # Call pre_execute, to allow clients to intercept the expression before - # computing anything *and* before associating leaf nodes with data. This - # allows clients to provide their own data for each leaf. 
- if clients is None: - clients, _ = node.to_expr()._find_backends() - - if aggcontext is None: - aggcontext = agg_ctx.Summarize() - - pre_executed_scope = pre_execute( - node, - *clients, - scope=scope, - timecontext=timecontext, - aggcontext=aggcontext, - **kwargs, - ) - new_scope = scope.merge_scope(pre_executed_scope) - result = execute_until_in_scope( - node, - new_scope, - timecontext=timecontext, - aggcontext=aggcontext, - clients=clients, - # XXX: we *explicitly* pass in scope and not new_scope here so that - # post_execute sees the scope of execute_with_scope, not the scope of - # execute_until_in_scope - post_execute_=functools.partial( - post_execute, - scope=scope, - timecontext=timecontext, - aggcontext=aggcontext, - clients=clients, - **kwargs, - ), - **kwargs, - ).get_value(node, timecontext) - return result - - -# TODO(kszucs): should deduplicate with pandas code since it is an exact copy -# of pandas.execute_until_in_scope() -@trace -def execute_until_in_scope( - node, - scope: Scope, - timecontext: TimeContext | None = None, - aggcontext=None, - clients=None, - post_execute_=None, - **kwargs, -) -> Scope: - """Execute until our op is in `scope`.""" - # these should never be None - assert aggcontext is not None, "aggcontext is None" - assert clients is not None, "clients is None" - assert post_execute_ is not None, "post_execute_ is None" - - # base case: our op has been computed (or is a leaf data node), so - # return the corresponding value - if scope.get_value(node, timecontext) is not None: - return scope - if isinstance(node, ops.Literal): - # special case literals to avoid the overhead of dispatching - # execute_node - return Scope( - { - node: execute_literal( - node, - node.value, - node.dtype, - aggcontext=aggcontext, - **kwargs, - ) - }, - timecontext, - ) - - # figure out what arguments we're able to compute on based on the - # expressions inputs. things like expressions, None, and scalar types are - # computable whereas ``list``s are not - computable_args = [ - arg for arg in get_node_arguments(node) if is_computable_input(arg) - ] - - # pre_executed_states is a list of states with same the length of - # computable_args, these states are passed to each arg - if timecontext: - arg_timecontexts = compute_time_context( - node, - num_args=len(computable_args), - timecontext=timecontext, - clients=clients, - scope=scope, - ) - else: - arg_timecontexts = [None] * len(computable_args) - - pre_executed_scope = pre_execute( - node, - *clients, - scope=scope, - timecontext=timecontext, - aggcontext=aggcontext, - **kwargs, - ) - - new_scope = scope.merge_scope(pre_executed_scope) - - # Short circuit: if pre_execute puts op in scope, then we don't need to - # execute its computable_args - if new_scope.get_value(node, timecontext) is not None: - return new_scope - - # recursively compute each node's arguments until we've changed type. - # compute_time_context should return with a list with the same length - # as computable_args, the two lists will be zipping together for - # further execution - if len(arg_timecontexts) != len(computable_args): - raise com.IbisError( - "arg_timecontexts differ with computable_arg in length " - f"for type:\n{type(node).__name__}." 
- ) - - scopes = [ - execute_until_in_scope( - arg, - new_scope, - timecontext=timecontext, - aggcontext=aggcontext, - post_execute_=post_execute_, - clients=clients, - **kwargs, - ) - if isinstance(arg, ops.Node) - else Scope({arg: arg}, timecontext) - for (arg, timecontext) in zip(computable_args, arg_timecontexts) - ] - - # if we're unable to find data then raise an exception - if not scopes and computable_args: - raise com.UnboundExpressionError(f"Unable to find data for node:\n{node!r}") - - # there should be exactly one dictionary per computable argument - assert len(computable_args) == len(scopes) - - new_scope = new_scope.merge_scopes(scopes) - # pass our computed arguments to this node's execute_node implementation - data = [ - new_scope.get_value(arg, timecontext) if isinstance(arg, ops.Node) else arg - for (arg, timecontext) in zip(computable_args, arg_timecontexts) - ] - result = execute_node( - node, - *data, - scope=scope, - timecontext=timecontext, - aggcontext=aggcontext, - clients=clients, - **kwargs, - ) - computed = post_execute_(node, result, timecontext=timecontext) - return Scope({node: computed}, timecontext) - - -execute = Dispatcher("execute") - - -@execute.register(ops.Node) -@trace -def main_execute( - node: ops.Node, - params: Mapping[ops.Node, Any] | None = None, - scope: Scope | None = None, - timecontext: TimeContext | None = None, - aggcontext: agg_ctx.AggregationContext | None = None, - cache: Mapping[ops.Node, Any] | None = None, - **kwargs: Any, -): - """Execute an expression against data that are bound to it. - - If no data are bound, raise a `ValueError`. - - Parameters - ---------- - node : ibis.expr.operations.Node - The operation node to execute - params : Mapping[ibis.expr.operations.Node, object] - The data that an unbound parameter in `expr` maps to - scope : Mapping[ibis.expr.operations.Node, object] - Additional scope, mapping ibis operations to data - timecontext : Optional[TimeContext] - timecontext needed for execution - aggcontext : Optional[ibis.backends.pandas.aggcontext.AggregationContext] - An object indicating how to compute aggregations. For example, - a rolling mean needs to be computed differently than the mean of a - column. - cache - Cache used to store computations. - kwargs - Additional arguments that can potentially be used by individual node - execution - - Raises - ------ - ValueError - * If no data are bound to the input expression - """ - import ibis.expr.types as ir - - if scope is None: - scope = Scope() - - if timecontext is not None: - # convert timecontext to datetime type, if time strings are provided - timecontext = canonicalize_context(timecontext) - - if params is None: - params = {} - - if cache is None: - cache = {} - - # TODO: make expressions hashable so that we can get rid of these .op() - # calls everywhere - params = {k.op() if isinstance(k, ir.Expr) else k: v for k, v in params.items()} - scope = scope.merge_scope(Scope(params, timecontext)) - return execute_with_scope( - node, - scope, - timecontext=timecontext, - aggcontext=aggcontext, - cache=cache, - **kwargs, - ) - - -def execute_and_reset( - node, - params=None, - scope=None, - timecontext: TimeContext | None = None, - aggcontext=None, - **kwargs, -): - """Execute an expression against data that are bound to it. - - If no data are bound, raise an Exception. - - The difference between this function and `ibis.dask.core.execute` is that - this function resets the index of the result, if the result has an index. 
- - Parameters - ---------- - node : ibis.expr.operations.Node - The operation node to execute - params : Mapping[ibis.expr.operations.Node, object] - The data that an unbound parameter in `expr` maps to - scope : Mapping[ibis.expr.operations.Node, object] - Additional scope, mapping ibis operations to data - timecontext : Optional[TimeContext] - timecontext needed for execution - aggcontext : Optional[ibis.dask.aggcontext.AggregationContext] - An object indicating how to compute aggregations. For example, - a rolling mean needs to be computed differently than the mean of a - column. - kwargs : Dict[str, object] - Additional arguments that can potentially be used by individual node - execution - - Returns - ------- - result : Union[ - dask.dataframe.Series, - dask.dataframe.DataFrame, - ibis.dask.core.simple_types - ] - - Raises - ------ - ValueError - * If no data are bound to the input expression. - """ - result = execute( - node, - params=params, - scope=scope, - timecontext=timecontext, - aggcontext=aggcontext, - **kwargs, - ) - # Note - if `result` has npartitions > 1 `reset_index` will not create - # a monotonically increasing index. - if isinstance(result, dd.DataFrame): - df = result.reset_index() - return df[list(node.schema.names)] - elif isinstance(result, dd.Series): - return result.reset_index(drop=True) - return result diff --git a/ibis/backends/dask/dispatch.py b/ibis/backends/dask/dispatch.py deleted file mode 100644 index 5a1012ac028e..000000000000 --- a/ibis/backends/dask/dispatch.py +++ /dev/null @@ -1,23 +0,0 @@ -from __future__ import annotations - -from multipledispatch import Dispatcher - -import ibis.backends.pandas.core as core_dispatch -import ibis.backends.pandas.dispatch as pandas_dispatch -from ibis.backends.dask.trace import TraceTwoLevelDispatcher - -execute_node = TraceTwoLevelDispatcher("execute_node") -for types, func in pandas_dispatch.execute_node.funcs.items(): - execute_node.register(*types)(func) - -execute = Dispatcher("execute") -execute.funcs.update(core_dispatch.execute.funcs) - -pre_execute = Dispatcher("pre_execute") -pre_execute.funcs.update(core_dispatch.pre_execute.funcs) - -execute_literal = Dispatcher("execute_literal") -execute_literal.funcs.update(core_dispatch.execute_literal.funcs) - -post_execute = Dispatcher("post_execute") -post_execute.funcs.update(core_dispatch.post_execute.funcs) diff --git a/ibis/backends/dask/execution/__init__.py b/ibis/backends/dask/execution/__init__.py deleted file mode 100644 index 35e86d2665a1..000000000000 --- a/ibis/backends/dask/execution/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -from __future__ import annotations - -from ibis.backends.dask.execution.aggregations import * # noqa: F403 -from ibis.backends.dask.execution.arrays import * # noqa: F403 -from ibis.backends.dask.execution.decimal import * # noqa: F403 -from ibis.backends.dask.execution.generic import * # noqa: F403 -from ibis.backends.dask.execution.indexing import * # noqa: F403 -from ibis.backends.dask.execution.join import * # noqa: F403 -from ibis.backends.dask.execution.maps import * # noqa: F403 -from ibis.backends.dask.execution.numeric import * # noqa: F403 -from ibis.backends.dask.execution.reductions import * # noqa: F403 -from ibis.backends.dask.execution.selection import * # noqa: F403 -from ibis.backends.dask.execution.strings import * # noqa: F403 -from ibis.backends.dask.execution.structs import * # noqa: F403 -from ibis.backends.dask.execution.temporal import * # noqa: F403 -from ibis.backends.dask.execution.window import * # 
noqa: F403 diff --git a/ibis/backends/dask/execution/aggregations.py b/ibis/backends/dask/execution/aggregations.py deleted file mode 100644 index 9675a4e5f747..000000000000 --- a/ibis/backends/dask/execution/aggregations.py +++ /dev/null @@ -1,131 +0,0 @@ -"""Execution rules for Aggregatons - mostly TODO. - -- ops.Aggregation -- ops.Any -- ops.All -""" - -from __future__ import annotations - -import functools -import operator -from typing import TYPE_CHECKING - -import dask.dataframe as dd -import dask.dataframe.groupby as ddgb - -import ibis.expr.operations as ops -from ibis.backends.base.df.scope import Scope -from ibis.backends.dask import aggcontext as agg_ctx -from ibis.backends.dask.core import execute -from ibis.backends.dask.dispatch import execute_node -from ibis.backends.dask.execution.util import coerce_to_output, safe_concat - -if TYPE_CHECKING: - from ibis.backends.base.df.timecontext import TimeContext - - -# TODO - aggregations - #2553 -# Not all code paths work cleanly here -@execute_node.register(ops.Aggregation, dd.DataFrame) -def execute_aggregation_dataframe( - op, data, scope=None, timecontext: TimeContext | None = None, **kwargs -): - assert op.metrics, "no metrics found during aggregation execution" - - if op.sort_keys: - raise NotImplementedError("sorting on aggregations not yet implemented") - - if op.predicates: - predicate = functools.reduce( - operator.and_, - ( - execute(p, scope=scope, timecontext=timecontext, **kwargs) - for p in op.predicates - ), - ) - data = data.loc[predicate] - - columns = {} - - if op.by: - grouping_keys = [ - key.name - if isinstance(key, ops.TableColumn) - else execute(key, scope=scope, timecontext=timecontext, **kwargs).rename( - key.name - ) - for key in op.by - ] - source = data.groupby(grouping_keys) - else: - source = data - - scope = scope.merge_scope(Scope({op.table: source}, timecontext)) - - pieces = [] - for metric in op.metrics: - piece = execute(metric, scope=scope, timecontext=timecontext, **kwargs) - piece = coerce_to_output(piece, metric) - pieces.append(piece) - - # We must perform this check here otherwise dask will throw a ValueError - # on `concat_and_check`. See docstring on `util.concat_via_join` for - # more detail - result = safe_concat(pieces) - - # If grouping, need a reset to get the grouping key back as a column - if op.by: - result = result.reset_index() - - result.columns = [columns.get(c, c) for c in result.columns] - - if op.having: - # .having(...) is only accessible on groupby, so this should never - # raise - if not op.by: - raise ValueError( - "Filtering out aggregation values is not allowed without at " - "least one grouping key" - ) - - # TODO(phillipc): Don't recompute identical subexpressions - predicate = functools.reduce( - operator.and_, - ( - execute(having, scope=scope, timecontext=timecontext, **kwargs) - for having in op.having - ), - ) - assert len(predicate) == len( - result - ), "length of predicate does not match length of DataFrame" - result = result.loc[predicate.values] - return result - - -@execute_node.register((ops.Any, ops.All), dd.Series, (dd.Series, type(None))) -def execute_any_all_series(op, data, mask, aggcontext=None, **kwargs): - if mask is not None: - data = data.loc[mask] - - name = type(op).__name__.lower() - if isinstance(aggcontext, (agg_ctx.Summarize, agg_ctx.Transform)): - result = aggcontext.agg(data, name) - else: - # Note this branch is not currently hit in the dask backend but is - # here for future scaffolding. 
- result = aggcontext.agg(data, operator.methodcaller(name)) - return result - - -@execute_node.register((ops.Any, ops.All), ddgb.SeriesGroupBy, type(None)) -def execute_any_all_series_group_by(op, data, mask, aggcontext=None, **kwargs): - name = type(op).__name__.lower() - if isinstance(aggcontext, (agg_ctx.Summarize, agg_ctx.Transform)): - result = aggcontext.agg(data, name) - else: - # Note this branch is not currently hit in the dask backend but is - # here for future scaffolding. - result = aggcontext.agg(data, operator.methodcaller(name)) - return result diff --git a/ibis/backends/dask/execution/arrays.py b/ibis/backends/dask/execution/arrays.py deleted file mode 100644 index ef3dd7889298..000000000000 --- a/ibis/backends/dask/execution/arrays.py +++ /dev/null @@ -1,75 +0,0 @@ -from __future__ import annotations - -import itertools -from functools import partial - -import dask.dataframe as dd -import dask.dataframe.groupby as ddgb -import numpy as np -import pandas as pd - -import ibis.expr.operations as ops -from ibis.backends.dask.core import execute -from ibis.backends.dask.dispatch import execute_node -from ibis.backends.dask.execution.util import ( - TypeRegistrationDict, - register_types_to_dispatcher, -) -from ibis.backends.pandas.execution.arrays import ( - execute_array_index, - execute_array_length, -) - -DASK_DISPATCH_TYPES: TypeRegistrationDict = { - ops.ArrayLength: [((dd.Series,), execute_array_length)], - ops.ArrayIndex: [((dd.Series, int), execute_array_index)], -} - -register_types_to_dispatcher(execute_node, DASK_DISPATCH_TYPES) - - -collect_list = dd.Aggregation( - name="collect_list", - chunk=lambda s: s.apply(list), - agg=lambda s0: s0.apply(lambda chunks: list(itertools.chain.from_iterable(chunks))), -) - - -@execute_node.register(ops.Array, tuple) -def execute_array_column(op, cols, **kwargs): - vals = [execute(arg, **kwargs) for arg in cols] - - length = next((len(v) for v in vals if isinstance(v, dd.Series)), None) - if length is None: - return vals - - n_partitions = next((v.npartitions for v in vals if isinstance(v, dd.Series)), None) - - def ensure_series(v): - if isinstance(v, dd.Series): - return v - else: - return dd.from_pandas(pd.Series([v] * length), npartitions=n_partitions) - - # dd.concat() can only handle array-likes. - # If we're given a scalar, we need to broadcast it as a Series. 
- df = dd.concat([ensure_series(v) for v in vals], axis=1) - return df.apply( - lambda row: np.array(row, dtype=object), axis=1, meta=(None, "object") - ) - - -# TODO - aggregations - #2553 -@execute_node.register(ops.ArrayCollect, dd.Series, type(None)) -def execute_array_collect(op, data, where, aggcontext=None, **kwargs): - return aggcontext.agg(data, collect_list) - - -@execute_node.register(ops.ArrayCollect, ddgb.SeriesGroupBy, type(None)) -def execute_array_collect_grouped_series(op, data, where, **kwargs): - return data.agg(collect_list) - - -@execute_node.register(ops.ArrayConcat, tuple) -def execute_array_concat(op, args, **kwargs): - return execute_node(op, *map(partial(execute, **kwargs), args), **kwargs) diff --git a/ibis/backends/dask/execution/decimal.py b/ibis/backends/dask/execution/decimal.py deleted file mode 100644 index 49c58fb9ac1b..000000000000 --- a/ibis/backends/dask/execution/decimal.py +++ /dev/null @@ -1,23 +0,0 @@ -from __future__ import annotations - -import decimal - -import dask.dataframe as dd - -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -from ibis.backends.dask.dispatch import execute_node - - -@execute_node.register(ops.Cast, dd.Series, dt.Decimal) -def execute_cast_series_to_decimal(op, data, type, **kwargs): - precision = type.precision - scale = type.scale - context = decimal.Context(prec=precision) - places = context.create_decimal(f"{'0' * (precision - scale)}.{'0' * scale}") - return data.apply( - lambda x, context=context, places=places: ( - context.create_decimal(x).quantize(places) - ), - meta=(data.name, "object"), - ) diff --git a/ibis/backends/dask/execution/generic.py b/ibis/backends/dask/execution/generic.py deleted file mode 100644 index 34c56b592e4e..000000000000 --- a/ibis/backends/dask/execution/generic.py +++ /dev/null @@ -1,559 +0,0 @@ -"""Execution rules for generic ibis operations.""" - -from __future__ import annotations - -import contextlib -import datetime -import decimal -import functools -import numbers -from operator import methodcaller - -import dask.array as da -import dask.dataframe as dd -import dask.dataframe.groupby as ddgb -import numpy as np -import pandas as pd -from pandas import isnull, to_datetime -from pandas.api.types import DatetimeTZDtype - -import ibis.common.exceptions as com -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -import ibis.expr.types as ir -import ibis.util -from ibis.backends.dask import Backend as DaskBackend -from ibis.backends.dask.core import execute -from ibis.backends.dask.dispatch import execute_node -from ibis.backends.dask.execution.util import ( - TypeRegistrationDict, - add_globally_consecutive_column, - make_selected_obj, - register_types_to_dispatcher, - rename_index, -) -from ibis.backends.pandas.core import ( - date_types, - integer_types, - numeric_types, - scalar_types, - simple_types, - timestamp_types, -) -from ibis.backends.pandas.execution import constants -from ibis.backends.pandas.execution.generic import ( - _execute_binary_op_impl, - compute_row_reduction, - execute_between, - execute_cast_series_array, - execute_cast_series_generic, - execute_count_distinct_star_frame, - execute_count_distinct_star_frame_filter, - execute_count_star_frame, - execute_count_star_frame_filter, - execute_count_star_frame_groupby, - execute_database_table_client, - execute_difference_dataframe_dataframe, - execute_distinct_dataframe, - execute_intersection_dataframe_dataframe, - execute_isinf, - execute_isnan, - execute_node_column_in_column, - 
execute_node_column_in_values, - execute_node_dropna_dataframe, - execute_node_fillna_dataframe_dict, - execute_node_fillna_dataframe_scalar, - execute_node_nullif_scalar_series, - execute_node_nullif_series, - execute_node_self_reference_dataframe, - execute_searched_case, - execute_series_clip, - execute_series_isnull, - execute_series_notnnull, - execute_sort_key_series, - execute_table_column_df_or_df_groupby, -) - -# Many dask and pandas functions are functionally equivalent, so we just add -# on registrations for dask types -DASK_DISPATCH_TYPES: TypeRegistrationDict = { - ops.Cast: [ - ((dd.Series, dt.DataType), execute_cast_series_generic), - ((dd.Series, dt.Array), execute_cast_series_array), - ], - ops.SortKey: [((dd.Series, bool), execute_sort_key_series)], - ops.Clip: [ - ( - ( - dd.Series, - (dd.Series, type(None)) + numeric_types, - (dd.Series, type(None)) + numeric_types, - ), - execute_series_clip, - ), - ], - ops.TableColumn: [ - ( - ((dd.DataFrame, ddgb.DataFrameGroupBy),), - execute_table_column_df_or_df_groupby, - ), - ], - ops.CountStar: [ - ( - (ddgb.DataFrameGroupBy, type(None)), - execute_count_star_frame_groupby, - ), - ((dd.DataFrame, type(None)), execute_count_star_frame), - ((dd.DataFrame, dd.Series), execute_count_star_frame_filter), - ], - ops.CountDistinctStar: [ - ( - (ddgb.DataFrameGroupBy, type(None)), - execute_count_star_frame_groupby, - ), - ((dd.DataFrame, type(None)), execute_count_distinct_star_frame), - ((dd.DataFrame, dd.Series), execute_count_distinct_star_frame_filter), - ], - ops.Between: [ - ( - ( - dd.Series, - (dd.Series, numbers.Real, str, datetime.datetime), - (dd.Series, numbers.Real, str, datetime.datetime), - ), - execute_between, - ), - ], - ops.Intersection: [ - ( - (dd.DataFrame, dd.DataFrame, bool), - execute_intersection_dataframe_dataframe, - ) - ], - ops.Difference: [ - ( - (dd.DataFrame, dd.DataFrame, bool), - execute_difference_dataframe_dataframe, - ) - ], - ops.DropNa: [((dd.DataFrame,), execute_node_dropna_dataframe)], - ops.FillNa: [ - ((dd.DataFrame, simple_types), execute_node_fillna_dataframe_scalar), - ((dd.DataFrame,), execute_node_fillna_dataframe_dict), - ], - ops.IsNull: [((dd.Series,), execute_series_isnull)], - ops.NotNull: [((dd.Series,), execute_series_notnnull)], - ops.IsNan: [((dd.Series,), execute_isnan)], - ops.IsInf: [((dd.Series,), execute_isinf)], - ops.SelfReference: [((dd.DataFrame,), execute_node_self_reference_dataframe)], - ops.InValues: [((dd.Series, tuple), execute_node_column_in_values)], - ops.InColumn: [((dd.Series, dd.Series), execute_node_column_in_column)], - ops.NullIf: [ - ((dd.Series, (dd.Series, *simple_types)), execute_node_nullif_series), - ((simple_types, dd.Series), execute_node_nullif_scalar_series), - ], - ops.Distinct: [((dd.DataFrame,), execute_distinct_dataframe)], -} - -register_types_to_dispatcher(execute_node, DASK_DISPATCH_TYPES) - -execute_node.register(ops.DatabaseTable, DaskBackend)(execute_database_table_client) - - -@execute_node.register(ops.Alias, object) -def execute_alias_series(op, _, **kwargs): - # just compile the underlying argument because the naming is handled - # by the translator for the top level expression - return execute(op.arg, **kwargs) - - -@execute_node.register(ops.Arbitrary, dd.Series, (dd.Series, type(None))) -def execute_arbitrary_series_mask(op, data, mask, aggcontext=None, **kwargs): - """Execute a masked `ops.Arbitrary` operation. 
- - We cannot use the pandas version because - [Dask does not support `.iloc`](https://docs.dask.org/en/latest/dataframe-indexing.html). - `.loc` will only work if our index lines up with the label. - """ - data = data[mask] if mask is not None else data - if op.how == "first": - index = 0 - elif op.how == "last": - index = len(data) - 1 # TODO - computation - else: - raise com.OperationNotDefinedError(f"Arbitrary {op.how!r} is not supported") - - return data.loc[index] - - -@execute_node.register(ops.Arbitrary, ddgb.SeriesGroupBy, type(None)) -def execute_arbitrary_series_groupby(op, data, _, aggcontext=None, **kwargs): - how = op.how - if how is None: - how = "first" - - if how not in {"first", "last"}: - raise com.OperationNotDefinedError(f"Arbitrary {how!r} is not supported") - return aggcontext.agg(data, how) - - -def _mode_agg(df): - return df.sum().sort_values(ascending=False).index[0] - - -@execute_node.register(ops.Mode, dd.Series, (dd.Series, type(None))) -def execute_mode_series(_, data, mask, **kwargs): - if mask is not None: - data = data[mask] - return data.reduction( - chunk=methodcaller("value_counts"), - combine=methodcaller("sum"), - aggregate=_mode_agg, - meta=data.dtype, - ) - - -def _grouped_mode_agg(gb): - return gb.obj.groupby(gb.obj.index.names).sum() - - -def _grouped_mode_finalize(series): - counts = "__counts__" - values = series.index.names[-1] - df = series.reset_index(-1, name=counts) - out = df.groupby(df.index.names).apply( - lambda g: g.sort_values(counts, ascending=False).iloc[0] - ) - return out[values] - - -@execute_node.register(ops.Mode, ddgb.SeriesGroupBy, (ddgb.SeriesGroupBy, type(None))) -def execute_mode_series_group_by(_, data, mask, **kwargs): - if mask is not None: - data = data[mask] - return data.agg( - dd.Aggregation( - name="mode", - chunk=methodcaller("value_counts"), - agg=_grouped_mode_agg, - finalize=_grouped_mode_finalize, - ) - ) - - -@execute_node.register(ops.Cast, ddgb.SeriesGroupBy, dt.DataType) -def execute_cast_series_group_by(op, data, type, **kwargs): - result = execute_cast_series_generic(op, make_selected_obj(data), type, **kwargs) - return result.groupby(data.index) - - -def cast_scalar_to_timestamp(data, tz): - if isinstance(data, str): - return pd.Timestamp(data, tz=tz) - return pd.Timestamp(data, unit="s", tz=tz) - - -@execute_node.register(ops.Cast, dd.core.Scalar, dt.Timestamp) -def execute_cast_scalar_timestamp(op, data, type, **kwargs): - return dd.map_partitions( - cast_scalar_to_timestamp, data, tz=type.timezone, meta="datetime64[ns]" - ) - - -def cast_series_to_timestamp(data, tz): - if pd.api.types.is_string_dtype(data): - timestamps = to_datetime(data) - else: - timestamps = to_datetime(data, unit="s") - if getattr(timestamps.dtype, "tz", None) is not None: - return timestamps.dt.tz_convert(tz) - return timestamps.dt.tz_localize(tz) - - -@execute_node.register(ops.Cast, dd.Series, dt.Timestamp) -def execute_cast_series_timestamp(op, data, type, **kwargs): - arg = op.arg - from_type = arg.dtype - - if from_type.equals(type): # noop cast - return data - - tz = type.timezone - dtype = "M8[ns]" if tz is None else DatetimeTZDtype("ns", tz) - - if from_type.is_timestamp(): - from_tz = from_type.timezone - if tz is None and from_tz is None: - return data - elif tz is None or from_tz is None: - return data.dt.tz_localize(tz) - elif tz is not None and from_tz is not None: - return data.dt.tz_convert(tz) - elif from_type.is_date(): - return data if tz is None else data.dt.tz_localize(tz) - elif from_type.is_string() or 
from_type.is_integer(): - return data.map_partitions( - cast_series_to_timestamp, - tz, - meta=(data.name, dtype), - ) - - raise TypeError(f"Don't know how to cast {from_type} to {type}") - - -@execute_node.register(ops.Cast, dd.Series, dt.Date) -def execute_cast_series_date(op, data, type, **kwargs): - arg = op.args[0] - from_type = arg.dtype - - if from_type.equals(type): - return data - - # TODO - we return slightly different things depending on the branch - # double check what the logic should be - - if from_type.is_timestamp(): - return data.dt.normalize() - - if from_type.equals(dt.string): - # TODO - this is broken - datetimes = data.map_partitions(to_datetime, meta=(data.name, "datetime64[ns]")) - - # TODO - we are getting rid of the index here - return datetimes.dt.normalize() - - if from_type.is_integer(): - return data.map_partitions( - to_datetime, unit="D", meta=(data.name, "datetime64[ns]") - ) - - raise TypeError(f"Don't know how to cast {from_type} to {type}") - - -@execute_node.register(ops.Limit, dd.DataFrame, integer_types, integer_types) -def execute_limit_frame(op, data, nrows, offset, **kwargs): - # NOTE: Dask Dataframes do not support iloc row based indexing - # Need to add a globally consecutive index in order to select nrows number of rows - if nrows == 0: - return dd.from_pandas( - pd.DataFrame(columns=data.columns).astype(data.dtypes), npartitions=1 - ) - unique_col_name = ibis.util.guid() - df = add_globally_consecutive_column(data, col_name=unique_col_name) - ret = df.loc[offset : (offset + nrows) - 1] - return rename_index(ret, None) - - -@execute_node.register(ops.Limit, dd.DataFrame, type(None), integer_types) -def execute_limit_frame_no_limit(op, data, nrows, offset, **kwargs): - unique_col_name = ibis.util.guid() - df = add_globally_consecutive_column(data, col_name=unique_col_name) - ret = df.loc[offset : (offset + len(df)) - 1] - return rename_index(ret, None) - - -@execute_node.register(ops.Not, (dd.core.Scalar, dd.Series)) -def execute_not_scalar_or_series(op, data, **kwargs): - return ~data - - -@execute_node.register(ops.Binary, dd.Series, dd.Series) -@execute_node.register(ops.Binary, dd.Series, dd.core.Scalar) -@execute_node.register(ops.Binary, dd.core.Scalar, dd.Series) -@execute_node.register(ops.Binary, dd.core.Scalar, scalar_types) -@execute_node.register(ops.Binary, scalar_types, dd.core.Scalar) -@execute_node.register(ops.Binary, dd.core.Scalar, dd.core.Scalar) -@execute_node.register( - (ops.NumericBinary, ops.LogicalBinary, ops.Comparison), - numeric_types, - dd.Series, -) -@execute_node.register( - (ops.NumericBinary, ops.LogicalBinary, ops.Comparison), - dd.Series, - numeric_types, -) -@execute_node.register((ops.Comparison, ops.Add, ops.Multiply), dd.Series, str) -@execute_node.register((ops.Comparison, ops.Add, ops.Multiply), str, dd.Series) -@execute_node.register(ops.Comparison, dd.Series, timestamp_types) -@execute_node.register(ops.Comparison, timestamp_types, dd.Series) -@execute_node.register(ops.BitwiseBinary, integer_types, integer_types) -@execute_node.register(ops.BitwiseBinary, dd.Series, integer_types) -@execute_node.register(ops.BitwiseBinary, integer_types, dd.Series) -def execute_binary_op(op, left, right, **kwargs): - return _execute_binary_op_impl(op, left, right, **kwargs) - - -@execute_node.register(ops.Comparison, dd.Series, date_types) -def execute_binary_op_date_right(op, left, right, **kwargs): - return _execute_binary_op_impl( - op, dd.to_datetime(left), pd.to_datetime(right), **kwargs - ) - - 
-@execute_node.register(ops.Binary, ddgb.SeriesGroupBy, ddgb.SeriesGroupBy) -def execute_binary_op_series_group_by(op, left, right, **kwargs): - if left.index != right.index: - raise ValueError( - f"Cannot perform {type(op).__name__} operation on two series with " - "different groupings" - ) - result = execute_binary_op( - op, make_selected_obj(left), make_selected_obj(right), **kwargs - ) - return result.groupby(left.index) - - -@execute_node.register(ops.Binary, ddgb.SeriesGroupBy, simple_types) -def execute_binary_op_series_gb_simple(op, left, right, **kwargs): - result = execute_binary_op(op, make_selected_obj(left), right, **kwargs) - return result.groupby(left.index) - - -@execute_node.register(ops.Binary, simple_types, ddgb.SeriesGroupBy) -def execute_binary_op_simple_series_gb(op, left, right, **kwargs): - result = execute_binary_op(op, left, make_selected_obj(right), **kwargs) - return result.groupby(right.index) - - -@execute_node.register(ops.Unary, ddgb.SeriesGroupBy) -def execute_unary_op_series_gb(op, operand, **kwargs): - result = execute_node(op, make_selected_obj(operand), **kwargs) - return result.groupby(operand.index) - - -@execute_node.register( - (ops.Log, ops.Round), - ddgb.SeriesGroupBy, - (numbers.Real, decimal.Decimal, type(None)), -) -def execute_log_series_gb_others(op, left, right, **kwargs): - result = execute_node(op, make_selected_obj(left), right, **kwargs) - return result.groupby(left.index) - - -@execute_node.register((ops.Log, ops.Round), ddgb.SeriesGroupBy, ddgb.SeriesGroupBy) -def execute_log_series_gb_series_gb(op, left, right, **kwargs): - result = execute_node( - op, make_selected_obj(left), make_selected_obj(right), **kwargs - ) - return result.groupby(left.index) - - -@execute_node.register(ops.Union, dd.DataFrame, dd.DataFrame, bool) -def execute_union_dataframe_dataframe( - op, left: dd.DataFrame, right: dd.DataFrame, distinct, **kwargs -): - result = dd.concat([left, right], axis=0) - return result.drop_duplicates() if distinct else result - - -@execute_node.register(ops.NullIf, simple_types, dd.Series) -def execute_node_nullif_scalar_series(op, value, series, **kwargs): - return series.where(series != value) - - -def wrap_case_result(raw: np.ndarray, expr: ir.Value): - """Wrap a CASE statement result in a Series and handle returning scalars. - - Parameters - ---------- - raw : ndarray[T] - The raw results of executing the ``CASE`` expression - expr : Value - The expression from the which `raw` was computed - - Returns - ------- - Union[scalar, Series] - """ - raw_1d = np.atleast_1d(raw) - if np.any(isnull(raw_1d)): - result = dd.from_array(raw_1d) - else: - result = dd.from_array( - raw_1d.astype(constants.IBIS_TYPE_TO_PANDAS_TYPE[expr.type()]) - ) - # TODO - we force computation here - if isinstance(expr, ir.Scalar) and result.size.compute() == 1: - return result.head().item() - return result - - -@execute_node.register(ops.SearchedCase, tuple, tuple, object) -def execute_searched_case_dask(op, when_nodes, then_nodes, otherwise, **kwargs): - whens = [execute(arg, **kwargs) for arg in when_nodes] - thens = [execute(arg, **kwargs) for arg in then_nodes] - if not isinstance(whens[0], dd.Series): - # if we are not dealing with dask specific objects, fallback to the - # pandas logic. For example, in the case of ibis literals. - # See `test_functions/test_ifelse_returning_bool` or - # `test_operations/test_searched_case_scalar` for code that hits this. 
- return execute_searched_case(op, when_nodes, then_nodes, otherwise, **kwargs) - - if otherwise is None: - otherwise = np.nan - idx = whens[0].index - whens = [w.to_dask_array() for w in whens] - if isinstance(thens[0], dd.Series): - # some computed column - thens = [t.to_dask_array() for t in thens] - else: - # scalar - thens = [da.from_array(np.array([t])) for t in thens] - raw = da.select(whens, thens, otherwise) - out = dd.from_dask_array( - raw, - index=idx, - ) - return out - - -@execute_node.register(ops.SimpleCase, dd.Series, tuple, tuple, object) -def execute_simple_case_series(op, value, whens, thens, otherwise, **kwargs): - whens = [execute(arg, **kwargs) for arg in whens] - thens = [execute(arg, **kwargs) for arg in thens] - if otherwise is None: - otherwise = np.nan - raw = np.select([value == when for when in whens], thens, otherwise) - return wrap_case_result(raw, op.to_expr()) - - -@execute_node.register(ops.Greatest, tuple) -def execute_node_greatest_list(op, values, **kwargs): - values = [execute(arg, **kwargs) for arg in values] - return compute_row_reduction(np.maximum.reduce, values, axis=0) - - -@execute_node.register(ops.Least, tuple) -def execute_node_least_list(op, values, **kwargs): - values = [execute(arg, **kwargs) for arg in values] - return compute_row_reduction(np.minimum.reduce, values, axis=0) - - -def coalesce(values): - def reducer(a1, a2): - with contextlib.suppress(AttributeError): - a1 = a1.compute() - return np.where(pd.isnull(a1), a2, a1) - - return functools.reduce(reducer, values) - - -@execute_node.register(ops.Coalesce, tuple) -def execute_node_coalesce(op, values, **kwargs): - # TODO: this is slow - values = [execute(arg, **kwargs) for arg in values] - return compute_row_reduction(coalesce, values) - - -@execute_node.register(ops.TableArrayView, dd.DataFrame) -def execute_table_array_view(op, _, **kwargs): - # Need to compute dataframe in order to squeeze into a scalar - ddf = execute(op.table) - return ddf.compute().squeeze() - - -@execute_node.register(ops.Sample, dd.DataFrame, object, object) -def execute_sample(op, data, fraction, seed, **kwargs): - return data.sample(frac=fraction, random_state=seed) diff --git a/ibis/backends/dask/execution/indexing.py b/ibis/backends/dask/execution/indexing.py deleted file mode 100644 index 99a723de87ae..000000000000 --- a/ibis/backends/dask/execution/indexing.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Execution rules for ops.IfElse operations.""" - -from __future__ import annotations - -import dask.dataframe as dd - -import ibis.expr.operations as ops -from ibis.backends.dask.dispatch import execute_node -from ibis.backends.pandas.core import boolean_types, scalar_types, simple_types -from ibis.backends.pandas.execution.generic import pd_where - - -@execute_node.register(ops.IfElse, (dd.Series, *boolean_types), dd.Series, dd.Series) -@execute_node.register(ops.IfElse, (dd.Series, *boolean_types), dd.Series, simple_types) -@execute_node.register(ops.IfElse, (dd.Series, *boolean_types), simple_types, dd.Series) -@execute_node.register(ops.IfElse, (dd.Series, *boolean_types), type(None), type(None)) -def execute_node_where(op, cond, true, false, **kwargs): - if any(isinstance(x, (dd.Series, dd.core.Scalar)) for x in (cond, true, false)): - return dd.map_partitions(pd_where, cond, true, false) - # All are immediate scalars, handle locally - return true if cond else false - - -# For true/false as scalars, we only support identical type pairs + None to -# limit the size of the dispatch table and not have to worry 
about type -# promotion. -for typ in (str, *scalar_types): - for cond_typ in (dd.Series, *boolean_types): - execute_node.register(ops.IfElse, cond_typ, typ, typ)(execute_node_where) - execute_node.register(ops.IfElse, cond_typ, type(None), typ)(execute_node_where) - execute_node.register(ops.IfElse, cond_typ, typ, type(None))(execute_node_where) diff --git a/ibis/backends/dask/execution/join.py b/ibis/backends/dask/execution/join.py deleted file mode 100644 index 0d4dafbc32df..000000000000 --- a/ibis/backends/dask/execution/join.py +++ /dev/null @@ -1,111 +0,0 @@ -from __future__ import annotations - -import dask.dataframe as dd -from pandas import Timedelta - -import ibis.expr.operations as ops -import ibis.util -from ibis.backends.dask.dispatch import execute_node -from ibis.backends.dask.execution import constants -from ibis.backends.pandas.execution.join import ( - _compute_join_column, - _extract_predicate_names, -) - - -@execute_node.register( - ops.AsOfJoin, - dd.DataFrame, - dd.DataFrame, - tuple, - (Timedelta, type(None)), - tuple, -) -def execute_asof_join(op, left, right, by, tolerance, predicates, **kwargs): - left_on, right_on = _extract_predicate_names(predicates) - left_by, right_by = _extract_predicate_names(by) - - assert 0 <= len(left_on) <= 1, f"len(left_on) == {len(left_on)}" - assert 0 <= len(right_on) <= 1, f"len(right_on) == {len(right_on)}" - - on = left_on if left_on == right_on else None - return dd.merge_asof( - left=left, - right=right, - # NB: dask 2022.4.1 contains a bug from - # https://github.com/dask/dask/pull/8857 that keeps a column if `on` is - # non-empty without checking whether `left_on` is non-empty, this - # check works around that - on=on, - left_on=left_on if on is None else None, - right_on=right_on if on is None else None, - left_by=left_by or None, - right_by=right_by or None, - tolerance=tolerance, - suffixes=constants.JOIN_SUFFIXES, - ) - - -@execute_node.register(ops.CrossJoin, dd.DataFrame, dd.DataFrame, tuple) -def execute_cross_join(op, left, right, predicates, **kwargs): - """Execute a cross join in dask. - - Notes - ----- - We create a dummy column of all :data:`True` instances and use that as the - join key. This results in the desired Cartesian product behavior guaranteed - by cross join. 
- """ - assert not predicates, "cross join should have an empty predicate set" - # generate a unique name for the temporary join key - key = f"cross_join_{ibis.util.guid()}" - join_key = {key: True} - new_left = left.assign(**join_key) - new_right = right.assign(**join_key) - - # inner/outer doesn't matter because every row matches every other row - result = dd.merge( - new_left, - new_right, - how="inner", - on=key, - suffixes=constants.JOIN_SUFFIXES, - ) - - # remove the generated key - del result[key] - - return result - - -# TODO - execute_join - #2553 -@execute_node.register(ops.Join, dd.DataFrame, dd.DataFrame, tuple) -def execute_join(op, left, right, predicates, **kwargs): - op_type = type(op) - - try: - how = constants.JOIN_TYPES[op_type] - except KeyError: - raise NotImplementedError(f"{op_type.__name__} not supported") - - on = {op.left: [], op.right: []} - for predicate in predicates: - if not isinstance(predicate, ops.Equals): - raise TypeError("Only equality join predicates supported with dask") - new_left_column, left_pred_root = _compute_join_column(predicate.left, **kwargs) - on[left_pred_root].append(new_left_column) - - new_right_column, right_pred_root = _compute_join_column( - predicate.right, **kwargs - ) - on[right_pred_root].append(new_right_column) - - df = dd.merge( - left, - right, - how=how, - left_on=on[op.left], - right_on=on[op.right], - suffixes=constants.JOIN_SUFFIXES, - ) - return df diff --git a/ibis/backends/dask/execution/maps.py b/ibis/backends/dask/execution/maps.py deleted file mode 100644 index 4a44ad72cd84..000000000000 --- a/ibis/backends/dask/execution/maps.py +++ /dev/null @@ -1,107 +0,0 @@ -from __future__ import annotations - -from collections.abc import Mapping - -import dask.dataframe as dd -import numpy as np -import pandas as pd - -import ibis.expr.operations as ops -from ibis.backends.dask.dispatch import execute_node -from ibis.backends.dask.execution.util import ( - TypeRegistrationDict, - register_types_to_dispatcher, -) -from ibis.backends.pandas.execution.maps import ( - map_contains_dict_series, - map_contains_series_object, - map_contains_series_series, - map_get_dict_scalar_series, - map_get_dict_series_scalar, - map_get_dict_series_series, - map_get_series_scalar_scalar, - map_get_series_scalar_series, - map_get_series_series_scalar, - map_get_series_series_series, - map_keys_series, - map_series_series, - map_values_series, - safe_merge, -) - -# NOTE - to avoid dispatch ambiguities we must unregister pandas, only to -# re-register below. The ordering in which dispatches are registered is -# meaningful. See -# https://multiple-dispatch.readthedocs.io/en/latest/resolution.html#ambiguities -# for more detail. -PANDAS_REGISTERED_TYPES = [ - (ops.MapGet, Mapping, object, pd.Series), - (ops.MapGet, Mapping, pd.Series, object), -] -for registered_type in PANDAS_REGISTERED_TYPES: - del execute_node[registered_type] - - -DASK_DISPATCH_TYPES: TypeRegistrationDict = { - ops.Map: [((dd.Series, dd.Series), map_series_series)], - ops.MapGet: [ - ((dd.Series, object, object), map_get_series_scalar_scalar), - ((dd.Series, object, dd.Series), map_get_series_scalar_series), - ((dd.Series, dd.Series, object), map_get_series_series_scalar), - ((dd.Series, dd.Series, dd.Series), map_get_series_series_series), - # This never occurs but we need to register it so multipledispatch - # does not see below registrations as ambiguous. See NOTE above. 
- ( - (Mapping, (dd.Series, pd.Series), (dd.Series, pd.Series)), - map_get_dict_series_series, - ), - ((Mapping, object, (dd.Series, pd.Series)), map_get_dict_scalar_series), - ((Mapping, (dd.Series, pd.Series), object), map_get_dict_series_scalar), - ], - ops.MapContains: [ - ((Mapping, dd.Series), map_contains_dict_series), - ((dd.Series, dd.Series), map_contains_series_series), - ((dd.Series, object), map_contains_series_object), - ], - ops.MapKeys: [((dd.Series,), map_keys_series)], - ops.MapValues: [((dd.Series,), map_values_series)], -} -register_types_to_dispatcher(execute_node, DASK_DISPATCH_TYPES) - - -@execute_node.register(ops.MapLength, dd.Series) -def map_length_series(op, data, **kwargs): - return data.map(len, na_action="ignore") - - -def none_filled_dask_series(n): - dd.from_array(np.full(n, None)) - - -@execute_node.register(ops.MapMerge, (Mapping, type(None)), dd.Series) -def execute_map_concat_dict_series(op, lhs, rhs, **kwargs): - if lhs is None: - return none_filled_dask_series(len(rhs)) - return rhs.map( - lambda m, lhs=lhs: safe_merge(lhs, m), - meta=(rhs.name, rhs.dtype), - ) - - -@execute_node.register(ops.MapMerge, dd.Series, (Mapping, type(None))) -def execute_map_concat_series_dict(op, lhs, rhs, **kwargs): - if rhs is None: - return none_filled_dask_series(len(lhs)) - return lhs.map( - lambda m, rhs=rhs: safe_merge(m, rhs), - meta=(lhs.name, lhs.dtype), - ) - - -@execute_node.register(ops.MapMerge, dd.Series, dd.Series) -def execute_map_concat_series_series(op, lhs, rhs, **kwargs): - rhsiter = iter(rhs.values) - return lhs.map( - lambda m, rhsiter=rhsiter: safe_merge(m, next(rhsiter)), - meta=(lhs.name, lhs.dtype), - ) diff --git a/ibis/backends/dask/execution/numeric.py b/ibis/backends/dask/execution/numeric.py deleted file mode 100644 index 1c2bfb9767ec..000000000000 --- a/ibis/backends/dask/execution/numeric.py +++ /dev/null @@ -1,153 +0,0 @@ -from __future__ import annotations - -import collections -import decimal -import functools -import numbers - -import dask.dataframe as dd -import dask.dataframe.groupby as ddgb -import numpy as np -import pandas as pd - -import ibis.expr.operations as ops -from ibis.backends.dask.dispatch import execute_node -from ibis.backends.dask.execution.util import make_selected_obj -from ibis.backends.pandas.core import numeric_types - - -@execute_node.register(ops.Negate, dd.Series) -def execute_series_negate(_, data, **kwargs): - return -data - - -@execute_node.register(ops.Negate, ddgb.SeriesGroupBy) -def execute_series_group_by_negate(op, data, **kwargs): - return execute_series_negate(op, make_selected_obj(data), **kwargs).groupby( - data.index - ) - - -def call_numpy_ufunc(func, op, data, **kwargs): - if data.dtype == np.dtype(np.object_): - return data.apply( - functools.partial(execute_node, op, **kwargs), - meta=(data.name, "object"), - ) - return func(data) - - -@execute_node.register(ops.Unary, dd.Series) -def execute_series_unary_op(op, data, **kwargs): - op_type = type(op) - if op_type == ops.BitwiseNot: - function = np.bitwise_not - else: - function = getattr(np, op_type.__name__.lower()) - return call_numpy_ufunc(function, op, data, **kwargs) - - -@execute_node.register(ops.Acos, dd.Series) -def execute_series_acos(_, data, **kwargs): - return np.arccos(data) - - -@execute_node.register(ops.Asin, dd.Series) -def execute_series_asin(_, data, **kwargs): - return np.arcsin(data) - - -@execute_node.register(ops.Atan, dd.Series) -def execute_series_atan(_, data, **kwargs): - return np.arctan(data) - - 
-@execute_node.register(ops.Cot, dd.Series) -def execute_series_cot(_, data, **kwargs): - return 1.0 / np.tan(data) - - -@execute_node.register(ops.Atan2, dd.Series, dd.Series) -@execute_node.register(ops.Atan2, numeric_types, dd.Series) -@execute_node.register(ops.Atan2, dd.Series, numeric_types) -def execute_series_atan2(_, y, x, **kwargs): - return np.arctan2(y, x) - - -@execute_node.register((ops.Ceil, ops.Floor), dd.Series) -def execute_series_ceil(op, data, **kwargs): - return_type = np.object_ if data.dtype == np.object_ else np.int64 - func = getattr(np, type(op).__name__.lower()) - return call_numpy_ufunc(func, op, data, **kwargs).astype(return_type) - - -def vectorize_object(op, arg, *args, **kwargs): - # TODO - this works for now, but I think we can do something much better - func = np.vectorize(functools.partial(execute_node, op, **kwargs)) - out = dd.from_array(func(arg, *args), columns=arg.name) - return out - - -@execute_node.register( - ops.Log, - dd.Series, - (dd.Series, pd.Series, numbers.Real, decimal.Decimal, type(None)), -) -def execute_series_log_with_base(op, data, base, **kwargs): - if data.dtype == np.dtype(np.object_): - return vectorize_object(op, data, base, **kwargs) - - if base is None: - return np.log(data) - return np.log(data) / np.log(base) - - -@execute_node.register(ops.Ln, dd.Series) -def execute_series_natural_log(op, data, **kwargs): - if data.dtype == np.dtype(np.object_): - return data.apply( - functools.partial(execute_node, op, **kwargs), - meta=(data.name, "object"), - ) - return np.log(data) - - -@execute_node.register(ops.Quantile, dd.Series, numeric_types, (dd.Series, type(None))) -def execute_series_quantile(op, data, quantile, mask, **_): - if mask is not None: - data = data.loc[mask] - return data.quantile(q=quantile) - - -@execute_node.register(ops.Quantile, ddgb.SeriesGroupBy, numeric_types, type(None)) -def execute_series_quantile_group_by(op, data, quantile, mask, **_): - raise NotImplementedError( - "Quantile not implemented for Dask SeriesGroupBy, Dask #9824" - ) - - -@execute_node.register( - ops.MultiQuantile, dd.Series, collections.abc.Sequence, type(None) -) -def execute_series_quantile_sequence(op, data, quantile, mask, **_): - return list(data.quantile(q=quantile)) - - -# TODO - aggregations - #2553 -@execute_node.register( - ops.MultiQuantile, ddgb.SeriesGroupBy, collections.abc.Sequence, type(None) -) -def execute_series_quantile_groupby(op, data, quantile, mask, aggcontext=None, **_): - def q(x, quantile): - result = x.quantile(quantile).tolist() - return [result for _ in range(len(x))] - - return aggcontext.agg(data, q, quantile) - - -@execute_node.register(ops.Round, dd.Series, (dd.Series, np.integer, type(None), int)) -def execute_round_series(op, data, places, **kwargs): - if data.dtype == np.dtype(np.object_): - return vectorize_object(op, data, places, **kwargs) - result = data.round(places or 0) - return result if places else result.astype("int64") diff --git a/ibis/backends/dask/execution/reductions.py b/ibis/backends/dask/execution/reductions.py deleted file mode 100644 index 1502841c2f3a..000000000000 --- a/ibis/backends/dask/execution/reductions.py +++ /dev/null @@ -1,203 +0,0 @@ -"""Reduces sequences. - -NOTE: This file overwrite the pandas backend registered handlers for: - -- execute_node_greatest_list, -- execute_node_least_list - -This is so we can register our handlers that transparently handle both the' -dask specific types and pandas types. 
This cannot be done via the -dispatcher since the top level container is a list. -""" - -from __future__ import annotations - -import contextlib -import functools -from collections.abc import Sized - -import dask.array as da -import dask.dataframe as dd -import dask.dataframe.groupby as ddgb -import numpy as np -import toolz -from multipledispatch.variadic import Variadic - -import ibis.common.exceptions as exc -import ibis.expr.operations as ops -from ibis.backends.dask.dispatch import execute_node -from ibis.backends.dask.execution.util import make_selected_obj -from ibis.backends.pandas.execution.generic import ( - execute_node_greatest_list, - execute_node_least_list, -) - - -@toolz.curry -def promote_to_sequence(length, obj): - if isinstance(obj, dd.Series): - # we must force length computation if we have mixed types - # otherwise da.reductions can't compare arrays - return obj.to_dask_array(lengths=True) - else: - return da.from_array(np.repeat(obj, length)) - - -def pairwise_reducer(func, values): - return functools.reduce(lambda x, y: func(x, y), values) - - -def compute_row_reduction(func, values): - final_sizes = {len(x) for x in values if isinstance(x, Sized)} - if not final_sizes: - return func(values) - (final_size,) = final_sizes - arrays = list(map(promote_to_sequence(final_size), values)) - raw = pairwise_reducer(func, arrays) - return dd.from_array(raw).squeeze() - - -# XXX: there's non-determinism in the dask and pandas dispatch registration of -# Greatest/Least/Coalesce, because 1) dask and pandas share `execute_node` -# which is a design flaw and 2) greatest/least/coalesce need to handle -# mixed-type (the Series types plus any related scalar type) inputs so `object` -# is used as a possible input type. -# -# Here we remove the dispatch for pandas if it exists because the dask rule -# handles both cases. 
-with contextlib.suppress(KeyError): - del execute_node[ops.Greatest, Variadic[object]] - - -with contextlib.suppress(KeyError): - del execute_node[ops.Least, Variadic[object]] - - -@execute_node.register(ops.Greatest, [(object, dd.Series)]) -def dask_execute_node_greatest_list(op, *values, **kwargs): - if all(type(v) != dd.Series for v in values): - return execute_node_greatest_list(op, *values, **kwargs) - return compute_row_reduction(da.maximum, values) - - -@execute_node.register(ops.Least, [(object, dd.Series)]) -def dask_execute_node_least_list(op, *values, **kwargs): - if all(type(v) != dd.Series for v in values): - return execute_node_least_list(op, *values, **kwargs) - return compute_row_reduction(da.minimum, values) - - -@execute_node.register(ops.Reduction, ddgb.SeriesGroupBy, type(None)) -def execute_reduction_series_groupby(op, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg(data, type(op).__name__.lower()) - - -def _filtered_reduction(data, mask): - return make_selected_obj(data)[mask.obj].groupby(data.index) - - -@execute_node.register(ops.Reduction, ddgb.SeriesGroupBy, ddgb.SeriesGroupBy) -def execute_reduction_series_gb_mask(op, data, mask, aggcontext=None, **kwargs): - grouped_and_filtered_data = _filtered_reduction(data, mask) - return aggcontext.agg(grouped_and_filtered_data, type(op).__name__.lower()) - - -@execute_node.register(ops.Reduction, dd.Series, (dd.Series, type(None))) -def execute_reduction_series_mask(op, data, mask, aggcontext=None, **kwargs): - operand = data[mask] if mask is not None else data - return aggcontext.agg(operand, type(op).__name__.lower()) - - -@execute_node.register( - (ops.First, ops.Last), ddgb.SeriesGroupBy, (ddgb.SeriesGroupBy, type(None)) -) -@execute_node.register((ops.First, ops.Last), dd.Series, (dd.Series, type(None))) -def execute_first_last_dask(op, data, mask, aggcontext=None, **kwargs): - raise exc.OperationNotDefinedError( - "Dask does not support first or last aggregations" - ) - - -@execute_node.register( - (ops.CountDistinct, ops.ApproxCountDistinct), - ddgb.SeriesGroupBy, - type(None), -) -def execute_count_distinct_series_groupby(op, data, _, aggcontext=None, **kwargs): - return aggcontext.agg(data, "nunique") - - -@execute_node.register( - (ops.CountDistinct, ops.ApproxCountDistinct), - ddgb.SeriesGroupBy, - ddgb.SeriesGroupBy, -) -def execute_count_distinct_series_groupby_mask( - op, data, mask, aggcontext=None, **kwargs -): - grouped_and_filtered_data = _filtered_reduction(data, mask) - return aggcontext.agg(grouped_and_filtered_data, "nunique") - - -@execute_node.register( - (ops.CountDistinct, ops.ApproxCountDistinct), - dd.Series, - (dd.Series, type(None)), -) -def execute_count_distinct_series_mask(op, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg(data[mask] if mask is not None else data, "nunique") - - -variance_ddof = {"pop": 0, "sample": 1} - - -@execute_node.register(ops.Variance, ddgb.SeriesGroupBy, type(None)) -def execute_reduction_series_groupby_var(op, data, _, aggcontext=None, **kwargs): - return aggcontext.agg(data, "var", ddof=variance_ddof[op.how]) - - -@execute_node.register(ops.Variance, ddgb.SeriesGroupBy, ddgb.SeriesGroupBy) -def execute_var_series_groupby_mask(op, data, mask, aggcontext=None, **kwargs): - grouped_and_filtered_data = _filtered_reduction(data, mask) - return aggcontext.agg(grouped_and_filtered_data, "var", ddof=variance_ddof[op.how]) - - -@execute_node.register(ops.Variance, dd.Series, (dd.Series, type(None))) -def execute_variance_series(op, data, 
mask, aggcontext=None, **kwargs): - return aggcontext.agg( - data[mask] if mask is not None else data, - "var", - ddof=variance_ddof[op.how], - ) - - -@execute_node.register(ops.StandardDev, ddgb.SeriesGroupBy, type(None)) -def execute_reduction_series_groupby_std(op, data, _, aggcontext=None, **kwargs): - return aggcontext.agg(data, "std", ddof=variance_ddof[op.how]) - - -@execute_node.register(ops.StandardDev, ddgb.SeriesGroupBy, ddgb.SeriesGroupBy) -def execute_std_series_groupby_mask(op, data, mask, aggcontext=None, **kwargs): - grouped_and_filtered_data = _filtered_reduction(data, mask) - return aggcontext.agg(grouped_and_filtered_data, "std", ddof=variance_ddof[op.how]) - - -@execute_node.register(ops.StandardDev, dd.Series, (dd.Series, type(None))) -def execute_standard_dev_series(op, data, mask, aggcontext=None, **kwargs): - return aggcontext.agg( - data[mask] if mask is not None else data, - "std", - ddof=variance_ddof[op.how], - ) - - -@execute_node.register(ops.ArgMax, dd.Series, dd.Series, (dd.Series, type(None))) -def execute_argmax_series(op, data, key, mask, aggcontext=None, **kwargs): - idxmax = aggcontext.agg(key[mask] if mask is not None else key, "idxmax").compute() - return data.loc[idxmax] - - -@execute_node.register(ops.ArgMin, dd.Series, dd.Series, (dd.Series, type(None))) -def execute_argmin_series(op, data, key, mask, aggcontext=None, **kwargs): - idxmin = aggcontext.agg(key[mask] if mask is not None else key, "idxmin").compute() - return data.loc[idxmin] diff --git a/ibis/backends/dask/execution/selection.py b/ibis/backends/dask/execution/selection.py deleted file mode 100644 index e4424ad1c6f9..000000000000 --- a/ibis/backends/dask/execution/selection.py +++ /dev/null @@ -1,263 +0,0 @@ -"""Dispatching code for Selection operations.""" - -from __future__ import annotations - -import functools -import operator -from typing import TYPE_CHECKING - -import dask.dataframe as dd -import pandas as pd -from toolz import concatv - -import ibis.expr.analysis as an -import ibis.expr.operations as ops -from ibis.backends.base.df.scope import Scope -from ibis.backends.dask.core import execute -from ibis.backends.dask.dispatch import execute_node -from ibis.backends.dask.execution.util import ( - add_globally_consecutive_column, - coerce_to_output, - compute_sorted_frame, - is_row_order_preserving, - rename_index, -) -from ibis.backends.pandas.execution.selection import ( - build_df_from_selection, - map_new_column_names_to_data, - remap_overlapping_column_names, -) -from ibis.backends.pandas.execution.util import get_join_suffix_for_op - -if TYPE_CHECKING: - from ibis.backends.base.df.timecontext import TimeContext - - -# TODO(kszucs): deduplicate with pandas.compute_projection() since it is almost -# an exact copy of that function -def compute_projection( - node, - parent, - data, - scope: Scope | None = None, - timecontext: TimeContext | None = None, - **kwargs, -): - """Compute a projection. - - `ibis.expr.types.Scalar` instances occur when a specific column projection - is a window operation. 
- """ - if isinstance(node, ops.TableNode): - if node == parent.table: - return data - - assert isinstance(parent.table, ops.Join) - assert node in (parent.table.left, parent.table.right) - - mapping = remap_overlapping_column_names( - parent.table, - root_table=node, - data_columns=frozenset(data.columns), - ) - return map_new_column_names_to_data(mapping, data) - elif isinstance(node, ops.Value): - name = node.name - assert name is not None, "Value selection name is None" - - if node.shape.is_scalar(): - data_columns = frozenset(data.columns) - - if scope is None: - scope = Scope() - - scope = scope.merge_scopes( - Scope( - { - t: map_new_column_names_to_data( - remap_overlapping_column_names( - parent.table, t, data_columns - ), - data, - ) - }, - timecontext, - ) - for t in an.find_immediate_parent_tables(node) - ) - scalar = execute(node, scope=scope, **kwargs) - return data.assign(**{name: scalar})[name] - else: - if isinstance(node, ops.TableColumn): - if name in data: - return data[name].rename(name) - - if not isinstance(parent.table, ops.Join): - raise KeyError(name) - - suffix = get_join_suffix_for_op(node, parent.table) - return data.loc[:, name + suffix].rename(name) - - data_columns = frozenset(data.columns) - - scope = scope.merge_scopes( - Scope( - { - t: map_new_column_names_to_data( - remap_overlapping_column_names( - parent.table, t, data_columns - ), - data, - ) - }, - timecontext, - ) - for t in an.find_immediate_parent_tables(node) - ) - result = execute(node, scope=scope, timecontext=timecontext, **kwargs) - return coerce_to_output(result, node, data.index) - else: - raise TypeError(node) - - -def build_df_from_projection( - selections: list[ops.Node], - op: ops.Selection, - data: dd.DataFrame, - **kwargs, -) -> dd.DataFrame: - """Build up projection from individual pieces.""" - # Fast path for when we're assigning columns into the same table. - if (selections[0] is op.table) and all(is_row_order_preserving(selections[1:])): - for node in selections[1:]: - projection = compute_projection(node, op, data, **kwargs) - if isinstance(projection, dd.Series): - data = data.assign(**{projection.name: projection}) - else: - data = data.assign(**{c: projection[c] for c in projection.columns}) - return data - - # Slow path when we cannot do direct assigns - # Create a unique row identifier and set it as the index. This is - # used in dd.concat to merge the pieces back together. - partitioned_data = add_globally_consecutive_column(data) - data_pieces = [ - compute_projection(node, op, partitioned_data, **kwargs) for node in selections - ] - result = dd.concat(data_pieces, axis=1) - # _ibis_index was added and used to concat data_pieces together. - # Drop the index name here but keep the index as the dataframe is - # already partitioned on it. 
- return rename_index(result, None) - - -@execute_node.register(ops.Selection, dd.DataFrame) -def execute_selection_dataframe( - op, - data, - scope: Scope, - timecontext: TimeContext | None, - **kwargs, -): - result = data - - if op.predicates: - predicates = _compute_predicates( - op.table, op.predicates, data, scope, timecontext, **kwargs - ) - predicate = functools.reduce(operator.and_, predicates) - result = result.loc[predicate] - - if op.selections: - # if we are just performing select operations we can do a direct - # selection - if all(isinstance(s, ops.TableColumn) for s in op.selections): - result = build_df_from_selection(op.selections, result, op.table) - else: - result = build_df_from_projection( - op.selections, - op, - result, - scope=scope, - timecontext=timecontext, - **kwargs, - ) - if op.sort_keys: - if len(op.sort_keys) > 1: - raise NotImplementedError( - """ - Multi-key sorting is not implemented for the Dask backend - """ - ) - sort_key = op.sort_keys[0] - if sort_key.descending: - raise NotImplementedError( - "Descending sort is not supported for the Dask backend" - ) - result, _, _ = compute_sorted_frame( - df=result, - order_by=[sort_key], - scope=scope, - timecontext=timecontext, - **kwargs, - ) - result = add_globally_consecutive_column(result, col_name="_ibis_sort_index") - - return result - else: - grouping_keys = ordering_keys = () - - # return early if we do not have any temporary grouping or ordering columns - assert not grouping_keys, "group by should never show up in Selection" - if not ordering_keys: - return result - - # create a sequence of columns that we need to drop - temporary_columns = pd.Index(concatv(grouping_keys, ordering_keys)).difference( - data.columns - ) - - # no reason to call drop if we don't need to - if temporary_columns.empty: - return result - - # drop every temporary column we created for ordering or grouping - return result.drop(temporary_columns, axis=1) - - -def _compute_predicates( - table_op, - predicates, - data, - scope: Scope, - timecontext: TimeContext | None, - **kwargs, -): - """Compute the predicates for a table operation. - - This handles the cases where `predicates` are computed columns, in addition - to the simple case of named columns coming directly from the input table. - """ - for predicate in predicates: - # Map each root table of the predicate to the data so that we compute - # predicates on the result instead of any left or right tables if the - # Selection is on a Join. Project data to only include columns from - # the root table. 
- root_tables = an.find_immediate_parent_tables(predicate) - - # handle suffixes - data_columns = frozenset(data.columns) - - additional_scope = Scope() - for root_table in root_tables: - mapping = remap_overlapping_column_names(table_op, root_table, data_columns) - if mapping is not None: - new_data = data.loc[:, mapping.keys()].rename(columns=mapping) - else: - new_data = data - additional_scope = additional_scope.merge_scope( - Scope({root_table: new_data}, timecontext) - ) - - scope = scope.merge_scope(additional_scope) - yield execute(predicate, scope=scope, **kwargs) diff --git a/ibis/backends/dask/execution/strings.py b/ibis/backends/dask/execution/strings.py deleted file mode 100644 index cfadf80d15be..000000000000 --- a/ibis/backends/dask/execution/strings.py +++ /dev/null @@ -1,363 +0,0 @@ -from __future__ import annotations - -import functools -import itertools -import operator - -import dask.dataframe as dd -import dask.dataframe.groupby as ddgb -import numpy as np -import pandas as pd -import toolz -from pandas import isnull - -import ibis -import ibis.expr.operations as ops -from ibis.backends.dask.core import execute -from ibis.backends.dask.dispatch import execute_node -from ibis.backends.dask.execution.util import ( - TypeRegistrationDict, - make_selected_obj, - register_types_to_dispatcher, -) -from ibis.backends.pandas.core import integer_types, scalar_types -from ibis.backends.pandas.execution.strings import ( - execute_json_getitem_series_series, - execute_json_getitem_series_str_int, - execute_series_regex_extract, - execute_series_regex_replace, - execute_series_regex_search, - execute_series_right, - execute_series_string_replace, - execute_series_translate_scalar_scalar, - execute_series_translate_scalar_series, - execute_series_translate_series_scalar, - execute_series_translate_series_series, - execute_string_capitalize, - execute_string_contains, - execute_string_find, - execute_string_length_series, - execute_string_like_series_string, - execute_string_lower, - execute_string_lpad, - execute_string_lstrip, - execute_string_repeat, - execute_string_reverse, - execute_string_rpad, - execute_string_rstrip, - execute_string_strip, - execute_string_upper, - execute_substring_int_int, - haystack_to_series_of_lists, -) - -DASK_DISPATCH_TYPES: TypeRegistrationDict = { - ops.StringLength: [((dd.Series,), execute_string_length_series)], - ops.Substring: [ - ( - ( - dd.Series, - integer_types, - (type(None), *integer_types), - ), - execute_substring_int_int, - ), - ], - ops.Strip: [((dd.Series,), execute_string_strip)], - ops.LStrip: [((dd.Series,), execute_string_lstrip)], - ops.RStrip: [((dd.Series,), execute_string_rstrip)], - ops.LPad: [ - ( - ( - dd.Series, - (dd.Series,) + integer_types, - (dd.Series, str), - ), - execute_string_lpad, - ), - ], - ops.RPad: [ - ( - ( - dd.Series, - (dd.Series,) + integer_types, - (dd.Series, str), - ), - execute_string_rpad, - ), - ], - ops.Reverse: [((dd.Series,), execute_string_reverse)], - ops.StringReplace: [ - ( - (dd.Series, (dd.Series, str), (dd.Series, str)), - execute_series_string_replace, - ) - ], - ops.Lowercase: [((dd.Series,), execute_string_lower)], - ops.Uppercase: [((dd.Series,), execute_string_upper)], - ops.Capitalize: [((dd.Series,), execute_string_capitalize)], - ops.Repeat: [ - ((dd.Series, (dd.Series,) + integer_types), execute_string_repeat), - ], - ops.StringFind: [ - ( - ( - dd.Series, - (dd.Series, str), - (dd.Series, type(None)) + integer_types, - (dd.Series, type(None)) + integer_types, - ), - 
execute_string_find, - ) - ], - ops.StringContains: [ - ( - ( - dd.Series, - (dd.Series, str), - ), - execute_string_contains, - ) - ], - ops.StringSQLLike: [ - ( - ( - dd.Series, - str, - (str, type(None)), - ), - execute_string_like_series_string, - ), - ], - ops.RegexSearch: [ - ( - ( - dd.Series, - str, - ), - execute_series_regex_search, - ) - ], - ops.RegexExtract: [ - ( - (dd.Series, (dd.Series, str), integer_types), - execute_series_regex_extract, - ), - ], - ops.RegexReplace: [ - ( - ( - dd.Series, - str, - str, - ), - execute_series_regex_replace, - ), - ], - ops.Translate: [ - ( - (dd.Series, dd.Series, dd.Series), - execute_series_translate_series_series, - ), - ((dd.Series, dd.Series, str), execute_series_translate_series_scalar), - ((dd.Series, str, dd.Series), execute_series_translate_scalar_series), - ((dd.Series, str, str), execute_series_translate_scalar_scalar), - ], - ops.StrRight: [((dd.Series, integer_types), execute_series_right)], - ops.JSONGetItem: [ - ((dd.Series, (str, int)), execute_json_getitem_series_str_int), - ((dd.Series, dd.Series), execute_json_getitem_series_series), - ], -} -register_types_to_dispatcher(execute_node, DASK_DISPATCH_TYPES) - - -@execute_node.register(ops.Substring, dd.Series, dd.Series, integer_types) -def execute_substring_series_int(op, data, start, length, **kwargs): - return execute_substring_series_series( - op, data, start, dd.from_array(np.repeat(length, len(start))), **kwargs - ) - - -@execute_node.register(ops.Substring, dd.Series, integer_types, dd.Series) -def execute_string_substring_int_series(op, data, start, length, **kwargs): - return execute_substring_series_series( - op, - data, - dd.from_array(np.repeat(start, len(length))), - length, - **kwargs, - ) - - -# TODO - substring - #2553 -@execute_node.register(ops.Substring, dd.Series, dd.Series, dd.Series) -def execute_substring_series_series(op, data, start, length, **kwargs): - end = start + length - - # TODO - this is broken - start_iter = start.items() - end_iter = end.items() - - def iterate(value, start_iter=start_iter, end_iter=end_iter): - _, begin = next(start_iter) - _, end = next(end_iter) - if (begin is not None and isnull(begin)) or (end is not None and isnull(end)): - return None - return value[begin:end] - - return data.map(iterate) - - -@execute_node.register(ops.StringConcat, tuple) -def execute_node_string_concat(op, values, **kwargs): - values = [execute(arg, **kwargs) for arg in values] - return functools.reduce(operator.add, values) - - -@execute_node.register(ops.StringSQLLike, ddgb.SeriesGroupBy, str, str) -def execute_string_like_series_groupby_string(op, data, pattern, escape, **kwargs): - return execute_string_like_series_string( - op, make_selected_obj(data), pattern, escape, **kwargs - ).groupby(data.grouper.groupings) - - -# TODO - aggregations - #2553 -@execute_node.register(ops.GroupConcat, dd.Series, str, (dd.Series, type(None))) -def execute_group_concat_series_mask(op, data, sep, mask, aggcontext=None, **kwargs): - return aggcontext.agg( - data[mask] if mask is not None else data, - lambda series, sep=sep: sep.join(series.values), - ) - - -@execute_node.register(ops.GroupConcat, ddgb.SeriesGroupBy, str, type(None)) -def execute_group_concat_series_gb(op, data, sep, _, aggcontext=None, **kwargs): - custom_group_concat = dd.Aggregation( - name="custom_group_concat", - chunk=lambda s: s.apply(list), - agg=lambda s0: s0.apply( - lambda chunks: sep.join( - str(s) for s in itertools.chain.from_iterable(chunks) - ) - ), - ) - return 
data.agg(custom_group_concat) - - -# TODO - aggregations - #2553 -@execute_node.register(ops.GroupConcat, ddgb.SeriesGroupBy, str, ddgb.SeriesGroupBy) -def execute_group_concat_series_gb_mask(op, data, sep, mask, aggcontext=None, **kwargs): - def method(series, sep=sep): - return sep.join(series.values.astype(str)) - - return aggcontext.agg( - data, - lambda data, mask=mask.obj, method=method: method(data[mask[data.index]]), - ) - - -@execute_node.register(ops.StringAscii, dd.Series) -def execute_string_ascii(op, data, **kwargs): - output_meta = pd.Series([], dtype=np.dtype("int32"), name=data.name) - return data.map(ord, meta=output_meta) - - -@execute_node.register(ops.StringAscii, ddgb.SeriesGroupBy) -def execute_string_ascii_group_by(op, data, **kwargs): - return execute_string_ascii(op, make_selected_obj(data), **kwargs).groupby( - data.index - ) - - -@execute_node.register(ops.RegexSearch, ddgb.SeriesGroupBy, str) -def execute_series_regex_search_gb(op, data, pattern, **kwargs): - return execute_series_regex_search( - op, - make_selected_obj(data), - getattr(pattern, "obj", pattern), - **kwargs, - ).groupby(data.index) - - -@execute_node.register(ops.RegexExtract, ddgb.SeriesGroupBy, str, integer_types) -def execute_series_regex_extract_gb(op, data, pattern, index, **kwargs): - return execute_series_regex_extract( - op, make_selected_obj(data), pattern, index, **kwargs - ).groupby(data.index) - - -@execute_node.register(ops.RegexReplace, ddgb.SeriesGroupBy, str, str) -def execute_series_regex_replace_gb(op, data, pattern, replacement, **kwargs): - return execute_series_regex_replace( - make_selected_obj(data), pattern, replacement, **kwargs - ).groupby(data.index) - - -@execute_node.register(ops.StrRight, ddgb.SeriesGroupBy, integer_types) -def execute_series_right_gb(op, data, nchars, **kwargs): - return execute_series_right(op, make_selected_obj(data), nchars).groupby(data.index) - - -@execute_node.register(ops.StringJoin, (dd.Series, str), tuple) -def execute_series_join_scalar_sep(op, sep, args, **kwargs): - data = [execute(arg, **kwargs) for arg in args] - return functools.reduce(lambda x, y: x + sep + y, data) - - -def haystack_to_dask_series_of_lists(haystack, index=None): - pieces = haystack_to_series_of_lists(haystack, index) - return dd.from_pandas(pieces, npartitions=1) - - -@execute_node.register(ops.FindInSet, dd.Series, tuple) -def execute_series_find_in_set(op, needle, haystack, **kwargs): - def find_in_set(index, elements): - return ibis.util.safe_index(elements, index) - - haystack = [execute(arg, **kwargs) for arg in haystack] - return needle.apply(find_in_set, args=(haystack,)) - - -@execute_node.register(ops.FindInSet, ddgb.SeriesGroupBy, list) -def execute_series_group_by_find_in_set(op, needle, haystack, **kwargs): - pieces = [getattr(piece, "obj", piece) for piece in haystack] - return execute_series_find_in_set( - op, make_selected_obj(needle), pieces, **kwargs - ).groupby(needle.index) - - -# TODO we need this version not pandas -@execute_node.register(ops.FindInSet, scalar_types, list) -def execute_string_group_by_find_in_set(op, needle, haystack, **kwargs): - # `list` could contain series, series groupbys, or scalars - # mixing series and series groupbys is not allowed - series_in_haystack = [ - type(piece) - for piece in haystack - if isinstance(piece, (dd.Series, ddgb.SeriesGroupBy)) - ] - - if not series_in_haystack: - return ibis.util.safe_index(haystack, needle) - - try: - (collection_type,) = frozenset(map(type, series_in_haystack)) - except 
ValueError: - raise ValueError("Mixing Series and ddgb.SeriesGroupBy is not allowed") - - pieces = haystack_to_dask_series_of_lists( - [getattr(piece, "obj", piece) for piece in haystack] - ) - - result = pieces.map(toolz.flip(ibis.util.safe_index)(needle)) - if issubclass(collection_type, dd.Series): - return result - - assert issubclass(collection_type, ddgb.SeriesGroupBy) - - return result.groupby( - toolz.first( - piece.grouper.groupings for piece in haystack if hasattr(piece, "grouper") - ) - ) diff --git a/ibis/backends/dask/execution/structs.py b/ibis/backends/dask/execution/structs.py deleted file mode 100644 index 0c109d10608f..000000000000 --- a/ibis/backends/dask/execution/structs.py +++ /dev/null @@ -1,35 +0,0 @@ -"""Dask backend execution of struct fields and literals.""" - -from __future__ import annotations - -import operator - -import dask.dataframe as dd -import dask.dataframe.groupby as ddgb - -import ibis.expr.operations as ops -from ibis.backends.dask.dispatch import execute_node -from ibis.backends.dask.execution.util import make_selected_obj - - -@execute_node.register(ops.StructField, dd.DataFrame) -def execute_node_struct_field_dict(op, data, **kwargs): - return data[op.field] - - -@execute_node.register(ops.StructField, dd.Series) -def execute_node_struct_field_series(op, data, **kwargs): - # TODO This meta is not necessarily right - getter = operator.itemgetter(op.field) - return data.map(getter, meta=(data.name, data.dtype)).rename(op.field) - - -@execute_node.register(ops.StructField, ddgb.SeriesGroupBy) -def execute_node_struct_field_series_group_by(op, data, **kwargs): - selected_obj = make_selected_obj(data) - getter = operator.itemgetter(op.field) - return ( - selected_obj.map(getter, meta=selected_obj._meta) - .rename(op.field) - .groupby(data.index) - ) diff --git a/ibis/backends/dask/execution/temporal.py b/ibis/backends/dask/execution/temporal.py deleted file mode 100644 index 1f82ed7d255a..000000000000 --- a/ibis/backends/dask/execution/temporal.py +++ /dev/null @@ -1,177 +0,0 @@ -from __future__ import annotations - -import datetime - -import dask.array as da -import dask.dataframe as dd -import dask.dataframe.groupby as ddgb -import numpy as np -from pandas import Timedelta - -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -from ibis.backends.dask.dispatch import execute_node -from ibis.backends.dask.execution.util import ( - TypeRegistrationDict, - make_selected_obj, - register_types_to_dispatcher, -) -from ibis.backends.pandas.core import ( - date_types, - integer_types, - numeric_types, - timedelta_types, - timestamp_types, -) -from ibis.backends.pandas.execution.temporal import ( - day_name, - execute_cast_integer_to_interval_series, - execute_date_add, - execute_date_sub_diff, - execute_date_sub_diff_date_series, - execute_date_sub_diff_series_date, - execute_day_of_week_index_series, - execute_day_of_week_name_series, - execute_epoch_seconds_series, - execute_extract_microsecond_series, - execute_extract_millisecond_series, - execute_extract_timestamp_field_series, - execute_interval_add_multiply_delta_series, - execute_interval_from_integer_series, - execute_interval_multiply_fdiv_series_numeric, - execute_strftime_series_str, - execute_timestamp_add_datetime_series, - execute_timestamp_date, - execute_timestamp_diff_series_datetime, - execute_timestamp_diff_sub_datetime_series, - execute_timestamp_diff_sub_series_series, - execute_timestamp_from_unix, - execute_timestamp_interval_add_series_delta, - 
execute_timestamp_interval_add_series_series, - execute_timestamp_sub_series_timedelta, - execute_timestamp_truncate, -) - -DASK_DISPATCH_TYPES: TypeRegistrationDict = { - ops.Cast: [((dd.Series, dt.Interval), execute_cast_integer_to_interval_series)], - ops.Strftime: [((dd.Series, str), execute_strftime_series_str)], - ops.TimestampFromUNIX: [ - (((dd.Series,) + integer_types), execute_timestamp_from_unix) - ], - ops.ExtractTemporalField: [((dd.Series,), execute_extract_timestamp_field_series)], - ops.ExtractMicrosecond: [((dd.Series,), execute_extract_microsecond_series)], - ops.ExtractMillisecond: [((dd.Series,), execute_extract_millisecond_series)], - ops.ExtractEpochSeconds: [((dd.Series,), execute_epoch_seconds_series)], - ops.IntervalFromInteger: [((dd.Series,), execute_interval_from_integer_series)], - ops.IntervalAdd: [ - ( - (timedelta_types, dd.Series), - execute_interval_add_multiply_delta_series, - ), - ( - (dd.Series, timedelta_types), - execute_timestamp_interval_add_series_delta, - ), - ((dd.Series, dd.Series), execute_timestamp_interval_add_series_series), - ], - ops.IntervalSubtract: [ - ((dd.Series, dd.Series), execute_timestamp_diff_sub_series_series) - ], - ops.IntervalMultiply: [ - ( - (timedelta_types, numeric_types + (dd.Series,)), - execute_interval_add_multiply_delta_series, - ), - ( - (dd.Series, numeric_types + (dd.Series,)), - execute_interval_multiply_fdiv_series_numeric, - ), - ], - ops.IntervalFloorDivide: [ - ( - ( - (Timedelta, dd.Series), - numeric_types + (dd.Series,), - ), - execute_interval_multiply_fdiv_series_numeric, - ) - ], - ops.TimestampAdd: [ - ((timestamp_types, dd.Series), execute_timestamp_add_datetime_series), - ( - (dd.Series, timedelta_types), - execute_timestamp_interval_add_series_delta, - ), - ((dd.Series, dd.Series), execute_timestamp_interval_add_series_series), - ], - ops.TimestampSub: [ - ((dd.Series, timedelta_types), execute_timestamp_sub_series_timedelta), - ( - (timestamp_types, dd.Series), - execute_timestamp_diff_sub_datetime_series, - ), - ], - (ops.TimestampDiff, ops.TimestampSub): [ - ((dd.Series, dd.Series), execute_timestamp_diff_sub_series_series) - ], - ops.TimestampDiff: [ - ((dd.Series, timestamp_types), execute_timestamp_diff_series_datetime), - ( - (timestamp_types, dd.Series), - execute_timestamp_diff_sub_datetime_series, - ), - ], - ops.DayOfWeekIndex: [((dd.Series,), execute_day_of_week_index_series)], - ops.DayOfWeekName: [((dd.Series,), execute_day_of_week_name_series)], - ops.Date: [((dd.Series,), execute_timestamp_date)], - ops.DateAdd: [ - ((dd.Series, timedelta_types), execute_date_add), - ((timedelta_types, dd.Series), execute_date_add), - ((dd.Series, dd.Series), execute_date_add), - ((date_types, dd.Series), execute_date_add), - ((dd.Series, date_types), execute_date_add), - ], - ops.DateSub: [ - ((date_types, dd.Series), execute_date_sub_diff), - ((dd.Series, dd.Series), execute_date_sub_diff), - ((dd.Series, timedelta_types), execute_date_sub_diff), - ], - ops.DateDiff: [ - ((date_types, dd.Series), execute_date_sub_diff_date_series), - ((dd.Series, dd.Series), execute_date_sub_diff), - ((dd.Series, date_types), execute_date_sub_diff_series_date), - ], - ops.TimestampTruncate: [((dd.Series,), execute_timestamp_truncate)], - ops.DateTruncate: [((dd.Series,), execute_timestamp_truncate)], -} -register_types_to_dispatcher(execute_node, DASK_DISPATCH_TYPES) - - -@execute_node.register( - ops.BetweenTime, - dd.Series, - (dd.Series, str, datetime.time), - (dd.Series, str, datetime.time), -) -def 
execute_between_time(op, data, lower, upper, **kwargs): - if getattr(data.dtype, "tz", None) is not None: - localized = data.dt.tz_convert("UTC").dt.tz_localize(None) - else: - localized = data - - time = localized.dt.time.astype(str) - indexer = ((time >= lower) & (time <= upper)).to_dask_array(True) - - result = da.zeros(len(data), dtype=np.bool_) - result[indexer] = True - return dd.from_array(result) - - -@execute_node.register(ops.DayOfWeekIndex, ddgb.SeriesGroupBy) -def execute_day_of_week_index_series_group_by(op, data, **kwargs): - return make_selected_obj(data).dt.dayofweek.astype(np.int16).groupby(data.index) - - -@execute_node.register(ops.DayOfWeekName, ddgb.SeriesGroupBy) -def execute_day_of_week_name_series_group_by(op, data, **kwargs): - return day_name(make_selected_obj(data).dt).groupby(data.index) diff --git a/ibis/backends/dask/execution/util.py b/ibis/backends/dask/execution/util.py deleted file mode 100644 index 7caefebbbb07..000000000000 --- a/ibis/backends/dask/execution/util.py +++ /dev/null @@ -1,369 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, Callable, Union - -import dask.dataframe as dd -import dask.delayed -import pandas as pd - -import ibis.expr.analysis as an -import ibis.expr.operations as ops -import ibis.util -from ibis.backends.base.df.scope import Scope -from ibis.backends.dask.core import execute -from ibis.common import graph - -if TYPE_CHECKING: - import numpy as np - from dask.dataframe.groupby import SeriesGroupBy - - from ibis.backends.base.df.timecontext import TimeContext - from ibis.backends.pandas.trace import TraceTwoLevelDispatcher - from ibis.expr.operations.sortkeys import SortKey - -DispatchRule = tuple[tuple[Union[type, tuple], ...], Callable] - -TypeRegistrationDict = dict[ - Union[type[ops.Node], tuple[type[ops.Node], ...]], list[DispatchRule] -] - - -def register_types_to_dispatcher( - dispatcher: TraceTwoLevelDispatcher, types: TypeRegistrationDict -): - """Perform registrations in bulk. - - Many dask operations utilize the functions defined in the pandas backend - without modification. - """ - for ibis_op, registration_list in types.items(): - for types_to_register, fn in registration_list: - dispatcher.register(ibis_op, *types_to_register)(fn) - - -def make_meta_series( - dtype: np.dtype, - name: str | None = None, - meta_index: pd.Index | None = None, -): - if isinstance(meta_index, pd.MultiIndex): - index_names = meta_index.names - series_index = pd.MultiIndex( - levels=[[]] * len(index_names), - codes=[[]] * len(index_names), - names=index_names, - ) - elif isinstance(meta_index, pd.Index): - series_index = pd.Index([], name=meta_index.name) - else: - series_index = pd.Index([]) - - return pd.Series( - [], - index=series_index, - dtype=dtype, - name=name, - ) - - -def make_selected_obj(gs: SeriesGroupBy) -> dd.DataFrame | dd.Series: - """Select a column from a `pandas.DataFrameGroupBy`.""" - # TODO profile this for data shuffling - # We specify drop=False in the case that we are grouping on the column - # we are selecting - if isinstance(gs.obj, dd.Series): - return gs.obj - else: - return gs.obj.set_index(gs.index, drop=False)[gs._meta._selected_obj.name] - - -def coerce_to_output( - result: Any, node: ops.Node, index: pd.Index | None = None -) -> dd.Series | dd.DataFrame: - """Cast the result to either a Series of DataFrame, renaming as needed. - - Reimplementation of `coerce_to_output` in the pandas backend, but - creates dask objects and adds special handling for dd.Scalars. 
- - Parameters - ---------- - result: Any - The result to cast - node: ibis.expr.operations.Node - The operation node associated with the result - index: pd.Index - Optional. If passed, scalar results will be broadcasted according - to the index. - - Returns - ------- - result: A `dd.Series` or `dd.DataFrame` - - Raises - ------ - ValueError - If unable to coerce result - - Examples - -------- - Examples below use pandas objects for legibility, but functionality is the - same on dask objects. - - >>> coerce_to_output(pd.Series(1), expr) # quartodoc: +SKIP # doctest: +SKIP - 0 1 - Name: result, dtype: int64 - >>> coerce_to_output(1, expr) # quartodoc: +SKIP # doctest: +SKIP - 0 1 - Name: result, dtype: int64 - >>> coerce_to_output(1, expr, [1, 2, 3]) # quartodoc: +SKIP # doctest: +SKIP - 1 1 - 2 1 - 3 1 - Name: result, dtype: int64 - >>> coerce_to_output([1, 2, 3], expr) # quartodoc: +SKIP # doctest: +SKIP - 0 [1, 2, 3] - Name: result, dtype: object - """ - result_name = node.name - - if isinstance(result, (pd.DataFrame, dd.DataFrame)): - result = result.apply(dict, axis=1) - return result.rename(result_name) - - if isinstance(result, (pd.Series, dd.Series)): - # Series from https://github.com/ibis-project/ibis/issues/2711 - return result.rename(result_name) - - if isinstance(result, dd.core.Scalar): - # wrap the scalar in a series - out_dtype = _pandas_dtype_from_dd_scalar(result) - out_len = 1 if index is None else len(index) - meta = make_meta_series(dtype=out_dtype, name=result_name) - # Specify `divisions` so that the created Dask object has - # known divisions (to be concatenatable with Dask objects - # created using `dd.from_pandas`) - series = dd.from_delayed( - _wrap_dd_scalar(result, result_name, out_len), - meta=meta, - divisions=(0, out_len - 1), - ) - return series - - # Wrap `result` in a single-element Series. - return dd.from_pandas(pd.Series([result], name=result_name), npartitions=1) - - -@dask.delayed -def _wrap_dd_scalar(x, name=None, series_len=1): - return pd.Series([x for _ in range(series_len)], name=name) - - -def _pandas_dtype_from_dd_scalar(x: dd.core.Scalar): - try: - return x.dtype - except AttributeError: - return pd.Series([x._meta]).dtype - - -def safe_concat(dfs: list[dd.Series | dd.DataFrame]) -> dd.DataFrame: - """Concatenate a list of `dd.Series` or `dd.DataFrame` objects into a DataFrame. - - This will use `DataFrame.concat` if all pieces are the same length. - Otherwise we will iterratively join. - - When axis=1 and divisions are unknown, Dask `DataFrame.concat` can only - operate on objects with equal lengths, otherwise it will raise a - ValueError in `concat_and_check`. - - See https://github.com/dask/dask/blob/2c2e837674895cafdb0612be81250ef2657d947e/dask/dataframe/multi.py#L907. - - Note - Repeatedly joining dataframes is likely to be quite slow, but this - should be hit rarely in real usage. A situation that triggers this slow - path is aggregations where aggregations return different numbers of rows - (see `test_aggregation_group_by` for a specific example). - TODO - performance. 
- """ - if len(dfs) == 1: - maybe_df = dfs[0] - if isinstance(maybe_df, dd.Series): - return maybe_df.to_frame() - else: - return maybe_df - - lengths = list(map(len, dfs)) - if len(set(lengths)) != 1: - result = dfs[0].to_frame() - - for other in dfs[1:]: - result = result.join(other.to_frame(), how="outer") - else: - result = dd.concat(dfs, axis=1) - - return result - - -def compute_sort_key( - key: str | SortKey, - data: dd.DataFrame, - timecontext: TimeContext, - scope: Scope | None = None, - **kwargs, -): - """Compute a sort key. - - We use this function instead of the pandas.execution.util so that we - use the dask `execute` method. - - This function borrows the logic in the pandas backend. `by` can be a - string or an expression. If `by.get_name()` raises an exception, we must - `execute` the expression and sort by the new derived column. - """ - name = ibis.util.guid() - if key.shape.is_columnar(): - if key.name in data: - return name, data[key.name] - if isinstance(key, str): - return key, None - else: - if scope is None: - scope = Scope() - scope = scope.merge_scopes( - Scope({t: data}, timecontext) - for t in an.find_immediate_parent_tables(key) - ) - new_column = execute(key, scope=scope, **kwargs) - new_column.name = name - return name, new_column - else: - raise NotImplementedError( - "Scalar sort keys are not yet supported in the dask backend" - ) - - -def compute_sorted_frame( - df: dd.DataFrame, - order_by: list[str | SortKey], - group_by: list[str | SortKey] | None = None, - timecontext=None, - **kwargs, -) -> dd.DataFrame: - sort_keys = [] - ascending = [] - - if group_by is None: - group_by = [] - - for value in group_by: - sort_keys.append(value) - ascending.append(True) - - for key in order_by: - sort_keys.append(key) - ascending.append(key.ascending) - - new_columns = {} - computed_sort_keys = [] - for key in sort_keys: - computed_sort_key, temporary_column = compute_sort_key( - key, df, timecontext, **kwargs - ) - computed_sort_keys.append(computed_sort_key) - if temporary_column is not None: - new_columns[computed_sort_key] = temporary_column - - result = df.assign(**new_columns) - result = result.sort_values(computed_sort_keys, ascending=ascending) - # TODO: we'll eventually need to return this frame with the temporary - # columns and drop them in the caller (maybe using post_execute?) - ngrouping_keys = len(group_by) - return ( - result, - computed_sort_keys[:ngrouping_keys], - computed_sort_keys[ngrouping_keys:], - ) - - -def assert_identical_grouping_keys(*args): - indices = [arg.index for arg in args] - # Depending on whether group_by was called like group_by("col") or - # group_by(["cold"]) index will be a string or a list - if isinstance(indices[0], list): - indices = [tuple(index) for index in indices] - grouping_keys = set(indices) - if len(grouping_keys) != 1: - raise AssertionError(f"Differing grouping keys passed: {grouping_keys}") - - -def add_globally_consecutive_column( - df: dd.DataFrame | dd.Series, - col_name: str = "_ibis_index", - set_as_index: bool = True, -) -> dd.DataFrame: - """Add a column that is globally consecutive across the distributed data. - - By construction, this column is already sorted and can be used to partition - the data. - This column can act as if we had a global index across the distributed data. - This index needs to be consecutive in the range of [0, len(df)), allows - downstream operations to work properly. - The default index of dask dataframes is to be consecutive within each partition. 
- - Important properties: - - - Each row has a unique id (i.e. a value in this column) - - The global index that's added is consecutive in the same order that the rows currently are in. - - IDs within each partition are already sorted - - We also do not explicitly deal with overflow in the bounds. - - Parameters - ---------- - df : dd.DataFrame - Dataframe to add the column to - col_name: str - Name of the column to use. Default is _ibis_index - set_as_index: bool - If True, will set the consecutive column as the index. Default is True. - - Returns - ------- - dd.DataFrame - New dask dataframe with sorted partitioned index - """ - if isinstance(df, dd.Series): - df = df.to_frame() - - if col_name in df.columns: - raise ValueError(f"Column {col_name} is already present in DataFrame") - - df = df.assign(**{col_name: 1}) - df = df.assign(**{col_name: df[col_name].cumsum() - 1}) - if set_as_index: - df = df.reset_index(drop=True) - df = df.set_index(col_name, sorted=True) - return df - - -def is_row_order_preserving(nodes) -> bool: - """Detects if the operation preserves row ordering. - - Certain operations we know will not affect the ordering of rows in - the dataframe (for example elementwise operations on ungrouped - dataframes). In these cases we may be able to avoid expensive joins - and assign directly into the parent dataframe. - """ - - def _is_row_order_preserving(node: ops.Node): - if isinstance(node, (ops.Reduction, ops.Window)): - return (graph.halt, False) - else: - return (graph.proceed, True) - - return graph.traverse(_is_row_order_preserving, nodes) - - -def rename_index(df: dd.DataFrame, new_index_name: str) -> dd.DataFrame: - # No elegant way to rename index - # https://github.com/dask/dask/issues/4950 - df = df.map_partitions(pd.DataFrame.rename_axis, new_index_name, axis="index") - return df diff --git a/ibis/backends/dask/execution/window.py b/ibis/backends/dask/execution/window.py deleted file mode 100644 index a54a5dc8751e..000000000000 --- a/ibis/backends/dask/execution/window.py +++ /dev/null @@ -1,443 +0,0 @@ -"""Code for computing window functions in the dask backend.""" - -from __future__ import annotations - -import operator -from typing import TYPE_CHECKING, Any, Callable, NoReturn - -import dask.dataframe as dd -import dask.dataframe.groupby as ddgb -import pandas as pd -import toolz -from multipledispatch import Dispatcher - -import ibis.expr.analysis as an -import ibis.expr.operations as ops -from ibis.backends.base.df.scope import Scope -from ibis.backends.dask import aggcontext as agg_ctx -from ibis.backends.dask.core import compute_time_context, execute -from ibis.backends.dask.dispatch import execute_node -from ibis.backends.dask.execution.util import ( - _pandas_dtype_from_dd_scalar, - _wrap_dd_scalar, - add_globally_consecutive_column, - compute_sorted_frame, - make_meta_series, -) -from ibis.backends.pandas.core import ( - date_types, - integer_types, - simple_types, - timedelta_types, - timestamp_types, -) -from ibis.backends.pandas.execution.window import _post_process_group_by_order_by - -if TYPE_CHECKING: - from ibis.backends.base.df.timecontext import ( - TimeContext, - ) - from ibis.backends.pandas.aggcontext import AggregationContext - - -def _check_valid_window_frame(frame): - # TODO consolidate this with pandas - if frame.how == "range" and any( - not col.dtype.is_temporal() for col in frame.order_by - ): - raise NotImplementedError( - "The Dask backend only implements range windows with temporal " - "ordering keys" - ) - - if 
len(frame.order_by) > 1: - raise NotImplementedError( - "Multiple order_bys are not supported in the dask backend" - ) - - if frame.order_by and frame.group_by: - raise NotImplementedError( - "Grouped and order windows are not supported in the dask backend." - ) - - -def _get_post_process_function(frame: ops.WindowFrame) -> Callable: - # TODO consolidate with pandas - if frame.group_by: - if frame.order_by: - return _post_process_group_by_order_by - else: - return _post_process_group_by - elif frame.order_by: - return _post_process_order_by - else: - return _post_process_empty - - -get_aggcontext = Dispatcher("get_aggcontext") - - -@get_aggcontext.register(object) -def get_aggcontext_default( - window, - *, - scope, - operand, - parent, - group_by, - order_by, - **kwargs, -) -> NoReturn: - raise NotImplementedError( - f"get_aggcontext is not implemented for {type(window).__name__}" - ) - - -# TODO consolidate with pandas -@get_aggcontext.register(ops.WindowFrame) -def get_aggcontext_window( - frame, - *, - scope, - operand, - parent, - group_by, - order_by, - **kwargs, -) -> AggregationContext: - # no order by or group by: default summarization aggcontext - # - # if we're reducing and we have an order by expression then we need to - # expand or roll. - # - # otherwise we're transforming - output_type = operand.dtype - - if not group_by and not order_by: - aggcontext = agg_ctx.Summarize(parent=parent, output_type=output_type) - elif group_by and not order_by: - # groupby transform (window with a partition by clause in SQL parlance) - aggcontext = agg_ctx.Transform( - parent=parent, - group_by=group_by, - order_by=order_by, - output_type=output_type, - ) - elif frame.start is not None: - if isinstance(frame, ops.RowsWindowFrame): - max_lookback = frame.max_lookback - else: - max_lookback = None - aggcontext = agg_ctx.Moving( - frame.start, - # FIXME(kszucs): I don't think that we have a proper max_lookback test - # case because passing None here is not braking anything - max_lookback=max_lookback, - parent=parent, - group_by=group_by, - order_by=order_by, - output_type=output_type, - ) - else: - # expanding window - aggcontext = agg_ctx.Cumulative( - parent=parent, - window=len(parent), - group_by=group_by, - order_by=order_by, - output_type=output_type, - ) - - return aggcontext - - -def _post_process_empty( - result: Any, - parent: dd.Series | dd.DataFrame, - order_by: list[str], - group_by: list[str], - timecontext: TimeContext | None, - **kwargs, -) -> dd.Series: - """Post process non grouped, non ordered windows. - - dd.Series/dd.DataFrame objects are passed through, otherwise we conform - the output to the parent input (i.e. so the shape an partitioning matches). - - dd.core.Scalar needs special handling so downstream functions can work - with it. 
- """ - if isinstance(result, (dd.Series, dd.DataFrame)): - return result - elif isinstance(result, dd.core.Scalar): - # TODO this should be refactored with similar logic in util.py - # both solve the generalish problem we have of wrapping a - # dd.core.Scalar into something dask can work with downstream - # TODO computation - lens = parent.index.map_partitions(len).compute().values - out_dtype = _pandas_dtype_from_dd_scalar(result) - meta = make_meta_series(dtype=out_dtype) - delayeds = [_wrap_dd_scalar(result, None, out_len) for out_len in lens] - series = dd.from_delayed(delayeds, meta=meta) - series = add_globally_consecutive_column(series) - return series[0] - else: - # Project any non delayed object to the shape of "parent" - return parent.apply( - lambda row, result=result: result, meta=(None, "object"), axis=1 - ) - - -def _post_process_order_by( - series, - parent: dd.DataFrame, - order_by: list[str], - group_by: list[str], - timecontext: TimeContext | None, - **kwargs, -) -> dd.Series: - """Functions like pandas with dasky argsorting.""" - assert order_by and not group_by - if isinstance(series, dd.core.Scalar): - lens = parent.index.map_partitions(len).compute().values - out_dtype = _pandas_dtype_from_dd_scalar(series) - meta = make_meta_series(dtype=out_dtype) - delayeds = [_wrap_dd_scalar(series, None, out_len) for out_len in lens] - series = dd.from_delayed(delayeds, meta=meta) - series = add_globally_consecutive_column(series) - return series[0] - - series_index_name = "index" if series.index.name is None else series.index.name - # Need to sort series back before returning. - series = series.reset_index().set_index(series_index_name).iloc[:, 0] - - return series - - -def _post_process_group_by( - series, - parent: dd.DataFrame, - order_by: list[str], - group_by: list[str], - timecontext: TimeContext | None, - op, - **kwargs, -) -> dd.Series: - assert not order_by and group_by - # FIXME This is likely not needed anymore. - return series - - -@execute_node.register(ops.WindowFunction, dd.Series) -def execute_window_op( - op, - data, - scope: Scope, - timecontext: TimeContext | None = None, - aggcontext=None, - clients=None, - **kwargs, -): - func, frame = op.func, op.frame - _check_valid_window_frame(frame) - - adjusted_timecontext = None - if timecontext: - arg_timecontexts = compute_time_context( - op, timecontext=timecontext, clients=clients, scope=scope - ) - # timecontext is the original time context required by parent node - # of this Window, while adjusted_timecontext is the adjusted context - # of this Window, since we are doing a manual execution here, use - # adjusted_timecontext in later execution phases - adjusted_timecontext = arg_timecontexts[0] - - root_table = an.find_first_base_table(op) - root_data = execute( - root_table, - scope=scope, - timecontext=adjusted_timecontext, - clients=clients, - aggcontext=aggcontext, - **kwargs, - ) - - grouping_keys = [ - key.name - if isinstance(key, ops.TableColumn) - else execute( - key, - scope=scope, - clients=clients, - timecontext=adjusted_timecontext, - aggcontext=aggcontext, - **kwargs, - ) - for key in frame.group_by - ] - - if not frame.order_by: - ordering_keys = [] - - if frame.group_by: - if frame.order_by: - raise NotImplementedError("Grouped and order windows not supported yet") - # TODO finish implementing grouped/order windows. 
- else: - if len(grouping_keys) == 1 and isinstance(grouping_keys[0], dd.Series): - # Dask will raise an exception about not supporting multiple Series in group by key - # even if it is passed a length 1 list of Series. - # For this case we just make group_by_cols a single Series. - group_by_cols = grouping_keys[0] - else: - group_by_cols = grouping_keys - source = root_data.groupby(group_by_cols, sort=False, group_keys=False) - elif frame.order_by: - source, grouping_keys, ordering_keys = compute_sorted_frame( - df=root_data, - order_by=frame.order_by, - timecontext=timecontext, - **kwargs, - ) - else: - source = root_data - - # Here groupby object should be add to the corresponding node in scope - # for execution, data will be overwrite to a groupby object, so we - # force an update regardless of time context - new_scope = scope.merge_scopes( - [ - Scope({t: source}, adjusted_timecontext) - for t in an.find_immediate_parent_tables(func) - ], - overwrite=True, - ) - - aggcontext = get_aggcontext( - frame, - scope=scope, - operand=func, - parent=source, - group_by=grouping_keys, - order_by=ordering_keys, - **kwargs, - ) - result = execute( - func, - scope=new_scope, - timecontext=adjusted_timecontext, - aggcontext=aggcontext, - clients=clients, - **kwargs, - ) - - result = _get_post_process_function(frame)( - result, - root_data, - ordering_keys, - grouping_keys, - timecontext, - op=op, - ) - - # If the grouped operation we performed is not an analytic UDF we may need - # to realign the output to the input. - if ( - not isinstance(op.func, ops.AnalyticVectorizedUDF) - and not result.known_divisions - ): - if root_data.index.name != result.index.name: - result = dd.merge( - root_data[result.index.name].to_frame(), - result.to_frame(), - left_on=result.index.name, - right_index=True, - )[result.name] - - result.divisions = root_data.divisions - - return result - - -@execute_node.register( - (ops.Lead, ops.Lag), - (dd.Series, ddgb.SeriesGroupBy), - integer_types + (type(None),), - simple_types + (type(None),), -) -def execute_series_lead_lag(op, data, offset, default, **kwargs): - func = toolz.identity if isinstance(op, ops.Lag) else operator.neg - result = data.shift(func(1 if offset is None else offset)) - return post_lead_lag(result, default) - - -@execute_node.register( - (ops.Lead, ops.Lag), - (dd.Series, ddgb.SeriesGroupBy), - timedelta_types, - date_types + timestamp_types + (str, type(None)), -) -def execute_series_lead_lag_timedelta( - op, data, offset, default, aggcontext=None, **kwargs -): - """Shift a column relative to another one that is in units of time rather than rows.""" - # lagging adds time (delayed), leading subtracts time (moved up) - func = operator.add if isinstance(op, ops.Lag) else operator.sub - group_by = aggcontext.group_by - order_by = aggcontext.order_by - - # get the parent object from which `data` originated - parent = aggcontext.parent - - # get the DataFrame from the parent object, handling the DataFrameGroupBy - # case - parent_df = getattr(parent, "obj", parent) - - # perform the time shift - adjusted_parent_df = parent_df.assign( - **{k: func(parent_df[k], offset) for k in order_by} - ) - - # index the parent *after* adjustment - adjusted_indexed_parent = adjusted_parent_df.set_index(group_by + order_by) - - # get the column we care about - result = adjusted_indexed_parent[getattr(data, "obj", data).name] - - # add a default if necessary - return post_lead_lag(result, default) - - -def post_lead_lag(result, default): - if not pd.isnull(default): - return 
result.fillna(default) - return result - - -@execute_node.register(ops.FirstValue, dd.Series) -def execute_series_first_value(op, data, **kwargs): - arr = data.head(1, compute=False).values - # normally you shouldn't do this but we know that there is one row - # consider upstreaming to dask - arr._chunks = ((1,),) - return arr[0] - - -@execute_node.register(ops.FirstValue, ddgb.SeriesGroupBy) -def execute_series_group_by_first_value(op, data, aggcontext=None, **kwargs): - return aggcontext.agg(data, "first") - - -@execute_node.register(ops.LastValue, dd.Series) -def execute_series_last_value(op, data, **kwargs): - arr = data.tail(1, compute=False).values - - # normally you shouldn't do this but we know that there is one row - # consider upstreaming to dask - arr._chunks = ((1,),) - return arr[0] - - -@execute_node.register(ops.LastValue, ddgb.SeriesGroupBy) -def execute_series_group_by_last_value(op, data, aggcontext=None, **kwargs): - return aggcontext.agg(data, "last") diff --git a/ibis/backends/dask/executor.py b/ibis/backends/dask/executor.py new file mode 100644 index 000000000000..cc4890fbdcf0 --- /dev/null +++ b/ibis/backends/dask/executor.py @@ -0,0 +1,414 @@ +from __future__ import annotations + +import operator +from functools import reduce + +import dask.array as da +import dask.dataframe as dd +import numpy as np +import pandas as pd + +import ibis.backends.dask.kernels as dask_kernels +import ibis.expr.operations as ops +from ibis.backends.dask.convert import DaskConverter +from ibis.backends.dask.helpers import ( + DaskUtils, + add_globally_consecutive_column, +) +from ibis.backends.pandas.executor import PandasExecutor +from ibis.backends.pandas.rewrites import ( + PandasAggregate, + PandasJoin, + PandasLimit, + PandasResetIndex, + PandasScalarSubquery, + plan, +) +from ibis.common.exceptions import UnboundExpressionError +from ibis.formats.pandas import PandasData, PandasType +from ibis.util import gen_name + +# ruff: noqa: F811 + + +class DaskExecutor(PandasExecutor, DaskUtils): + name = "dask" + kernels = dask_kernels + + @classmethod + def visit(cls, op: ops.Node, **kwargs): + return super().visit(op, **kwargs) + + @classmethod + def visit(cls, op: ops.Cast, arg, to): + if arg is None: + return None + elif isinstance(arg, dd.Series): + return DaskConverter.convert_column(arg, to) + else: + return DaskConverter.convert_scalar(arg, to) + + @classmethod + def visit( + cls, op: ops.SimpleCase | ops.SearchedCase, cases, results, default, base=None + ): + def mapper(df, cases, results, default): + cases = [case.astype("bool") for case in cases] + cases.append(pd.Series(True, index=df.index)) + + results.append(default) + out = np.select(cases, results) + + return pd.Series(out, index=df.index) + + dtype = PandasType.from_ibis(op.dtype) + if base is not None: + cases = tuple(base == case for case in cases) + kwargs = dict(cases=cases, results=results, default=default) + + return cls.partitionwise(mapper, kwargs, name=op.name, dtype=dtype) + + @classmethod + def visit(cls, op: ops.TimestampTruncate | ops.DateTruncate, arg, unit): + # TODO(kszucs): should use serieswise() + unit = {"m": "Min", "ms": "L"}.get(unit.short, unit.short) + try: + return arg.dt.floor(unit) + except ValueError: + return arg.dt.to_period(unit).dt.to_timestamp() + + @classmethod + def visit(cls, op: ops.IntervalFromInteger, unit, **kwargs): + if unit.short in {"Y", "Q", "M", "W"}: + return cls.elementwise( + lambda v: pd.DateOffset(**{unit.plural: v}), + kwargs, + name=op.name, + dtype=object, + ) + else: 
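+            # Variable-length units (year/quarter/month/week) are handled above via
+            # pd.DateOffset; fixed-size units map directly onto a numpy timedelta64
+            # resolution, so the integer column can simply be cast.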
+ return cls.serieswise( + lambda arg: arg.astype(f"timedelta64[{unit.short}]"), kwargs + ) + + @classmethod + def visit(cls, op: ops.BetweenTime, arg, lower_bound, upper_bound): + if getattr(arg.dtype, "tz", None) is not None: + localized = arg.dt.tz_convert("UTC").dt.tz_localize(None) + else: + localized = arg + + time = localized.dt.time.astype(str) + indexer = ((time >= lower_bound) & (time <= upper_bound)).to_dask_array(True) + + result = da.zeros(len(arg), dtype=np.bool_) + result[indexer] = True + return dd.from_array(result) + + @classmethod + def visit(cls, op: ops.FindInSet, needle, values): + def mapper(df, cases): + thens = [i for i, _ in enumerate(cases)] + out = np.select(cases, thens, default=-1) + return pd.Series(out, index=df.index) + + dtype = PandasType.from_ibis(op.dtype) + cases = [needle == value for value in values] + kwargs = dict(cases=cases) + return cls.partitionwise(mapper, kwargs, name=op.name, dtype=dtype) + + @classmethod + def visit(cls, op: ops.Array, exprs): + return cls.rowwise( + lambda row: np.array(row, dtype=object), exprs, name=op.name, dtype=object + ) + + @classmethod + def visit(cls, op: ops.ArrayConcat, arg): + dtype = PandasType.from_ibis(op.dtype) + return cls.rowwise( + lambda row: np.concatenate(row.values), arg, name=op.name, dtype=dtype + ) + + @classmethod + def visit(cls, op: ops.Unnest, arg): + arg = cls.asseries(arg) + mask = arg.map(lambda v: bool(len(v)), na_action="ignore") + return arg[mask].explode() + + @classmethod + def visit( + cls, op: ops.ElementWiseVectorizedUDF, func, func_args, input_type, return_type + ): + """Execute an elementwise UDF.""" + + def mapper(df): + cols = [df[col] for col in df] + return func(*cols) + + df, _ = cls.asframe(func_args) + result = df.map_partitions(mapper) + if op.dtype.is_struct(): + result = result.apply(lambda row: row.to_dict(), axis=1) + return result + + ############################# Reductions ################################## + + @classmethod + def visit(cls, op: ops.ArgMin | ops.ArgMax, arg, key, where): + # TODO(kszucs): raise a warning about triggering compute()? + if isinstance(op, ops.ArgMin): + func = lambda x: x.idxmin() + else: + func = lambda x: x.idxmax() + + if where is None: + + def agg(df): + indices = func(df[key.name]) + if isinstance(indices, (dd.Series, dd.core.Scalar)): + # to support both aggregating within a group and globally + indices = indices.compute() + return df[arg.name].loc[indices] + else: + + def agg(df): + mask = df[where.name] + filtered = df[mask] + indices = func(filtered[key.name]) + if isinstance(indices, (dd.Series, dd.core.Scalar)): + # to support both aggregating within a group and globally + indices = indices.compute() + return filtered[arg.name].loc[indices] + + return agg + + @classmethod + def visit(cls, op: ops.Correlation, left, right, where, how): + if where is None: + + def agg(df): + return df[left.name].corr(df[right.name]) + else: + + def agg(df): + mask = df[where.name] + lhs = df[left.name][mask].compute() + rhs = df[right.name][mask].compute() + return lhs.corr(rhs) + + return agg + + @classmethod + def visit(cls, op: ops.Covariance, left, right, where, how): + # TODO(kszucs): raise a warning about triggering compute()? 
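+        # `how` selects the covariance flavor; translate it into the delta degrees
+        # of freedom expected by pandas' Series.cov (0 = population, 1 = sample).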
+ ddof = {"pop": 0, "sample": 1}[how] + if where is None: + + def agg(df): + lhs = df[left.name].compute() + rhs = df[right.name].compute() + return lhs.cov(rhs, ddof=ddof) + else: + + def agg(df): + mask = df[where.name] + lhs = df[left.name][mask].compute() + rhs = df[right.name][mask].compute() + return lhs.cov(rhs, ddof=ddof) + + return agg + + @classmethod + def visit( + cls, op: ops.ReductionVectorizedUDF, func, func_args, input_type, return_type + ): + def agg(df): + # if df is a dask dataframe then we collect it to a pandas dataframe + # because the user-defined function expects a pandas dataframe + if isinstance(df, dd.DataFrame): + df = df.compute() + args = [df[col.name] for col in func_args] + return func(*args) + + return agg + + @classmethod + def visit( + cls, op: ops.AnalyticVectorizedUDF, func, func_args, input_type, return_type + ): + def agg(df, order_keys): + # if df is a dask dataframe then we collect it to a pandas dataframe + # because the user-defined function expects a pandas dataframe + if isinstance(df, dd.DataFrame): + df = df.compute() + args = [df[col.name] for col in func_args] + res = func(*args) + if isinstance(res, pd.DataFrame): + # it is important otherwise it is going to fill up the memory + res = res.apply(lambda row: row.to_dict(), axis=1) + return res + + return agg + + ############################ Window functions ############################# + + @classmethod + def visit(cls, op: ops.WindowFrame, table, start, end, **kwargs): + table = table.compute() + if isinstance(start, dd.Series): + start = start.compute() + if isinstance(end, dd.Series): + end = end.compute() + return super().visit(op, table=table, start=start, end=end, **kwargs) + + @classmethod + def visit(cls, op: ops.WindowFunction, func, frame): + result = super().visit(op, func=func, frame=frame) + return cls.asseries(result) + + ############################ Relational ################################### + + @classmethod + def visit(cls, op: ops.DatabaseTable, name, schema, source, namespace): + try: + return source.dictionary[name] + except KeyError: + raise UnboundExpressionError( + f"{name} is not a table in the {source.name!r} backend, you " + "probably tried to execute an expression without a data source" + ) + + @classmethod + def visit(cls, op: ops.InMemoryTable, name, schema, data): + df = data.to_frame().reset_index(drop=True) + return dd.from_pandas(df, npartitions=1) + + @classmethod + def visit(cls, op: ops.DummyTable, values): + df, _ = cls.asframe(values) + return df + + @classmethod + def visit(cls, op: PandasLimit, parent, n, offset): + n = n.compute().iat[0, 0] + offset = offset.compute().iat[0, 0] + + name = gen_name("limit") + df = add_globally_consecutive_column(parent, name, set_as_index=False) + if n is None: + df = df[df[name] >= offset] + else: + df = df[df[name].between(offset, offset + n - 1)] + + return df.drop(columns=[name]) + + @classmethod + def visit(cls, op: PandasResetIndex, parent): + return add_globally_consecutive_column(parent) + + @classmethod + def visit(cls, op: PandasJoin, **kwargs): + df = super().visit(op, **kwargs) + return add_globally_consecutive_column(df) + + @classmethod + def visit(cls, op: ops.Project, parent, values): + df, all_scalars = cls.asframe(values) + if all_scalars and len(parent) != len(df): + df = dd.concat([df] * len(parent)) + return df + + @classmethod + def visit(cls, op: ops.Filter, parent, predicates): + if predicates: + pred = reduce(operator.and_, predicates) + parent = parent.loc[pred].reset_index(drop=True) + 
return parent + + @classmethod + def visit(cls, op: ops.Sort, parent, keys): + # 1. add sort key columns to the dataframe if they are not already present + # 2. sort the dataframe using those columns + # 3. drop the sort key columns + ascending = [key.ascending for key in op.keys] + newcols = {gen_name("sort_key"): col for col in keys} + names = list(newcols.keys()) + df = parent.assign(**newcols) + df = df.sort_values(by=names, ascending=ascending) + return df.drop(names, axis=1) + + @classmethod + def visit(cls, op: PandasAggregate, parent, groups, metrics): + if not groups: + results = {k: v(parent) for k, v in metrics.items()} + combined, _ = cls.asframe(results) + return combined + + parent = parent.groupby([col.name for col in groups.values()]) + + measures = {} + for name, metric in metrics.items(): + meta = pd.Series( + name=name, + dtype=PandasType.from_ibis(op.metrics[name].dtype), + index=pd.MultiIndex( + levels=[[] for _ in groups], + codes=[[] for _ in groups], + names=list(groups.keys()), + ), + ) + measures[name] = parent.apply(metric, meta=meta) + + result = cls.concat(measures, axis=1).reset_index() + renames = {v.name: k for k, v in op.groups.items()} + return result.rename(columns=renames) + + @classmethod + def visit(cls, op: ops.InValues, value, options): + if isinstance(value, dd.Series): + return value.isin(options) + else: + return value in options + + @classmethod + def visit(cls, op: ops.InSubquery, rel, needle): + first_column = rel.compute().iloc[:, 0] + if isinstance(needle, dd.Series): + return needle.isin(first_column) + else: + return needle in first_column + + @classmethod + def visit(cls, op: PandasScalarSubquery, rel): + # TODO(kszucs): raise a warning about triggering compute()? + # could the compute be avoided here? 
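+        # The scalar subquery evaluates to a single-row, single-column relation;
+        # materialize it and extract that one value.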
+ return rel.compute().iat[0, 0] + + @classmethod + def compile(cls, node, backend, params): + def fn(node, _, **kwargs): + return cls.visit(node, **kwargs) + + node = node.to_expr().as_table().op() + node = plan(node, backend=backend, params=params) + return node.map_clear(fn) + + @classmethod + def execute(cls, node, backend, params): + original = node + node = node.to_expr().as_table().op() + df = cls.compile(node, backend=backend, params=params) + assert isinstance(df, dd.DataFrame) + + result = df.compute() + result = PandasData.convert_table(result, node.schema) + if isinstance(original, ops.Value): + if original.shape.is_scalar(): + return result.iloc[0, 0] + elif original.shape.is_columnar(): + return result.iloc[:, 0] + else: + raise TypeError(f"Unexpected shape: {original.shape}") + else: + return result diff --git a/ibis/backends/dask/helpers.py b/ibis/backends/dask/helpers.py new file mode 100644 index 000000000000..11ec33792bbf --- /dev/null +++ b/ibis/backends/dask/helpers.py @@ -0,0 +1,186 @@ +from __future__ import annotations + +from typing import Callable + +import dask.array as da +import dask.dataframe as dd +import numpy as np +import pandas as pd + +from ibis.backends.pandas.helpers import PandasUtils + + +class DaskUtils(PandasUtils): + @classmethod + def merge(cls, *args, **kwargs): + return dd.merge(*args, **kwargs) + + @classmethod + def merge_asof(cls, *args, **kwargs): + return dd.merge_asof(*args, **kwargs) + + @classmethod + def concat(cls, dfs, **kwargs): + if isinstance(dfs, dict): + dfs = [v.rename(k) for k, v in dfs.items()] + return dd.concat(dfs, **kwargs) + + @classmethod + def asseries(cls, value, like=None): + """Ensure that value is a pandas Series object, broadcast if necessary.""" + + if isinstance(value, dd.Series): + return value + elif isinstance(value, dd.core.Scalar): + # Create a Dask array from the Dask scalar + try: + dtype = value.dtype + except AttributeError: + # @property + # def dtype(self): + # > return self._meta.dtype + # E AttributeError: 'Timestamp' object has no attribute 'dtype' + dtype = object + array = da.from_delayed(value.to_delayed(), (1,), dtype=dtype) + # Create a Dask series from the Dask array + return dd.from_array(array) + elif isinstance(value, pd.Series): + return dd.from_pandas(value, npartitions=1) + elif like is not None: + if isinstance(value, (tuple, list, dict)): + fn = lambda df: pd.Series([value] * len(df), index=df.index) + else: + fn = lambda df: pd.Series(value, index=df.index) + return like.map_partitions(fn) + else: + return dd.from_pandas(pd.Series([value]), npartitions=1) + + @classmethod + def asframe(cls, values: dict | tuple): + # TODO(kszucs): prefer using assign instead of concat + """Construct a DataFrame from a dict or tuple of Series objects.""" + if isinstance(values, dict): + names, values = zip(*values.items()) + elif isinstance(values, tuple): + names = [f"_{i}" for i in range(len(values))] + else: + raise TypeError(f"values must be a dict, or tuple; got {type(values)}") + + all_scalars = True + representative = None + for v in values: + if isinstance(v, dd.Series): + all_scalars = False + representative = v + break + + columns = [cls.asseries(v, like=representative) for v in values] + columns = [v.rename(k) for k, v in zip(names, columns)] + + # dd.concat turns decimal.Decimal("NaN") into np.nan for some reason + df = dd.concat(columns, axis=1) + return df, all_scalars + + @classmethod + def rowwise(cls, func: Callable, operands, name, dtype): + if dtype == np.dtype(" dd.DataFrame: + 
"""Add a column that is globally consecutive across the distributed data. + + By construction, this column is already sorted and can be used to partition + the data. + This column can act as if we had a global index across the distributed data. + This index needs to be consecutive in the range of [0, len(df)), allows + downstream operations to work properly. + The default index of dask dataframes is to be consecutive within each partition. + + Important properties: + + - Each row has a unique id (i.e. a value in this column) + - The global index that's added is consecutive in the same order that the rows currently are in. + - IDs within each partition are already sorted + + We also do not explicitly deal with overflow in the bounds. + + Parameters + ---------- + df: dd.DataFrame + Dataframe to add the column to + name: str + Name of the column to use. Default is _ibis_index + set_as_index: bool + If True, will set the consecutive column as the index. Default is True. + + Returns + ------- + dd.DataFrame + New dask dataframe with sorted partitioned index + """ + if isinstance(df, dd.Series): + df = df.to_frame() + + if name in df.columns: + raise ValueError(f"Column {name} is already present in DataFrame") + + df = df.assign(**{name: 1}) + df = df.assign(**{name: df[name].cumsum() - 1}) + if set_as_index: + df = df.reset_index(drop=True) + df = df.set_index(name, sorted=True) + + # No elegant way to rename index https://github.com/dask/dask/issues/4950 + df = df.map_partitions(pd.DataFrame.rename_axis, None, axis="index") + + return df diff --git a/ibis/backends/dask/kernels.py b/ibis/backends/dask/kernels.py new file mode 100644 index 000000000000..d31d0883d48e --- /dev/null +++ b/ibis/backends/dask/kernels.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +import dask.dataframe as dd +import numpy as np + +import ibis.backends.pandas.kernels as pandas_kernels +import ibis.expr.operations as ops + +generic = pandas_kernels.generic.copy() +columnwise = pandas_kernels.columnwise.copy() +elementwise = pandas_kernels.elementwise.copy() +elementwise_decimal = pandas_kernels.elementwise_decimal.copy() + +rowwise = { + **pandas_kernels.rowwise, + ops.DateAdd: lambda row: row["left"] + row["right"], +} + +reductions = { + **pandas_kernels.reductions, + ops.Mode: lambda x: x.mode().loc[0], + ops.ApproxMedian: lambda x: x.median_approximate(), + ops.BitAnd: lambda x: x.reduction(np.bitwise_and.reduce), + ops.BitOr: lambda x: x.reduction(np.bitwise_or.reduce), + ops.BitXor: lambda x: x.reduction(np.bitwise_xor.reduce), + # Window functions are calculated locally using pandas + ops.Last: lambda x: x.compute().iloc[-1] if isinstance(x, dd.Series) else x.iat[-1], + ops.First: lambda x: x.loc[0] if isinstance(x, dd.Series) else x.iat[0], +} + +serieswise = { + **pandas_kernels.serieswise, + ops.StringAscii: lambda arg: arg.map( + ord, na_action="ignore", meta=(arg.name, "int32") + ), + ops.TimestampFromUNIX: lambda arg, unit: dd.to_datetime(arg, unit=unit.short), + ops.DayOfWeekIndex: lambda arg: dd.to_datetime(arg).dt.dayofweek, + ops.DayOfWeekName: lambda arg: dd.to_datetime(arg).dt.day_name(), +} + +# prefer other kernels for the following operations +del generic[ops.IsNull] +del generic[ops.NotNull] +del generic[ops.DateAdd] # must pass metadata +del serieswise[ops.Round] # dask series doesn't have a round() method +del serieswise[ops.Strftime] # doesn't support columnar format strings +del serieswise[ops.Substring] + + +supported_operations = ( + generic.keys() + | columnwise.keys() + | 
rowwise.keys() + | serieswise.keys() + | elementwise.keys() +) diff --git a/ibis/backends/dask/tests/conftest.py b/ibis/backends/dask/tests/conftest.py index 6fc90f866154..810af551f89f 100644 --- a/ibis/backends/dask/tests/conftest.py +++ b/ibis/backends/dask/tests/conftest.py @@ -1,5 +1,6 @@ from __future__ import annotations +import decimal from typing import Any import dask @@ -8,10 +9,14 @@ import pytest import ibis +import ibis.expr.datatypes as dt from ibis.backends.conftest import TEST_TABLES from ibis.backends.pandas.tests.conftest import TestConf as PandasTest from ibis.backends.tests.data import array_types, json_types, win +dd = pytest.importorskip("dask.dataframe") + + # FIXME Dask issue with non deterministic groupby results, relates to the # shuffle method on a local cluster. Manually setting the shuffle method # avoids the issue https://github.com/dask/dask/issues/10034. @@ -90,10 +95,297 @@ def dataframe(npartitions): @pytest.fixture -def core_client(dataframe): +def con(dataframe): return ibis.dask.connect({"df": dataframe}) @pytest.fixture -def ibis_table(core_client): - return core_client.table("df") +def ibis_table(con): + return con.table("df") + + +@pytest.fixture(scope="module") +def pandas_df(): + return pd.DataFrame( + { + "plain_int64": list(range(1, 4)), + "plain_strings": list("abc"), + "plain_float64": [4.0, 5.0, 6.0], + "plain_datetimes_naive": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values + ), + "plain_datetimes_ny": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values + ).dt.tz_localize("America/New_York"), + "plain_datetimes_utc": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values + ).dt.tz_localize("UTC"), + "dup_strings": list("dad"), + "dup_ints": [1, 2, 1], + "float64_as_strings": ["100.01", "234.23", "-999.34"], + "int64_as_strings": list(map(str, range(1, 4))), + "strings_with_space": [" ", "abab", "ddeeffgg"], + "int64_with_zeros": [0, 1, 0], + "float64_with_zeros": [1.0, 0.0, 1.0], + "float64_positive": [1.0, 2.0, 1.0], + "strings_with_nulls": ["a", None, "b"], + "datetime_strings_naive": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values + ).astype(str), + "datetime_strings_ny": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values + ) + .dt.tz_localize("America/New_York") + .astype(str), + "datetime_strings_utc": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values + ) + .dt.tz_localize("UTC") + .astype(str), + "decimal": list(map(decimal.Decimal, ["1.0", "2", "3.234"])), + "array_of_float64": [[1.0, 2.0], [3.0], []], + "array_of_int64": [[1, 2], [], [3]], + "array_of_strings": [["a", "b"], [], ["c"]], + "map_of_strings_integers": [{"a": 1, "b": 2}, None, {}], + "map_of_integers_strings": [{}, None, {1: "a", 2: "b"}], + "map_of_complex_values": [None, {"a": [1, 2, 3], "b": []}, {}], + } + ) + + +@pytest.fixture(scope="module") +def df(npartitions, pandas_df): + return dd.from_pandas(pandas_df, npartitions=npartitions) + + +@pytest.fixture(scope="module") +def batting_pandas_df(data_dir): + num_rows = 1000 + start_index = 30 + df = pd.read_parquet(data_dir / "parquet" / "batting.parquet").iloc[ + start_index : start_index + num_rows + ] + return df.reset_index(drop=True) + + +@pytest.fixture(scope="module") +def batting_df(npartitions, batting_pandas_df): + return dd.from_pandas(batting_pandas_df, npartitions=npartitions) + + +@pytest.fixture(scope="module") +def awards_players_df(data_dir): 
+ return dd.read_parquet(data_dir / "parquet" / "awards_players.parquet") + + +@pytest.fixture(scope="module") +def df1(npartitions): + pandas_df = pd.DataFrame( + {"key": list("abcd"), "value": [3, 4, 5, 6], "key2": list("eeff")} + ) + return dd.from_pandas(pandas_df, npartitions=npartitions) + + +@pytest.fixture(scope="module") +def df2(npartitions): + pandas_df = pd.DataFrame( + {"key": list("ac"), "other_value": [4.0, 6.0], "key3": list("fe")} + ) + return dd.from_pandas(pandas_df, npartitions=npartitions) + + +@pytest.fixture(scope="module") +def intersect_df2(npartitions): + pandas_df = pd.DataFrame({"key": list("cd"), "value": [5, 6], "key2": list("ff")}) + return dd.from_pandas(pandas_df, npartitions=npartitions) + + +@pytest.fixture(scope="module") +def time_df1(npartitions): + pandas_df = pd.DataFrame( + {"time": pd.to_datetime([1, 2, 3, 4]), "value": [1.1, 2.2, 3.3, 4.4]} + ) + return dd.from_pandas(pandas_df, npartitions=npartitions) + + +@pytest.fixture(scope="module") +def time_df2(npartitions): + pandas_df = pd.DataFrame( + {"time": pd.to_datetime([2, 4]), "other_value": [1.2, 2.0]} + ) + return dd.from_pandas(pandas_df, npartitions=npartitions) + + +@pytest.fixture(scope="module") +def time_df3(npartitions): + pandas_df = pd.DataFrame( + { + "time": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=8).values + ), + "id": list(range(1, 9)), + "value": [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8], + } + ) + return dd.from_pandas(pandas_df, npartitions=npartitions) + + +@pytest.fixture(scope="module") +def time_keyed_df1(npartitions): + pandas_df = pd.DataFrame( + { + "time": pd.Series( + pd.date_range(start="2017-01-02 01:02:03.234", periods=6).values + ), + "key": [1, 2, 3, 1, 2, 3], + "value": [1.2, 1.4, 2.0, 4.0, 8.0, 16.0], + } + ) + return dd.from_pandas(pandas_df, npartitions=npartitions) + + +@pytest.fixture(scope="module") +def time_keyed_df2(npartitions): + pandas_df = pd.DataFrame( + { + "time": pd.Series( + pd.date_range( + start="2017-01-02 01:02:03.234", freq="3D", periods=3 + ).values + ), + "key": [1, 2, 3], + "other_value": [1.1, 1.2, 2.2], + } + ) + return dd.from_pandas(pandas_df, npartitions=npartitions) + + +@pytest.fixture(scope="module") +def client( + df, + df1, + df2, + df3, + time_df1, + time_df2, + time_df3, + time_keyed_df1, + time_keyed_df2, + intersect_df2, +): + return ibis.dask.connect( + { + "df": df, + "df1": df1, + "df2": df2, + "df3": df3, + "left": df1, + "right": df2, + "time_df1": time_df1, + "time_df2": time_df2, + "time_df3": time_df3, + "time_keyed_df1": time_keyed_df1, + "time_keyed_df2": time_keyed_df2, + "intersect_df2": intersect_df2, + } + ) + + +@pytest.fixture(scope="module") +def df3(npartitions): + pandas_df = pd.DataFrame( + { + "key": list("ac"), + "other_value": [4.0, 6.0], + "key2": list("ae"), + "key3": list("fe"), + } + ) + return dd.from_pandas(pandas_df, npartitions=npartitions) + + +t_schema = { + "decimal": dt.Decimal(4, 3), + "array_of_float64": dt.Array(dt.double), + "array_of_int64": dt.Array(dt.int64), + "array_of_strings": dt.Array(dt.string), + "map_of_strings_integers": dt.Map(dt.string, dt.int64), + "map_of_integers_strings": dt.Map(dt.int64, dt.string), + "map_of_complex_values": dt.Map(dt.string, dt.Array(dt.int64)), +} + + +@pytest.fixture(scope="module") +def t(client): + return client.table("df", schema=t_schema) + + +@pytest.fixture(scope="module") +def lahman(batting_df, awards_players_df): + return ibis.dask.connect( + {"batting": batting_df, "awards_players": awards_players_df} + ) + + 
+@pytest.fixture(scope="module") +def left(client): + return client.table("left") + + +@pytest.fixture(scope="module") +def right(client): + return client.table("right") + + +@pytest.fixture(scope="module") +def time_left(client): + return client.table("time_df1") + + +@pytest.fixture(scope="module") +def time_right(client): + return client.table("time_df2") + + +@pytest.fixture(scope="module") +def time_table(client): + return client.table("time_df3") + + +@pytest.fixture(scope="module") +def time_keyed_left(client): + return client.table("time_keyed_df1") + + +@pytest.fixture(scope="module") +def time_keyed_right(client): + return client.table("time_keyed_df2") + + +@pytest.fixture(scope="module") +def batting(lahman): + return lahman.table("batting") + + +@pytest.fixture(scope="module") +def sel_cols(batting): + cols = batting.columns + start, end = cols.index("AB"), cols.index("H") + 1 + return ["playerID", "yearID", "teamID", "G"] + cols[start:end] + + +@pytest.fixture(scope="module") +def players_base(batting, sel_cols): + # TODO Dask doesn't support order_by and group_by yet + # Adding an order by would cause all groupby tests to fail. + return batting[sel_cols] # .order_by(sel_cols[:3]) + + +@pytest.fixture(scope="module") +def players(players_base): + return players_base.group_by("playerID") + + +@pytest.fixture(scope="module") +def players_df(players_base): + return players_base.execute().reset_index(drop=True) diff --git a/ibis/backends/dask/tests/execution/__init__.py b/ibis/backends/dask/tests/execution/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/ibis/backends/dask/tests/execution/conftest.py b/ibis/backends/dask/tests/execution/conftest.py deleted file mode 100644 index 2b9e034f5208..000000000000 --- a/ibis/backends/dask/tests/execution/conftest.py +++ /dev/null @@ -1,290 +0,0 @@ -from __future__ import annotations - -import decimal - -import pandas as pd -import pytest - -import ibis -import ibis.expr.datatypes as dt - -dd = pytest.importorskip("dask.dataframe") - - -@pytest.fixture(scope="module") -def df(npartitions): - pandas_df = pd.DataFrame( - { - "plain_int64": list(range(1, 4)), - "plain_strings": list("abc"), - "plain_float64": [4.0, 5.0, 6.0], - "plain_datetimes_naive": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values - ), - "plain_datetimes_ny": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values - ).dt.tz_localize("America/New_York"), - "plain_datetimes_utc": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values - ).dt.tz_localize("UTC"), - "dup_strings": list("dad"), - "dup_ints": [1, 2, 1], - "float64_as_strings": ["100.01", "234.23", "-999.34"], - "int64_as_strings": list(map(str, range(1, 4))), - "strings_with_space": [" ", "abab", "ddeeffgg"], - "int64_with_zeros": [0, 1, 0], - "float64_with_zeros": [1.0, 0.0, 1.0], - "float64_positive": [1.0, 2.0, 1.0], - "strings_with_nulls": ["a", None, "b"], - "datetime_strings_naive": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values - ).astype(str), - "datetime_strings_ny": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values - ) - .dt.tz_localize("America/New_York") - .astype(str), - "datetime_strings_utc": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=3).values - ) - .dt.tz_localize("UTC") - .astype(str), - "decimal": list(map(decimal.Decimal, ["1.0", "2", "3.234"])), - "array_of_float64": [[1.0, 2.0], [3.0], []], - 
"array_of_int64": [[1, 2], [], [3]], - "array_of_strings": [["a", "b"], [], ["c"]], - "map_of_strings_integers": [{"a": 1, "b": 2}, None, {}], - "map_of_integers_strings": [{}, None, {1: "a", 2: "b"}], - "map_of_complex_values": [None, {"a": [1, 2, 3], "b": []}, {}], - } - ) - return dd.from_pandas(pandas_df, npartitions=npartitions) - - -@pytest.fixture(scope="module") -def batting_df(data_dir): - df = dd.read_parquet(data_dir / "parquet" / "batting.parquet") - # Dask dataframe thinks the columns are of type int64, - # but when computed they are all float64. - non_float_cols = ["playerID", "yearID", "stint", "teamID", "lgID", "G"] - float_cols = [c for c in df.columns if c not in non_float_cols] - df = df.astype({col: "float64" for col in float_cols}) - return df.sample(frac=0.01).reset_index(drop=True) - - -@pytest.fixture(scope="module") -def awards_players_df(data_dir): - return dd.read_parquet(data_dir / "parquet" / "awards_players.parquet") - - -@pytest.fixture(scope="module") -def df1(npartitions): - pandas_df = pd.DataFrame( - {"key": list("abcd"), "value": [3, 4, 5, 6], "key2": list("eeff")} - ) - return dd.from_pandas(pandas_df, npartitions=npartitions) - - -@pytest.fixture(scope="module") -def df2(npartitions): - pandas_df = pd.DataFrame( - {"key": list("ac"), "other_value": [4.0, 6.0], "key3": list("fe")} - ) - return dd.from_pandas(pandas_df, npartitions=npartitions) - - -@pytest.fixture(scope="module") -def intersect_df2(npartitions): - pandas_df = pd.DataFrame({"key": list("cd"), "value": [5, 6], "key2": list("ff")}) - return dd.from_pandas(pandas_df, npartitions=npartitions) - - -@pytest.fixture(scope="module") -def time_df1(npartitions): - pandas_df = pd.DataFrame( - {"time": pd.to_datetime([1, 2, 3, 4]), "value": [1.1, 2.2, 3.3, 4.4]} - ) - return dd.from_pandas(pandas_df, npartitions=npartitions) - - -@pytest.fixture(scope="module") -def time_df2(npartitions): - pandas_df = pd.DataFrame( - {"time": pd.to_datetime([2, 4]), "other_value": [1.2, 2.0]} - ) - return dd.from_pandas(pandas_df, npartitions=npartitions) - - -@pytest.fixture(scope="module") -def time_df3(npartitions): - pandas_df = pd.DataFrame( - { - "time": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=8).values - ), - "id": list(range(1, 9)), - "value": [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8], - } - ) - return dd.from_pandas(pandas_df, npartitions=npartitions) - - -@pytest.fixture(scope="module") -def time_keyed_df1(npartitions): - pandas_df = pd.DataFrame( - { - "time": pd.Series( - pd.date_range(start="2017-01-02 01:02:03.234", periods=6).values - ), - "key": [1, 2, 3, 1, 2, 3], - "value": [1.2, 1.4, 2.0, 4.0, 8.0, 16.0], - } - ) - return dd.from_pandas(pandas_df, npartitions=npartitions) - - -@pytest.fixture(scope="module") -def time_keyed_df2(npartitions): - pandas_df = pd.DataFrame( - { - "time": pd.Series( - pd.date_range( - start="2017-01-02 01:02:03.234", freq="3D", periods=3 - ).values - ), - "key": [1, 2, 3], - "other_value": [1.1, 1.2, 2.2], - } - ) - return dd.from_pandas(pandas_df, npartitions=npartitions) - - -@pytest.fixture(scope="module") -def client( - df, - df1, - df2, - df3, - time_df1, - time_df2, - time_df3, - time_keyed_df1, - time_keyed_df2, - intersect_df2, -): - return ibis.dask.connect( - { - "df": df, - "df1": df1, - "df2": df2, - "df3": df3, - "left": df1, - "right": df2, - "time_df1": time_df1, - "time_df2": time_df2, - "time_df3": time_df3, - "time_keyed_df1": time_keyed_df1, - "time_keyed_df2": time_keyed_df2, - "intersect_df2": intersect_df2, - } - ) - - 
-@pytest.fixture(scope="module") -def df3(npartitions): - pandas_df = pd.DataFrame( - { - "key": list("ac"), - "other_value": [4.0, 6.0], - "key2": list("ae"), - "key3": list("fe"), - } - ) - return dd.from_pandas(pandas_df, npartitions=npartitions) - - -t_schema = { - "decimal": dt.Decimal(4, 3), - "array_of_float64": dt.Array(dt.double), - "array_of_int64": dt.Array(dt.int64), - "array_of_strings": dt.Array(dt.string), - "map_of_strings_integers": dt.Map(dt.string, dt.int64), - "map_of_integers_strings": dt.Map(dt.int64, dt.string), - "map_of_complex_values": dt.Map(dt.string, dt.Array(dt.int64)), -} - - -@pytest.fixture(scope="module") -def t(client): - return client.table("df", schema=t_schema) - - -@pytest.fixture(scope="module") -def lahman(batting_df, awards_players_df): - return ibis.dask.connect( - {"batting": batting_df, "awards_players": awards_players_df} - ) - - -@pytest.fixture(scope="module") -def left(client): - return client.table("left") - - -@pytest.fixture(scope="module") -def right(client): - return client.table("right") - - -@pytest.fixture(scope="module") -def time_left(client): - return client.table("time_df1") - - -@pytest.fixture(scope="module") -def time_right(client): - return client.table("time_df2") - - -@pytest.fixture(scope="module") -def time_table(client): - return client.table("time_df3") - - -@pytest.fixture(scope="module") -def time_keyed_left(client): - return client.table("time_keyed_df1") - - -@pytest.fixture(scope="module") -def time_keyed_right(client): - return client.table("time_keyed_df2") - - -@pytest.fixture(scope="module") -def batting(lahman): - return lahman.table("batting") - - -@pytest.fixture(scope="module") -def sel_cols(batting): - cols = batting.columns - start, end = cols.index("AB"), cols.index("H") + 1 - return ["playerID", "yearID", "teamID", "G"] + cols[start:end] - - -@pytest.fixture(scope="module") -def players_base(batting, sel_cols): - # TODO Dask doesn't support order_by and group_by yet - # Adding an order by would cause all groupby tests to fail. 
- return batting[sel_cols] # .order_by(sel_cols[:3]) - - -@pytest.fixture(scope="module") -def players(players_base): - return players_base.group_by("playerID") - - -@pytest.fixture(scope="module") -def players_df(players_base): - return players_base.execute().reset_index(drop=True) diff --git a/ibis/backends/dask/tests/execution/test_maps.py b/ibis/backends/dask/tests/execution/test_maps.py deleted file mode 100644 index 0967ac912a80..000000000000 --- a/ibis/backends/dask/tests/execution/test_maps.py +++ /dev/null @@ -1,109 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd -import pytest - -import ibis - -dd = pytest.importorskip("dask.dataframe") -from dask.dataframe.utils import tm # noqa: E402 - - -def test_map_length_expr(t): - expr = t.map_of_integers_strings.length() - result = expr.compile() - expected = dd.from_pandas( - pd.Series([0, None, 2], name="map_of_integers_strings"), - npartitions=1, - ) - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) - - -def test_map_value_for_key_expr(t): - expr = t.map_of_integers_strings[1] - result = expr.compile() - expected = dd.from_pandas( - pd.Series([None, None, "a"], name="map_of_integers_strings"), - npartitions=1, - ) - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) - - -def test_map_value_or_default_for_key_expr(t): - expr = t.map_of_complex_values.get("a") - result = expr.compile() - expected = dd.from_pandas( - pd.Series( - [None, [1, 2, 3], None], - dtype="object", - name="map_of_complex_values", - ), - npartitions=1, - ) - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) - - -def safe_sorter(element): - return sorted(element) if isinstance(element, list) else element - - -def test_map_keys_expr(t): - expr = t.map_of_strings_integers.keys() - result = expr.compile().map(safe_sorter) - expected = dd.from_pandas( - pd.Series( - [["a", "b"], None, []], - dtype="object", - name="map_of_strings_integers", - ), - npartitions=1, - ) - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) - - -def test_map_values_expr(t): - expr = t.map_of_complex_values.values() - result = expr.compile().map(safe_sorter) - expected = dd.from_pandas( - pd.Series( - [ - None, - np.array([[1, 2, 3], []], dtype="object"), - np.array([], dtype="object"), - ], - dtype="object", - name="map_of_complex_values", - ), - npartitions=1, - ) - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) - - -def test_map_concat_expr(t): - expr = t.map_of_complex_values + {"b": [4, 5, 6], "c": [], "a": []} - result = expr.compile() - expected = dd.from_pandas( - pd.Series( - [ - None, - {"a": [], "b": [4, 5, 6], "c": []}, - {"b": [4, 5, 6], "c": [], "a": []}, - ], - dtype="object", - name="map_of_complex_values", - ), - npartitions=1, - ) - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) - - -def test_map_value_for_key_literal_broadcast(t): - lookup_table = ibis.literal({"a": 1, "b": 2, "c": 3, "d": 4}) - expr = lookup_table.get(t.dup_strings) - result = expr.compile() - expected = dd.from_pandas( - pd.Series([4, 1, 4], name="dup_strings"), - npartitions=1, - ) - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) diff --git a/ibis/backends/dask/tests/execution/test_timecontext.py b/ibis/backends/dask/tests/execution/test_timecontext.py deleted file mode 100644 index 30984f190849..000000000000 --- 
a/ibis/backends/dask/tests/execution/test_timecontext.py +++ /dev/null @@ -1,313 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -import pytest -from pandas import Timedelta, Timestamp - -import ibis -import ibis.common.exceptions as com -import ibis.expr.operations as ops -from ibis.backends.base.df.timecontext import ( - TimeContext, - TimeContextRelation, - adjust_context, - compare_timecontext, -) -from ibis.backends.pandas.execution import execute - -dd = pytest.importorskip("dask.dataframe") - -from dask.dataframe.utils import tm # noqa: E402 - -if TYPE_CHECKING: - from ibis.backends.base.df.scope import Scope - - -class CustomAsOfJoin(ops.AsOfJoin): - pass - - -def test_execute_with_timecontext(time_table): - expr = time_table - # define a time context for time-series data - context = (Timestamp("20170101"), Timestamp("20170103")) - - # without time context, execute produces every row - df_all = expr.execute() - assert len(df_all["time"]) == 8 - - # with context set, execute produces only rows within context - df_within_context = expr.execute(timecontext=context) - assert len(df_within_context["time"]) == 1 - - -def test_bad_timecontext(time_table, t): - expr = time_table - - # define context with illegal string - with pytest.raises(com.IbisError, match=r".*type pd.Timestamp.*"): - context = ("bad", "context") - expr.execute(timecontext=context) - - # define context with unsupported type int - with pytest.raises(com.IbisError, match=r".*type pd.Timestamp.*"): - context = (20091010, 20100101) - expr.execute(timecontext=context) - - # define context with too few values - with pytest.raises(com.IbisError, match=r".*should specify.*"): - context = Timestamp("20101010") - expr.execute(timecontext=context) - - # define context with begin value later than end - with pytest.raises(com.IbisError, match=r".*before or equal.*"): - context = (Timestamp("20101010"), Timestamp("20090101")) - expr.execute(timecontext=context) - - # execute context with a table without time column - with pytest.raises(com.IbisError, match=r".*must have a time column.*"): - context = (Timestamp("20090101"), Timestamp("20100101")) - t.execute(timecontext=context) - - -def test_compare_timecontext(): - c1 = (Timestamp("20170101"), Timestamp("20170103")) - c2 = (Timestamp("20170101"), Timestamp("20170111")) - c3 = (Timestamp("20160101"), Timestamp("20160103")) - c4 = (Timestamp("20161215"), Timestamp("20170102")) - assert compare_timecontext(c1, c2) == TimeContextRelation.SUBSET - assert compare_timecontext(c2, c1) == TimeContextRelation.SUPERSET - assert compare_timecontext(c1, c4) == TimeContextRelation.OVERLAP - assert compare_timecontext(c1, c3) == TimeContextRelation.NONOVERLAP - - -def test_context_adjustment_asof_join( - time_keyed_left, time_keyed_right, time_keyed_df1, time_keyed_df2 -): - expr = time_keyed_left.asof_join( - time_keyed_right, "time", by="key", tolerance=4 * ibis.interval(days=1) - )[time_keyed_left, time_keyed_right.other_value] - context = (Timestamp("20170105"), Timestamp("20170111")) - result = expr.execute(timecontext=context) - - # compare with asof_join of manually trimmed tables - trimmed_df1 = time_keyed_df1[time_keyed_df1["time"] >= context[0]][ - time_keyed_df1["time"] < context[1] - ] - trimmed_df2 = time_keyed_df2[ - time_keyed_df2["time"] >= context[0] - Timedelta(days=4) - ][time_keyed_df2["time"] < context[1]] - expected = dd.merge_asof( - trimmed_df1, - trimmed_df2, - on="time", - by="key", - tolerance=Timedelta("4D"), - ).compute() - 
tm.assert_frame_equal(result, expected) - - -@pytest.mark.xfail(reason="TODO - windowing - #2553") -@pytest.mark.parametrize( - ["interval_ibis", "interval_pd"], - [ - (ibis.interval(days=1), "1d"), - (3 * ibis.interval(days=1), "3d"), - (5 * ibis.interval(days=1), "5d"), - ], -) -def test_context_adjustment_window(time_table, time_df3, interval_ibis, interval_pd): - # trim data manually - expected = ( - time_df3.set_index("time").value.rolling(interval_pd, closed="both").mean() - ) - expected = expected[expected.index >= Timestamp("20170105")].reset_index(drop=True) - - context = Timestamp("20170105"), Timestamp("20170111") - - window = ibis.trailing_window(interval_ibis, order_by=time_table.time) - expr = time_table["value"].mean().over(window) - # result should adjust time context accordingly - result = expr.execute(timecontext=context) - tm.assert_series_equal(result, expected) - - -@pytest.mark.xfail(reason="TODO - windowing - #2553") -def test_setting_timecontext_in_scope(time_table, time_df3): - expected_win_1 = ( - time_df3.compute().set_index("time").value.rolling("3d", closed="both").mean() - ) - expected_win_1 = expected_win_1[ - expected_win_1.index >= Timestamp("20170105") - ].reset_index(drop=True) - - context = Timestamp("20170105"), Timestamp("20170111") - window1 = ibis.trailing_window(3 * ibis.interval(days=1), order_by=time_table.time) - """In the following expression, Selection node will be executed first and - get table in context ('20170105', '20170101'). - - Then in window execution table will be executed again with a larger - context adjusted by window preceding days ('20170102', '20170111'). - To get the correct result, the cached table result with a smaller - context must be discard and updated to a larger time range. - """ - expr = time_table.mutate(value=time_table["value"].mean().over(window1)) - result = expr.execute(timecontext=context) - tm.assert_series_equal(result["value"], expected_win_1) - - -@pytest.mark.xfail(reason="TODO - windowing - #2553") -def test_context_adjustment_multi_window(time_table, time_df3): - expected_win_1 = ( - time_df3.compute() - .set_index("time") - .rename(columns={"value": "v1"})["v1"] - .rolling("3d", closed="both") - .mean() - ) - expected_win_1 = expected_win_1[ - expected_win_1.index >= Timestamp("20170105") - ].reset_index(drop=True) - - expected_win_2 = ( - time_df3.compute() - .set_index("time") - .rename(columns={"value": "v2"})["v2"] - .rolling("2d", closed="both") - .mean() - ) - expected_win_2 = expected_win_2[ - expected_win_2.index >= Timestamp("20170105") - ].reset_index(drop=True) - - context = Timestamp("20170105"), Timestamp("20170111") - window1 = ibis.trailing_window(3 * ibis.interval(days=1), order_by=time_table.time) - window2 = ibis.trailing_window(2 * ibis.interval(days=1), order_by=time_table.time) - expr = time_table.mutate( - v1=time_table["value"].mean().over(window1), - v2=time_table["value"].mean().over(window2), - ) - result = expr.execute(timecontext=context) - - tm.assert_series_equal(result["v1"], expected_win_1) - tm.assert_series_equal(result["v2"], expected_win_2) - - -@pytest.mark.xfail(reason="TODO - windowing - #2553") -def test_context_adjustment_window_groupby_id(time_table, time_df3): - """This test case is meant to test trim_window_result method in - dask/execution/window.py to see if it could trim Series correctly with - groupby params.""" - expected = ( - time_df3.compute() - .set_index("time") - .groupby("id") - .value.rolling("3d", closed="both") - .mean() - ) - # This is a 
MultiIndexed Series - expected = expected.reset_index() - expected = expected[expected.time >= Timestamp("20170105")].reset_index(drop=True)[ - "value" - ] - - context = Timestamp("20170105"), Timestamp("20170111") - - # expected.index.name = None - window = ibis.trailing_window( - 3 * ibis.interval(days=1), group_by="id", order_by=time_table.time - ) - expr = time_table["value"].mean().over(window) - # result should adjust time context accordingly - result = expr.execute(timecontext=context) - tm.assert_series_equal(result, expected) - - -def test_adjust_context_scope(time_keyed_left, time_keyed_right): - """Test that `adjust_context` has access to `scope` by default.""" - - @adjust_context.register(CustomAsOfJoin) - def adjust_context_custom_asof_join( - op: ops.AsOfJoin, - scope: Scope, - timecontext: TimeContext, - ) -> TimeContext: - """Confirms that `scope` is passed in.""" - assert scope is not None - return timecontext - - expr = CustomAsOfJoin( - left=time_keyed_left, - right=time_keyed_right, - predicates="time", - by="key", - tolerance=ibis.interval(days=4), - ).to_expr() - expr = expr[time_keyed_left, time_keyed_right.other_value] - context = (Timestamp("20170105"), Timestamp("20170111")) - expr.execute(timecontext=context) - - -def test_adjust_context_complete_shift( - time_keyed_left, - time_keyed_right, - time_keyed_df1, - time_keyed_df2, -): - """Test `adjust_context` function that completely shifts the context. - - This results in an adjusted context that is NOT a subset of the - original context. This is unlike an `adjust_context` function - that only expands the context. - - See #3104 - """ - - # Create a contrived `adjust_context` function for - # CustomAsOfJoin to mock this. - - @adjust_context.register(CustomAsOfJoin) - def adjust_context_custom_asof_join( - op: ops.AsOfJoin, - scope: Scope, - timecontext: TimeContext, - ) -> TimeContext: - """Shifts both the begin and end in the same direction.""" - begin, end = timecontext - timedelta = execute(op.tolerance) - return (begin - timedelta, end - timedelta) - - expr = CustomAsOfJoin( - left=time_keyed_left, - right=time_keyed_right, - predicates="time", - by="key", - tolerance=ibis.interval(days=4), - ).to_expr() - expr = expr[time_keyed_left, time_keyed_right.other_value] - context = (Timestamp("20170101"), Timestamp("20170111")) - result = expr.execute(timecontext=context) - - # Compare with asof_join of manually trimmed tables - # Left table: No shift for context - # Right table: Shift both begin and end of context by 4 days - trimmed_df1 = time_keyed_df1[time_keyed_df1["time"] >= context[0]][ - time_keyed_df1["time"] < context[1] - ] - trimmed_df2 = time_keyed_df2[ - time_keyed_df2["time"] >= context[0] - Timedelta(days=4) - ][time_keyed_df2["time"] < context[1] - Timedelta(days=4)] - expected = ( - dd.merge_asof( - trimmed_df1, - trimmed_df2, - on="time", - by="key", - tolerance=Timedelta("4D"), - ) - .compute() - .reset_index(drop=True) - ) - - tm.assert_frame_equal(result, expected) diff --git a/ibis/backends/dask/tests/execution/test_util.py b/ibis/backends/dask/tests/execution/test_util.py deleted file mode 100644 index 153cc3c4f427..000000000000 --- a/ibis/backends/dask/tests/execution/test_util.py +++ /dev/null @@ -1,30 +0,0 @@ -from __future__ import annotations - -import pytest - -from ibis.backends.dask.execution.util import assert_identical_grouping_keys - -pytest.importorskip("dask.dataframe") - - -@pytest.mark.parametrize( - "grouping, bad_grouping", - [ - ("dup_strings", "dup_ints"), - 
(["dup_strings"], ["dup_ints"]), - (["dup_strings", "dup_ints"], ["dup_ints", "dup_strings"]), - ], -) -def test_identical_grouping_keys_assertion(df, grouping, bad_grouping): - gdf = df.groupby(grouping) - - a = gdf.plain_int64 - b = gdf.plain_strings - - # should not raise - assert_identical_grouping_keys(a, b) - - c = df.groupby(bad_grouping).plain_int64 - - with pytest.raises(AssertionError, match=r"Differing grouping keys passed*"): - assert_identical_grouping_keys(a, b, c) diff --git a/ibis/backends/dask/tests/execution/test_arrays.py b/ibis/backends/dask/tests/test_arrays.py similarity index 82% rename from ibis/backends/dask/tests/execution/test_arrays.py rename to ibis/backends/dask/tests/test_arrays.py index 8ebdab2756e4..107cca5fedaa 100644 --- a/ibis/backends/dask/tests/execution/test_arrays.py +++ b/ibis/backends/dask/tests/test_arrays.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd import pytest -from pytest import param import ibis @@ -19,21 +18,17 @@ def test_array_length(t): t.array_of_int64.length().name("array_of_int64_length"), t.array_of_strings.length().name("array_of_strings_length"), ) - result = expr.compile() - expected = dd.from_pandas( - pd.DataFrame( - { - "array_of_float64_length": [2, 1, 0], - "array_of_int64_length": [2, 0, 1], - "array_of_strings_length": [2, 0, 1], - } - ), - npartitions=1, + result = expr.execute() + expected = pd.DataFrame( + { + "array_of_float64_length": [2, 1, 0], + "array_of_int64_length": [2, 0, 1], + "array_of_strings_length": [2, 0, 1], + } ) tm.assert_frame_equal( - result.compute().reset_index(drop=True), - expected.compute().reset_index(drop=True), + result.reset_index(drop=True), expected.reset_index(drop=True) ) @@ -61,7 +56,6 @@ def test_array_collect(t, df): ) -@pytest.mark.notimpl(["dask"], reason="windowing - #2553") def test_array_collect_rolling_partitioned(t, df): window = ibis.trailing_window(1, order_by=t.plain_int64) colexpr = t.plain_float64.collect().over(window) @@ -137,25 +131,22 @@ def test_array_slice_scalar(client, start, stop): @pytest.mark.parametrize( "index", - [param(1, marks=pytest.mark.xfail_version(dask=["pandas>=2"])), 3, 4, 11, -11], + [1, 3, 4, 11, -11], ) def test_array_index(t, df, index): expr = t[t.array_of_float64[index].name("indexed")] - result = expr.compile() - expected = dd.from_pandas( - pd.DataFrame( - { - "indexed": df.array_of_float64.apply( - lambda x: x[index] if -len(x) <= index < len(x) else None, - meta=("array_of_float64", "object"), - ) - } - ), - npartitions=1, + result = expr.execute() + expected = pd.DataFrame( + { + "indexed": df.array_of_float64.apply( + lambda x: x[index] if -len(x) <= index < len(x) else np.nan, + meta=("array_of_float64", "object"), + ) + } ) + tm.assert_frame_equal( - result.compute().reset_index(drop=True), - expected.compute().reset_index(drop=True), + result.reset_index(drop=True), expected.reset_index(drop=True) ) @@ -169,7 +160,6 @@ def test_array_index_scalar(client, index): assert result == expected -@pytest.mark.notimpl(["dask"], reason="arrays - #2553") @pytest.mark.parametrize("n", [1, 3, 4, 7, -2]) # negative returns empty list @pytest.mark.parametrize("mul", [lambda x, n: x * n, lambda x, n: n * x]) def test_array_repeat(t, df, n, mul): diff --git a/ibis/backends/dask/tests/execution/test_cast.py b/ibis/backends/dask/tests/test_cast.py similarity index 62% rename from ibis/backends/dask/tests/execution/test_cast.py rename to ibis/backends/dask/tests/test_cast.py index 530708babbc4..55446626eaca 100644 --- 
a/ibis/backends/dask/tests/execution/test_cast.py +++ b/ibis/backends/dask/tests/test_cast.py @@ -2,19 +2,18 @@ import decimal +import pandas as pd import pytest import pytz -from pandas import Timestamp from pytest import param import ibis import ibis.expr.datatypes as dt +from ibis.backends.conftest import is_older_than pytest.importorskip("dask.dataframe") from dask.dataframe.utils import tm # noqa: E402 -from ibis.backends.dask.execution import execute # noqa: E402 - TIMESTAMP = "2022-03-13 06:59:10.467417" @@ -53,9 +52,14 @@ def test_cast_string(t, df, from_, to, expected): @pytest.mark.parametrize( ("to", "expected"), [ - ("string", "object"), + pytest.param( + "string", + "object", + marks=pytest.mark.skipif( + is_older_than("pandas", "2.0.0"), reason="raises a NotImplementError" + ), + ), ("int64", "int64"), - param("double", "float64", marks=pytest.mark.xfail(raises=TypeError)), ( dt.Timestamp("America/Los_Angeles"), "datetime64[ns, America/Los_Angeles]", @@ -79,32 +83,40 @@ def test_cast_timestamp_column(t, df, column, to, expected): @pytest.mark.parametrize( ("to", "expected"), [ - ("string", str), - ("int64", lambda x: Timestamp(x).value // int(1e9)), - param( - "double", - float, - marks=pytest.mark.notimpl(["dask"]), + pytest.param( + "string", + str, + marks=pytest.mark.skipif( + is_older_than("pandas", "2.0.0"), reason="raises a NotImplementError" + ), ), + ("int64", lambda x: pd.Timestamp(x).value // int(1e9)), + ("double", lambda x: float(pd.Timestamp(x).value // int(1e9))), ( dt.Timestamp("America/Los_Angeles"), - lambda x: x.astimezone(tz=pytz.timezone("America/Los_Angeles")), + lambda x: x.tz_localize(tz="America/Los_Angeles"), ), ], ) -def test_cast_timestamp_scalar_naive(to, expected): - literal_expr = ibis.literal(Timestamp(TIMESTAMP)) +def test_cast_timestamp_scalar_naive(con, to, expected): + literal_expr = ibis.literal(pd.Timestamp(TIMESTAMP)) value = literal_expr.cast(to) - result = execute(value.op()) - raw = execute(literal_expr.op()) + result = con.execute(value) + raw = con.execute(literal_expr) assert result == expected(raw) @pytest.mark.parametrize( ("to", "expected"), [ - ("string", str), - ("int64", lambda x: Timestamp(x).value // int(1e9)), + pytest.param( + "string", + str, + marks=pytest.mark.skipif( + is_older_than("pandas", "2.0.0"), reason="raises a NotImplementError" + ), + ), + ("int64", lambda x: pd.Timestamp(x).value // int(1e9)), param("double", float, marks=pytest.mark.notimpl(["dask"])), ( dt.Timestamp("America/Los_Angeles"), @@ -113,11 +125,11 @@ def test_cast_timestamp_scalar_naive(to, expected): ], ) @pytest.mark.parametrize("tz", ["UTC", "America/New_York"]) -def test_cast_timestamp_scalar(to, expected, tz): - literal_expr = ibis.literal(Timestamp(TIMESTAMP).tz_localize(tz)) +def test_cast_timestamp_scalar(to, expected, tz, con): + literal_expr = ibis.literal(pd.Timestamp(TIMESTAMP).tz_localize(tz)) value = literal_expr.cast(to) - result = execute(value.op()) - raw = execute(literal_expr.op()) + result = con.execute(value) + raw = con.execute(literal_expr) assert result == expected(raw) @@ -133,30 +145,34 @@ def test_timestamp_with_timezone_is_inferred_correctly(t): ) def test_cast_date(t, df, column): expr = t[column].cast("date") - result = expr.compile() - expected = df[column].dt.normalize() - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + result = expr.execute() + expected = ( + df[column] + .dt.normalize() + .map(lambda x: x.date()) + .compute() + .rename(expr.get_name()) + ) + 
tm.assert_series_equal(result, expected, check_index=False) -@pytest.mark.parametrize("type", [dt.Decimal(9, 0), dt.Decimal(12, 3)]) -def test_cast_to_decimal(t, df, type): +@pytest.mark.parametrize("type", [dt.Decimal(9, 2), dt.Decimal(12, 3)]) +def test_cast_to_decimal(t, pandas_df, type): expr = t.float64_as_strings.cast(type) - result = expr.compile() + result = expr.execute() context = decimal.Context(prec=type.precision) - expected = df.float64_as_strings.apply( + expected = pandas_df.float64_as_strings.apply( lambda x: context.create_decimal(x).quantize( decimal.Decimal( "{}.{}".format("0" * (type.precision - type.scale), "0" * type.scale) ) - ), - meta=("float64_as_strings", "object"), + ) ) - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + tm.assert_series_equal(result, expected, check_names=False) assert all( - abs(element.as_tuple().exponent) == type.scale - for element in result.compute().values + abs(element.as_tuple().exponent) == type.scale for element in result.values ) assert all( 1 <= len(element.as_tuple().digits) <= type.precision - for element in result.compute().values + for element in result.values ) diff --git a/ibis/backends/dask/tests/test_core.py b/ibis/backends/dask/tests/test_core.py index 2e97232e5d02..6f480ff0d921 100644 --- a/ibis/backends/dask/tests/test_core.py +++ b/ibis/backends/dask/tests/test_core.py @@ -1,111 +1,39 @@ from __future__ import annotations +import pandas as pd import pytest from dask.dataframe.utils import tm import ibis -import ibis.common.exceptions as com -import ibis.expr.operations as ops -from ibis.backends.base.df.scope import Scope -from ibis.backends.pandas.dispatch import execute_node as pandas_execute_node dd = pytest.importorskip("dask.dataframe") -import pandas as pd # noqa: E402 -from ibis.backends.dask.core import execute # noqa: E402 -from ibis.backends.dask.dispatch import ( # noqa: E402 - execute_node, - post_execute, - pre_execute, -) -dd = pytest.importorskip("dask.dataframe") - - -def test_table_from_dataframe(dataframe, ibis_table, core_client): - t = core_client.from_dataframe(dataframe) +def test_table_from_dataframe(dataframe, ibis_table, con): + t = con.from_dataframe(dataframe) result = t.execute() expected = ibis_table.execute() tm.assert_frame_equal(result, expected) - t = core_client.from_dataframe(dataframe, name="foo") + t = con.from_dataframe(dataframe, name="foo") expected = ibis_table.execute() tm.assert_frame_equal(result, expected) - client = core_client - t = core_client.from_dataframe(dataframe, name="foo", client=client) + t = con.from_dataframe(dataframe, name="foo", client=con) expected = ibis_table.execute() tm.assert_frame_equal(result, expected) -def test_array_literal_from_series(core_client): +def test_array_literal_from_series(con): values = [1, 2, 3, 4] s = dd.from_pandas(pd.Series(values), npartitions=1) expr = ibis.array(s) assert expr.equals(ibis.array(values)) - assert core_client.execute(expr) == pytest.approx([1, 2, 3, 4]) - - -def test_pre_execute_basic(): - """Test that pre_execute has intercepted execution and provided its own - scope dict.""" + assert con.execute(expr) == pytest.approx([1, 2, 3, 4]) - @pre_execute.register(ops.Add) - def pre_execute_test(op, *clients, scope=None, **kwargs): - return Scope({op: 4}, None) - one = ibis.literal(1) - expr = one + one - result = execute(expr.op()) - assert result == 4 - - del pre_execute.funcs[(ops.Add,)] - pre_execute.reorder() - pre_execute._cache.clear() - - -def test_execute_parameter_only(): +def 
test_execute_parameter_only(con): param = ibis.param("int64") - result = execute(param.op(), params={param.op(): 42}) + result = con.execute(param, params={param.op(): 42}) assert result == 42 - - -def test_missing_data_sources(): - t = ibis.table([("a", "string")]) - expr = t.a.length() - with pytest.raises(com.UnboundExpressionError): - execute(expr.op()) - - -def test_post_execute_called_on_joins(dataframe, core_client, ibis_table): - count = [0] - - @post_execute.register(ops.InnerJoin, dd.DataFrame) - def tmp_left_join_exe(op, lhs, **kwargs): - count[0] += 1 - return lhs - - left = ibis_table - right = left.view() - join = left.join(right, "plain_strings")[left.plain_int64] - result = join.execute() - assert result is not None - assert len(result.index) > 0 - assert count[0] == 1 - - -def test_scope_look_up(): - # test if scope could lookup items properly - scope = Scope() - one_day = ibis.interval(days=1).op() - one_hour = ibis.interval(hours=1).op() - scope = scope.merge_scope(Scope({one_day: 1}, None)) - assert scope.get_value(one_hour) is None - assert scope.get_value(one_day) is not None - - -def test_new_dispatcher(): - types = (ops.TableColumn, dd.DataFrame) - assert execute_node.dispatch(*types) is not None - assert pandas_execute_node.dispatch(*types).__name__ == "raise_unknown_op" diff --git a/ibis/backends/dask/tests/test_dispatcher.py b/ibis/backends/dask/tests/test_dispatcher.py deleted file mode 100644 index 09e21f4a9f31..000000000000 --- a/ibis/backends/dask/tests/test_dispatcher.py +++ /dev/null @@ -1,143 +0,0 @@ -from __future__ import annotations - -import pytest -from multipledispatch import Dispatcher - -from ibis.backends.dask.trace import TwoLevelDispatcher - - -class A1: - pass - - -class A2(A1): - pass - - -class A3(A2): - pass - - -class B1: - pass - - -class B2(B1): - pass - - -class B3(B2): - pass - - -@pytest.fixture -def foo_dispatchers(): - foo = TwoLevelDispatcher("foo", doc="Test dispatcher foo") - foo_m = Dispatcher("foo_m", doc="Control dispatcher foo_m") - - @foo.register(A1, B1) - @foo_m.register(A1, B1) - def foo0(x, y): - return 0 - - @foo.register(A1, B2) - @foo_m.register(A1, B2) - def foo1(x, y): - return 1 - - @foo.register(A2, B1) - @foo_m.register(A2, B1) - def foo2(x, y): - return 2 - - @foo.register(A2, B2) - @foo_m.register(A2, B2) - def foo3(x, y): - return 3 - - @foo.register( - (A1, A2), - ) - @foo_m.register( - (A1, A2), - ) - def foo4(x): - return 4 - - return foo, foo_m - - -@pytest.fixture -def foo(foo_dispatchers): - return foo_dispatchers[0] - - -@pytest.fixture -def foo_m(foo_dispatchers): - return foo_dispatchers[1] - - -def test_cache(foo, mocker): - """Test that cache is properly set after calling with args.""" - - spy = mocker.spy(foo, "dispatch") - a1, b1 = A1(), B1() - - assert (A1, B1) not in foo._cache - foo(a1, b1) - assert (A1, B1) in foo._cache - foo(a1, b1) - spy.assert_called_once_with(A1, B1) - - -def test_dispatch(foo, mocker): - """Test that calling dispatcher with a signature that is registered does - not trigger a linear search through dispatch_iter.""" - - spy = mocker.spy(foo, "dispatch_iter") - - # This should not trigger a linear search - foo(A1(), B1()) - assert not spy.called, ( - "Calling dispatcher with registered signature should " - "not trigger linear search" - ) - - foo(A3(), B3()) - spy.assert_called_once_with(A3, B3) - - -@pytest.mark.parametrize( - "args", - [ - (A1(), B1()), - (A1(), B2()), - (A1(), B3()), - (A2(), B1()), - (A2(), B2()), - (A2(), B3()), - (A3(), B1()), - (A3(), B2()), - (A3(), 
B3()), - (A1(),), - (A2(),), - (A3(),), - ], -) -def test_registered(foo_dispatchers, args): - foo, foo_m = foo_dispatchers - assert foo(*args) == foo_m(*args) - - -def test_ordering(foo, foo_m): - assert foo.ordering == foo_m.ordering - - -def test_funcs(foo, foo_m): - assert foo.funcs == foo_m.funcs - - -@pytest.mark.parametrize("args", [(B1(),), (B2(),), (A1(), A1()), (A1(), A2(), A3())]) -def test_unregistered(foo, args): - with pytest.raises(NotImplementedError, match="Could not find signature for foo.*"): - foo(*args) diff --git a/ibis/backends/dask/tests/execution/test_functions.py b/ibis/backends/dask/tests/test_functions.py similarity index 51% rename from ibis/backends/dask/tests/execution/test_functions.py rename to ibis/backends/dask/tests/test_functions.py index 18cb39a90534..4f65abb48cb3 100644 --- a/ibis/backends/dask/tests/execution/test_functions.py +++ b/ibis/backends/dask/tests/test_functions.py @@ -13,12 +13,7 @@ import ibis import ibis.expr.datatypes as dt -from ibis.common.exceptions import OperationNotDefinedError - -dd = pytest.importorskip("dask.dataframe") -from dask.dataframe.utils import tm # noqa: E402 - -from ibis.backends.dask.execution import execute # noqa: E402 +from ibis.backends.dask.tests.conftest import TestConf as tm @pytest.mark.parametrize( @@ -35,22 +30,22 @@ ) def test_binary_operations(t, df, op): expr = op(t.plain_float64, t.plain_int64) - result = expr.compile() - expected = op(df.plain_float64, df.plain_int64) + result = expr.execute() + expected = op(df.plain_float64, df.plain_int64).compute() tm.assert_series_equal( - result.compute().reset_index(drop=True), - expected.compute().reset_index(drop=True), + result.reset_index(drop=True).rename("tmp"), + expected.reset_index(drop=True).rename("tmp"), ) @pytest.mark.parametrize("op", [operator.and_, operator.or_, operator.xor]) -def test_binary_boolean_operations(t, df, op): +def test_binary_boolean_operations(t, pandas_df, op): expr = op(t.plain_int64 == 1, t.plain_int64 == 2) - result = expr.compile() - expected = op(df.plain_int64 == 1, df.plain_int64 == 2) + result = expr.execute() + expected = op(pandas_df.plain_int64 == 1, pandas_df.plain_int64 == 2) tm.assert_series_equal( - result.compute().reset_index(drop=True), - expected.compute().reset_index(drop=True), + result.reset_index(drop=True), + expected.reset_index(drop=True), ) @@ -66,9 +61,8 @@ def wrapper(*args, **kwargs): @pytest.mark.parametrize( - ("ibis_func", "dask_func"), + ("ibis_func", "pandas_func"), [ - param(methodcaller("round"), lambda x: np.int64(round(x)), id="round"), param( methodcaller("round", 2), lambda x: x.quantize(decimal.Decimal(".00")), @@ -83,7 +77,15 @@ def wrapper(*args, **kwargs): param( methodcaller("floor"), lambda x: decimal.Decimal(math.floor(x)), id="floor" ), - param(methodcaller("exp"), methodcaller("exp"), id="exp"), + param( + methodcaller("exp"), + methodcaller("exp"), + id="exp", + marks=pytest.mark.xfail( + reason="Unable to normalize Decimal('2.71513316E+43') as decimal with precision 12 and scale 3", + raises=TypeError, + ), + ), param( methodcaller("sign"), lambda x: x if not x else decimal.Decimal(1).copy_sign(x), @@ -104,51 +106,35 @@ def wrapper(*args, **kwargs): param(methodcaller("log10"), operate(lambda x: x.log10()), id="log10"), ], ) -def test_math_functions_decimal(t, df, ibis_func, dask_func): +def test_math_functions_decimal(t, pandas_df, ibis_func, pandas_func): dtype = dt.Decimal(12, 3) - result = ibis_func(t.float64_as_strings.cast(dtype)).compile() context = 
decimal.Context(prec=dtype.precision) - expected = df.float64_as_strings.apply( - lambda x: context.create_decimal(x).quantize( - decimal.Decimal( - f"{'0' * (dtype.precision - dtype.scale)}.{'0' * dtype.scale}" - ) - ), - meta=("float64_as_strings", "object"), - ).apply(dask_func, meta=("float64_as_strings", "object")) - # dask.dataframe.Series doesn't do direct item assignment - # TODO - maybe use .where instead - computed_result = result.compute().reset_index(drop=True) - computed_result[computed_result.apply(math.isnan)] = -99999 - computed_expected = expected.compute().reset_index(drop=True) - computed_expected[computed_expected.apply(math.isnan)] = -99999 - # result[result.apply(math.isnan)] = -99999 - # expected[expected.apply(math.isnan)] = -99999 - tm.assert_series_equal(computed_result, computed_expected) - - -def test_round_decimal_with_negative_places(t, df): + p = decimal.Decimal(f"{'0' * (dtype.precision - dtype.scale)}.{'0' * dtype.scale}") + + def func(x): + x = context.create_decimal(x) + x = pandas_func(x) + if math.isnan(x): + return float("nan") + return x.quantize(p) + + expr = ibis_func(t.float64_as_strings.cast(dtype)) + result = expr.execute() + expected = pandas_df.float64_as_strings.map(func, na_action="ignore") + tm.assert_series_equal(result, expected, check_names=False) + + +def test_round_decimal_with_negative_places(t): type = dt.Decimal(12, 3) expr = t.float64_as_strings.cast(type).round(-1) - result = expr.compile() - expected = dd.from_pandas( - pd.Series( - list(map(decimal.Decimal, ["1.0E+2", "2.3E+2", "-1.00E+3"])), - name="float64_as_strings", - ), - npartitions=1, - ) - tm.assert_series_equal( - result.compute().reset_index(drop=True), - expected.compute().reset_index(drop=True), + result = expr.execute() + expected = pd.Series( + list(map(decimal.Decimal, ["1.0E+2", "2.3E+2", "-1.00E+3"])), + name="float64_as_strings", ) + tm.assert_series_equal(result, expected) -@pytest.mark.xfail( - raises=OperationNotDefinedError, - reason="TODO - arrays - #2553", - # Need an ops.MultiQuantile execution func that dispatches on ndarrays -) @pytest.mark.parametrize( ("ibis_func", "dask_func"), [ @@ -159,10 +145,10 @@ def test_round_decimal_with_negative_places(t, df): ], ) @pytest.mark.parametrize("column", ["float64_with_zeros", "int64_with_zeros"]) -def test_quantile_list(t, df, ibis_func, dask_func, column): +def test_quantile_list(t, pandas_df, ibis_func, dask_func, column): expr = ibis_func(t[column]) - result = expr.compile() - expected = dask_func(df[column]) + result = expr.execute() + expected = dask_func(pandas_df[column]) assert result == expected @@ -177,14 +163,14 @@ def test_quantile_list(t, df, ibis_func, dask_func, column): ), ], ) -def test_quantile_scalar(t, df, ibis_func, dask_func): - result = ibis_func(t.float64_with_zeros).compile() - expected = dask_func(df.float64_with_zeros) - assert result.compute() == expected.compute() +def test_quantile_scalar(t, pandas_df, ibis_func, dask_func): + result = ibis_func(t.float64_with_zeros).execute() + expected = dask_func(pandas_df.float64_with_zeros) + assert result == expected - result = ibis_func(t.int64_with_zeros).compile() - expected = dask_func(df.int64_with_zeros) - assert result.compute() == expected.compute() + result = ibis_func(t.int64_with_zeros).execute() + expected = dask_func(pandas_df.int64_with_zeros) + assert result == expected @pytest.mark.parametrize( @@ -201,24 +187,11 @@ def test_arraylike_functions_transform_errors(t, df, ibis_func, exc): ibis_func(t.float64_with_zeros).execute() 
-@pytest.mark.xfail( - raises=OperationNotDefinedError, - reason="TODO - arrays - #2553", - # Need an ops.MultiQuantile execution func that dispatches on ndarrays -) -def test_quantile_array_access(client, t, df): - quantile = t.float64_with_zeros.quantile([0.25, 0.5]) - expr = quantile[0], quantile[1] - result = tuple(map(client.execute, expr)) - expected = tuple(df.float64_with_zeros.quantile([0.25, 0.5])) - assert result == expected - - -def test_ifelse_returning_bool(): +def test_ifelse_returning_bool(con): one = ibis.literal(1) two = ibis.literal(2) true = ibis.literal(True) false = ibis.literal(False) expr = ibis.ifelse(one + one == two, true, false) - result = execute(expr.op()) - assert result is True + result = con.execute(expr) + assert result is np.bool_(True) diff --git a/ibis/backends/dask/tests/execution/test_join.py b/ibis/backends/dask/tests/test_join.py similarity index 61% rename from ibis/backends/dask/tests/execution/test_join.py rename to ibis/backends/dask/tests/test_join.py index e76097b65cdd..13900659ca06 100644 --- a/ibis/backends/dask/tests/execution/test_join.py +++ b/ibis/backends/dask/tests/test_join.py @@ -2,8 +2,7 @@ import pandas as pd import pytest -from pandas import Timedelta, date_range -from pytest import param +from pandas import date_range import ibis @@ -25,18 +24,6 @@ "left", "right", "outer", - param( - "semi", - marks=pytest.mark.xfail( - raises=NotImplementedError, reason="Semi join not implemented" - ), - ), - param( - "anti", - marks=pytest.mark.xfail( - raises=NotImplementedError, reason="Anti join not implemented" - ), - ), ], ) @@ -50,20 +37,7 @@ def test_join(how, left, right, df1, df2): expected = dd.merge(df1, df2, how=how, on="key") tm.assert_frame_equal( result[expected.columns].compute(scheduler="single-threaded"), - expected.compute(scheduler="single-threaded"), - ) - - -def test_cross_join(left, right, df1, df2): - expr = left.cross_join(right)[left, right.other_value, right.key3] - result = expr.compile() - expected = dd.merge( - df1.assign(dummy=1), df2.assign(dummy=1), how="inner", on="dummy" - ).rename(columns={"key_x": "key"}) - del expected["dummy"], expected["key_y"] - tm.assert_frame_equal( - result[expected.columns].compute(scheduler="single-threaded"), - expected.compute(scheduler="single-threaded"), + expected.compute(scheduler="single-threaded").reset_index(drop=True), ) @@ -74,50 +48,8 @@ def test_join_project_left_table(how, left, right, df1, df2): expected = dd.merge(df1, df2, how=how, on="key")[list(left.columns) + ["key3"]] tm.assert_frame_equal( result[expected.columns].compute(scheduler="single-threaded"), - expected.compute(scheduler="single-threaded"), - ) - - -def test_cross_join_project_left_table(left, right, df1, df2): - expr = left.cross_join(right)[left, right.key3] - result = expr.compile() - expected = dd.merge( - df1.assign(dummy=1), df2.assign(dummy=1), how="inner", on="dummy" - ).rename(columns={"key_x": "key"})[list(left.columns) + ["key3"]] - tm.assert_frame_equal( - result[expected.columns].compute(scheduler="single-threaded"), - expected.compute(scheduler="single-threaded"), - ) - - -@join_type -def test_join_with_multiple_predicates(how, left, right, df1, df2): - expr = left.join(right, [left.key == right.key, left.key2 == right.key3], how=how)[ - left, right.key3, right.other_value - ] - result = expr.execute().sort_values(by=["key"]).reset_index(drop=True) - - expected = ( - dd.merge(df1, df2, how=how, left_on=["key", "key2"], right_on=["key", "key3"]) - .compute(scheduler="single-threaded") - 
.sort_values(by=["key"]) - .reset_index(drop=True) - ) - tm.assert_frame_equal(result[expected.columns], expected) - - -@join_type -def test_join_with_multiple_predicates_written_as_one(how, left, right, df1, df2): - predicate = (left.key == right.key) & (left.key2 == right.key3) - expr = left.join(right, predicate, how=how)[left, right.key3, right.other_value] - result = expr.execute().sort_values(by=["key"]).reset_index(drop=True) - expected = ( - dd.merge(df1, df2, how=how, left_on=["key", "key2"], right_on=["key", "key3"]) - .compute(scheduler="single-threaded") - .sort_values(by=["key"]) - .reset_index(drop=True) + expected.compute(scheduler="single-threaded").reset_index(drop=True), ) - tm.assert_frame_equal(result[expected.columns], expected) @join_type @@ -146,25 +78,6 @@ def test_join_with_duplicate_non_key_columns(how, left, right, df1, df2): expr.compile() -@join_type -def test_join_with_duplicate_non_key_columns_not_selected(how, left, right, df1, df2): - left = left.mutate(x=left.value * 2) - right = right.mutate(x=right.other_value * 3) - right = right[["key", "other_value"]] - expr = left.join(right, left.key == right.key, how=how)[left, right.other_value] - result = expr.compile() - expected = dd.merge( - df1.assign(x=df1.value * 2), - df2[["key", "other_value"]], - how=how, - on="key", - ) - tm.assert_frame_equal( - result[expected.columns].compute(scheduler="single-threaded"), - expected.compute(scheduler="single-threaded"), - ) - - @join_type def test_join_with_post_expression_selection(how, left, right, df1, df2): join = left.join(right, left.key == right.key, how=how) @@ -173,7 +86,7 @@ def test_join_with_post_expression_selection(how, left, right, df1, df2): expected = dd.merge(df1, df2, on="key", how=how)[["key", "value", "other_value"]] tm.assert_frame_equal( result[expected.columns].compute(scheduler="single-threaded"), - expected.compute(scheduler="single-threaded"), + expected.compute(scheduler="single-threaded").reset_index(drop=True), ) @@ -228,7 +141,6 @@ def test_multi_join_with_post_expression_filter(how, left, df1): ) -@pytest.mark.xfail(reason="TODO - execute_join - #2553") @join_type def test_join_with_non_trivial_key(how, left, right, df1, df2): # also test that the order of operands in the predicate doesn't matter @@ -252,7 +164,6 @@ def test_join_with_non_trivial_key(how, left, right, df1, df2): ) -@pytest.mark.xfail(reason="TODO - execute_join - #2553") @join_type def test_join_with_non_trivial_key_project_table(how, left, right, df1, df2): # also test that the order of operands in the predicate doesn't matter @@ -293,33 +204,7 @@ def test_join_with_project_right_duplicate_column(client, how, left, df1, df3): ) tm.assert_frame_equal( result[expected.columns].compute(scheduler="single-threaded"), - expected.compute(scheduler="single-threaded"), - ) - - -def test_join_with_window_function(players_base, players_df, batting, batting_df): - players = players_base - - # this should be semi_join - tbl = batting.left_join(players, ["playerID"]) - t = tbl[batting.G, batting.playerID, batting.teamID] - expr = t.group_by(t.teamID).mutate( - team_avg=lambda d: d.G.mean(), - demeaned_by_player=lambda d: d.G - d.G.mean(), - ) - result = expr.execute() - - expected = dd.merge( - batting_df, players_df[["playerID"]], on="playerID", how="left" - )[["G", "playerID", "teamID"]] - team_avg = expected.groupby("teamID").G.transform("mean") - expected = expected.assign( - team_avg=team_avg, demeaned_by_player=lambda df: df.G - team_avg - ) - - tm.assert_frame_equal( - 
result[expected.columns], - expected.compute(scheduler="single-threaded"), + expected.compute(scheduler="single-threaded").reset_index(drop=True), ) @@ -336,20 +221,7 @@ def test_asof_join(time_left, time_right, time_df1, time_df2): expected = dd.merge_asof(time_df1, time_df2, on="time") tm.assert_frame_equal( result[expected.columns].compute(scheduler="single-threaded"), - expected.compute(scheduler="single-threaded"), - ) - - -@merge_asof_minversion -def test_asof_join_predicate(time_left, time_right, time_df1, time_df2): - expr = time_left.asof_join(time_right, time_left.time == time_right.time)[ - time_left, time_right.other_value - ] - result = expr.compile() - expected = dd.merge_asof(time_df1, time_df2, on="time") - tm.assert_frame_equal( - result[expected.columns].compute(scheduler="single-threaded"), - expected.compute(scheduler="single-threaded"), + expected.compute(scheduler="single-threaded").reset_index(drop=True), ) @@ -364,28 +236,7 @@ def test_keyed_asof_join( expected = dd.merge_asof(time_keyed_df1, time_keyed_df2, on="time", by="key") tm.assert_frame_equal( result[expected.columns].compute(scheduler="single-threaded"), - expected.compute(scheduler="single-threaded"), - ) - - -@merge_asof_minversion -def test_keyed_asof_join_with_tolerance( - time_keyed_left, time_keyed_right, time_keyed_df1, time_keyed_df2 -): - expr = time_keyed_left.asof_join( - time_keyed_right, "time", by="key", tolerance=2 * ibis.interval(days=1) - )[time_keyed_left, time_keyed_right.other_value] - result = expr.compile() - expected = dd.merge_asof( - time_keyed_df1, - time_keyed_df2, - on="time", - by="key", - tolerance=Timedelta("2D"), - ) - tm.assert_frame_equal( - result[expected.columns].compute(scheduler="single-threaded"), - expected.compute(scheduler="single-threaded"), + expected.compute(scheduler="single-threaded").reset_index(drop=True), ) @@ -403,16 +254,14 @@ def test_asof_join_overlapping_non_predicate( time_keyed_df1.assign(collide=time_keyed_df1["key"] + time_keyed_df1["value"]) time_keyed_df2.assign(collide=time_keyed_df2["key"] + time_keyed_df2["other_value"]) - expr = time_keyed_left.asof_join( - time_keyed_right, predicates=[("time", "time")], by=[("key", "key")] - ) + expr = time_keyed_left.asof_join(time_keyed_right, on="time", by=[("key", "key")]) result = expr.compile() expected = dd.merge_asof( time_keyed_df1, time_keyed_df2, on="time", by="key", suffixes=("", "_right") ) tm.assert_frame_equal( result[expected.columns].compute(scheduler="single-threaded"), - expected.compute(scheduler="single-threaded"), + expected.compute(scheduler="single-threaded").reset_index(drop=True), ) @@ -433,30 +282,23 @@ def test_asof_join_overlapping_non_predicate( pytest.param(lambda join: join.select(["a0", "a1"]), id="select"), ], ) -def test_select_on_unambiguous_join(how, func, npartitions): - df_t = dd.from_pandas( - pd.DataFrame({"a0": [1, 2, 3], "b1": list("aab")}), - npartitions=npartitions, - ) - df_s = dd.from_pandas( - pd.DataFrame({"a1": [2, 3, 4], "b2": list("abc")}), - npartitions=npartitions, - ) - con = ibis.dask.connect({"t": df_t, "s": df_s}) - t = con.table("t") - s = con.table("s") +def test_select_on_unambiguous_join(con, how, func): + df_t = pd.DataFrame({"a0": [1, 2, 3], "b1": list("aab")}) + df_s = pd.DataFrame({"a1": [2, 3, 4], "b2": list("abc")}) + + t = ibis.memtable(df_t) + s = ibis.memtable(df_s) method = getattr(t, f"{how}_join") join = method(s, t.b1 == s.b2) - expected = dd.merge(df_t, df_s, left_on=["b1"], right_on=["b2"], how=how)[ + expr = func(join) + result = 
con.compile(expr).compute(scheduler="single-threaded") + + expected = pd.merge(df_t, df_s, left_on=["b1"], right_on=["b2"], how=how)[ ["a0", "a1"] ] - assert not expected.compute(scheduler="single-threaded").empty - expr = func(join) - result = expr.compile() - tm.assert_frame_equal( - result.compute(scheduler="single-threaded"), - expected.compute(scheduler="single-threaded"), - ) + assert not expected.empty + + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -487,7 +329,7 @@ def test_select_on_unambiguous_asof_join(func, npartitions): result = expr.compile() tm.assert_frame_equal( result.compute(scheduler="single-threaded"), - expected.compute(scheduler="single-threaded"), + expected.compute(scheduler="single-threaded").reset_index(drop=True), ) @@ -520,5 +362,5 @@ def test_outer_join(npartitions): ) tm.assert_frame_equal( result.compute(scheduler="single-threaded"), - expected.compute(scheduler="single-threaded"), + expected.compute(scheduler="single-threaded").reset_index(drop=True), ) diff --git a/ibis/backends/dask/tests/test_maps.py b/ibis/backends/dask/tests/test_maps.py new file mode 100644 index 000000000000..b7445434211d --- /dev/null +++ b/ibis/backends/dask/tests/test_maps.py @@ -0,0 +1,90 @@ +from __future__ import annotations + +import numpy as np +import pandas as pd +import pytest + +import ibis + +dd = pytest.importorskip("dask.dataframe") +from dask.dataframe.utils import tm # noqa: E402 + + +def test_map_length_expr(t): + expr = t.map_of_integers_strings.length() + result = expr.execute() + expected = pd.Series([0, None, 2], name="MapLength(map_of_integers_strings)") + tm.assert_series_equal(result, expected, check_index=False) + + +def test_map_value_for_key_expr(t): + expr = t.map_of_integers_strings[1] + result = expr.execute() + expected = pd.Series( + [None, None, "a"], name="MapGet(map_of_integers_strings, 1, None)" + ) + tm.assert_series_equal(result, expected, check_index=False) + + +def test_map_value_or_default_for_key_expr(t): + expr = t.map_of_complex_values.get("a") + result = expr.execute() + expected = pd.Series( + [None, [1, 2, 3], None], + dtype="object", + name=expr.get_name(), + ) + tm.assert_series_equal(result, expected, check_index=False) + + +def safe_sorter(element): + return sorted(element) if isinstance(element, list) else element + + +def test_map_keys_expr(t): + expr = t.map_of_strings_integers.keys() + result = expr.execute().map(safe_sorter) + expected = pd.Series( + [["a", "b"], None, []], + dtype="object", + name="MapKeys(map_of_strings_integers)", + ).map(safe_sorter) + tm.assert_series_equal(result, expected, check_index=False) + + +def test_map_values_expr(t): + expr = t.map_of_complex_values.values() + result = expr.execute() + expected = pd.Series( + [ + None, + np.array([[1, 2, 3], []], dtype="object"), + np.array([], dtype="object"), + ], + dtype="object", + name="MapValues(map_of_complex_values)", + ) + tm.assert_series_equal(result, expected, check_index=False) + + +def test_map_concat_expr(t): + expr = t.map_of_complex_values + {"b": [4, 5, 6], "c": [], "a": []} + result = expr.execute() + expected = pd.Series( + [ + None, + {"a": [], "b": [4, 5, 6], "c": []}, + {"b": [4, 5, 6], "c": [], "a": []}, + ], + dtype="object", + name=expr.get_name(), + ) + tm.assert_series_equal(result, expected, check_index=False) + + +def test_map_value_for_key_literal_broadcast(t): + lookup_table = ibis.literal({"a": 1, "b": 2, "c": 3, "d": 4}) + expr = lookup_table.get(t.dup_strings) + result = expr.execute() + expected = 
pd.Series([4, 1, 4], dtype="int8", name=expr.get_name()) + tm.assert_series_equal(result, expected, check_index=False) diff --git a/ibis/backends/dask/tests/execution/test_operations.py b/ibis/backends/dask/tests/test_operations.py similarity index 72% rename from ibis/backends/dask/tests/execution/test_operations.py rename to ibis/backends/dask/tests/test_operations.py index 0da077ceba4b..71abfeb40f49 100644 --- a/ibis/backends/dask/tests/execution/test_operations.py +++ b/ibis/backends/dask/tests/test_operations.py @@ -11,21 +11,18 @@ import ibis import ibis.expr.datatypes as dt -from ibis.common.exceptions import OperationNotDefinedError da = pytest.importorskip("dask.array") dd = pytest.importorskip("dask.dataframe") from dask.dataframe.utils import tm # noqa: E402 -from ibis.backends.dask.execution import execute # noqa: E402 - -def test_table_column(t, df): +def test_table_column(t, pandas_df): expr = t.plain_int64 - result = expr.compile() - expected = df.plain_int64 - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + result = expr.execute() + expected = pandas_df.plain_int64 + tm.assert_series_equal(result, expected) def test_literal(client): @@ -85,31 +82,33 @@ def test_project_scope_does_not_override(t, df): @pytest.mark.parametrize( "where", [ - lambda t: None, - lambda t: t.dup_strings == "d", - lambda t: (t.dup_strings == "d") | (t.plain_int64 < 100), + param(lambda _: None, id="none"), + param(lambda t: t.dup_strings == "d", id="simple"), + param(lambda t: (t.dup_strings == "d") | (t.plain_int64 < 100), id="complex"), ], ) @pytest.mark.parametrize( - ("ibis_func", "dask_func"), + ("ibis_func", "pandas_func"), [ - (methodcaller("abs"), np.abs), - (methodcaller("ceil"), np.ceil), - (methodcaller("exp"), np.exp), - (methodcaller("floor"), np.floor), - (methodcaller("ln"), np.log), - (methodcaller("log10"), np.log10), - (methodcaller("log", 2), lambda x: np.log(x) / np.log(2)), - (methodcaller("log2"), np.log2), - (methodcaller("round", 0), lambda x: x.round(0).astype("int64")), - (methodcaller("round", -2), methodcaller("round", -2)), - (methodcaller("round", 2), methodcaller("round", 2)), - (methodcaller("round"), lambda x: x.round().astype("int64")), - (methodcaller("sign"), np.sign), - (methodcaller("sqrt"), np.sqrt), + param(methodcaller("abs"), np.abs, id="abs"), + param(methodcaller("ceil"), np.ceil, id="ceil"), + param(methodcaller("exp"), np.exp, id="exp"), + param(methodcaller("floor"), np.floor, id="floor"), + param(methodcaller("ln"), np.log, id="log"), + param(methodcaller("log10"), np.log10, id="log10"), + param(methodcaller("log", 2), lambda x: np.log(x) / np.log(2), id="logb"), + param(methodcaller("log2"), np.log2, id="log2"), + param( + methodcaller("round", 0), lambda x: x.round(0).astype("int64"), id="round0" + ), + param(methodcaller("round", -2), methodcaller("round", -2), id="roundm2"), + param(methodcaller("round", 2), methodcaller("round", 2), id="round2"), + param(methodcaller("round"), lambda x: x.round().astype("int64"), id="round"), + param(methodcaller("sign"), np.sign, id="sign"), + param(methodcaller("sqrt"), np.sqrt, id="sqrt"), ], ) -def test_aggregation_group_by(t, df, where, ibis_func, dask_func): +def test_aggregation_group_by(t, pandas_df, where, ibis_func, pandas_func): ibis_where = where(t) expr = t.group_by(t.dup_strings).aggregate( avg_plain_int64=t.plain_int64.mean(where=ibis_where), @@ -118,21 +117,20 @@ def test_aggregation_group_by(t, df, where, ibis_func, dask_func): 
neg_mean_int64_with_zeros=(-t.int64_with_zeros).mean(where=ibis_where), nunique_dup_ints=t.dup_ints.nunique(), ) - result = expr.compile() + result = expr.execute() - dask_where = where(df.compute()) - mask = slice(None) if dask_where is None else dask_where + df = pandas_df + pandas_where = where(df) + mask = slice(None) if pandas_where is None else pandas_where expected = ( - df.compute() - .groupby("dup_strings") + df.groupby("dup_strings") .agg( { "plain_int64": lambda x, mask=mask: x[mask].mean(), - # Note we force min count here to match dask behavior - "plain_float64": lambda x, mask=mask: x[mask].sum(min_count=1), + "plain_float64": lambda x, mask=mask: x[mask].sum(), "dup_ints": "nunique", "float64_positive": ( - lambda x, mask=mask, func=dask_func: func(x[mask]).mean() + lambda x, mask=mask, func=pandas_func: func(x[mask]).mean() ), "int64_with_zeros": lambda x, mask=mask: (-x[mask]).mean(), } @@ -148,21 +146,6 @@ def test_aggregation_group_by(t, df, where, ibis_func, dask_func): } ) ) - - result = result.compute() - - # TODO(phillipc): Why does pandas not return floating point values here? - expected["avg_plain_int64"] = expected.avg_plain_int64.astype("float64") - result["avg_plain_int64"] = result.avg_plain_int64.astype("float64") - expected["neg_mean_int64_with_zeros"] = expected.neg_mean_int64_with_zeros.astype( - "float64" - ) - result["neg_mean_int64_with_zeros"] = result.neg_mean_int64_with_zeros.astype( - "float64" - ) - expected["mean_float64_positive"] = expected.mean_float64_positive.astype("float64") - result["mean_float64_positive"] = result.mean_float64_positive.astype("float64") - lhs = result[expected.columns] rhs = expected tm.assert_frame_equal(lhs, rhs) @@ -248,19 +231,19 @@ def test_group_by_rename_key(t, df): lambda t: None, ], ) -def test_reduction(t, df, reduction, where): +def test_reduction(t, pandas_df, reduction, where): func = getattr(t.plain_int64, reduction) mask = where(t) expr = func(where=mask) - result = expr.compile() + result = expr.execute() - df_mask = where(df) + df_mask = where(pandas_df) expected_func = getattr( - df.loc[df_mask if df_mask is not None else slice(None), "plain_int64"], + pandas_df.loc[df_mask if df_mask is not None else slice(None), "plain_int64"], reduction, ) expected = expected_func() - assert result.compute() == expected.compute() + assert result == expected @pytest.mark.parametrize( @@ -329,9 +312,9 @@ def test_grouped_reduction(t, df, where): if df_mask is None: expected["mean_plain_int64"] = expected.mean_plain_int64.astype("float64") else: - expected["sum_plain_int64"] = expected.sum_plain_int64.astype("float64") - expected["count_plain_int64"] = expected.count_plain_int64.astype("float64") - expected["nunique_plain_int64"] = expected.nunique_plain_int64.astype("float64") + expected["sum_plain_int64"] = expected.sum_plain_int64.astype("int64") + expected["count_plain_int64"] = expected.count_plain_int64.astype("int64") + expected["nunique_plain_int64"] = expected.nunique_plain_int64.astype("int64") tm.assert_frame_equal(result, expected) @@ -345,19 +328,19 @@ def test_grouped_reduction(t, df, where): lambda x: ~(x.all()), ], ) -def test_boolean_aggregation(t, df, reduction): +def test_boolean_aggregation(t, pandas_df, reduction): expr = reduction(t.plain_int64 == 1) - result = expr.compile() - expected = reduction(df.plain_int64 == 1) - assert result.compute() == expected.compute() + result = expr.execute() + expected = reduction(pandas_df.plain_int64 == 1) + assert result == expected 
@pytest.mark.parametrize("column", ["float64_with_zeros", "int64_with_zeros"]) -def test_nullif_zero(t, df, column): +def test_nullif_zero(t, pandas_df, column): expr = t[column].nullif(0) - result = expr.compile() - expected = df[column].replace(0, np.nan) - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + result = expr.execute() + expected = pandas_df[column].replace(0, np.nan) + tm.assert_series_equal(result, expected, check_index=False, check_names=False) @pytest.mark.parametrize( @@ -392,42 +375,27 @@ def test_nullif_zero(t, df, column): id="series_series", ), param( - lambda _: ibis.literal("a"), + lambda t: ibis.literal("a"), lambda t: t.dup_strings, - lambda _: dd.from_array( - np.array(["d", np.nan, "d"], dtype="object") - ).rename("dup_strings"), + lambda _: pd.Series(["a", np.nan, "a"], name="dup_strings"), tm.assert_series_equal, id="literal_series", ), ], ) -def test_nullif(t, df, left, right, expected, compare): +def test_nullif(t, con, pandas_df, left, right, expected, compare): expr = left(t).nullif(right(t)) - result = execute(expr.op()) - if isinstance(result, (dd.Series, dd.DataFrame)): - compare( - result.compute().reset_index(drop=True), - expected(df).compute().reset_index(drop=True), - ) - else: - compare(result, expected(df)) + result = con.execute(expr.name("dup_strings")) + compare(result, expected(pandas_df)) -def test_nullif_inf(npartitions): - df = dd.from_pandas( - pd.DataFrame({"a": [np.inf, 3.14, -np.inf, 42.0]}), - npartitions=npartitions, - ) - con = ibis.dask.connect({"t": df}) - t = con.table("t") +def test_nullif_inf(con): + df = pd.DataFrame({"a": [np.inf, 3.14, -np.inf, 42.0]}) + t = ibis.memtable(df) expr = t.a.nullif(np.inf).nullif(-np.inf) - result = expr.compile() - expected = dd.from_pandas( - pd.Series([np.nan, 3.14, np.nan, 42.0], name="a"), - npartitions=npartitions, - ).reset_index(drop=True) # match dask reset index behavior - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + result = con.execute(expr) + expected = pd.Series([np.nan, 3.14, np.nan, 42.0], name="a") + tm.assert_series_equal(result, expected, check_names=False) def test_group_concat(t, df): @@ -478,46 +446,6 @@ def test_series_limit(t, df, offset): ) -@pytest.mark.parametrize( - ("key", "dask_by", "dask_ascending"), - [ - (lambda t, col: [t[col]], lambda col: [col], True), - param( - lambda t, col: [ibis.desc(t[col])], - lambda col: [col], - False, - marks=pytest.mark.xfail(reason="TODO -sorting - #2553"), - ), - param( - lambda t, col: [t[col], t.plain_int64], - lambda col: [col, "plain_int64"], - [True, False], - marks=pytest.mark.xfail(reason="TODO - sorting - #2553"), - ), - ( - lambda t, col: [t.plain_int64 * 2], - lambda col: ["plain_int64"], - True, - ), - ], -) -@pytest.mark.parametrize( - "column", - ["plain_datetimes_naive", "plain_datetimes_ny", "plain_datetimes_utc"], -) -def test_order_by(t, df, column, key, dask_by, dask_ascending): - expr = t.order_by(key(t, column)) - result = expr.compile() - expected = ( - df.compute() - .sort_values(dask_by(column), ascending=dask_ascending) - .reset_index(drop=True) - ) - tm.assert_frame_equal( - result[expected.columns].compute().reset_index(drop=True), expected - ) - - @pytest.mark.xfail(reason="TODO - sorting - #2553") def test_complex_order_by(t, df): expr = t.order_by([ibis.desc(t.plain_int64 * t.plain_float64), t.plain_float64]) @@ -535,11 +463,11 @@ def test_complex_order_by(t, df): ) -def test_count_distinct(t, df): +def test_count_distinct(t, 
pandas_df): expr = t.dup_strings.nunique() - result = expr.compile() - expected = df.dup_strings.nunique() - assert result.compute() == expected.compute() + result = expr.execute() + expected = pandas_df.dup_strings.nunique() + assert result == expected def test_value_counts(t, df): @@ -561,7 +489,7 @@ def test_value_counts(t, df): def test_table_count(t, df): expr = t.count() - result = expr.compile() + result = expr.execute() expected = len(df) assert result == expected @@ -645,43 +573,43 @@ def test_group_by_with_unnamed_arithmetic(t, df): ) -def test_isnull(t, df): +def test_isnull(t, pandas_df): expr = t.strings_with_nulls.isnull() - result = expr.compile() - expected = df.strings_with_nulls.isnull() - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + result = expr.execute() + expected = pandas_df.strings_with_nulls.isnull() + tm.assert_series_equal(result, expected, check_index=False, check_names=False) -def test_notnull(t, df): +def test_notnull(t, pandas_df): expr = t.strings_with_nulls.notnull() - result = expr.compile() - expected = df.strings_with_nulls.notnull() - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + result = expr.execute() + expected = pandas_df.strings_with_nulls.notnull() + tm.assert_series_equal(result, expected, check_names=False) @pytest.mark.parametrize("raw_value", [0.0, 1.0]) -def test_scalar_parameter(t, df, raw_value): +def test_scalar_parameter(t, pandas_df, raw_value): value = ibis.param(dt.double) expr = t.float64_with_zeros == value - result = expr.compile(params={value: raw_value}) - expected = df.float64_with_zeros == raw_value - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + result = expr.execute(params={value: raw_value}) + expected = pandas_df.float64_with_zeros == raw_value + tm.assert_series_equal(result, expected, check_names=False) @pytest.mark.parametrize("elements", [[1], (1,), {1}, frozenset({1})]) -def test_isin(t, df, elements): +def test_isin(t, pandas_df, elements): expr = t.plain_float64.isin(elements) - expected = df.plain_float64.isin(elements) - result = expr.compile() - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + expected = pandas_df.plain_float64.isin(elements) + result = expr.execute() + tm.assert_series_equal(result, expected, check_names=False) @pytest.mark.parametrize("elements", [[1], (1,), {1}, frozenset({1})]) -def test_notin(t, df, elements): +def test_notin(t, pandas_df, elements): expr = t.plain_float64.notin(elements) - expected = ~df.plain_float64.isin(elements) - result = expr.compile() - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + expected = ~pandas_df.plain_float64.isin(elements) + result = expr.execute() + tm.assert_series_equal(result, expected, check_index=False, check_names=False) def test_cast_on_group_by(t, df): @@ -716,11 +644,11 @@ def test_cast_on_group_by(t, df): ids=operator.attrgetter("__name__"), ) @pytest.mark.parametrize("args", [lambda c: (1.0, c), lambda c: (c, 1.0)]) -def test_left_binary_op(t, df, op, args): +def test_left_binary_op(t, pandas_df, op, args): expr = op(*args(t.float64_with_zeros)) - result = expr.compile() - expected = op(*args(df.float64_with_zeros)) - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + result = expr.execute() + expected = op(*args(pandas_df.float64_with_zeros)).astype(result.dtype) + tm.assert_series_equal(result, expected, check_index=False, check_names=False) 
@pytest.mark.parametrize( @@ -737,27 +665,25 @@ def test_left_binary_op(t, df, op, args): ids=operator.attrgetter("__name__"), ) @pytest.mark.parametrize("argfunc", [lambda c: (1.0, c), lambda c: (c, 1.0)]) -def test_left_binary_op_gb(t, df, op, argfunc): +def test_left_binary_op_gb(t, pandas_df, op, argfunc): expr = t.group_by("dup_strings").aggregate( foo=op(*argfunc(t.float64_with_zeros)).sum() ) - result = expr.compile() + result = expr.execute() expected = ( - df.groupby("dup_strings") + pandas_df.groupby("dup_strings") .float64_with_zeros.apply(lambda s: op(*argfunc(s)).sum()) .reset_index() .rename(columns={"float64_with_zeros": "foo"}) ) - tm.assert_frame_equal( - result.compute().reset_index(drop=True), - expected.compute().reset_index(drop=True), - ) + expected["foo"] = expected["foo"].astype(result["foo"].dtype) + tm.assert_frame_equal(result, expected, check_names=False) @pytest.mark.parametrize( "left_f", [ - param(lambda e: e - 1, id="sub_one"), + param(lambda e: e - 1, id="sub"), param(lambda _: 0.0, id="zero"), param(lambda _: None, id="none"), ], @@ -765,64 +691,65 @@ def test_left_binary_op_gb(t, df, op, argfunc): @pytest.mark.parametrize( "right_f", [ - param(lambda e: e + 1, id="add_one"), + param(lambda e: e + 1, id="add"), param(lambda _: 1.0, id="one"), param(lambda _: None, id="none"), ], ) -def test_ifelse_series(t, df, left_f, right_f): +def test_ifelse_series(t, pandas_df, left_f, right_f): col_expr = t["plain_int64"] result = ibis.ifelse( col_expr > col_expr.mean(), left_f(col_expr), right_f(col_expr) ).execute() - series = df["plain_int64"].compute() + series = pandas_df["plain_int64"] cond = series > series.mean() left = left_f(series) if not isinstance(left, pd.Series): left = pd.Series(np.repeat(left, len(cond)), name=cond.name) - expected = left.where(cond, right_f(series)).astype(result.dtype) + expected = left.where(cond, right_f(series)) - tm.assert_series_equal(result, expected, check_index=False, check_names=False) + tm.assert_series_equal( + result.astype(object).fillna(pd.NA), + expected.astype(object).fillna(pd.NA), + check_dtype=False, + check_names=False, + ) @pytest.mark.parametrize( ("cond", "expected_func"), [ - (True, lambda df: df["plain_int64"]), - (False, lambda df: dd.from_array(np.repeat(3.0, len(df)))), + param(True, lambda df: df["plain_int64"].astype("float64"), id="true"), + param(False, lambda df: pd.Series(np.repeat(3.0, len(df))), id="false"), ], ) -def test_ifelse_scalar(t, df, cond, expected_func): +def test_ifelse_scalar(t, pandas_df, cond, expected_func): expr = ibis.ifelse(cond, t["plain_int64"], 3.0) - result = expr.compile() - expected = expected_func(df) - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + result = expr.execute() + expected = expected_func(pandas_df) + tm.assert_series_equal(result, expected, check_names=False) -def test_ifelse_long(batting, batting_df): +def test_ifelse_long(batting, batting_pandas_df): col_expr = batting["AB"] - result = ibis.ifelse(col_expr > col_expr.mean(), col_expr, 0.0).compile() + result = ibis.ifelse(col_expr > col_expr.mean(), col_expr, 0.0).execute() - series = batting_df["AB"] - expected = series.where(series > series.mean(), other=0.0) + series = batting_pandas_df["AB"] + expected = series.where(series > series.mean(), other=0.0).astype("float64") - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + tm.assert_series_equal(result, expected, check_names=False) -def test_round(t, df): +def test_round(t, pandas_df): 
precision = 2 mult = 3.33333 - result = (t.count() * mult).round(precision).compile() - expected = np.around(len(df) * mult, precision) + result = (t.count() * mult).round(precision).execute() + expected = np.around(len(pandas_df) * mult, precision) npt.assert_almost_equal(result, expected, decimal=precision) -@pytest.mark.xfail( - raises=OperationNotDefinedError, - reason="MultiQuantile is not implemented for the dask backend", -) -def test_quantile_group_by(batting, batting_df): +def test_quantile_group_by(batting, batting_pandas_df): def q_fun(x, quantile): res = x.quantile(quantile).tolist() return [res for _ in range(len(x))] @@ -831,14 +758,14 @@ def q_fun(x, quantile): result = ( batting.group_by("teamID") .mutate(res=lambda x: x.RBI.quantile([frac, 1 - frac])) - .res.compile() + .res.execute() ) expected = ( - batting_df.groupby("teamID") + batting_pandas_df.groupby("teamID") .RBI.transform(q_fun, quantile=[frac, 1 - frac]) .rename("res") ) - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + tm.assert_series_equal(result, expected, check_index=False) def test_searched_case_scalar(client): @@ -848,9 +775,9 @@ def test_searched_case_scalar(client): assert result == expected -def test_searched_case_column(batting, batting_df): +def test_searched_case_column(batting, batting_pandas_df): t = batting - df = batting_df + df = batting_pandas_df expr = ( ibis.case() .when(t.RBI < 5, "really bad team") @@ -858,15 +785,15 @@ def test_searched_case_column(batting, batting_df): .else_(t.teamID) .end() ) - result = expr.compile() - expected = dd.from_array( + result = expr.execute() + expected = pd.Series( np.select( [df.RBI < 5, df.teamID == "PH1"], ["really bad team", "ph1 team"], df.teamID, ) ) - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + tm.assert_series_equal(result, expected, check_names=False) def test_simple_case_scalar(client): @@ -877,9 +804,9 @@ def test_simple_case_scalar(client): assert result == expected -def test_simple_case_column(batting, batting_df): +def test_simple_case_column(batting, batting_pandas_df): t = batting - df = batting_df + df = batting_pandas_df expr = ( t.RBI.case() .when(5, "five") @@ -888,15 +815,15 @@ def test_simple_case_column(batting, batting_df): .else_("could be good?") .end() ) - result = expr.compile() - expected = dd.from_array( + result = expr.execute() + expected = pd.Series( np.select( [df.RBI == 5, df.RBI == 4, df.RBI == 3], ["five", "four", "three"], "could be good?", ) ) - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + tm.assert_series_equal(result, expected, check_names=False) def test_table_distinct(t, df): diff --git a/ibis/backends/dask/tests/execution/test_strings.py b/ibis/backends/dask/tests/test_strings.py similarity index 91% rename from ibis/backends/dask/tests/execution/test_strings.py rename to ibis/backends/dask/tests/test_strings.py index 68fe0efc480b..240d78b2fd17 100644 --- a/ibis/backends/dask/tests/execution/test_strings.py +++ b/ibis/backends/dask/tests/test_strings.py @@ -24,19 +24,16 @@ lambda s: s[s.length() - 1 :], lambda s: s.str[-1:], id="expr_slice_begin", - marks=pytest.mark.xfail, ), param( lambda s: s[: s.length()], lambda s: s, id="expr_slice_end", - marks=pytest.mark.xfail, ), param( lambda s: s[s.length() - 2 : s.length() - 1], lambda s: s.str[-2:-1], id="expr_slice_begin_end", - marks=pytest.mark.xfail, ), param(lambda s: s.strip(), lambda s: s.str.strip(), id="strip"), param(lambda s: s.lstrip(), lambda s: 
s.str.lstrip(), id="lstrip"), @@ -96,7 +93,6 @@ lambda s: s.split(" "), lambda s: s.str.split(" "), id="split_spaces", - marks=pytest.mark.notimpl(["dask"], reason="arrays - #2553"), ), ], ) @@ -104,9 +100,9 @@ def test_string_ops(t, df, case_func, expected_func): # ignore matching UserWarnings with catch_warnings(record=True): expr = case_func(t.strings_with_space) - result = expr.compile() - series = expected_func(df.strings_with_space) - tm.assert_series_equal(result.compute(), series.compute(), check_index=False) + result = expr.name("result").execute() + series = expected_func(df.strings_with_space).rename("result").compute() + tm.assert_series_equal(result, series, check_index=False) def test_grouped_string_re_search(t, df): diff --git a/ibis/backends/dask/tests/execution/test_structs.py b/ibis/backends/dask/tests/test_structs.py similarity index 66% rename from ibis/backends/dask/tests/execution/test_structs.py rename to ibis/backends/dask/tests/test_structs.py index aca55d438696..c92f2b72c49d 100644 --- a/ibis/backends/dask/tests/execution/test_structs.py +++ b/ibis/backends/dask/tests/test_structs.py @@ -12,8 +12,6 @@ from dask.dataframe.utils import tm # noqa: E402 -from ibis.backends.dask.execution import execute # noqa: E402 - @pytest.fixture(scope="module") def value(): @@ -49,56 +47,50 @@ def struct_table(struct_client): ) -def test_struct_field_literal(value): +def test_struct_field_literal(value, con): struct = ibis.literal(value) assert struct.type() == dt.Struct.from_tuples( [("fruit", dt.string), ("weight", dt.int8)] ) expr = struct["fruit"] - result = execute(expr.op()) + result = con.execute(expr) assert result == "pear" expr = struct["weight"] - result = execute(expr.op()) + result = con.execute(expr) assert result == 0 def test_struct_field_series(struct_table): t = struct_table expr = t.s["fruit"] - result = expr.compile() - expected = dd.from_pandas( - pd.Series(["apple", "pear", "pear"], name="fruit"), - npartitions=1, - ) - tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) + result = expr.execute() + expected = pd.Series(["apple", "pear", "pear"], name="fruit") + + tm.assert_series_equal(result, expected, check_index=False) def test_struct_field_series_group_by_key(struct_table): t = struct_table expr = t.group_by(t.s["fruit"]).aggregate(total=t.value.sum()) - result = expr.compile() - expected = dd.from_pandas( - pd.DataFrame([("apple", 1), ("pear", 5)], columns=["fruit", "total"]), - npartitions=1, - ) + result = expr.execute() + expected = pd.DataFrame([("apple", 1), ("pear", 5)], columns=["fruit", "total"]) + tm.assert_frame_equal( - result.compute().reset_index(drop=True), - expected.compute().reset_index(drop=True), + result.reset_index(drop=True), expected.reset_index(drop=True) ) def test_struct_field_series_group_by_value(struct_table): t = struct_table expr = t.group_by(t.key).aggregate(total=t.s["weight"].sum()) - result = expr.compile() + result = expr.execute() # these are floats because we have a NULL value in the input data - expected = dd.from_pandas( - pd.DataFrame([("a", 0.0), ("b", 1.0)], columns=["key", "total"]), - npartitions=1, - ) + expected = pd.DataFrame([("a", 0.0), ("b", 1.0)], columns=["key", "total"]) tm.assert_frame_equal( - result.compute().reset_index(drop=True), - expected.compute().reset_index(drop=True), + result, + expected.assign( + total=lambda df: df.total.astype(expr.total.type().to_pandas()) + ), ) diff --git a/ibis/backends/dask/tests/execution/test_temporal.py 
b/ibis/backends/dask/tests/test_temporal.py similarity index 64% rename from ibis/backends/dask/tests/execution/test_temporal.py rename to ibis/backends/dask/tests/test_temporal.py index ba8d7f207723..a70bd37005f0 100644 --- a/ibis/backends/dask/tests/execution/test_temporal.py +++ b/ibis/backends/dask/tests/test_temporal.py @@ -16,8 +16,6 @@ dd = pytest.importorskip("dask.dataframe") from dask.dataframe.utils import tm # noqa: E402 -from ibis.backends.dask.execution import execute # noqa: E402 - @pytest.mark.parametrize( ("case_func", "expected_func"), @@ -45,7 +43,7 @@ ] ], ) -def test_timestamp_functions(case_func, expected_func): +def test_timestamp_functions(con, case_func, expected_func): v = L("2015-09-01 14:48:05.359").cast("timestamp") vt = datetime.datetime( year=2015, @@ -58,7 +56,7 @@ def test_timestamp_functions(case_func, expected_func): ) result = case_func(v) expected = expected_func(vt) - assert execute(result.op()) == expected + assert con.execute(result) == expected @pytest.mark.parametrize( @@ -67,15 +65,13 @@ def test_timestamp_functions(case_func, expected_func): ) def test_cast_datetime_strings_to_date(t, df, column): expr = t[column].cast("date") - result = expr.compile() + result = expr.execute() df_computed = df.compute() - expected = dd.from_pandas( - pd.to_datetime(df_computed[column]).dt.normalize(), - npartitions=1, - ) + expected = pd.to_datetime(df_computed[column]).map(lambda x: x.date()) + tm.assert_series_equal( - result.compute().reset_index(drop=True), - expected.compute().reset_index(drop=True), + result.reset_index(drop=True).rename("tmp"), + expected.reset_index(drop=True).rename("tmp"), ) @@ -83,73 +79,63 @@ def test_cast_datetime_strings_to_date(t, df, column): "column", ["datetime_strings_naive", "datetime_strings_ny", "datetime_strings_utc"], ) -def test_cast_datetime_strings_to_timestamp(t, df, column): - expr = t[column].cast("timestamp") - result = expr.compile() - df_computed = df.compute() - expected = dd.from_pandas(pd.to_datetime(df_computed[column]), npartitions=1) +def test_cast_datetime_strings_to_timestamp(t, pandas_df, column): + expr = t[column].cast(dt.Timestamp(scale=9)) + result = expr.execute() + expected = pd.to_datetime(pandas_df[column]) if getattr(expected.dtype, "tz", None) is not None: expected = expected.dt.tz_convert(None) - tm.assert_series_equal( - result.compute().reset_index(drop=True), - expected.compute().reset_index(drop=True), - ) + tm.assert_series_equal(result, expected, check_names=False) @pytest.mark.parametrize( "column", ["plain_datetimes_naive", "plain_datetimes_ny", "plain_datetimes_utc"], ) -def test_cast_integer_to_temporal_type(t, df, column): +def test_cast_integer_to_temporal_type(t, df, pandas_df, column): column_type = t[column].type() expr = t.plain_int64.cast(column_type) - result = expr.compile() - df_computed = df.compute() - expected = dd.from_pandas( - pd.Series( - pd.to_datetime(df_computed.plain_int64.values, unit="s").values, - index=df_computed.index, - name="plain_int64", - ).dt.tz_localize(column_type.timezone), - npartitions=1, - ) + result = expr.execute() + + expected = pd.Series( + pd.to_datetime(pandas_df.plain_int64.values, unit="s").values, + index=pandas_df.index, + name="plain_int64", + ).dt.tz_localize(column_type.timezone) + tm.assert_series_equal( - result.compute().reset_index(drop=True), - expected.compute().reset_index(drop=True), + result.reset_index(drop=True), + expected.reset_index(drop=True), + check_names=False, ) -def test_cast_integer_to_date(t, df): +def 
test_cast_integer_to_date(t, pandas_df): expr = t.plain_int64.cast("date") - result = expr.compile() - df_computed = df.compute() - expected = dd.from_pandas( - pd.Series( - pd.to_datetime(df_computed.plain_int64.values, unit="D").values, - index=df_computed.index, - name="plain_int64", - ), - npartitions=1, - ) - tm.assert_series_equal( - result.compute().reset_index(drop=True), - expected.compute().reset_index(drop=True), + result = expr.execute() + expected = pd.Series( + pd.to_datetime(pandas_df.plain_int64.values, unit="D").date, + index=pandas_df.index, + name="plain_int64", ) + tm.assert_series_equal(result, expected, check_names=False) def test_times_ops(t, df): - result = t.plain_datetimes_naive.time().between("10:00", "10:00").compile() - expected = dd.from_array(np.zeros(len(df), dtype=bool)) + result = t.plain_datetimes_naive.time().between("10:00", "10:00").execute() + expected = pd.Series(np.zeros(len(df), dtype=bool)) tm.assert_series_equal( - result.compute().reset_index(drop=True), - expected.compute().reset_index(drop=True), + result.reset_index(drop=True), + expected.reset_index(drop=True), + check_names=False, ) - result = t.plain_datetimes_naive.time().between("01:00", "02:00").compile() - expected = dd.from_array(np.ones(len(df), dtype=bool)) + result = t.plain_datetimes_naive.time().between("01:00", "02:00").execute() + expected = pd.Series(np.ones(len(df), dtype=bool)) tm.assert_series_equal( - result.compute().reset_index(drop=True), - expected.compute().reset_index(drop=True), + result.reset_index(drop=True), + expected.reset_index(drop=True), + check_names=False, ) @@ -166,24 +152,24 @@ def test_times_ops(t, df): ids=lambda x: str(getattr(x, "__name__", x)).lower().replace("/", "_"), ) def test_times_ops_with_tz(t, df, tz, rconstruct, column): - expected = dd.from_array( - rconstruct(len(df), dtype=bool), - ) + expected = dd.from_array(rconstruct(len(df), dtype=bool)) time = t[column].time() expr = time.between("01:00", "02:00", timezone=tz) - result = expr.compile() + result = expr.execute() tm.assert_series_equal( - result.compute().reset_index(drop=True), + result.reset_index(drop=True), expected.compute().reset_index(drop=True), + check_names=False, ) # Test that casting behavior is the same as using the timezone kwarg ts = t[column].cast(dt.Timestamp(timezone=tz)) expr = ts.time().between("01:00", "02:00") - result = expr.compile() + result = expr.execute() tm.assert_series_equal( - result.compute().reset_index(drop=True), + result.reset_index(drop=True), expected.compute().reset_index(drop=True), + check_names=False, ) @@ -219,12 +205,9 @@ def test_interval_arithmetic(op, expected): ) t1 = con.table("df1") expr = op(t1.td, t1.td) - result = expr.compile() - expected = dd.from_pandas( - pd.Series(expected(data, data), name="td"), - npartitions=1, - ) + result = expr.execute() + expected = pd.Series(expected(data, data), name=expr.get_name()) + tm.assert_series_equal( - result.compute().reset_index(drop=True), - expected.compute().reset_index(drop=True), + result.reset_index(drop=True), expected.reset_index(drop=True) ) diff --git a/ibis/backends/dask/tests/test_udf.py b/ibis/backends/dask/tests/test_udf.py index efa7a5ff4199..f4bea4226382 100644 --- a/ibis/backends/dask/tests/test_udf.py +++ b/ibis/backends/dask/tests/test_udf.py @@ -1,7 +1,5 @@ from __future__ import annotations -import collections - import numpy as np import pandas as pd import pandas.testing as tm @@ -323,144 +321,21 @@ def test_compose_udfs(t2, df2): tm.assert_series_equal(result, 
expected, check_names=False, check_index=False) -@pytest.mark.xfail(raises=NotImplementedError, reason="TODO - windowing - #2553") def test_udaf_window(t2, df2): window = ibis.trailing_window(2, order_by="a", group_by="key") expr = t2.mutate(rolled=my_mean(t2.b).over(window)) result = expr.execute().sort_values(["key", "a"]) - expected = df2.sort_values(["key", "a"]).assign( - rolled=lambda df: df.groupby("key") - .b.rolling(3, min_periods=1) - .mean() - .reset_index(level=0, drop=True) - ) - tm.assert_frame_equal( - result.reset_index(drop=True), expected.reset_index(drop=True) - ) - - -@pytest.mark.xfail(raises=NotImplementedError, reason="TODO - windowing - #2553") -def test_udaf_window_interval(npartitions): - df = pd.DataFrame( - collections.OrderedDict( - [ - ( - "time", - pd.date_range(start="20190105", end="20190101", freq="-1D"), - ), - ("key", [1, 2, 1, 2, 1]), - ("value", np.arange(5)), - ] - ) - ) - df = dd.from_pandas(df, npartitions=npartitions) - - con = ibis.dask.connect({"df": df}) - t = con.table("df") - window = ibis.trailing_range_window( - ibis.interval(days=2), order_by="time", group_by="key" - ) - - expr = t.mutate(rolled=my_mean(t.value).over(window)) - - result = expr.execute().sort_values(["time", "key"]).reset_index(drop=True) expected = ( - df.sort_values(["time", "key"]) - .set_index("time") + df2.compute() + .sort_values(["key", "a"]) .assign( rolled=lambda df: df.groupby("key") - .value.rolling("2D", closed="both") - .mean() - .reset_index(level=0, drop=True) - ) - ).reset_index(drop=False) - - tm.assert_frame_equal( - result.reset_index(drop=True), expected.reset_index(drop=True) - ) - - -@pytest.mark.xfail(raises=NotImplementedError, reason="TODO - windowing - #2553") -def test_multiple_argument_udaf_window(npartitions): - @reduction(["double", "double"], "double") - def my_wm(v, w): - return np.average(v, weights=w) - - df = pd.DataFrame( - { - "a": np.arange(4, 0, dtype=float, step=-1).tolist() - + np.random.rand(3).tolist(), - "b": np.arange(4, dtype=float).tolist() + np.random.rand(3).tolist(), - "c": np.arange(4, dtype=float).tolist() + np.random.rand(3).tolist(), - "d": np.repeat(1, 7), - "key": list("deefefd"), - } - ) - df = dd.from_pandas(df, npartitions=npartitions) - - con = ibis.dask.connect({"df": df}) - t = con.table("df") - window = ibis.trailing_window(2, order_by="a", group_by="key") - window2 = ibis.trailing_window(1, order_by="b", group_by="key") - expr = t.mutate( - wm_b=my_wm(t.b, t.d).over(window), - wm_c=my_wm(t.c, t.d).over(window), - wm_c2=my_wm(t.c, t.d).over(window2), - ) - result = expr.execute().sort_values(["key", "a"]) - expected = ( - df.sort_values(["key", "a"]) - .assign( - wm_b=lambda df: df.groupby("key") .b.rolling(3, min_periods=1) .mean() .reset_index(level=0, drop=True) ) - .assign( - wm_c=lambda df: df.groupby("key") - .c.rolling(3, min_periods=1) - .mean() - .reset_index(level=0, drop=True) - ) - ) - expected = expected.sort_values(["key", "b"]).assign( - wm_c2=lambda df: df.groupby("key") - .c.rolling(2, min_periods=1) - .mean() - .reset_index(level=0, drop=True) - ) - expected = expected.sort_values(["key", "a"]) - - tm.assert_frame_equal( - result.reset_index(drop=True), expected.reset_index(drop=True) - ) - - -@pytest.mark.xfail(raises=NotImplementedError, reason="TODO - windowing - #2553") -def test_udaf_window_nan(npartitions): - df = pd.DataFrame( - { - "a": np.arange(10, dtype=float), - "b": [3.0, np.NaN] * 5, - "key": list("ddeefffggh"), - } - ) - df = dd.from_pandas(df, npartitions=npartitions) - - con = 
ibis.dask.connect({"df": df}) - t = con.table("df") - window = ibis.trailing_window(2, order_by="a", group_by="key") - expr = t.mutate(rolled=my_mean(t.b).over(window)) - result = expr.execute().sort_values(["key", "a"]) - expected = df.sort_values(["key", "a"]).assign( - rolled=lambda d: d.groupby("key") - .b.rolling(3, min_periods=1) - .apply(lambda x: x.mean(), raw=True) - .reset_index(level=0, drop=True) - ) - tm.assert_frame_equal( - result.reset_index(drop=True), expected.reset_index(drop=True) ) + tm.assert_frame_equal(result, expected) def test_array_return_type_reduction(t, df): diff --git a/ibis/backends/dask/tests/execution/test_window.py b/ibis/backends/dask/tests/test_window.py similarity index 63% rename from ibis/backends/dask/tests/execution/test_window.py rename to ibis/backends/dask/tests/test_window.py index 83d04047e4d9..34a14d7a00e6 100644 --- a/ibis/backends/dask/tests/execution/test_window.py +++ b/ibis/backends/dask/tests/test_window.py @@ -1,6 +1,5 @@ from __future__ import annotations -import io from datetime import date from operator import methodcaller @@ -11,10 +10,8 @@ from dask.dataframe.utils import tm import ibis -import ibis.common.exceptions as com import ibis.expr.datatypes as dt from ibis.backends.dask import Backend -from ibis.backends.dask.execution import execute from ibis.legacy.udf.vectorized import reduction @@ -47,59 +44,62 @@ def range_window(): @default @row_offset -def test_lead(t, df, row_offset, default, row_window): +def test_lead(con, t, df, row_offset, default, row_window): expr = t.dup_strings.lead(row_offset, default=default).over(row_window) result = expr.execute() - expected = df.dup_strings.shift(execute((-row_offset).op())).compute() + expected = df.dup_strings.shift(con.execute(-row_offset)).compute() if default is not ibis.NA: - expected = expected.fillna(execute(default.op())) + expected = expected.fillna(con.execute(default)) tm.assert_series_equal(result, expected, check_names=False) @default @row_offset -def test_lag(t, df, row_offset, default, row_window): +def test_lag(con, t, df, row_offset, default, row_window): expr = t.dup_strings.lag(row_offset, default=default).over(row_window) result = expr.execute() - expected = df.dup_strings.shift(execute(row_offset.op())).compute() + expected = df.dup_strings.shift(con.execute(row_offset)).compute() if default is not ibis.NA: - expected = expected.fillna(execute(default.op())) + expected = expected.fillna(con.execute(default)) tm.assert_series_equal(result, expected, check_names=False) @default @range_offset -def test_lead_delta(t, df, range_offset, default, range_window): +def test_lead_delta(con, t, pandas_df, range_offset, default, range_window): expr = t.dup_strings.lead(range_offset, default=default).over(range_window) result = expr.execute() + expected = ( - df[["plain_datetimes_naive", "dup_strings"]] + pandas_df[["plain_datetimes_naive", "dup_strings"]] .set_index("plain_datetimes_naive") .squeeze() - .shift(freq=execute((-range_offset).op())) - .compute() + .shift(freq=con.execute(-range_offset)) + .reindex(pandas_df.plain_datetimes_naive) .reset_index(drop=True) ) if default is not ibis.NA: - expected = expected.fillna(execute(default.op())) + expected = expected.fillna(con.execute(default)) tm.assert_series_equal(result, expected, check_names=False) @default @range_offset -def test_lag_delta(t, df, range_offset, default, range_window): +@pytest.mark.filterwarnings("ignore:Non-vectorized") +def test_lag_delta(t, con, pandas_df, range_offset, default, range_window): expr 
= t.dup_strings.lag(range_offset, default=default).over(range_window) result = expr.execute() + expected = ( - df[["plain_datetimes_naive", "dup_strings"]] + pandas_df[["plain_datetimes_naive", "dup_strings"]] .set_index("plain_datetimes_naive") .squeeze() - .shift(freq=execute(range_offset.op())) - .compute() + .shift(freq=con.execute(range_offset)) + .reindex(pandas_df.plain_datetimes_naive) .reset_index(drop=True) ) if default is not ibis.NA: - expected = expected.fillna(execute(default.op())) + expected = expected.fillna(con.execute(default)) tm.assert_series_equal(result, expected, check_names=False) @@ -199,51 +199,22 @@ def test_batting_avg_change_in_games_per_year(players, players_df): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail( - raises=AttributeError, reason="'Series' object has no attribute 'rank'" -) -def test_batting_most_hits(players, players_df): - expr = players.mutate( - hits_rank=lambda t: t.H.rank().over( - ibis.cumulative_window(order_by=ibis.desc(t.H)) - ) - ) - result = expr.execute() - hits_rank = players_df.groupby("playerID").H.rank(method="min", ascending=False) - expected = players_df.assign(hits_rank=hits_rank) - tm.assert_frame_equal(result[expected.columns], expected) - - -@pytest.mark.xfail( - raises=NotImplementedError, - reason="Quantile not implemented for Dask SeriesGroupBy, Dask #9824", -) -def test_batting_quantile(players, players_df): - expr = players.mutate(hits_quantile=lambda t: t.H.quantile(0.25)) - hits_quantile = players_df.groupby("playerID").H.transform("quantile", 0.25) - expected = players_df.assign(hits_quantile=hits_quantile) - cols = expected.columns.tolist() - result = expr.execute()[cols].sort_values(cols).reset_index(drop=True) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("op", ["sum", "min", "max", "mean"]) -def test_batting_specific_cumulative(batting, batting_df, op, sort_kind): +@pytest.mark.parametrize("op", ["sum", "mean", "min", "max"]) +def test_batting_specific_cumulative(batting, batting_pandas_df, op, sort_kind): ibis_method = methodcaller(f"cum{op}", order_by=batting.yearID) - expr = ibis_method(batting.G) + expr = ibis_method(batting.G).name("tmp") result = expr.execute().astype("float64") pandas_method = methodcaller(op) expected = pandas_method( - batting_df[["G", "yearID"]] - .sort_values("yearID") - .G.rolling(len(batting_df), min_periods=1) - ) - expected = expected.compute().sort_index().reset_index(drop=True) - tm.assert_series_equal(result, expected.rename(f"{op.capitalize()}(G)")) + batting_pandas_df[["G", "yearID"]] + .sort_values("yearID", kind=sort_kind) + .G.expanding() + ).reset_index(drop=True) + tm.assert_series_equal(result, expected.rename("tmp")) -def test_batting_cumulative(batting, batting_df, sort_kind): +def test_batting_cumulative(batting, batting_pandas_df, sort_kind): expr = batting.mutate( more_values=lambda t: t.G.sum().over(ibis.cumulative_window(order_by=t.yearID)) ) @@ -251,32 +222,28 @@ def test_batting_cumulative(batting, batting_df, sort_kind): columns = ["G", "yearID"] more_values = ( - batting_df[columns] - .sort_values("yearID") - .G.rolling(len(batting_df), min_periods=1) + batting_pandas_df[columns] + .sort_values("yearID", kind=sort_kind) + .G.expanding() .sum() .astype("int64") ) - expected = batting_df.assign(more_values=more_values).compute() + expected = batting_pandas_df.assign(more_values=more_values) tm.assert_frame_equal(result[expected.columns], expected) -@pytest.mark.xfail( - raises=NotImplementedError, - reason="Grouped and 
order windows not supported yet.", -) -def test_batting_cumulative_partitioned(batting, batting_df, sort_kind): +def test_batting_cumulative_partitioned(batting, batting_pandas_df, sort_kind): group_by = "playerID" order_by = "yearID" t = batting - expr = t.G.cumsum(order_by=order_by, group_by=group_by) + expr = t.G.sum().over(ibis.cumulative_window(order_by=order_by, group_by=group_by)) expr = t.mutate(cumulative=expr) result = expr.execute() columns = [group_by, order_by, "G"] expected = ( - batting_df[columns] + batting_pandas_df[columns] .set_index(order_by) .groupby(group_by) .G.expanding() @@ -290,7 +257,7 @@ def test_batting_cumulative_partitioned(batting, batting_df, sort_kind): ) -def test_batting_rolling(batting, batting_df, sort_kind): +def test_batting_rolling(batting, batting_pandas_df, sort_kind): expr = batting.mutate( more_values=lambda t: t.G.sum().over(ibis.trailing_window(5, order_by=t.yearID)) ) @@ -298,21 +265,17 @@ def test_batting_rolling(batting, batting_df, sort_kind): columns = ["G", "yearID"] more_values = ( - batting_df[columns] - .sort_values("yearID") + batting_pandas_df[columns] + .sort_values("yearID", kind=sort_kind) .G.rolling(6, min_periods=1) .sum() .astype("int64") ) - expected = batting_df.assign(more_values=more_values).compute() + expected = batting_pandas_df.assign(more_values=more_values) tm.assert_frame_equal(result[expected.columns], expected) -@pytest.mark.xfail( - raises=NotImplementedError, - reason="Grouped and order windows not supported yet", -) -def test_batting_rolling_partitioned(batting, batting_df, sort_kind): +def test_batting_rolling_partitioned(batting, batting_pandas_df, sort_kind): t = batting group_by = "playerID" order_by = "yearID" @@ -324,13 +287,13 @@ def test_batting_rolling_partitioned(batting, batting_df, sort_kind): columns = [group_by, order_by, "G"] expected = ( - batting_df[columns] + batting_pandas_df[columns] .set_index(order_by) .groupby(group_by) .G.rolling(4, min_periods=1) .sum() .rename("rolled") - ).compute() + ) tm.assert_series_equal( result.set_index([group_by, order_by]).sort_index().rolled, @@ -367,51 +330,39 @@ def test_scalar_broadcasting(batting, batting_df): tm.assert_frame_equal(result, expected) -def test_mutate_with_window_after_join(sort_kind, npartitions): - left_df = dd.from_pandas( - pd.DataFrame( - { - "ints": [0, 1, 2], - "strings": ["a", "b", "c"], - "dates": pd.date_range("20170101", periods=3), - } - ), - npartitions=npartitions, +def test_mutate_with_window_after_join(con, sort_kind): + left_df = pd.DataFrame( + { + "ints": [0, 1, 2], + "strings": ["a", "b", "c"], + "dates": pd.date_range("20170101", periods=3), + } ) - right_df = dd.from_pandas( - pd.DataFrame( - { - "group": [0, 1, 2] * 3, - "value": [0, 1, np.nan, 3, 4, np.nan, 6, 7, 8], - } - ), - npartitions=npartitions, + right_df = pd.DataFrame( + { + "group": [0, 1, 2] * 3, + "value": [0, 1, np.nan, 3, 4, np.nan, 6, 7, 8], + } ) - con = Backend().connect({"left": left_df, "right": right_df}) - left, right = map(con.table, ("left", "right")) + + left = ibis.memtable(left_df) + right = ibis.memtable(right_df) joined = left.outer_join(right, left.ints == right.group) proj = joined[left, right.value] expr = proj.group_by("ints").mutate(sum=proj.value.sum()) - result = expr.execute() - result = result.sort_values(["dates", "ints", "value"]).reset_index(drop=True) - expected = ( - pd.DataFrame( - { - "dates": dd.concat([left_df.dates] * 3) - .compute() - .sort_values() - .reset_index(drop=True), - "ints": [0] * 3 + [1] * 3 + [2] * 3, - 
"strings": ["a"] * 3 + ["b"] * 3 + ["c"] * 3, - "value": [0.0, 3.0, 6.0, 1.0, 4.0, 7.0, np.nan, np.nan, 8.0], - "sum": [9.0] * 3 + [12.0] * 3 + [8.0] * 3, - } - ) - .sort_values(["dates", "ints", "value"]) - .reset_index(drop=True) + result = con.execute(expr) + expected = pd.DataFrame( + { + "dates": pd.concat([left_df.dates] * 3) + .sort_values(kind=sort_kind) + .reset_index(drop=True), + "ints": [0] * 3 + [1] * 3 + [2] * 3, + "strings": ["a"] * 3 + ["b"] * 3 + ["c"] * 3, + "value": [0.0, 3.0, 6.0, 1.0, 4.0, 7.0, np.nan, np.nan, 8.0], + "sum": [9.0] * 3 + [12.0] * 3 + [8.0] * 3, + } ) - tm.assert_frame_equal(result[expected.columns], expected) @@ -467,7 +418,7 @@ def test_project_scalar_after_join(npartitions): joined = left.outer_join(right, left.ints == right.group) proj = joined[left, right.value] expr = proj[proj.value.sum().name("sum"), ibis.literal(1).name("const")] - result = expr.execute() + result = expr.execute().reset_index(drop=True) expected = pd.DataFrame( { "sum": [29.0] * 9, @@ -477,10 +428,6 @@ def test_project_scalar_after_join(npartitions): tm.assert_frame_equal(result[expected.columns], expected) -@pytest.mark.xfail( - raises=ibis.common.exceptions.OperationNotDefinedError, - reason="MultiQuantile not implemented", -) def test_project_list_scalar(npartitions): df = dd.from_pandas(pd.DataFrame({"ints": range(3)}), npartitions=npartitions) con = ibis.dask.connect({"df": df}) @@ -492,47 +439,6 @@ def test_project_list_scalar(npartitions): tm.assert_series_equal(result.res, expected) -@pytest.mark.xfail( - raises=NotImplementedError, reason="Group and order by not implemented" -) -def test_window_with_mlb(npartitions): - index = pd.date_range("20170501", "20170507") - data = np.random.randn(len(index), 3) - df = ( - pd.DataFrame(data, columns=list("abc"), index=index) - .rename_axis("time") - .reset_index(drop=False) - ) - df = dd.from_pandas(df, npartitions=npartitions) - client = ibis.dask.connect({"df": df}) - t = client.table("df") - rows_with_mlb = ibis.rows_with_max_lookback(5, ibis.interval(days=10)) - expr = t.mutate( - sum=lambda df: df.a.sum().over( - ibis.trailing_window(rows_with_mlb, order_by="time", group_by="b") - ) - ) - result = expr.execute() - expected = df.set_index("time") - gb_df = ( - expected.groupby(["b"])["a"] - .rolling("10d", closed="both") - .apply(lambda s: s.iloc[-5:].sum(), raw=False) - .sort_index(level=["time"]) - .reset_index(drop=True) - ) - expected = expected.reset_index(drop=False).assign(sum=gb_df) - tm.assert_frame_equal(result, expected) - - rows_with_mlb = ibis.rows_with_max_lookback(5, 10) - with pytest.raises(com.IbisInputError): - t.mutate( - sum=lambda df: df.a.sum().over( - ibis.trailing_window(rows_with_mlb, order_by="time") - ) - ) - - def test_window_grouping_key_has_scope(t, df): param = ibis.param(dt.string) window = ibis.window(group_by=t.dup_strings + param) @@ -545,10 +451,6 @@ def test_window_grouping_key_has_scope(t, df): ) -@pytest.mark.xfail( - raises=NotImplementedError, - reason="Grouped and order windows not supported yet", -) def test_window_on_and_by_key_as_window_input(t, df): order_by = "plain_int64" group_by = "dup_ints" @@ -621,45 +523,3 @@ def events(npartitions) -> dd.DataFrame: df.at[5, "measurement"] = 42.0 df.at[7, "measurement"] = 11.0 return dd.from_pandas(df, npartitions=npartitions) - - -@pytest.mark.xfail( - raises=NotImplementedError, reason="Group and order by not implemented" -) -def test_bfill(events): - con = ibis.dask.connect({"t": events}) - t = con.table("t") - - win = ibis.window( - 
group_by=t.event_id, order_by=ibis.desc(t.measured_on), following=0 - ) - grouped = t.mutate(grouper=t.measurement.count().over(win)) - - expr = ( - grouped.group_by([grouped.event_id, grouped.grouper]) - .mutate(bfill=grouped.measurement.max()) - .order_by("measured_on") - ) - result = expr.execute().reset_index(drop=True) - - expected_raw = """\ -event_id measured_on measurement grouper bfill - 2 2021-05-05 42.0 3 42.0 - 2 2021-05-06 42.0 2 42.0 - 2 2021-05-07 NaN 1 11.0 - 2 2021-05-08 11.0 1 11.0 - 2 2021-05-09 NaN 0 NaN - 2 2021-05-10 NaN 0 NaN - 1 2021-06-01 NaN 1 5.0 - 1 2021-06-02 5.0 1 5.0 - 1 2021-06-03 NaN 0 NaN - 1 2021-06-04 NaN 0 NaN - 3 2021-07-11 NaN 0 NaN - 3 2021-07-12 NaN 0 NaN""" - expected = pd.read_csv( - io.StringIO(expected_raw), - sep=r"\s+", - header=0, - parse_dates=["measured_on"], - ) - tm.assert_frame_equal(result, expected) diff --git a/ibis/backends/dask/trace.py b/ibis/backends/dask/trace.py deleted file mode 100644 index 146ed9a896af..000000000000 --- a/ibis/backends/dask/trace.py +++ /dev/null @@ -1,159 +0,0 @@ -"""Module that adds tracing to dask execution. - -With tracing enabled, this module will log time and call stack information of -the executed expression. Call stack information is presented with indentation -level. -For example: -import dask.dataframe as dd -import pandas as pd -import logging -import ibis.expr.datatypes as dt -import ibis.dask -from ibis.legacy.udf.vectorized import elementwise -from ibis.backends.dask import trace -logging.basicConfig() -trace.enable() -df = dd.from_pandas( - pd.DataFrame({'a': [1, 2, 3]}) -) -con = ibis.dask.connect({"table1": df}) -@elementwise( - input_type=[dt.double], - output_type=dt.double -) -def add_one(v): - import time - time.sleep(5) - return v + 1 -table = con.table("table1") -table = table.mutate(b=add_one(table['a'])) -table.execute() -Output: -DEBUG:ibis.dask.trace: main_execute Selection -DEBUG:ibis.dask.trace: execute_until_in_scope Selection -DEBUG:ibis.dask.trace: execute_until_in_scope DaskTable -DEBUG:ibis.dask.trace: execute_database_table_client DaskTable -DEBUG:ibis.dask.trace: execute_database_table_client DaskTable 0:00:00.000085 -DEBUG:ibis.dask.trace: execute_until_in_scope DaskTable 0:00:00.000362 -DEBUG:ibis.dask.trace: execute_selection_dataframe Selection -DEBUG:ibis.dask.trace: main_execute ElementWiseVectorizedUDF -DEBUG:ibis.dask.trace: execute_until_in_scope ElementWiseVectorizedUDF -DEBUG:ibis.dask.trace: execute_until_in_scope TableColumn -DEBUG:ibis.dask.trace: execute_until_in_scope DaskTable -DEBUG:ibis.dask.trace: execute_until_in_scope DaskTable 0:00:00.000061 -DEBUG:ibis.dask.trace: execute_table_column_df_or_df_groupby TableColumn -DEBUG:ibis.dask.trace: execute_table_column_df_or_df_groupby TableColumn 0:00:00.000304 # noqa: E501 -DEBUG:ibis.dask.trace: execute_until_in_scope TableColumn 0:00:00.000584 -DEBUG:ibis.dask.trace: execute_udf_node ElementWiseVectorizedUDF -DEBUG:ibis.dask.trace: execute_udf_node ElementWiseVectorizedUDF 0:00:05.019173 -DEBUG:ibis.dask.trace: execute_until_in_scope ElementWiseVectorizedUDF 0:00:05.052604 # noqa: E501 -DEBUG:ibis.dask.trace: main_execute ElementWiseVectorizedUDF 0:00:05.052819 -DEBUG:ibis.dask.trace: execute_selection_dataframe Selection 0:00:05.054894 -DEBUG:ibis.dask.trace: execute_until_in_scope Selection 0:00:05.055662 -DEBUG:ibis.dask.trace: main_execute Selection 0:00:05.056556. 
-""" - -from __future__ import annotations - -import functools -import logging -import traceback -from datetime import datetime - -import ibis -from ibis.backends.pandas.dispatcher import TwoLevelDispatcher -from ibis.config import options -from ibis.expr import types as ir - -_logger = logging.getLogger("ibis.dask.trace") - -# A list of funcs that is traced -_trace_funcs = set() - - -def enable(): - """Enable tracing.""" - if options.dask is None: - # dask options haven't been registered yet - force module __getattr__ - ibis.dask # noqa: B018 - - options.dask.enable_trace = True - logging.getLogger("ibis.dask.trace").setLevel(logging.DEBUG) - - -def _log_trace(func, start=None): - level = 0 - current_frame = None - - # Increase the current level for each traced function in the stackframe - # This way we can visualize the call stack. - for frame, _ in traceback.walk_stack(None): - current_frame = current_frame if current_frame is not None else frame - func_name = frame.f_code.co_name - if func_name in _trace_funcs: - level += 1 - - # We can assume we have 'args' because we only call _log_trace inside - # trace or TraceDispatcher.register - current_op = current_frame.f_locals["args"][0] - - # If the first argument is a Expr, we print its op because it's more - # informative. - if isinstance(current_op, ir.Expr): - current_op = current_op.op() - - _logger.debug( - "%s %s %s %s", - " " * level, - func.__name__, - type(current_op).__qualname__, - f"{datetime.now() - start}" if start else "", - ) - - -def trace(func): - """Return a function decorator that wraps `func` with tracing.""" - _trace_funcs.add(func.__name__) - - @functools.wraps(func) - def traced_func(*args, **kwargs): - import ibis - - # Similar to the pandas backend, it is possible to call this function - # without having initialized the configuration option. This can happen - # when tests are distributed across multiple processes, for example. - ibis.dask # noqa: B018 - - if not options.dask.enable_trace: - return func(*args, **kwargs) - else: - start = datetime.now() - _log_trace(func) - res = func(*args, **kwargs) - _log_trace(func, start) - return res - - return traced_func - - -class TraceTwoLevelDispatcher(TwoLevelDispatcher): - """A Dispatcher that also wraps the registered function with tracing.""" - - def __init__(self, name, doc=None): - super().__init__(name, doc) - - def register(self, *types, **kwargs): - """Register a function with this Dispatcher. - - The function will also be wrapped with tracing information. 
- """ - - def _(func): - trace_func = trace(func) - TwoLevelDispatcher.register(self, *types, **kwargs)(trace_func) - # return func instead trace_func here so that - # chained register didn't get wrapped multiple - # times - return func - - return _ diff --git a/ibis/backends/dask/udf.py b/ibis/backends/dask/udf.py deleted file mode 100644 index 520a2e98cb2a..000000000000 --- a/ibis/backends/dask/udf.py +++ /dev/null @@ -1,298 +0,0 @@ -from __future__ import annotations - -import contextlib -import itertools -from typing import TYPE_CHECKING - -import dask.dataframe as dd -import dask.dataframe.groupby as ddgb -import dask.delayed -import pandas as pd - -import ibis.expr.operations as ops -import ibis.expr.types as ir -from ibis.backends.base import BaseBackend -from ibis.backends.dask.aggcontext import Transform -from ibis.backends.dask.dispatch import execute_node, pre_execute -from ibis.backends.dask.execution.util import ( - assert_identical_grouping_keys, - make_meta_series, - make_selected_obj, -) -from ibis.backends.pandas.udf import create_gens_from_args_groupby - -if TYPE_CHECKING: - import numpy as np - - -def make_struct_op_meta(op: ir.Expr) -> list[tuple[str, np.dtype]]: - """Unpacks a dt.Struct into a DataFrame meta.""" - return list( - zip( - op.return_type.names, - [x.to_pandas() for x in op.return_type.types], - ) - ) - - -@pre_execute.register(ops.ElementWiseVectorizedUDF) -@pre_execute.register(ops.ElementWiseVectorizedUDF, BaseBackend) -def pre_execute_elementwise_udf(op, *clients, scope=None, **kwargs): - """Register execution rules for elementwise UDFs.""" - input_type = op.input_type - - # definitions - - # Define an execution rule for elementwise operations on a - # grouped Series - nargs = len(input_type) - - @execute_node.register( - ops.ElementWiseVectorizedUDF, - *(itertools.repeat(ddgb.SeriesGroupBy, nargs)), - ) - def execute_udf_node_groupby(op, *args, **kwargs): - func = op.func - - # all grouping keys must be identical - assert_identical_grouping_keys(*args) - - # we're performing a scalar operation on grouped column, so - # perform the operation directly on the underlying Series - # and regroup after it's finished - args_objs = [make_selected_obj(arg) for arg in args] - groupings = args[0].index - return dd.map_partitions(func, *args_objs).groupby(groupings) - - # Define an execution rule for a simple elementwise Series - # function - @execute_node.register( - ops.ElementWiseVectorizedUDF, *(itertools.repeat(dd.Series, nargs)) - ) - def execute_udf_node(op, *args, cache=None, timecontext=None, **kwargs): - # We have rewritten op.func to be a closure enclosing - # the kwargs, and therefore, we do not need to pass - # kwargs here. This is true for all udf execution in this - # file. 
- # See ibis.legacy.udf.vectorized.UserDefinedFunction - with contextlib.suppress(KeyError): - return cache[(op, timecontext)] - - if op.return_type.is_struct(): - meta = make_struct_op_meta(op) - df = dd.map_partitions(op.func, *args, meta=meta) - else: - name = args[0].name if len(args) == 1 else None - meta = pd.Series([], name=name, dtype=op.return_type.to_pandas()) - df = dd.map_partitions(op.func, *args, meta=meta) - - cache[(op, timecontext)] = df - - return df - - return scope - - -@pre_execute.register(ops.AnalyticVectorizedUDF) -@pre_execute.register(ops.AnalyticVectorizedUDF, BaseBackend) -@pre_execute.register(ops.ReductionVectorizedUDF) -@pre_execute.register(ops.ReductionVectorizedUDF, BaseBackend) -def pre_execute_analytic_and_reduction_udf(op, *clients, scope=None, **kwargs): - input_type = op.input_type - nargs = len(input_type) - - # An execution rule to handle analytic and reduction UDFs over - # 1) an ungrouped window, - # 2) an ungrouped Aggregate node, or - # 3) an ungrouped custom aggregation context - # Ungrouped analytic/reduction functions receive the entire Series at once - # This is generally not recommended. - @execute_node.register(type(op), *(itertools.repeat(dd.Series, nargs))) - def execute_udaf_node_no_groupby(op, *args, aggcontext, **kwargs): - # This function is in essence fully materializing the dd.Series and - # passing that (now) pd.Series to aggctx. This materialization - # happens at `.compute()` time, making this "lazy" - @dask.delayed - def lazy_agg(*series: pd.Series): - return aggcontext.agg(series[0], op.func, *series[1:]) - - lazy_result = lazy_agg(*args) - - # Depending on the type of operation, lazy_result is a Delayed that - # could become a dd.Series or a dd.core.Scalar - if isinstance(op, ops.AnalyticVectorizedUDF): - if op.return_type.is_struct(): - meta = make_struct_op_meta(op) - else: - meta = make_meta_series( - dtype=op.return_type.to_pandas(), - name=args[0].name, - ) - result = dd.from_delayed(lazy_result, meta=meta) - - if args[0].known_divisions: - if not len({a.divisions for a in args}) == 1: - raise ValueError("Mixed divisions passed to AnalyticVectorized UDF") - # result is going to be a single partitioned thing, but we - # need it to be able to dd.concat it with other data - # downstream. We know that this udf operation did not change - # the index. Thus, we know the divisions, allowing dd.concat - # to align this piece with the other pieces. - original_divisions = args[0].divisions - result.divisions = ( - original_divisions[0], - original_divisions[-1], - ) - result = result.repartition(divisions=original_divisions) - else: - # lazy_result is a dd.core.Scalar from an ungrouped reduction - return_type = op.return_type - if return_type.is_array() or return_type.is_struct(): - # we're outputting a dt.Struct that will need to be destructured - # or an array of an unknown size. - # we compute so we can work with items inside downstream. - result = lazy_result.compute() - else: - # manually construct a dd.core.Scalar out of the delayed result - result = dd.from_delayed( - lazy_result, - meta=op.return_type.to_pandas(), - # otherwise dask complains this is a scalar - verify_meta=False, - ) - - return result - - @execute_node.register( - ops.ReductionVectorizedUDF, - *(itertools.repeat(ddgb.SeriesGroupBy, nargs)), - ) - def execute_reduction_node_groupby(op, *args, aggcontext, **kwargs): - # To apply a udf func to a list of grouped series we: - # 1. Grab the dataframe they're grouped off of - # 2. 
Grab the column name for each series - # 3. .apply a wrapper that performs the selection using the col name - # and applies the udf on to those - # This way we rely on dask dealing with groups and pass the udf down - # to the frame level. - assert_identical_grouping_keys(*args) - - func = op.func - groupings = args[0].index - parent_df = args[0].obj - - if isinstance(aggcontext, Transform): - # We are aggregating over an unbounded (and GROUPED) window, - # which uses a Transform aggregation context. - # We need to do some pre-processing to func and args so that - # Transform can pull data out of the SeriesGroupBys in args. - - # Construct a generator that yields the next group of data - # for every argument excluding the first (pandas performs - # the iteration for the first argument) for each argument - # that is a SeriesGroupBy. - iters = create_gens_from_args_groupby(*args[1:]) - - # TODO: Unify calling convention here to be more like - # window - def aggregator(first, *rest): - # map(next, *rest) gets the inputs for the next group - # TODO: might be inefficient to do this on every call - return func(first, *map(next, rest)) - - return aggcontext.agg(args[0], aggregator, *iters) - else: - columns = [parent_df[idx] for idx in args[0].index] - for arg in args: - df = arg.obj - column = df[arg._meta.obj.name] - columns.append(column) - parent_df = dd.concat(columns, axis=1) - - out_type = op.return_type.to_pandas() - - grouped_df = parent_df.groupby(groupings) - col_names = [col._meta._selected_obj.name for col in args] - - def apply_wrapper(df, apply_func, col_names): - cols = (df[col] for col in col_names) - return apply_func(*cols) - - if len(groupings) > 1: - meta_index = pd.MultiIndex.from_arrays( - [[0]] * len(groupings), names=groupings - ) - meta_value = [dd.utils.make_meta(out_type)] - else: - meta_index = pd.Index([], name=groupings[0]) - meta_value = [] - - return grouped_df.apply( - apply_wrapper, - func, - col_names, - meta=pd.Series(meta_value, index=meta_index, dtype=out_type), - ) - - @execute_node.register( - ops.AnalyticVectorizedUDF, - *(itertools.repeat(ddgb.SeriesGroupBy, nargs)), - ) - def execute_analytic_node_groupby(op, *args, aggcontext, **kwargs): - # To apply a udf func to a list of grouped series we: - # 1. Grab the dataframe they're grouped off of - # 2. Grab the column name for each series - # 3. .apply a wrapper that performs the selection using the col name - # and applies the udf on to those - # This way we rely on dask dealing with groups and pass the udf down - # to the frame level. 
- assert_identical_grouping_keys(*args) - - func = op.func - groupings = args[0].index - parent_df = args[0].obj - - columns = [parent_df[idx] for idx in groupings] - columns.extend(arg.obj[arg._meta.obj.name] for arg in args) - parent_df = dd.concat(columns, axis=1) - - out_type = op.return_type.to_pandas() - - grouped_df = parent_df.groupby(groupings) - col_names = [col._meta._selected_obj.name for col in args] - - def apply_wrapper(df, apply_func, col_names): - cols = (df[col] for col in col_names) - return apply_func(*cols) - - if op.return_type.is_struct(): - # with struct output we destruct to a dataframe directly - meta = dd.utils.make_meta(make_struct_op_meta(op)) - meta.index.name = parent_df.index.name - result = grouped_df.apply( - apply_wrapper, - func, - col_names, - meta=meta, - ) - # we don't know how data moved around here - result = result.reset_index().set_index(parent_df.index.name) - else: - # after application we will get a series with a multi-index of - # groupings + index - meta_index = pd.MultiIndex.from_arrays( - [[0]] * (len(groupings) + 1), - names=groupings + [parent_df.index.name], - ) - meta_value = [dd.utils.make_meta(out_type)] - - result = grouped_df.apply( - apply_wrapper, - func, - col_names, - meta=pd.Series(meta_value, index=meta_index, dtype=out_type), - ) - - return result - - return scope diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py index 881a460b7f5e..2e56487ae761 100644 --- a/ibis/backends/pandas/__init__.py +++ b/ibis/backends/pandas/__init__.py @@ -186,15 +186,10 @@ def create_table( raise com.IbisError("The schema or obj parameter is required") if obj is not None: - if not self._supports_conversion(obj): - raise com.BackendConversionError( - f"Unable to convert {obj.__class__} object " - f"to backend type: {self.__class__.backend_table_type}" - ) df = self._convert_object(obj) else: dtypes = dict(PandasSchema.from_ibis(schema)) - df = self._from_pandas(pd.DataFrame(columns=dtypes.keys()).astype(dtypes)) + df = pd.DataFrame(columns=dtypes.keys()).astype(dtypes) if name in self.dictionary and not overwrite: raise com.IbisError(f"Cannot overwrite existing table `{name}`") @@ -227,25 +222,25 @@ def drop_table(self, name: str, *, force: bool = False) -> None: ) del self.dictionary[name] - @classmethod - def _supports_conversion(cls, obj: Any) -> bool: - if isinstance(obj, ir.Table): - return isinstance(obj.op(), ops.InMemoryTable) - return True - - @staticmethod - def _from_pandas(df: pd.DataFrame) -> pd.DataFrame: - return df - - @classmethod - def _convert_object(cls, obj: Any) -> Any: - if isinstance(obj, ir.Table): - # Support memtables - assert isinstance(obj.op(), ops.InMemoryTable) - return obj.op().data.to_frame() + def _convert_object(self, obj: Any) -> Any: + if isinstance(obj, pd.DataFrame): + return obj + elif isinstance(obj, ir.Table): + op = obj.op() + if isinstance(op, ops.InMemoryTable): + return op.data.to_frame() + else: + raise com.BackendConversionError( + f"Unable to convert {obj.__class__} object " + f"to backend type: {self.__class__.backend_table_type}" + ) elif isinstance(obj, pa.Table): return obj.to_pandas() - return cls.backend_table_type(obj) + else: + raise com.BackendConversionError( + f"Unable to convert {obj.__class__} object " + f"to backend type: {self.__class__.backend_table_type}" + ) @classmethod @lru_cache @@ -304,7 +299,7 @@ class Backend(BasePandasBackend): name = "pandas" def execute(self, query, params=None, limit="default", **kwargs): - from ibis.backends.pandas.executor 
import Executor + from ibis.backends.pandas.executor import PandasExecutor if limit != "default" and limit is not None: raise ValueError( @@ -322,7 +317,7 @@ def execute(self, query, params=None, limit="default", **kwargs): params = params or {} params = {k.op() if isinstance(k, ir.Expr) else k: v for k, v in params.items()} - return Executor.execute(query.op(), backend=self, params=params) + return PandasExecutor.execute(query.op(), backend=self, params=params) def _load_into_cache(self, name, expr): self.create_table(name, expr.execute()) diff --git a/ibis/backends/pandas/convert.py b/ibis/backends/pandas/convert.py index 76528d3e9258..30778b4d4ffb 100644 --- a/ibis/backends/pandas/convert.py +++ b/ibis/backends/pandas/convert.py @@ -55,7 +55,7 @@ def convert_Floating(cls, s, dtype, pandas_type): @classmethod def convert_Timestamp(cls, s, dtype, pandas_type): - if isinstance(dtype, pd.DatetimeTZDtype): + if isinstance(s.dtype, pd.DatetimeTZDtype): return s.dt.tz_convert(dtype.timezone) elif pdt.is_datetime64_dtype(s.dtype): return s.dt.tz_localize(dtype.timezone) @@ -63,12 +63,9 @@ def convert_Timestamp(cls, s, dtype, pandas_type): return pd.to_datetime(s, unit="s").dt.tz_localize(dtype.timezone) else: try: - return s.astype(pandas_type) + return pd.to_datetime(s).dt.tz_convert(dtype.timezone) except TypeError: - try: - return pd.to_datetime(s).dt.tz_convert(dtype.timezone) - except TypeError: - return pd.to_datetime(s).dt.tz_localize(dtype.timezone) + return pd.to_datetime(s).dt.tz_localize(dtype.timezone) @classmethod def convert_Date(cls, s, dtype, pandas_type): diff --git a/ibis/backends/pandas/executor.py b/ibis/backends/pandas/executor.py index 815cecdf51f9..b39b4610ac66 100644 --- a/ibis/backends/pandas/executor.py +++ b/ibis/backends/pandas/executor.py @@ -6,62 +6,38 @@ import numpy as np import pandas as pd +import ibis.backends.pandas.kernels as pandas_kernels import ibis.expr.operations as ops from ibis.backends.pandas.convert import PandasConverter from ibis.backends.pandas.helpers import ( GroupedFrame, + PandasUtils, RangeFrame, RowsFrame, UngroupedFrame, - agg, - asframe, - asseries, - columnwise, - elementwise, - rowwise, - serieswise, ) -from ibis.backends.pandas.kernels import pick_kernel from ibis.backends.pandas.rewrites import ( PandasAggregate, PandasAsofJoin, PandasJoin, PandasLimit, PandasRename, + PandasResetIndex, PandasScalarSubquery, plan, ) from ibis.common.dispatch import Dispatched from ibis.common.exceptions import OperationNotDefinedError, UnboundExpressionError -from ibis.formats.pandas import PandasData -from ibis.util import gen_name +from ibis.formats.pandas import PandasData, PandasType +from ibis.util import any_of, gen_name # ruff: noqa: F811 -_reduction_operations = { - ops.Min: lambda x: x.min(), - ops.Max: lambda x: x.max(), - ops.Sum: lambda x: x.sum(), - ops.Mean: lambda x: x.mean(), - ops.Count: lambda x: x.count(), - ops.Mode: lambda x: x.mode().iat[0], - ops.Any: lambda x: x.any(), - ops.All: lambda x: x.all(), - ops.Median: lambda x: x.median(), - ops.ApproxMedian: lambda x: x.median(), - ops.BitAnd: lambda x: np.bitwise_and.reduce(x.values), - ops.BitOr: lambda x: np.bitwise_or.reduce(x.values), - ops.BitXor: lambda x: np.bitwise_xor.reduce(x.values), - ops.Last: lambda x: x.iat[-1], - ops.First: lambda x: x.iat[0], - ops.CountDistinct: lambda x: x.nunique(), - ops.ApproxCountDistinct: lambda x: x.nunique(), - ops.ArrayCollect: lambda x: x.tolist(), -} - - -class Executor(Dispatched): +class PandasExecutor(Dispatched, PandasUtils): + name 
= "pandas" + kernels = pandas_kernels + @classmethod def visit(cls, op: ops.Node, **kwargs): raise OperationNotDefinedError( @@ -95,7 +71,9 @@ def visit(cls, op: ops.SortKey, expr, ascending): @classmethod def visit(cls, op: ops.Cast, arg, to): - if isinstance(arg, pd.Series): + if arg is None: + return None + elif isinstance(arg, pd.Series): return PandasConverter.convert_column(arg, to) else: return PandasConverter.convert_scalar(arg, to) @@ -110,19 +88,71 @@ def visit(cls, op: ops.RandomScalar): @classmethod def visit(cls, op: ops.Greatest, arg): - return columnwise(lambda df: df.max(axis=1), arg) + return cls.columnwise(lambda df: df.max(axis=1), arg) @classmethod def visit(cls, op: ops.Least, arg): - return columnwise(lambda df: df.min(axis=1), arg) + return cls.columnwise(lambda df: df.min(axis=1), arg) @classmethod def visit(cls, op: ops.Coalesce, arg): - return columnwise(lambda df: df.bfill(axis=1).iloc[:, 0], arg) + return cls.columnwise(lambda df: df.bfill(axis=1).iloc[:, 0], arg) @classmethod def visit(cls, op: ops.Value, **operands): - return pick_kernel(op, operands) + # automatically pick the correct kernel based on the operand types + typ = type(op) + name = op.name + dtype = PandasType.from_ibis(op.dtype) + kwargs = {"operands": operands, "name": name, "dtype": dtype} + + # decimal operations have special implementations + if op.dtype.is_decimal(): + func = cls.kernels.elementwise_decimal[typ] + return cls.elementwise(func, **kwargs) + + # prefer generic implementations if available + if func := cls.kernels.generic.get(typ): + return cls.generic(func, **kwargs) + + _, *rest = operands.values() + is_multi_arg = bool(rest) + is_multi_column = any_of(rest, pd.Series) + + if is_multi_column: + if func := cls.kernels.columnwise.get(typ): + return cls.columnwise(func, **kwargs) + elif func := cls.kernels.rowwise.get(typ): + return cls.rowwise(func, **kwargs) + else: + raise OperationNotDefinedError( + "No columnwise or rowwise implementation found for " + f"multi-column operation {typ}" + ) + elif is_multi_arg: + if func := cls.kernels.columnwise.get(typ): + return cls.columnwise(func, **kwargs) + elif func := cls.kernels.serieswise.get(typ): + return cls.serieswise(func, **kwargs) + elif func := cls.kernels.rowwise.get(typ): + return cls.rowwise(func, **kwargs) + elif func := cls.kernels.elementwise.get(typ): + return cls.elementwise(func, **kwargs) + else: + raise OperationNotDefinedError( + "No columnwise, serieswise, rowwise or elementwise " + f"implementation found for multi-argument operation {typ}" + ) + else: # noqa: PLR5501 + if func := cls.kernels.serieswise.get(typ): + return cls.serieswise(func, **kwargs) + elif func := cls.kernels.elementwise.get(typ): + return cls.elementwise(func, **kwargs) + else: + raise OperationNotDefinedError( + "No serieswise or elementwise implementation found for " + f"single-argument operation {typ}" + ) @classmethod def visit(cls, op: ops.IsNan, arg): @@ -135,21 +165,13 @@ def visit(cls, op: ops.IsNan, arg): return arg != arg @classmethod - def visit(cls, op: ops.SearchedCase, cases, results, default): - cases, _ = asframe(cases, concat=False) - results, _ = asframe(results, concat=False) - out = np.select(cases, results, default) - return pd.Series(out) - - @classmethod - def visit(cls, op: ops.SimpleCase, base, cases, results, default): - if isinstance(default, pd.Series): - raise NotImplementedError( - "SimpleCase with a columnar shaped default value is not implemented" - ) - cases = tuple(base == case for case in cases) - cases, _ = 
asframe(cases, concat=False) - results, _ = asframe(results, concat=False) + def visit( + cls, op: ops.SearchedCase | ops.SimpleCase, cases, results, default, base=None + ): + if base is not None: + cases = tuple(base == case for case in cases) + cases, _ = cls.asframe(cases, concat=False) + results, _ = cls.asframe(results, concat=False) out = np.select(cases, results, default) return pd.Series(out) @@ -165,9 +187,9 @@ def visit(cls, op: ops.TimestampTruncate | ops.DateTruncate, arg, unit): @classmethod def visit(cls, op: ops.IntervalFromInteger, unit, **kwargs): if unit.short in {"Y", "Q", "M", "W"}: - return elementwise(lambda v: pd.DateOffset(**{unit.plural: v}), kwargs) + return cls.elementwise(lambda v: pd.DateOffset(**{unit.plural: v}), kwargs) else: - return serieswise( + return cls.serieswise( lambda arg: arg.astype(f"timedelta64[{unit.short}]"), kwargs ) @@ -183,7 +205,7 @@ def visit(cls, op: ops.BetweenTime, arg, lower_bound, upper_bound): @classmethod def visit(cls, op: ops.FindInSet, needle, values): - (needle, *haystack), _ = asframe((needle, *values), concat=False) + (needle, *haystack), _ = cls.asframe((needle, *values), concat=False) condlist = [needle == col for col in haystack] choicelist = [i for i, _ in enumerate(haystack)] result = np.select(condlist, choicelist, default=-1) @@ -191,15 +213,15 @@ def visit(cls, op: ops.FindInSet, needle, values): @classmethod def visit(cls, op: ops.Array, exprs): - return rowwise(lambda row: np.array(row, dtype=object), exprs) + return cls.rowwise(lambda row: np.array(row, dtype=object), exprs) @classmethod def visit(cls, op: ops.ArrayConcat, arg): - return rowwise(lambda row: np.concatenate(row.values), arg) + return cls.rowwise(lambda row: np.concatenate(row.values), arg) @classmethod def visit(cls, op: ops.Unnest, arg): - arg = asseries(arg) + arg = cls.asseries(arg) mask = arg.map(lambda v: bool(len(v)), na_action="ignore") return arg[mask].explode() @@ -220,8 +242,8 @@ def visit( @classmethod def visit(cls, op: ops.Reduction, arg, where): - func = _reduction_operations[type(op)] - return agg(func, arg, where) + func = cls.kernels.reductions[type(op)] + return cls.agg(func, arg, where) @classmethod def visit(cls, op: ops.CountStar, arg, where): @@ -246,9 +268,9 @@ def agg(df): @classmethod def visit(cls, op: ops.Arbitrary, arg, where, how): if how == "first": - return agg(lambda x: x.iat[0], arg, where) + return cls.agg(cls.kernels.reductions[ops.First], arg, where) elif how == "last": - return agg(lambda x: x.iat[-1], arg, where) + return cls.agg(cls.kernels.reductions[ops.Last], arg, where) else: raise OperationNotDefinedError(f"Arbitrary {how!r} is not supported") @@ -274,12 +296,12 @@ def agg(df): @classmethod def visit(cls, op: ops.Variance, arg, where, how): ddof = {"pop": 0, "sample": 1}[how] - return agg(lambda x: x.var(ddof=ddof), arg, where) + return cls.agg(lambda x: x.var(ddof=ddof), arg, where) @classmethod def visit(cls, op: ops.StandardDev, arg, where, how): ddof = {"pop": 0, "sample": 1}[how] - return agg(lambda x: x.std(ddof=ddof), arg, where) + return cls.agg(lambda x: x.std(ddof=ddof), arg, where) @classmethod def visit(cls, op: ops.Correlation, left, right, where, how): @@ -333,11 +355,11 @@ def agg(df): @classmethod def visit(cls, op: ops.Quantile, arg, quantile, where): - return agg(lambda x: x.quantile(quantile), arg, where) + return cls.agg(lambda x: x.quantile(quantile), arg, where) @classmethod def visit(cls, op: ops.MultiQuantile, arg, quantile, where): - return agg(lambda x: list(x.quantile(quantile)), 
arg, where) + return cls.agg(lambda x: list(x.quantile(quantile)), arg, where) @classmethod def visit( @@ -361,21 +383,28 @@ def agg(df, order_keys): @classmethod def visit(cls, op: ops.Lag | ops.Lead, arg, offset, default): if isinstance(op, ops.Lag): - sign = lambda x: x + sign = operator.pos else: - sign = lambda x: -x + sign = operator.neg if op.offset is not None and op.offset.dtype.is_interval(): def agg(df, order_keys): df = df.set_index(order_keys) col = df[arg.name].shift(freq=sign(offset)) - return col.reindex(df.index, fill_value=default) + res = col.reindex(df.index) + if not pd.isnull(default): + res = res.fillna(default) + return res.reset_index(drop=True) + else: offset = 1 if offset is None else offset def agg(df, order_keys): - return df[arg.name].shift(sign(offset), fill_value=default) + res = df[arg.name].shift(sign(offset)) + if not pd.isnull(default): + res = res.fillna(default) + return res return agg @@ -438,7 +467,11 @@ def visit( ): def agg(df, order_keys): args = [df[col.name] for col in func_args] - return func(*args) + res = func(*args) + if isinstance(res, pd.DataFrame): + # it is important otherwise it is going to fill up the memory + res = res.apply(lambda row: row.to_dict(), axis=1) + return res return agg @@ -452,14 +485,10 @@ def visit(cls, op: ops.WindowBoundary, value, preceding): def visit( cls, op: ops.WindowFrame, table, start, end, group_by, order_by, **kwargs ): - if start is not None: - start = asseries(start, len(table)) - if op.start.preceding: - start = -start - if end is not None: - end = asseries(end, len(table)) - if op.end.preceding: - end = -end + if start is not None and op.start.preceding: + start = -start + if end is not None and op.end.preceding: + end = -end table = table.assign(__start__=start, __end__=end) @@ -516,7 +545,7 @@ def visit(cls, op: ops.InMemoryTable, name, schema, data): @classmethod def visit(cls, op: ops.DummyTable, values): - df, _ = asframe(values) + df, _ = cls.asframe(values) return df @classmethod @@ -536,16 +565,20 @@ def visit(cls, op: PandasLimit, parent, n, offset): else: return parent.iloc[offset : offset + n] + @classmethod + def visit(cls, op: PandasResetIndex, parent): + return parent.reset_index(drop=True) + @classmethod def visit(cls, op: ops.Sample, parent, fraction, method, seed): return parent.sample(frac=fraction, random_state=seed) @classmethod def visit(cls, op: ops.Project, parent, values): - df, all_scalars = asframe(values) + df, all_scalars = cls.asframe(values) if all_scalars and len(parent) != len(df): - df = pd.concat([df] * len(parent)) - return df.reset_index(drop=True) + df = cls.concat([df] * len(parent)) + return df @classmethod def visit(cls, op: ops.Filter, parent, predicates): @@ -575,27 +608,25 @@ def visit(cls, op: PandasAggregate, parent, groups, metrics): if groups: parent = parent.groupby([col.name for col in groups.values()]) metrics = {k: parent.apply(v) for k, v in metrics.items()} - result = pd.concat(metrics, axis=1).reset_index() + result = cls.concat(metrics, axis=1).reset_index() renames = {v.name: k for k, v in op.groups.items()} return result.rename(columns=renames) else: results = {k: v(parent) for k, v in metrics.items()} - combined, _ = asframe(results) + combined, _ = cls.asframe(results) return combined @classmethod def visit(cls, op: PandasJoin, how, left, right, left_on, right_on): # broadcast predicates if they are scalar values - left_size = len(left) - left_on = [asseries(v, left_size) for v in left_on] - right_size = len(right) - right_on = [asseries(v, 
right_size) for v in right_on] + left_on = [cls.asseries(v, like=left) for v in left_on] + right_on = [cls.asseries(v, like=right) for v in right_on] if how == "cross": assert not left_on and not right_on - return pd.merge(left, right, how="cross") + return cls.merge(left, right, how="cross") elif how == "anti": - df = pd.merge( + df = cls.merge( left, right, how="outer", @@ -606,13 +637,19 @@ def visit(cls, op: PandasJoin, how, left, right, left_on, right_on): df = df[df["_merge"] == "left_only"] return df.drop(columns=["_merge"]) elif how == "semi": - mask = asseries(True, left_size) + mask = cls.asseries(True, like=left) for left_pred, right_pred in zip(left_on, right_on): mask = mask & left_pred.isin(right_pred) return left[mask] else: - df = left.merge(right, how=how, left_on=left_on, right_on=right_on) - return df.drop(columns=[f"key_{i}" for i in range(len(left_on))]) + left_columns = {gen_name("left"): s for s in left_on} + right_columns = {gen_name("right"): s for s in right_on} + left_keys = list(left_columns.keys()) + right_keys = list(right_columns.keys()) + left = left.assign(**left_columns) + right = right.assign(**right_columns) + df = left.merge(right, how=how, left_on=left_keys, right_on=right_keys) + return df @classmethod def visit( @@ -628,12 +665,10 @@ def visit( operator, ): # broadcast predicates if they are scalar values - left_size = len(left) - right_size = len(right) - left_on = [asseries(v, left_size) for v in left_on] - left_by = [asseries(v, left_size) for v in left_by] - right_on = [asseries(v, right_size) for v in right_on] - right_by = [asseries(v, right_size) for v in right_by] + left_on = [cls.asseries(v, like=left) for v in left_on] + left_by = [cls.asseries(v, like=left) for v in left_by] + right_on = [cls.asseries(v, like=right) for v in right_on] + right_by = [cls.asseries(v, like=right) for v in right_by] # merge_asof only works with column names not with series left_on = {gen_name("left"): s for s in left_on} @@ -667,7 +702,7 @@ def visit( # merge_asof requires the left side to be sorted by the join keys left = left.sort_values(by=list(left_on.keys())) - df = pd.merge_asof( + df = cls.merge_asof( left, right, left_on=list(left_on.keys()), @@ -681,7 +716,7 @@ def visit( @classmethod def visit(cls, op: ops.Union, left, right, distinct): - result = pd.concat([left, right], axis=0) + result = cls.concat([left, right], axis=0) return result.drop_duplicates() if distinct else result @classmethod diff --git a/ibis/backends/pandas/helpers.py b/ibis/backends/pandas/helpers.py index 92597be14461..c1b05bd8891f 100644 --- a/ibis/backends/pandas/helpers.py +++ b/ibis/backends/pandas/helpers.py @@ -1,6 +1,7 @@ from __future__ import annotations import itertools +import math from typing import Callable import numpy as np @@ -9,94 +10,113 @@ from ibis.util import gen_name -def asseries(value, size=1): - """Ensure that value is a pandas Series object, broadcast if necessary.""" - if isinstance(value, pd.Series): - return value - elif isinstance(value, (list, np.ndarray)): - return pd.Series(itertools.repeat(np.array(value), size)) - else: - return pd.Series(np.repeat(value, size)) - - -def asframe(values: dict | tuple, concat=True): - """Construct a DataFrame from a dict or tuple of Series objects.""" - if isinstance(values, dict): - names, values = zip(*values.items()) - elif isinstance(values, tuple): - names = [f"_{i}" for i in range(len(values))] - else: - raise TypeError(f"values must be a dict, or tuple; got {type(values)}") - - size = 1 - all_scalars = True - 
for v in values: - if isinstance(v, pd.Series): - size = len(v) - all_scalars = False - break - - columns = [asseries(v, size) for v in values] - if concat: - df = pd.concat(columns, axis=1, keys=names) - return df, all_scalars - else: - return columns, all_scalars - - -def generic(func: Callable, operands): - return func(*operands.values()) - - -def rowwise(func: Callable, operands): - """Kernel applied to a row, where all the operands are scalars.""" - # dealing with a collection of series objects - df, _ = asframe(operands) - return df.apply(func, axis=1) - - -def columnwise(func: Callable, operands): - """Kernel where all the operands are series objects.""" - df, _ = asframe(operands) - return func(df) - - -def serieswise(func, operands): - """Kernel where the first operand is a series object.""" - (key, value), *rest = operands.items() - # ensure that the first operand is a series object - value = asseries(value) - operands = {key: value, **dict(rest)} - return func(**operands) - - -def elementwise(func, operands): - """Kernel applied to an element, where all the operands are scalars.""" - value = operands.pop(next(iter(operands))) - if isinstance(value, pd.Series): - # dealing with a single series object - if operands: - return value.apply(func, **operands) - else: - return value.map(func, na_action="ignore") - else: - # dealing with a single scalar object - return func(value, **operands) +def isnull(obj): + return obj is None or obj is pd.NA or (isinstance(obj, float) and math.isnan(obj)) + +class PandasUtils: + @classmethod + def merge(cls, *args, **kwargs): + return pd.merge(*args, **kwargs) -def agg(func, arg_column, where_column): - if where_column is None: + @classmethod + def merge_asof(cls, *args, **kwargs): + return pd.merge_asof(*args, **kwargs) - def applier(df): - return func(df[arg_column.name]) - else: + @classmethod + def concat(cls, dfs, **kwargs): + return pd.concat(dfs, **kwargs) - def applier(df): - mask = df[where_column.name] - col = df[arg_column.name][mask] - return func(col) + @classmethod + def asseries(cls, value, like=None): + """Ensure that value is a pandas Series object, broadcast if necessary.""" + size = len(like) if like is not None else 1 + if isinstance(value, pd.Series): + return value + elif isinstance(value, (list, np.ndarray)): + return pd.Series(itertools.repeat(np.array(value), size)) + else: + return pd.Series(np.repeat(value, size)) + + @classmethod + def asframe(cls, values: dict | tuple, concat=True): + """Construct a DataFrame from a dict or tuple of Series objects.""" + if isinstance(values, dict): + names, values = zip(*values.items()) + elif isinstance(values, tuple): + names = [f"_{i}" for i in range(len(values))] + else: + raise TypeError(f"values must be a dict, or tuple; got {type(values)}") + + all_scalars = True + representative = None + for v in values: + if isinstance(v, pd.Series): + representative = v + all_scalars = False + break + + columns = [cls.asseries(v, like=representative) for v in values] + if concat: + df = pd.concat(columns, axis=1, keys=names) + return df, all_scalars + else: + return columns, all_scalars - return applier + @classmethod + def agg(cls, func, arg_column, where_column): + if where_column is None: + + def applier(df): + return func(df[arg_column.name]) + else: + + def applier(df): + mask = df[where_column.name] + col = df[arg_column.name][mask] + return func(col) + + return applier + + @classmethod + def generic(cls, func: Callable, operands, **kwargs): + return func(*operands.values()) + + 
@classmethod + def rowwise(cls, func: Callable, operands, **kwargs): + """Kernel applied to a row, where all the operands are scalars.""" + # dealing with a collection of series objects + df, _ = cls.asframe(operands) + return df.apply(func, axis=1) + + @classmethod + def columnwise(cls, func: Callable, operands, **kwargs): + """Kernel where all the operands are series objects.""" + df, _ = cls.asframe(operands) + return func(df) + + @classmethod + def serieswise(cls, func, operands, **kwargs): + """Kernel where the first operand is a series object.""" + (key, value), *rest = operands.items() + # ensure that the first operand is a series object + value = cls.asseries(value) + operands = {key: value, **dict(rest)} + return func(**operands) + + @classmethod + def elementwise(cls, func, operands, **kwargs): + """Kernel applied to an element, where all the operands are scalars.""" + value = operands.pop(next(iter(operands))) + if isinstance(value, pd.Series): + # dealing with a single series object + if operands: + return value.apply(func, **operands) + else: + return value.map(func, na_action="ignore") + else: + # dealing with a single scalar object + return func(value, **operands) class UngroupedFrame: diff --git a/ibis/backends/pandas/kernels.py b/ibis/backends/pandas/kernels.py index 7bfea9883fdd..09da329ff4de 100644 --- a/ibis/backends/pandas/kernels.py +++ b/ibis/backends/pandas/kernels.py @@ -17,20 +17,16 @@ import toolz import ibis.expr.operations as ops -from ibis.backends.pandas.helpers import ( - columnwise, - elementwise, - generic, - rowwise, - serieswise, -) -from ibis.common.exceptions import OperationNotDefinedError -from ibis.util import any_of +from ibis.backends.pandas.helpers import isnull def substring_rowwise(row): arg, start, length = row["arg"], row["start"], row["length"] - if length is None: + if isnull(arg): + return None + elif isnull(start): + return None + elif isnull(length): return arg[start:] else: return arg[start : start + length] @@ -146,6 +142,18 @@ def array_position_rowwise(row): return -1 +def array_slice_rowwise(row): + arg, start, stop = row["arg"], row["start"], row["stop"] + if isnull(start) and isnull(stop): + return arg + elif isnull(start): + return arg[:stop] + elif isnull(stop): + return arg[start:] + else: + return arg[start:stop] + + def integer_range_rowwise(row): if not row["step"]: return [] @@ -161,7 +169,7 @@ def timestamp_range_rowwise(row): def _safe_method(mapping, method, *args, **kwargs): - if mapping is None or mapping is pd.NA: + if isnull(mapping): return None try: method = getattr(mapping, method) @@ -169,7 +177,7 @@ def _safe_method(mapping, method, *args, **kwargs): return None else: result = method(*args, **kwargs) - return None if result is pd.NA else result + return None if isnull(result) else result def safe_len(mapping): @@ -201,9 +209,7 @@ def safe_values(mapping): def safe_merge(left, right): - if left is None or left is pd.NA: - return None - elif right is None or right is pd.NA: + if isnull(left) or isnull(right): return None else: return {**left, **right} @@ -246,7 +252,28 @@ def round_serieswise(arg, digits): return np.round(arg, digits).astype("float64") -_generic_impls = { +reductions = { + ops.Min: lambda x: x.min(), + ops.Max: lambda x: x.max(), + ops.Sum: lambda x: x.sum(), + ops.Mean: lambda x: x.mean(), + ops.Count: lambda x: x.count(), + ops.Mode: lambda x: x.mode().iat[0], + ops.Any: lambda x: x.any(), + ops.All: lambda x: x.all(), + ops.Median: lambda x: x.median(), + ops.ApproxMedian: lambda x: x.median(), 
+ ops.BitAnd: lambda x: np.bitwise_and.reduce(x.values), + ops.BitOr: lambda x: np.bitwise_or.reduce(x.values), + ops.BitXor: lambda x: np.bitwise_xor.reduce(x.values), + ops.Last: lambda x: x.iat[-1], + ops.First: lambda x: x.iat[0], + ops.CountDistinct: lambda x: x.nunique(), + ops.ApproxCountDistinct: lambda x: x.nunique(), + ops.ArrayCollect: lambda x: x.tolist(), +} + +generic = { ops.Abs: abs, ops.Acos: np.arccos, ops.Add: operator.add, @@ -312,7 +339,7 @@ def round_serieswise(arg, digits): ops.Log: lambda x, base: np.log(x) if base is None else np.log(x) / np.log(base), } -_columnwise_impls = { +columnwise = { ops.Clip: lambda df: df["arg"].clip(lower=df["lower"], upper=df["upper"]), ops.IfElse: lambda df: df["true_expr"].where( df["bool_expr"], other=df["false_null_expr"] @@ -321,13 +348,13 @@ def round_serieswise(arg, digits): ops.Repeat: lambda df: df["arg"] * df["times"], } -_rowwise_impls = { +rowwise = { ops.ArrayContains: lambda row: row["other"] in row["arg"], ops.ArrayIndex: array_index_rowwise, ops.ArrayPosition: array_position_rowwise, ops.ArrayRemove: lambda row: [x for x in row["arg"] if x != row["other"]], ops.ArrayRepeat: lambda row: np.tile(row["arg"], max(0, row["times"])), - ops.ArraySlice: lambda row: row["arg"][row["start"] : row["stop"]], + ops.ArraySlice: array_slice_rowwise, ops.ArrayUnion: lambda row: toolz.unique(row["left"] + row["right"]), ops.EndsWith: lambda row: row["arg"].endswith(row["end"]), ops.IntegerRange: integer_range_rowwise, @@ -364,7 +391,7 @@ def round_serieswise(arg, digits): ops.Strftime: lambda row: row["arg"].strftime(row["format_str"]), } -_serieswise_impls = { +serieswise = { ops.Between: lambda arg, lower_bound, upper_bound: arg.between( lower_bound, upper_bound ), @@ -387,6 +414,8 @@ def round_serieswise(arg, digits): ops.ExtractSecond: lambda arg: arg.dt.second, ops.ExtractWeekOfYear: lambda arg: arg.dt.isocalendar().week.astype("int32"), ops.ExtractYear: lambda arg: arg.dt.year, + ops.IsNull: lambda arg: arg.isnull(), + ops.NotNull: lambda arg: arg.notnull(), ops.Lowercase: lambda arg: arg.str.lower(), ops.LPad: lambda arg, length, pad: arg.str.rjust(length, fillchar=pad), ops.LStrip: lambda arg: arg.str.lstrip(), @@ -420,7 +449,7 @@ def round_serieswise(arg, digits): ops.Uppercase: lambda arg: arg.str.upper(), } -_elementwise_impls = { +elementwise = { ops.ExtractProtocol: lambda x: getattr(urlsplit(x), "scheme", ""), ops.ExtractAuthority: lambda x: getattr(urlsplit(x), "netloc", ""), ops.ExtractPath: lambda x: getattr(urlsplit(x), "path", ""), @@ -435,10 +464,11 @@ def round_serieswise(arg, digits): ops.MapLength: safe_len, ops.MapKeys: safe_keys, ops.MapValues: safe_values, + ops.Round: lambda x, digits=0: round(x, digits), } -_elementwise_decimal_impls = { +elementwise_decimal = { ops.Round: lambda x, digits=0: round(x, digits), ops.Log10: safe_decimal(lambda x: x.log10()), ops.Ln: safe_decimal(lambda x: x.ln()), @@ -452,62 +482,10 @@ def round_serieswise(arg, digits): } -def pick_kernel(op, operands): - typ = type(op) - - # decimal operations have special implementations - if op.dtype.is_decimal(): - func = _elementwise_decimal_impls[typ] - return elementwise(func, operands) - - # prefer generic implementations if available - if func := _generic_impls.get(typ): - return generic(func, operands) - - first, *rest = operands.values() - is_multi_arg = bool(rest) - is_multi_column = any_of(rest, pd.Series) - - if is_multi_column: - if func := _columnwise_impls.get(typ): - return columnwise(func, operands) - elif func := 
_rowwise_impls.get(typ): - return rowwise(func, operands) - else: - raise OperationNotDefinedError( - "No columnwise or rowwise implementation found for " - f"multi-column operation {typ}" - ) - elif is_multi_arg: - if func := _columnwise_impls.get(typ): - return columnwise(func, operands) - elif func := _serieswise_impls.get(typ): - return serieswise(func, operands) - elif func := _rowwise_impls.get(typ): - return rowwise(func, operands) - elif func := _elementwise_impls.get(typ): - return elementwise(func, operands) - else: - raise OperationNotDefinedError( - "No columnwise, serieswise, rowwise or elementwise " - f"implementation found for multi-argument operation {typ}" - ) - else: # noqa: PLR5501 - if func := _serieswise_impls.get(typ): - return serieswise(func, operands) - elif func := _elementwise_impls.get(typ): - return elementwise(func, operands) - else: - raise OperationNotDefinedError( - "No serieswise or elementwise implementation found for " - f"single-argument operation {typ}" - ) - - supported_operations = ( - _generic_impls.keys() - | _columnwise_impls.keys() - | _rowwise_impls.keys() - | _serieswise_impls.keys() - | _elementwise_impls.keys() + generic.keys() + | columnwise.keys() + | rowwise.keys() + | serieswise.keys() + | elementwise.keys() ) diff --git a/ibis/backends/pandas/rewrites.py b/ibis/backends/pandas/rewrites.py index 435c931277b7..63f93c830f2c 100644 --- a/ibis/backends/pandas/rewrites.py +++ b/ibis/backends/pandas/rewrites.py @@ -46,6 +46,19 @@ def schema(self): ) +@public +class PandasResetIndex(PandasRelation): + parent: ops.Relation + + @attribute + def values(self): + return self.parent.values + + @attribute + def schema(self): + return self.parent.schema + + @public class PandasJoin(PandasRelation): left: ops.Relation @@ -118,12 +131,14 @@ def is_columnar(node): @replace(ops.Project) def rewrite_project(_, **kwargs): + unnests = [] winfuncs = [] for v in _.values.values(): - winfuncs.extend(v.find(ops.WindowFunction, ops.Value)) + unnests.extend(v.find(ops.Unnest, filter=ops.Value)) + winfuncs.extend(v.find(ops.WindowFunction, filter=ops.Value)) if not winfuncs: - return _ + return PandasResetIndex(_) if unnests else _ selects = {ops.Field(_.parent, k): k for k in _.parent.schema} for node in winfuncs: @@ -161,7 +176,9 @@ def rewrite_project(_, **kwargs): # STEP 3: reconstruct the current projection with the window functions subs.update(metrics) values = {k: v.replace(subs, filter=ops.Value) for k, v in _.values.items()} - return ops.Project(proj, values) + result = ops.Project(proj, values) + + return PandasResetIndex(result) @replace(ops.Aggregate) diff --git a/ibis/backends/pandas/tests/test_cast.py b/ibis/backends/pandas/tests/test_cast.py index 7ca38a675261..e07395126e77 100644 --- a/ibis/backends/pandas/tests/test_cast.py +++ b/ibis/backends/pandas/tests/test_cast.py @@ -8,6 +8,7 @@ import ibis import ibis.expr.datatypes as dt +from ibis.backends.conftest import is_older_than from ibis.backends.pandas.tests.conftest import TestConf as tm TIMESTAMP = "2022-03-13 06:59:10.467417" @@ -67,7 +68,13 @@ def test_cast_array(t, from_, to, expected): @pytest.mark.parametrize( ("to", "expected"), [ - ("string", "object"), + pytest.param( + "string", + "object", + marks=pytest.mark.skipif( + is_older_than("pandas", "2.0.0"), reason="raises a NotImplementError" + ), + ), ("int64", "int64"), ("double", "float64"), ( @@ -93,7 +100,13 @@ def test_cast_timestamp_column(t, df, column, to, expected): @pytest.mark.parametrize( ("to", "expected"), [ - ("string", str), 
+ pytest.param( + "string", + str, + marks=pytest.mark.skipif( + is_older_than("pandas", "2.0.0"), reason="raises a NotImplementError" + ), + ), ("int64", lambda x: pd.Timestamp(x).value // int(1e9)), ("double", lambda x: float(pd.Timestamp(x).value // int(1e9))), ( @@ -113,7 +126,13 @@ def test_cast_timestamp_scalar_naive(client, to, expected): @pytest.mark.parametrize( ("to", "expected"), [ - ("string", str), + pytest.param( + "string", + str, + marks=pytest.mark.skipif( + is_older_than("pandas", "2.0.0"), reason="raises a NotImplementError" + ), + ), ("int64", lambda x: pd.Timestamp(x).value // int(1e9)), ("double", lambda x: float(pd.Timestamp(x).value // int(1e9))), ( diff --git a/ibis/backends/pandas/tests/test_join.py b/ibis/backends/pandas/tests/test_join.py index a9acaad3ed6e..711da76f954c 100644 --- a/ibis/backends/pandas/tests/test_join.py +++ b/ibis/backends/pandas/tests/test_join.py @@ -6,6 +6,7 @@ import pytest import ibis +from ibis.backends.conftest import is_older_than # SEMI and ANTI are checked in backend tests mutating_join_type = pytest.mark.parametrize( @@ -51,7 +52,20 @@ def test_cross_join_project_left_table(left, right, df1, df2): tm.assert_frame_equal(result[expected.columns], expected) -@mutating_join_type +@pytest.mark.parametrize( + "how", + [ + pytest.param( + "inner", + marks=pytest.mark.xfail( + condition=is_older_than("pandas", "2.0.0"), reason="different indices" + ), + ), + "left", + "right", + "outer", + ], +) def test_join_with_multiple_predicates(how, left, right, df1, df2): expr = left.join(right, [left.key == right.key, left.key2 == right.key3], how=how)[ left, right.key3, right.other_value @@ -80,7 +94,20 @@ def test_join_with_multiple_predicates(how, left, right, df1, df2): tm.assert_frame_equal(result, expected) -@mutating_join_type +@pytest.mark.parametrize( + "how", + [ + pytest.param( + "inner", + marks=pytest.mark.xfail( + condition=is_older_than("pandas", "2.0.0"), reason="different indices" + ), + ), + "left", + "right", + "outer", + ], +) def test_join_with_multiple_predicates_written_as_one(how, left, right, df1, df2): predicate = (left.key == right.key) & (left.key2 == right.key3) expr = left.join(right, predicate, how=how)[left, right.key3, right.other_value] diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 63c40a7a24a7..76c4056f173e 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -127,7 +127,6 @@ def mean_udf(s): "flink", "exasol", ] -argidx_grouped_marks = ["dask"] + argidx_not_grouped_marks def make_argidx_params(marks, grouped=False): @@ -171,7 +170,7 @@ def test_aggregate(backend, alltypes, df, result_fn, expected_fn): @pytest.mark.parametrize( ("result_fn", "expected_fn"), - aggregate_test_params + make_argidx_params(argidx_grouped_marks, grouped=True), + aggregate_test_params + make_argidx_params(argidx_not_grouped_marks, grouped=True), ) def test_aggregate_grouped(backend, alltypes, df, result_fn, expected_fn): grouping_key_col = "bigint_col" @@ -647,7 +646,7 @@ def mean_and_std(v): id="first", marks=[ pytest.mark.notimpl( - ["dask", "druid", "impala", "mssql", "mysql", "oracle", "flink"], + ["druid", "impala", "mssql", "mysql", "oracle", "flink"], raises=com.OperationNotDefinedError, ), pytest.mark.notimpl( @@ -662,7 +661,7 @@ def mean_and_std(v): id="last", marks=[ pytest.mark.notimpl( - ["dask", "druid", "impala", "mssql", "mysql", "oracle", "flink"], + ["druid", "impala", "mssql", "mysql", "oracle", "flink"], 
raises=com.OperationNotDefinedError, ), pytest.mark.notimpl( @@ -684,11 +683,6 @@ def mean_and_std(v): pytest.mark.notyet( ["impala", "pyspark", "flink"], raises=com.OperationNotDefinedError ), - pytest.mark.broken( - ["dask"], - raises=AttributeError, - reason="'Series' object has no attribute 'bitand'", - ), ], ), param( @@ -703,11 +697,6 @@ def mean_and_std(v): pytest.mark.notyet( ["impala", "pyspark", "flink"], raises=com.OperationNotDefinedError ), - pytest.mark.broken( - ["dask"], - raises=AttributeError, - reason="'Series' object has no attribute 'bitor'", - ), ], ), param( @@ -722,11 +711,6 @@ def mean_and_std(v): pytest.mark.notyet( ["impala", "pyspark", "flink"], raises=com.OperationNotDefinedError ), - pytest.mark.broken( - ["dask"], - raises=AttributeError, - reason="'Series' object has no attribute 'bitxor'", - ), ], ), param( @@ -911,7 +895,6 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): pytest.mark.notimpl( [ "bigquery", - "dask", "datafusion", "polars", "druid", @@ -938,6 +921,11 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): reason="backend implements approximate quantiles", raises=AssertionError, ), + pytest.mark.broken( + ["dask"], + reason="backend implements approximate quantiles", + raises=AssertionError, + ), pytest.mark.never( ["flink"], reason="backend doesn't implement approximate quantiles yet", @@ -996,7 +984,7 @@ def test_quantile( id="covar_pop", marks=[ pytest.mark.notimpl( - ["dask", "polars", "druid"], + ["polars", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1016,7 +1004,7 @@ def test_quantile( id="covar_samp", marks=[ pytest.mark.notimpl( - ["dask", "polars", "druid"], + ["polars", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1036,7 +1024,7 @@ def test_quantile( id="corr_pop", marks=[ pytest.mark.notimpl( - ["dask", "druid"], + ["druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1061,7 +1049,7 @@ def test_quantile( id="corr_samp", marks=[ pytest.mark.notimpl( - ["dask", "druid"], + ["druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1100,7 +1088,7 @@ def test_quantile( id="covar_pop_bool", marks=[ pytest.mark.notimpl( - ["dask", "polars", "druid"], + ["polars", "druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1124,7 +1112,7 @@ def test_quantile( id="corr_pop_bool", marks=[ pytest.mark.notimpl( - ["dask", "druid"], + ["druid"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1187,11 +1175,6 @@ def test_corr_cov( ["mysql", "sqlite", "mssql", "druid", "exasol"], raises=com.OperationNotDefinedError, ) -@pytest.mark.broken( - ["dask"], - raises=AttributeError, - reason="'Series' object has no attribute 'approx_median'", -) @pytest.mark.notyet(["flink"], raises=com.OperationNotDefinedError) def test_approx_median(alltypes): expr = alltypes.double_col.approx_median() @@ -1325,7 +1308,6 @@ def test_date_quantile(alltypes, func): lambda t: t.string_col.isin(["1", "7"]), marks=[ pytest.mark.notyet(["trino"], raises=TrinoUserError), - pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError), ], id="is_in", ), @@ -1334,7 +1316,6 @@ def test_date_quantile(alltypes, func): lambda t: ~t.string_col.isin(["1", "7"]), marks=[ pytest.mark.notyet(["trino"], raises=TrinoUserError), - pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError), ], id="not_in", ), @@ -1386,11 +1367,6 @@ def test_group_concat( @pytest.mark.broken( ["druid"], 
raises=PyDruidProgrammingError, reason="Java NullPointerException" ) -@pytest.mark.notimpl( - ["dask"], - raises=NotImplementedError, - reason="sorting on aggregations not yet implemented", -) @pytest.mark.notimpl(["mssql"], raises=PyODBCProgrammingError) def test_topk_op(alltypes, df): # TopK expression will order rows by "count" but each backend @@ -1607,11 +1583,6 @@ def test_grouped_case(backend, con): @pytest.mark.notimpl(["datafusion", "polars"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) -@pytest.mark.broken( - ["dask"], - reason="Dask does not windowize this operation correctly", - raises=AssertionError, -) @pytest.mark.notyet(["flink"], raises=com.UnsupportedOperationError) @pytest.mark.notyet(["impala"], raises=ImpalaHiveServer2Error) @pytest.mark.notyet(["clickhouse"], raises=ClickHouseDatabaseError) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 08485208bd3a..c8d1d8afd8c5 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -250,7 +250,6 @@ def test_array_discovery(backend): reason="BigQuery doesn't support casting array to array", raises=GoogleBadRequest, ) -@pytest.mark.notimpl(["dask"], raises=ValueError) @pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["risingwave"], @@ -273,7 +272,6 @@ def test_unnest_simple(backend): @builtin_array -@pytest.mark.notimpl("dask", raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["datafusion", "flink"], raises=com.OperationNotDefinedError) def test_unnest_complex(backend): array_types = backend.array_types @@ -311,8 +309,10 @@ def test_unnest_complex(backend): reason="clickhouse throws away nulls in groupArray", raises=AssertionError, ) -@pytest.mark.notimpl(["dask"], raises=ValueError) @pytest.mark.notimpl(["datafusion", "flink"], raises=com.OperationNotDefinedError) +@pytest.mark.broken( + "dask", reason="DataFrame.index are different", raises=AssertionError +) def test_unnest_idempotent(backend): array_types = backend.array_types df = array_types.execute() @@ -332,8 +332,10 @@ def test_unnest_idempotent(backend): @builtin_array -@pytest.mark.notimpl("dask", raises=ValueError) @pytest.mark.notimpl(["datafusion", "flink"], raises=com.OperationNotDefinedError) +@pytest.mark.broken( + "dask", reason="DataFrame.index are different", raises=AssertionError +) def test_unnest_no_nulls(backend): array_types = backend.array_types df = array_types.execute() @@ -360,6 +362,11 @@ def test_unnest_no_nulls(backend): @builtin_array @pytest.mark.notimpl("dask", raises=ValueError) +@pytest.mark.notimpl( + "pandas", + raises=ValueError, + reason="all the input arrays must have same number of dimensions", +) @pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) @pytest.mark.broken(["risingwave"], raises=AssertionError) def test_unnest_default_name(backend): @@ -410,7 +417,6 @@ def test_unnest_default_name(backend): @pytest.mark.notimpl( ["datafusion"], raises=Exception, reason="array_types table isn't defined" ) -@pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["risingwave"], raises=AssertionError, @@ -554,7 +560,6 @@ def test_array_filter(con, input, output): @builtin_array @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["risingwave"], raises=AssertionError, @@ -597,7 +602,7 @@ def 
test_array_contains(backend, con): ), ], ) -@pytest.mark.notimpl(["dask", "polars"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.notyet(["impala"], raises=com.UnsupportedBackendType) def test_array_position(backend, con, a, expected_array): t = ibis.memtable({"a": a}) @@ -608,7 +613,7 @@ def test_array_position(backend, con, a, expected_array): @builtin_array -@pytest.mark.notimpl(["dask", "polars"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.parametrize( ("a"), [ @@ -637,9 +642,7 @@ def test_array_remove(con, a): @builtin_array -@pytest.mark.notimpl( - ["dask", "datafusion", "polars"], raises=com.OperationNotDefinedError -) +@pytest.mark.notimpl(["datafusion", "polars"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["sqlite"], raises=com.UnsupportedBackendType, reason="Unsupported type: Array..." ) @@ -691,7 +694,7 @@ def test_array_unique(con, input, expected): @builtin_array @pytest.mark.notimpl( - ["dask", "datafusion", "flink", "polars"], + ["datafusion", "flink", "polars"], raises=com.OperationNotDefinedError, ) @pytest.mark.broken( @@ -712,7 +715,7 @@ def test_array_sort(backend, con): @builtin_array @pytest.mark.notimpl( - ["dask", "datafusion", "polars"], raises=com.OperationNotDefinedError + ["datafusion", "polars"], raises=com.OperationNotDefinedError ) @pytest.mark.parametrize( ("a", "b", "expected_array"), @@ -983,7 +986,7 @@ def test_array_flatten(backend, flatten_data, column, expected): reason="range isn't implemented upstream", raises=com.OperationNotDefinedError, ) -@pytest.mark.notimpl(["flink", "dask"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["flink"], raises=com.OperationNotDefinedError) @pytest.mark.parametrize("n", [-2, 0, 2]) def test_range_single_argument(con, n): expr = ibis.range(n) @@ -997,7 +1000,7 @@ def test_range_single_argument(con, n): raises=com.OperationNotDefinedError, ) @pytest.mark.parametrize("n", [-2, 0, 2]) -@pytest.mark.notimpl(["polars", "flink", "dask"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["polars", "flink"], raises=com.OperationNotDefinedError) @pytest.mark.skip("risingwave") def test_range_single_argument_unnest(backend, con, n): expr = ibis.range(n).unnest() @@ -1029,7 +1032,7 @@ def test_range_single_argument_unnest(backend, con, n): reason="range and unnest aren't implemented upstream", raises=com.OperationNotDefinedError, ) -@pytest.mark.notimpl(["flink", "dask"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["flink"], raises=com.OperationNotDefinedError) def test_range_start_stop_step(con, start, stop, step): expr = ibis.range(start, stop, step) result = con.execute(expr) @@ -1044,7 +1047,7 @@ def test_range_start_stop_step(con, start, stop, step): @pytest.mark.notyet( ["datafusion"], raises=com.OperationNotDefinedError, reason="not supported upstream" ) -@pytest.mark.notimpl(["flink", "dask"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["flink"], raises=com.OperationNotDefinedError) @pytest.mark.never( ["risingwave"], raises=PsycoPg2InternalError, @@ -1223,9 +1226,7 @@ def swap(token): ], ) @timestamp_range_tzinfos -@pytest.mark.notimpl( - ["dask", "flink", "datafusion"], raises=com.OperationNotDefinedError -) +@pytest.mark.notimpl(["flink", "datafusion"], raises=com.OperationNotDefinedError) def test_timestamp_range(con, start, stop, step, freq, tzinfo): start = start.replace(tzinfo=tzinfo) stop = 
stop.replace(tzinfo=tzinfo) @@ -1274,9 +1275,7 @@ def test_timestamp_range(con, start, stop, step, freq, tzinfo): ], ) @timestamp_range_tzinfos -@pytest.mark.notimpl( - ["dask", "flink", "datafusion"], raises=com.OperationNotDefinedError -) +@pytest.mark.notimpl(["flink", "datafusion"], raises=com.OperationNotDefinedError) def test_timestamp_range_zero_step(con, start, stop, step, tzinfo): start = start.replace(tzinfo=tzinfo) stop = stop.replace(tzinfo=tzinfo) @@ -1300,10 +1299,13 @@ def test_repr_timestamp_array(con, monkeypatch): @pytest.mark.notyet( - ["dask", "datafusion", "flink", "polars"], + ["datafusion", "flink", "polars"], raises=com.OperationNotDefinedError, ) @pytest.mark.broken(["pandas"], raises=ValueError, reason="reindex on duplicate values") +@pytest.mark.broken( + ["dask"], raises=AssertionError, reason="DataFrame.index are different" +) def test_unnest_range(con): expr = ibis.range(2).unnest().name("x").as_table().mutate({"y": 1.0}) result = con.execute(expr) diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 8bad125da763..6fabd77ec840 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -86,7 +86,7 @@ def _create_temp_table_with_schema(backend, con, temp_table_name, schema, data=N ), ], ) -@pytest.mark.notimpl(["dask", "druid", "impala"]) +@pytest.mark.notimpl(["druid", "impala"]) @pytest.mark.notimpl( ["flink"], reason="Flink backend supports creating only TEMPORARY VIEW for in-memory data.", @@ -864,7 +864,7 @@ def test_self_join_memory_table(backend, con, monkeypatch): ], ids=["python", "pandas"], ) -@pytest.mark.notimpl(["dask", "druid"]) +@pytest.mark.notimpl(["druid"]) @pytest.mark.notimpl( ["flink"], reason="Flink backend supports creating only TEMPORARY VIEW for in-memory data.", diff --git a/ibis/backends/tests/test_examples.py b/ibis/backends/tests/test_examples.py index 5a9cab87f2e7..62ae44281b35 100644 --- a/ibis/backends/tests/test_examples.py +++ b/ibis/backends/tests/test_examples.py @@ -15,7 +15,7 @@ (LINUX or MACOS) and SANDBOXED, reason="nix on linux cannot download duckdb extensions or data due to sandboxing", ) -@pytest.mark.notimpl(["dask", "pyspark", "flink", "exasol"]) +@pytest.mark.notimpl(["pyspark", "flink", "exasol"]) @pytest.mark.notyet(["clickhouse", "druid", "impala", "mssql", "trino", "risingwave"]) @pytest.mark.parametrize( ("example", "columns"), diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 8ffd569a71a9..a685628c4c1d 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -323,6 +323,7 @@ def test_filter(backend, alltypes, sorted_df, predicate_fn, expected_fn): "exasol", "pandas", "pyspark", + "dask", ] ) @pytest.mark.never( @@ -524,7 +525,6 @@ def test_select_sort_sort(alltypes): param( ibis.desc("id"), {"by": "id", "ascending": False}, - marks=pytest.mark.notimpl(["dask"]), ), param( ["id", "int_col"], @@ -606,11 +606,6 @@ def test_isin_notin(backend, alltypes, df, ibis_op, pandas_op): backend.assert_frame_equal(result, expected) -@pytest.mark.notyet( - ["dask"], - reason="dask doesn't support Series as isin/notin argument", - raises=NotImplementedError, -) @pytest.mark.notimpl(["druid"]) @pytest.mark.parametrize( ("ibis_op", "pandas_op"), @@ -1138,7 +1133,7 @@ def test_pivot_wider(backend): reason="arbitrary not implemented in the backend", ) @pytest.mark.notimpl( - ["dask", "datafusion"], + ["datafusion"], raises=com.OperationNotDefinedError, reason="backend doesn't 
implement window functions", ) @@ -1212,7 +1207,7 @@ def test_distinct_on_keep(backend, on, keep): raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( - ["dask", "datafusion"], + ["datafusion"], raises=com.OperationNotDefinedError, reason="backend doesn't implement window functions", ) @@ -1832,9 +1827,6 @@ def test_sample_with_seed(backend): backend.assert_frame_equal(df1, df2) -@pytest.mark.broken( - ["dask"], reason="implementation somehow differs from pandas", raises=ValueError -) def test_substitute(backend): val = "400" t = backend.functional_alltypes diff --git a/ibis/backends/tests/test_interactive.py b/ibis/backends/tests/test_interactive.py index b25311b3650b..207492fe3c8d 100644 --- a/ibis/backends/tests/test_interactive.py +++ b/ibis/backends/tests/test_interactive.py @@ -33,7 +33,7 @@ def table(backend): return backend.functional_alltypes -@pytest.mark.notimpl(["pandas", "polars"]) +@pytest.mark.notimpl(["dask", "pandas", "polars"]) def test_interactive_execute_on_repr(table, queries, snapshot): repr(table.bigint_col.sum()) snapshot.assert_match(queries[0], "out.sql") @@ -53,21 +53,21 @@ def test_repr_png_is_not_none_in_not_interactive(table): assert table._repr_png_() is not None -@pytest.mark.notimpl(["pandas", "polars"]) +@pytest.mark.notimpl(["dask", "pandas", "polars"]) def test_default_limit(table, snapshot, queries): repr(table.select("id", "bool_col")) snapshot.assert_match(queries[0], "out.sql") -@pytest.mark.notimpl(["pandas", "polars"]) +@pytest.mark.notimpl(["dask", "pandas", "polars"]) def test_respect_set_limit(table, snapshot, queries): repr(table.select("id", "bool_col").limit(10)) snapshot.assert_match(queries[0], "out.sql") -@pytest.mark.notimpl(["pandas", "polars"]) +@pytest.mark.notimpl(["dask", "pandas", "polars"]) def test_disable_query_limit(table, snapshot, queries): assert ibis.options.sql.default_limit is None diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index 887c10547b6d..8d84385bf747 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -196,7 +196,7 @@ def test_semi_join_topk(con, batting, awards_players, func): assert not expr.limit(5).execute().empty -@pytest.mark.notimpl(["dask", "druid", "exasol", "oracle"]) +@pytest.mark.notimpl(["druid", "exasol", "oracle"]) @pytest.mark.notimpl( ["postgres", "mssql", "risingwave"], raises=com.IbisTypeError, @@ -211,7 +211,6 @@ def test_join_with_pandas(batting, awards_players): assert df.yearID.nunique() == 7 -@pytest.mark.notimpl(["dask"]) def test_join_with_pandas_non_null_typed_columns(batting, awards_players): batting_filt = batting[lambda t: t.yearID < 1900][["yearID"]] awards_players_filt = awards_players[lambda t: t.yearID < 1900][ @@ -270,11 +269,6 @@ def test_join_with_pandas_non_null_typed_columns(batting, awards_players): param("outer", marks=[sqlite_right_or_full_mark]), ], ) -@pytest.mark.notimpl( - ["dask"], - raises=TypeError, - reason="dask doesn't support join predicates", -) def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_value): n = 5 diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index 67c7b5123281..e1f59c1fcbf9 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -63,7 +63,7 @@ def test_timestamp_accepts_date_literals(alltypes): assert expr.compile(params=params) is not None -@pytest.mark.notimpl(["dask", "impala", "druid", "oracle", "exasol"]) +@pytest.mark.notimpl(["impala", "druid", "oracle", "exasol"]) 
@pytest.mark.never( ["mysql", "sqlite", "mssql"], reason="backend will never implement array types" ) diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index 8e4e8d5fc0c4..689e608f1947 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -512,11 +512,8 @@ def uses_java_re(t): ), lambda t: t.int_col == 1, id="startswith", - # pyspark doesn't support `cases` yet marks=[ - pytest.mark.notimpl( - ["dask", "mssql"], raises=com.OperationNotDefinedError - ), + pytest.mark.notimpl(["mssql"], raises=com.OperationNotDefinedError), ], ), param( @@ -525,10 +522,9 @@ def uses_java_re(t): ), lambda t: t.int_col == 1, id="endswith", - # pyspark doesn't support `cases` yet marks=[ pytest.mark.notimpl( - ["dask", "datafusion", "mssql"], raises=com.OperationNotDefinedError + ["datafusion", "mssql"], raises=com.OperationNotDefinedError ), ], ), @@ -537,9 +533,7 @@ def uses_java_re(t): lambda t: t.date_string_col.str.startswith("2010-01"), id="startswith-simple", marks=[ - pytest.mark.notimpl( - ["dask", "mssql"], raises=com.OperationNotDefinedError - ), + pytest.mark.notimpl(["mssql"], raises=com.OperationNotDefinedError), ], ), param( @@ -548,7 +542,7 @@ def uses_java_re(t): id="endswith-simple", marks=[ pytest.mark.notimpl( - ["dask", "datafusion", "mssql"], raises=com.OperationNotDefinedError + ["datafusion", "mssql"], raises=com.OperationNotDefinedError ), ], ), @@ -626,11 +620,6 @@ def uses_java_re(t): "Polars does not support columnar argument Subtract(StringLength(date_string_col), 1)" ), ), - pytest.mark.broken( - ["dask"], - reason="'Series' object has no attribute 'items'", - raises=AttributeError, - ), pytest.mark.broken(["druid"], raises=PyDruidProgrammingError), ], ), @@ -647,11 +636,6 @@ def uses_java_re(t): "Polars does not support columnar argument Subtract(StringLength(date_string_col), 1)" ), ), - pytest.mark.broken( - ["dask"], - reason="'Series' object has no attribute 'items'", - raises=AttributeError, - ), pytest.mark.broken(["druid"], raises=PyDruidProgrammingError), ], ), @@ -669,11 +653,6 @@ def uses_java_re(t): "Subtract(StringLength(date_string_col), 0)" ), ), - pytest.mark.broken( - ["dask"], - reason="'Series' object has no attribute 'items'", - raises=AttributeError, - ), pytest.mark.broken(["druid"], raises=PyDruidProgrammingError), ], ), @@ -692,11 +671,6 @@ def uses_java_re(t): "Polars does not support columnar argument Subtract(StringLength(date_string_col), 1)" ), ), - pytest.mark.broken( - ["dask"], - reason="'Series' object has no attribute 'items'", - raises=AttributeError, - ), pytest.mark.broken(["druid"], raises=PyDruidProgrammingError), ], ), @@ -706,7 +680,6 @@ def uses_java_re(t): id="split", marks=pytest.mark.notimpl( [ - "dask", "impala", "mysql", "sqlite", @@ -969,7 +942,7 @@ def test_no_conditional_percent_escape(con, expr): assert con.execute(expr) == "%" -@pytest.mark.notimpl(["dask", "mssql", "exasol"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["mssql", "exasol"], raises=com.OperationNotDefinedError) def test_non_match_regex_search_is_false(con): expr = ibis.literal("foo").re_search("bar") result = con.execute(expr) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 3573a598ddf6..fad799de5e3b 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -16,6 +16,7 @@ import ibis.common.exceptions as com import ibis.expr.datatypes as dt from ibis.backends.base import _get_backend_names +from 
ibis.backends.conftest import is_older_than from ibis.backends.tests.errors import ( ArrowInvalid, ClickHouseDatabaseError, @@ -222,6 +223,11 @@ def test_timestamp_extract_milliseconds(backend, alltypes, df): reason="UNIX_SECONDS does not support DATETIME arguments", ) @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) +@pytest.mark.broken( + ["dask", "pandas"], + raises=AssertionError, + condition=is_older_than("pandas", "2.0.0"), +) def test_timestamp_extract_epoch_seconds(backend, alltypes, df): expr = alltypes.timestamp_col.epoch_seconds().name("tmp") result = expr.execute() @@ -751,10 +757,8 @@ def convert_to_offset(offset, displacement_type=displacement_type): "D", ], ) -# TODO - DateOffset - #2553 @pytest.mark.notimpl( [ - "dask", "datafusion", "flink", "impala", @@ -836,7 +840,7 @@ def convert_to_offset(x): id="timestamp-add-interval-binop", marks=[ pytest.mark.notimpl( - ["dask", "snowflake", "sqlite", "bigquery", "exasol"], + ["snowflake", "sqlite", "bigquery", "exasol"], raises=com.OperationNotDefinedError, ), pytest.mark.notimpl(["impala"], raises=com.UnsupportedOperationError), @@ -1428,7 +1432,6 @@ def test_interval_add_cast_column(backend, alltypes, df): raises=com.UnsupportedArgumentError, reason="Polars does not support columnar argument StringConcat()", ), - pytest.mark.notyet(["dask"], raises=com.OperationNotDefinedError), pytest.mark.notyet(["impala"], raises=com.UnsupportedOperationError), pytest.mark.notimpl( ["druid"], @@ -1946,7 +1949,6 @@ def test_time_literal(con, backend): @pytest.mark.broken( ["sqlite"], raises=AssertionError, reason="SQLite returns Timedelta from execution" ) -@pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) @pytest.mark.notyet(["oracle"], raises=OracleDatabaseError) @pytest.mark.parametrize( "microsecond", @@ -2668,6 +2670,12 @@ def test_time_literal_sql(dialect, snapshot, micros): reason="clickhouse doesn't support dates after 2149-06-06", ), pytest.mark.notyet(["datafusion"], raises=Exception), + pytest.mark.broken( + ["pandas", "dask"], + condition=is_older_than("pandas", "2.0.0"), + raises=ValueError, + reason="Out of bounds nanosecond timestamp: 9999-01-02 00:00:00", + ), ], id="large", ), @@ -2681,6 +2689,12 @@ def test_time_literal_sql(dialect, snapshot, micros): reason="clickhouse doesn't support dates before the UNIX epoch", ), pytest.mark.notyet(["datafusion"], raises=Exception), + pytest.mark.broken( + ["pandas", "dask"], + condition=is_older_than("pandas", "2.0.0"), + raises=ValueError, + reason="Out of bounds nanosecond timestamp: 1-07-17 00:00:00", + ), ], ), param( diff --git a/ibis/backends/tests/test_timecontext.py b/ibis/backends/tests/test_timecontext.py index 88376a4f961b..a974bc9ee296 100644 --- a/ibis/backends/tests/test_timecontext.py +++ b/ibis/backends/tests/test_timecontext.py @@ -28,6 +28,7 @@ "druid", "oracle", "pyspark", + "dask", ] ) diff --git a/ibis/backends/tests/test_vectorized_udf.py b/ibis/backends/tests/test_vectorized_udf.py index fa6728acb7f2..0f9878f02487 100644 --- a/ibis/backends/tests/test_vectorized_udf.py +++ b/ibis/backends/tests/test_vectorized_udf.py @@ -8,6 +8,7 @@ import ibis import ibis.common.exceptions as com import ibis.expr.datatypes as dt +from ibis.backends.conftest import is_older_than from ibis.legacy.udf.vectorized import analytic, elementwise, reduction pytestmark = pytest.mark.notimpl(["druid", "oracle", "risingwave"]) @@ -320,6 +321,11 @@ def test_reduction_udf_array_return_type(udf_backend, udf_alltypes, udf_df): 
udf_backend.assert_frame_equal(result, expected) +@pytest.mark.broken( + ["pandas"], + condition=is_older_than("pandas", "2.0.0"), + reason="FutureWarning: Not prepending group keys to the result index of transform-like apply", +) def test_reduction_udf_on_empty_data(udf_backend, udf_alltypes): """Test that summarization can handle empty data.""" # First filter down to zero rows @@ -519,7 +525,6 @@ def test_elementwise_udf_overwrite_destruct_and_assign(udf_backend, udf_alltypes @pytest.mark.xfail_version(pyspark=["pyspark<3.1"]) @pytest.mark.parametrize("method", ["destructure", "unpack"]) -@pytest.mark.skip("dask") def test_elementwise_udf_destructure_exact_once(udf_alltypes, method, tmp_path): @elementwise( input_type=[dt.double], @@ -637,7 +642,6 @@ def test_analytic_udf_destruct_no_group_by(udf_backend, udf_alltypes): @pytest.mark.notimpl(["pyspark"]) -@pytest.mark.xfail_version(dask=["pandas>=2"]) def test_analytic_udf_destruct_overwrite(udf_backend, udf_alltypes): w = ibis.window(preceding=None, following=None, group_by="year") @@ -723,7 +727,7 @@ def test_reduction_udf_destruct_no_group_by_overwrite(udf_backend, udf_alltypes) # TODO - windowing - #2553 -@pytest.mark.notimpl(["dask", "pyspark"]) +@pytest.mark.notimpl(["pyspark"]) def test_reduction_udf_destruct_window(udf_backend, udf_alltypes): win = ibis.window( preceding=ibis.interval(hours=2), diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 1841a1ac0287..b592100783ea 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -89,7 +89,6 @@ def calc_zscore(s): lambda t: t.float_col.shift(1), id="lag", marks=[ - pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl( ["flink"], raises=Py4JJavaError, @@ -107,7 +106,6 @@ def calc_zscore(s): reason="upstream is broken; returns all nulls", raises=AssertionError, ), - pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl( ["flink"], raises=Py4JJavaError, @@ -119,17 +117,11 @@ def calc_zscore(s): lambda t, win: t.id.rank().over(win), lambda t: t.id.rank(method="min").astype("int64") - 1, id="rank", - marks=[ - pytest.mark.notimpl(["dask"], raises=NotImplementedError), - ], ), param( lambda t, win: t.id.dense_rank().over(win), lambda t: t.id.rank(method="dense").astype("int64") - 1, id="dense_rank", - marks=[ - pytest.mark.notimpl(["dask"], raises=NotImplementedError), - ], ), param( lambda t, win: t.id.percent_rank().over(win), @@ -145,7 +137,6 @@ def calc_zscore(s): reason="clickhouse doesn't implement percent_rank", raises=com.OperationNotDefinedError, ), - pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl( ["risingwave"], raises=PsycoPg2InternalError, @@ -161,8 +152,6 @@ def calc_zscore(s): pytest.mark.notyet( ["clickhouse", "exasol"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl(["dask"], raises=NotImplementedError), - pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl( ["risingwave"], raises=PsycoPg2InternalError, @@ -206,7 +195,6 @@ def calc_zscore(s): lambda t: t.float_col.transform("first"), id="first", marks=[ - pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), @@ -215,7 +203,6 @@ def calc_zscore(s): lambda t: t.float_col.transform("last"), id="last", marks=[ - pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), @@ 
-235,7 +222,7 @@ def calc_zscore(s): pytest.mark.notyet( ["impala", "mssql"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl(["dask"], raises=NotImplementedError), + pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError), pytest.mark.notimpl(["flink"], raises=com.OperationNotDefinedError), pytest.mark.notimpl(["risingwave"], raises=PsycoPg2InternalError), ], @@ -244,33 +231,26 @@ def calc_zscore(s): lambda _, win: ibis.row_number().over(win), lambda t: t.cumcount(), id="row_number", - marks=[ - pytest.mark.notimpl(["dask"], raises=NotImplementedError), - ], ), param( lambda t, win: t.double_col.cumsum().over(win), lambda t: t.double_col.cumsum(), id="cumsum", - marks=pytest.mark.notimpl(["dask"], raises=NotImplementedError), ), param( lambda t, win: t.double_col.cummean().over(win), lambda t: (t.double_col.expanding().mean().reset_index(drop=True, level=0)), id="cummean", - marks=pytest.mark.notimpl(["dask"], raises=NotImplementedError), ), param( lambda t, win: t.float_col.cummin().over(win), lambda t: t.float_col.cummin(), id="cummin", - marks=pytest.mark.notimpl(["dask"], raises=NotImplementedError), ), param( lambda t, win: t.float_col.cummax().over(win), lambda t: t.float_col.cummax(), id="cummax", - marks=pytest.mark.notimpl(["dask"], raises=NotImplementedError), ), param( lambda t, win: (t.double_col == 0).any().over(win), @@ -281,10 +261,7 @@ def calc_zscore(s): .astype(bool) ), id="cumany", - marks=[ - pytest.mark.notimpl(["dask"], raises=NotImplementedError), - pytest.mark.broken(["mssql"], raises=com.OperationNotDefinedError), - ], + marks=[pytest.mark.broken(["mssql"], raises=com.OperationNotDefinedError)], ), param( lambda t, win: (t.double_col == 0).notany().over(win), @@ -296,7 +273,6 @@ def calc_zscore(s): ), id="cumnotany", marks=[ - pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.broken(["oracle"], raises=OracleDatabaseError), pytest.mark.broken(["mssql"], raises=com.OperationNotDefinedError), ], @@ -310,10 +286,7 @@ def calc_zscore(s): .astype(bool) ), id="cumall", - marks=[ - pytest.mark.notimpl(["dask"], raises=NotImplementedError), - pytest.mark.broken(["mssql"], raises=com.OperationNotDefinedError), - ], + marks=[pytest.mark.broken(["mssql"], raises=com.OperationNotDefinedError)], ), param( lambda t, win: (t.double_col == 0).notall().over(win), @@ -325,7 +298,6 @@ def calc_zscore(s): ), id="cumnotall", marks=[ - pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.broken(["oracle"], raises=OracleDatabaseError), pytest.mark.broken(["mssql"], raises=com.OperationNotDefinedError), ], @@ -334,7 +306,6 @@ def calc_zscore(s): lambda t, win: t.double_col.sum().over(win), lambda gb: gb.double_col.cumsum(), id="sum", - marks=pytest.mark.notimpl(["dask"], raises=NotImplementedError), ), param( lambda t, win: t.double_col.mean().over(win), @@ -342,19 +313,16 @@ def calc_zscore(s): gb.double_col.expanding().mean().reset_index(drop=True, level=0) ), id="mean", - marks=pytest.mark.notimpl(["dask"], raises=NotImplementedError), ), param( lambda t, win: t.float_col.min().over(win), lambda gb: gb.float_col.cummin(), id="min", - marks=pytest.mark.notimpl(["dask"], raises=NotImplementedError), ), param( lambda t, win: t.float_col.max().over(win), lambda gb: gb.float_col.cummax(), id="max", - marks=pytest.mark.notimpl(["dask"], raises=NotImplementedError), ), param( lambda t, win: t.double_col.count().over(win), @@ -362,7 +330,6 @@ def calc_zscore(s): # that we must, so we add one to the pandas result lambda gb: 
gb.double_col.cumcount() + 1, id="count", - marks=pytest.mark.notimpl(["dask"], raises=NotImplementedError), ), ], ) @@ -400,7 +367,6 @@ def test_grouped_bounded_expanding_window( lambda df: (df.double_col.expanding().mean()), id="mean", marks=[ - pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl( ["risingwave"], raises=PsycoPg2InternalError, @@ -435,7 +401,6 @@ def test_grouped_bounded_expanding_window( ], raises=com.OperationNotDefinedError, ), - pytest.mark.broken(["dask"], raises=ValueError), ], ), ], @@ -469,7 +434,6 @@ def test_ungrouped_bounded_expanding_window( ], ) @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["dask"], raises=NotImplementedError) @pytest.mark.notimpl( ["flink"], raises=com.UnsupportedOperationError, @@ -539,7 +503,6 @@ def test_grouped_bounded_following_window(backend, alltypes, df, preceding, foll ], ) @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["dask"], raises=NotImplementedError) def test_grouped_bounded_preceding_window(backend, alltypes, df, window_fn): window = window_fn(alltypes) expr = alltypes.mutate(val=alltypes.double_col.sum().over(window)) @@ -602,11 +565,6 @@ def test_grouped_bounded_preceding_window(backend, alltypes, df, window_fn): @pytest.mark.parametrize( ("ordered"), [ - param( - True, - id="ordered", - marks=pytest.mark.notimpl(["dask"], raises=NotImplementedError), - ), param( False, id="unordered", @@ -687,11 +645,6 @@ def test_simple_ungrouped_unbound_following_window( raises=com.UnsupportedOperationError, reason="OVER RANGE FOLLOWING windows are not supported in Flink yet", ) -@pytest.mark.notimpl( - ["dask"], - raises=NotImplementedError, - reason="support scalar sorting keys are not yet implemented", -) @pytest.mark.never( ["mssql"], raises=Exception, reason="order by constant is not supported" ) @@ -719,11 +672,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): True, id="ordered-mean", marks=[ - pytest.mark.notimpl( - ["dask"], - raises=NotImplementedError, - reason="Window operations are unsupported in the dask backend", - ), pytest.mark.broken( ["flink", "impala"], reason="default window semantics are different", @@ -801,11 +749,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): ], raises=com.OperationNotDefinedError, ), - pytest.mark.broken( - ["dask"], - raises=ValueError, - reason="Dask windowing order_by not yet implemented", - ), ], ), param( @@ -967,11 +910,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): ], raises=com.OperationNotDefinedError, ), - pytest.mark.broken( - ["dask"], - raises=ValueError, - reason="Dask windowing order_by not yet implemented", - ), ], ), param( @@ -1041,7 +979,6 @@ def test_ungrouped_unbounded_window( @pytest.mark.notimpl( ["impala"], raises=ImpalaHiveServer2Error, reason="limited RANGE support" ) -@pytest.mark.notimpl(["dask"], raises=NotImplementedError) @pytest.mark.notimpl( ["flink"], raises=com.UnsupportedOperationError, @@ -1112,7 +1049,6 @@ def gb_fn(df): @pytest.mark.notimpl(["clickhouse", "polars"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["dask"], raises=AttributeError) @pytest.mark.notyet( ["clickhouse"], reason="clickhouse doesn't implement percent_rank", @@ -1134,7 +1070,6 @@ def test_percent_rank_whole_table_no_order_by(backend, alltypes, df): @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["dask"], raises=NotImplementedError) def 
test_grouped_ordered_window_coalesce(backend, alltypes, df): t = alltypes expr = ( @@ -1194,12 +1129,6 @@ def test_mutate_window_filter(backend, alltypes): raises=Exception, reason="KeyError: Table with name win doesn't exist.", ) -@pytest.mark.notimpl(["dask"], raises=NotImplementedError) -@pytest.mark.notimpl( - ["flink"], - raises=com.UnsupportedOperationError, - reason="Windows in Flink can only be ordered by a single time column", -) def test_first_last(backend): t = backend.win w = ibis.window(group_by=t.g, order_by=[t.x, t.y], preceding=1, following=0) @@ -1293,7 +1222,7 @@ def test_range_expression_bounds(backend): assert len(result) == con.execute(t.count()) -@pytest.mark.notimpl(["polars", "dask"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.notyet( ["clickhouse"], reason="clickhouse doesn't implement percent_rank", @@ -1307,6 +1236,7 @@ def test_range_expression_bounds(backend): raises=PsycoPg2InternalError, reason="Feature is not yet implemented: Unrecognized window function: percent_rank", ) +@pytest.mark.broken(["dask"], reason="different result ordering", raises=AssertionError) def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df): # GH #7631 t = alltypes @@ -1325,11 +1255,6 @@ def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df): backend.assert_series_equal(result, expected) -@pytest.mark.notyet( - ["dask"], - reason="multiple ordering keys in a window function not supported for ranking", - raises=ValueError, -) @pytest.mark.notyet( ["mssql"], reason="IS NULL not valid syntax for mssql", diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py index 280fbaec04f1..9c4269efe23a 100644 --- a/ibis/formats/pandas.py +++ b/ibis/formats/pandas.py @@ -397,18 +397,6 @@ def convert(value): return convert -class DaskData(PandasData): - @staticmethod - def concat(*args, **kwargs): - import dask.dataframe as dd - - return dd.concat(*args, **kwargs) - - @classmethod - def infer_column(cls, s): - return PyArrowData.infer_column(s.compute()) - - class PandasDataFrameProxy(TableProxy[pd.DataFrame]): def to_frame(self) -> pd.DataFrame: return self.obj diff --git a/ibis/formats/tests/test_dask.py b/ibis/formats/tests/test_dask.py deleted file mode 100644 index 2dbe9b61ad7d..000000000000 --- a/ibis/formats/tests/test_dask.py +++ /dev/null @@ -1,201 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd -import pytest - -import ibis -import ibis.expr.datatypes as dt -from ibis.formats.pandas import DaskData - -dask = pytest.importorskip("dask") -dd = pytest.importorskip("dask.dataframe") - -dask.config.set({"dataframe.convert-string": False}) - -from dask.dataframe.utils import tm # noqa: E402 - - -@pytest.mark.parametrize( - ("col_data", "schema_type"), - [ - ([True, False, False], "bool"), - (np.int8([-3, 9, 17]), "int8"), - (np.int16([-5, 0, 12]), "int16"), - (np.int32([-12, 3, 25000]), "int32"), - (np.int64([102, 67228734, -0]), "int64"), - (np.float32([45e-3, -0.4, 99.0]), "float32"), - (np.float64([-3e43, 43.0, 10000000.0]), "double"), - (np.uint8([3, 0, 16]), "uint8"), - (np.uint16([5569, 1, 33]), "uint16"), - (np.uint32([100, 0, 6]), "uint32"), - (np.uint64([666, 2, 3]), "uint64"), - ( - [ - pd.Timestamp("2010-11-01 00:01:00"), - pd.Timestamp("2010-11-01 00:02:00.1000"), - pd.Timestamp("2010-11-01 00:03:00.300000"), - ], - "timestamp", - ), - ( - [ - pd.Timedelta("1 days"), - pd.Timedelta("-1 days 2 min 3us"), - pd.Timedelta("-2 days 
+23:57:59.999997"), - ], - "interval('ns')", - ), - (["foo", "bar", "hello"], "string"), - (pd.Series(["a", "b", "c", "a"]).astype("category"), dt.String()), - ], -) -def test_schema_infer_dataframe(col_data, schema_type): - df = dd.from_pandas(pd.DataFrame({"col": col_data}), npartitions=1) - inferred = DaskData.infer_table(df) - expected = ibis.schema([("col", schema_type)]) - assert inferred == expected - - -def test_schema_infer_exhaustive_dataframe(): - npartitions = 2 - df = dd.from_pandas( - pd.DataFrame( - { - "bigint_col": np.array( - [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], dtype="i8" - ), - "bool_col": np.array( - [ - True, - False, - True, - False, - True, - None, - True, - False, - True, - False, - ], - dtype=np.bool_, - ), - "bool_obj_col": np.array( - [ - True, - False, - np.nan, - False, - True, - np.nan, - True, - np.nan, - True, - False, - ], - dtype=np.object_, - ), - "date_string_col": [ - "11/01/10", - None, - "11/01/10", - "11/01/10", - "11/01/10", - "11/01/10", - "11/01/10", - "11/01/10", - "11/01/10", - "11/01/10", - ], - "double_col": np.array( - [ - 0.0, - 10.1, - np.nan, - 30.299999999999997, - 40.399999999999999, - 50.5, - 60.599999999999994, - 70.700000000000003, - 80.799999999999997, - 90.899999999999991, - ], - dtype=np.float64, - ), - "float_col": np.array( - [ - np.nan, - 1.1000000238418579, - 2.2000000476837158, - 3.2999999523162842, - 4.4000000953674316, - 5.5, - 6.5999999046325684, - 7.6999998092651367, - 8.8000001907348633, - 9.8999996185302734, - ], - dtype=np.float32, - ), - "int_col": np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype="i4"), - "month": [11, 11, 11, 11, 2, 11, 11, 11, 11, 11], - "smallint_col": np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype="i2"), - "string_col": [ - "0", - "1", - None, - "double , whammy", - "4", - "5", - "6", - "7", - "8", - "9", - ], - "timestamp_col": [ - pd.Timestamp("2010-11-01 00:00:00"), - None, - pd.Timestamp("2010-11-01 00:02:00.100000"), - pd.Timestamp("2010-11-01 00:03:00.300000"), - pd.Timestamp("2010-11-01 00:04:00.600000"), - pd.Timestamp("2010-11-01 00:05:00.100000"), - pd.Timestamp("2010-11-01 00:06:00.150000"), - pd.Timestamp("2010-11-01 00:07:00.210000"), - pd.Timestamp("2010-11-01 00:08:00.280000"), - pd.Timestamp("2010-11-01 00:09:00.360000"), - ], - "tinyint_col": np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype="i1"), - "year": [ - 2010, - 2010, - 2010, - 2010, - 2010, - 2009, - 2009, - 2009, - 2009, - 2009, - ], - } - ), - npartitions=npartitions, - ) - - expected = [ - ("bigint_col", dt.int64), - ("bool_col", dt.boolean), - ("bool_obj_col", dt.boolean), - ("date_string_col", dt.string), - ("double_col", dt.float64), - ("float_col", dt.float32), - ("int_col", dt.int32), - ("month", dt.int64), - ("smallint_col", dt.int16), - ("string_col", dt.string), - ("timestamp_col", dt.timestamp), - ("tinyint_col", dt.int8), - ("year", dt.int64), - ] - - assert DaskData.infer_table(df) == ibis.schema(expected) diff --git a/pyproject.toml b/pyproject.toml index f90144a22823..de60e862673c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -265,6 +265,8 @@ filterwarnings = [ # pyspark and impala leave sockets open "ignore:Exception ignored in:", # dask + "ignore:Using the ``in`` operator to test for membership in Series is deprecated:FutureWarning", + "ignore:In a future version of pandas, a length 1 tuple will be returned when iterating over a groupby:FutureWarning", "ignore:index is deprecated and will be removed in a future release:FutureWarning", "ignore:`meta` is not specified:UserWarning", 
"ignore:Concatenating dataframes with unknown divisions:UserWarning", From acd2a476c7a894608834adc5742c0263247759c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 2 Feb 2024 10:37:56 +0100 Subject: [PATCH 146/161] fix(polars): remove deprecated `pl.count()` and `with_time_unit` --- ibis/backends/polars/compiler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py index 83e216eccae4..8d7624ac115c 100644 --- a/ibis/backends/polars/compiler.py +++ b/ibis/backends/polars/compiler.py @@ -803,7 +803,7 @@ def count_star(op, **kw): condition = translate(where, **kw) result = condition.sum() else: - result = pl.count() + result = pl.len() return result.cast(dtype_to_polars(op.dtype)) @@ -904,7 +904,7 @@ def timestamp_from_unix(op, **kw): if unit == "s": arg = arg.cast(pl.Int64) * 1_000 unit = "ms" - return arg.cast(pl.Datetime).dt.with_time_unit(unit) + return arg.cast(pl.Int64).cast(pl.Datetime(time_unit=unit)) @translate.register(ops.IntervalFromInteger) From c68cc96f8465a1f2c495e24fbcd486d62dd51f4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 2 Feb 2024 10:43:59 +0100 Subject: [PATCH 147/161] test(snowflake): enable xpassing `test_dot_sql::test_order_by_no_projection` --- ibis/backends/tests/test_dot_sql.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ibis/backends/tests/test_dot_sql.py b/ibis/backends/tests/test_dot_sql.py index f938ea0143b7..30e51cb4e297 100644 --- a/ibis/backends/tests/test_dot_sql.py +++ b/ibis/backends/tests/test_dot_sql.py @@ -289,7 +289,6 @@ def test_con_dot_sql_transpile(backend, con, dialect, df): @dot_sql_notimpl @dot_sql_never @pytest.mark.notimpl(["druid", "flink", "polars", "exasol"]) -@pytest.mark.notyet(["snowflake"], reason="snowflake column names are case insensitive") def test_order_by_no_projection(backend): con = backend.connection expr = ( From 28ce539465aabac1bafae0d814e3569ff367229e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 2 Feb 2024 10:45:49 +0100 Subject: [PATCH 148/161] style: remove extra newline from docstrings --- ibis/__init__.py | 1 + ibis/backends/base/sql/__init__.py | 7 +++ ibis/backends/base/sql/alchemy/__init__.py | 7 +++ ibis/backends/base/sql/alchemy/datatypes.py | 2 + .../base/sql/compiler/query_builder.py | 2 + ibis/backends/base/sql/compiler/translator.py | 1 + ibis/backends/base/sqlglot/__init__.py | 3 ++ ibis/backends/base/sqlglot/rewrites.py | 1 + ibis/backends/bigquery/__init__.py | 9 ++++ ibis/backends/bigquery/client.py | 1 + ibis/backends/bigquery/udf/core.py | 2 + ibis/backends/bigquery/udf/find.py | 1 + ibis/backends/clickhouse/__init__.py | 7 +++ ibis/backends/dask/__init__.py | 3 ++ ibis/backends/dask/helpers.py | 1 + ibis/backends/datafusion/__init__.py | 7 +++ ibis/backends/druid/__init__.py | 1 + ibis/backends/duckdb/__init__.py | 25 +++++++++++ ibis/backends/exasol/__init__.py | 2 + ibis/backends/flink/__init__.py | 17 +++++++ ibis/backends/impala/__init__.py | 24 ++++++++++ ibis/backends/impala/client.py | 6 +++ ibis/backends/impala/udf.py | 4 ++ ibis/backends/mysql/__init__.py | 3 ++ ibis/backends/oracle/__init__.py | 3 ++ ibis/backends/pandas/__init__.py | 4 ++ ibis/backends/pandas/kernels.py | 1 + ibis/backends/polars/__init__.py | 7 +++ ibis/backends/pyspark/__init__.py | 18 ++++++++ ibis/backends/snowflake/__init__.py | 8 ++++ ibis/backends/sqlite/__init__.py | 6 +++ ibis/backends/sqlite/udf.py | 1 + 
ibis/backends/tests/test_array.py | 4 +- ibis/backends/trino/__init__.py | 5 +++ ibis/common/annotations.py | 11 +++++ ibis/common/bases.py | 1 + ibis/common/collections.py | 2 + ibis/common/deferred.py | 8 ++++ ibis/common/dispatch.py | 1 + ibis/common/egraph.py | 23 ++++++++++ ibis/common/graph.py | 19 ++++++++ ibis/common/grounds.py | 1 + ibis/common/patterns.py | 34 ++++++++++++++ ibis/common/temporal.py | 1 + ibis/common/typing.py | 4 ++ ibis/config.py | 5 +++ ibis/expr/analysis.py | 1 + ibis/expr/api.py | 45 +++++++++++++++++++ ibis/expr/builders.py | 5 +++ ibis/expr/datatypes/core.py | 4 ++ ibis/expr/datatypes/parse.py | 1 + ibis/expr/decompile.py | 1 + ibis/expr/operations/analytic.py | 1 + ibis/expr/operations/core.py | 3 ++ ibis/expr/operations/generic.py | 2 + ibis/expr/operations/numeric.py | 2 + ibis/expr/operations/udf.py | 5 +++ ibis/expr/rules.py | 2 + ibis/expr/schema.py | 3 ++ ibis/expr/sql.py | 2 + ibis/formats/__init__.py | 12 +++++ ibis/legacy/udf/validate.py | 1 + ibis/legacy/udf/vectorized.py | 5 +++ ibis/selectors.py | 15 +++++++ ibis/util.py | 13 ++++++ 65 files changed, 424 insertions(+), 3 deletions(-) diff --git a/ibis/__init__.py b/ibis/__init__.py index 50cc746d9451..6c1cd08abae0 100644 --- a/ibis/__init__.py +++ b/ibis/__init__.py @@ -49,6 +49,7 @@ def __getattr__(name: str) -> BaseBackend: is called, and a backend with the `sqlite` name is tried to load from the `ibis.backends` entrypoints. If successful, the `ibis.sqlite` attribute is "cached", so this function is only called the first time. + """ entry_points = {ep for ep in util.backend_entry_points() if ep.name == name} diff --git a/ibis/backends/base/sql/__init__.py b/ibis/backends/base/sql/__init__.py index 618fa85678fa..b89d265f434a 100644 --- a/ibis/backends/base/sql/__init__.py +++ b/ibis/backends/base/sql/__init__.py @@ -49,6 +49,7 @@ def _from_url(self, url: str, **kwargs: Any) -> BaseBackend: ------- BaseBackend A backend instance + """ import sqlalchemy as sa @@ -84,6 +85,7 @@ def table(self, name: str, database: str | None = None) -> ir.Table: ------- Table Table expression + """ if database is not None and not isinstance(database, str): raise exc.IbisTypeError( @@ -120,6 +122,7 @@ def sql( ------- Table Table expression + """ query = self._transpile_sql(query, dialect=dialect) if schema is None: @@ -182,6 +185,7 @@ def raw_sql(self, query: str): [(1,)] >>> cursor.closed True + """ return self.con.execute(query) @@ -235,6 +239,7 @@ def to_pyarrow_batches( ------- RecordBatchReader Collection of pyarrow `RecordBatch`s. + """ pa = self._import_pyarrow() @@ -326,6 +331,7 @@ def execute( * `Table`: pandas.DataFrame * `Column`: pandas.Series * `Scalar`: Python scalar value + """ # TODO Reconsider having `kwargs` here. It's needed to support # `external_tables` in clickhouse, but better to deprecate that @@ -391,6 +397,7 @@ def compile( Any The output of compilation. The type of this value depends on the backend. + """ self._define_udf_translation_rules(expr) return self.compiler.to_ast_ensure_limit(expr, limit, params=params).compile() diff --git a/ibis/backends/base/sql/alchemy/__init__.py b/ibis/backends/base/sql/alchemy/__init__.py index ec64b484061a..165187ee53f4 100644 --- a/ibis/backends/base/sql/alchemy/__init__.py +++ b/ibis/backends/base/sql/alchemy/__init__.py @@ -264,6 +264,7 @@ def create_table( ------- Table The table that was created. 
+ """ if obj is None and schema is None: raise com.IbisError("The schema or obj parameter is required") @@ -422,6 +423,7 @@ def drop_table( Database to drop table from force Check for existence before dropping + """ if database == self.current_database: # avoid fully qualified name @@ -461,6 +463,7 @@ def schema(self, name: str) -> sch.Schema: ------- Schema The ibis schema of `name` + """ return self.database().schema(name) @@ -529,6 +532,7 @@ def _schema_from_sqla_table( ------- schema An ibis schema corresponding to the types of the columns in `table`. + """ schema = schema if schema is not None else {} pairs = [] @@ -624,6 +628,7 @@ def raw_sql(self, query: str | sa.sql.ClauseElement): [(1,)] >>> cursor.closed True + """ return self.con.connect().execute( sa.text(query) if isinstance(query, str) else query @@ -657,6 +662,7 @@ def table( ------- Table Table expression + """ namespace = ops.Namespace(schema=schema, database=database) @@ -707,6 +713,7 @@ def insert( If inserting data from a different database ValueError If the type of `obj` isn't supported + """ import pandas as pd diff --git a/ibis/backends/base/sql/alchemy/datatypes.py b/ibis/backends/base/sql/alchemy/datatypes.py index d78739264dc3..9d7f36ecc0af 100644 --- a/ibis/backends/base/sql/alchemy/datatypes.py +++ b/ibis/backends/base/sql/alchemy/datatypes.py @@ -152,6 +152,7 @@ def from_ibis(cls, dtype: dt.DataType) -> sat.TypeEngine: Returns ------- SQLAlchemy type. + """ if dtype.is_decimal(): return sat.NUMERIC(dtype.precision, dtype.scale) @@ -174,6 +175,7 @@ def to_ibis(cls, typ: sat.TypeEngine, nullable: bool = True) -> dt.DataType: Returns ------- Ibis type. + """ if dtype := _from_sqlalchemy_types.get(type(typ)): return dtype(nullable=nullable) diff --git a/ibis/backends/base/sql/compiler/query_builder.py b/ibis/backends/base/sql/compiler/query_builder.py index 4376e03b4a55..44f5a728275f 100644 --- a/ibis/backends/base/sql/compiler/query_builder.py +++ b/ibis/backends/base/sql/compiler/query_builder.py @@ -475,6 +475,7 @@ def flatten_set_op(op) -> Iterable[ops.Table | bool]: ------- Iterable[Table | bool] Iterable of tables and `bool`s indicating `distinct`. + """ if isinstance(op, ops.SetOp): @@ -501,6 +502,7 @@ def flatten(op: ops.TableNode): ------- Iterable[Table | bool] Iterable of tables and `bool`s indicating `distinct`. + """ return list(toolz.concatv(flatten_set_op(op.left), flatten_set_op(op.right))) diff --git a/ibis/backends/base/sql/compiler/translator.py b/ibis/backends/base/sql/compiler/translator.py index c1cd6d7dc9c3..2df24b4b93a4 100644 --- a/ibis/backends/base/sql/compiler/translator.py +++ b/ibis/backends/base/sql/compiler/translator.py @@ -51,6 +51,7 @@ def collapse(self, queries: Iterable[str]) -> str: ------- query A single query string + """ return "\n\n".join(queries) diff --git a/ibis/backends/base/sqlglot/__init__.py b/ibis/backends/base/sqlglot/__init__.py index 1a7564e2dcb1..e919d07dc0d1 100644 --- a/ibis/backends/base/sqlglot/__init__.py +++ b/ibis/backends/base/sqlglot/__init__.py @@ -77,6 +77,7 @@ def table( ------- Table Table expression + """ table_schema = self.get_schema(name, schema=schema, database=database) return ops.DatabaseTable( @@ -218,6 +219,7 @@ def _register_temp_view_cleanup(self, name: str) -> None: ---------- name The temporary view to register for clean up. + """ def _load_into_cache(self, name, expr): @@ -310,6 +312,7 @@ def to_pyarrow_batches( ------- RecordBatchReader Collection of pyarrow `RecordBatch`s. 
+ """ pa = self._import_pyarrow() diff --git a/ibis/backends/base/sqlglot/rewrites.py b/ibis/backends/base/sqlglot/rewrites.py index 56836962cb0c..05140a76158c 100644 --- a/ibis/backends/base/sqlglot/rewrites.py +++ b/ibis/backends/base/sqlglot/rewrites.py @@ -179,6 +179,7 @@ def sqlize( Returns ------- Tuple of the rewritten expression graph and a list of CTEs. + """ assert isinstance(node, ops.Relation) diff --git a/ibis/backends/bigquery/__init__.py b/ibis/backends/bigquery/__init__.py index ab2f32eab055..4c2be367bfc7 100644 --- a/ibis/backends/bigquery/__init__.py +++ b/ibis/backends/bigquery/__init__.py @@ -233,6 +233,7 @@ def read_parquet( ------- Table An Ibis table expression + """ return self._read_file( path, @@ -261,6 +262,7 @@ def read_csv( ------- Table An Ibis table expression + """ job_config = bq.LoadJobConfig( source_format=bq.SourceFormat.CSV, @@ -290,6 +292,7 @@ def read_json( ------- Table An Ibis table expression + """ job_config = bq.LoadJobConfig( source_format=bq.SourceFormat.NEWLINE_DELIMITED_JSON, @@ -377,6 +380,7 @@ def do_connect( ------- Backend An instance of the BigQuery backend. + """ default_project_id = client.project if client is not None else project_id @@ -629,6 +633,7 @@ def _to_sqlglot( Any The output of compilation. The type of this value depends on the backend. + """ self._make_session() self._define_udf_translation_rules(expr) @@ -711,6 +716,7 @@ def execute(self, expr, params=None, limit="default", **kwargs): ------- pd.DataFrame | pd.Series | scalar Output from execution + """ self._run_pre_execute_hooks(expr) @@ -849,6 +855,7 @@ def list_tables( The `schema` parameter does **not** refer to the column names and types of `table`. ::: + """ if database is not None and schema is None: raise com.com.IbisInputError( @@ -927,6 +934,7 @@ def create_table( ------- Table The table that was just created + """ if obj is None and schema is None: raise com.IbisError("One of the `schema` or `obj` parameter is required") @@ -1233,6 +1241,7 @@ def connect( ------- Backend An instance of the BigQuery backend + """ backend = Backend() return backend.connect( diff --git a/ibis/backends/bigquery/client.py b/ibis/backends/bigquery/client.py index d785e99ddfa7..e643b678e01a 100644 --- a/ibis/backends/bigquery/client.py +++ b/ibis/backends/bigquery/client.py @@ -192,6 +192,7 @@ def parse_project_and_dataset(project: str, dataset: str = "") -> tuple[str, str >>> data_project, billing_project, _dataset = parse_project_and_dataset("ibis-gbq") >>> data_project 'ibis-gbq' + """ if dataset.count(".") > 1: raise ValueError( diff --git a/ibis/backends/bigquery/udf/core.py b/ibis/backends/bigquery/udf/core.py index 58351841e7f1..41e09969d02a 100644 --- a/ibis/backends/bigquery/udf/core.py +++ b/ibis/backends/bigquery/udf/core.py @@ -22,6 +22,7 @@ class SymbolTable(ChainMap): JavaScript requires declarations in strict mode, so to implement this we shove a "let" at the beginning of every variable name if it doesn't already exist in the current scope. 
+ """ def __getitem__(self, key): @@ -44,6 +45,7 @@ def indent(lines, spaces=4): Returns ------- indented_lines : str + """ if isinstance(lines, str): text = [lines] diff --git a/ibis/backends/bigquery/udf/find.py b/ibis/backends/bigquery/udf/find.py index 2ab1b6f42162..27f73a1e8df9 100644 --- a/ibis/backends/bigquery/udf/find.py +++ b/ibis/backends/bigquery/udf/find.py @@ -52,6 +52,7 @@ def find_names(node: ast.AST) -> list[ast.Name]: 'a' >>> names[1].id 'b' + """ return list( toolz.unique( diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index d2a492d709e8..4913fc2e3f60 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -54,6 +54,7 @@ class Options(ibis.config.Config): ---------- bool_type : str Type to use for boolean columns + """ bool_type: Literal["Bool", "UInt8", "Int8"] = "Bool" @@ -72,6 +73,7 @@ def _from_url(self, url: str, **kwargs) -> BaseBackend: ------- BaseBackend A backend instance + """ url = urlparse(url) database = url.path[1:] @@ -148,6 +150,7 @@ def do_connect( >>> client = ibis.clickhouse.connect() >>> client + """ if settings is None: settings = {} @@ -325,6 +328,7 @@ def to_pyarrow_batches( This is not implemented because it adds an unnecessary pandas step in between Python object -> arrow. We can go directly to record batches without pandas in the middle. + """ table = expr.as_table() sql = self.compile(table, limit=limit, params=params) @@ -425,6 +429,7 @@ def raw_sql( ------- Cursor Clickhouse cursor + """ external_tables = toolz.valmap(_to_memtable, external_tables or {}) external_data = self._normalize_external_tables(external_tables) @@ -456,6 +461,7 @@ def get_schema( ------- sch.Schema Ibis schema + """ if schema is not None: raise com.UnsupportedBackendFeatureError( @@ -603,6 +609,7 @@ def create_table( ------- Table The new table + """ if temp and overwrite: raise com.IbisInputError( diff --git a/ibis/backends/dask/__init__.py b/ibis/backends/dask/__init__.py index 941e5a5864bf..e7fed7b55cf8 100644 --- a/ibis/backends/dask/__init__.py +++ b/ibis/backends/dask/__init__.py @@ -47,6 +47,7 @@ def do_connect( ... "s": dd.read_csv("path/to/file.csv"), ... 
} >>> ibis.dask.connect(data) + """ if dictionary is None: dictionary = {} @@ -133,6 +134,7 @@ def read_csv( ------- ir.Table The just-registered table + """ table_name = table_name or util.gen_name("read_csv") df = dd.read_csv(source, **kwargs) @@ -161,6 +163,7 @@ def read_parquet( ------- ir.Table The just-registered table + """ table_name = table_name or util.gen_name("read_parquet") df = dd.read_parquet(source, **kwargs) diff --git a/ibis/backends/dask/helpers.py b/ibis/backends/dask/helpers.py index 11ec33792bbf..7b40f1ca431d 100644 --- a/ibis/backends/dask/helpers.py +++ b/ibis/backends/dask/helpers.py @@ -167,6 +167,7 @@ def add_globally_consecutive_column( ------- dd.DataFrame New dask dataframe with sorted partitioned index + """ if isinstance(df, dd.Series): df = df.to_frame() diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index 8880771d9b1c..dbe8f90d989e 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -72,6 +72,7 @@ def do_connect( >>> import ibis >>> config = {"t": "path/to/file.parquet", "s": "path/to/file.csv"} >>> ibis.datafusion.connect(config) + """ if isinstance(config, SessionContext): (self.con, config) = (config, None) @@ -191,6 +192,7 @@ def raw_sql(self, query: str | sg.exp.Expression) -> Any: Raw SQL string kwargs Backend specific query arguments + """ with contextlib.suppress(AttributeError): query = query.sql(dialect=self.dialect, pretty=True) @@ -312,6 +314,7 @@ def register( >>> dataset = ds.dataset("path/to/table") >>> conn.register(dataset, "my_table") >>> conn.table("my_table") + """ import pandas as pd @@ -392,6 +395,7 @@ def read_csv( ------- ir.Table The just-registered table + """ path = normalize_filename(path) table_name = table_name or gen_name("read_csv") @@ -419,6 +423,7 @@ def read_parquet( ------- ir.Table The just-registered table + """ path = normalize_filename(path) table_name = table_name or gen_name("read_parquet") @@ -447,6 +452,7 @@ def read_delta( ------- ir.Table The just-registered table + """ source_table = normalize_filename(source_table) @@ -547,6 +553,7 @@ def create_table( overwrite If `True`, replace the table if it already exists, otherwise fail if the table exists + """ if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") diff --git a/ibis/backends/druid/__init__.py b/ibis/backends/druid/__init__.py index 17dbf65b29dd..16cf0002eb5e 100644 --- a/ibis/backends/druid/__init__.py +++ b/ibis/backends/druid/__init__.py @@ -53,6 +53,7 @@ def _from_url(self, url: str, **kwargs): ------- BaseBackend A backend instance + """ url = urlparse(url) diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index ff4c64c3cf3f..e66b0b99763e 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -160,6 +160,7 @@ def create_table( overwrite If `True`, replace the table if it already exists, otherwise fail if the table exists + """ if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") @@ -276,6 +277,7 @@ def table( ------- Table Table expression + """ table_schema = self.get_schema(name, schema=schema, database=database) # load geospatial only if geo columns @@ -307,6 +309,7 @@ def get_schema( ------- sch.Schema Ibis schema + """ conditions = [sg.column("table_name").eq(sge.convert(table_name))] @@ -421,6 +424,7 @@ def do_connect( >>> import ibis >>> ibis.duckdb.connect("database.ddb", threads=4, memory_limit="1GB") + 
""" if ( not isinstance(database, Path) @@ -484,6 +488,7 @@ def _from_url(self, url: str, **kwargs) -> BaseBackend: ------- BaseBackend A backend instance + """ url = urlparse(url) database = url.path[1:] or ":memory:" @@ -509,6 +514,7 @@ def load_extension(self, extension: str, force_install: bool = False) -> None: The extension name or path. force_install Force reinstallation of the extension. + """ self._load_extensions([extension], force_install=force_install) @@ -562,6 +568,7 @@ def register( ------- ir.Table The just-registered table + """ if isinstance(source, (str, Path)): @@ -628,6 +635,7 @@ def read_json( ------- Table An ibis table expression + """ if not table_name: table_name = util.gen_name("read_json") @@ -671,6 +679,7 @@ def read_csv( ------- ir.Table The just-registered table + """ source_list = normalize_filenames(source_list) @@ -741,6 +750,7 @@ def read_geo( ------- ir.Table The just-registered table + """ if not table_name: @@ -794,6 +804,7 @@ def read_parquet( ------- ir.Table The just-registered table + """ source_list = normalize_filenames(source_list) @@ -861,6 +872,7 @@ def read_in_memory( ------- ir.Table The just-registered table + """ table_name = table_name or util.gen_name("read_in_memory") self.con.register(table_name, source) @@ -895,6 +907,7 @@ def read_delta( ------- ir.Table The just-registered table. + """ source_table = normalize_filenames(source_table)[0] @@ -956,6 +969,7 @@ def list_tables( <...> >>> con.list_tables(schema="my_schema") ['baz'] + """ database = F.current_database() if database is None else sge.convert(database) schema = F.current_schema() if schema is None else sge.convert(schema) @@ -996,6 +1010,7 @@ def read_postgres( ------- ir.Table The just-registered table. + """ if table_name is None: raise ValueError( @@ -1051,6 +1066,7 @@ def read_sqlite(self, path: str | Path, table_name: str | None = None) -> ir.Tab │ 2 │ b │ │ 3 │ c │ └───────┴────────┘ + """ if table_name is None: @@ -1081,6 +1097,7 @@ def attach( Name to attach the database as. Defaults to the basename of `path`. read_only Whether to attach the database as read-only. + """ code = f"ATTACH '{path}'" @@ -1100,6 +1117,7 @@ def detach(self, name: str) -> None: ---------- name The name of the database to detach. + """ name = sg.to_identifier(name).sql(self.name) self.con.execute(f"DETACH {name}").fetchall() @@ -1136,6 +1154,7 @@ def attach_sqlite( >>> con.attach_sqlite("/tmp/attach_sqlite.db") >>> con.list_tables() ['t'] + """ self.load_extension("sqlite") with self._safe_raw_sql(f"SET GLOBAL sqlite_all_varchar={all_varchar}") as cur: @@ -1177,6 +1196,7 @@ def register_filesystem(self, filesystem: AbstractFileSystem): DatabaseTable: band_members name string band string + """ self.con.register_filesystem(filesystem) @@ -1226,6 +1246,7 @@ def to_pyarrow_batches( ::: {.callout-warning} ## DuckDB returns 1024 size batches regardless of what argument is passed. ::: + """ self._run_pre_execute_hooks(expr) table = expr.as_table() @@ -1287,6 +1308,7 @@ def to_torch( ------- dict[str, torch.Tensor] A dictionary of torch tensors, keyed by column name. + """ compiled = self.compile(expr, limit=limit, params=params, **kwargs) with self._safe_raw_sql(compiled) as cur: @@ -1341,6 +1363,7 @@ def to_parquet( Partition on multiple columns. 
>>> con.to_parquet(penguins, tempfile.mkdtemp(), partition_by=("year", "island")) + """ self._run_pre_execute_hooks(expr) query = self._to_sql(expr, params=params) @@ -1376,6 +1399,7 @@ def to_csv( Whether to write the column names as the first line of the CSV file. **kwargs DuckDB CSV writer arguments. https://duckdb.org/docs/data/csv.html#parameters + """ self._run_pre_execute_hooks(expr) query = self._to_sql(expr, params=params) @@ -1516,6 +1540,7 @@ def insert( If inserting data from a different database ValueError If the type of `obj` isn't supported + """ table = sg.table(table_name, db=database) if overwrite: diff --git a/ibis/backends/exasol/__init__.py b/ibis/backends/exasol/__init__.py index 031087f7e5b4..11e88170493c 100644 --- a/ibis/backends/exasol/__init__.py +++ b/ibis/backends/exasol/__init__.py @@ -76,6 +76,7 @@ def do_connect( Port number to connect to (default: 8563) kwargs Additional keyword arguments passed to `pyexasol.connect`. + """ if kwargs.pop("quote_ident", None) is not None: raise com.UnsupportedArgumentError( @@ -291,6 +292,7 @@ def create_table( if the table exists temp Create a temporary table (not supported) + """ if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index 704ede4707a1..52febaf80e3a 100644 --- a/ibis/backends/flink/__init__.py +++ b/ibis/backends/flink/__init__.py @@ -59,6 +59,7 @@ def do_connect(self, table_env: TableEnvironment) -> None: >>> table_env = TableEnvironment.create(EnvironmentSettings.in_streaming_mode()) >>> ibis.flink.connect(table_env) + """ self._table_env = table_env @@ -97,6 +98,7 @@ def create_database( Name of the catalog in which the new database will be created. force : bool, optional If `False`, an exception is raised if the database already exists. + """ statement = CreateDatabase( name=name, db_properties=db_properties, catalog=catalog, can_exist=force @@ -116,6 +118,7 @@ def drop_database( Name of the catalog from which the database will be dropped. force : bool, optional If `False`, an exception is raised if the database does not exist. + """ statement = DropDatabase(name=name, catalog=catalog, must_exist=not force) self.raw_sql(statement.compile()) @@ -150,6 +153,7 @@ def list_tables( ------- list[str] The list of the table/view names that match the pattern `like`. + """ catalog = catalog or self.current_catalog database = database or self.current_database @@ -188,6 +192,7 @@ def list_views( ------- list[str] The list of the view names that match the pattern `like`. + """ if temp: @@ -233,6 +238,7 @@ def table( ------- Table Table named `name` from `database` + """ if database is not None and not isinstance(database, str): raise exc.IbisTypeError( @@ -271,6 +277,7 @@ def get_schema( ------- sch.Schema Ibis schema + """ from pyflink.table.types import create_arrow_schema @@ -374,6 +381,7 @@ def create_table( ------- Table The table that was created. + """ import pandas as pd import pyarrow as pa @@ -490,6 +498,7 @@ def drop_table( Whether the table is temporary or not. force If `False`, an exception is raised if the table does not exist. + """ statement = DropTable( table_name=name, @@ -516,6 +525,7 @@ def rename_table( The new name of the table. force If `False`, an exception is raised if the table does not exist. + """ statement = RenameTable( old_name=old_name, @@ -567,6 +577,7 @@ def create_view( ------- Table The view that was created. 
+ """ import pandas as pd @@ -639,6 +650,7 @@ def drop_view( Whether the view is temporary or not. force If `False`, an exception is raised if the view does not exist. + """ # TODO(deepyaman): Support (and differentiate) permanent views. @@ -682,6 +694,7 @@ def _read_file( ------ ValueError If `schema` is None. + """ if schema is None: raise ValueError( @@ -723,6 +736,7 @@ def read_parquet( ------- ir.Table The just-registered table + """ return self._read_file( file_type="parquet", path=path, schema=schema, table_name=table_name @@ -750,6 +764,7 @@ def read_csv( ------- ir.Table The just-registered table + """ return self._read_file( file_type="csv", path=path, schema=schema, table_name=table_name @@ -777,6 +792,7 @@ def read_json( ------- ir.Table The just-registered table + """ return self._read_file( file_type="json", path=path, schema=schema, table_name=table_name @@ -824,6 +840,7 @@ def insert( ------ ValueError If the type of `obj` isn't supported + """ import pandas as pd import pyarrow as pa diff --git a/ibis/backends/impala/__init__.py b/ibis/backends/impala/__init__.py index a2142846aa57..ac5e10c71176 100644 --- a/ibis/backends/impala/__init__.py +++ b/ibis/backends/impala/__init__.py @@ -80,6 +80,7 @@ class Options(ibis.config.Config): Database to use for temporary objects. temp_path : str, default "/tmp/ibis" Path for storage of temporary data. + """ temp_db: str = "__ibis_tmp" @@ -99,6 +100,7 @@ def _from_url(self, url: str, **kwargs: Any) -> Backend: ------- BaseBackend A backend instance + """ url = urlparse(url) @@ -188,6 +190,7 @@ def do_connect( >>> client = ibis.impala.connect(host=impala_host, port=impala_port) >>> client # doctest: +ELLIPSIS + """ if ca_cert is not None: params["ca_cert"] = str(ca_cert) @@ -301,6 +304,7 @@ def create_database(self, name, path=None, force=False): Path where to store the database data; otherwise uses the Impala default force Forcibly create the database + """ statement = CreateDatabase(name, path=path, can_exist=force) self._safe_exec_sql(statement) @@ -315,6 +319,7 @@ def drop_database(self, name, force=False): force If False and there are any tables in this database, raises an IntegrityError + """ if not force or name in self.list_databases(): tables = self.list_tables(database=name) @@ -370,6 +375,7 @@ def get_schema( ------- Schema Ibis schema + """ query = sge.Describe( this=sg.table( @@ -395,6 +401,7 @@ def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: ------- Iterator[tuple[str, dt.DataType]] Iterator of column name and Ibis type pairs + """ tmpview = util.gen_name("impala_tmpview") query = f"CREATE VIEW IF NOT EXISTS {tmpview} AS {query}" @@ -493,6 +500,7 @@ def create_table( expression. 
like_parquet Can specify instead of a schema + """ if obj is None and schema is None: raise com.IbisError("The schema or obj parameter is required") @@ -564,6 +572,7 @@ def avro_file( ------- ImpalaTable Impala table expression + """ name, database = self._get_concrete_table_path(name, database) @@ -615,6 +624,7 @@ def delimited_file( ------- ImpalaTable Impala table expression + """ name, database = self._get_concrete_table_path(name, database) @@ -674,6 +684,7 @@ def parquet_file( ------- ImpalaTable Impala table expression + """ name, database = self._get_concrete_table_path(name, database) @@ -743,6 +754,7 @@ def insert( Completely overwrite contents >>> con.insert(table, table_expr, overwrite=True) # quartodoc: +SKIP # doctest: +SKIP + """ if isinstance(obj, ir.Table): self._run_pre_execute_hooks(obj) @@ -774,6 +786,7 @@ def drop_table( >>> table = "my_table" >>> db = "operations" >>> con.drop_table(table, database=db, force=True) # quartodoc: +SKIP # doctest: +SKIP + """ statement = DropTable(name, database=database, must_exist=not force) self._safe_exec_sql(statement) @@ -787,6 +800,7 @@ def truncate_table(self, name: str, database: str | None = None) -> None: Table name database Database name + """ statement = TruncateTable(name, database=database) self._safe_exec_sql(statement) @@ -800,6 +814,7 @@ def rename_table(self, old_name: str, new_name: str) -> None: The old name of the table. new_name The new name of the table. + """ statement = RenameTable(old_name, new_name) self._safe_exec_sql(statement) @@ -832,6 +847,7 @@ def cache_table(self, table_name, *, database=None, pool="default"): >>> db = "operations" >>> pool = "op_4GB_pool" >>> con.cache_table("my_table", database=db, pool=pool) # quartodoc: +SKIP # doctest: +SKIP + """ statement = ddl.CacheTable(table_name, database=database, pool=pool) self._safe_exec_sql(statement) @@ -847,6 +863,7 @@ def create_function(self, func, name=None, database=None): Function name database Database name + """ if name is None: name = func.name @@ -885,6 +902,7 @@ def drop_udf( Database name aggregate Whether the function is an aggregate + """ if not input_types: if not database: @@ -1016,6 +1034,7 @@ def compute_stats( Database name incremental If True, issue COMPUTE INCREMENTAL STATS + """ maybe_inc = "INCREMENTAL " if incremental else "" cmd = f"COMPUTE {maybe_inc}STATS" @@ -1038,6 +1057,7 @@ def invalidate_metadata( Table name. Can be fully qualified (with database) database Database name + """ stmt = "INVALIDATE METADATA" if name is not None: @@ -1058,6 +1078,7 @@ def refresh(self, name: str, database: str | None = None) -> None: Table name. Can be fully qualified (with database) database Database name + """ # TODO(wesm): can this statement be cancelled? stmt = self._table_command("REFRESH", name, database=database) @@ -1078,6 +1099,7 @@ def describe_formatted( Table name. Can be fully qualified (with database) database Database name + """ from ibis.backends.impala.metadata import parse_metadata @@ -1105,6 +1127,7 @@ def show_files( Table name. 
Can be fully qualified (with database) database Database name + """ stmt = self._table_command("SHOW FILES IN", name, database=database) return self._exec_statement(stmt) @@ -1184,6 +1207,7 @@ def explain( ------- str Query plan + """ query = self.compile(expr, params=params) statement = f"EXPLAIN {query}" diff --git a/ibis/backends/impala/client.py b/ibis/backends/impala/client.py index cac294bd5cba..ab27b8f7ee18 100644 --- a/ibis/backends/impala/client.py +++ b/ibis/backends/impala/client.py @@ -96,6 +96,7 @@ def insert( Completely overwrite contents >>> t.insert(table_expr, overwrite=True) # quartodoc: +SKIP # doctest: +SKIP + """ if values is not None: raise NotImplementedError @@ -145,6 +146,7 @@ def load_data(self, path, overwrite=False, partition=None): partition partition If specified, the partition must already exist + """ if partition is not None: partition_schema = self.partition_schema() @@ -216,6 +218,7 @@ def alter( Table properties serde_properties Serialization/deserialization properties + """ def _run_ddl(**kwds): @@ -257,6 +260,7 @@ def alter_partition( Table properties serde_properties Serialization/deserialization properties + """ part_schema = self.partition_schema() @@ -305,6 +309,7 @@ def stats(self) -> pd.DataFrame: ------- DataFrame Table statistics + """ return self._client.table_stats(self._qualified_name) @@ -315,6 +320,7 @@ def column_stats(self) -> pd.DataFrame: ------- DataFrame Column statistics + """ return self._client.column_stats(self._qualified_name) diff --git a/ibis/backends/impala/udf.py b/ibis/backends/impala/udf.py index 7fc21d1fb31c..21ab7befae7c 100644 --- a/ibis/backends/impala/udf.py +++ b/ibis/backends/impala/udf.py @@ -187,6 +187,7 @@ def wrap_uda( Used internally to track function database Name of database + """ return ImpalaUDA( inputs, @@ -221,6 +222,7 @@ def wrap_udf(hdfs_file, inputs, output, so_symbol, name=None, database=None): Used internally to track function database Name of database + """ func = ImpalaUDF( inputs, output, so_symbol, name=name, lib_path=hdfs_file, database=database @@ -241,6 +243,7 @@ def scalar_function(inputs, output, name=None, database=None): Used internally to track function database Name of database + """ return ScalarFunction(inputs, output, name=name, database=database) @@ -258,5 +261,6 @@ def aggregate_function(inputs, output, name=None, database=None): Used internally to track function database Name of database + """ return AggregateFunction(inputs, output, name=name, database=database) diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py index 24c9ee97f1b6..6acfd5aa8585 100644 --- a/ibis/backends/mysql/__init__.py +++ b/ibis/backends/mysql/__init__.py @@ -54,6 +54,7 @@ def _from_url(self, url: str, **kwargs): ------- BaseBackend A backend instance + """ url = urlparse(url) @@ -154,6 +155,7 @@ def do_connect( timestamp_col : timestamp year : int32 month : int32 + """ con = pymysql.connect( user=user, @@ -283,6 +285,7 @@ def list_tables( A pattern to use for listing tables. schema The schema to perform the list against. + """ conditions = [TRUE] diff --git a/ibis/backends/oracle/__init__.py b/ibis/backends/oracle/__init__.py index 5dd3fde4cfa9..fb6746bf3656 100644 --- a/ibis/backends/oracle/__init__.py +++ b/ibis/backends/oracle/__init__.py @@ -77,6 +77,7 @@ def do_connect( dsn An Oracle Data Source Name. If provided, overrides all other connection arguments except username and password. 
+ """ # SID: unique name of an INSTANCE running an oracle process (a single, identifiable machine) # service name: an ALIAS to one (or many) individual instances that can @@ -172,6 +173,7 @@ def list_tables( A pattern to use for listing tables. schema The schema to perform the list against. + """ conditions = [TRUE] @@ -272,6 +274,7 @@ def create_table( overwrite If `True`, replace the table if it already exists, otherwise fail if the table exists + """ if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py index 2e56487ae761..82237d7a01e5 100644 --- a/ibis/backends/pandas/__init__.py +++ b/ibis/backends/pandas/__init__.py @@ -47,6 +47,7 @@ def do_connect( >>> import ibis >>> ibis.pandas.connect({"t": pd.DataFrame({"a": [1, 2, 3]})}) + """ self.dictionary = dictionary or {} self.schemas: MutableMapping[str, sch.Schema] = {} @@ -73,6 +74,7 @@ def from_dataframe( ------- Table A table expression + """ if client is None: return self.connect({name: df}).table(name) @@ -101,6 +103,7 @@ def read_csv( ------- ir.Table The just-registered table + """ table_name = table_name or util.gen_name("read_csv") df = pd.read_csv(source, **kwargs) @@ -129,6 +132,7 @@ def read_parquet( ------- ir.Table The just-registered table + """ table_name = table_name or util.gen_name("read_parquet") df = pd.read_parquet(source, **kwargs) diff --git a/ibis/backends/pandas/kernels.py b/ibis/backends/pandas/kernels.py index 09da329ff4de..0b8b4eefbca0 100644 --- a/ibis/backends/pandas/kernels.py +++ b/ibis/backends/pandas/kernels.py @@ -72,6 +72,7 @@ def _sql_like_to_regex(pattern, escape): '^.*abc$' >>> sql_like_to_regex("abc%") # any string starting with "abc" '^abc.*$' + """ cur_i = 0 pattern_length = len(pattern) diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 0550f82d3b2d..f4c7268d0416 100644 --- a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -48,6 +48,7 @@ def do_connect( ---------- tables An optional mapping of string table names to polars LazyFrames. 
+ """ for name, table in (tables or {}).items(): self._add_table(name, table) @@ -90,6 +91,7 @@ def register( ------- ir.Table The just-registered table + """ if isinstance(source, (str, Path)): @@ -166,6 +168,7 @@ def read_csv( ------- ir.Table The just-registered table + """ path = normalize_filename(path) table_name = table_name or gen_name("read_csv") @@ -197,6 +200,7 @@ def read_json( ------- ir.Table The just-registered table + """ path = normalize_filename(path) table_name = table_name or gen_name("read_json") @@ -228,6 +232,7 @@ def read_delta( ------- ir.Table The just-registered table + """ try: import deltalake # noqa: F401 @@ -263,6 +268,7 @@ def read_pandas( ------- ir.Table The just-registered table + """ table_name = table_name or gen_name("read_in_memory") self._add_table(table_name, pl.from_pandas(source, **kwargs).lazy()) @@ -294,6 +300,7 @@ def read_parquet( ------- ir.Table The just-registered table + """ table_name = table_name or gen_name("read_parquet") if not isinstance(path, (str, Path)) and len(path) == 1: diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index de074a93bf2c..ac07e43da940 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -54,6 +54,7 @@ def __init__(self, query: DataFrame) -> None: ---------- query PySpark query + """ self.query = query @@ -96,6 +97,7 @@ class Options(ibis.config.Config): ---------- treat_nan_as_null : bool Treat NaNs in floating point expressions as NULL. + """ treat_nan_as_null: bool = False @@ -144,6 +146,7 @@ def do_connect(self, session: SparkSession) -> None: >>> session = SparkSession.builder.getOrCreate() >>> ibis.pyspark.connect(session) + """ self._context = session.sparkContext self._session = session @@ -262,6 +265,7 @@ def create_database( Path where to store the database data; otherwise uses Spark default force Whether to append `IF NOT EXISTS` to the database creation SQL + """ if path is not None: properties = sge.Properties( @@ -289,6 +293,7 @@ def drop_database(self, name: str, force: bool = False) -> Any: force If False, Spark throws exception if database is not empty or database does not exist + """ sql = sge.Drop(kind="DATABASE", exist=force, this=sg.to_identifier(name)) with self._safe_raw_sql(sql): @@ -317,6 +322,7 @@ def get_schema( ------- Schema An ibis schema + """ if schema is not None: raise com.UnsupportedArgumentError( @@ -367,6 +373,7 @@ def create_table( Examples -------- >>> con.create_table("new_table_name", table_expr) # quartodoc: +SKIP # doctest: +SKIP + """ if temp is True: raise NotImplementedError( @@ -400,6 +407,7 @@ def truncate_table(self, name: str, database: str | None = None) -> None: Table name database Database name + """ table = sg.table(name, db=database) query = f"TRUNCATE TABLE {table}" @@ -434,6 +442,7 @@ def create_view( ------- Table The created view + """ src = sge.Create( this=sg.table( @@ -457,6 +466,7 @@ def rename_table(self, old_name: str, new_name: str) -> None: The old name of the table. new_name The new name of the table. + """ old = sg.table(old_name, quoted=True) new = sg.table(new_name, quoted=True) @@ -484,6 +494,7 @@ def insert( # Completely overwrite contents >>> con.insert(table, table_expr, overwrite=True) # quartodoc: +SKIP # doctest: +SKIP + """ if isinstance(obj, ir.Expr): @@ -512,6 +523,7 @@ def compute_stats( noscan If `True`, collect only basic statistics for the table (number of rows, size in bytes). 
+ """ maybe_noscan = " NOSCAN" * noscan table = sg.table(name, db=database, quoted=self.compiler.quoted).sql( @@ -559,6 +571,7 @@ def read_delta( ------- ir.Table The just-registered table + """ source = util.normalize_filename(source) spark_df = self._session.read.format("delta").load(source, **kwargs) @@ -590,6 +603,7 @@ def read_parquet( ------- ir.Table The just-registered table + """ source = util.normalize_filename(source) spark_df = self._session.read.parquet(source, **kwargs) @@ -622,6 +636,7 @@ def read_csv( ------- ir.Table The just-registered table + """ inferSchema = kwargs.pop("inferSchema", True) header = kwargs.pop("header", True) @@ -658,6 +673,7 @@ def read_json( ------- ir.Table The just-registered table + """ source_list = normalize_filenames(source_list) spark_df = self._session.read.json(source_list, **kwargs) @@ -690,6 +706,7 @@ def register( ------- ir.Table The just-registered table + """ if isinstance(source, (str, Path)): first = str(source) @@ -741,6 +758,7 @@ def to_delta( **kwargs PySpark Delta Lake table write arguments. https://spark.apache.org/docs/3.1.1/api/python/reference/api/pyspark.sql.DataFrameWriter.save.html + """ expr.compile().write.format("delta").save(os.fspath(path), **kwargs) diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index 6edf9aa75d8d..357056a1ac96 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -113,6 +113,7 @@ def _from_url(self, url: str, **kwargs): ------- BaseBackend A backend instance + """ url = urlparse(url) @@ -219,6 +220,7 @@ def do_connect(self, create_object_udfs: bool = True, **kwargs: Any): Additional arguments passed to the DBAPI connection call. kwargs Additional arguments passed to the URL constructor. + """ connect_args = kwargs.copy() session_parameters = connect_args.pop("session_parameters", {}) @@ -534,6 +536,7 @@ def list_tables( The `schema` parameter does **not** refer to the column names and types of `table`. ::: + """ if database is not None and schema is None: @@ -712,6 +715,7 @@ def create_table( if the table exists comment Add a comment to the table + """ if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") @@ -795,6 +799,7 @@ def read_csv( ------- Table The table that was read from the CSV file + """ stage = ibis.util.gen_name("stage") file_format = ibis.util.gen_name("format") @@ -903,6 +908,7 @@ def read_json( ------- Table An ibis table expression + """ stage = util.gen_name("read_json_stage") file_format = util.gen_name("read_json_format") @@ -974,6 +980,7 @@ def read_parquet( ------- Table An ibis table expression + """ import pyarrow.dataset as ds @@ -1051,6 +1058,7 @@ def insert( Name of the attached database that the table is located in. 
overwrite If `True` then replace existing contents of table + """ if not isinstance(obj, ir.Table): obj = ibis.memtable(obj) diff --git a/ibis/backends/sqlite/__init__.py b/ibis/backends/sqlite/__init__.py index c0a3d8f9a396..bc12542b3b70 100644 --- a/ibis/backends/sqlite/__init__.py +++ b/ibis/backends/sqlite/__init__.py @@ -79,6 +79,7 @@ def do_connect( -------- >>> import ibis >>> ibis.sqlite.connect("path/to/my/sqlite.db") + """ _init_sqlite3() @@ -107,6 +108,7 @@ def _from_url(self, url: str, **kwargs): ------- BaseBackend A backend instance + """ url = urlparse(url) database = url.path[1:] or ":memory:" @@ -233,6 +235,7 @@ def get_schema( ------- sch.Schema Ibis schema + """ if schema is not None: raise TypeError("sqlite doesn't support `schema`, use `database` instead") @@ -388,6 +391,7 @@ def attach(self, name: str, path: str | Path) -> None: >>> con2 = ibis.sqlite.connect("new.db") >>> con1.attach("new", "new.db") >>> con1.list_tables(database="new") + """ with self.begin() as cur: cur.execute(f"ATTACH DATABASE {str(path)!r} AS {_quote(name)}") @@ -422,6 +426,7 @@ def create_table( overwrite If `True`, replace the table if it already exists, otherwise fail if the table exists + """ if schema is None and obj is None: raise ValueError("Either `obj` or `schema` must be specified") @@ -581,6 +586,7 @@ def insert( If inserting data from a different database ValueError If the type of `obj` isn't supported + """ table = sg.table(table_name, catalog=database, quoted=self.compiler.quoted) if not isinstance(obj, ir.Expr): diff --git a/ibis/backends/sqlite/udf.py b/ibis/backends/sqlite/udf.py index aa568049ef93..380be055095e 100644 --- a/ibis/backends/sqlite/udf.py +++ b/ibis/backends/sqlite/udf.py @@ -75,6 +75,7 @@ def udf(func=None, *, skip_if_exists=False, deterministic=True): callable A callable object that returns ``None`` if any of its inputs are ``None``. + """ if func is None: return lambda func: udf( diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index c8d1d8afd8c5..a5fbaa58ff96 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -714,9 +714,7 @@ def test_array_sort(backend, con): @builtin_array -@pytest.mark.notimpl( - ["datafusion", "polars"], raises=com.OperationNotDefinedError -) +@pytest.mark.notimpl(["datafusion", "polars"], raises=com.OperationNotDefinedError) @pytest.mark.parametrize( ("a", "b", "expected_array"), [ diff --git a/ibis/backends/trino/__init__.py b/ibis/backends/trino/__init__.py index 0d790fc25948..619a4af23717 100644 --- a/ibis/backends/trino/__init__.py +++ b/ibis/backends/trino/__init__.py @@ -89,6 +89,7 @@ def _safe_raw_sql( ------ trino.dbapi.Cursor The cursor of the executed query. + """ cur = self.raw_sql(query) try: @@ -116,6 +117,7 @@ def get_schema( ------- sch.Schema Ibis schema + """ conditions = [sg.column("table_name").eq(sge.convert(table_name))] @@ -209,6 +211,7 @@ def list_tables( The `schema` parameter does **not** refer to the column names and types of `table`. 
::: + """ query = "SHOW TABLES" @@ -282,6 +285,7 @@ def do_connect( >>> con = ibis.trino.connect(database=catalog, schema=schema) >>> con = ibis.trino.connect(database=catalog, schema=schema, source="my-app") + """ self.con = trino.dbapi.connect( user=user, @@ -396,6 +400,7 @@ def create_table( Add a comment to the table properties Table properties to set on creation + """ if obj is None and schema is None: raise com.IbisError("One of the `schema` or `obj` parameter is required") diff --git a/ibis/common/annotations.py b/ibis/common/annotations.py index a3d89b1f3371..25e20edf7dc2 100644 --- a/ibis/common/annotations.py +++ b/ibis/common/annotations.py @@ -119,6 +119,7 @@ def validate(self, name: str, value: AnyType, this: AnyType) -> AnyType: Returns ------- The validated value for the field. + """ result = self.pattern.match(value, this) if result is NoMatch: @@ -142,6 +143,7 @@ class Attribute(Annotation): Pattern to validate the field. default : Callable, default EMPTY Callable to compute the default value of the field. + """ def __init__(self, pattern: Pattern = _any, default: AnyType = EMPTY): @@ -153,6 +155,7 @@ def has_default(self): Returns ------- bool + """ return self.default is not EMPTY @@ -169,6 +172,7 @@ def get_default(self, name: str, this: AnyType) -> AnyType: Returns ------- The default value for the field. + """ if callable(self.default): value = self.default(this) @@ -195,6 +199,7 @@ class Argument(Annotation): kind Kind of the argument, one of `inspect.Parameter` constants. Defaults to positional or keyword. + """ __slots__ = ("typehint", "kind") @@ -311,6 +316,7 @@ def merge(cls, *signatures, **annotations): Returns ------- Signature + """ params = {} for sig in signatures: @@ -366,6 +372,7 @@ def from_callable(cls, fn, patterns=None, return_pattern=None): Returns ------- Signature + """ sig = super().from_callable(fn) typehints = get_type_hints(fn) @@ -424,6 +431,7 @@ def unbind(self, this: dict[str, Any]) -> tuple[tuple[Any, ...], dict[str, Any]] ------- args : (args, kwargs) Tuple of positional and keyword arguments. + """ # does the reverse of bind, but doesn't apply defaults args: list = [] @@ -460,6 +468,7 @@ def validate(self, func, args, kwargs): ------- validated : dict Dictionary of validated arguments. + """ try: bound = self.bind(*args, **kwargs) @@ -537,6 +546,7 @@ def validate_return(self, func, value): ------- validated : Any Validated return value. + """ if self.return_annotation is EMPTY: return value @@ -602,6 +612,7 @@ def annotated(_1=None, _2=None, _3=None, **kwargs): Returns ------- Callable + """ if _1 is None: return functools.partial(annotated, **kwargs) diff --git a/ibis/common/bases.py b/ibis/common/bases.py index 00244f98ff6d..a1f4ac24fc8b 100644 --- a/ibis/common/bases.py +++ b/ibis/common/bases.py @@ -69,6 +69,7 @@ def __call__(cls, *args, **kwargs): Returns ------- The newly created instance of the class. 
No extra initialization + """ return cls.__create__(*args, **kwargs) diff --git a/ibis/common/collections.py b/ibis/common/collections.py index 632f185cfc7b..db98bba48588 100644 --- a/ibis/common/collections.py +++ b/ibis/common/collections.py @@ -202,6 +202,7 @@ class MapSet(Mapping[K, V]): MyMap({'a': 1, 'b': 2}) >>> m | n MyMap({'a': 1, 'b': 2, 'c': 3}) + """ def _check_conflict(self, other: collections.abc.Mapping) -> set[K]: @@ -342,6 +343,7 @@ class RewindableIterator(Iterator[V]): 3 >>> next(it) 4 + """ __slots__ = ("_iterator", "_checkpoint") diff --git a/ibis/common/deferred.py b/ibis/common/deferred.py index 17290ba4aaa6..f13ee2fbf160 100644 --- a/ibis/common/deferred.py +++ b/ibis/common/deferred.py @@ -38,6 +38,7 @@ def resolve(self, context: dict): Returns ------- The constructed object. + """ @abstractmethod @@ -76,6 +77,7 @@ class Deferred(Slotted, Immutable, Final): instead of the default. This is useful for complex deferred expressions where the arguments don't necessarily make sense to be user facing in the repr. + """ __slots__ = ("_resolver", "_repr") @@ -212,6 +214,7 @@ class Variable(FrozenSlotted, Resolver): ---------- name The key to retrieve from the state. + """ __slots__ = ("name",) @@ -234,6 +237,7 @@ class Just(FrozenSlotted, Resolver): ---------- value The value to return when the deferred is called. + """ __slots__ = ("value",) @@ -273,6 +277,7 @@ class JustUnhashable(FrozenSlotted, Resolver): ---------- value The value to return when the deferred is called. + """ __slots__ = ("value",) @@ -307,6 +312,7 @@ class Factory(FrozenSlotted, Resolver): ---------- func The function to apply. + """ __slots__ = ("func",) @@ -373,6 +379,7 @@ class Call(FrozenSlotted, Resolver): The positional argument patterns. kwargs The keyword argument patterns. + """ __slots__ = ("func", "args", "kwargs") @@ -589,6 +596,7 @@ def deferrable(func=None, *, repr=None): instead of the usual. This is useful for complex deferred expressions where the arguments don't necessarily make sense to be user facing in the repr. + """ def wrapper(func): diff --git a/ibis/common/dispatch.py b/ibis/common/dispatch.py index 2b5c4bd5ee0e..d375c00c4fa9 100644 --- a/ibis/common/dispatch.py +++ b/ibis/common/dispatch.py @@ -28,6 +28,7 @@ def normalize(r: str | re.Pattern): ------- Pattern The compiled regex. + """ r = getattr(r, "pattern", r) return re.compile("^" + r.lstrip("^").rstrip("$") + "$") diff --git a/ibis/common/egraph.py b/ibis/common/egraph.py index 09cf244a6cc8..ea18870ab401 100644 --- a/ibis/common/egraph.py +++ b/ibis/common/egraph.py @@ -46,6 +46,7 @@ class DisjointSet(Mapping[K, set[K]]): 1 >>> ds.union(1, 3) False + """ __slots__ = ("_parents", "_classes") @@ -71,6 +72,7 @@ def __contains__(self, id) -> bool: ------- ined: True if the id is in the disjoint set, False otherwise. + """ return id in self._parents @@ -87,6 +89,7 @@ def __getitem__(self, id) -> set[K]: class: The set of ids that are in the same class as the given id, including the given id. + """ id = self._parents[id] return self._classes[id] @@ -111,6 +114,7 @@ def __eq__(self, other: object) -> bool: ------- equal: True if the disjoint sets are equal, False otherwise. + """ if not isinstance(other, DisjointSet): return NotImplemented @@ -131,6 +135,7 @@ def add(self, id: K) -> K: ------- id: The id that was added to the disjoint set. + """ if id in self._parents: return self._parents[id] @@ -152,6 +157,7 @@ def find(self, id: K) -> K: ------- id: The canonicalized id for the given id. 
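For orientation, a small sketch exercising the `DisjointSet` methods documented here; the expected results follow the class docstring above (illustrative only):

from ibis.common.egraph import DisjointSet

ds = DisjointSet()
for i in range(5):
    ds.add(i)              # every id starts out in its own singleton class

assert ds.union(1, 2)      # True: the two classes are merged
assert ds.union(2, 3)      # True: 3 joins the class rooted at 1
assert ds.find(3) == 1     # 1 is the canonical id of the merged class
assert not ds.union(1, 3)  # False: already in the same class
assert ds.connected(1, 3)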
+ """ return self._parents[id] @@ -172,6 +178,7 @@ def union(self, id1, id2) -> bool: ------- merged: True if the classes were merged, False otherwise. + """ # Find the root of each class id1 = self._parents[id1] @@ -213,6 +220,7 @@ def connected(self, id1, id2): ------- connected: True if the ids are connected, False otherwise. + """ return self._parents[id1] == self._parents[id2] @@ -227,6 +235,7 @@ def verify(self): ------- verified: True if the disjoint set is not corrupted, False otherwise. + """ for id in self._parents: if id not in self._classes[self._parents[id]]: @@ -274,6 +283,7 @@ class Variable(Slotted): ---------- name : str The name of the variable. + """ __slots__ = ("name",) @@ -303,6 +313,7 @@ def substitute(self, egraph, enode, subst): ------- value : Any The substituted value. + """ return subst[self.name] @@ -323,6 +334,7 @@ class Pattern(Slotted): variables or leaf values. name : str, optional The name of the pattern which is used to refer to it in a rewrite rule. + """ __slots__ = ("head", "args", "name") @@ -389,6 +401,7 @@ def flatten(self, var=None, counter=None): (var, pattern) : tuple[Variable, Pattern] The variable and the flattened pattern where the flattened pattern cannot contain any patterns just variables. + """ # TODO(kszucs): convert a pattern to a query object instead by flattening it counter = counter or itertools.count() @@ -429,6 +442,7 @@ def substitute(self, egraph, enode, subst): ------- enode : ENode The substituted pattern which is a ground term aka. an ENode. + """ args = [] for arg in self.args: @@ -481,6 +495,7 @@ class ENode(Slotted, Node): The type of the Node the ENode represents. args : tuple The arguments of the ENode which are either ENodes or leaf values. + """ __slots__ = ("head", "args") @@ -576,6 +591,7 @@ def add(self, node: Node) -> ENode: ------- enode : The canonical enode. + """ enode = self._as_enode(node) if enode in self._eclasses: @@ -611,6 +627,7 @@ def union(self, node1: Node, node2: Node) -> ENode: ------- enode : The canonical enode. + """ enode1 = self._as_enode(node1) enode2 = self._as_enode(node2) @@ -638,6 +655,7 @@ def _match_args(self, args, patargs): ------- dict[str, Any] : The mapping of variable names to enodes or leaf values. + """ subst = {} for arg, patarg in zip(args, patargs): @@ -672,6 +690,7 @@ def match(self, pattern: Pattern) -> dict[ENode, dict[str, Any]]: ------- matches : A dictionary mapping the matched enodes to their substitutions. + """ # patterns could be reordered to match on the most selective one first patterns = dict(reversed(list(pattern.flatten()))) @@ -718,6 +737,7 @@ def apply(self, rewrites: list[Rewrite]) -> int: ------- n_changes The number of changes made to the egraph. + """ n_changes = 0 for rewrite in promote_list(rewrites): @@ -741,6 +761,7 @@ def run(self, rewrites: list[Rewrite], n: int = 10) -> bool: ------- saturated : True if the egraph is saturated, False otherwise. + """ return any(not self.apply(rewrites) for _i in range(n)) @@ -762,6 +783,7 @@ def extract(self, node: Node) -> Node: ------- node : The extracted node. + """ enode = self._as_enode(node) enode = self._eclasses.find(enode) @@ -811,6 +833,7 @@ def equivalent(self, node1: Node, node2: Node) -> bool: ------- equivalent : True if the nodes are equivalent, False otherwise. 
+ """ enode1 = self._as_enode(node1) enode2 = self._as_enode(node2) diff --git a/ibis/common/graph.py b/ibis/common/graph.py index c27dcc480f01..9914a5d56966 100644 --- a/ibis/common/graph.py +++ b/ibis/common/graph.py @@ -58,6 +58,7 @@ def _flatten_collections(node: Any) -> Iterator[N]: >>> assert list(_flatten_collections(a)) == [a] >>> assert list(_flatten_collections((c,))) == [c] >>> assert list(_flatten_collections([a, b, (c, a)])) == [a, b, c, a] + """ if isinstance(node, Node): yield node @@ -110,6 +111,7 @@ def _recursive_lookup(obj: Any, dct: dict) -> Any: {1: 'A', 2: 'B'} >>> _recursive_lookup((a, frozendict({1: c})), dct) ('A', {1: MyNode(number=2, ...)}) + """ if isinstance(obj, Node): return dct.get(obj, obj) @@ -134,6 +136,7 @@ def _coerce_finder(obj: FinderLike, context: Optional[dict] = None) -> Finder: Returns ------- A callable finder function which can be used to match nodes. + """ if isinstance(obj, Pattern): ctx = context or {} @@ -166,6 +169,7 @@ def _coerce_replacer(obj: ReplacerLike, context: Optional[dict] = None) -> Repla Returns ------- A callable replacer function which can be used to replace nodes. + """ if isinstance(obj, Pattern): ctx = context or {} @@ -243,6 +247,7 @@ def map(self, fn: Callable, filter: Optional[Finder] = None) -> dict[Node, Any]: Returns ------- A mapping of nodes to their results. + """ results: dict[Node, Any] = {} @@ -284,6 +289,7 @@ def map_clear( ------- In contrast to `map`, this method returns the result of the root node only since the rest of the results are already discarded. + """ results: dict[Node, Any] = {} @@ -334,6 +340,7 @@ def find( ------- The list of nodes matching the given pattern. The order of the nodes is determined by a breadth-first search. + """ nodes = Graph.from_bfs(self, filter=filter, context=context).nodes() finder = _coerce_finder(finder, context) @@ -358,6 +365,7 @@ def find_topmost( Returns ------- The list of topmost nodes matching the given pattern. + """ seen = set() queue = deque([self]) @@ -400,6 +408,7 @@ def replace( Returns ------- The root node of the graph with the replaced nodes. + """ replacer = _coerce_replacer(replacer, context) results = self.map(replacer, filter=filter) @@ -417,6 +426,7 @@ class Graph(dict[Node, Sequence[Node]]): ---------- mapping : Node or Mapping[Node, Sequence[Node]], default () Either a root node or a mapping of nodes to their children. + """ def __init__(self, mapping=(), /, **kwargs): @@ -449,6 +459,7 @@ def from_bfs( Returns ------- A graph constructed from the root node. + """ if filter is None: return bfs(root) @@ -481,6 +492,7 @@ def from_dfs( Returns ------- A graph constructed from the root node. + """ if filter is None: return dfs(root) @@ -504,6 +516,7 @@ def invert(self) -> Self: Returns ------- The inverted graph. + """ result: dict[Node, list[Node]] = {node: [] for node in self} for node, dependencies in self.items(): @@ -522,6 +535,7 @@ def toposort(self) -> Self: Returns ------- The topologically sorted graph. + """ dependents = self.invert() in_degree = {k: len(v) for k, v in self.items()} @@ -561,6 +575,7 @@ def traverse( the traversal, and the second is the result if its not `None`. node The Node expression or a list of expressions. + """ args = reversed(node) if isinstance(node, Sequence) else [node] @@ -603,6 +618,7 @@ def bfs(root: Node) -> Graph: Returns ------- A graph constructed from the root node. 
+ """ # fast path for the default no filter case, according to benchmarks # this is gives a 10% speedup compared to the filtered version @@ -635,6 +651,7 @@ def bfs_while(root: Node, filter: Finder) -> Graph: Returns ------- A graph constructed from the root node. + """ if not isinstance(root, Node): raise TypeError("node must be an instance of ibis.common.graph.Node") @@ -665,6 +682,7 @@ def dfs(root: Node) -> Graph: Returns ------- A graph constructed from the root node. + """ # fast path for the default no filter case, according to benchmarks # this is gives a 10% speedup compared to the filtered version @@ -697,6 +715,7 @@ def dfs_while(root: Node, filter: Finder) -> Graph: Returns ------- A graph constructed from the root node. + """ if not isinstance(root, Node): raise TypeError("node must be an instance of ibis.common.graph.Node") diff --git a/ibis/common/grounds.py b/ibis/common/grounds.py index 7f498b03fcdb..86324723770c 100644 --- a/ibis/common/grounds.py +++ b/ibis/common/grounds.py @@ -176,6 +176,7 @@ def copy(self, **overrides: Any) -> Annotable: ------- Annotable New instance of the copied object + """ this = copy(self) for name, value in overrides.items(): diff --git a/ibis/common/patterns.py b/ibis/common/patterns.py index 77b97ed88834..82d7ba251d3b 100644 --- a/ibis/common/patterns.py +++ b/ibis/common/patterns.py @@ -82,6 +82,7 @@ def from_typehint(cls, annot: type, allow_coercion: bool = True) -> Pattern: Returns ------- A pattern that matches the given type annotation. + """ # TODO(kszucs): cache the result of this function # TODO(kszucs): explore issubclass(typ, SupportsInt) etc. @@ -211,6 +212,7 @@ def match(self, value: AnyType, context: dict[str, AnyType]) -> AnyType: ------- The result of the pattern matching. If the pattern doesn't match the value, then it must return the `NoMatch` sentinel value. + """ ... @@ -236,6 +238,7 @@ def __or__(self, other: Pattern) -> AnyOf: Returns ------- New pattern that matches if either of the patterns match. + """ return AnyOf(self, other) @@ -250,6 +253,7 @@ def __and__(self, other: Pattern) -> AllOf: Returns ------- New pattern that matches if both of the patterns match. + """ return AllOf(self, other) @@ -264,6 +268,7 @@ def __rshift__(self, other: Deferred) -> Replace: Returns ------- New replace pattern. + """ return Replace(self, other) @@ -278,6 +283,7 @@ def __rmatmul__(self, name: str) -> Capture: Returns ------- New capture pattern. + """ return Capture(name, self) @@ -292,6 +298,7 @@ class Is(Slotted, Pattern): ---------- value The reference value to match against. + """ __slots__ = ("value",) @@ -330,6 +337,7 @@ class Capture(Slotted, Pattern): The pattern to match against. key The key to use in the context if the pattern matches. + """ __slots__ = ("key", "pattern") @@ -362,6 +370,7 @@ class Replace(Slotted, Pattern): The pattern to match against. replacer The deferred to use as a replacement. + """ __slots__ = ("matcher", "replacer") @@ -397,6 +406,7 @@ class Check(Slotted, Pattern): ---------- predicate The predicate to use. + """ __slots__ = ("predicate",) @@ -454,6 +464,7 @@ class Custom(Slotted, Pattern): ---------- func The function to apply. + """ __slots__ = ("func",) @@ -474,6 +485,7 @@ class EqualTo(Slotted, Pattern): ---------- value The value to check against. + """ __slots__ = ("value",) @@ -506,6 +518,7 @@ class DeferredEqualTo(Slotted, Pattern): ---------- value The value to check against. 
+ """ __slots__ = ("resolver",) @@ -532,6 +545,7 @@ class Option(Slotted, Pattern): ---------- pattern The inner pattern to use. + """ __slots__ = ("pattern", "default") @@ -599,6 +613,7 @@ class SubclassOf(Slotted, Pattern): ---------- type The type to check against. + """ __slots__ = ("type",) @@ -626,6 +641,7 @@ class InstanceOf(Slotted, Singleton, Pattern): ---------- types The type to check against. + """ __slots__ = ("type",) @@ -672,6 +688,7 @@ class GenericInstanceOf(Slotted, Pattern): >>> p = GenericInstanceOf(MyNumber[float]) >>> assert p.match(MyNumber(1.0), {}) == MyNumber(1.0) >>> assert p.match(MyNumber(1), {}) is NoMatch + """ __slots__ = ("type", "origin", "fields") @@ -716,6 +733,7 @@ class LazyInstanceOf(Slotted, Pattern): ---------- types The types to check against. + """ __fields__ = ("qualname", "package") @@ -753,6 +771,7 @@ class CoercedTo(Slotted, Pattern, Generic[T_co]): ---------- type The type to coerce to. + """ __slots__ = ("type", "func") @@ -822,6 +841,7 @@ class GenericCoercedTo(Slotted, Pattern): >>> p = GenericCoercedTo(MyNumber[float]) >>> assert p.match(3.14, {}) == MyNumber(3.14) >>> assert p.match("15", {}) == MyNumber(15.0) + """ __slots__ = ("origin", "params", "checker") @@ -860,6 +880,7 @@ class Not(Slotted, Pattern): ---------- pattern The pattern which the value should not match. + """ __slots__ = ("pattern",) @@ -889,6 +910,7 @@ class AnyOf(Slotted, Pattern): patterns The patterns to match against. The first pattern that matches will be returned. + """ __slots__ = ("patterns",) @@ -921,6 +943,7 @@ class AllOf(Slotted, Pattern): The patterns to match against. The value will be passed through each pattern in order. The changes applied to the value propagate through the patterns. + """ __slots__ = ("patterns",) @@ -956,6 +979,7 @@ class Length(Slotted, Pattern): The minimum length of the value. at_most The maximum length of the value. + """ __slots__ = ("at_least", "at_most") @@ -1006,6 +1030,7 @@ class Between(Slotted, Pattern): The lower bound. upper The upper bound. + """ __slots__ = ("lower", "upper") @@ -1029,6 +1054,7 @@ class Contains(Slotted, Pattern): ---------- needle The item that the passed value should contain. + """ __slots__ = ("needle",) @@ -1054,6 +1080,7 @@ class IsIn(Slotted, Pattern): ---------- haystack The set of values that the passed value should be in. + """ __slots__ = ("haystack",) @@ -1085,6 +1112,7 @@ class SequenceOf(Slotted, Pattern): The pattern to match against each item in the sequence. type The type to coerce the sequence to. Defaults to tuple. + """ __slots__ = ("item", "type") @@ -1132,6 +1160,7 @@ class GenericSequenceOf(Slotted, Pattern): The minimum length of the sequence. at_most The maximum length of the sequence. + """ __slots__ = ("item", "type", "length") @@ -1185,6 +1214,7 @@ class GenericMappingOf(Slotted, Pattern): The pattern to match the values against. type The type to coerce the mapping to. Defaults to dict. + """ __slots__ = ("key", "value", "type") @@ -1251,6 +1281,7 @@ class Object(Slotted, Pattern): The positional arguments to match against the attributes of the object. **kwargs The keyword arguments to match against the attributes of the object. + """ __slots__ = ("type", "args", "kwargs") @@ -1439,6 +1470,7 @@ class PatternList(Slotted, Pattern): ---------- fields The patterns to match the respective items in the tuple. + """ __slots__ = ("patterns", "type") @@ -1598,6 +1630,7 @@ def pattern(obj: AnyType) -> Pattern: Returns ------- The constructed pattern. 
+ """ if obj is Ellipsis: return _any @@ -1653,6 +1686,7 @@ def match( ... 2, ... "three", ... ] + """ if context is None: context = {} diff --git a/ibis/common/temporal.py b/ibis/common/temporal.py index 893105704d29..8540d2a46e9b 100644 --- a/ibis/common/temporal.py +++ b/ibis/common/temporal.py @@ -166,6 +166,7 @@ def normalize_timedelta( 3000 >>> normalize_timedelta(timedelta(seconds=3), IntervalUnit.MICROSECOND) 3000000 + """ if isinstance(value, datetime.timedelta): # datetime.timedelta only stores days, seconds, and microseconds internally diff --git a/ibis/common/typing.py b/ibis/common/typing.py index 0ae48a2fc7c9..1d4c16fb4440 100644 --- a/ibis/common/typing.py +++ b/ibis/common/typing.py @@ -66,6 +66,7 @@ class properties. Returns ------- Mapping of parameter or attribute name to type hint. + """ try: hints = _get_type_hints(obj, include_extras=include_extras) @@ -111,6 +112,7 @@ def get_type_params(obj: Any) -> dict[str, type]: >>> >>> get_type_params(MyDict[int, str]) {'T': , 'U': } + """ args = get_args(obj) origin = get_origin(obj) or obj @@ -156,6 +158,7 @@ def get_bound_typevars(obj: Any) -> dict[TypeVar, tuple[str, type]]: ... ... >>> get_bound_typevars(MyStruct[float, bytes]) {~T: ('a', ), ~U: ('myprop', )} + """ origin = get_origin(obj) or obj hints = get_type_hints(origin, include_properties=True) @@ -198,6 +201,7 @@ def evaluate_annotations( >>> annots = {"a": "dict[str, float]", "b": "int"} >>> evaluate_annotations(annots, __name__) {'a': dict[str, float], 'b': } + """ module = sys.modules.get(module_name, None) globalns = getattr(module, "__dict__", None) diff --git a/ibis/config.py b/ibis/config.py index 89ab08b34a93..50b82183c9f3 100644 --- a/ibis/config.py +++ b/ibis/config.py @@ -50,6 +50,7 @@ class ContextAdjustment(Config): time_col : str Name of the timestamp column for execution with a `timecontext`. See `ibis/expr/timecontext.py` for details. + """ time_col: str = "time" @@ -65,6 +66,7 @@ class SQL(Config): explicit limit. [](`None`) means no limit. default_dialect : str Dialect to use for printing SQL when the backend cannot be determined. + """ default_limit: Optional[PosInt] = None @@ -90,6 +92,7 @@ class Interactive(Config): Maximum depth for nested data types. show_types : bool Show the inferred type of value expressions in the interactive repr. + """ max_rows: int = 10 @@ -118,6 +121,7 @@ class Repr(Config): Show the inferred type of value expressions in the repr. interactive : bool Options controlling the interactive repr. + """ depth: Optional[PosInt] = None @@ -160,6 +164,7 @@ class Options(Config): Pandas specific options. pyspark : Config | None PySpark specific options. 
+ """ interactive: bool = False diff --git a/ibis/expr/analysis.py b/ibis/expr/analysis.py index e210c99f1c28..10feb3d08ed5 100644 --- a/ibis/expr/analysis.py +++ b/ibis/expr/analysis.py @@ -48,6 +48,7 @@ def flatten_predicates(node): a int64 b string right: r0.b == 'foo' + """ def predicate(node): diff --git a/ibis/expr/api.py b/ibis/expr/api.py index c570c49bebed..fbf27e2bc53e 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -260,6 +260,7 @@ def param(type: dt.DataType) -> ir.Scalar: 5.0 >>> expr.execute(params={start: date(2013, 1, 3)}) 3.0 + """ return ops.ScalarParameter(type).to_expr() @@ -293,6 +294,7 @@ def schema( >>> sc = schema(names=["foo", "bar", "baz"], types=["string", "int64", "boolean"]) >>> sc = schema(dict(foo="string")) >>> sc = schema(Schema(dict(foo="string"))) # no-op + """ if pairs is not None: return sch.schema(pairs) @@ -336,6 +338,7 @@ def table( UnboundTable: t a int64 b string + """ if name is None: if isinstance(schema, type): @@ -423,6 +426,7 @@ def memtable( col0 col1 0 1 foo 1 2 baz + """ if columns is not None and schema is not None: raise NotImplementedError( @@ -551,6 +555,7 @@ def desc(expr: ir.Column | str) -> ir.Value: ------- ir.ValueExpr An expression + """ return _deferred_method_call(expr, "desc") @@ -589,6 +594,7 @@ def asc(expr: ir.Column | str) -> ir.Value: ------- ir.ValueExpr An expression + """ return _deferred_method_call(expr, "asc") @@ -614,6 +620,7 @@ def and_(*predicates: ir.BooleanValue) -> ir.BooleanValue: BooleanValue A new predicate that evaluates to True if all composing predicates are True. If no predicates were provided, returns True. + """ if not predicates: return literal(True) @@ -633,6 +640,7 @@ def or_(*predicates: ir.BooleanValue) -> ir.BooleanValue: BooleanValue A new predicate that evaluates to True if any composing predicates are True. If no predicates were provided, returns False. + """ if not predicates: return literal(False) @@ -677,6 +685,7 @@ def random() -> ir.FloatingScalar: ------- FloatingScalar Random float value expression + """ return ops.RandomScalar().to_expr() @@ -763,6 +772,7 @@ def timestamp( │ 2001-01-02 03:00:00 │ │ 2002-04-05 06:00:00 │ └─────────────────────┘ + """ args = (value_or_year, month, day, hour, minute, second) is_ymdhms = any(a is not None for a in args[1:]) @@ -845,6 +855,7 @@ def date(value_or_year, month=None, day=None, /): │ 2001-01-02 │ │ 2002-03-04 │ └────────────┘ + """ if month is not None or day is not None: return ops.DateFromYMD(value_or_year, month, day).to_expr() @@ -915,6 +926,7 @@ def time(value_or_hour, minute=None, second=None, /): │ 01:02:03 │ │ 04:05:06 │ └──────────┘ + """ if minute is not None or second is not None: return ops.TimeFromHMS(value_or_hour, minute, second).to_expr() @@ -975,6 +987,7 @@ def interval( ------- IntervalScalar An interval expression + """ keyword_value_unit = [ ("nanoseconds", nanoseconds, "ns"), @@ -1062,6 +1075,7 @@ def case() -> bl.SearchedCaseBuilder: │ 3 │ * │ 7 │ 21.0 │ │ 4 │ / │ 8 │ 0.5 │ └───────┴────────┴───────┴─────────┘ + """ return bl.SearchedCaseBuilder() @@ -1073,6 +1087,7 @@ def now() -> ir.TimestampScalar: ------- TimestampScalar An expression representing the current timestamp. 
+ """ return ops.TimestampNow().to_expr() @@ -1105,6 +1120,7 @@ def rank() -> ir.IntegerColumn: │ 2 │ 2 │ │ 3 │ 5 │ └────────┴───────┘ + """ return ops.MinRank().to_expr() @@ -1139,6 +1155,7 @@ def dense_rank() -> ir.IntegerColumn: │ 2 │ 1 │ │ 3 │ 2 │ └────────┴───────┘ + """ return ops.DenseRank().to_expr() @@ -1169,6 +1186,7 @@ def percent_rank() -> ir.FloatingColumn: │ 2 │ 0.4 │ │ 3 │ 1.0 │ └────────┴──────────┘ + """ return ops.PercentRank().to_expr() @@ -1199,6 +1217,7 @@ def cume_dist() -> ir.FloatingColumn: │ 2 │ 0.833333 │ │ 3 │ 1.000000 │ └────────┴──────────┘ + """ return ops.CumeDist().to_expr() @@ -1229,6 +1248,7 @@ def ntile(buckets: int | ir.IntegerValue) -> ir.IntegerColumn: │ 2 │ 1 │ │ 3 │ 1 │ └────────┴───────┘ + """ return ops.NTile(buckets).to_expr() @@ -1259,6 +1279,7 @@ def row_number() -> ir.IntegerColumn: │ 3 │ 4 │ │ 2 │ 5 │ └────────┴────────┘ + """ return ops.RowNumber().to_expr() @@ -1312,6 +1333,7 @@ def read_csv( │ 2 │ NULL │ │ NULL │ f │ └───────┴────────┘ + """ from ibis.config import _default_backend @@ -1367,6 +1389,7 @@ def read_json( │ 2 │ NULL │ │ NULL │ f │ └───────┴────────┘ + """ from ibis.config import _default_backend @@ -1424,6 +1447,7 @@ def read_parquet( │ 2 │ h │ │ 3 │ i │ └───────┴────────┘ + """ from ibis.config import _default_backend @@ -1474,6 +1498,7 @@ def read_delta( │ 2 │ h │ │ 3 │ i │ └───────┴────────┘ + """ from ibis.config import _default_backend @@ -1505,6 +1530,7 @@ def set_backend(backend: str | BaseBackend) -> None: Or as an existing backend instance >>> ibis.set_backend(ibis.duckdb.connect()) + """ import ibis @@ -1532,6 +1558,7 @@ def get_backend(expr: Expr | None = None) -> BaseBackend: ------- BaseBackend The Ibis backend. + """ if expr is None: from ibis.config import _default_backend @@ -1561,6 +1588,7 @@ def rows_with_max_lookback( ------- RowsWithMaxLookback A named tuple of rows and maximum look-back in time. 
+ """ return RowsWithMaxLookback(rows, max_lookback) @@ -1604,6 +1632,7 @@ def window( ------- Window A window frame + """ if isinstance(preceding, RowsWithMaxLookback): max_lookback = preceding.max_lookback @@ -1661,6 +1690,7 @@ def rows_window(preceding=None, following=None, group_by=None, order_by=None): ------- Window A window frame + """ if isinstance(preceding, RowsWithMaxLookback): max_lookback = preceding.max_lookback @@ -1700,6 +1730,7 @@ def range_window(preceding=None, following=None, group_by=None, order_by=None): ------- Window A window frame + """ return ( bl.LegacyWindowBuilder() @@ -1725,6 +1756,7 @@ def cumulative_window(group_by=None, order_by=None): ------- Window A window frame + """ return window(rows=(None, 0), group_by=group_by, order_by=order_by) @@ -1745,6 +1777,7 @@ def trailing_window(preceding, group_by=None, order_by=None): ------- Window A window frame + """ return window( preceding=preceding, following=0, group_by=group_by, order_by=order_by @@ -1767,6 +1800,7 @@ def trailing_rows_window(preceding, group_by=None, order_by=None): ------- Window A window frame + """ return rows_window( preceding=preceding, following=0, group_by=group_by, order_by=order_by @@ -1789,6 +1823,7 @@ def trailing_range_window(preceding, order_by, group_by=None): ------- Window A window frame + """ return range_window( preceding=preceding, following=0, group_by=group_by, order_by=order_by @@ -1859,6 +1894,7 @@ def union(table: ir.Table, *rest: ir.Table, distinct: bool = False) -> ir.Table: │ 2 │ │ 3 │ └───────┘ + """ return table.union(*rest, distinct=distinct) if rest else table @@ -1914,6 +1950,7 @@ def intersect(table: ir.Table, *rest: ir.Table, distinct: bool = True) -> ir.Tab ├───────┤ │ 2 │ └───────┘ + """ return table.intersect(*rest, distinct=distinct) if rest else table @@ -1969,6 +2006,7 @@ def difference(table: ir.Table, *rest: ir.Table, distinct: bool = True) -> ir.Ta ├───────┤ │ 1 │ └───────┘ + """ return table.difference(*rest, distinct=distinct) if rest else table @@ -1992,6 +2030,7 @@ def watermark(time_col: str, allowed_delay: ir.IntervalScalar) -> Watermark: ------- Watermark A watermark object. 
+ """ return Watermark(time_col=time_col, allowed_delay=allowed_delay) @@ -2103,6 +2142,7 @@ def range(start, stop, step) -> ir.ArrayValue: │ 2002-01-19 00:00:00 │ │ … │ └─────────────────────┘ + """ raise NotImplementedError() @@ -2247,6 +2287,7 @@ def ifelse(condition: Any, true_expr: Any, false_expr: Any) -> ir.Value: │ yes │ │ no │ └────────────────────────────────┘ + """ if not isinstance(condition, ir.Value): condition = literal(condition, type="bool") @@ -2272,6 +2313,7 @@ def where(cond, true_expr, false_expr) -> ir.Value: ------- Value : ir.Value The value of `true_expr` if `arg` is `True` else `false_expr` + """ return ifelse(cond, true_expr, false_expr) @@ -2301,6 +2343,7 @@ def coalesce(*args: Any) -> ir.Value: >>> ibis.options.interactive = True >>> ibis.coalesce(None, 4, 5) 4 + """ return ops.Coalesce(args).to_expr() @@ -2325,6 +2368,7 @@ def greatest(*args: Any) -> ir.Value: >>> ibis.options.interactive = True >>> ibis.greatest(None, 4, 5) 5 + """ return ops.Greatest(args).to_expr() @@ -2349,5 +2393,6 @@ def least(*args: Any) -> ir.Value: >>> ibis.options.interactive = True >>> ibis.least(None, 4, 5) 4 + """ return ops.Least(args).to_expr() diff --git a/ibis/expr/builders.py b/ibis/expr/builders.py index 333d3456bf43..b15434fb948d 100644 --- a/ibis/expr/builders.py +++ b/ibis/expr/builders.py @@ -49,6 +49,7 @@ def when(self, case_expr: Any, result_expr: Any) -> Self: Predicate expression to use for this case. result_expr Value when the case predicate evaluates to true. + """ return self.copy( cases=self.cases + (case_expr,), results=self.results + (result_expr,) @@ -61,6 +62,7 @@ def else_(self, result_expr: Any) -> Self: ---------- result_expr Value to use when all case predicates evaluate to false. + """ return self.copy(default=result_expr) @@ -87,6 +89,7 @@ def when(self, case_expr: Any, result_expr: Any) -> Self: comparable with the base. result_expr Value when the case predicate evaluates to true. + """ if not isinstance(case_expr, ir.Value): case_expr = ibis.literal(case_expr) @@ -109,6 +112,7 @@ def else_(self, result_expr: Any) -> Self: ---------- result_expr Value to use when all case predicates evaluate to false. + """ return self.copy(default=result_expr) @@ -135,6 +139,7 @@ class WindowBuilder(Builder): Using `None` for `preceding` or `following` indicates an unbounded frame. Use 0 for `CURRENT ROW`. + """ how: Literal["rows", "range"] = "rows" diff --git a/ibis/expr/datatypes/core.py b/ibis/expr/datatypes/core.py index a317dd991cf1..6486fd07eef1 100644 --- a/ibis/expr/datatypes/core.py +++ b/ibis/expr/datatypes/core.py @@ -54,6 +54,7 @@ def dtype(value: Any, nullable: bool = True) -> DataType: >>> import pyarrow as pa >>> ibis.dtype(pa.int32()) Int32(nullable=True) + """ if isinstance(value, DataType): return value @@ -521,6 +522,7 @@ class String(Variadic, Singleton): ----- Because of differences in the way different backends handle strings, we cannot assume that strings are UTF-8 encoded. + """ scalar = "StringScalar" @@ -538,6 +540,7 @@ class Binary(Variadic, Singleton): For example, Impala doesn't make a distinction between string and binary types but PostgreSQL has a `TEXT` type and a `BYTEA` type which are distinct types that have different behavior. 
+ """ scalar = "BinaryScalar" @@ -835,6 +838,7 @@ def from_tuples( ------- Struct Struct data type instance + """ return cls(dict(pairs), nullable=nullable) diff --git a/ibis/expr/datatypes/parse.py b/ibis/expr/datatypes/parse.py index 70ba8a0b1933..ac802e7f89bf 100644 --- a/ibis/expr/datatypes/parse.py +++ b/ibis/expr/datatypes/parse.py @@ -83,6 +83,7 @@ def parse( >>> ty = dt.parse("array") >>> ty == dt.Array(dt.int64) True + """ geotype = spaceless_string("geography", "geometry") diff --git a/ibis/expr/decompile.py b/ibis/expr/decompile.py index 0567849f0055..43452fa0c09a 100644 --- a/ibis/expr/decompile.py +++ b/ibis/expr/decompile.py @@ -434,6 +434,7 @@ def decompile( ------- str Equivalent Python source code for `node`. + """ if not isinstance(expr, ir.Expr): raise TypeError(f"Expected ibis expression, got {type(expr).__name__}") diff --git a/ibis/expr/operations/analytic.py b/ibis/expr/operations/analytic.py index 37b75834b7ef..084fe3299772 100644 --- a/ibis/expr/operations/analytic.py +++ b/ibis/expr/operations/analytic.py @@ -71,6 +71,7 @@ class RowNumber(RankBase): ------- IntegerColumn Row number + """ diff --git a/ibis/expr/operations/core.py b/ibis/expr/operations/core.py index 5db1e2c2f17a..c9ef8651a7d4 100644 --- a/ibis/expr/operations/core.py +++ b/ibis/expr/operations/core.py @@ -54,6 +54,7 @@ def name(self) -> str: Returns ------- str + """ @@ -113,6 +114,7 @@ def dtype(self) -> T: Returns ------- dt.DataType + """ @property @@ -125,6 +127,7 @@ def shape(self) -> S: Returns ------- ds.Shape + """ @attribute diff --git a/ibis/expr/operations/generic.py b/ibis/expr/operations/generic.py index 15cbd5a5d345..c4bdb0933e7d 100644 --- a/ibis/expr/operations/generic.py +++ b/ibis/expr/operations/generic.py @@ -79,6 +79,7 @@ class IsNull(Unary): ------- ir.BooleanValue Value expression indicating whether values are null + """ dtype = dt.boolean @@ -92,6 +93,7 @@ class NotNull(Unary): ------- ir.BooleanValue Value expression indicating whether values are not null + """ dtype = dt.boolean diff --git a/ibis/expr/operations/numeric.py b/ibis/expr/operations/numeric.py index a35b4cedab0e..a38781d30249 100644 --- a/ibis/expr/operations/numeric.py +++ b/ibis/expr/operations/numeric.py @@ -102,6 +102,7 @@ class Ceil(Unary): DecimalValue | IntegerValue Decimal values: yield decimal Other numeric values: yield integer (int32) + """ arg: SoftNumeric @@ -123,6 +124,7 @@ class Floor(Unary): DecimalValue | IntegerValue Decimal values: yield decimal Other numeric values: yield integer (int32) + """ arg: SoftNumeric diff --git a/ibis/expr/operations/udf.py b/ibis/expr/operations/udf.py index 4fc50d2466f6..9d746ec87856 100644 --- a/ibis/expr/operations/udf.py +++ b/ibis/expr/operations/udf.py @@ -214,6 +214,7 @@ def builtin( >>> con = ibis.connect("duckdb://") >>> con.execute(expr) 1 + """ return _wrap( cls._make_wrapper, @@ -298,6 +299,7 @@ def python( -------- - [`pandas`](./scalar-udfs.qmd#ibis.expr.operations.udf.scalar.pandas) - [`pyarrow`](./scalar-udfs.qmd#ibis.expr.operations.udf.scalar.pyarrow) + """ return _wrap( cls._make_wrapper, @@ -371,6 +373,7 @@ def pandas( -------- - [`python`](./scalar-udfs.qmd#ibis.expr.operations.udf.scalar.python) - [`pyarrow`](./scalar-udfs.qmd#ibis.expr.operations.udf.scalar.pyarrow) + """ return _wrap( cls._make_wrapper, @@ -443,6 +446,7 @@ def pyarrow( -------- - [`python`](./scalar-udfs.qmd#ibis.expr.operations.udf.scalar.python) - [`pandas`](./scalar-udfs.qmd#ibis.expr.operations.udf.scalar.pandas) + """ return _wrap( cls._make_wrapper, @@ -517,6 +521,7 
@@ def builtin( >>> expr = favg(t.bill_length_mm) >>> expr 43.9219298245614 + """ return _wrap( cls._make_wrapper, diff --git a/ibis/expr/rules.py b/ibis/expr/rules.py index 30a8210eb9c9..7837b6ae4b9f 100644 --- a/ibis/expr/rules.py +++ b/ibis/expr/rules.py @@ -37,6 +37,7 @@ def highest_precedence_dtype(nodes): ------- dtype: DataType The highest precedence datatype + """ return dt.highest_precedence(node.dtype for node in nodes) @@ -187,6 +188,7 @@ class ValueOf(Concrete, Pattern): ---------- dtype : DataType | None The datatype the constructed Value instance should conform to. + """ dtype: Optional[dt.DataType] = None diff --git a/ibis/expr/schema.py b/ibis/expr/schema.py index 114ffec6a89d..96806c01fe5e 100644 --- a/ibis/expr/schema.py +++ b/ibis/expr/schema.py @@ -82,6 +82,7 @@ def equals(self, other: Schema) -> bool: >>> assert first.equals(second) >>> third = ibis.schema({"a": "array"}) >>> assert not first.equals(third) + """ if not isinstance(other, Schema): raise TypeError( @@ -114,6 +115,7 @@ def from_tuples( a int64 b string } + """ pairs = list(values) if len(pairs) == 0: @@ -204,6 +206,7 @@ def name_at_position(self, i: int) -> str: 'a' >>> sch.name_at_position(1) 'b' + """ return self.names[i] diff --git a/ibis/expr/sql.py b/ibis/expr/sql.py index 1e3a805e4b2f..f6b1b0cdda2d 100644 --- a/ibis/expr/sql.py +++ b/ibis/expr/sql.py @@ -294,6 +294,7 @@ def parse_sql(sqlstring, catalog, dialect=None): Returns ------- expr : ir.Expr + """ catalog = Catalog( {name: ibis.table(schema, name=name) for name, schema in catalog.items()} @@ -355,6 +356,7 @@ def to_sql(expr: ir.Expr, dialect: str | None = None, **kwargs) -> SQLString: ------- str Formatted SQL string + """ # try to infer from a non-str expression or if not possible fallback to # the default pretty dialect for expressions diff --git a/ibis/formats/__init__.py b/ibis/formats/__init__.py index a03d5023f6fd..54467a738ae0 100644 --- a/ibis/formats/__init__.py +++ b/ibis/formats/__init__.py @@ -33,6 +33,7 @@ def from_ibis(cls, dtype: DataType) -> T: Returns ------- Format-specific type object. + """ raise NotImplementedError @@ -50,6 +51,7 @@ def to_ibis(cls, typ: T, nullable: bool = True) -> DataType: Returns ------- Ibis DataType. + """ raise NotImplementedError @@ -67,6 +69,7 @@ def from_string(cls, text: str, nullable: bool = True) -> DataType: Returns ------- Ibis DataType. + """ raise NotImplementedError @@ -82,6 +85,7 @@ def to_string(cls, dtype: DataType) -> str: Returns ------- Backend-specific string representation. + """ raise NotImplementedError @@ -101,6 +105,7 @@ def from_ibis(cls, schema: Schema) -> S: Returns ------- Format-specific schema object. + """ raise NotImplementedError @@ -116,6 +121,7 @@ def to_ibis(cls, obj: S) -> Schema: Returns ------- Ibis Schema. + """ raise NotImplementedError @@ -139,6 +145,7 @@ def convert_scalar(cls, obj: S, dtype: DataType) -> S: Returns ------- Format specific scalar corresponding to the given Ibis datatype. + """ raise NotImplementedError @@ -156,6 +163,7 @@ def convert_column(cls, obj: C, dtype: DataType) -> C: Returns ------- Format specific column corresponding to the given Ibis datatype. + """ raise NotImplementedError @@ -173,6 +181,7 @@ def convert_table(cls, obj: T, schema: Schema) -> T: Returns ------- Format specific table-like object corresponding to the given Ibis schema. + """ raise NotImplementedError @@ -188,6 +197,7 @@ def infer_scalar(cls, obj: S) -> DataType: Returns ------- Ibis datatype corresponding to the given format-specific scalar. 
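As a usage note for `to_sql` above, a short sketch (the dialect choice is arbitrary):

import ibis

t = ibis.table({"a": "int64", "b": "string"}, name="t")
print(ibis.to_sql(t.filter(t.a > 1).select("b"), dialect="duckdb"))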
+ """ raise NotImplementedError @@ -203,6 +213,7 @@ def infer_column(cls, obj: C) -> DataType: Returns ------- Ibis datatype corresponding to the given format-specific column. + """ raise NotImplementedError @@ -218,6 +229,7 @@ def infer_table(cls, obj: T) -> Schema: Returns ------- Ibis schema corresponding to the given format-specific table. + """ raise NotImplementedError diff --git a/ibis/legacy/udf/validate.py b/ibis/legacy/udf/validate.py index 84fbcd42fbbe..749ce5e7b16c 100644 --- a/ibis/legacy/udf/validate.py +++ b/ibis/legacy/udf/validate.py @@ -26,6 +26,7 @@ def _parameter_count(funcsig: Signature) -> int: ------- int The number of parameters + """ kinds = (Parameter.POSITIONAL_OR_KEYWORD, Parameter.POSITIONAL_ONLY) return sum( diff --git a/ibis/legacy/udf/vectorized.py b/ibis/legacy/udf/vectorized.py index f1fd3e41dbd5..a5006c9b848f 100644 --- a/ibis/legacy/udf/vectorized.py +++ b/ibis/legacy/udf/vectorized.py @@ -81,6 +81,7 @@ def _coerce_to_series( ------- pd.Series Output Series + """ import pandas as pd @@ -156,6 +157,7 @@ def _coerce_to_dataframe( >>> _coerce_to_dataframe([1, 2, 3], dt.Struct(dict.fromkeys("abc", "int32"))) # noqa: E501 a b c 0 1 2 3 + """ import pandas as pd @@ -298,6 +300,7 @@ def analytic(input_type, output_type): >>> table = table.mutate( # quartodoc: +SKIP # doctest: +SKIP ... demean_and_zscore(table["v"]).over(win).destructure() ... ) + """ return _udf_decorator(AnalyticVectorizedUDF, input_type, output_type) @@ -342,6 +345,7 @@ def elementwise(input_type, output_type): >>> table = table.mutate( ... year_monthday(table["date"]).destructure() ... ) # quartodoc: +SKIP # doctest: +SKIP + """ return _udf_decorator(ElementWiseVectorizedUDF, input_type, output_type) @@ -380,5 +384,6 @@ def reduction(input_type, output_type): >>> table = table.group_by("key").aggregate( # quartodoc: +SKIP # doctest: +SKIP ... mean_and_std(table["v"]).destructure() ... 
) + """ return _udf_decorator(ReductionVectorizedUDF, input_type, output_type) diff --git a/ibis/selectors.py b/ibis/selectors.py index b094b74839df..e225cba1ce27 100644 --- a/ibis/selectors.py +++ b/ibis/selectors.py @@ -85,6 +85,7 @@ def expand(self, table: ir.Table) -> Sequence[ir.Value]: ------- Sequence[Value] A sequence of value expressions that match the selector + """ def positions(self, table: ir.Table) -> Sequence[int]: @@ -99,6 +100,7 @@ def positions(self, table: ir.Table) -> Sequence[int]: ------- Sequence[int] A sequence of column indices where the selector matches + """ raise NotImplementedError( f"`positions` doesn't make sense for {self.__class__.__name__} selector" @@ -115,6 +117,7 @@ def expand(self, table: ir.Table) -> Sequence[ir.Value]: ---------- table An ibis table expression + """ return [col for column in table.columns if self.predicate(col := table[column])] @@ -130,6 +133,7 @@ def __and__(self, other: Selector) -> Predicate: ---------- other Another selector + """ return self.__class__(lambda col: self.predicate(col) and other.predicate(col)) @@ -140,6 +144,7 @@ def __or__(self, other: Selector) -> Predicate: ---------- other Another selector + """ return self.__class__(lambda col: self.predicate(col) or other.predicate(col)) @@ -167,6 +172,7 @@ def where(predicate: Callable[[ir.Value], bool]) -> Predicate: >>> expr = t.select(s.where(lambda col: col.get_name() == "a")) >>> expr.columns ['a'] + """ return Predicate(predicate=predicate) @@ -189,6 +195,7 @@ def numeric() -> Predicate: See Also -------- [`of_type`](#ibis.selectors.of_type) + """ return of_type(dt.Numeric) @@ -234,6 +241,7 @@ def of_type(dtype: dt.DataType | str | type[dt.DataType]) -> Predicate: See Also -------- [`numeric`](#ibis.selectors.numeric) + """ if isinstance(dtype, str): # A mapping of abstract or parametric types, to allow selecting all @@ -284,6 +292,7 @@ def startswith(prefixes: str | tuple[str, ...]) -> Predicate: See Also -------- [`endswith`](#ibis.selectors.endswith) + """ return where(lambda col: col.get_name().startswith(prefixes)) @@ -300,6 +309,7 @@ def endswith(suffixes: str | tuple[str, ...]) -> Predicate: See Also -------- [`startswith`](#ibis.selectors.startswith) + """ return where(lambda col: col.get_name().endswith(suffixes)) @@ -340,6 +350,7 @@ def contains( See Also -------- [`matches`](#ibis.selectors.matches) + """ def predicate(col: ir.Value) -> bool: @@ -370,6 +381,7 @@ def matches(regex: str | re.Pattern) -> Selector: See Also -------- [`contains`](#ibis.selectors.contains) + """ pattern = re.compile(regex) return where(lambda col: pattern.search(col.get_name()) is not None) @@ -487,6 +499,7 @@ def across( │ 42.0 │ 20.2 │ -1.92193 │ … │ │ … │ … │ … │ … │ └────────────────┴───────────────┴─────────────────────────┴───┘ + """ if names is None: names = lambda col, fn: "_".join(filter(None, (col, fn))) @@ -556,6 +569,7 @@ def if_any(selector: Selector, predicate: Deferred | Callable) -> IfAnyAll: │ Adelie │ Dream │ -2.165354 │ -0.836123 │ -0.918466 │ … │ │ … │ … │ … │ … │ … │ … │ └─────────┴────────┴────────────────┴───────────────┴───────────────────┴───┘ + """ return IfAnyAll(selector=selector, predicate=predicate, summarizer=operator.or_) @@ -601,6 +615,7 @@ def if_all(selector: Selector, predicate: Deferred | Callable) -> IfAnyAll: │ Gentoo │ Biscoe │ 1.241499 │ -1.089314 │ 1.570562 │ … │ │ Gentoo │ Biscoe │ 1.351398 │ -1.494420 │ 1.214987 │ … │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴───┘ + """ return 
IfAnyAll(selector=selector, predicate=predicate, summarizer=operator.and_) diff --git a/ibis/util.py b/ibis/util.py index 244ed74087ba..65455d3ba0ce 100644 --- a/ibis/util.py +++ b/ibis/util.py @@ -67,6 +67,7 @@ def indent(text: str, spaces: int) -> str: ------- str Indented text + """ prefix = " " * spaces return textwrap.indent(text, prefix=prefix) @@ -85,6 +86,7 @@ def is_one_of(values: Sequence[T], t: type[U]) -> Iterator[bool]: Returns ------- tuple + """ return (isinstance(x, t) for x in values) @@ -104,6 +106,7 @@ def promote_list(val: V | Sequence[V]) -> list[V]: Returns ------- list + """ if isinstance(val, list): return val @@ -128,6 +131,7 @@ def promote_tuple(val: V | Sequence[V]) -> tuple[V]: Returns ------- tuple + """ if isinstance(val, tuple): return val @@ -146,6 +150,7 @@ def is_function(v: Any) -> bool: ------- bool Whether `v` is a function + """ return isinstance(v, (types.FunctionType, types.LambdaType)) @@ -164,6 +169,7 @@ def approx_equal(a: Real, b: Real, eps: Real): Raises ------ AssertionError + """ assert abs(a - b) < eps @@ -191,6 +197,7 @@ def safe_index(elements: Sequence[int], value: int) -> int: 1 >>> safe_index(sequence, 4) -1 + """ try: return elements.index(value) @@ -224,6 +231,7 @@ def is_iterable(o: Any) -> bool: False >>> is_iterable([]) True + """ if isinstance(o, (str, bytes)): return False @@ -275,6 +283,7 @@ def convert_unit(value, unit, to, floor: bool = True): Traceback (most recent call last): ... ValueError: Cannot convert to or from unit ... to unit ... + """ # Don't do anything if from and to units are equivalent if unit == to: @@ -333,6 +342,7 @@ def get_logger( Returns ------- logging.Logger + """ logging.basicConfig() handler = logging.StreamHandler() @@ -518,6 +528,7 @@ def import_object(qualname: str) -> Any: Is the same as >>> from ibis import examples as ex + """ mod_name, name = qualname.rsplit(".", 1) mod = importlib.import_module(mod_name) @@ -606,6 +617,7 @@ def slice_to_limit_offset( >>> limit, offset = slice_to_limit_offset(what, count) >>> limit, offset (5, 5) + """ if (step := what.step) is not None and step != 1: raise ValueError("Slice step can only be 1") @@ -654,6 +666,7 @@ class Namespace: The pattern to construct with the looked up types. module The module object or name to look up the types. 
+ """ __slots__ = ("_factory", "_module") From 1d9e263c0a8bc2e9aad413f9f61f19deec5d5491 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 2 Feb 2024 10:59:53 +0100 Subject: [PATCH 149/161] test(pandas): skip tests for older pandas --- ibis/backends/pandas/tests/test_cast.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ibis/backends/pandas/tests/test_cast.py b/ibis/backends/pandas/tests/test_cast.py index e07395126e77..3f166e79464f 100644 --- a/ibis/backends/pandas/tests/test_cast.py +++ b/ibis/backends/pandas/tests/test_cast.py @@ -72,7 +72,7 @@ def test_cast_array(t, from_, to, expected): "string", "object", marks=pytest.mark.skipif( - is_older_than("pandas", "2.0.0"), reason="raises a NotImplementError" + is_older_than("pandas", "2.1.0"), reason="raises a NotImplementedError" ), ), ("int64", "int64"), @@ -104,7 +104,7 @@ def test_cast_timestamp_column(t, df, column, to, expected): "string", str, marks=pytest.mark.skipif( - is_older_than("pandas", "2.0.0"), reason="raises a NotImplementError" + is_older_than("pandas", "2.1.0"), reason="raises a NotImplementedError" ), ), ("int64", lambda x: pd.Timestamp(x).value // int(1e9)), @@ -130,7 +130,7 @@ def test_cast_timestamp_scalar_naive(client, to, expected): "string", str, marks=pytest.mark.skipif( - is_older_than("pandas", "2.0.0"), reason="raises a NotImplementError" + is_older_than("pandas", "2.1.0"), reason="raises a NotImplementedError" ), ), ("int64", lambda x: pd.Timestamp(x).value // int(1e9)), From 45a76f988af3eb0b08fb0c228e78b21f663fdc68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 2 Feb 2024 11:00:31 +0100 Subject: [PATCH 150/161] test(dask): skip tests for older pandas --- ibis/backends/dask/tests/test_cast.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ibis/backends/dask/tests/test_cast.py b/ibis/backends/dask/tests/test_cast.py index 55446626eaca..23187b59abf9 100644 --- a/ibis/backends/dask/tests/test_cast.py +++ b/ibis/backends/dask/tests/test_cast.py @@ -56,7 +56,7 @@ def test_cast_string(t, df, from_, to, expected): "string", "object", marks=pytest.mark.skipif( - is_older_than("pandas", "2.0.0"), reason="raises a NotImplementError" + is_older_than("pandas", "2.1.0"), reason="raises a NotImplementedError" ), ), ("int64", "int64"), @@ -87,7 +87,7 @@ def test_cast_timestamp_column(t, df, column, to, expected): "string", str, marks=pytest.mark.skipif( - is_older_than("pandas", "2.0.0"), reason="raises a NotImplementError" + is_older_than("pandas", "2.1.0"), reason="raises a NotImplementedError" ), ), ("int64", lambda x: pd.Timestamp(x).value // int(1e9)), @@ -113,7 +113,7 @@ def test_cast_timestamp_scalar_naive(con, to, expected): "string", str, marks=pytest.mark.skipif( - is_older_than("pandas", "2.0.0"), reason="raises a NotImplementError" + is_older_than("pandas", "2.1.0"), reason="raises a NotImplementedError" ), ), ("int64", lambda x: pd.Timestamp(x).value // int(1e9)), From 8504d87b4cc7131696e027630a3976d1a32ea547 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 2 Feb 2024 11:02:40 +0100 Subject: [PATCH 151/161] fix(mssql): don't use the removed `sge.TRUE` and `sge.FALSE` literals --- ibis/backends/mssql/compiler.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/ibis/backends/mssql/compiler.py b/ibis/backends/mssql/compiler.py index d0c4470d7489..5f19d076ed36 100644 --- a/ibis/backends/mssql/compiler.py +++ 
b/ibis/backends/mssql/compiler.py @@ -12,7 +12,14 @@ import ibis.common.exceptions as com import ibis.expr.datatypes as dt import ibis.expr.operations as ops -from ibis.backends.base.sqlglot.compiler import NULL, STAR, SQLGlotCompiler, paren +from ibis.backends.base.sqlglot.compiler import ( + FALSE, + NULL, + STAR, + TRUE, + SQLGlotCompiler, + paren, +) from ibis.backends.base.sqlglot.datatypes import MSSQLType from ibis.backends.base.sqlglot.rewrites import ( rewrite_first_to_first_value, @@ -373,7 +380,7 @@ def visit_Mean(self, op, *, arg, where): @visit_node.register(ops.Not) def visit_Not(self, op, *, arg): if isinstance(arg, sge.Boolean): - return sge.FALSE if arg == sge.TRUE else sge.TRUE + return FALSE if arg == TRUE else TRUE return self.if_(arg, 1, 0).eq(0) @visit_node.register(ops.HashBytes) From 0aa91990d75b0d93d1d13c63ebf3c1eb850cc692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 2 Feb 2024 11:04:04 +0100 Subject: [PATCH 152/161] fix(pyspark): don't use the removed `sge.NULL`, `sge.TRUE` and `sge.FALSE` literals --- ibis/backends/pyspark/compiler.py | 34 +++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/ibis/backends/pyspark/compiler.py b/ibis/backends/pyspark/compiler.py index b4e75c959735..bdb68f968ea5 100644 --- a/ibis/backends/pyspark/compiler.py +++ b/ibis/backends/pyspark/compiler.py @@ -13,7 +13,7 @@ import ibis.common.exceptions as com import ibis.expr.datatypes as dt import ibis.expr.operations as ops -from ibis.backends.base.sqlglot.compiler import NULL, STAR, SQLGlotCompiler +from ibis.backends.base.sqlglot.compiler import FALSE, NULL, STAR, TRUE, SQLGlotCompiler from ibis.backends.base.sqlglot.datatypes import PySparkType from ibis.backends.base.sqlglot.rewrites import Window, p from ibis.common.patterns import replace @@ -67,7 +67,7 @@ def visit_NonNullLiteral(self, op, *, value, dtype): if dtype.is_floating(): result = super().visit_NonNullLiteral(op, value=value, dtype=dtype) if options.pyspark.treat_nan_as_null: - return self.f.nanvl(result, sge.NULL) + return self.f.nanvl(result, NULL) else: return result elif dtype.is_string(): @@ -89,7 +89,7 @@ def visit_NonNullLiteral(self, op, *, value, dtype): def visit_Field(self, op, *, rel, name): result = super().visit_Field(op, rel=rel, name=name) if op.dtype.is_floating() and options.pyspark.treat_nan_as_null: - return self.f.nanvl(result, sge.NULL) + return self.f.nanvl(result, NULL) else: return result @@ -105,7 +105,7 @@ def visit_Cast(self, op, *, arg, to): @visit_node.register(ops.IsNull) def visit_IsNull(self, op, *, arg): - is_null = arg.is_(sge.NULL) + is_null = arg.is_(NULL) is_nan = self.f.isnan(arg) if op.arg.dtype.is_floating(): return sg.or_(is_null, is_nan) @@ -114,7 +114,7 @@ def visit_IsNull(self, op, *, arg): @visit_node.register(ops.NotNull) def visit_NotNull(self, op, *, arg): - is_not_null = arg.is_(sg.not_(sge.NULL)) + is_not_null = arg.is_(sg.not_(NULL)) is_not_nan = sg.not_(self.f.isnan(arg)) if op.arg.dtype.is_floating(): return sg.and_(is_not_null, is_not_nan) @@ -125,7 +125,7 @@ def visit_NotNull(self, op, *, arg): def visit_IsInf(self, op, *, arg): if op.arg.dtype.is_floating(): return sg.or_(arg == self.POS_INF, arg == self.NEG_INF) - return sge.FALSE + return FALSE @visit_node.register(ops.Xor) def visit_Xor(self, op, left, right): @@ -214,7 +214,7 @@ def visit_CountDistinctStar(self, op, *, arg, where): self.if_( where, sg.column(name, table=arg.alias_or_name, quoted=self.quoted), - sge.NULL, + NULL, ) for 
name in op.arg.schema ] @@ -224,22 +224,22 @@ def visit_CountDistinctStar(self, op, *, arg, where): def visit_First(self, op, *, arg, where): if where is not None: arg = self.if_(where, arg, NULL) - return self.f.first(arg, sge.TRUE) + return self.f.first(arg, TRUE) @visit_node.register(ops.Last) def visit_Last(self, op, *, arg, where): if where is not None: arg = self.if_(where, arg, NULL) - return self.f.last(arg, sge.TRUE) + return self.f.last(arg, TRUE) @visit_node.register(ops.Arbitrary) def visit_Arbitrary(self, op, *, arg, how, where): if where is not None: arg = self.if_(where, arg, NULL) if how == "first": - return self.f.first(arg, sge.TRUE) + return self.f.first(arg, TRUE) elif how == "last": - return self.f.last(arg, sge.TRUE) + return self.f.last(arg, TRUE) else: raise com.UnsupportedOperationError( f"PySpark backend does not support arbitrary with how={how}. " @@ -253,9 +253,9 @@ def visit_Median(self, op, *, arg, where): @visit_node.register(ops.GroupConcat) def visit_GroupConcat(self, op, *, arg, sep, where): if where is not None: - arg = self.if_(where, arg, sge.NULL) + arg = self.if_(where, arg, NULL) collected = self.f.collect_list(arg) - collected = self.if_(self.f.size(collected).eq(0), sge.NULL, collected) + collected = self.if_(self.f.size(collected).eq(0), NULL, collected) return self.f.array_join(collected, sep) @visit_node.register(ops.Correlation) @@ -358,7 +358,7 @@ def visit_ArrayMap(self, op, *, arg, body, param): @visit_node.register(ops.ArrayFilter) def visit_ArrayFilter(self, op, *, arg, body, param): param = sge.Identifier(this=param) - func = sge.Lambda(this=self.if_(body, param, sge.NULL), expressions=[param]) + func = sge.Lambda(this=self.if_(body, param, NULL), expressions=[param]) transform = self.f.transform(arg, func) func = sge.Lambda(this=param.is_(sg.not_(NULL)), expressions=[param]) return self.f.filter(transform, func) @@ -391,8 +391,8 @@ def visit_ArraySlice(self, op, *, arg, start, stop): def visit_ArrayContains(self, op, *, arg, other): return self.if_( arg.is_(NULL), - sge.NULL, - self.f.coalesce(self.f.array_contains(arg, other), sge.FALSE), + NULL, + self.f.coalesce(self.f.array_contains(arg, other), FALSE), ) @visit_node.register(ops.ArrayStringJoin) @@ -434,7 +434,7 @@ def visit_Window(self, op, *, func, group_by, order_by, **kwargs): order = sge.Order(expressions=order_by) else: # pyspark requires an order by clause for lag/lead - order = sge.Order(expressions=[sge.NULL]) + order = sge.Order(expressions=[NULL]) return sge.Window(this=func, partition_by=group_by, order=order) else: return super().visit_node( From 9982e8b1ef0806561d6a6cbccd8ca88604aa2d7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 2 Feb 2024 11:06:22 +0100 Subject: [PATCH 153/161] fix(sqlite): don't use the removed `sge.NULL` literal --- ibis/backends/sqlite/compiler.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ibis/backends/sqlite/compiler.py b/ibis/backends/sqlite/compiler.py index efc31ed68906..5e1b5f91abfc 100644 --- a/ibis/backends/sqlite/compiler.py +++ b/ibis/backends/sqlite/compiler.py @@ -10,7 +10,7 @@ import ibis.common.exceptions as com import ibis.expr.datatypes as dt import ibis.expr.operations as ops -from ibis.backends.base.sqlglot.compiler import SQLGlotCompiler +from ibis.backends.base.sqlglot.compiler import NULL, SQLGlotCompiler from ibis.backends.base.sqlglot.datatypes import SQLiteType from ibis.backends.base.sqlglot.rewrites import ( rewrite_first_to_first_value, @@ -37,7 +37,7 
@@ class SQLiteCompiler(SQLGlotCompiler): rewrite_last_to_last_value, ) - NAN = sge.NULL + NAN = NULL POS_INF = sge.Literal.number("1e999") NEG_INF = sge.Literal.number("-1e999") @@ -187,10 +187,10 @@ def visit_IdenticalTo(self, op, *, left, right): @visit_node.register(ops.Clip) def visit_Clip(self, op, *, arg, lower, upper): if upper is not None: - arg = self.if_(arg.is_(sge.NULL), arg, self.f.min(upper, arg)) + arg = self.if_(arg.is_(NULL), arg, self.f.min(upper, arg)) if lower is not None: - arg = self.if_(arg.is_(sge.NULL), arg, self.f.max(lower, arg)) + arg = self.if_(arg.is_(NULL), arg, self.f.max(lower, arg)) return arg @@ -220,7 +220,7 @@ def visit_ArgMax(self, *args, **kwargs): return self._visit_arg_reduction("max", *args, **kwargs) def _visit_arg_reduction(self, func, op, *, arg, key, where): - cond = arg.is_(sg.not_(sge.NULL)) + cond = arg.is_(sg.not_(NULL)) if op.where is not None: cond = sg.and_(cond, where) From 6b73d2ef65b7884221bd39fe6ef804ec11531c55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 2 Feb 2024 11:24:24 +0100 Subject: [PATCH 154/161] test(ir): remove outdated old-style pytest hookwrapper causing warnings --- ibis/tests/expr/conftest.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/ibis/tests/expr/conftest.py b/ibis/tests/expr/conftest.py index e061b685a4c4..a2d4dd5b39cf 100644 --- a/ibis/tests/expr/conftest.py +++ b/ibis/tests/expr/conftest.py @@ -16,7 +16,6 @@ import pytest import ibis -import ibis.common.exceptions as com from ibis.tests.expr.mocks import MockBackend @@ -80,24 +79,3 @@ def functional_alltypes(con): @pytest.fixture def lineitem(con): return con.table("tpch_lineitem") - - -@pytest.hookimpl(hookwrapper=True) -def pytest_pyfunc_call(pyfuncitem): - """Dynamically add an xfail marker for specific backends.""" - outcome = yield - try: - outcome.get_result() - except ( - com.OperationNotDefinedError, - com.UnsupportedOperationError, - com.UnsupportedBackendType, - NotImplementedError, - ) as e: - markers = list(pyfuncitem.iter_markers(name="xfail_unsupported")) - if not markers: - raise - assert ( - len(markers) == 1 - ), f"More than one xfail_unsupported marker found on test {pyfuncitem}" - pytest.xfail(reason=repr(e)) From 791523765f3960f25fe57f3efa32bf68f4c035d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 2 Feb 2024 11:26:00 +0100 Subject: [PATCH 155/161] chore(deps): remove `sqlalchemy-views` and `sqlalchemy-risingwave` --- poetry.lock | 32 ++------------- pyproject.toml | 2 - requirements-dev.txt | 94 ++++++++++++++++++++++---------------------- 3 files changed, 50 insertions(+), 78 deletions(-) diff --git a/poetry.lock b/poetry.lock index 83cf246a8e5c..028062bce5a9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4316,6 +4316,8 @@ files = [ {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, {file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, + {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, + {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", 
hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, @@ -6362,34 +6364,6 @@ postgresql-psycopg2cffi = ["psycopg2cffi"] pymysql = ["pymysql", "pymysql (<1)"] sqlcipher = ["sqlcipher3_binary"] -[[package]] -name = "sqlalchemy-risingwave" -version = "1.0.0" -description = "RisingWave dialect for SQLAlchemy" -optional = true -python-versions = "*" -files = [ - {file = "sqlalchemy-risingwave-1.0.0.tar.gz", hash = "sha256:856a3c44b98cba34d399c3cc9785a74896caca152b3685d87553e4210e3e07a4"}, - {file = "sqlalchemy_risingwave-1.0.0-py3-none-any.whl", hash = "sha256:c733365abc38e88f4d23d83713cfc3f21c0b0d3c81210cbc2f569b49a912ba08"}, -] - -[package.dependencies] -SQLAlchemy = ">=1.4,<2" - -[[package]] -name = "sqlalchemy-views" -version = "0.3.2" -description = "Adds CreateView and DropView constructs to SQLAlchemy" -optional = true -python-versions = "*" -files = [ - {file = "sqlalchemy-views-0.3.2.tar.gz", hash = "sha256:c396416939bc4459c71f15c0279b72c9eb1f92fe022afe7fa74ed3adeef76e3b"}, - {file = "sqlalchemy_views-0.3.2-py3-none-any.whl", hash = "sha256:53a376efc8badbbd6d2d360f0d03fc70f1f5ee7748cf169506ea8a6e4f20326c"}, -] - -[package.dependencies] -sqlalchemy = ">=1.0.0" - [[package]] name = "sqlglot" version = "20.11.0" @@ -7352,4 +7326,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "51b82d38f90ea4682385f0fbdafd5613f57483482a076dd4ff1799217cffb2e6" +content-hash = "3c0e25e6963a7b7c69470015c6f0ba4616fdc98188bfeaa16583b15dd8260ecf" diff --git a/pyproject.toml b/pyproject.toml index de60e862673c..5d1b06db63f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,8 +88,6 @@ shapely = { version = ">=2,<3", optional = true } # issues with versions <3.0.2 snowflake-connector-python = { version = ">=3.0.2,<4,!=3.3.0b1", optional = true } sqlalchemy = { version = ">=1.4,<3", optional = true } -sqlalchemy-views = { version = ">=0.3.1,<1", optional = true } -sqlalchemy-risingwave = { version = ">=1.0.0,<2", optional = true } trino = { version = ">=0.321,<1", optional = true } [tool.poetry.group.dev.dependencies] diff --git a/requirements-dev.txt b/requirements-dev.txt index 8307b0d22465..1cb1ba6d5418 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ -aiohttp==3.9.1 ; python_version >= "3.9" and python_version < "4.0" +aiohttp==3.9.3 ; python_version >= "3.9" and python_version < "4.0" aiosignal==1.3.1 ; python_version >= "3.9" and python_version < "4.0" altair==5.2.0 ; python_version >= "3.10" and python_version < "3.13" annotated-types==0.6.0 ; python_version >= "3.10" and python_version < "3.13" @@ -10,13 +10,13 @@ asttokens==2.4.1 ; python_version >= "3.9" and python_version < "4.0" async-timeout==4.0.3 ; python_version >= "3.9" and python_version < "3.11" atpublic==4.0 ; python_version >= "3.9" and python_version < "4.0" attrs==23.2.0 ; python_version >= "3.9" and python_version < "4.0" -beartype==0.16.4 ; python_version >= "3.10" and python_version < "3.13" -beautifulsoup4==4.12.2 ; python_version >= "3.10" and python_version < "3.13" +beartype==0.17.0 ; python_version >= "3.10" and python_version < "3.13" +beautifulsoup4==4.12.3 ; python_version >= "3.10" and python_version < "3.13" bidict==0.22.1 ; python_version >= "3.9" and 
python_version < "4.0" bitarray==2.9.2 ; python_version >= "3.9" and python_version < "4.0" -black==23.12.1 ; python_version >= "3.9" and python_version < "4.0" +black==24.1.1 ; python_version >= "3.9" and python_version < "4.0" bqplot==0.12.42 ; python_version >= "3.10" and python_version < "3.13" -branca==0.7.0 ; python_version >= "3.10" and python_version < "3.13" +branca==0.7.1 ; python_version >= "3.10" and python_version < "3.13" build==1.0.3 ; python_version >= "3.9" and python_version < "4.0" cachecontrol[filecache]==0.13.1 ; python_version >= "3.9" and python_version < "4.0" cachetools==5.3.2 ; python_version >= "3.9" and python_version < "4.0" @@ -28,7 +28,7 @@ charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "4.0" cleo==2.1.0 ; python_version >= "3.9" and python_version < "4.0" click-plugins==1.1.1 ; python_version >= "3.9" and python_version < "4.0" click==8.1.7 ; python_version >= "3.9" and python_version < "4.0" -clickhouse-connect[arrow,numpy,pandas]==0.6.23 ; python_version >= "3.9" and python_version < "4.0" +clickhouse-connect[arrow,numpy,pandas]==0.7.0 ; python_version >= "3.9" and python_version < "4.0" cligj==0.7.2 ; python_version >= "3.9" and python_version < "4" cloudpickle==3.0.0 ; python_version >= "3.9" and python_version < "4.0" codespell[hard-encoding-detection,toml]==2.2.6 ; python_version >= "3.9" and python_version < "4.0" @@ -36,19 +36,19 @@ colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0" and (sys_pl colour==0.1.5 ; python_version >= "3.10" and python_version < "3.13" comm==0.2.1 ; python_version >= "3.10" and python_version < "3.13" contourpy==1.2.0 ; python_version >= "3.10" and python_version < "3.13" -coverage[toml]==7.4.0 ; python_version >= "3.9" and python_version < "4.0" +coverage[toml]==7.4.1 ; python_version >= "3.9" and python_version < "4.0" crashtest==0.4.1 ; python_version >= "3.9" and python_version < "4.0" cryptography==41.0.7 ; python_version >= "3.9" and python_version < "4.0" cycler==0.12.1 ; python_version >= "3.10" and python_version < "3.13" -dask==2024.1.0 ; python_version >= "3.10" and python_version < "3.13" -dask[array,dataframe]==2024.1.0 ; python_version >= "3.9" and python_version < "4.0" +dask==2024.1.1 ; python_version >= "3.10" and python_version < "3.13" +dask[array,dataframe]==2024.1.1 ; python_version >= "3.9" and python_version < "4.0" datafusion==34.0.0 ; python_version >= "3.9" and python_version < "4.0" db-dtypes==1.2.0 ; python_version >= "3.9" and python_version < "4.0" debugpy==1.8.0 ; python_version >= "3.10" and python_version < "3.13" decorator==5.1.1 ; python_version >= "3.9" and python_version < "4.0" deltalake==0.15.1 ; python_version >= "3.9" and python_version < "4.0" distlib==0.3.8 ; python_version >= "3.9" and python_version < "4.0" -distributed==2024.1.0 ; python_version >= "3.10" and python_version < "3.13" +distributed==2024.1.1 ; python_version >= "3.10" and python_version < "3.13" duckdb==0.9.2 ; python_version >= "3.9" and python_version < "4.0" dulwich==0.21.7 ; python_version >= "3.9" and python_version < "4.0" dunamai==1.19.0 ; python_version >= "3.9" and python_version < "4.0" @@ -63,15 +63,15 @@ fonttools==4.47.2 ; python_version >= "3.10" and python_version < "3.13" frozenlist==1.4.1 ; python_version >= "3.9" and python_version < "4.0" fsspec==2023.12.2 ; python_version >= "3.9" and python_version < "4.0" gcsfs==2023.12.2.post1 ; python_version >= "3.9" and python_version < "4.0" -gdown==4.7.1 ; python_version >= "3.10" and python_version < "3.13" 
+gdown==5.0.1 ; python_version >= "3.10" and python_version < "3.13" geojson==3.1.0 ; python_version >= "3.10" and python_version < "3.13" -geopandas==0.14.2 ; python_version >= "3.9" and python_version < "4.0" -google-api-core==2.15.0 ; python_version >= "3.9" and python_version < "4.0" -google-api-core[grpc]==2.15.0 ; python_version >= "3.9" and python_version < "4.0" +geopandas==0.14.3 ; python_version >= "3.9" and python_version < "4.0" +google-api-core==2.16.1 ; python_version >= "3.9" and python_version < "4.0" +google-api-core[grpc]==2.16.1 ; python_version >= "3.9" and python_version < "4.0" google-auth-oauthlib==1.2.0 ; python_version >= "3.9" and python_version < "4.0" -google-auth==2.26.2 ; python_version >= "3.9" and python_version < "4.0" +google-auth==2.27.0 ; python_version >= "3.9" and python_version < "4.0" google-cloud-bigquery-storage==2.24.0 ; python_version >= "3.9" and python_version < "4.0" -google-cloud-bigquery==3.16.0 ; python_version >= "3.9" and python_version < "4.0" +google-cloud-bigquery==3.17.1 ; python_version >= "3.9" and python_version < "4.0" google-cloud-core==2.4.1 ; python_version >= "3.9" and python_version < "4.0" google-cloud-storage==2.14.0 ; python_version >= "3.9" and python_version < "4.0" google-crc32c==1.5.0 ; python_version >= "3.9" and python_version < "4.0" @@ -79,11 +79,11 @@ google-resumable-media==2.7.0 ; python_version >= "3.9" and python_version < "4. googleapis-common-protos==1.62.0 ; python_version >= "3.9" and python_version < "4.0" graphviz==0.20.1 ; python_version >= "3.9" and python_version < "4.0" greenlet==3.0.3 ; python_version >= "3.9" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") and python_version < "4.0" -griffe==0.38.1 ; python_version >= "3.10" and python_version < "3.13" -grpcio-status==1.60.0 ; python_version >= "3.9" and python_version < "4.0" -grpcio==1.60.0 ; python_version >= "3.9" and python_version < "4.0" +griffe==0.40.0 ; python_version >= "3.10" and python_version < "3.13" +grpcio-status==1.60.1 ; python_version >= "3.9" and python_version < "4.0" +grpcio==1.60.1 ; python_version >= "3.9" and python_version < "4.0" humanize==4.9.0 ; python_version >= "3.9" and python_version < "4.0" -hypothesis==6.93.0 ; python_version >= "3.9" and python_version < "4.0" +hypothesis==6.97.4 ; python_version >= "3.9" and python_version < "4.0" identify==2.5.33 ; python_version >= "3.9" and python_version < "4.0" idna==3.6 ; python_version >= "3.9" and python_version < "4.0" importlib-metadata==7.0.1 ; python_version >= "3.9" and python_version < "4.0" @@ -93,8 +93,8 @@ iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "4.0" installer==0.7.0 ; python_version >= "3.9" and python_version < "4.0" ipyevents==2.0.2 ; python_version >= "3.10" and python_version < "3.13" ipyfilechooser==0.6.0 ; python_version >= "3.10" and python_version < "3.13" -ipykernel==6.28.0 ; python_version >= "3.10" and python_version < "3.13" -ipyleaflet==0.18.1 ; python_version >= "3.10" and python_version < "3.13" +ipykernel==6.29.0 ; python_version >= "3.10" and python_version < "3.13" +ipyleaflet==0.18.2 ; python_version >= "3.10" and python_version < "3.13" ipython==8.18.1 ; python_version >= "3.9" and python_version < "4.0" ipytree==0.2.2 ; python_version >= "3.10" and python_version < "3.13" ipywidgets==8.1.1 ; python_version >= "3.10" and python_version < 
"3.13" @@ -104,7 +104,7 @@ jeepney==0.8.0 ; python_version >= "3.9" and python_version < "4.0" and sys_plat jinja2==3.1.3 ; python_version >= "3.9" and python_version < "4.0" joblib==1.3.2 ; python_version >= "3.9" and python_version < "4.0" jsonschema-specifications==2023.12.1 ; python_version >= "3.10" and python_version < "3.13" -jsonschema==4.20.0 ; python_version >= "3.10" and python_version < "3.13" +jsonschema==4.21.1 ; python_version >= "3.10" and python_version < "3.13" jupyter-client==8.6.0 ; python_version >= "3.10" and python_version < "3.13" jupyter-core==5.7.1 ; python_version >= "3.10" and python_version < "3.13" jupyterlab-widgets==3.0.9 ; python_version >= "3.10" and python_version < "3.13" @@ -115,26 +115,26 @@ locket==1.0.0 ; python_version >= "3.9" and python_version < "4.0" lonboard==0.5.0 ; python_version >= "3.10" and python_version < "3.13" lz4==4.3.3 ; python_version >= "3.9" and python_version < "4.0" markdown-it-py==3.0.0 ; python_version >= "3.9" and python_version < "4.0" -markupsafe==2.1.3 ; python_version >= "3.9" and python_version < "4.0" +markupsafe==2.1.4 ; python_version >= "3.9" and python_version < "4.0" matplotlib-inline==0.1.6 ; python_version >= "3.9" and python_version < "4.0" matplotlib==3.8.2 ; python_version >= "3.10" and python_version < "3.13" mdurl==0.1.2 ; python_version >= "3.9" and python_version < "4.0" mizani==0.9.3 ; python_version >= "3.10" and python_version < "3.13" more-itertools==10.2.0 ; python_version >= "3.9" and python_version < "4.0" msgpack==1.0.7 ; python_version >= "3.9" and python_version < "4.0" -multidict==6.0.4 ; python_version >= "3.9" and python_version < "4.0" +multidict==6.0.5 ; python_version >= "3.9" and python_version < "4.0" multipledispatch==1.0.0 ; python_version >= "3.9" and python_version < "4.0" mypy-extensions==1.0.0 ; python_version >= "3.9" and python_version < "4.0" nbclient==0.9.0 ; python_version >= "3.10" and python_version < "3.13" nbformat==5.9.2 ; python_version >= "3.10" and python_version < "3.13" -nest-asyncio==1.5.8 ; python_version >= "3.10" and python_version < "3.13" +nest-asyncio==1.6.0 ; python_version >= "3.10" and python_version < "3.13" nodeenv==1.8.0 ; python_version >= "3.9" and python_version < "4.0" numpy==1.26.3 ; python_version >= "3.9" and python_version < "4.0" oauthlib==3.2.2 ; python_version >= "3.9" and python_version < "4.0" oracledb==2.0.1 ; python_version >= "3.9" and python_version < "4.0" packaging==23.2 ; python_version >= "3.9" and python_version < "4.0" palettable==3.3.3 ; python_version >= "3.10" and python_version < "3.13" -pandas==2.1.4 ; python_version >= "3.9" and python_version < "4.0" +pandas==2.0.3 ; python_version >= "3.9" and python_version < "4.0" parso==0.8.3 ; python_version >= "3.9" and python_version < "4.0" parsy==2.1 ; python_version >= "3.9" and python_version < "4.0" partd==1.4.1 ; python_version >= "3.9" and python_version < "4.0" @@ -147,19 +147,19 @@ pkginfo==1.9.6 ; python_version >= "3.9" and python_version < "4.0" platformdirs==3.11.0 ; python_version >= "3.9" and python_version < "4.0" plotly==5.18.0 ; python_version >= "3.10" and python_version < "3.13" plotnine==0.12.4 ; python_version >= "3.10" and python_version < "3.13" -pluggy==1.3.0 ; python_version >= "3.9" and python_version < "4.0" -plum-dispatch==2.2.2 ; python_version >= "3.10" and python_version < "3.13" +pluggy==1.4.0 ; python_version >= "3.9" and python_version < "4.0" +plum-dispatch==2.3.2 ; python_version >= "3.10" and python_version < "3.13" poetry-core==1.8.1 ; 
python_version >= "3.9" and python_version < "4.0" poetry-dynamic-versioning==1.2.0 ; python_version >= "3.9" and python_version < "4.0" poetry-plugin-export==1.6.0 ; python_version >= "3.9" and python_version < "4.0" poetry==1.7.1 ; python_version >= "3.9" and python_version < "4.0" -polars==0.20.4 ; python_version >= "3.9" and python_version < "4.0" +polars==0.20.6 ; python_version >= "3.9" and python_version < "4.0" pprintpp==0.4.0 ; python_version >= "3.9" and python_version < "4.0" pre-commit==3.6.0 ; python_version >= "3.9" and python_version < "4.0" prompt-toolkit==3.0.43 ; python_version >= "3.9" and python_version < "4.0" proto-plus==1.23.0 ; python_version >= "3.9" and python_version < "4.0" protobuf==4.25.2 ; python_version >= "3.9" and python_version < "4.0" -psutil==5.9.7 ; python_version >= "3.10" and python_version < "3.13" +psutil==5.9.8 ; python_version >= "3.10" and python_version < "3.13" psycopg2==2.9.9 ; python_version >= "3.9" and python_version < "4.0" psygnal==0.9.5 ; python_version >= "3.10" and python_version < "3.13" ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "4.0" @@ -168,18 +168,18 @@ pure-sasl==0.6.2 ; python_version >= "3.9" and python_version < "4.0" py-cpuinfo==9.0.0 ; python_version >= "3.9" and python_version < "4.0" py4j==0.10.9.7 ; python_version >= "3.9" and python_version < "4.0" pyarrow-hotfix==0.6 ; python_version >= "3.9" and python_version < "4.0" -pyarrow==14.0.2 ; python_version >= "3.9" and python_version < "4.0" +pyarrow==15.0.0 ; python_version >= "3.9" and python_version < "4.0" pyasn1-modules==0.3.0 ; python_version >= "3.9" and python_version < "4.0" pyasn1==0.5.1 ; python_version >= "3.9" and python_version < "4" pycparser==2.21 ; python_version >= "3.9" and python_version < "4.0" -pydantic-core==2.14.6 ; python_version >= "3.10" and python_version < "3.13" -pydantic==2.5.3 ; python_version >= "3.10" and python_version < "3.13" +pydantic-core==2.16.1 ; python_version >= "3.10" and python_version < "3.13" +pydantic==2.6.0 ; python_version >= "3.10" and python_version < "3.13" pydata-google-auth==1.8.2 ; python_version >= "3.9" and python_version < "4.0" -pydeps==1.12.17 ; python_version >= "3.9" and python_version < "4.0" +pydeps==1.12.18 ; python_version >= "3.9" and python_version < "4.0" pydruid==0.6.6 ; python_version >= "3.9" and python_version < "4.0" pyexasol[pandas]==0.25.2 ; python_version >= "3.9" and python_version < "4.0" pygments==2.17.2 ; python_version >= "3.9" and python_version < "4.0" -pyinstrument==4.6.1 ; python_version >= "3.9" and python_version < "4.0" +pyinstrument==4.6.2 ; python_version >= "3.9" and python_version < "4.0" pyjwt==2.8.0 ; python_version >= "3.9" and python_version < "4.0" pymysql==1.1.0 ; python_version >= "3.9" and python_version < "4.0" pyodbc==5.0.1 ; python_version >= "3.9" and python_version < "4.0" @@ -201,17 +201,17 @@ pytest-randomly==3.15.0 ; python_version >= "3.9" and python_version < "4.0" pytest-repeat==0.9.3 ; python_version >= "3.9" and python_version < "4.0" pytest-snapshot==0.9.0 ; python_version >= "3.9" and python_version < "4.0" pytest-xdist==3.5.0 ; python_version >= "3.9" and python_version < "4.0" -pytest==7.4.4 ; python_version >= "3.9" and python_version < "4.0" +pytest==8.0.0 ; python_version >= "3.9" and python_version < "4.0" python-box==7.1.1 ; python_version >= "3.10" and python_version < "3.13" python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0" -pytz==2023.3.post1 ; python_version >= "3.9" and python_version < "4.0" 
+pytz==2023.4 ; python_version >= "3.9" and python_version < "4.0" pywin32-ctypes==0.2.2 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "win32" pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.10" and python_version < "3.13" pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "4.0" pyzmq==25.1.2 ; python_version >= "3.10" and python_version < "3.13" quartodoc==0.7.2 ; python_version >= "3.10" and python_version < "3.13" rapidfuzz==3.6.1 ; python_version >= "3.9" and python_version < "4.0" -referencing==0.32.1 ; python_version >= "3.10" and python_version < "3.13" +referencing==0.33.0 ; python_version >= "3.10" and python_version < "3.13" regex==2023.12.25 ; python_version >= "3.9" and python_version < "4.0" requests-oauthlib==1.3.1 ; python_version >= "3.9" and python_version < "4.0" requests-toolbelt==1.0.0 ; python_version >= "3.9" and python_version < "4.0" @@ -220,22 +220,22 @@ requests[socks]==2.31.0 ; python_version >= "3.10" and python_version < "3.13" rich==13.7.0 ; python_version >= "3.9" and python_version < "4.0" rpds-py==0.17.1 ; python_version >= "3.10" and python_version < "3.13" rsa==4.9 ; python_version >= "3.9" and python_version < "4" -ruff==0.1.13 ; python_version >= "3.9" and python_version < "4.0" -scikit-learn==1.3.2 ; python_version >= "3.10" and python_version < "3.13" -scipy==1.11.4 ; python_version >= "3.10" and python_version < "3.13" +ruff==0.1.15 ; python_version >= "3.9" and python_version < "4.0" +scikit-learn==1.4.0 ; python_version >= "3.10" and python_version < "3.13" +scipy==1.12.0 ; python_version >= "3.10" and python_version < "3.13" scooby==0.9.2 ; python_version >= "3.10" and python_version < "3.13" -seaborn==0.13.1 ; python_version >= "3.10" and python_version < "3.13" +seaborn==0.13.2 ; python_version >= "3.10" and python_version < "3.13" secretstorage==3.3.3 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "linux" setuptools==69.0.3 ; python_version >= "3.9" and python_version < "4.0" shapely==2.0.2 ; python_version >= "3.9" and python_version < "4.0" shellingham==1.5.4 ; python_version >= "3.9" and python_version < "4.0" six==1.16.0 ; python_version >= "3.9" and python_version < "4.0" -snowflake-connector-python==3.6.0 ; python_version >= "3.9" and python_version < "4.0" +snowflake-connector-python==3.7.0 ; python_version >= "3.9" and python_version < "4.0" sortedcontainers==2.4.0 ; python_version >= "3.9" and python_version < "4.0" soupsieve==2.5 ; python_version >= "3.10" and python_version < "3.13" sphobjinv==2.3.1 ; python_version >= "3.10" and python_version < "3.13" sqlalchemy==1.4.51 ; python_version >= "3.9" and python_version < "4.0" -sqlglot==20.8.0 ; python_version >= "3.9" and python_version < "4.0" +sqlglot==20.11.0 ; python_version >= "3.9" and python_version < "4.0" stack-data==0.6.3 ; python_version >= "3.9" and python_version < "4.0" statsmodels==0.14.1 ; python_version >= "3.10" and python_version < "3.13" stdlib-list==0.10.0 ; python_version >= "3.9" and python_version < "4.0" @@ -247,13 +247,13 @@ thrift-sasl==0.4.3 ; python_version >= "3.9" and python_version < "4.0" thrift==0.16.0 ; python_version >= "3.9" and python_version < "4.0" tomli==2.0.1 ; python_version >= "3.9" and python_full_version <= "3.11.0a6" tomlkit==0.12.3 ; python_version >= "3.9" and python_version < "4.0" -toolz==0.12.0 ; python_version >= "3.9" and python_version < "4.0" +toolz==0.12.1 ; python_version >= "3.9" and python_version < "4.0" 
tornado==6.4 ; python_version >= "3.10" and python_version < "3.13" tqdm==4.66.1 ; python_version >= "3.9" and python_version < "4.0" traitlets==5.14.1 ; python_version >= "3.9" and python_version < "4.0" traittypes==0.2.1 ; python_version >= "3.10" and python_version < "3.13" trino==0.327.0 ; python_version >= "3.9" and python_version < "4.0" -trove-classifiers==2024.1.8 ; python_version >= "3.9" and python_version < "4.0" +trove-classifiers==2024.1.31 ; python_version >= "3.9" and python_version < "4.0" typing-extensions==4.9.0 ; python_version >= "3.9" and python_version < "4.0" tzdata==2023.4 ; python_version >= "3.9" and python_version < "4.0" tzlocal==5.2 ; python_version >= "3.9" and python_version < "4.0" From db398865dab3f5f53fe2f6b2fd27a05eb38448b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 2 Feb 2024 11:52:40 +0100 Subject: [PATCH 156/161] test(backends): restore deleted `test_benchmarks.py` --- ibis/backends/tests/test_benchmarks.py | 899 +++++++++++++++++++++++++ 1 file changed, 899 insertions(+) create mode 100644 ibis/backends/tests/test_benchmarks.py diff --git a/ibis/backends/tests/test_benchmarks.py b/ibis/backends/tests/test_benchmarks.py new file mode 100644 index 000000000000..fbfd0977887e --- /dev/null +++ b/ibis/backends/tests/test_benchmarks.py @@ -0,0 +1,899 @@ +from __future__ import annotations + +import copy +import functools +import inspect +import itertools +import os +import string + +import numpy as np +import pandas as pd +import pytest +import sqlalchemy as sa +from packaging.version import parse as vparse + +import ibis +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +import ibis.expr.types as ir +from ibis.backends.base import _get_backend_names + +# ruff: noqa: F821 + +pytestmark = pytest.mark.skip(reason="the backends must be rewritten first") + + +def make_t(): + return ibis.table( + [ + ("_timestamp", "int32"), + ("dim1", "int32"), + ("dim2", "int32"), + ("valid_seconds", "int32"), + ("meas1", "int32"), + ("meas2", "int32"), + ("year", "int32"), + ("month", "int32"), + ("day", "int32"), + ("hour", "int32"), + ("minute", "int32"), + ], + name="t", + ) + + +@pytest.fixture(scope="module") +def t(): + return make_t() + + +def make_base(t): + return t[ + ( + (t.year > 2016) + | ((t.year == 2016) & (t.month > 6)) + | ((t.year == 2016) & (t.month == 6) & (t.day > 6)) + | ((t.year == 2016) & (t.month == 6) & (t.day == 6) & (t.hour > 6)) + | ( + (t.year == 2016) + & (t.month == 6) + & (t.day == 6) + & (t.hour == 6) + & (t.minute >= 5) + ) + ) + & ( + (t.year < 2016) + | ((t.year == 2016) & (t.month < 6)) + | ((t.year == 2016) & (t.month == 6) & (t.day < 6)) + | ((t.year == 2016) & (t.month == 6) & (t.day == 6) & (t.hour < 6)) + | ( + (t.year == 2016) + & (t.month == 6) + & (t.day == 6) + & (t.hour == 6) + & (t.minute <= 5) + ) + ) + ] + + +@pytest.fixture(scope="module") +def base(t): + return make_base(t) + + +def make_large_expr(base): + src_table = base + src_table = src_table.mutate( + _timestamp=(src_table["_timestamp"] - src_table["_timestamp"] % 3600) + .cast("int32") + .name("_timestamp"), + valid_seconds=300, + ) + + aggs = [] + for meas in ["meas1", "meas2"]: + aggs.append(src_table[meas].sum().cast("float").name(meas)) + src_table = src_table.aggregate( + aggs, by=["_timestamp", "dim1", "dim2", "valid_seconds"] + ) + + part_keys = ["year", "month", "day", "hour", "minute"] + ts_col = src_table["_timestamp"].cast("timestamp") + new_cols = {} + for part_key in part_keys: + part_col = 
getattr(ts_col, part_key)() + new_cols[part_key] = part_col + src_table = src_table.mutate(**new_cols) + return src_table[ + [ + "_timestamp", + "dim1", + "dim2", + "meas1", + "meas2", + "year", + "month", + "day", + "hour", + "minute", + ] + ] + + +@pytest.fixture(scope="module") +def large_expr(base): + return make_large_expr(base) + + +@pytest.mark.benchmark(group="construction") +@pytest.mark.parametrize( + "construction_fn", + [ + pytest.param(lambda *_: make_t(), id="small"), + pytest.param(lambda t, *_: make_base(t), id="medium"), + pytest.param(lambda _, base: make_large_expr(base), id="large"), + ], +) +def test_construction(benchmark, construction_fn, t, base): + benchmark(construction_fn, t, base) + + +@pytest.mark.benchmark(group="builtins") +@pytest.mark.parametrize( + "expr_fn", + [ + pytest.param(lambda t, _base, _large_expr: t, id="small"), + pytest.param(lambda _t, base, _large_expr: base, id="medium"), + pytest.param(lambda _t, _base, large_expr: large_expr, id="large"), + ], +) +@pytest.mark.parametrize("builtin", [hash, str]) +def test_builtins(benchmark, expr_fn, builtin, t, base, large_expr): + expr = expr_fn(t, base, large_expr) + benchmark(builtin, expr) + + +_backends = set(_get_backend_names()) +# compile is a no-op +_backends.remove("pandas") + +_XFAIL_COMPILE_BACKENDS = {"dask", "pyspark", "polars", "risingwave"} + + +@pytest.mark.benchmark(group="compilation") +@pytest.mark.parametrize( + "module", + [ + pytest.param( + mod, + marks=pytest.mark.xfail( + condition=mod in _XFAIL_COMPILE_BACKENDS, + reason=f"{mod} backend doesn't support compiling UnboundTable", + ), + ) + for mod in _backends + ], +) +@pytest.mark.parametrize( + "expr_fn", + [ + pytest.param(lambda t, _base, _large_expr: t, id="small"), + pytest.param(lambda _t, base, _large_expr: base, id="medium"), + pytest.param(lambda _t, _base, large_expr: large_expr, id="large"), + ], +) +def test_compile(benchmark, module, expr_fn, t, base, large_expr): + try: + mod = getattr(ibis, module) + except (AttributeError, ImportError) as e: + pytest.skip(str(e)) + else: + expr = expr_fn(t, base, large_expr) + try: + benchmark(mod.compile, expr) + except (sa.exc.NoSuchModuleError, ImportError) as e: # delayed imports + pytest.skip(str(e)) + + +@pytest.fixture(scope="module") +def pt(): + n = 60_000 + data = pd.DataFrame( + { + "key": np.random.choice(16000, size=n), + "low_card_key": np.random.choice(30, size=n), + "value": np.random.rand(n), + "timestamps": pd.date_range( + start="2023-05-05 16:37:57", periods=n, freq="s" + ).values, + "timestamp_strings": pd.date_range( + start="2023-05-05 16:37:39", periods=n, freq="s" + ).values.astype(str), + "repeated_timestamps": pd.date_range(start="2018-09-01", periods=30).repeat( + int(n / 30) + ), + } + ) + + return ibis.pandas.connect(dict(df=data)).table("df") + + +def high_card_group_by(t): + return t.group_by(t.key).aggregate(avg_value=t.value.mean()) + + +def cast_to_dates(t): + return t.timestamps.cast(dt.date) + + +def cast_to_dates_from_strings(t): + return t.timestamp_strings.cast(dt.date) + + +def multikey_group_by_with_mutate(t): + return ( + t.mutate(dates=t.timestamps.cast("date")) + .group_by(["low_card_key", "dates"]) + .aggregate(avg_value=lambda t: t.value.mean()) + ) + + +def simple_sort(t): + return t.order_by([t.key]) + + +def simple_sort_projection(t): + return t[["key", "value"]].order_by(["key"]) + + +def multikey_sort(t): + return t.order_by(["low_card_key", "key"]) + + +def multikey_sort_projection(t): + return t[["low_card_key", "key", 
"value"]].order_by(["low_card_key", "key"]) + + +def low_card_rolling_window(t): + return ibis.trailing_range_window( + ibis.interval(days=2), + order_by=t.repeated_timestamps, + group_by=t.low_card_key, + ) + + +def low_card_grouped_rolling(t): + return t.value.mean().over(low_card_rolling_window(t)) + + +def high_card_rolling_window(t): + return ibis.trailing_range_window( + ibis.interval(days=2), + order_by=t.repeated_timestamps, + group_by=t.key, + ) + + +def high_card_grouped_rolling(t): + return t.value.mean().over(high_card_rolling_window(t)) + + +# @udf.reduction(["double"], "double") +# def my_mean(series): +# return series.mean() + + +def low_card_grouped_rolling_udf_mean(t): + return my_mean(t.value).over(low_card_rolling_window(t)) + + +def high_card_grouped_rolling_udf_mean(t): + return my_mean(t.value).over(high_card_rolling_window(t)) + + +# @udf.analytic(["double"], "double") +# def my_zscore(series): +# return (series - series.mean()) / series.std() + + +def low_card_window(t): + return ibis.window(group_by=t.low_card_key) + + +def high_card_window(t): + return ibis.window(group_by=t.key) + + +def low_card_window_analytics_udf(t): + return my_zscore(t.value).over(low_card_window(t)) + + +def high_card_window_analytics_udf(t): + return my_zscore(t.value).over(high_card_window(t)) + + +# @udf.reduction(["double", "double"], "double") +# def my_wm(v, w): +# return np.average(v, weights=w) + + +def low_card_grouped_rolling_udf_wm(t): + return my_wm(t.value, t.value).over(low_card_rolling_window(t)) + + +def high_card_grouped_rolling_udf_wm(t): + return my_wm(t.value, t.value).over(low_card_rolling_window(t)) + + +broken_pandas_grouped_rolling = pytest.mark.xfail( + condition=vparse("1.4") <= vparse(pd.__version__) < vparse("1.4.2"), + raises=ValueError, + reason="https://github.com/pandas-dev/pandas/pull/44068", +) + + +@pytest.mark.benchmark(group="execution") +@pytest.mark.parametrize( + "expression_fn", + [ + pytest.param(high_card_group_by, id="high_card_group_by"), + pytest.param(cast_to_dates, id="cast_to_dates"), + pytest.param(cast_to_dates_from_strings, id="cast_to_dates_from_strings"), + pytest.param(multikey_group_by_with_mutate, id="multikey_group_by_with_mutate"), + pytest.param(simple_sort, id="simple_sort"), + pytest.param(simple_sort_projection, id="simple_sort_projection"), + pytest.param(multikey_sort, id="multikey_sort"), + pytest.param(multikey_sort_projection, id="multikey_sort_projection"), + pytest.param( + low_card_grouped_rolling, + id="low_card_grouped_rolling", + marks=[broken_pandas_grouped_rolling], + ), + pytest.param( + high_card_grouped_rolling, + id="high_card_grouped_rolling", + marks=[broken_pandas_grouped_rolling], + ), + pytest.param( + low_card_grouped_rolling_udf_mean, + id="low_card_grouped_rolling_udf_mean", + marks=[broken_pandas_grouped_rolling], + ), + pytest.param( + high_card_grouped_rolling_udf_mean, + id="high_card_grouped_rolling_udf_mean", + marks=[broken_pandas_grouped_rolling], + ), + pytest.param(low_card_window_analytics_udf, id="low_card_window_analytics_udf"), + pytest.param( + high_card_window_analytics_udf, id="high_card_window_analytics_udf" + ), + pytest.param( + low_card_grouped_rolling_udf_wm, + id="low_card_grouped_rolling_udf_wm", + marks=[broken_pandas_grouped_rolling], + ), + pytest.param( + high_card_grouped_rolling_udf_wm, + id="high_card_grouped_rolling_udf_wm", + marks=[broken_pandas_grouped_rolling], + ), + ], +) +def test_execute(benchmark, expression_fn, pt): + expr = expression_fn(pt) + 
benchmark(expr.execute)
+
+
+@pytest.fixture(scope="module")
+def part():
+    return ibis.table(
+        dict(
+            p_partkey="int64",
+            p_size="int64",
+            p_type="string",
+            p_mfgr="string",
+        ),
+        name="part",
+    )
+
+
+@pytest.fixture(scope="module")
+def supplier():
+    return ibis.table(
+        dict(
+            s_suppkey="int64",
+            s_nationkey="int64",
+            s_name="string",
+            s_acctbal="decimal(15, 3)",
+            s_address="string",
+            s_phone="string",
+            s_comment="string",
+        ),
+        name="supplier",
+    )
+
+
+@pytest.fixture(scope="module")
+def partsupp():
+    return ibis.table(
+        dict(
+            ps_partkey="int64",
+            ps_suppkey="int64",
+            ps_supplycost="decimal(15, 3)",
+        ),
+        name="partsupp",
+    )
+
+
+@pytest.fixture(scope="module")
+def nation():
+    return ibis.table(
+        dict(n_nationkey="int64", n_regionkey="int64", n_name="string"),
+        name="nation",
+    )
+
+
+@pytest.fixture(scope="module")
+def region():
+    return ibis.table(dict(r_regionkey="int64", r_name="string"), name="region")
+
+
+@pytest.fixture(scope="module")
+def tpc_h02(part, supplier, partsupp, nation, region):
+    REGION = "EUROPE"
+    SIZE = 25
+    TYPE = "BRASS"
+
+    expr = (
+        part.join(partsupp, part.p_partkey == partsupp.ps_partkey)
+        .join(supplier, supplier.s_suppkey == partsupp.ps_suppkey)
+        .join(nation, supplier.s_nationkey == nation.n_nationkey)
+        .join(region, nation.n_regionkey == region.r_regionkey)
+    )
+
+    subexpr = (
+        partsupp.join(supplier, supplier.s_suppkey == partsupp.ps_suppkey)
+        .join(nation, supplier.s_nationkey == nation.n_nationkey)
+        .join(region, nation.n_regionkey == region.r_regionkey)
+    )
+
+    subexpr = subexpr[
+        (subexpr.r_name == REGION) & (expr.p_partkey == subexpr.ps_partkey)
+    ]
+
+    filters = [
+        expr.p_size == SIZE,
+        expr.p_type.like(f"%{TYPE}"),
+        expr.r_name == REGION,
+        expr.ps_supplycost == subexpr.ps_supplycost.min(),
+    ]
+    q = expr.filter(filters)
+
+    q = q.select(
+        [
+            q.s_acctbal,
+            q.s_name,
+            q.n_name,
+            q.p_partkey,
+            q.p_mfgr,
+            q.s_address,
+            q.s_phone,
+            q.s_comment,
+        ]
+    )
+
+    return q.order_by(
+        [
+            ibis.desc(q.s_acctbal),
+            q.n_name,
+            q.s_name,
+            q.p_partkey,
+        ]
+    ).limit(100)
+
+
+@pytest.mark.benchmark(group="repr")
+def test_repr_tpc_h02(benchmark, tpc_h02):
+    benchmark(repr, tpc_h02)
+
+
+@pytest.mark.benchmark(group="repr")
+def test_repr_huge_union(benchmark):
+    n = 10
+    raw_types = [
+        "int64",
+        "float64",
+        "string",
+        "array<struct<a: array<string>, b: map<string, array<int64>>>>",
+    ]
+    tables = [
+        ibis.table(
+            list(zip(string.ascii_letters, itertools.cycle(raw_types))),
+            name=f"t{i:d}",
+        )
+        for i in range(n)
+    ]
+    expr = functools.reduce(ir.Table.union, tables)
+    benchmark(repr, expr)
+
+
+@pytest.mark.benchmark(group="node_args")
+def test_op_argnames(benchmark):
+    t = ibis.table([("a", "int64")])
+    expr = t[["a"]]
+    benchmark(lambda op: op.argnames, expr.op())
+
+
+@pytest.mark.benchmark(group="node_args")
+def test_op_args(benchmark):
+    t = ibis.table([("a", "int64")])
+    expr = t[["a"]]
+    benchmark(lambda op: op.args, expr.op())
+
+
+@pytest.mark.benchmark(group="datatype")
+def test_complex_datatype_parse(benchmark):
+    type_str = "array<struct<a: array<string>, b: map<string, array<int64>>>>"
+    expected = dt.Array(
+        dt.Struct(dict(a=dt.Array(dt.string), b=dt.Map(dt.string, dt.Array(dt.int64))))
+    )
+    assert dt.parse(type_str) == expected
+    benchmark(dt.parse, type_str)
+
+
+@pytest.mark.benchmark(group="datatype")
+@pytest.mark.parametrize("func", [str, hash])
+def test_complex_datatype_builtins(benchmark, func):
+    datatype = dt.Array(
+        dt.Struct(dict(a=dt.Array(dt.string), b=dt.Map(dt.string, dt.Array(dt.int64))))
+    )
+    benchmark(func, datatype)
+
+
+@pytest.mark.benchmark(group="equality") +def test_large_expr_equals(benchmark, tpc_h02): + benchmark(ir.Expr.equals, tpc_h02, copy.deepcopy(tpc_h02)) + + +@pytest.mark.benchmark(group="datatype") +@pytest.mark.parametrize( + "dtypes", + [ + pytest.param( + [ + obj + for _, obj in inspect.getmembers( + dt, + lambda obj: isinstance(obj, dt.DataType), + ) + ], + id="singletons", + ), + pytest.param( + dt.Array( + dt.Struct( + dict( + a=dt.Array(dt.string), + b=dt.Map(dt.string, dt.Array(dt.int64)), + ) + ) + ), + id="complex", + ), + ], +) +def test_eq_datatypes(benchmark, dtypes): + def eq(a, b): + assert a == b + + benchmark(eq, dtypes, copy.deepcopy(dtypes)) + + +def multiple_joins(table, num_joins): + for _ in range(num_joins): + table = table.mutate(dummy=ibis.literal("")) + table = table.left_join(table, ["dummy"])[[table]] + + +@pytest.mark.parametrize("num_joins", [1, 10]) +@pytest.mark.parametrize("num_columns", [1, 10, 100]) +def test_multiple_joins(benchmark, num_joins, num_columns): + table = ibis.table( + {f"col_{i:d}": "string" for i in range(num_columns)}, + name="t", + ) + benchmark(multiple_joins, table, num_joins) + + +@pytest.fixture +def customers(): + return ibis.table( + dict( + customerid="int32", + name="string", + address="string", + citystatezip="string", + birthdate="date", + phone="string", + timezone="string", + lat="float64", + long="float64", + ), + name="customers", + ) + + +@pytest.fixture +def orders(): + return ibis.table( + dict( + orderid="int32", + customerid="int32", + ordered="timestamp", + shipped="timestamp", + items="string", + total="float64", + ), + name="orders", + ) + + +@pytest.fixture +def orders_items(): + return ibis.table( + dict(orderid="int32", sku="string", qty="int32", unit_price="float64"), + name="orders_items", + ) + + +@pytest.fixture +def products(): + return ibis.table( + dict( + sku="string", + desc="string", + weight_kg="float64", + cost="float64", + dims_cm="string", + ), + name="products", + ) + + +@pytest.mark.benchmark(group="compilation") +@pytest.mark.parametrize( + "module", + [ + pytest.param( + mod, + marks=pytest.mark.xfail( + condition=mod in _XFAIL_COMPILE_BACKENDS, + reason=f"{mod} backend doesn't support compiling UnboundTable", + ), + ) + for mod in _backends + ], +) +def test_compile_with_drops( + benchmark, module, customers, orders, orders_items, products +): + expr = ( + customers.join(orders, "customerid") + .join(orders_items, "orderid") + .join(products, "sku") + .drop("customerid", "qty", "total", "items") + .drop("dims_cm", "cost") + .mutate(o_date=lambda t: t.shipped.date()) + .filter(lambda t: t.ordered == t.shipped) + ) + + try: + mod = getattr(ibis, module) + except (AttributeError, ImportError) as e: + pytest.skip(str(e)) + else: + try: + benchmark(mod.compile, expr) + except sa.exc.NoSuchModuleError as e: + pytest.skip(str(e)) + + +def test_repr_join(benchmark, customers, orders, orders_items, products): + expr = ( + customers.join(orders, "customerid") + .join(orders_items, "orderid") + .join(products, "sku") + .drop("customerid", "qty", "total", "items") + ) + op = expr.op() + benchmark(repr, op) + + +@pytest.mark.parametrize("overwrite", [True, False], ids=["overwrite", "no_overwrite"]) +def test_insert_duckdb(benchmark, overwrite, tmp_path): + pytest.importorskip("duckdb") + + n_rows = int(1e4) + table_name = "t" + schema = ibis.schema(dict(a="int64", b="int64", c="int64")) + t = ibis.memtable(dict.fromkeys(list("abc"), range(n_rows)), schema=schema) + + con = ibis.duckdb.connect(tmp_path / 
"test_insert.ddb") + con.create_table(table_name, schema=schema) + benchmark(con.insert, table_name, t, overwrite=overwrite) + + +def test_snowflake_medium_sized_to_pandas(benchmark): + pytest.importorskip("snowflake.connector") + + if (url := os.environ.get("SNOWFLAKE_URL")) is None: + pytest.skip("SNOWFLAKE_URL environment variable not set") + + con = ibis.connect(url) + + # LINEITEM at scale factor 1 is around 6MM rows, but we limit to 1,000,000 + # to make the benchmark fast enough for development, yet large enough to show a + # difference if there's a performance hit + lineitem = con.table("LINEITEM", schema="SNOWFLAKE_SAMPLE_DATA.TPCH_SF1").limit( + 1_000_000 + ) + + benchmark.pedantic(lineitem.to_pandas, rounds=5, iterations=1, warmup_rounds=1) + + +def test_parse_many_duckdb_types(benchmark): + parse = pytest.importorskip("ibis.backends.duckdb.datatypes").DuckDBType.from_string + + def parse_many(types): + list(map(parse, types)) + + types = ["VARCHAR", "INTEGER", "DOUBLE", "BIGINT"] * 1000 + benchmark(parse_many, types) + + +@pytest.fixture(scope="session") +def sql() -> str: + return """ + SELECT t1.id as t1_id, x, t2.id as t2_id, y + FROM t1 INNER JOIN t2 + ON t1.id = t2.id + """ + + +@pytest.fixture(scope="session") +def ddb(tmp_path_factory): + duckdb = pytest.importorskip("duckdb") + + N = 20_000_000 + + con = duckdb.connect() + + path = str(tmp_path_factory.mktemp("duckdb") / "data.ddb") + sql = ( + lambda var, table, n=N: f""" + CREATE TABLE {table} AS + SELECT ROW_NUMBER() OVER () AS id, {var} + FROM ( + SELECT {var} + FROM RANGE({n}) _ ({var}) + ORDER BY RANDOM() + ) + """ + ) + + with duckdb.connect(path) as con: + con.execute(sql("x", table="t1")) + con.execute(sql("y", table="t2")) + return path + + +def test_duckdb_to_pyarrow(benchmark, sql, ddb) -> None: + # yes, we're benchmarking duckdb here, not ibis + # + # we do this to get a baseline for comparison + duckdb = pytest.importorskip("duckdb") + con = duckdb.connect(ddb, read_only=True) + + benchmark(lambda sql: con.sql(sql).to_arrow_table(), sql) + + +def test_ibis_duckdb_to_pyarrow(benchmark, sql, ddb) -> None: + pytest.importorskip("duckdb") + + con = ibis.duckdb.connect(ddb, read_only=True) + + expr = con.sql(sql) + benchmark(expr.to_pyarrow) + + +@pytest.fixture +def diffs(): + return ibis.table( + { + "id": "int64", + "validation_name": "string", + "difference": "float64", + "pct_difference": "float64", + "pct_threshold": "float64", + "validation_status": "string", + }, + name="diffs", + ) + + +@pytest.fixture +def srcs(): + return ibis.table( + { + "id": "int64", + "validation_name": "string", + "validation_type": "string", + "aggregation_type": "string", + "table_name": "string", + "column_name": "string", + "primary_keys": "string", + "num_random_rows": "string", + "agg_value": "float64", + }, + name="srcs", + ) + + +@pytest.fixture +def nrels(): + return 300 + + +def make_big_union(t, nrels): + return ibis.union(*[t] * nrels) + + +@pytest.fixture +def src(srcs, nrels): + return make_big_union(srcs, nrels) + + +@pytest.fixture +def diff(diffs, nrels): + return make_big_union(diffs, nrels) + + +def test_big_eq_expr(benchmark, src, diff): + benchmark(ops.core.Node.equals, src.op(), diff.op()) + + +def test_big_join_expr(benchmark, src, diff): + benchmark(ir.Table.join, src, diff, ["validation_name"], how="outer") + + +def test_big_join_execute(benchmark, nrels): + pytest.importorskip("duckdb") + + con = ibis.duckdb.connect() + + # cache to avoid a request-per-union operand + src = make_big_union( + 
con.read_csv( + "https://github.com/ibis-project/ibis/files/12580336/source_pivot.csv" + ) + .rename(id="column0") + .cache(), + nrels, + ) + + diff = make_big_union( + con.read_csv( + "https://github.com/ibis-project/ibis/files/12580340/differences_pivot.csv" + ) + .rename(id="column0") + .cache(), + nrels, + ) + + expr = src.join(diff, ["validation_name"], how="outer") + t = benchmark.pedantic(expr.to_pyarrow, rounds=1, iterations=1, warmup_rounds=1) + assert len(t) From e4df99b481bce5317937c99d1265876a77378a48 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 2 Feb 2024 06:02:09 -0500 Subject: [PATCH 157/161] test(duckdb): account for other errors when running in the nix sandbox --- ibis/backends/duckdb/tests/test_register.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ibis/backends/duckdb/tests/test_register.py b/ibis/backends/duckdb/tests/test_register.py index c1f7b6cfa8f4..eec6a6520334 100644 --- a/ibis/backends/duckdb/tests/test_register.py +++ b/ibis/backends/duckdb/tests/test_register.py @@ -110,7 +110,7 @@ def test_read_geo_from_url(con, monkeypatch): loaded_exts = [] monkeypatch.setattr(con, "_load_extensions", lambda x, **_: loaded_exts.extend(x)) - with pytest.raises(duckdb.IOException): + with pytest.raises((duckdb.IOException, duckdb.CatalogException)): # The read will fail, either because the URL is bogus (which it is) or # because the current connection doesn't have the spatial extension # installed and so the call to `st_read` will raise a catalog error. @@ -355,7 +355,7 @@ def test_set_temp_dir(tmp_path): "nix on linux cannot download duckdb extensions or data due to sandboxing; " "duckdb will try to automatically install and load read_parquet" ), - raises=duckdb.IOException, + raises=(duckdb.Error, duckdb.IOException), ) def test_s3_403_fallback(con, httpserver, monkeypatch): # monkeypatch to avoid downloading extensions in tests From d5e256f632e8ce5aef4aac1183435a60ffa4acb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 2 Feb 2024 11:14:45 +0100 Subject: [PATCH 158/161] feat(api): support the inner join convenience to not repeat fields known to be equal (#8127) Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com> --- ibis/backends/polars/tests/test_join.py | 1 - .../test_many_subqueries/bigquery/out.sql | 68 +++--- .../test_many_subqueries/clickhouse/out.sql | 67 +++--- .../test_many_subqueries/datafusion/out.sql | 67 +++--- .../test_many_subqueries/druid/out.sql | 42 ++++ .../test_many_subqueries/duckdb/out.sql | 67 +++--- .../test_many_subqueries/exasol/out.sql | 42 ++++ .../test_many_subqueries/impala/out.sql | 70 +++--- .../test_many_subqueries/mssql/out.sql | 66 +++--- .../test_many_subqueries/mysql/out.sql | 66 +++--- .../test_many_subqueries/oracle/out.sql | 66 +++--- .../test_many_subqueries/postgres/out.sql | 66 +++--- .../test_many_subqueries/pyspark/out.sql | 42 ++++ .../test_many_subqueries/snowflake/out.sql | 67 +++--- .../test_many_subqueries/trino/out.sql | 66 +++--- .../bigquery/out.sql | 32 ++- .../clickhouse/out.sql | 46 ++-- .../datafusion/out.sql | 56 ++--- .../test_cte_refs_in_topo_order/druid/out.sql | 20 ++ .../duckdb/out.sql | 48 ++-- .../exasol/out.sql | 20 ++ .../impala/out.sql | 32 ++- .../test_cte_refs_in_topo_order/mssql/out.sql | 36 +-- .../test_cte_refs_in_topo_order/mysql/out.sql | 34 ++- .../oracle/out.sql | 34 ++- .../postgres/out.sql | 32 ++- .../pyspark/out.sql | 20 ++ .../snowflake/out.sql | 48 ++-- 
.../test_cte_refs_in_topo_order/trino/out.sql | 32 ++- ibis/backends/tests/test_generic.py | 16 +- ibis/backends/tests/test_sql.py | 5 +- ibis/common/egraph.py | 13 ++ ibis/common/tests/test_egraph.py | 7 + .../test_table_count_expr/join_repr.txt | 1 - ibis/expr/tests/test_newrels.py | 138 +++++++++++- ibis/expr/types/joins.py | 209 +++++++++++++----- ibis/tests/expr/test_table.py | 78 +++---- 37 files changed, 1058 insertions(+), 762 deletions(-) create mode 100644 ibis/backends/tests/snapshots/test_generic/test_many_subqueries/druid/out.sql create mode 100644 ibis/backends/tests/snapshots/test_generic/test_many_subqueries/exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_generic/test_many_subqueries/pyspark/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/druid/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/pyspark/out.sql diff --git a/ibis/backends/polars/tests/test_join.py b/ibis/backends/polars/tests/test_join.py index 51a7295dd924..26667c808981 100644 --- a/ibis/backends/polars/tests/test_join.py +++ b/ibis/backends/polars/tests/test_join.py @@ -17,7 +17,6 @@ def test_memtable_join(con): "x": [1, 2, 3], "y": [4, 5, 6], "z": ["a", "b", "c"], - "x_right": [1, 2, 3], "y_right": [9, 8, 7], "z_right": ["f", "e", "d"], } diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/bigquery/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/bigquery/out.sql index 07af57981776..85f1c3cc6b78 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/bigquery/out.sql @@ -1,36 +1,42 @@ -WITH t0 AS ( +WITH t6 AS ( SELECT - t5.*, - ( - row_number() OVER (ORDER BY t5.`street` ASC) - 1 - ) AS `key` - FROM data AS t5 + t5.street, + ROW_NUMBER() OVER (ORDER BY t5.street ASC) - 1 AS key + FROM ( + SELECT + t2.street, + t2.key + FROM ( + SELECT + t0.street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC) - 1 AS key + FROM data AS t0 + ) AS t2 + INNER JOIN ( + SELECT + t1.key + FROM ( + SELECT + t0.street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC) - 1 AS key + FROM data AS t0 + ) AS t1 + ) AS t4 + ON t2.key = t4.key + ) AS t5 ), t1 AS ( SELECT - t0.`key` - FROM t0 -), t2 AS ( - SELECT - t0.`street`, - t0.`key` - FROM t0 - INNER JOIN t1 - ON t0.`key` = t1.`key` -), t3 AS ( - SELECT - t2.`street`, - ( - row_number() OVER (ORDER BY t2.`street` ASC) - 1 - ) AS `key` - FROM t2 -), t4 AS ( - SELECT - t3.`key` - FROM t3 + t0.street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC) - 1 AS key + FROM data AS t0 ) SELECT - t3.`street`, - t3.`key` -FROM t3 -INNER JOIN t4 - ON t3.`key` = t4.`key` \ No newline at end of file + t8.street, + t8.key +FROM t6 AS t8 +INNER JOIN ( + SELECT + t7.key + FROM t6 AS t7 +) AS t10 + ON t8.key = t10.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql index e472a5727fab..fca431bc4c45 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql @@ -1,55 +1,42 @@ -SELECT - t5.street AS street, - t5.key AS key, - t5.key_right AS key_right -FROM ( - SELECT - 
t1.street AS street, - ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key, - t3.key AS key_right - FROM ( - SELECT - t0.street AS street, - ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key - FROM data AS t0 - ) AS t1 - INNER JOIN ( - SELECT - t1.key AS key - FROM ( - SELECT - t0.street AS street, - ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key - FROM data AS t0 - ) AS t1 - ) AS t3 - ON t1.key = t3.key -) AS t5 -INNER JOIN ( +WITH t6 AS ( SELECT - t5.key AS key + t5.street, + ROW_NUMBER() OVER (ORDER BY t5.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key FROM ( SELECT - t1.street AS street, - ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key, - t3.key AS key_right + t2.street, + t2.key FROM ( SELECT - t0.street AS street, + t0.street, ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key FROM data AS t0 - ) AS t1 + ) AS t2 INNER JOIN ( SELECT - t1.key AS key + t1.key FROM ( SELECT - t0.street AS street, + t0.street, ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key FROM data AS t0 ) AS t1 - ) AS t3 - ON t1.key = t3.key + ) AS t4 + ON t2.key = t4.key ) AS t5 -) AS t7 - ON t5.key = t7.key \ No newline at end of file +), t1 AS ( + SELECT + t0.street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key + FROM data AS t0 +) +SELECT + t8.street, + t8.key +FROM t6 AS t8 +INNER JOIN ( + SELECT + t7.key + FROM t6 AS t7 +) AS t10 + ON t8.key = t10.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/datafusion/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/datafusion/out.sql index f7cf54e9de51..64a6e78e8bf3 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/datafusion/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/datafusion/out.sql @@ -1,55 +1,42 @@ -SELECT - "t5"."street" AS "street", - "t5"."key" AS "key", - "t5"."key_right" AS "key_right" -FROM ( - SELECT - "t1"."street" AS "street", - ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key", - "t2"."key" AS "key_right" - FROM ( - SELECT - "t0"."street" AS "street", - ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" - FROM "data" AS "t0" - ) AS "t1" - INNER JOIN ( - SELECT - "t1"."key" AS "key" - FROM ( - SELECT - "t0"."street" AS "street", - ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" - FROM "data" AS "t0" - ) AS "t1" - ) AS "t2" - ON "t1"."key" = "t2"."key" -) AS "t5" -INNER JOIN ( +WITH "t6" AS ( SELECT - "t5"."key" AS "key" + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" FROM ( SELECT - "t1"."street" AS "street", - ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key", - "t2"."key" AS "key_right" + "t2"."street", + "t2"."key" FROM ( SELECT - "t0"."street" AS "street", + "t0"."street", ROW_NUMBER() OVER (ORDER 
BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" FROM "data" AS "t0" - ) AS "t1" + ) AS "t2" INNER JOIN ( SELECT - "t1"."key" AS "key" + "t1"."key" FROM ( SELECT - "t0"."street" AS "street", + "t0"."street", ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" FROM "data" AS "t0" ) AS "t1" - ) AS "t2" - ON "t1"."key" = "t2"."key" + ) AS "t4" + ON "t2"."key" = "t4"."key" ) AS "t5" -) AS "t6" - ON "t5"."key" = "t6"."key" \ No newline at end of file +), "t1" AS ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" +) +SELECT + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" +INNER JOIN ( + SELECT + "t7"."key" + FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/druid/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/druid/out.sql new file mode 100644 index 000000000000..64a6e78e8bf3 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/druid/out.sql @@ -0,0 +1,42 @@ +WITH "t6" AS ( + SELECT + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM ( + SELECT + "t2"."street", + "t2"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t4" + ON "t2"."key" = "t4"."key" + ) AS "t5" +), "t1" AS ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" +) +SELECT + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" +INNER JOIN ( + SELECT + "t7"."key" + FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql index 2a699a186d7d..2e5f7d14030f 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql @@ -1,55 +1,42 @@ -SELECT - t5.street AS street, - t5.key AS key, - t5.key_right AS key_right -FROM ( - SELECT - t1.street AS street, - ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key, - t3.key AS key_right - FROM ( - SELECT - t0.street AS street, - ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key - FROM data AS t0 - ) AS t1 - INNER JOIN ( - SELECT - t1.key AS key - FROM ( - SELECT - t0.street AS street, - ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key - FROM data AS t0 - ) AS t1 - ) AS t3 - ON t1.key = t3.key -) AS t5 -INNER JOIN ( +WITH t6 AS ( SELECT - t5.key AS key + t5.street, + ROW_NUMBER() OVER (ORDER BY 
t5.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key FROM ( SELECT - t1.street AS street, - ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key, - t3.key AS key_right + t2.street, + t2.key FROM ( SELECT - t0.street AS street, + t0.street, ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key FROM data AS t0 - ) AS t1 + ) AS t2 INNER JOIN ( SELECT - t1.key AS key + t1.key FROM ( SELECT - t0.street AS street, + t0.street, ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key FROM data AS t0 ) AS t1 - ) AS t3 - ON t1.key = t3.key + ) AS t4 + ON t2.key = t4.key ) AS t5 -) AS t7 - ON t5.key = t7.key \ No newline at end of file +), t1 AS ( + SELECT + t0.street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key + FROM data AS t0 +) +SELECT + t8.street, + t8.key +FROM t6 AS t8 +INNER JOIN ( + SELECT + t7.key + FROM t6 AS t7 +) AS t10 + ON t8.key = t10.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/exasol/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/exasol/out.sql new file mode 100644 index 000000000000..c2670a045cce --- /dev/null +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/exasol/out.sql @@ -0,0 +1,42 @@ +WITH "t6" AS ( + SELECT + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC) - 1 AS "key" + FROM ( + SELECT + "t2"."street", + "t2"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t4" + ON "t2"."key" = "t4"."key" + ) AS "t5" +), "t1" AS ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC) - 1 AS "key" + FROM "data" AS "t0" +) +SELECT + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" +INNER JOIN ( + SELECT + "t7"."key" + FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/impala/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/impala/out.sql index eaec992e3f55..208dc189381d 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/impala/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/impala/out.sql @@ -1,36 +1,42 @@ -WITH t0 AS ( +WITH `t6` AS ( SELECT - t5.*, - ( - ROW_NUMBER() OVER (ORDER BY t5.`street` ASC) - 1 - ) AS `key` - FROM `data` AS t5 -), t1 AS ( + `t5`.`street`, + ROW_NUMBER() OVER (ORDER BY `t5`.`street` ASC NULLS LAST) - 1 AS `key` + FROM ( + SELECT + `t2`.`street`, + `t2`.`key` + FROM ( + SELECT + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY `t0`.`street` ASC NULLS LAST) - 1 AS `key` + FROM `data` AS `t0` + ) AS `t2` + INNER JOIN ( + SELECT + `t1`.`key` + FROM ( + SELECT + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY `t0`.`street` ASC NULLS LAST) - 1 AS `key` + FROM `data` AS `t0` + ) AS `t1` + ) AS `t4` + ON `t2`.`key` = `t4`.`key` + ) AS `t5` +), `t1` AS ( SELECT - t0.`key` - FROM t0 -), t2 AS ( - SELECT - t0.`street`, - t0.`key` - FROM t0 - 
INNER JOIN t1 - ON t0.`key` = t1.`key` -), t3 AS ( - SELECT - t2.`street`, - ( - ROW_NUMBER() OVER (ORDER BY t2.`street` ASC) - 1 - ) AS `key` - FROM t2 -), t4 AS ( - SELECT - t3.`key` - FROM t3 + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY `t0`.`street` ASC NULLS LAST) - 1 AS `key` + FROM `data` AS `t0` ) SELECT - t3.`street`, - t3.`key` -FROM t3 -INNER JOIN t4 - ON t3.`key` = t4.`key` \ No newline at end of file + `t8`.`street`, + `t8`.`key` +FROM `t6` AS `t8` +INNER JOIN ( + SELECT + `t7`.`key` + FROM `t6` AS `t7` +) AS `t10` + ON `t8`.`key` = `t10`.`key` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mssql/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mssql/out.sql index 3dc5e59da76a..b20ffa2875e7 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mssql/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mssql/out.sql @@ -1,32 +1,42 @@ -WITH t0 AS ( +WITH [t6] AS ( SELECT - t5.street AS street, - ROW_NUMBER() OVER (ORDER BY t5.street ASC) - 1 AS [key] - FROM data AS t5 -), t1 AS ( + [t5].[street] AS [street], + ROW_NUMBER() OVER (ORDER BY CASE WHEN [t5].[street] IS NULL THEN 1 ELSE 0 END, [t5].[street] ASC) - 1 AS [key] + FROM ( + SELECT + [t2].[street] AS [street], + [t2].[key] AS [key] + FROM ( + SELECT + [t0].[street] AS [street], + ROW_NUMBER() OVER (ORDER BY CASE WHEN [t0].[street] IS NULL THEN 1 ELSE 0 END, [t0].[street] ASC) - 1 AS [key] + FROM [data] AS [t0] + ) AS [t2] + INNER JOIN ( + SELECT + [t1].[key] AS [key] + FROM ( + SELECT + [t0].[street] AS [street], + ROW_NUMBER() OVER (ORDER BY CASE WHEN [t0].[street] IS NULL THEN 1 ELSE 0 END, [t0].[street] ASC) - 1 AS [key] + FROM [data] AS [t0] + ) AS [t1] + ) AS [t4] + ON [t2].[key] = [t4].[key] + ) AS [t5] +), [t1] AS ( SELECT - t0.[key] AS [key] - FROM t0 -), t2 AS ( - SELECT - t0.street AS street, - t0.[key] AS [key] - FROM t0 - JOIN t1 - ON t0.[key] = t1.[key] -), t3 AS ( - SELECT - t2.street AS street, - ROW_NUMBER() OVER (ORDER BY t2.street ASC) - 1 AS [key] - FROM t2 -), t4 AS ( - SELECT - t3.[key] AS [key] - FROM t3 + [t0].[street] AS [street], + ROW_NUMBER() OVER (ORDER BY CASE WHEN [t0].[street] IS NULL THEN 1 ELSE 0 END, [t0].[street] ASC) - 1 AS [key] + FROM [data] AS [t0] ) SELECT - t3.street, - t3.[key] -FROM t3 -JOIN t4 - ON t3.[key] = t4.[key] \ No newline at end of file + [t8].[street], + [t8].[key] +FROM [t6] AS [t8] +INNER JOIN ( + SELECT + [t7].[key] AS [key] + FROM [t6] AS [t7] +) AS [t10] + ON [t8].[key] = [t10].[key] \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mysql/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mysql/out.sql index e0ed4bcbb100..f5252710ab98 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mysql/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mysql/out.sql @@ -1,32 +1,42 @@ -WITH t0 AS ( +WITH `t6` AS ( SELECT - t5.street AS street, - ROW_NUMBER() OVER (ORDER BY t5.street ASC) - 1 AS `key` - FROM data AS t5 -), t1 AS ( + `t5`.`street`, + ROW_NUMBER() OVER (ORDER BY CASE WHEN `t5`.`street` IS NULL THEN 1 ELSE 0 END, `t5`.`street` ASC) - 1 AS `key` + FROM ( + SELECT + `t2`.`street`, + `t2`.`key` + FROM ( + SELECT + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY CASE WHEN `t0`.`street` IS NULL THEN 1 ELSE 0 END, `t0`.`street` ASC) - 1 AS `key` + FROM `data` AS `t0` + ) AS `t2` + INNER JOIN ( + SELECT + `t1`.`key` 
+ FROM ( + SELECT + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY CASE WHEN `t0`.`street` IS NULL THEN 1 ELSE 0 END, `t0`.`street` ASC) - 1 AS `key` + FROM `data` AS `t0` + ) AS `t1` + ) AS `t4` + ON `t2`.`key` = `t4`.`key` + ) AS `t5` +), `t1` AS ( SELECT - t0.`key` AS `key` - FROM t0 -), t2 AS ( - SELECT - t0.street AS street, - t0.`key` AS `key` - FROM t0 - INNER JOIN t1 - ON t0.`key` = t1.`key` -), t3 AS ( - SELECT - t2.street AS street, - ROW_NUMBER() OVER (ORDER BY t2.street ASC) - 1 AS `key` - FROM t2 -), t4 AS ( - SELECT - t3.`key` AS `key` - FROM t3 + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY CASE WHEN `t0`.`street` IS NULL THEN 1 ELSE 0 END, `t0`.`street` ASC) - 1 AS `key` + FROM `data` AS `t0` ) SELECT - t3.street, - t3.`key` -FROM t3 -INNER JOIN t4 - ON t3.`key` = t4.`key` \ No newline at end of file + `t8`.`street`, + `t8`.`key` +FROM `t6` AS `t8` +INNER JOIN ( + SELECT + `t7`.`key` + FROM `t6` AS `t7` +) AS `t10` + ON `t8`.`key` = `t10`.`key` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/oracle/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/oracle/out.sql index 9459ded5586f..bb10644c1dad 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/oracle/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/oracle/out.sql @@ -1,32 +1,42 @@ -WITH t0 AS ( +WITH "t6" AS ( SELECT - t5."street" AS "street", - ROW_NUMBER() OVER (ORDER BY t5."street" ASC) - 1 AS "key" - FROM "data" t5 -), t1 AS ( + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC NULLS LAST) - 1 AS "key" + FROM ( + SELECT + "t2"."street", + "t2"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC NULLS LAST) - 1 AS "key" + FROM "data" "t0" + ) "t2" + INNER JOIN ( + SELECT + "t1"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC NULLS LAST) - 1 AS "key" + FROM "data" "t0" + ) "t1" + ) "t4" + ON "t2"."key" = "t4"."key" + ) "t5" +), "t1" AS ( SELECT - t0."key" AS "key" - FROM t0 -), t2 AS ( - SELECT - t0."street" AS "street", - t0."key" AS "key" - FROM t0 - JOIN t1 - ON t0."key" = t1."key" -), t3 AS ( - SELECT - t2."street" AS "street", - ROW_NUMBER() OVER (ORDER BY t2."street" ASC) - 1 AS "key" - FROM t2 -), t4 AS ( - SELECT - t3."key" AS "key" - FROM t3 + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC NULLS LAST) - 1 AS "key" + FROM "data" "t0" ) SELECT - t3."street", - t3."key" -FROM t3 -JOIN t4 - ON t3."key" = t4."key" \ No newline at end of file + "t8"."street", + "t8"."key" +FROM "t6" "t8" +INNER JOIN ( + SELECT + "t7"."key" + FROM "t6" "t7" +) "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/postgres/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/postgres/out.sql index fb8c40cd69ba..64a6e78e8bf3 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/postgres/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/postgres/out.sql @@ -1,32 +1,42 @@ -WITH t0 AS ( +WITH "t6" AS ( SELECT - t5.street AS street, - ROW_NUMBER() OVER (ORDER BY t5.street ASC) - 1 AS key - FROM data AS t5 -), t1 AS ( + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM ( + SELECT + "t2"."street", + "t2"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() 
OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t4" + ON "t2"."key" = "t4"."key" + ) AS "t5" +), "t1" AS ( SELECT - t0.key AS key - FROM t0 -), t2 AS ( - SELECT - t0.street AS street, - t0.key AS key - FROM t0 - JOIN t1 - ON t0.key = t1.key -), t3 AS ( - SELECT - t2.street AS street, - ROW_NUMBER() OVER (ORDER BY t2.street ASC) - 1 AS key - FROM t2 -), t4 AS ( - SELECT - t3.key AS key - FROM t3 + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" ) SELECT - t3.street, - t3.key -FROM t3 -JOIN t4 - ON t3.key = t4.key \ No newline at end of file + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" +INNER JOIN ( + SELECT + "t7"."key" + FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/pyspark/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/pyspark/out.sql new file mode 100644 index 000000000000..208dc189381d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/pyspark/out.sql @@ -0,0 +1,42 @@ +WITH `t6` AS ( + SELECT + `t5`.`street`, + ROW_NUMBER() OVER (ORDER BY `t5`.`street` ASC NULLS LAST) - 1 AS `key` + FROM ( + SELECT + `t2`.`street`, + `t2`.`key` + FROM ( + SELECT + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY `t0`.`street` ASC NULLS LAST) - 1 AS `key` + FROM `data` AS `t0` + ) AS `t2` + INNER JOIN ( + SELECT + `t1`.`key` + FROM ( + SELECT + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY `t0`.`street` ASC NULLS LAST) - 1 AS `key` + FROM `data` AS `t0` + ) AS `t1` + ) AS `t4` + ON `t2`.`key` = `t4`.`key` + ) AS `t5` +), `t1` AS ( + SELECT + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY `t0`.`street` ASC NULLS LAST) - 1 AS `key` + FROM `data` AS `t0` +) +SELECT + `t8`.`street`, + `t8`.`key` +FROM `t6` AS `t8` +INNER JOIN ( + SELECT + `t7`.`key` + FROM `t6` AS `t7` +) AS `t10` + ON `t8`.`key` = `t10`.`key` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql index d63129cc6985..64a6e78e8bf3 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql @@ -1,55 +1,42 @@ -SELECT - "t5"."street" AS "street", - "t5"."key" AS "key", - "t5"."key_right" AS "key_right" -FROM ( - SELECT - "t1"."street" AS "street", - ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key", - "t3"."key" AS "key_right" - FROM ( - SELECT - "t0"."street" AS "street", - ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" - FROM "data" AS "t0" - ) AS "t1" - INNER JOIN ( - SELECT - "t1"."key" AS "key" - FROM ( - SELECT - "t0"."street" AS "street", - ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" - FROM "data" AS "t0" - ) AS "t1" - ) AS "t3" - ON "t1"."key" = "t3"."key" -) AS "t5" 
-INNER JOIN ( +WITH "t6" AS ( SELECT - "t5"."key" AS "key" + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" FROM ( SELECT - "t1"."street" AS "street", - ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key", - "t3"."key" AS "key_right" + "t2"."street", + "t2"."key" FROM ( SELECT - "t0"."street" AS "street", + "t0"."street", ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" FROM "data" AS "t0" - ) AS "t1" + ) AS "t2" INNER JOIN ( SELECT - "t1"."key" AS "key" + "t1"."key" FROM ( SELECT - "t0"."street" AS "street", + "t0"."street", ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" FROM "data" AS "t0" ) AS "t1" - ) AS "t3" - ON "t1"."key" = "t3"."key" + ) AS "t4" + ON "t2"."key" = "t4"."key" ) AS "t5" -) AS "t7" - ON "t5"."key" = "t7"."key" \ No newline at end of file +), "t1" AS ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" +) +SELECT + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" +INNER JOIN ( + SELECT + "t7"."key" + FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/trino/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/trino/out.sql index fb8c40cd69ba..64a6e78e8bf3 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/trino/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/trino/out.sql @@ -1,32 +1,42 @@ -WITH t0 AS ( +WITH "t6" AS ( SELECT - t5.street AS street, - ROW_NUMBER() OVER (ORDER BY t5.street ASC) - 1 AS key - FROM data AS t5 -), t1 AS ( + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM ( + SELECT + "t2"."street", + "t2"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t4" + ON "t2"."key" = "t4"."key" + ) AS "t5" +), "t1" AS ( SELECT - t0.key AS key - FROM t0 -), t2 AS ( - SELECT - t0.street AS street, - t0.key AS key - FROM t0 - JOIN t1 - ON t0.key = t1.key -), t3 AS ( - SELECT - t2.street AS street, - ROW_NUMBER() OVER (ORDER BY t2.street ASC) - 1 AS key - FROM t2 -), t4 AS ( - SELECT - t3.key AS key - FROM t3 + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" ) SELECT - t3.street, - t3.key -FROM t3 -JOIN t4 - ON t3.key = t4.key \ No newline at end of file + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" +INNER JOIN ( + SELECT + "t7"."key" + FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/bigquery/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/bigquery/out.sql index 
47f235ccccf8..d0b7a174d49a 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/bigquery/out.sql @@ -1,22 +1,20 @@ -WITH t0 AS ( +WITH t1 AS ( SELECT - t4.* - FROM leaf AS t4 + t0.key + FROM leaf AS t0 WHERE TRUE -), t1 AS ( - SELECT - t0.`key` - FROM t0 -), t2 AS ( - SELECT - t0.`key` - FROM t0 - INNER JOIN t1 - ON t0.`key` = t1.`key` ) SELECT - t2.`key` -FROM t2 -INNER JOIN t2 AS t3 - ON t2.`key` = t3.`key` \ No newline at end of file + t3.key +FROM t1 AS t3 +INNER JOIN t1 AS t4 + ON t3.key = t4.key +INNER JOIN ( + SELECT + t3.key + FROM t1 AS t3 + INNER JOIN t1 AS t4 + ON t3.key = t4.key +) AS t6 + ON t3.key = t6.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql index 6dfef25abe9f..d0b7a174d49a 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql @@ -1,40 +1,20 @@ -SELECT - t2.key AS key, - t3.key AS key_right, - t6.key_right AS key_right_right -FROM ( +WITH t1 AS ( SELECT - t0.key AS key + t0.key FROM leaf AS t0 WHERE TRUE -) AS t2 -INNER JOIN ( - SELECT - t0.key AS key - FROM leaf AS t0 - WHERE - TRUE -) AS t3 - ON t2.key = t3.key +) +SELECT + t3.key +FROM t1 AS t3 +INNER JOIN t1 AS t4 + ON t3.key = t4.key INNER JOIN ( SELECT - t2.key AS key, - t3.key AS key_right - FROM ( - SELECT - t0.key AS key - FROM leaf AS t0 - WHERE - TRUE - ) AS t2 - INNER JOIN ( - SELECT - t0.key AS key - FROM leaf AS t0 - WHERE - TRUE - ) AS t3 - ON t2.key = t3.key + t3.key + FROM t1 AS t3 + INNER JOIN t1 AS t4 + ON t3.key = t4.key ) AS t6 - ON t6.key = t6.key \ No newline at end of file + ON t3.key = t6.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/datafusion/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/datafusion/out.sql index 96acd49caaad..3cccc7356173 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/datafusion/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/datafusion/out.sql @@ -1,48 +1,20 @@ -SELECT - "t1"."key" AS "key", - "t2"."key" AS "key_right", - "t4"."key_right" AS "key_right_right" -FROM ( +WITH "t1" AS ( SELECT - * + "t0"."key" FROM "leaf" AS "t0" WHERE TRUE -) AS "t1" -INNER JOIN ( - SELECT - "t1"."key" AS "key" - FROM ( - SELECT - * - FROM "leaf" AS "t0" - WHERE - TRUE - ) AS "t1" -) AS "t2" - ON "t1"."key" = "t2"."key" +) +SELECT + "t3"."key" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" INNER JOIN ( SELECT - "t1"."key" AS "key", - "t2"."key" AS "key_right" - FROM ( - SELECT - * - FROM "leaf" AS "t0" - WHERE - TRUE - ) AS "t1" - INNER JOIN ( - SELECT - "t1"."key" AS "key" - FROM ( - SELECT - * - FROM "leaf" AS "t0" - WHERE - TRUE - ) AS "t1" - ) AS "t2" - ON "t1"."key" = "t2"."key" -) AS "t4" - ON "t1"."key" = "t1"."key" \ No newline at end of file + "t3"."key" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +) AS "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/druid/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/druid/out.sql new file mode 
100644 index 000000000000..3cccc7356173 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/druid/out.sql @@ -0,0 +1,20 @@ +WITH "t1" AS ( + SELECT + "t0"."key" + FROM "leaf" AS "t0" + WHERE + TRUE +) +SELECT + "t3"."key" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +INNER JOIN ( + SELECT + "t3"."key" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +) AS "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql index fb2ee62190b5..d0b7a174d49a 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql @@ -1,40 +1,20 @@ -SELECT - t1.key AS key, - t2.key AS key_right, - t5.key_right AS key_right_right -FROM ( +WITH t1 AS ( SELECT - t0.key AS key + t0.key FROM leaf AS t0 WHERE TRUE -) AS t1 -INNER JOIN ( - SELECT - t0.key AS key - FROM leaf AS t0 - WHERE - TRUE -) AS t2 - ON t1.key = t2.key +) +SELECT + t3.key +FROM t1 AS t3 +INNER JOIN t1 AS t4 + ON t3.key = t4.key INNER JOIN ( SELECT - t1.key AS key, - t2.key AS key_right - FROM ( - SELECT - t0.key AS key - FROM leaf AS t0 - WHERE - TRUE - ) AS t1 - INNER JOIN ( - SELECT - t0.key AS key - FROM leaf AS t0 - WHERE - TRUE - ) AS t2 - ON t1.key = t2.key -) AS t5 - ON t1.key = t5.key \ No newline at end of file + t3.key + FROM t1 AS t3 + INNER JOIN t1 AS t4 + ON t3.key = t4.key +) AS t6 + ON t3.key = t6.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/exasol/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/exasol/out.sql new file mode 100644 index 000000000000..3cccc7356173 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/exasol/out.sql @@ -0,0 +1,20 @@ +WITH "t1" AS ( + SELECT + "t0"."key" + FROM "leaf" AS "t0" + WHERE + TRUE +) +SELECT + "t3"."key" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +INNER JOIN ( + SELECT + "t3"."key" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +) AS "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/impala/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/impala/out.sql index b5eb154f064d..8d13c9ddda1b 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/impala/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/impala/out.sql @@ -1,22 +1,20 @@ -WITH t0 AS ( +WITH `t1` AS ( SELECT - t4.* - FROM `leaf` AS t4 + `t0`.`key` + FROM `leaf` AS `t0` WHERE TRUE -), t1 AS ( - SELECT - t0.`key` - FROM t0 -), t2 AS ( - SELECT - t0.`key` - FROM t0 - INNER JOIN t1 - ON t0.`key` = t1.`key` ) SELECT - t2.`key` -FROM t2 -INNER JOIN t2 AS t3 - ON t2.`key` = t3.`key` \ No newline at end of file + `t3`.`key` +FROM `t1` AS `t3` +INNER JOIN `t1` AS `t4` + ON `t3`.`key` = `t4`.`key` +INNER JOIN ( + SELECT + `t3`.`key` + FROM `t1` AS `t3` + INNER JOIN `t1` AS `t4` + ON `t3`.`key` = `t4`.`key` +) AS `t6` + ON `t3`.`key` = `t6`.`key` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mssql/out.sql 
b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mssql/out.sql index dbe9900fb111..217eafe26f55 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mssql/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mssql/out.sql @@ -1,22 +1,22 @@ -WITH t0 AS ( +WITH [t1] AS ( SELECT - t4.[key] AS [key] - FROM leaf AS t4 + [t0].[key] AS [key] + FROM [leaf] AS [t0] WHERE - 1 = 1 -), t1 AS ( - SELECT - t0.[key] AS [key] - FROM t0 -), t2 AS ( - SELECT - t0.[key] AS [key] - FROM t0 - JOIN t1 - ON t0.[key] = t1.[key] + ( + 1 = 1 + ) ) SELECT - t2.[key] -FROM t2 -JOIN t2 AS t3 - ON t2.[key] = t3.[key] \ No newline at end of file + [t3].[key] +FROM [t1] AS [t3] +INNER JOIN [t1] AS [t4] + ON [t3].[key] = [t4].[key] +INNER JOIN ( + SELECT + [t3].[key] AS [key] + FROM [t1] AS [t3] + INNER JOIN [t1] AS [t4] + ON [t3].[key] = [t4].[key] +) AS [t6] + ON [t3].[key] = [t6].[key] \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mysql/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mysql/out.sql index b3bba37ea6d9..8d13c9ddda1b 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mysql/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mysql/out.sql @@ -1,22 +1,20 @@ -WITH t0 AS ( +WITH `t1` AS ( SELECT - t4.`key` AS `key` - FROM leaf AS t4 + `t0`.`key` + FROM `leaf` AS `t0` WHERE - TRUE = 1 -), t1 AS ( - SELECT - t0.`key` AS `key` - FROM t0 -), t2 AS ( - SELECT - t0.`key` AS `key` - FROM t0 - INNER JOIN t1 - ON t0.`key` = t1.`key` + TRUE ) SELECT - t2.`key` -FROM t2 -INNER JOIN t2 AS t3 - ON t2.`key` = t3.`key` \ No newline at end of file + `t3`.`key` +FROM `t1` AS `t3` +INNER JOIN `t1` AS `t4` + ON `t3`.`key` = `t4`.`key` +INNER JOIN ( + SELECT + `t3`.`key` + FROM `t1` AS `t3` + INNER JOIN `t1` AS `t4` + ON `t3`.`key` = `t4`.`key` +) AS `t6` + ON `t3`.`key` = `t6`.`key` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/oracle/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/oracle/out.sql index 1a5051a047b2..8a330a708765 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/oracle/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/oracle/out.sql @@ -1,22 +1,20 @@ -WITH t0 AS ( +WITH "t1" AS ( SELECT - t4."key" AS "key" - FROM "leaf" t4 + "t0"."key" + FROM "leaf" "t0" WHERE - 1 = 1 -), t1 AS ( - SELECT - t0."key" AS "key" - FROM t0 -), t2 AS ( - SELECT - t0."key" AS "key" - FROM t0 - JOIN t1 - ON t0."key" = t1."key" + TRUE ) SELECT - t2."key" -FROM t2 -JOIN t2 t3 - ON t2."key" = t3."key" \ No newline at end of file + "t3"."key" +FROM "t1" "t3" +INNER JOIN "t1" "t4" + ON "t3"."key" = "t4"."key" +INNER JOIN ( + SELECT + "t3"."key" + FROM "t1" "t3" + INNER JOIN "t1" "t4" + ON "t3"."key" = "t4"."key" +) "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/postgres/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/postgres/out.sql index 8d5d47b6920b..3cccc7356173 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/postgres/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/postgres/out.sql @@ -1,22 +1,20 @@ -WITH t0 AS ( +WITH "t1" AS ( SELECT - t4.key AS key - FROM leaf AS t4 + "t0"."key" + 
FROM "leaf" AS "t0" WHERE TRUE -), t1 AS ( - SELECT - t0.key AS key - FROM t0 -), t2 AS ( - SELECT - t0.key AS key - FROM t0 - JOIN t1 - ON t0.key = t1.key ) SELECT - t2.key -FROM t2 -JOIN t2 AS t3 - ON t2.key = t3.key \ No newline at end of file + "t3"."key" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +INNER JOIN ( + SELECT + "t3"."key" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +) AS "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/pyspark/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/pyspark/out.sql new file mode 100644 index 000000000000..8d13c9ddda1b --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/pyspark/out.sql @@ -0,0 +1,20 @@ +WITH `t1` AS ( + SELECT + `t0`.`key` + FROM `leaf` AS `t0` + WHERE + TRUE +) +SELECT + `t3`.`key` +FROM `t1` AS `t3` +INNER JOIN `t1` AS `t4` + ON `t3`.`key` = `t4`.`key` +INNER JOIN ( + SELECT + `t3`.`key` + FROM `t1` AS `t3` + INNER JOIN `t1` AS `t4` + ON `t3`.`key` = `t4`.`key` +) AS `t6` + ON `t3`.`key` = `t6`.`key` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql index eb9acf0a45fe..3cccc7356173 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql @@ -1,40 +1,20 @@ -SELECT - "t1"."key" AS "key", - "t2"."key" AS "key_right", - "t5"."key_right" AS "key_right_right" -FROM ( +WITH "t1" AS ( SELECT - "t0"."key" AS "key" + "t0"."key" FROM "leaf" AS "t0" WHERE TRUE -) AS "t1" -INNER JOIN ( - SELECT - "t0"."key" AS "key" - FROM "leaf" AS "t0" - WHERE - TRUE -) AS "t2" - ON "t1"."key" = "t2"."key" +) +SELECT + "t3"."key" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" INNER JOIN ( SELECT - "t1"."key" AS "key", - "t2"."key" AS "key_right" - FROM ( - SELECT - "t0"."key" AS "key" - FROM "leaf" AS "t0" - WHERE - TRUE - ) AS "t1" - INNER JOIN ( - SELECT - "t0"."key" AS "key" - FROM "leaf" AS "t0" - WHERE - TRUE - ) AS "t2" - ON "t1"."key" = "t2"."key" -) AS "t5" - ON "t1"."key" = "t5"."key" \ No newline at end of file + "t3"."key" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +) AS "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/trino/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/trino/out.sql index 8d5d47b6920b..3cccc7356173 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/trino/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/trino/out.sql @@ -1,22 +1,20 @@ -WITH t0 AS ( +WITH "t1" AS ( SELECT - t4.key AS key - FROM leaf AS t4 + "t0"."key" + FROM "leaf" AS "t0" WHERE TRUE -), t1 AS ( - SELECT - t0.key AS key - FROM t0 -), t2 AS ( - SELECT - t0.key AS key - FROM t0 - JOIN t1 - ON t0.key = t1.key ) SELECT - t2.key -FROM t2 -JOIN t2 AS t3 - ON t2.key = t3.key \ No newline at end of file + "t3"."key" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +INNER JOIN ( + SELECT + "t3"."key" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +) AS "t6" + ON "t3"."key" = 
"t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index a685628c4c1d..60df8407803b 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -992,24 +992,12 @@ def test_memtable_column_naming_mismatch(backend, con, monkeypatch, df, columns) ibis.memtable(df, columns=columns) -@pytest.mark.xfail( - raises=com.IntegrityError, reason="inner join convenience not implemented" -) @pytest.mark.notimpl( - ["dask", "datafusion", "pandas", "polars"], + ["dask", "pandas", "polars"], raises=NotImplementedError, reason="not a SQL backend", ) -@pytest.mark.notimpl( - ["pyspark"], reason="pyspark doesn't generate SQL", raises=NotImplementedError -) -@pytest.mark.notimpl(["druid", "flink"], reason="no sqlglot dialect", raises=ValueError) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl( - ["risingwave"], - raises=ValueError, - reason="risingwave doesn't support sqlglot.dialects.dialect.Dialect", -) +@pytest.mark.notimpl(["flink"], reason="no sqlglot dialect", raises=ValueError) def test_many_subqueries(con, snapshot): def query(t, group_cols): t2 = t.mutate(key=ibis.row_number().over(ibis.window(order_by=group_cols))) diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 0db31eb5662d..a9d526c30146 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -83,10 +83,7 @@ def test_group_by_has_index(backend, snapshot): snapshot.assert_match(sql, "out.sql") -@pytest.mark.xfail( - raises=exc.IntegrityError, reason="inner join convenience not implemented" -) -@pytest.mark.never(["pandas", "dask", "polars", "pyspark"], reason="not SQL") +@pytest.mark.never(["pandas", "dask", "polars"], reason="not SQL") def test_cte_refs_in_topo_order(backend, snapshot): mr0 = ibis.table(schema=ibis.schema(dict(key="int")), name="leaf") diff --git a/ibis/common/egraph.py b/ibis/common/egraph.py index ea18870ab401..764ac890b1ec 100644 --- a/ibis/common/egraph.py +++ b/ibis/common/egraph.py @@ -120,6 +120,19 @@ def __eq__(self, other: object) -> bool: return NotImplemented return self._parents == other._parents + def copy(self) -> DisjointSet: + """Make a copy of the disjoint set. + + Returns + ------- + copy: + A copy of the disjoint set. + """ + ds = DisjointSet() + ds._parents = self._parents.copy() + ds._classes = self._classes.copy() + return ds + def add(self, id: K) -> K: """Add a new id to the disjoint set. 
diff --git a/ibis/common/tests/test_egraph.py b/ibis/common/tests/test_egraph.py index b31c527bac17..98fcd04bf1ce 100644 --- a/ibis/common/tests/test_egraph.py +++ b/ibis/common/tests/test_egraph.py @@ -83,6 +83,13 @@ def test_disjoint_set(): ds._classes[1] = {1} ds.verify() + # test copying the disjoint set + ds2 = ds.copy() + assert ds == ds2 + assert ds is not ds2 + ds2.add(5) + assert ds != ds2 + class PatternNamespace: def __init__(self, module): diff --git a/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt b/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt index 6f7009dc8056..a8504725070a 100644 --- a/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt @@ -12,7 +12,6 @@ r2 := JoinChain[r0] values: a: r0.a b: r0.b - a_right: r1.a b_right: r1.b CountStar(): CountStar(r2) \ No newline at end of file diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py index cdd4cd64b049..24c9661d6e6c 100644 --- a/ibis/expr/tests/test_newrels.py +++ b/ibis/expr/tests/test_newrels.py @@ -719,7 +719,6 @@ def test_join_predicate_dereferencing(): "foo_id_right": r2.foo_id, "value1": r2.value1, "value3": r2.value3, - "bar_id_right": r3.bar_id, "value2": r3.value2, }, ) @@ -941,7 +940,7 @@ def test_self_join(): rest=[ ops.JoinLink("inner", r2, [r1.key == r2.key]), ], - values={"key": r1.key, "key_right": r2.key}, + values={"key": r1.key}, ) assert t3.op() == expected @@ -951,11 +950,7 @@ def test_self_join(): ops.JoinLink("inner", r2, [r1.key == r2.key]), ops.JoinLink("inner", r3, [r1.key == r3.key]), ], - values={ - "key": r1.key, - "key_right": r2.key, - "key_right_right": r3.key_right, - }, + values={"key": r1.key}, ) assert t4.op() == expected @@ -1061,7 +1056,6 @@ def test_self_join_extensive(): values={ "a": r1.a, "b": r1.b, - "a_right": r2.a, "b_right": r2.b, }, ) @@ -1083,7 +1077,6 @@ def test_self_join_extensive(): values={ "a": r1.a, "b": r1.b, - "a_right": r2.a, "b_right": r2.b, }, ) @@ -1106,7 +1099,6 @@ def test_self_join_with_intermediate_selection(): values={ "b": r1.b, "a": r1.a, - "a_right": r2.a, "b_right": r2.b, }, ) @@ -1124,7 +1116,6 @@ def test_self_join_with_intermediate_selection(): values={ "a": r1.a, "b_right": r2.b, - "a_right": r3.a, "b": r3.b, }, ) @@ -1133,7 +1124,7 @@ def test_self_join_with_intermediate_selection(): # TODO(kszucs): this use case could be supported if `_get_column` gets # overridden to return underlying column reference, but that would mean # that `aa.a` returns with `a.a` instead of `aa.a` which breaks other - # things + # things; the other possible solution is to use 2way dereferencing # aa = a.join(a, [a.a == a.a]) # aaa = aa["a", "b_right"].join(a, [aa.a == a.a]) # a0 = a @@ -1356,3 +1347,126 @@ def test_join_with_compound_predicate(): }, ) assert expr.op() == expected + + +def test_inner_join_convenience(): + t1 = ibis.table(name="t1", schema={"a": "int64", "b": "string"}) + t2 = ibis.table(name="t2", schema={"a": "int64", "c": "string"}) + t3 = ibis.table(name="t3", schema={"a": "int64", "d": "string"}) + t4 = ibis.table(name="t4", schema={"a": "int64", "e": "string"}) + t5 = ibis.table(name="t5", schema={"a": "int64", "f": "string"}) + + first_join = t1.inner_join(t2, [t1.a == t2.a]) + with join_tables(t1, t2) as (r1, r2): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.a == r2.a]), + ], + values={ + "a": r1.a, + "b": r1.b, + "c": r2.c, + }, + ) + # 
finish to evaluate the collisions + result = first_join._finish().op() + assert result == expected + + # note that we are joining on r2.a which isn't among the values + second_join = first_join.inner_join(t3, [r2.a == t3.a]) + with join_tables(t1, t2, t3) as (r1, r2, r3): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.a == r2.a]), + ops.JoinLink("inner", r3, [r2.a == r3.a]), + ], + values={ + "a": r1.a, + "b": r1.b, + "c": r2.c, + "d": r3.d, + }, + ) + # finish to evaluate the collisions + result = second_join._finish().op() + assert result == expected + + third_join = second_join.left_join(t4, [r3.a == t4.a]) + with join_tables(t1, t2, t3, t4) as (r1, r2, r3, r4): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.a == r2.a]), + ops.JoinLink("inner", r3, [r2.a == r3.a]), + ops.JoinLink("left", r4, [r3.a == r4.a]), + ], + values={ + "a": r1.a, + "b": r1.b, + "c": r2.c, + "d": r3.d, + "a_right": r4.a, + "e": r4.e, + }, + ) + # finish to evaluate the collisions + result = third_join._finish().op() + assert result == expected + + fourth_join = third_join.inner_join(t5, [r3.a == t5.a], rname="{name}_") + with join_tables(t1, t2, t3, t4, t5) as (r1, r2, r3, r4, r5): + # equality groups are being reset + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.a == r2.a]), + ops.JoinLink("inner", r3, [r2.a == r3.a]), + ops.JoinLink("left", r4, [r3.a == r4.a]), + ops.JoinLink("inner", r5, [r3.a == r5.a]), + ], + values={ + "a": r1.a, + "b": r1.b, + "c": r2.c, + "d": r3.d, + "a_right": r4.a, + "e": r4.e, + "f": r5.f, + }, + ) + # finish to evaluate the collisions + result = fourth_join._finish().op() + assert result == expected + + with pytest.raises(IntegrityError): + # equality groups are being reset, t5.a would be renamed to 'a_right' + # which already exists + third_join.inner_join(t5, [r4.a == t5.a])._finish() + + fifth_join = third_join.inner_join(t5, [r4.a == t5.a], rname="{name}_") + with join_tables(t1, t2, t3, t4, t5) as (r1, r2, r3, r4, r5): + # equality groups are being reset + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.a == r2.a]), + ops.JoinLink("inner", r3, [r2.a == r3.a]), + ops.JoinLink("left", r4, [r3.a == r4.a]), + ops.JoinLink("inner", r5, [r4.a == r5.a]), + ], + values={ + "a": r1.a, + "b": r1.b, + "c": r2.c, + "d": r3.d, + "a_right": r4.a, + "e": r4.e, + "a_": r5.a, + "f": r5.f, + }, + ) + # finish to evaluate the collisions + result = fifth_join._finish().op() + assert result == expected diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py index 000a2ecdfb2c..ab6c587b8a33 100644 --- a/ibis/expr/types/joins.py +++ b/ibis/expr/types/joins.py @@ -2,7 +2,7 @@ import functools from public import public -from typing import Any, Optional +from typing import Any, Optional, TYPE_CHECKING from collections.abc import Iterator, Mapping import ibis @@ -21,34 +21,92 @@ ) from ibis.expr.operations.relations import JoinKind from ibis.expr.rewrites import peel_join_field +from ibis.common.egraph import DisjointSet +if TYPE_CHECKING: + from collections.abc import Sequence -def disambiguate_fields(how, left_fields, right_fields, lname, rname): + +def disambiguate_fields( + how, + predicates, + equalities, + left_fields, + right_fields, + left_template, + right_template, +): + """ + Resolve name collisions between the left and right tables. 
+ """ collisions = set() + left_template = left_template or "{name}" + right_template = right_template or "{name}" + + if how == "inner" and util.all_of(predicates, ops.Equals): + # for inner joins composed exclusively of equality predicates, we can + # avoid renaming columns with colliding names if their values are + # guaranteed to be equal due to the predicate + equalities = equalities.copy() + for pred in predicates: + if isinstance(pred.left, ops.Field) and isinstance(pred.right, ops.Field): + # disjoint sets are used to track the equality groups + equalities.add(pred.left) + equalities.add(pred.right) + equalities.union(pred.left, pred.right) if how in ("semi", "anti"): # discard the right fields per left semi and left anty join semantics - return left_fields, collisions - - lname = lname or "{name}" - rname = rname or "{name}" - overlap = left_fields.keys() & right_fields.keys() + return left_fields, collisions, equalities fields = {} for name, field in left_fields.items(): - if name in overlap: - name = lname.format(name=name) + if name in right_fields: + # there is an overlap between this field and a field from the right + try: + # check if the fields are equal due to equality predicates + are_equal = equalities.connected(field, right_fields[name]) + except KeyError: + are_equal = False + if not are_equal: + # there is a name collision and the fields are not equal, so + # rename the field from the left according to the provided + # template (which is the name itself by default) + name = left_template.format(name=name) + fields[name] = field + for name, field in right_fields.items(): - if name in overlap: - name = rname.format(name=name) - # only add if there is no collision + if name in left_fields: + # there is an overlap between this field and a field from the left + try: + # check if the fields are equal due to equality predicates + are_equal = equalities.connected(field, left_fields[name]) + except KeyError: + are_equal = False + + if are_equal: + # even though there is a name collision, the fields are equal + # due to equality predicates, so we can safely discard the + # field from the right + continue + else: + # there is a name collision and the fields are not equal, so + # rename the field from the right according to the provided + # template + name = right_template.format(name=name) + if name in fields: + # we can still have collisions after multiple joins, or a wrongly + # chosen template, so we need to track the collisions collisions.add(name) else: + # the field name does not collide with any field from the left + # and not occupied by any field from the right, so add it to the + # fields mapping fields[name] = field - return fields, collisions + return fields, collisions, equalities def dereference_mapping_left(chain): @@ -81,23 +139,60 @@ def dereference_sides(left, right, deref_left, deref_right): return left, right -def dereference_comparison(pred, deref_left, deref_right): - left, right = dereference_sides(pred.left, pred.right, deref_left, deref_right) - return pred.copy(left=left, right=right) - - def dereference_value(pred, deref_left, deref_right): deref_both = {**deref_left, **deref_right} if isinstance(pred, ops.Comparison) and pred.left.relations == pred.right.relations: - return dereference_comparison(pred, deref_left, deref_right) + left, right = dereference_sides(pred.left, pred.right, deref_left, deref_right) + return pred.copy(left=left, right=right) else: return pred.replace(deref_both, filter=ops.Value) def prepare_predicates( - left, right, 
predicates, deref_left, deref_right, comparison=ops.Equals
+    left: ops.JoinChain,
+    right: ops.Relation,
+    predicates: Sequence[Any],
+    comparison: type[ops.Comparison] = ops.Equals,
 ):
-    """Bind and dereference predicates to the left and right tables."""
+    """Bind and dereference predicates to the left and right tables.
+
+    The responsibility of this function is twofold:
+    1. Convert the various input values to valid predicates, including binding.
+    2. Dereference the predicates to one of the ops.JoinTable(s) in the join
+       chain or the new JoinTable wrapping the right table. JoinTable(s) are
+       used to ensure that all join participants are unique, even if the same
+       table is joined multiple times.
+
+    Since join predicates can sometimes be ambiguous, we do the two steps above
+    at the same time so that we have more contextual information to resolve
+    ambiguities.
+
+    Possible inputs for the predicates (an illustrative sketch follows this
+    list):
+    1. A Python boolean literal, which is converted to a literal expression.
+    2. A boolean `Value` expression, which gets flattened and dereferenced.
+       If there are comparison expressions where both sides depend on the same
+       relation, then the left side is dereferenced to one of the join tables
+       already part of the join chain, while the right side is dereferenced to
+       the new join table wrapping the right table.
+    3. A `Deferred` expression, which gets resolved on the left table and then
+       follows the same path as `Value` expressions.
+    4. A pair of expression-like objects, which are bound to the left and
+       right tables respectively using the robust `bind` function, which
+       handles several cases, including `Deferred` expressions, `Selector`s,
+       literals, etc. The left side is then dereferenced to the join chain and
+       the right side to the new join table wrapping the right table. 
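+
+    As an illustration only (`t1` and `t2` are hypothetical tables that both
+    have an `a` column), the following user-facing calls all express the same
+    equality predicate:
+
+        t1.join(t2, "a")           # shared column name
+        t1.join(t2, t1.a == t2.a)  # boolean Value expression
+        t1.join(t2, [("a", "a")])  # pair bound to the left and right tables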
+
+    Parameters
+    ----------
+    left
+        The left table
+    right
+        The right table
+    predicates
+        Predicates to bind and dereference, see the possible values above
+    """
+    deref_left = dereference_mapping_left(left)
+    deref_right = dereference_mapping_right(right)
+
     left, right = left.to_expr(), right.to_expr()
     for pred in util.promote_list(predicates):
@@ -142,9 +237,9 @@ def wrapper(self, *args, **kwargs):
 
 @public
 class Join(Table):
-    __slots__ = ("_collisions",)
+    __slots__ = ("_collisions", "_equalities")
 
-    def __init__(self, arg, collisions=None):
+    def __init__(self, arg, collisions=(), equalities=()):
         assert isinstance(arg, ops.Node)
         if not isinstance(arg, ops.JoinChain):
             # coerce the input node to a join chain operation by first wrapping
@@ -154,7 +249,15 @@ def __init__(self, arg, collisions=None):
             arg = ops.JoinTable(arg, index=0)
             arg = ops.JoinChain(arg, rest=(), values=arg.fields)
         super().__init__(arg)
+        # the collisions and equalities are used to track the name collisions
+        # and the equality groups of join fields based on equality predicates;
+        # these must be tracked in the join expression because the join chain
+        # operation doesn't hold any information about the `lname` and `rname`
+        # parameters passed to the join methods and used to disambiguate field
+        # names; the collisions are used to raise an error if there are any
+        # name collisions after the join chain is finished
         object.__setattr__(self, "_collisions", collisions or set())
+        object.__setattr__(self, "_equalities", equalities or DisjointSet())
 
     def _finish(self) -> Table:
         """Construct a valid table expression from this join expression."""
@@ -190,19 +293,9 @@ def join(  # noqa: D102
 
         left = self.op()
         right = ops.JoinTable(right, index=left.length)
-        subs_left = dereference_mapping_left(left)
-        subs_right = dereference_mapping_right(right)
 
         # bind and dereference the predicates
-        preds = list(
-            prepare_predicates(
-                left,
-                right,
-                predicates,
-                deref_left=subs_left,
-                deref_right=subs_right,
-            )
-        )
+        preds = list(prepare_predicates(left, right, predicates))
         if not preds and how != "cross":
             # if there are no predicates, default to every row matching unless
             # the join is a cross join, because a cross join already has this
@@ -213,8 +306,14 @@ def join(  # noqa: D102
         # effort to avoid collisions, but does not raise if there are any;
         # if no disambiguation happens using a final .select() call, then
        # the _finish() method will raise due to the name collisions
-        values, collisions = disambiguate_fields(
-            how, left.values, right.fields, lname, rname
+        values, collisions, equalities = disambiguate_fields(
+            how=how,
+            predicates=preds,
+            equalities=self._equalities,
+            left_fields=left.values,
+            right_fields=right.fields,
+            left_template=lname,
+            right_template=rname,
         )
 
         # construct a new join link and add it to the join chain
@@ -222,7 +321,7 @@ def join(  # noqa: D102
         left = left.copy(rest=left.rest + (link,), values=values)
 
         # return with a new JoinExpr wrapping the new join chain
-        return self.__class__(left, collisions=collisions)
+        return self.__class__(left, collisions=collisions, equalities=equalities)
 
     @functools.wraps(Table.asof_join)
     def asof_join(  # noqa: D102
@@ -280,30 +379,20 @@ def asof_join(  # noqa: D102
 
         left = self.op()
         right = ops.JoinTable(right, index=left.length)
-        subs_left = dereference_mapping_left(left)
-        subs_right = dereference_mapping_right(right)
 
         # TODO(kszucs): add extra validation for `on` with clear error messages
-        (on,) = prepare_predicates(
-            left,
-            right,
-            [on],
-            deref_left=subs_left,
-       
deref_right=subs_right, - comparison=ops.GreaterEqual, - ) - predicates = prepare_predicates( - left, - right, - predicates, - deref_left=subs_left, - deref_right=subs_right, - comparison=ops.Equals, - ) - preds = [on, *predicates] - - values, collisions = disambiguate_fields( - "asof", left.values, right.fields, lname, rname + (on,) = prepare_predicates(left, right, [on], comparison=ops.GreaterEqual) + preds = prepare_predicates(left, right, predicates, comparison=ops.Equals) + preds = [on, *preds] + + values, collisions, equalities = disambiguate_fields( + how="asof", + predicates=preds, + equalities=self._equalities, + left_fields=left.values, + right_fields=right.fields, + left_template=lname, + right_template=rname, ) # construct a new join link and add it to the join chain @@ -311,7 +400,7 @@ def asof_join( # noqa: D102 left = left.copy(rest=left.rest + (link,), values=values) # return with a new JoinExpr wrapping the new join chain - return self.__class__(left, collisions=collisions) + return self.__class__(left, collisions=collisions, equalities=equalities) @functools.wraps(Table.cross_join) def cross_join( # noqa: D102 diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index ede8a2aa1e54..d3ac796d3d3d 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -1101,7 +1101,7 @@ def test_self_join_no_view_convenience(table): expected_cols = list(table.columns) # TODO(kszucs): the inner join convenience to don't duplicate the # equivalent columns from the right table is not implemented yet - expected_cols.extend(f"{c}_right" for c in table.columns) # if c != "g") + expected_cols.extend(f"{c}_right" for c in table.columns if c != "g") assert result.columns == expected_cols @@ -1207,43 +1207,41 @@ def test_filter_join(): repr(filtered) -# TODO(kszucs): the inner join convenience to don't duplicate the equivalent -# columns from the right table is not implemented yet -# def test_inner_join_overlapping_column_names(): -# t1 = ibis.table([("foo", "string"), ("bar", "string"), ("value1", "double")]) -# t2 = ibis.table([("foo", "string"), ("bar", "string"), ("value2", "double")]) - -# joined = t1.join(t2, "foo") -# expected = t1.join(t2, t1.foo == t2.foo) -# assert_equal(joined, expected) -# assert joined.columns == ["foo", "bar", "value1", "bar_right", "value2"] - -# joined = t1.join(t2, ["foo", "bar"]) -# expected = t1.join(t2, [t1.foo == t2.foo, t1.bar == t2.bar]) -# assert_equal(joined, expected) -# assert joined.columns == ["foo", "bar", "value1", "value2"] - -# # Equality predicates don't have same name, need to rename -# joined = t1.join(t2, t1.foo == t2.bar) -# assert joined.columns == [ -# "foo", -# "bar", -# "value1", -# "foo_right", -# "bar_right", -# "value2", -# ] - -# # Not all predicates are equality, still need to rename -# joined = t1.join(t2, ["foo", t1.value1 < t2.value2]) -# assert joined.columns == [ -# "foo", -# "bar", -# "value1", -# "foo_right", -# "bar_right", -# "value2", -# ] +def test_inner_join_overlapping_column_names(): + t1 = ibis.table([("foo", "string"), ("bar", "string"), ("value1", "double")]) + t2 = ibis.table([("foo", "string"), ("bar", "string"), ("value2", "double")]) + + joined = t1.join(t2, "foo") + expected = t1.join(t2, t1.foo == t2.foo) + assert_equal(joined, expected) + assert joined.columns == ["foo", "bar", "value1", "bar_right", "value2"] + + joined = t1.join(t2, ["foo", "bar"]) + expected = t1.join(t2, [t1.foo == t2.foo, t1.bar == t2.bar]) + assert_equal(joined, expected) + assert 
joined.columns == ["foo", "bar", "value1", "value2"] + + # Equality predicates don't have same name, need to rename + joined = t1.join(t2, t1.foo == t2.bar) + assert joined.columns == [ + "foo", + "bar", + "value1", + "foo_right", + "bar_right", + "value2", + ] + + # Not all predicates are equality, still need to rename + joined = t1.join(t2, ["foo", t1.value1 < t2.value2]) + assert joined.columns == [ + "foo", + "bar", + "value1", + "foo_right", + "bar_right", + "value2", + ] @pytest.mark.parametrize( @@ -1275,7 +1273,6 @@ def test_join_key_alternatives(con, key_maker): "f": r1.f, "foo_id": r1.foo_id, "bar_id": r1.bar_id, - "foo_id_right": r2.foo_id, "value1": r2.value1, "value3": r2.value3, }, @@ -1364,9 +1361,6 @@ def test_unravel_compound_equijoin(table): "key2": r1.key2, "key3": r1.key3, "value1": r1.value1, - "key1_right": r2.key1, - "key2_right": r2.key2, - "key3_right": r2.key3, "value2": r2.value2, }, ) From 77052c7ba77ddac5390532f07d30a95c2d4d2013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 2 Feb 2024 11:45:20 +0100 Subject: [PATCH 159/161] fix(polars): columns are picked from the correct side in case of conflicting names (#8134) Fixes https://github.com/ibis-project/ibis/issues/7345 --- ibis/backends/tests/test_generic.py | 4 +--- ibis/backends/tests/test_join.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 60df8407803b..ab353c2acfa2 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -993,9 +993,7 @@ def test_memtable_column_naming_mismatch(backend, con, monkeypatch, df, columns) @pytest.mark.notimpl( - ["dask", "pandas", "polars"], - raises=NotImplementedError, - reason="not a SQL backend", + ["dask", "pandas", "polars"], raises=NotImplementedError, reason="not a SQL backend" ) @pytest.mark.notimpl(["flink"], reason="no sqlglot dialect", raises=ValueError) def test_many_subqueries(con, snapshot): diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index 8d84385bf747..fa36dea7ab0c 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -362,3 +362,23 @@ def test_complex_join_agg(snapshot): expr = t1.left_join(t2, "key1").group_by(t1.key1).aggregate(avg_diff=avg_diff) snapshot.assert_match(str(ibis.to_sql(expr, dialect="duckdb")), "out.sql") + + +def test_join_conflicting_columns(backend, con): + # GH #7345 + t1 = ibis.memtable({"x": [1, 2, 3], "y": [4, 5, 6], "z": ["a", "b", "c"]}) + t2 = ibis.memtable({"x": [3, 2, 1], "y": [7, 8, 9], "z": ["d", "e", "f"]}) + + expr = t1.join(t2, "x") + result = con.execute(expr).sort_values("x") + + expected = pd.DataFrame( + { + "x": [1, 2, 3], + "y": [4, 5, 6], + "z": ["a", "b", "c"], + "y_right": [9, 8, 7], + "z_right": ["f", "e", "d"], + } + ) + backend.assert_frame_equal(result, expected) From e68000c06673fd4eaa0644dac869fae75f6b0ce5 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 2 Feb 2024 07:37:27 -0500 Subject: [PATCH 160/161] chore: quote everything (#8172) --- ibis/backends/base/__init__.py | 8 +- ibis/backends/base/sqlglot/compiler.py | 2 +- ibis/backends/base/sqlglot/datatypes.py | 2 + .../test_cross_project_query/out.sql | 8 +- .../test_multiple_project_queries/out.sql | 8 +- .../test_subquery_scalar_params/out.sql | 22 +- .../test_approx/filter-approx_median/out.sql | 4 +- .../test_approx/filter-approx_nunique/out.sql | 
4 +- .../no_filter-approx_median/out.sql | 4 +- .../no_filter-approx_nunique/out.sql | 4 +- .../test_compiler/test_binary/out.sql | 4 +- .../test_bit/filter-bit_and/out.sql | 4 +- .../test_bit/filter-bit_or/out.sql | 4 +- .../test_bit/filter-bit_xor/out.sql | 4 +- .../test_bit/no_filter-bit_and/out.sql | 4 +- .../test_bit/no_filter-bit_or/out.sql | 4 +- .../test_bit/no_filter-bit_xor/out.sql | 4 +- .../test_bool_reducers/mean/out.sql | 4 +- .../test_bool_reducers/sum/out.sql | 4 +- .../test_bool_reducers_where_conj/out.sql | 18 +- .../test_bool_reducers_where_simple/out.sql | 4 +- .../test_compiler/test_bucket/out.sql | 12 +- .../test_cast_float_to_int/out.sql | 4 +- .../test_compile_toplevel/out.sql | 4 +- .../test_compiler/test_cov/pop/out.sql | 4 +- .../test_compiler/test_cov/sample/out.sql | 4 +- .../test_divide_by_zero/floordiv/out.sql | 4 +- .../test_divide_by_zero/truediv/out.sql | 4 +- .../date/out.sql | 4 +- .../time/out.sql | 4 +- .../test_geospatial_azimuth/out.sql | 4 +- .../test_geospatial_binary/contains/out.sql | 4 +- .../test_geospatial_binary/covered_by/out.sql | 4 +- .../test_geospatial_binary/covers/out.sql | 4 +- .../test_geospatial_binary/d_within/out.sql | 4 +- .../test_geospatial_binary/difference/out.sql | 4 +- .../test_geospatial_binary/disjoint/out.sql | 4 +- .../test_geospatial_binary/distance/out.sql | 4 +- .../test_geospatial_binary/geo_equals/out.sql | 4 +- .../intersection/out.sql | 4 +- .../test_geospatial_binary/intersects/out.sql | 4 +- .../max_distance/out.sql | 4 +- .../test_geospatial_binary/touches/out.sql | 4 +- .../test_geospatial_binary/union/out.sql | 4 +- .../test_geospatial_binary/within/out.sql | 4 +- .../test_geospatial_minmax/x_max/out.sql | 4 +- .../test_geospatial_minmax/x_min/out.sql | 4 +- .../test_geospatial_minmax/y_max/out.sql | 4 +- .../test_geospatial_minmax/y_min/out.sql | 4 +- .../test_geospatial_point/out.sql | 4 +- .../test_geospatial_simplify/out.sql | 4 +- .../test_geospatial_unary/aread/out.sql | 4 +- .../test_geospatial_unary/as_binary/out.sql | 4 +- .../test_geospatial_unary/as_text/out.sql | 4 +- .../test_geospatial_unary/buffer/out.sql | 4 +- .../test_geospatial_unary/centroid/out.sql | 4 +- .../test_geospatial_unary/end_point/out.sql | 4 +- .../geometry_type/out.sql | 4 +- .../test_geospatial_unary/length/out.sql | 4 +- .../test_geospatial_unary/npoints/out.sql | 4 +- .../test_geospatial_unary/perimeter/out.sql | 4 +- .../test_geospatial_unary/point_n/out.sql | 4 +- .../test_geospatial_unary/start_point/out.sql | 4 +- .../test_geospatial_unary_union/out.sql | 4 +- .../test_geospatial_xy/x/out.sql | 4 +- .../test_geospatial_xy/y/out.sql | 4 +- .../test_hashbytes/md5-test-binary/out.sql | 2 +- .../test_hashbytes/md5-test-string/out.sql | 2 +- .../test_hashbytes/sha1-test-binary/out.sql | 2 +- .../test_hashbytes/sha1-test-string/out.sql | 2 +- .../test_hashbytes/sha256-test-binary/out.sql | 2 +- .../test_hashbytes/sha256-test-string/out.sql | 2 +- .../test_hashbytes/sha512-test-binary/out.sql | 2 +- .../test_hashbytes/sha512-test-string/out.sql | 2 +- .../test_compiler/test_identical_to/out.sql | 32 +-- .../test_integer_to_timestamp/ms/out.sql | 2 +- .../test_integer_to_timestamp/ns/out.sql | 2 +- .../test_integer_to_timestamp/s/out.sql | 2 +- .../test_integer_to_timestamp/us/out.sql | 2 +- .../datetime/out.sql | 2 +- .../string_time/out.sql | 2 +- .../string_timestamp/out.sql | 2 +- .../time/out.sql | 2 +- .../timestamp/out.sql | 2 +- .../snapshots/test_compiler/test_now/out.sql | 2 +- .../out.sql | 27 +- 
.../prec_foll/out.sql | 30 +-- .../prec_prec/out.sql | 30 +-- .../test_set_operation/difference/out.sql | 8 +- .../test_set_operation/intersect/out.sql | 8 +- .../test_set_operation/union_all/out.sql | 8 +- .../test_set_operation/union_distinct/out.sql | 8 +- .../test_compiler/test_substring/out.sql | 8 +- .../test_temporal_truncate/day-date/out.sql | 4 +- .../day-timestamp/out.sql | 4 +- .../test_temporal_truncate/hour-time/out.sql | 4 +- .../hour-timestamp/out.sql | 4 +- .../micros-time/out.sql | 4 +- .../micros-timestamp/out.sql | 4 +- .../millis-time/out.sql | 4 +- .../millis-timestamp/out.sql | 4 +- .../minute-time/out.sql | 4 +- .../minute-timestamp/out.sql | 4 +- .../test_temporal_truncate/month-date/out.sql | 4 +- .../month-timestamp/out.sql | 4 +- .../quarter-date/out.sql | 4 +- .../quarter-timestamp/out.sql | 4 +- .../second-time/out.sql | 4 +- .../second-timestamp/out.sql | 4 +- .../test_temporal_truncate/week-date/out.sql | 4 +- .../week-timestamp/out.sql | 4 +- .../test_temporal_truncate/year-date/out.sql | 4 +- .../year-timestamp/out.sql | 4 +- .../test_to_timestamp_no_timezone/out.sql | 4 +- .../test_to_timestamp_timezone/out.sql | 4 +- .../test_trailing_range_window/days/out.sql | 30 +-- .../test_trailing_range_window/five/out.sql | 30 +-- .../test_trailing_range_window/hours/out.sql | 30 +-- .../test_trailing_range_window/micros/out.sql | 30 +-- .../minutes/out.sql | 30 +-- .../seconds/out.sql | 30 +-- .../two_days/out.sql | 30 +-- .../test_trailing_range_window/week/out.sql | 30 +-- .../test_compiler/test_union/False/out.sql | 32 +-- .../test_compiler/test_union/True/out.sql | 32 +-- .../test_union_cte/False-False/out.sql | 26 +- .../test_union_cte/False-True/out.sql | 26 +- .../test_union_cte/True-False/out.sql | 26 +- .../test_union_cte/True-True/out.sql | 26 +- .../test_unnest/out_one_unnest.sql | 14 +- .../test_unnest/out_two_unnests.sql | 26 +- .../test_window_function/current_foll/out.sql | 30 +-- .../test_window_function/prec_current/out.sql | 30 +-- .../test_window_function/prec_prec/out.sql | 30 +-- .../test_window_unbounded/following/out.sql | 4 +- .../test_window_unbounded/preceding/out.sql | 4 +- .../test_reduction_where/count/out.sql | 4 +- .../test_reduction_where/max/out.sql | 4 +- .../test_reduction_where/mean/out.sql | 4 +- .../test_reduction_where/min/out.sql | 4 +- .../test_reduction_where/std/out.sql | 4 +- .../test_reduction_where/sum/out.sql | 4 +- .../test_reduction_where/var/out.sql | 4 +- .../test_std_var_pop/std/out.sql | 4 +- .../test_std_var_pop/var/out.sql | 4 +- .../test_cast_double_col/float/out.sql | 4 +- .../test_cast_double_col/float32/out.sql | 4 +- .../test_cast_double_col/float64/out.sql | 4 +- .../test_cast_double_col/int16/out.sql | 4 +- .../test_cast_double_col/int8/out.sql | 4 +- .../test_cast_string_col/date/out.sql | 4 +- .../test_cast_string_col/int16/out.sql | 4 +- .../test_cast_string_col/int8/out.sql | 4 +- .../mapstring_int64/out.sql | 4 +- .../test_cast_string_col/string/out.sql | 4 +- .../structa_string_b_int64/out.sql | 4 +- .../test_cast_string_col/timestamp/out.sql | 4 +- .../test_column_regexp_extract/out.sql | 8 +- .../test_column_regexp_replace/out.sql | 4 +- .../test_greatest_least/out1.sql | 4 +- .../test_greatest_least/out2.sql | 4 +- .../test_greatest_least/out3.sql | 4 +- .../test_greatest_least/out4.sql | 4 +- .../test_group_concat/comma_none/out.sql | 6 +- .../test_group_concat/comma_zero/out.sql | 6 +- .../test_group_concat/minus_none/out.sql | 6 +- .../test_functions/test_hash/out.sql | 4 +- 
.../test_noop_cast/bigint_col/out.sql | 4 +- .../test_noop_cast/bool_col/out.sql | 4 +- .../test_noop_cast/date_string_col/out.sql | 4 +- .../test_noop_cast/double_col/out.sql | 4 +- .../test_noop_cast/float_col/out.sql | 4 +- .../test_functions/test_noop_cast/id/out.sql | 4 +- .../test_noop_cast/int_col/out.sql | 4 +- .../test_noop_cast/month/out.sql | 4 +- .../test_noop_cast/smallint_col/out.sql | 4 +- .../test_noop_cast/string_col/out.sql | 4 +- .../test_noop_cast/timestamp_col/out.sql | 4 +- .../test_noop_cast/tinyint_col/out.sql | 4 +- .../test_noop_cast/year/out.sql | 4 +- .../test_string_column_find/out1.sql | 4 +- .../test_string_column_find/out2.sql | 4 +- .../test_string_column_find_in_set/out.sql | 4 +- .../test_string_column_like/out1.sql | 4 +- .../test_string_column_like/out2.sql | 4 +- .../test_string_column_substring/out1.sql | 6 +- .../test_string_column_substring/out2.sql | 6 +- .../test_timestamp_cast/out1.sql | 4 +- .../test_timestamp_cast/out2.sql | 4 +- .../test_timestamp_from_integer/out.sql | 4 +- .../test_translate_math_functions/abs/out.sql | 4 +- .../ceil/out.sql | 4 +- .../test_translate_math_functions/exp/out.sql | 4 +- .../test_translate_math_functions/log/out.sql | 4 +- .../log10/out.sql | 4 +- .../log2/out.sql | 4 +- .../round/out.sql | 4 +- .../round_0/out.sql | 4 +- .../round_2/out.sql | 4 +- .../sign/out.sql | 4 +- .../sqrt/out.sql | 4 +- .../false/out.sql | 2 +- .../true/out.sql | 2 +- .../test_operators/test_between/out.sql | 4 +- .../test_binary_infix_operators/add/out.sql | 4 +- .../test_binary_infix_operators/eq/out.sql | 4 +- .../test_binary_infix_operators/ge/out.sql | 4 +- .../test_binary_infix_operators/gt/out.sql | 4 +- .../test_binary_infix_operators/le/out.sql | 4 +- .../test_binary_infix_operators/lt/out.sql | 4 +- .../test_binary_infix_operators/mul/out.sql | 4 +- .../test_binary_infix_operators/ne/out.sql | 4 +- .../test_binary_infix_operators/pow/out.sql | 4 +- .../test_binary_infix_operators/sub/out.sql | 4 +- .../truediv/out.sql | 4 +- .../lambda0/out.sql | 6 +- .../lambda1/out.sql | 4 +- .../lambda2/out.sql | 6 +- .../test_negate/bool_col/out.sql | 4 +- .../test_negate/float_col/out.sql | 4 +- .../test_negate/int_col/out.sql | 4 +- .../test_operators/test_search_case/out.sql | 10 +- .../test_operators/test_simple_case/out.sql | 4 +- .../test_array_join_in_subquery/out.sql | 8 +- .../out.sql | 12 +- .../test_select/test_complex_join/out.sql | 30 +-- .../test_select/test_count_name/out.sql | 12 +- .../test_select/test_ifelse_use_if/out.sql | 4 +- .../test_isin_notin_in_select/out1.sql | 30 +-- .../test_isin_notin_in_select/out2.sql | 30 +-- .../out.sql | 4 +- .../test_join_self_reference/out.sql | 32 +-- .../test_named_from_filter_groupby/out1.sql | 18 +- .../test_named_from_filter_groupby/out2.sql | 18 +- .../out.sql | 2 +- .../test_self_reference_simple/out.sql | 2 +- .../playerID-awardID-any_inner_join/out.sql | 50 ++-- .../playerID-awardID-any_left_join/out.sql | 50 ++-- .../playerID-awardID-inner_join/out.sql | 50 ++-- .../playerID-awardID-left_join/out.sql | 50 ++-- .../playerID-playerID-any_inner_join/out.sql | 50 ++-- .../playerID-playerID-any_left_join/out.sql | 50 ++-- .../playerID-playerID-inner_join/out.sql | 50 ++-- .../playerID-playerID-left_join/out.sql | 50 ++-- .../test_simple_scalar_aggregates/out.sql | 34 +-- .../test_table_column_unbox/out.sql | 42 ++-- .../test_timestamp_extract_field/out.sql | 14 +- .../test_where_simple_comparisons/out.sql | 32 +-- .../test_where_with_between/out.sql | 30 +-- 
.../test_where_with_timestamp/out.sql | 8 +- ibis/backends/duckdb/__init__.py | 2 +- .../test_cast_uints/uint16/out.sql | 4 +- .../test_cast_uints/uint32/out.sql | 4 +- .../test_cast_uints/uint64/out.sql | 4 +- .../test_cast_uints/uint8/out.sql | 4 +- .../test_geospatial_dwithin/out.sql | 4 +- .../as_text/out.sql | 4 +- .../n_points/out.sql | 4 +- ibis/backends/sqlite/__init__.py | 2 +- .../test_dot_sql/test_cte/bigquery/out.sql | 4 +- .../test_dot_sql/test_cte/clickhouse/out.sql | 4 +- .../test_dot_sql/test_cte/duckdb/out.sql | 4 +- .../test_many_subqueries/clickhouse/out.sql | 56 ++--- .../test_many_subqueries/duckdb/out.sql | 56 ++--- .../test_many_subqueries/risingwave/out.sql | 42 ++++ .../test_many_subqueries/sqlite/out.sql | 66 ++--- .../test_default_limit/bigquery/out.sql | 6 +- .../test_default_limit/clickhouse/out.sql | 6 +- .../test_default_limit/duckdb/out.sql | 6 +- .../test_disable_query_limit/bigquery/out.sql | 6 +- .../clickhouse/out.sql | 6 +- .../test_disable_query_limit/duckdb/out.sql | 6 +- .../bigquery/out.sql | 4 +- .../clickhouse/out.sql | 4 +- .../duckdb/out.sql | 4 +- .../test_respect_set_limit/bigquery/out.sql | 8 +- .../test_respect_set_limit/clickhouse/out.sql | 8 +- .../test_respect_set_limit/duckdb/out.sql | 8 +- .../test_join/test_complex_join_agg/out.sql | 24 +- .../clickhouse/out.sql | 26 +- .../duckdb/out.sql | 26 +- .../risingwave/out.sql | 20 ++ .../sqlite/out.sql | 34 ++- .../test_group_by_has_index/bigquery/out.sql | 8 +- .../clickhouse/out.sql | 10 +- .../test_group_by_has_index/duckdb/out.sql | 8 +- .../test_sql/test_isin_bug/bigquery/out.sql | 12 +- .../test_sql/test_isin_bug/clickhouse/out.sql | 10 +- .../test_sql/test_isin_bug/duckdb/out.sql | 10 +- .../test_union_aliasing/bigquery/out.sql | 80 +++--- .../test_union_aliasing/clickhouse/out.sql | 76 +++--- .../test_union_aliasing/duckdb/out.sql | 74 +++--- .../bigquery-date/out.sql | 2 +- .../bigquery-timestamp/out.sql | 2 +- .../clickhouse-timestamp/out.sql | 2 +- .../datafusion-date/out.sql | 2 + .../datafusion-timestamp/out.sql | 2 + .../druid-date/out.sql | 2 + .../druid-timestamp/out.sql | 2 + .../duckdb-timestamp/out.sql | 2 +- .../exasol-date/out.sql | 2 + .../exasol-timestamp/out.sql | 2 + .../mssql-date/out.sql | 2 +- .../mssql-timestamp/out.sql | 2 +- .../oracle-date/out.sql | 3 +- .../oracle-timestamp/out.sql | 3 +- .../test_time_literal_sql/0-bigquery/out.sql | 2 +- .../0-clickhouse/out.sql | 2 + .../0-datafusion/out.sql | 2 + .../test_time_literal_sql/0-druid/out.sql | 2 + .../test_time_literal_sql/0-exasol/out.sql | 2 + .../test_time_literal_sql/0-impala/out.sql | 2 + .../test_time_literal_sql/0-mssql/out.sql | 2 +- .../test_time_literal_sql/0-oracle/out.sql | 2 + .../test_time_literal_sql/0-postgres/out.sql | 2 +- .../test_time_literal_sql/0-sqlite/out.sql | 2 +- .../234567-bigquery/out.sql | 2 +- .../234567-clickhouse/out.sql | 2 + .../234567-datafusion/out.sql | 2 + .../234567-druid/out.sql | 2 + .../234567-exasol/out.sql | 2 + .../234567-impala/out.sql | 2 + .../234567-mssql/out.sql | 2 +- .../234567-oracle/out.sql | 2 + .../234567-postgres/out.sql | 2 +- .../test_agg_and_non_agg_filter/out.sql | 24 +- .../test_compiler/test_agg_filter/out.sql | 22 +- .../test_agg_filter_with_alias/out.sql | 22 +- .../test_column_distinct/out.sql | 6 +- .../test_column_expr_default_name/out.sql | 4 +- .../test_column_expr_retains_name/out.sql | 4 +- .../test_compiler/test_count_distinct/out.sql | 36 +-- .../test_difference_project_column/out.sql | 20 +- .../test_having_from_filter/out.sql | 24 +- 
.../test_compiler/test_having_size/out.sql | 14 +- .../test_intersect_project_column/out.sql | 20 +- .../test_multiple_count_distinct/out.sql | 8 +- .../test_pushdown_with_or/out.sql | 18 +- .../test_simple_agg_filter/out.sql | 22 +- .../test_subquery_where_location/out.sql | 22 +- .../test_table_difference/out.sql | 22 +- .../test_compiler/test_table_distinct/out.sql | 8 +- .../test_table_drop_with_filter/out.sql | 24 +- .../test_table_intersect/out.sql | 22 +- .../test_compiler/test_union/out.sql | 22 +- .../test_compiler/test_union_order_by/out.sql | 20 +- .../test_union_project_column/out.sql | 20 +- .../test_aggregate_count_joined/out.sql | 18 +- .../test_aggregate_having/explicit.sql | 14 +- .../test_aggregate_having/inline.sql | 14 +- .../out.sql | 22 +- .../agg_filtered.sql | 34 +-- .../agg_filtered2.sql | 36 +-- .../filtered.sql | 28 +-- .../proj.sql | 28 +-- .../test_select_sql/test_anti_join/out.sql | 14 +- .../test_select_sql/test_bool_bool/out.sql | 10 +- .../test_bug_duplicated_where/out.sql | 30 +-- .../test_bug_project_multiple_times/out.sql | 64 ++--- .../test_case_in_projection/out.sql | 34 +-- .../result.sql | 32 +-- .../test_complex_union/result.sql | 18 +- .../out.sql | 24 +- .../test_select_sql/test_endswith/out.sql | 4 +- .../test_exists_subquery/out.sql | 12 +- .../test_filter_inside_exists/out.sql | 14 +- .../test_filter_predicates/out.sql | 10 +- .../result.sql | 40 +-- .../expr3.sql | 28 +-- .../expr4.sql | 28 +-- .../test_fuse_projections/project.sql | 12 +- .../test_fuse_projections/project_filter.sql | 14 +- .../test_identifier_quoting/out.sql | 6 +- .../result.sql | 6 +- .../result.sql | 4 +- .../test_join_between_joins/out.sql | 36 +-- .../out.sql | 42 ++-- .../test_join_just_materialized/out.sql | 40 +-- .../test_join_projection_subquery_bug/out.sql | 32 +-- .../result.sql | 44 ++-- .../test_join_with_limited_table/out.sql | 16 +- .../test_limit_cte_extract/out.sql | 34 +-- .../test_limit_with_self_join/out.sql | 60 ++--- .../test_loj_subquery_filter_handling/out.sql | 30 +-- .../test_multiple_joins/out.sql | 22 +- .../test_multiple_limits/out.sql | 4 +- .../out.sql | 14 +- .../test_projection_filter_fuse/out.sql | 10 +- .../out.sql | 16 +- .../agg_explicit_column/out.sql | 6 +- .../agg_string_columns/out.sql | 8 +- .../aggregate_table_count_metric/out.sql | 2 +- .../test_select_sql/filter_then_limit/out.sql | 12 +- .../test_select_sql/limit_simple/out.sql | 2 +- .../test_select_sql/limit_then_filter/out.sql | 14 +- .../test_select_sql/limit_with_offset/out.sql | 2 +- .../test_select_sql/single_column/out.sql | 12 +- .../out.sql | 2 +- .../out.sql | 34 +-- .../test_select_sql/test_semi_join/out.sql | 14 +- .../test_simple_joins/inner.sql | 14 +- .../test_simple_joins/inner_two_preds.sql | 14 +- .../test_simple_joins/left.sql | 14 +- .../test_simple_joins/outer.sql | 14 +- .../result1.sql | 12 +- .../result2.sql | 12 +- .../test_select_sql/test_startswith/out.sql | 4 +- .../out.sql | 128 +++++----- .../expr.sql | 16 +- .../expr2.sql | 28 +-- .../test_subquery_in_union/out.sql | 52 ++-- .../test_subquery_used_for_self_join/out.sql | 40 +-- .../test_topk_analysis_bug/out.sql | 42 ++-- .../test_topk_operation/e1.sql | 30 +-- .../test_topk_operation/e2.sql | 30 +-- .../test_topk_predicate_pushdown_bug/out.sql | 78 +++--- .../test_topk_to_aggregate/out.sql | 14 +- .../test_tpch_self_join_failure/out.sql | 46 ++-- .../test_where_analyze_scalar_op/out.sql | 34 +-- .../test_where_no_pushdown_possible/out.sql | 30 +-- .../test_where_with_between/out.sql | 27 +- 
.../test_where_with_join/out.sql | 34 +-- .../test_aggregate/having_count/out.sql | 14 +- .../test_aggregate/having_sum/out.sql | 14 +- .../test_sql/test_aggregate/single/out.sql | 6 +- .../test_sql/test_aggregate/two/out.sql | 8 +- .../snapshots/test_sql/test_between/out.sql | 4 +- .../test_boolean_conjunction/and/out.sql | 11 +- .../test_boolean_conjunction/or/out.sql | 11 +- .../snapshots/test_sql/test_coalesce/out.sql | 12 +- .../test_sql/test_comparisons/eq/out.sql | 4 +- .../test_sql/test_comparisons/ge/out.sql | 4 +- .../test_sql/test_comparisons/gt/out.sql | 4 +- .../test_sql/test_comparisons/le/out.sql | 4 +- .../test_sql/test_comparisons/lt/out.sql | 4 +- .../test_sql/test_comparisons/ne/out.sql | 4 +- .../out.sql | 22 +- .../test_distinct/count_distinct/out.sql | 4 +- .../group_by_count_distinct/out.sql | 6 +- .../test_distinct/projection_distinct/out.sql | 8 +- .../single_column_projection_distinct/out.sql | 6 +- .../test_distinct/table_distinct/out.sql | 2 +- .../sql/snapshots/test_sql/test_exists/e1.sql | 12 +- .../sql/snapshots/test_sql/test_exists/e2.sql | 14 +- .../out.sql | 14 +- .../snapshots/test_sql/test_gh_1045/out.sql | 60 ++--- .../test_isnull_notnull/isnull/out.sql | 4 +- .../test_isnull_notnull/notnull/out.sql | 4 +- .../test_join_just_materialized/out.sql | 40 +-- .../test_sql/test_joins/inner/out.sql | 20 +- .../test_sql/test_joins/inner_select/out.sql | 14 +- .../test_sql/test_joins/left/out.sql | 20 +- .../test_sql/test_joins/left_select/out.sql | 14 +- .../test_sql/test_joins/outer/out.sql | 20 +- .../test_sql/test_joins/outer_select/out.sql | 14 +- .../test_sql/test_limit/expr_fn0/out.sql | 2 +- .../test_sql/test_limit/expr_fn1/out.sql | 2 +- .../test_sql/test_limit_filter/out.sql | 12 +- .../test_sql/test_limit_subquery/out.sql | 14 +- .../test_lower_projection_sort_key/out.sql | 30 +-- .../test_sql/test_multi_join/out.sql | 34 +-- .../out.sql | 4 +- .../test_sql/test_named_expr/out.sql | 4 +- .../snapshots/test_sql/test_negate/out.sql | 6 +- .../test_sql/test_no_cart_join/out.sql | 56 +++-- .../test_sql/test_no_cartesian_join/out.sql | 92 +++---- .../test_sql/test_no_cross_join/out.sql | 30 +-- .../test_sql/test_not_exists/out.sql | 12 +- .../test_sql/test_order_by/column/out.sql | 12 +- .../test_sql/test_order_by/random/out.sql | 10 +- .../test_sql/test_order_by_expr/out.sql | 10 +- .../test_sql/test_searched_case/out.sql | 12 +- .../anti.sql | 32 +-- .../semi.sql | 32 +-- .../test_sql/test_self_reference_join/out.sql | 14 +- .../test_sql/test_simple_case/out.sql | 4 +- .../out.sql | 14 +- .../test_sql/test_subquery_aliased/out.sql | 18 +- .../test_where_correlated_subquery/out.sql | 28 +-- .../out.sql | 40 +-- .../test_where_simple_comparisons/out.sql | 14 +- .../test_where_uncorrelated_subquery/out.sql | 16 +- ibis/backends/tests/test_temporal.py | 5 - .../test_h01/test_tpc_h01/duckdb/h01.sql | 94 +++---- .../test_h02/test_tpc_h02/duckdb/h02.sql | 206 +++++++-------- .../test_h03/test_tpc_h03/duckdb/h03.sql | 178 ++++++------- .../test_h04/test_tpc_h04/duckdb/h04.sql | 47 ++-- .../test_h05/test_tpc_h05/duckdb/h05.sql | 236 +++++++++--------- .../test_h06/test_tpc_h06/duckdb/h06.sql | 46 ++-- .../test_h07/test_tpc_h07/duckdb/h07.sql | 92 +++---- .../test_h08/test_tpc_h08/duckdb/h08.sql | 82 +++--- .../test_h09/test_tpc_h09/duckdb/h09.sql | 68 ++--- .../test_h10/test_tpc_h10/duckdb/h10.sql | 212 ++++++++-------- .../test_h11/test_tpc_h11/duckdb/h11.sql | 176 ++++++------- .../test_h12/test_tpc_h12/duckdb/h12.sql | 140 +++++------ 
.../test_h13/test_tpc_h13/duckdb/h13.sql | 65 ++--- .../test_h14/test_tpc_h14/duckdb/h14.sql | 125 +++++----- .../test_h15/test_tpc_h15/duckdb/h15.sql | 162 ++++++------ .../test_h16/test_tpc_h16/duckdb/h16.sql | 106 ++++---- .../test_h17/test_tpc_h17/duckdb/h17.sql | 158 ++++++------ .../test_h18/test_tpc_h18/duckdb/h18.sql | 190 +++++++------- .../test_h19/test_tpc_h19/duckdb/h19.sql | 158 ++++++------ .../test_h20/test_tpc_h20/duckdb/h20.sql | 100 ++++---- .../test_h21/test_tpc_h21/duckdb/h21.sql | 86 +++---- .../test_h22/test_tpc_h22/duckdb/h22.sql | 66 ++--- 505 files changed, 4600 insertions(+), 4470 deletions(-) create mode 100644 ibis/backends/tests/snapshots/test_generic/test_many_subqueries/risingwave/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/risingwave/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/datafusion-date/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/datafusion-timestamp/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/druid-date/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/druid-timestamp/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/exasol-date/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/exasol-timestamp/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-clickhouse/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-datafusion/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-druid/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-impala/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-oracle/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-clickhouse/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-datafusion/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-druid/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-impala/out.sql create mode 100644 ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-oracle/out.sql diff --git a/ibis/backends/base/__init__.py b/ibis/backends/base/__init__.py index 445997da3b09..8e3f825321e8 100644 --- a/ibis/backends/base/__init__.py +++ b/ibis/backends/base/__init__.py @@ -1355,7 +1355,9 @@ def connect(resource: Path | str, **kwargs: Any) -> BaseBackend: Connect to an on-disk SQLite database: >>> con = ibis.connect("sqlite://relative.db") - >>> con = ibis.connect("sqlite:///absolute/path/to/data.db") + >>> con = ibis.connect( + ... "sqlite:///absolute/path/to/data.db" + ... ) # quartodoc: +SKIP # doctest: +SKIP Connect to a PostgreSQL server: @@ -1418,10 +1420,6 @@ def connect(resource: Path | str, **kwargs: Any) -> BaseBackend: # SQLAlchemy requires a `://`, while urllib may roundtrip # `duckdb://` to `duckdb:`. 
Here we re-add the missing `//`. url = url.replace(":", "://", 1) - if scheme in ("duckdb", "sqlite", "pyspark"): - # SQLAlchemy wants an extra slash for URLs where the path - # maps to a relative/absolute location on the filesystem - url = url.replace(":", ":/", 1) try: backend = getattr(ibis, scheme) diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py index db90f5ea8061..f54188c5daba 100644 --- a/ibis/backends/base/sqlglot/compiler.py +++ b/ibis/backends/base/sqlglot/compiler.py @@ -163,7 +163,7 @@ class SQLGlotCompiler(abc.ABC): no_limit_value: sge.Null | None = None """The value to use to indicate no limit.""" - quoted: bool | None = None + quoted: bool = True """Whether to always quote identifiers.""" NAN: ClassVar[sge.Expression] = sge.Cast( diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index db1bae762c9a..8ac2dbffba27 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -253,6 +253,8 @@ def _from_sqlglot_INTERVAL( else: unit = precision_or_span.this.this return dt.Interval(unit=unit, nullable=nullable) + elif isinstance(precision_or_span, sge.Var): + return dt.Interval(unit=precision_or_span.this, nullable=nullable) elif precision_or_span is None: raise com.IbisTypeError("Interval precision is None") else: diff --git a/ibis/backends/bigquery/tests/system/snapshots/test_client/test_cross_project_query/out.sql b/ibis/backends/bigquery/tests/system/snapshots/test_client/test_cross_project_query/out.sql index da12e567f6b5..819adf5b9db3 100644 --- a/ibis/backends/bigquery/tests/system/snapshots/test_client/test_cross_project_query/out.sql +++ b/ibis/backends/bigquery/tests/system/snapshots/test_client/test_cross_project_query/out.sql @@ -1,6 +1,6 @@ SELECT - t0.title, - t0.tags -FROM `bigquery-public-data`.stackoverflow.posts_questions AS t0 + `t0`.`title`, + `t0`.`tags` +FROM `bigquery-public-data`.`stackoverflow`.`posts_questions` AS `t0` WHERE - strpos(t0.tags, 'ibis') > 0 \ No newline at end of file + strpos(`t0`.`tags`, 'ibis') > 0 \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/system/snapshots/test_client/test_multiple_project_queries/out.sql b/ibis/backends/bigquery/tests/system/snapshots/test_client/test_multiple_project_queries/out.sql index 9832d461eb58..fbae8c0def18 100644 --- a/ibis/backends/bigquery/tests/system/snapshots/test_client/test_multiple_project_queries/out.sql +++ b/ibis/backends/bigquery/tests/system/snapshots/test_client/test_multiple_project_queries/out.sql @@ -1,5 +1,5 @@ SELECT - t2.title -FROM `bigquery-public-data`.stackoverflow.posts_questions AS t2 -INNER JOIN `nyc-tlc`.yellow.trips AS t3 - ON t2.tags = t3.rate_code \ No newline at end of file + `t2`.`title` +FROM `bigquery-public-data`.`stackoverflow`.`posts_questions` AS `t2` +INNER JOIN `nyc-tlc`.`yellow`.`trips` AS `t3` + ON `t2`.`tags` = `t3`.`rate_code` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/system/snapshots/test_client/test_subquery_scalar_params/out.sql b/ibis/backends/bigquery/tests/system/snapshots/test_client/test_subquery_scalar_params/out.sql index ec28287c9f13..8ecfda1c0ec8 100644 --- a/ibis/backends/bigquery/tests/system/snapshots/test_client/test_subquery_scalar_params/out.sql +++ b/ibis/backends/bigquery/tests/system/snapshots/test_client/test_subquery_scalar_params/out.sql @@ -1,19 +1,19 @@ SELECT - COUNT(t2.foo) AS count + COUNT(`t2`.`foo`) AS `count` FROM ( SELECT - t1.string_col, - 
SUM(t1.float_col) AS foo + `t1`.`string_col`, + SUM(`t1`.`float_col`) AS `foo` FROM ( SELECT - t0.float_col, - t0.timestamp_col, - t0.int_col, - t0.string_col - FROM `ibis-gbq`.ibis_gbq_testing.functional_alltypes AS t0 + `t0`.`float_col`, + `t0`.`timestamp_col`, + `t0`.`int_col`, + `t0`.`string_col` + FROM `ibis-gbq`.`ibis_gbq_testing`.`functional_alltypes` AS `t0` WHERE - t0.timestamp_col < datetime('2014-01-01T00:00:00') - ) AS t1 + `t0`.`timestamp_col` < datetime('2014-01-01T00:00:00') + ) AS `t1` GROUP BY 1 -) AS t2 \ No newline at end of file +) AS `t2` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_median/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_median/out.sql index e86bffa88a0b..8c5a9fb3f8dc 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_median/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_median/out.sql @@ -1,3 +1,3 @@ SELECT - approx_quantiles(IF(t0.month > 0, t0.double_col, NULL), IF(t0.month > 0, 2, NULL))[offset(1)] AS `ApproxMedian_double_col_ Greater_month_ 0` -FROM functional_alltypes AS t0 \ No newline at end of file + approx_quantiles(IF(`t0`.`month` > 0, `t0`.`double_col`, NULL), IF(`t0`.`month` > 0, 2, NULL))[offset(1)] AS `ApproxMedian_double_col_ Greater_month_ 0` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_nunique/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_nunique/out.sql index 967e47b3904f..68d924daea71 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_nunique/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/filter-approx_nunique/out.sql @@ -1,3 +1,3 @@ SELECT - APPROX_COUNT_DISTINCT(IF(t0.month > 0, t0.double_col, NULL)) AS `ApproxCountDistinct_double_col_ Greater_month_ 0` -FROM functional_alltypes AS t0 \ No newline at end of file + APPROX_COUNT_DISTINCT(IF(`t0`.`month` > 0, `t0`.`double_col`, NULL)) AS `ApproxCountDistinct_double_col_ Greater_month_ 0` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_median/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_median/out.sql index 8a4438fd4e83..0208a6f36a89 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_median/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_median/out.sql @@ -1,3 +1,3 @@ SELECT - approx_quantiles(t0.double_col, 2)[offset(1)] AS ApproxMedian_double_col -FROM functional_alltypes AS t0 \ No newline at end of file + approx_quantiles(`t0`.`double_col`, 2)[offset(1)] AS `ApproxMedian_double_col` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_nunique/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_nunique/out.sql index 7ee240de9fd9..68127767f47c 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_nunique/out.sql +++ 
b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_approx/no_filter-approx_nunique/out.sql @@ -1,3 +1,3 @@ SELECT - APPROX_COUNT_DISTINCT(t0.double_col) AS ApproxCountDistinct_double_col -FROM functional_alltypes AS t0 \ No newline at end of file + APPROX_COUNT_DISTINCT(`t0`.`double_col`) AS `ApproxCountDistinct_double_col` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_binary/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_binary/out.sql index c9b5a33bdd2a..f385d2b9a02f 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_binary/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_binary/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.value AS BYTES) AS `Cast_value_ binary` -FROM t AS t0 \ No newline at end of file + CAST(`t0`.`value` AS BYTES) AS `Cast_value_ binary` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_and/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_and/out.sql index 9086990603db..b416c9f9136b 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_and/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_and/out.sql @@ -1,3 +1,3 @@ SELECT - bit_and(IF(t0.bigint_col > 0, t0.int_col, NULL)) AS `BitAnd_int_col_ Greater_bigint_col_ 0` -FROM functional_alltypes AS t0 \ No newline at end of file + bit_and(IF(`t0`.`bigint_col` > 0, `t0`.`int_col`, NULL)) AS `BitAnd_int_col_ Greater_bigint_col_ 0` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_or/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_or/out.sql index ec9ed6c6b66b..533efe55deab 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_or/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_or/out.sql @@ -1,3 +1,3 @@ SELECT - bit_or(IF(t0.bigint_col > 0, t0.int_col, NULL)) AS `BitOr_int_col_ Greater_bigint_col_ 0` -FROM functional_alltypes AS t0 \ No newline at end of file + bit_or(IF(`t0`.`bigint_col` > 0, `t0`.`int_col`, NULL)) AS `BitOr_int_col_ Greater_bigint_col_ 0` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_xor/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_xor/out.sql index 7997e495ef8f..6e8799e46de4 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_xor/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/filter-bit_xor/out.sql @@ -1,3 +1,3 @@ SELECT - bit_xor(IF(t0.bigint_col > 0, t0.int_col, NULL)) AS `BitXor_int_col_ Greater_bigint_col_ 0` -FROM functional_alltypes AS t0 \ No newline at end of file + bit_xor(IF(`t0`.`bigint_col` > 0, `t0`.`int_col`, NULL)) AS `BitXor_int_col_ Greater_bigint_col_ 0` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_and/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_and/out.sql index fcafb18759da..6ecb37adff5e 100644 --- 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_and/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_and/out.sql @@ -1,3 +1,3 @@ SELECT - bit_and(t0.int_col) AS BitAnd_int_col -FROM functional_alltypes AS t0 \ No newline at end of file + bit_and(`t0`.`int_col`) AS `BitAnd_int_col` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_or/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_or/out.sql index 22f8eb5534f9..c1191a3c6471 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_or/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_or/out.sql @@ -1,3 +1,3 @@ SELECT - bit_or(t0.int_col) AS BitOr_int_col -FROM functional_alltypes AS t0 \ No newline at end of file + bit_or(`t0`.`int_col`) AS `BitOr_int_col` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_xor/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_xor/out.sql index de39538dbbd7..b1e2ae7247f5 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_xor/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bit/no_filter-bit_xor/out.sql @@ -1,3 +1,3 @@ SELECT - bit_xor(t0.int_col) AS BitXor_int_col -FROM functional_alltypes AS t0 \ No newline at end of file + bit_xor(`t0`.`int_col`) AS `BitXor_int_col` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/mean/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/mean/out.sql index 1345addaed7e..defc913da66e 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/mean/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/mean/out.sql @@ -1,3 +1,3 @@ SELECT - AVG(CAST(t0.bool_col AS INT64)) AS Mean_bool_col -FROM functional_alltypes AS t0 \ No newline at end of file + AVG(CAST(`t0`.`bool_col` AS INT64)) AS `Mean_bool_col` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/sum/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/sum/out.sql index 52a1683495d8..41dcc0ed9848 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/sum/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers/sum/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(CAST(t0.bool_col AS INT64)) AS Sum_bool_col -FROM functional_alltypes AS t0 \ No newline at end of file + SUM(CAST(`t0`.`bool_col` AS INT64)) AS `Sum_bool_col` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_conj/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_conj/out.sql index 686b89ebd74f..3dcdc77c2b16 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_conj/out.sql +++ 
b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_conj/out.sql @@ -1,7 +1,13 @@ SELECT - SUM(IF(( - t0.month > 6 - ) AND ( - t0.month < 10 - ), CAST(t0.bool_col AS INT64), NULL)) AS `Sum_bool_col_ And_Greater_month_ 6_ Less_month_ 10` -FROM functional_alltypes AS t0 \ No newline at end of file + SUM( + IF( + ( + `t0`.`month` > 6 + ) AND ( + `t0`.`month` < 10 + ), + CAST(`t0`.`bool_col` AS INT64), + NULL + ) + ) AS `Sum_bool_col_ And_Greater_month_ 6_ Less_month_ 10` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_simple/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_simple/out.sql index 3e3b21ef17e2..6bc6dd23ed12 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_simple/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bool_reducers_where_simple/out.sql @@ -1,3 +1,3 @@ SELECT - AVG(IF(t0.month > 6, CAST(t0.bool_col AS INT64), NULL)) AS `Mean_bool_col_ Greater_month_ 6` -FROM functional_alltypes AS t0 \ No newline at end of file + AVG(IF(`t0`.`month` > 6, CAST(`t0`.`bool_col` AS INT64), NULL)) AS `Mean_bool_col_ Greater_month_ 6` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bucket/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bucket/out.sql index 7aa64367725a..5490334b51b2 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bucket/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_bucket/out.sql @@ -1,17 +1,17 @@ SELECT CASE WHEN ( - 0 <= t0.value + 0 <= `t0`.`value` ) AND ( - t0.value < 1 + `t0`.`value` < 1 ) THEN 0 WHEN ( - 1 <= t0.value + 1 <= `t0`.`value` ) AND ( - t0.value <= 3 + `t0`.`value` <= 3 ) THEN 1 ELSE CAST(NULL AS INT64) - END AS tmp -FROM t AS t0 \ No newline at end of file + END AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cast_float_to_int/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cast_float_to_int/out.sql index d81e2d9cbdcf..3ebdfd612e9d 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cast_float_to_int/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cast_float_to_int/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(trunc(t0.double_col) AS INT64) AS `Cast_double_col_ int64` -FROM functional_alltypes AS t0 \ No newline at end of file + CAST(trunc(`t0`.`double_col`) AS INT64) AS `Cast_double_col_ int64` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_compile_toplevel/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_compile_toplevel/out.sql index 1b1cef1e6341..6a21813f1cf0 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_compile_toplevel/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_compile_toplevel/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(t0.foo) AS Sum_foo -FROM t0 AS t0 \ No newline at end of file + SUM(`t0`.`foo`) AS `Sum_foo` +FROM `t0` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/pop/out.sql 
b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/pop/out.sql index 16c39cbe3843..e9c15dc26bb2 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/pop/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/pop/out.sql @@ -1,3 +1,3 @@ SELECT - COVAR_POP(t0.double_col, t0.double_col) AS `Covariance_double_col_ double_col` -FROM functional_alltypes AS t0 \ No newline at end of file + COVAR_POP(`t0`.`double_col`, `t0`.`double_col`) AS `Covariance_double_col_ double_col` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/sample/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/sample/out.sql index cd9d190ae5e5..795180ad2157 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/sample/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_cov/sample/out.sql @@ -1,3 +1,3 @@ SELECT - COVAR_SAMP(t0.double_col, t0.double_col) AS `Covariance_double_col_ double_col` -FROM functional_alltypes AS t0 \ No newline at end of file + COVAR_SAMP(`t0`.`double_col`, `t0`.`double_col`) AS `Covariance_double_col_ double_col` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/floordiv/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/floordiv/out.sql index 021c289c519a..545d71968181 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/floordiv/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/floordiv/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(FLOOR(ieee_divide(t0.double_col, 0)) AS INT64) AS `FloorDivide_double_col_ 0` -FROM functional_alltypes AS t0 \ No newline at end of file + CAST(FLOOR(ieee_divide(`t0`.`double_col`, 0)) AS INT64) AS `FloorDivide_double_col_ 0` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/truediv/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/truediv/out.sql index 2f451f747c62..fd497dabba49 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/truediv/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_divide_by_zero/truediv/out.sql @@ -1,3 +1,3 @@ SELECT - ieee_divide(t0.double_col, 0) AS `Divide_double_col_ 0` -FROM functional_alltypes AS t0 \ No newline at end of file + ieee_divide(`t0`.`double_col`, 0) AS `Divide_double_col_ 0` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/date/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/date/out.sql index bc3328e12ee8..a482cae8a206 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/date/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/date/out.sql @@ -1,3 +1,3 @@ SELECT - DATE(t0.ts) AS tmp -FROM t AS t0 \ No newline at end of file + DATE(`t0`.`ts`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/time/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/time/out.sql index c35dfe0331a2..f6b91dd9db01 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_extract_temporal_from_timestamp/time/out.sql @@ -1,3 +1,3 @@ SELECT - time(t0.ts) AS tmp -FROM t AS t0 \ No newline at end of file + time(`t0`.`ts`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_azimuth/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_azimuth/out.sql index c1326749682c..a1c2c27200c8 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_azimuth/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_azimuth/out.sql @@ -1,3 +1,3 @@ SELECT - st_azimuth(t0.p0, t0.p1) AS tmp -FROM t AS t0 \ No newline at end of file + st_azimuth(`t0`.`p0`, `t0`.`p1`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/contains/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/contains/out.sql index 41db52f14ef0..96bf23390889 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/contains/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/contains/out.sql @@ -1,3 +1,3 @@ SELECT - st_contains(t0.geog0, t0.geog1) AS tmp -FROM t AS t0 \ No newline at end of file + st_contains(`t0`.`geog0`, `t0`.`geog1`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covered_by/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covered_by/out.sql index 45073f686366..d94f44062dd8 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covered_by/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covered_by/out.sql @@ -1,3 +1,3 @@ SELECT - st_coveredby(t0.geog0, t0.geog1) AS tmp -FROM t AS t0 \ No newline at end of file + st_coveredby(`t0`.`geog0`, `t0`.`geog1`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covers/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covers/out.sql index 316f696e43df..157f1abca0a2 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covers/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/covers/out.sql @@ -1,3 +1,3 @@ SELECT - st_covers(t0.geog0, t0.geog1) AS tmp -FROM t AS t0 \ No newline at end of file + st_covers(`t0`.`geog0`, `t0`.`geog1`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/d_within/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/d_within/out.sql index 95b081d2f324..f9a4a8d53fe4 100644 --- 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/d_within/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/d_within/out.sql @@ -1,3 +1,3 @@ SELECT - st_dwithin(t0.geog0, t0.geog1, 5.2) AS tmp -FROM t AS t0 \ No newline at end of file + st_dwithin(`t0`.`geog0`, `t0`.`geog1`, 5.2) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/difference/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/difference/out.sql index 3680bf6dc8cf..6a35d9cfa05e 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/difference/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/difference/out.sql @@ -1,3 +1,3 @@ SELECT - st_difference(t0.geog0, t0.geog1) AS tmp -FROM t AS t0 \ No newline at end of file + st_difference(`t0`.`geog0`, `t0`.`geog1`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/disjoint/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/disjoint/out.sql index 47f67918fdaf..b60e5f4f311b 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/disjoint/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/disjoint/out.sql @@ -1,3 +1,3 @@ SELECT - st_disjoint(t0.geog0, t0.geog1) AS tmp -FROM t AS t0 \ No newline at end of file + st_disjoint(`t0`.`geog0`, `t0`.`geog1`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/distance/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/distance/out.sql index dd498ab034f6..a9bdbfee05af 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/distance/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/distance/out.sql @@ -1,3 +1,3 @@ SELECT - st_distance(t0.geog0, t0.geog1) AS tmp -FROM t AS t0 \ No newline at end of file + st_distance(`t0`.`geog0`, `t0`.`geog1`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/geo_equals/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/geo_equals/out.sql index 525bdd5a98c5..ecdbbfcb201b 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/geo_equals/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/geo_equals/out.sql @@ -1,3 +1,3 @@ SELECT - st_equals(t0.geog0, t0.geog1) AS tmp -FROM t AS t0 \ No newline at end of file + st_equals(`t0`.`geog0`, `t0`.`geog1`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersection/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersection/out.sql index 881691e93e02..5dfd3ab8b909 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersection/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersection/out.sql 
@@ -1,3 +1,3 @@ SELECT - st_intersection(t0.geog0, t0.geog1) AS tmp -FROM t AS t0 \ No newline at end of file + st_intersection(`t0`.`geog0`, `t0`.`geog1`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersects/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersects/out.sql index 9768493f1bbc..fef52009fe64 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersects/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/intersects/out.sql @@ -1,3 +1,3 @@ SELECT - st_intersects(t0.geog0, t0.geog1) AS tmp -FROM t AS t0 \ No newline at end of file + st_intersects(`t0`.`geog0`, `t0`.`geog1`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/max_distance/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/max_distance/out.sql index bebf364b68f8..96a0669f9a8a 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/max_distance/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/max_distance/out.sql @@ -1,3 +1,3 @@ SELECT - st_maxdistance(t0.geog0, t0.geog1) AS tmp -FROM t AS t0 \ No newline at end of file + st_maxdistance(`t0`.`geog0`, `t0`.`geog1`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/touches/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/touches/out.sql index 3d50dc04d227..66fce0704abf 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/touches/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/touches/out.sql @@ -1,3 +1,3 @@ SELECT - st_touches(t0.geog0, t0.geog1) AS tmp -FROM t AS t0 \ No newline at end of file + st_touches(`t0`.`geog0`, `t0`.`geog1`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/union/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/union/out.sql index 8f5d1d5a8f42..2d55fe586ad8 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/union/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/union/out.sql @@ -1,3 +1,3 @@ SELECT - st_union(t0.geog0, t0.geog1) AS tmp -FROM t AS t0 \ No newline at end of file + st_union(`t0`.`geog0`, `t0`.`geog1`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/within/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/within/out.sql index 9ac98c0a3feb..dc4db360b7c4 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/within/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_binary/within/out.sql @@ -1,3 +1,3 @@ SELECT - st_within(t0.geog0, t0.geog1) AS tmp -FROM t AS t0 \ No newline at end of file + st_within(`t0`.`geog0`, `t0`.`geog1`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_max/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_max/out.sql index 3acaebab60b6..b05087e2fc75 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_max/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_max/out.sql @@ -1,3 +1,3 @@ SELECT - st_boundingbox(t0.geog).xmax AS tmp -FROM t AS t0 \ No newline at end of file + st_boundingbox(`t0`.`geog`).xmax AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_min/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_min/out.sql index 3e46dda22606..04e235462a0a 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_min/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/x_min/out.sql @@ -1,3 +1,3 @@ SELECT - st_boundingbox(t0.geog).xmin AS tmp -FROM t AS t0 \ No newline at end of file + st_boundingbox(`t0`.`geog`).xmin AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_max/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_max/out.sql index f2163789f6e8..843525e16179 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_max/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_max/out.sql @@ -1,3 +1,3 @@ SELECT - st_boundingbox(t0.geog).ymax AS tmp -FROM t AS t0 \ No newline at end of file + st_boundingbox(`t0`.`geog`).ymax AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_min/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_min/out.sql index dc148200be48..e26977c81c83 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_min/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_minmax/y_min/out.sql @@ -1,3 +1,3 @@ SELECT - st_boundingbox(t0.geog).ymin AS tmp -FROM t AS t0 \ No newline at end of file + st_boundingbox(`t0`.`geog`).ymin AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_point/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_point/out.sql index a57d8a6952f5..a7e665427598 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_point/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_point/out.sql @@ -1,3 +1,3 @@ SELECT - st_geogpoint(t0.lon, t0.lat) AS tmp -FROM t AS t0 \ No newline at end of file + st_geogpoint(`t0`.`lon`, `t0`.`lat`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_simplify/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_simplify/out.sql index 0f9f1b6cc556..e5d6f7d549ef 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_simplify/out.sql +++ 
b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_simplify/out.sql @@ -1,3 +1,3 @@ SELECT - st_simplify(t0.geog, 5.2) AS tmp -FROM t AS t0 \ No newline at end of file + st_simplify(`t0`.`geog`, 5.2) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/aread/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/aread/out.sql index 6b56cb853600..d9281160c6bf 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/aread/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/aread/out.sql @@ -1,3 +1,3 @@ SELECT - st_area(t0.geog) AS tmp -FROM t AS t0 \ No newline at end of file + st_area(`t0`.`geog`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_binary/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_binary/out.sql index d08e811dab1a..018ab1f57956 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_binary/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_binary/out.sql @@ -1,3 +1,3 @@ SELECT - st_asbinary(t0.geog) AS tmp -FROM t AS t0 \ No newline at end of file + st_asbinary(`t0`.`geog`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_text/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_text/out.sql index 5a15cdcf347a..8dcf46b8758f 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_text/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/as_text/out.sql @@ -1,3 +1,3 @@ SELECT - st_astext(t0.geog) AS tmp -FROM t AS t0 \ No newline at end of file + st_astext(`t0`.`geog`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/buffer/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/buffer/out.sql index 026f5a13ab9c..70cb2b7cc351 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/buffer/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/buffer/out.sql @@ -1,3 +1,3 @@ SELECT - st_buffer(t0.geog, 5.2) AS tmp -FROM t AS t0 \ No newline at end of file + st_buffer(`t0`.`geog`, 5.2) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/centroid/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/centroid/out.sql index c486a43d1f54..446af3e347eb 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/centroid/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/centroid/out.sql @@ -1,3 +1,3 @@ SELECT - st_centroid(t0.geog) AS tmp -FROM t AS t0 \ No newline at end of file + st_centroid(`t0`.`geog`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/end_point/out.sql 
b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/end_point/out.sql index 0f4f517bc5d7..9bb5289851e1 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/end_point/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/end_point/out.sql @@ -1,3 +1,3 @@ SELECT - st_endpoint(t0.geog) AS tmp -FROM t AS t0 \ No newline at end of file + st_endpoint(`t0`.`geog`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/geometry_type/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/geometry_type/out.sql index 6b87c638e82d..e6849f56ea7c 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/geometry_type/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/geometry_type/out.sql @@ -1,3 +1,3 @@ SELECT - st_geometrytype(t0.geog) AS tmp -FROM t AS t0 \ No newline at end of file + st_geometrytype(`t0`.`geog`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/length/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/length/out.sql index cc0c00c15a40..daa410bd11d6 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/length/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/length/out.sql @@ -1,3 +1,3 @@ SELECT - st_length(t0.geog) AS tmp -FROM t AS t0 \ No newline at end of file + st_length(`t0`.`geog`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/npoints/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/npoints/out.sql index ce6c5c6a9988..5776674cef29 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/npoints/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/npoints/out.sql @@ -1,3 +1,3 @@ SELECT - st_numpoints(t0.geog) AS tmp -FROM t AS t0 \ No newline at end of file + st_numpoints(`t0`.`geog`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/perimeter/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/perimeter/out.sql index 62b3f322440b..ed5ce2aafa27 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/perimeter/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/perimeter/out.sql @@ -1,3 +1,3 @@ SELECT - st_perimeter(t0.geog) AS tmp -FROM t AS t0 \ No newline at end of file + st_perimeter(`t0`.`geog`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/point_n/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/point_n/out.sql index 0b03f583db71..070963810106 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/point_n/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/point_n/out.sql @@ -1,3 +1,3 @@ SELECT - 
st_pointn(t0.geog, 3) AS tmp -FROM t AS t0 \ No newline at end of file + st_pointn(`t0`.`geog`, 3) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/start_point/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/start_point/out.sql index 9c77d20eaae8..a4681f1837fd 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/start_point/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary/start_point/out.sql @@ -1,3 +1,3 @@ SELECT - st_startpoint(t0.geog) AS tmp -FROM t AS t0 \ No newline at end of file + st_startpoint(`t0`.`geog`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary_union/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary_union/out.sql index 112d191a4eae..4729efce3601 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary_union/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_unary_union/out.sql @@ -1,3 +1,3 @@ SELECT - st_union_agg(t0.geog) AS tmp -FROM t AS t0 \ No newline at end of file + st_union_agg(`t0`.`geog`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/x/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/x/out.sql index 76b1ac85d03e..3f2648904d3a 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/x/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/x/out.sql @@ -1,3 +1,3 @@ SELECT - st_x(t0.pt) AS tmp -FROM t AS t0 \ No newline at end of file + st_x(`t0`.`pt`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/y/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/y/out.sql index 472068d4b795..dafbb99d5876 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/y/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_geospatial_xy/y/out.sql @@ -1,3 +1,3 @@ SELECT - st_y(t0.pt) AS tmp -FROM t AS t0 \ No newline at end of file + st_y(`t0`.`pt`) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-binary/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-binary/out.sql index 06e3d4e02c04..a3f28427c7d1 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-binary/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-binary/out.sql @@ -1,2 +1,2 @@ SELECT - TO_HEX(MD5(CAST('74657374' AS BYTES FORMAT 'HEX'))) AS tmp \ No newline at end of file + TO_HEX(MD5(CAST('74657374' AS BYTES FORMAT 'HEX'))) AS `tmp` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-string/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-string/out.sql index a9b91d0a8fd8..56feffe64936 100644 --- 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-string/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/md5-test-string/out.sql @@ -1,2 +1,2 @@ SELECT - TO_HEX(MD5('test')) AS tmp \ No newline at end of file + TO_HEX(MD5('test')) AS `tmp` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-binary/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-binary/out.sql index b47ac212bf11..4bb7f967efac 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-binary/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-binary/out.sql @@ -1,2 +1,2 @@ SELECT - SHA(CAST('74657374' AS BYTES FORMAT 'HEX')) AS tmp \ No newline at end of file + SHA(CAST('74657374' AS BYTES FORMAT 'HEX')) AS `tmp` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-string/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-string/out.sql index 006923cc428f..14d1aadd4b12 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-string/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha1-test-string/out.sql @@ -1,2 +1,2 @@ SELECT - SHA('test') AS tmp \ No newline at end of file + SHA('test') AS `tmp` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-binary/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-binary/out.sql index 66952364a23a..94a06c700a99 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-binary/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-binary/out.sql @@ -1,2 +1,2 @@ SELECT - sha256(CAST('74657374' AS BYTES FORMAT 'HEX')) AS tmp \ No newline at end of file + sha256(CAST('74657374' AS BYTES FORMAT 'HEX')) AS `tmp` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-string/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-string/out.sql index eee9f3b6f5c6..1099300c55f1 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-string/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha256-test-string/out.sql @@ -1,2 +1,2 @@ SELECT - sha256('test') AS tmp \ No newline at end of file + sha256('test') AS `tmp` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-binary/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-binary/out.sql index 0a34496a1b95..a4ea136590ee 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-binary/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-binary/out.sql @@ -1,2 +1,2 @@ SELECT - sha512(CAST('74657374' AS BYTES FORMAT 'HEX')) AS tmp \ No newline at end of file + sha512(CAST('74657374' AS BYTES FORMAT 'HEX')) AS `tmp` \ No newline at end of file diff --git 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-string/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-string/out.sql index ba16c3204a26..fe017301d4e8 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-string/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_hashbytes/sha512-test-string/out.sql @@ -1,2 +1,2 @@ SELECT - sha512('test') AS tmp \ No newline at end of file + sha512('test') AS `tmp` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_identical_to/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_identical_to/out.sql index dbd6d0bc38db..78efbe4e8ed6 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_identical_to/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_identical_to/out.sql @@ -1,18 +1,18 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month -FROM functional_alltypes AS t0 + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month` +FROM `functional_alltypes` AS `t0` WHERE - t0.string_col IS NOT DISTINCT FROM 'a' - AND t0.date_string_col IS NOT DISTINCT FROM 'b' \ No newline at end of file + `t0`.`string_col` IS NOT DISTINCT FROM 'a' + AND `t0`.`date_string_col` IS NOT DISTINCT FROM 'b' \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ms/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ms/out.sql index 108f5bdd6655..f6bbd8c50e76 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ms/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ms/out.sql @@ -1,2 +1,2 @@ SELECT - timestamp_millis(-123456789) AS tmp \ No newline at end of file + timestamp_millis(-123456789) AS `tmp` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ns/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ns/out.sql index aaa93e0528b8..71a52f80bc47 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ns/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/ns/out.sql @@ -1,2 +1,2 @@ SELECT - timestamp_micros(CAST(ROUND(1234567891011 / 1000) AS INT64)) AS tmp \ No newline at end of file + timestamp_micros(CAST(ROUND(1234567891011 / 1000) AS INT64)) AS `tmp` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/s/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/s/out.sql index a470e62fd7a5..88b361b392d8 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/s/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/s/out.sql @@ -1,2 +1,2 @@ SELECT - 
timestamp_seconds(123456789) AS tmp \ No newline at end of file + timestamp_seconds(123456789) AS `tmp` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/us/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/us/out.sql index 9c2e88bc505f..9217cf492432 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/us/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_integer_to_timestamp/us/out.sql @@ -1,2 +1,2 @@ SELECT - timestamp_micros(123456789) AS tmp \ No newline at end of file + timestamp_micros(123456789) AS `tmp` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/datetime/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/datetime/out.sql index 4a5e85730e47..bfefa9d352a9 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/datetime/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/datetime/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(HOUR FROM datetime('2017-01-01T04:55:59')) AS tmp \ No newline at end of file + EXTRACT(HOUR FROM datetime('2017-01-01T04:55:59')) AS `tmp` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_time/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_time/out.sql index b0b094d49530..b3acf7e814fc 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_time/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(HOUR FROM time(4, 55, 59)) AS tmp \ No newline at end of file + EXTRACT(HOUR FROM time(4, 55, 59)) AS `tmp` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_timestamp/out.sql index 4a5e85730e47..bfefa9d352a9 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/string_timestamp/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(HOUR FROM datetime('2017-01-01T04:55:59')) AS tmp \ No newline at end of file + EXTRACT(HOUR FROM datetime('2017-01-01T04:55:59')) AS `tmp` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/time/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/time/out.sql index b0b094d49530..b3acf7e814fc 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/time/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(HOUR FROM time(4, 55, 59)) AS tmp \ No newline at end of file + EXTRACT(HOUR FROM time(4, 55, 59)) AS `tmp` \ No newline at end of file diff --git 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/timestamp/out.sql index 4a5e85730e47..bfefa9d352a9 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_timestamp_or_time/timestamp/out.sql @@ -1,2 +1,2 @@ SELECT - EXTRACT(HOUR FROM datetime('2017-01-01T04:55:59')) AS tmp \ No newline at end of file + EXTRACT(HOUR FROM datetime('2017-01-01T04:55:59')) AS `tmp` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_now/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_now/out.sql index c86165f516de..3eecc7336d34 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_now/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_now/out.sql @@ -1,2 +1,2 @@ SELECT - CURRENT_TIMESTAMP() AS TimestampNow \ No newline at end of file + CURRENT_TIMESTAMP() AS `TimestampNow` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_projection_fusion_only_peeks_at_immediate_parent/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_projection_fusion_only_peeks_at_immediate_parent/out.sql index 4e9dfeb746c9..dca603f5351e 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_projection_fusion_only_peeks_at_immediate_parent/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_projection_fusion_only_peeks_at_immediate_parent/out.sql @@ -1,18 +1,19 @@ -WITH t1 AS ( +WITH `t1` AS ( SELECT - CAST(t0.file_date AS DATE) AS file_date, - t0.PARTITIONTIME, - t0.val, - t0.val * 2 AS XYZ - FROM unbound_table AS t0 + CAST(`t0`.`file_date` AS DATE) AS `file_date`, + `t0`.`PARTITIONTIME`, + `t0`.`val`, + `t0`.`val` * 2 AS `XYZ` + FROM `unbound_table` AS `t0` WHERE - t0.PARTITIONTIME < DATE(2017, 1, 1) AND CAST(t0.file_date AS DATE) < DATE(2017, 1, 1) + `t0`.`PARTITIONTIME` < DATE(2017, 1, 1) + AND CAST(`t0`.`file_date` AS DATE) < DATE(2017, 1, 1) ) SELECT - t3.file_date, - t3.PARTITIONTIME, - t3.val, - t3.XYZ -FROM t1 AS t3 -INNER JOIN t1 AS t5 + `t3`.`file_date`, + `t3`.`PARTITIONTIME`, + `t3`.`val`, + `t3`.`XYZ` +FROM `t1` AS `t3` +INNER JOIN `t1` AS `t5` ON TRUE \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_foll/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_foll/out.sql index b429e0b40a4b..d3d34d8aab8c 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_foll/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_foll/out.sql @@ -1,16 +1,16 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month, - AVG(t0.float_col) OVER (PARTITION BY t0.year ORDER BY t0.month ASC RANGE BETWEEN 1 preceding AND CURRENT ROW) AS two_month_avg -FROM functional_alltypes AS t0 \ No newline at end of file + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, 
+ `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month`, + AVG(`t0`.`float_col`) OVER (PARTITION BY `t0`.`year` ORDER BY `t0`.`month` ASC RANGE BETWEEN 1 preceding AND CURRENT ROW) AS `two_month_avg` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_prec/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_prec/out.sql index 28f748f8d387..07d2c13692bc 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_prec/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_range_window_function/prec_prec/out.sql @@ -1,16 +1,16 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month, - AVG(t0.float_col) OVER (PARTITION BY t0.year ORDER BY t0.timestamp_col ASC RANGE BETWEEN 4 preceding AND 2 preceding) AS two_month_avg -FROM functional_alltypes AS t0 \ No newline at end of file + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month`, + AVG(`t0`.`float_col`) OVER (PARTITION BY `t0`.`year` ORDER BY `t0`.`timestamp_col` ASC RANGE BETWEEN 4 preceding AND 2 preceding) AS `two_month_avg` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/difference/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/difference/out.sql index 1fba54566ec5..f606f41d85af 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/difference/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/difference/out.sql @@ -1,11 +1,11 @@ SELECT - t2.a + `t2`.`a` FROM ( SELECT * - FROM t0 AS t0 + FROM `t0` AS `t0` EXCEPT DISTINCT SELECT * - FROM t1 AS t1 -) AS t2 \ No newline at end of file + FROM `t1` AS `t1` +) AS `t2` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/intersect/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/intersect/out.sql index 0300759946af..2d55719ae929 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/intersect/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/intersect/out.sql @@ -1,11 +1,11 @@ SELECT - t2.a + `t2`.`a` FROM ( SELECT * - FROM t0 AS t0 + FROM `t0` AS `t0` INTERSECT DISTINCT SELECT * - FROM t1 AS t1 -) AS t2 \ No newline at end of file + FROM `t1` AS `t1` +) AS `t2` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_all/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_all/out.sql index 65ca427a3cc1..934660ad08b6 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_all/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_all/out.sql @@ -1,11 +1,11 @@ SELECT - 
t2.a + `t2`.`a` FROM ( SELECT * - FROM t0 AS t0 + FROM `t0` AS `t0` UNION ALL SELECT * - FROM t1 AS t1 -) AS t2 \ No newline at end of file + FROM `t1` AS `t1` +) AS `t2` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_distinct/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_distinct/out.sql index f060886f0ca7..a25ce9e14ab5 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_distinct/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_set_operation/union_distinct/out.sql @@ -1,11 +1,11 @@ SELECT - t2.a + `t2`.`a` FROM ( SELECT * - FROM t0 AS t0 + FROM `t0` AS `t0` UNION DISTINCT SELECT * - FROM t1 AS t1 -) AS t2 \ No newline at end of file + FROM `t1` AS `t1` +) AS `t2` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_substring/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_substring/out.sql index dde97310a2d4..1f0e135f1a72 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_substring/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_substring/out.sql @@ -1,3 +1,7 @@ SELECT - IF(3 >= 0, substr(t0.value, 3 + 1, 1), substr(t0.value, LENGTH(t0.value) + 3 + 1, 1)) AS tmp -FROM t AS t0 \ No newline at end of file + IF( + 3 >= 0, + substr(`t0`.`value`, 3 + 1, 1), + substr(`t0`.`value`, LENGTH(`t0`.`value`) + 3 + 1, 1) + ) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-date/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-date/out.sql index cfda92082d60..d777efe8c269 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-date/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-date/out.sql @@ -1,3 +1,3 @@ SELECT - DATE_TRUNC(t0.a, DAY) AS tmp -FROM t AS t0 \ No newline at end of file + DATE_TRUNC(`t0`.`a`, DAY) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-timestamp/out.sql index 5914e551cdfa..49c235e509b8 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/day-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.a, DAY) AS tmp -FROM t AS t0 \ No newline at end of file + TIMESTAMP_TRUNC(`t0`.`a`, DAY) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-time/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-time/out.sql index 627fd52607ea..86b91e57f380 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-time/out.sql @@ -1,3 +1,3 @@ SELECT - TIME_TRUNC(t0.a, HOUR) AS tmp -FROM t AS t0 \ No newline at end of file + TIME_TRUNC(`t0`.`a`, HOUR) AS `tmp` +FROM `t` AS `t0` \ No newline at end 
of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-timestamp/out.sql index 2eb17fc72aa7..73ae9c38d7f3 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/hour-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.a, HOUR) AS tmp -FROM t AS t0 \ No newline at end of file + TIMESTAMP_TRUNC(`t0`.`a`, HOUR) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-time/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-time/out.sql index bc83a4627907..ff72ad1869f1 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-time/out.sql @@ -1,3 +1,3 @@ SELECT - TIME_TRUNC(t0.a, MICROSECOND) AS tmp -FROM t AS t0 \ No newline at end of file + TIME_TRUNC(`t0`.`a`, MICROSECOND) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-timestamp/out.sql index 85129ae2ca98..6e969ad4a4e3 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/micros-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.a, MICROSECOND) AS tmp -FROM t AS t0 \ No newline at end of file + TIMESTAMP_TRUNC(`t0`.`a`, MICROSECOND) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-time/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-time/out.sql index d568ac473f24..320bf3dba9fb 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-time/out.sql @@ -1,3 +1,3 @@ SELECT - TIME_TRUNC(t0.a, MILLISECOND) AS tmp -FROM t AS t0 \ No newline at end of file + TIME_TRUNC(`t0`.`a`, MILLISECOND) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-timestamp/out.sql index eeb61582028e..33dae6b0f68d 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/millis-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.a, MILLISECOND) AS tmp -FROM t AS t0 \ No newline at end of file + TIMESTAMP_TRUNC(`t0`.`a`, MILLISECOND) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-time/out.sql 
b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-time/out.sql index 53a26cff7227..e160ccd4a6e0 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-time/out.sql @@ -1,3 +1,3 @@ SELECT - TIME_TRUNC(t0.a, MINUTE) AS tmp -FROM t AS t0 \ No newline at end of file + TIME_TRUNC(`t0`.`a`, MINUTE) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-timestamp/out.sql index 75ed2f48e4f3..8e8f9b216b1c 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/minute-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.a, MINUTE) AS tmp -FROM t AS t0 \ No newline at end of file + TIMESTAMP_TRUNC(`t0`.`a`, MINUTE) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-date/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-date/out.sql index 08aea93a5d16..18595276e94c 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-date/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-date/out.sql @@ -1,3 +1,3 @@ SELECT - DATE_TRUNC(t0.a, MONTH) AS tmp -FROM t AS t0 \ No newline at end of file + DATE_TRUNC(`t0`.`a`, MONTH) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-timestamp/out.sql index fb5755607afc..a91309d464e8 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/month-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.a, MONTH) AS tmp -FROM t AS t0 \ No newline at end of file + TIMESTAMP_TRUNC(`t0`.`a`, MONTH) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-date/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-date/out.sql index 69fb0c1c0073..b0d7d6961791 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-date/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-date/out.sql @@ -1,3 +1,3 @@ SELECT - DATE_TRUNC(t0.a, QUARTER) AS tmp -FROM t AS t0 \ No newline at end of file + DATE_TRUNC(`t0`.`a`, QUARTER) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-timestamp/out.sql index 6ae384abfe45..0a2d8f60b36f 100644 --- 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/quarter-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.a, QUARTER) AS tmp -FROM t AS t0 \ No newline at end of file + TIMESTAMP_TRUNC(`t0`.`a`, QUARTER) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-time/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-time/out.sql index ed1a4f61a766..dbc7706ed2ea 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-time/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-time/out.sql @@ -1,3 +1,3 @@ SELECT - TIME_TRUNC(t0.a, SECOND) AS tmp -FROM t AS t0 \ No newline at end of file + TIME_TRUNC(`t0`.`a`, SECOND) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-timestamp/out.sql index ca6b3eea53d1..b079b10cf547 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/second-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.a, SECOND) AS tmp -FROM t AS t0 \ No newline at end of file + TIMESTAMP_TRUNC(`t0`.`a`, SECOND) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-date/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-date/out.sql index 5fa3caed910e..2e9393921012 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-date/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-date/out.sql @@ -1,3 +1,3 @@ SELECT - DATE_TRUNC(t0.a, WEEK(MONDAY)) AS tmp -FROM t AS t0 \ No newline at end of file + DATE_TRUNC(`t0`.`a`, WEEK(MONDAY)) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-timestamp/out.sql index 116adb3c510c..97185a8e1b46 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/week-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.a, WEEK(MONDAY)) AS tmp -FROM t AS t0 \ No newline at end of file + TIMESTAMP_TRUNC(`t0`.`a`, WEEK(MONDAY)) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-date/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-date/out.sql index 6c6515bd7737..908cf8cb2652 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-date/out.sql +++ 
b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-date/out.sql @@ -1,3 +1,3 @@ SELECT - DATE_TRUNC(t0.a, YEAR) AS tmp -FROM t AS t0 \ No newline at end of file + DATE_TRUNC(`t0`.`a`, YEAR) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-timestamp/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-timestamp/out.sql index 9b639983ccda..bd256408229b 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-timestamp/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_temporal_truncate/year-timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - TIMESTAMP_TRUNC(t0.a, YEAR) AS tmp -FROM t AS t0 \ No newline at end of file + TIMESTAMP_TRUNC(`t0`.`a`, YEAR) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_no_timezone/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_no_timezone/out.sql index 13bb7ff2b42b..78b8c273dbcf 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_no_timezone/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_no_timezone/out.sql @@ -1,3 +1,3 @@ SELECT - parse_timestamp('%F', t0.date_string_col, 'UTC') AS `StringToTimestamp_date_string_col_ '%F'` -FROM functional_alltypes AS t0 \ No newline at end of file + parse_timestamp('%F', `t0`.`date_string_col`, 'UTC') AS `StringToTimestamp_date_string_col_ '%F'` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_timezone/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_timezone/out.sql index 9e192cd1a351..01ed85e47093 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_timezone/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_to_timestamp_timezone/out.sql @@ -1,3 +1,3 @@ SELECT - parse_timestamp('%F %Z', CONCAT(t0.date_string_col, ' America/New_York'), 'UTC') AS `StringToTimestamp_StringConcat_ '%F %Z'` -FROM functional_alltypes AS t0 \ No newline at end of file + parse_timestamp('%F %Z', CONCAT(`t0`.`date_string_col`, ' America/New_York'), 'UTC') AS `StringToTimestamp_StringConcat_ '%F %Z'` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/days/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/days/out.sql index 45c49c5adc20..35721bf95368 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/days/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/days/out.sql @@ -1,16 +1,16 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month, - AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN INTERVAL '1' DAY preceding AND INTERVAL 0 DAY following) AS win_avg -FROM functional_alltypes AS t0 \ No newline at end of file + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + 
`t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month`, + AVG(`t0`.`float_col`) OVER (ORDER BY `t0`.`timestamp_col` ASC RANGE BETWEEN INTERVAL '1' DAY preceding AND INTERVAL 0 DAY following) AS `win_avg` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/five/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/five/out.sql index 7483e117fe59..8bd0f083e008 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/five/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/five/out.sql @@ -1,16 +1,16 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month, - AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN 5 preceding AND CURRENT ROW) AS win_avg -FROM functional_alltypes AS t0 \ No newline at end of file + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month`, + AVG(`t0`.`float_col`) OVER (ORDER BY `t0`.`timestamp_col` ASC RANGE BETWEEN 5 preceding AND CURRENT ROW) AS `win_avg` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/hours/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/hours/out.sql index 497fe55410fe..31ee99c82c64 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/hours/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/hours/out.sql @@ -1,16 +1,16 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month, - AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN INTERVAL '1' HOUR preceding AND INTERVAL 0 HOUR following) AS win_avg -FROM functional_alltypes AS t0 \ No newline at end of file + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month`, + AVG(`t0`.`float_col`) OVER (ORDER BY `t0`.`timestamp_col` ASC RANGE BETWEEN INTERVAL '1' HOUR preceding AND INTERVAL 0 HOUR following) AS `win_avg` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/micros/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/micros/out.sql index 37c7b9452f03..61ec48e6dea0 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/micros/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/micros/out.sql 
@@ -1,16 +1,16 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month, - AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN INTERVAL '1' MICROSECOND preceding AND INTERVAL 0 MICROSECOND following) AS win_avg -FROM functional_alltypes AS t0 \ No newline at end of file + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month`, + AVG(`t0`.`float_col`) OVER (ORDER BY `t0`.`timestamp_col` ASC RANGE BETWEEN INTERVAL '1' MICROSECOND preceding AND INTERVAL 0 MICROSECOND following) AS `win_avg` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/minutes/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/minutes/out.sql index ab8f45c80717..20f4807e69cc 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/minutes/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/minutes/out.sql @@ -1,16 +1,16 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month, - AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN INTERVAL '1' MINUTE preceding AND INTERVAL 0 MINUTE following) AS win_avg -FROM functional_alltypes AS t0 \ No newline at end of file + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month`, + AVG(`t0`.`float_col`) OVER (ORDER BY `t0`.`timestamp_col` ASC RANGE BETWEEN INTERVAL '1' MINUTE preceding AND INTERVAL 0 MINUTE following) AS `win_avg` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/seconds/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/seconds/out.sql index 88e0656e843b..7083aacf02c2 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/seconds/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/seconds/out.sql @@ -1,16 +1,16 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month, - AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN INTERVAL '1' SECOND preceding AND INTERVAL 0 SECOND following) AS win_avg -FROM functional_alltypes AS t0 \ No newline at end of file + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month`, + AVG(`t0`.`float_col`) OVER (ORDER BY `t0`.`timestamp_col` ASC RANGE 
BETWEEN INTERVAL '1' SECOND preceding AND INTERVAL 0 SECOND following) AS `win_avg` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/two_days/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/two_days/out.sql index b2e631f727c3..aca5354fb4c5 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/two_days/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/two_days/out.sql @@ -1,16 +1,16 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month, - AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN INTERVAL (EXTRACT(DAY FROM INTERVAL '1' DAY) * 2) DAY preceding AND INTERVAL 0 DAY following) AS win_avg -FROM functional_alltypes AS t0 \ No newline at end of file + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month`, + AVG(`t0`.`float_col`) OVER (ORDER BY `t0`.`timestamp_col` ASC RANGE BETWEEN INTERVAL (EXTRACT(DAY FROM INTERVAL '1' DAY) * 2) DAY preceding AND INTERVAL 0 DAY following) AS `win_avg` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/week/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/week/out.sql index db904f4be055..d29dbecaadd0 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/week/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_trailing_range_window/week/out.sql @@ -1,16 +1,16 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month, - AVG(t0.float_col) OVER (ORDER BY t0.timestamp_col ASC RANGE BETWEEN INTERVAL '1' WEEK preceding AND INTERVAL 0 WEEK following) AS win_avg -FROM functional_alltypes AS t0 \ No newline at end of file + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month`, + AVG(`t0`.`float_col`) OVER (ORDER BY `t0`.`timestamp_col` ASC RANGE BETWEEN INTERVAL '1' WEEK preceding AND INTERVAL 0 WEEK following) AS `win_avg` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/False/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/False/out.sql index de91c71e2d5d..fb68ee11ccd0 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/False/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/False/out.sql @@ -1,23 +1,23 @@ SELECT - t1.id, - t1.bool_col, - t1.tinyint_col, - t1.smallint_col, - t1.int_col, - t1.bigint_col, - t1.float_col, - t1.double_col, - t1.date_string_col, - t1.string_col, - 
t1.timestamp_col, - t1.year, - t1.month + `t1`.`id`, + `t1`.`bool_col`, + `t1`.`tinyint_col`, + `t1`.`smallint_col`, + `t1`.`int_col`, + `t1`.`bigint_col`, + `t1`.`float_col`, + `t1`.`double_col`, + `t1`.`date_string_col`, + `t1`.`string_col`, + `t1`.`timestamp_col`, + `t1`.`year`, + `t1`.`month` FROM ( SELECT * - FROM functional_alltypes AS t0 + FROM `functional_alltypes` AS `t0` UNION ALL SELECT * - FROM functional_alltypes AS t0 -) AS t1 \ No newline at end of file + FROM `functional_alltypes` AS `t0` +) AS `t1` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/True/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/True/out.sql index 84d01ac2951a..26f4fe953438 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/True/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union/True/out.sql @@ -1,23 +1,23 @@ SELECT - t1.id, - t1.bool_col, - t1.tinyint_col, - t1.smallint_col, - t1.int_col, - t1.bigint_col, - t1.float_col, - t1.double_col, - t1.date_string_col, - t1.string_col, - t1.timestamp_col, - t1.year, - t1.month + `t1`.`id`, + `t1`.`bool_col`, + `t1`.`tinyint_col`, + `t1`.`smallint_col`, + `t1`.`int_col`, + `t1`.`bigint_col`, + `t1`.`float_col`, + `t1`.`double_col`, + `t1`.`date_string_col`, + `t1`.`string_col`, + `t1`.`timestamp_col`, + `t1`.`year`, + `t1`.`month` FROM ( SELECT * - FROM functional_alltypes AS t0 + FROM `functional_alltypes` AS `t0` UNION DISTINCT SELECT * - FROM functional_alltypes AS t0 -) AS t1 \ No newline at end of file + FROM `functional_alltypes` AS `t0` +) AS `t1` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-False/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-False/out.sql index 3a6924cb5b2e..3252f19058dc 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-False/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-False/out.sql @@ -1,29 +1,29 @@ -WITH t1 AS ( +WITH `t1` AS ( SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 + `t0`.`string_col`, + SUM(`t0`.`double_col`) AS `metric` + FROM `functional_alltypes` AS `t0` GROUP BY 1 ) SELECT - t7.string_col, - t7.metric + `t7`.`string_col`, + `t7`.`metric` FROM ( SELECT - t5.string_col, - t5.metric + `t5`.`string_col`, + `t5`.`metric` FROM ( SELECT * - FROM t1 AS t2 + FROM `t1` AS `t2` UNION ALL SELECT * - FROM t1 AS t4 - ) AS t5 + FROM `t1` AS `t4` + ) AS `t5` UNION ALL SELECT * - FROM t1 AS t3 -) AS t7 \ No newline at end of file + FROM `t1` AS `t3` +) AS `t7` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-True/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-True/out.sql index cc408f613945..fd5a8a571b37 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-True/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/False-True/out.sql @@ -1,29 +1,29 @@ -WITH t1 AS ( +WITH `t1` AS ( SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 + `t0`.`string_col`, + SUM(`t0`.`double_col`) AS `metric` + FROM `functional_alltypes` AS `t0` GROUP BY 1 ) SELECT - t7.string_col, - t7.metric + `t7`.`string_col`, + `t7`.`metric` FROM ( SELECT - 
t5.string_col, - t5.metric + `t5`.`string_col`, + `t5`.`metric` FROM ( SELECT * - FROM t1 AS t2 + FROM `t1` AS `t2` UNION DISTINCT SELECT * - FROM t1 AS t4 - ) AS t5 + FROM `t1` AS `t4` + ) AS `t5` UNION ALL SELECT * - FROM t1 AS t3 -) AS t7 \ No newline at end of file + FROM `t1` AS `t3` +) AS `t7` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-False/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-False/out.sql index 81d85c90cbb2..40e1cd0c436b 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-False/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-False/out.sql @@ -1,29 +1,29 @@ -WITH t1 AS ( +WITH `t1` AS ( SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 + `t0`.`string_col`, + SUM(`t0`.`double_col`) AS `metric` + FROM `functional_alltypes` AS `t0` GROUP BY 1 ) SELECT - t7.string_col, - t7.metric + `t7`.`string_col`, + `t7`.`metric` FROM ( SELECT - t5.string_col, - t5.metric + `t5`.`string_col`, + `t5`.`metric` FROM ( SELECT * - FROM t1 AS t2 + FROM `t1` AS `t2` UNION ALL SELECT * - FROM t1 AS t4 - ) AS t5 + FROM `t1` AS `t4` + ) AS `t5` UNION DISTINCT SELECT * - FROM t1 AS t3 -) AS t7 \ No newline at end of file + FROM `t1` AS `t3` +) AS `t7` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-True/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-True/out.sql index 77a087cb3362..e5b9e52f6722 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-True/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_union_cte/True-True/out.sql @@ -1,29 +1,29 @@ -WITH t1 AS ( +WITH `t1` AS ( SELECT - t0.string_col, - SUM(t0.double_col) AS metric - FROM functional_alltypes AS t0 + `t0`.`string_col`, + SUM(`t0`.`double_col`) AS `metric` + FROM `functional_alltypes` AS `t0` GROUP BY 1 ) SELECT - t7.string_col, - t7.metric + `t7`.`string_col`, + `t7`.`metric` FROM ( SELECT - t5.string_col, - t5.metric + `t5`.`string_col`, + `t5`.`metric` FROM ( SELECT * - FROM t1 AS t2 + FROM `t1` AS `t2` UNION DISTINCT SELECT * - FROM t1 AS t4 - ) AS t5 + FROM `t1` AS `t4` + ) AS `t5` UNION DISTINCT SELECT * - FROM t1 AS t3 -) AS t7 \ No newline at end of file + FROM `t1` AS `t3` +) AS `t7` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_one_unnest.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_one_unnest.sql index 1efaf2f26d3a..58998846f040 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_one_unnest.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_one_unnest.sql @@ -1,16 +1,16 @@ SELECT - t0.rowindex, - IF(pos = pos_2, repeated_struct_col, NULL) AS repeated_struct_col -FROM array_test AS t0 -CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(t0.repeated_struct_col)) - 1)) AS pos -CROSS JOIN UNNEST(t0.repeated_struct_col) AS repeated_struct_col WITH OFFSET AS pos_2 + `t0`.`rowindex`, + IF(pos = pos_2, `repeated_struct_col`, NULL) AS `repeated_struct_col` +FROM `array_test` AS `t0` +CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(`t0`.`repeated_struct_col`)) - 1)) AS pos +CROSS JOIN UNNEST(`t0`.`repeated_struct_col`) AS `repeated_struct_col` 
WITH OFFSET AS pos_2 WHERE pos = pos_2 OR ( pos > ( - ARRAY_LENGTH(t0.repeated_struct_col) - 1 + ARRAY_LENGTH(`t0`.`repeated_struct_col`) - 1 ) AND pos_2 = ( - ARRAY_LENGTH(t0.repeated_struct_col) - 1 + ARRAY_LENGTH(`t0`.`repeated_struct_col`) - 1 ) ) \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_two_unnests.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_two_unnests.sql index febc60d6c4df..3a5cab0285bf 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_two_unnests.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_unnest/out_two_unnests.sql @@ -1,32 +1,32 @@ SELECT - IF(pos = pos_2, level_two, NULL) AS level_two + IF(pos = pos_2, `level_two`, NULL) AS `level_two` FROM ( SELECT - t0.rowindex, - IF(pos = pos_2, level_one, NULL).nested_struct_col AS level_one - FROM array_test AS t0 - CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(t0.repeated_struct_col)) - 1)) AS pos - CROSS JOIN UNNEST(t0.repeated_struct_col) AS level_one WITH OFFSET AS pos_2 + `t0`.`rowindex`, + IF(pos = pos_2, `level_one`, NULL).`nested_struct_col` AS `level_one` + FROM `array_test` AS `t0` + CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(`t0`.`repeated_struct_col`)) - 1)) AS pos + CROSS JOIN UNNEST(`t0`.`repeated_struct_col`) AS `level_one` WITH OFFSET AS pos_2 WHERE pos = pos_2 OR ( pos > ( - ARRAY_LENGTH(t0.repeated_struct_col) - 1 + ARRAY_LENGTH(`t0`.`repeated_struct_col`) - 1 ) AND pos_2 = ( - ARRAY_LENGTH(t0.repeated_struct_col) - 1 + ARRAY_LENGTH(`t0`.`repeated_struct_col`) - 1 ) ) -) AS t1 -CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(t1.level_one)) - 1)) AS pos -CROSS JOIN UNNEST(t1.level_one) AS level_two WITH OFFSET AS pos_2 +) AS `t1` +CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(`t1`.`level_one`)) - 1)) AS pos +CROSS JOIN UNNEST(`t1`.`level_one`) AS `level_two` WITH OFFSET AS pos_2 WHERE pos = pos_2 OR ( pos > ( - ARRAY_LENGTH(t1.level_one) - 1 + ARRAY_LENGTH(`t1`.`level_one`) - 1 ) AND pos_2 = ( - ARRAY_LENGTH(t1.level_one) - 1 + ARRAY_LENGTH(`t1`.`level_one`) - 1 ) ) \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/current_foll/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/current_foll/out.sql index f7af87cc3889..7621ca814cca 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/current_foll/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/current_foll/out.sql @@ -1,16 +1,16 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month, - AVG(t0.float_col) OVER (PARTITION BY t0.year ORDER BY t0.timestamp_col ASC ROWS BETWEEN CURRENT ROW AND 2 following) AS win_avg -FROM functional_alltypes AS t0 \ No newline at end of file + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month`, + AVG(`t0`.`float_col`) OVER (PARTITION BY `t0`.`year` ORDER BY `t0`.`timestamp_col` ASC ROWS BETWEEN CURRENT ROW AND 2 following) AS `win_avg` +FROM `functional_alltypes` AS `t0` \ No 
newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_current/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_current/out.sql index 812d5c8e17fc..4487cb8125d5 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_current/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_current/out.sql @@ -1,16 +1,16 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month, - AVG(t0.float_col) OVER (PARTITION BY t0.year ORDER BY t0.timestamp_col ASC ROWS BETWEEN 1 preceding AND CURRENT ROW) AS win_avg -FROM functional_alltypes AS t0 \ No newline at end of file + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month`, + AVG(`t0`.`float_col`) OVER (PARTITION BY `t0`.`year` ORDER BY `t0`.`timestamp_col` ASC ROWS BETWEEN 1 preceding AND CURRENT ROW) AS `win_avg` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_prec/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_prec/out.sql index dc3996ac2b7d..b925096a5fdc 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_prec/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_function/prec_prec/out.sql @@ -1,16 +1,16 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month, - AVG(t0.float_col) OVER (PARTITION BY t0.year ORDER BY t0.timestamp_col ASC ROWS BETWEEN 4 preceding AND 2 preceding) AS win_avg -FROM functional_alltypes AS t0 \ No newline at end of file + `t0`.`id`, + `t0`.`bool_col`, + `t0`.`tinyint_col`, + `t0`.`smallint_col`, + `t0`.`int_col`, + `t0`.`bigint_col`, + `t0`.`float_col`, + `t0`.`double_col`, + `t0`.`date_string_col`, + `t0`.`string_col`, + `t0`.`timestamp_col`, + `t0`.`year`, + `t0`.`month`, + AVG(`t0`.`float_col`) OVER (PARTITION BY `t0`.`year` ORDER BY `t0`.`timestamp_col` ASC ROWS BETWEEN 4 preceding AND 2 preceding) AS `win_avg` +FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/following/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/following/out.sql index 4efa722dccbc..d84c347e7921 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/following/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/following/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(t0.a) OVER (ROWS BETWEEN 1 following AND UNBOUNDED FOLLOWING) AS tmp -FROM t AS t0 \ No newline at end of file + SUM(`t0`.`a`) OVER (ROWS BETWEEN 1 following AND UNBOUNDED FOLLOWING) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git 
a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/preceding/out.sql b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/preceding/out.sql index fe5c57096cc2..ae094f1c4b92 100644 --- a/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/preceding/out.sql +++ b/ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_window_unbounded/preceding/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(t0.a) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 preceding) AS tmp -FROM t AS t0 \ No newline at end of file + SUM(`t0`.`a`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 preceding) AS `tmp` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/count/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/count/out.sql index af5c023dd5ff..1b5b80c2db62 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/count/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/count/out.sql @@ -1,3 +1,3 @@ SELECT - countIf(t0.double_col, t0.bigint_col < 70) AS "Count(double_col, Less(bigint_col, 70))" -FROM functional_alltypes AS t0 \ No newline at end of file + countIf("t0"."double_col", "t0"."bigint_col" < 70) AS "Count(double_col, Less(bigint_col, 70))" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/max/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/max/out.sql index b2708dc6d6f7..729336a0d44b 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/max/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/max/out.sql @@ -1,3 +1,3 @@ SELECT - maxIf(t0.double_col, t0.bigint_col < 70) AS "Max(double_col, Less(bigint_col, 70))" -FROM functional_alltypes AS t0 \ No newline at end of file + maxIf("t0"."double_col", "t0"."bigint_col" < 70) AS "Max(double_col, Less(bigint_col, 70))" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/mean/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/mean/out.sql index 6280758e636b..a3582e4cbdcf 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/mean/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/mean/out.sql @@ -1,3 +1,3 @@ SELECT - avgIf(t0.double_col, t0.bigint_col < 70) AS "Mean(double_col, Less(bigint_col, 70))" -FROM functional_alltypes AS t0 \ No newline at end of file + avgIf("t0"."double_col", "t0"."bigint_col" < 70) AS "Mean(double_col, Less(bigint_col, 70))" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/min/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/min/out.sql index 052e8bf363ea..377d48bf68d1 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/min/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/min/out.sql @@ -1,3 +1,3 @@ SELECT - minIf(t0.double_col, t0.bigint_col < 70) AS "Min(double_col, 
Less(bigint_col, 70))" -FROM functional_alltypes AS t0 \ No newline at end of file + minIf("t0"."double_col", "t0"."bigint_col" < 70) AS "Min(double_col, Less(bigint_col, 70))" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/std/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/std/out.sql index fc3700a892ee..a31236bb2501 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/std/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/std/out.sql @@ -1,3 +1,3 @@ SELECT - stddevSampIf(t0.double_col, t0.bigint_col < 70) AS "StandardDev(double_col, Less(bigint_col, 70))" -FROM functional_alltypes AS t0 \ No newline at end of file + stddevSampIf("t0"."double_col", "t0"."bigint_col" < 70) AS "StandardDev(double_col, Less(bigint_col, 70))" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/sum/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/sum/out.sql index 60e9ba221527..f5b04fc2ab38 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/sum/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/sum/out.sql @@ -1,3 +1,3 @@ SELECT - sumIf(t0.double_col, t0.bigint_col < 70) AS "Sum(double_col, Less(bigint_col, 70))" -FROM functional_alltypes AS t0 \ No newline at end of file + sumIf("t0"."double_col", "t0"."bigint_col" < 70) AS "Sum(double_col, Less(bigint_col, 70))" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/var/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/var/out.sql index d21dbed60a49..bd479ba09a53 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/var/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_reduction_where/var/out.sql @@ -1,3 +1,3 @@ SELECT - varSampIf(t0.double_col, t0.bigint_col < 70) AS "Variance(double_col, Less(bigint_col, 70))" -FROM functional_alltypes AS t0 \ No newline at end of file + varSampIf("t0"."double_col", "t0"."bigint_col" < 70) AS "Variance(double_col, Less(bigint_col, 70))" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_std_var_pop/std/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_std_var_pop/std/out.sql index 16791db4e361..f4d55c3823ff 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_std_var_pop/std/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_std_var_pop/std/out.sql @@ -1,3 +1,3 @@ SELECT - stddevPopIf(t0.double_col, t0.bigint_col < 70) AS "StandardDev(double_col, Less(bigint_col, 70))" -FROM functional_alltypes AS t0 \ No newline at end of file + stddevPopIf("t0"."double_col", "t0"."bigint_col" < 70) AS "StandardDev(double_col, Less(bigint_col, 70))" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_std_var_pop/var/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_std_var_pop/var/out.sql index 
3bc94bce8a9f..bbde294172d1 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_std_var_pop/var/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_aggregations/test_std_var_pop/var/out.sql @@ -1,3 +1,3 @@ SELECT - varPopIf(t0.double_col, t0.bigint_col < 70) AS "Variance(double_col, Less(bigint_col, 70))" -FROM functional_alltypes AS t0 \ No newline at end of file + varPopIf("t0"."double_col", "t0"."bigint_col" < 70) AS "Variance(double_col, Less(bigint_col, 70))" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float/out.sql index f97b0b7e0747..6e208faf9e4b 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float/out.sql @@ -1,3 +1,3 @@ SELECT - t0.double_col -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."double_col" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float32/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float32/out.sql index 98753d331890..a2ad2afc9e9d 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float32/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float32/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.double_col AS Nullable(Float32)) AS "Cast(double_col, float32)" -FROM functional_alltypes AS t0 \ No newline at end of file + CAST("t0"."double_col" AS Nullable(Float32)) AS "Cast(double_col, float32)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float64/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float64/out.sql index eccf0cf30586..9c0d96c91a3f 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float64/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/float64/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.double_col AS Float64) AS "Cast(double_col, !float64)" -FROM functional_alltypes AS t0 \ No newline at end of file + CAST("t0"."double_col" AS Float64) AS "Cast(double_col, !float64)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/int16/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/int16/out.sql index b7ed3873ec03..3cf7ca0c8451 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/int16/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/int16/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.double_col AS Nullable(Int16)) AS "Cast(double_col, int16)" -FROM functional_alltypes AS t0 \ No newline at end of file + CAST("t0"."double_col" AS Nullable(Int16)) AS "Cast(double_col, int16)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/int8/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/int8/out.sql 
index e748f08dfdf7..798bce54ff29 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/int8/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_double_col/int8/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.double_col AS Nullable(Int8)) AS "Cast(double_col, int8)" -FROM functional_alltypes AS t0 \ No newline at end of file + CAST("t0"."double_col" AS Nullable(Int8)) AS "Cast(double_col, int8)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/date/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/date/out.sql index 4af6d58848b2..76d4640c9d6f 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/date/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/date/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.string_col AS Nullable(DATE)) AS "Cast(string_col, date)" -FROM functional_alltypes AS t0 \ No newline at end of file + CAST("t0"."string_col" AS Nullable(DATE)) AS "Cast(string_col, date)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/int16/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/int16/out.sql index ef55adad9689..2afe5df2b14f 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/int16/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/int16/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.string_col AS Nullable(Int16)) AS "Cast(string_col, int16)" -FROM functional_alltypes AS t0 \ No newline at end of file + CAST("t0"."string_col" AS Nullable(Int16)) AS "Cast(string_col, int16)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/int8/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/int8/out.sql index 012a8420810c..75bd2d4778ca 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/int8/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/int8/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.string_col AS Nullable(Int8)) AS "Cast(string_col, int8)" -FROM functional_alltypes AS t0 \ No newline at end of file + CAST("t0"."string_col" AS Nullable(Int8)) AS "Cast(string_col, int8)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/mapstring_int64/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/mapstring_int64/out.sql index fd415c3d093a..1e26580d6098 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/mapstring_int64/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/mapstring_int64/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.string_col AS Map(String, Nullable(Int64))) AS "Cast(string_col, !map)" -FROM functional_alltypes AS t0 \ No newline at end of file + CAST("t0"."string_col" AS Map(String, Nullable(Int64))) AS "Cast(string_col, !map)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git 
a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/string/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/string/out.sql index 188a52291632..f819d430aab0 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/string/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/string/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.string_col AS String) AS "Cast(string_col, !string)" -FROM functional_alltypes AS t0 \ No newline at end of file + CAST("t0"."string_col" AS String) AS "Cast(string_col, !string)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/structa_string_b_int64/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/structa_string_b_int64/out.sql index b7eb2caf81cd..7ccbd7140a78 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/structa_string_b_int64/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/structa_string_b_int64/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.string_col AS Tuple(a Nullable(String), b Nullable(Int64))) AS "Cast(string_col, !struct)" -FROM functional_alltypes AS t0 \ No newline at end of file + CAST("t0"."string_col" AS Tuple(a Nullable(String), b Nullable(Int64))) AS "Cast(string_col, !struct)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/timestamp/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/timestamp/out.sql index 7141924de135..65f976d317ba 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/timestamp/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_cast_string_col/timestamp/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.string_col AS Nullable(DATETIME)) AS "Cast(string_col, timestamp)" -FROM functional_alltypes AS t0 \ No newline at end of file + CAST("t0"."string_col" AS Nullable(DATETIME)) AS "Cast(string_col, timestamp)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_column_regexp_extract/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_column_regexp_extract/out.sql index 17cb19de2901..2da83aca651e 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_column_regexp_extract/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_column_regexp_extract/out.sql @@ -1,7 +1,9 @@ SELECT CASE - WHEN notEmpty(extractGroups(CAST(t0.string_col AS String), CONCAT('(', '[\d]+', ')'))[3 + 1]) - THEN extractGroups(CAST(t0.string_col AS String), CONCAT('(', '[\d]+', ')'))[3 + 1] + WHEN notEmpty( + extractGroups(CAST("t0"."string_col" AS String), CONCAT('(', '[\d]+', ')'))[3 + 1] + ) + THEN extractGroups(CAST("t0"."string_col" AS String), CONCAT('(', '[\d]+', ')'))[3 + 1] ELSE NULL END AS "RegexExtract(string_col, '[\\d]+', 3)" -FROM functional_alltypes AS t0 \ No newline at end of file +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_column_regexp_replace/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_column_regexp_replace/out.sql index 2d24cf4611e5..5eb21c433e57 
100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_column_regexp_replace/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_column_regexp_replace/out.sql @@ -1,3 +1,3 @@ SELECT - replaceRegexpAll(t0.string_col, '[\d]+', 'aaa') AS "RegexReplace(string_col, '[\\d]+', 'aaa')" -FROM functional_alltypes AS t0 \ No newline at end of file + replaceRegexpAll("t0"."string_col", '[\d]+', 'aaa') AS "RegexReplace(string_col, '[\\d]+', 'aaa')" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out1.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out1.sql index 30f8b69f93bc..ceb370f312cc 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out1.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out1.sql @@ -1,3 +1,3 @@ SELECT - GREATEST(t0.int_col, 10) AS "Greatest()" -FROM functional_alltypes AS t0 \ No newline at end of file + GREATEST("t0"."int_col", 10) AS "Greatest()" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out2.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out2.sql index a55d6c21b8bb..85601ed38dad 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out2.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out2.sql @@ -1,3 +1,3 @@ SELECT - GREATEST(t0.int_col, t0.bigint_col) AS "Greatest()" -FROM functional_alltypes AS t0 \ No newline at end of file + GREATEST("t0"."int_col", "t0"."bigint_col") AS "Greatest()" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out3.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out3.sql index 8082272f6adf..3dc8affe7d89 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out3.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out3.sql @@ -1,3 +1,3 @@ SELECT - LEAST(t0.int_col, 10) AS "Least()" -FROM functional_alltypes AS t0 \ No newline at end of file + LEAST("t0"."int_col", 10) AS "Least()" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out4.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out4.sql index 81663e74f595..4f57aa348d16 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out4.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_greatest_least/out4.sql @@ -1,3 +1,3 @@ SELECT - LEAST(t0.int_col, t0.bigint_col) AS "Least()" -FROM functional_alltypes AS t0 \ No newline at end of file + LEAST("t0"."int_col", "t0"."bigint_col") AS "Least()" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_group_concat/comma_none/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_group_concat/comma_none/out.sql index 267418e4dbbb..1c85b5fbb75a 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_group_concat/comma_none/out.sql +++ 
b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_group_concat/comma_none/out.sql @@ -1,7 +1,7 @@ SELECT CASE - WHEN empty(groupArray(t0.string_col)) + WHEN empty(groupArray("t0"."string_col")) THEN NULL - ELSE arrayStringConcat(groupArray(t0.string_col), ',') + ELSE arrayStringConcat(groupArray("t0"."string_col"), ',') END AS "GroupConcat(string_col, ',')" -FROM functional_alltypes AS t0 \ No newline at end of file +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_group_concat/comma_zero/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_group_concat/comma_zero/out.sql index 007b81ddbd2b..7846aad19960 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_group_concat/comma_zero/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_group_concat/comma_zero/out.sql @@ -1,7 +1,7 @@ SELECT CASE - WHEN empty(groupArrayIf(t0.string_col, t0.bool_col = 0)) + WHEN empty(groupArrayIf("t0"."string_col", "t0"."bool_col" = 0)) THEN NULL - ELSE arrayStringConcat(groupArrayIf(t0.string_col, t0.bool_col = 0), ',') + ELSE arrayStringConcat(groupArrayIf("t0"."string_col", "t0"."bool_col" = 0), ',') END AS "GroupConcat(string_col, ',', Equals(bool_col, 0))" -FROM functional_alltypes AS t0 \ No newline at end of file +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_group_concat/minus_none/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_group_concat/minus_none/out.sql index d0c6e0c84539..b68d30a94cec 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_group_concat/minus_none/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_group_concat/minus_none/out.sql @@ -1,7 +1,7 @@ SELECT CASE - WHEN empty(groupArray(t0.string_col)) + WHEN empty(groupArray("t0"."string_col")) THEN NULL - ELSE arrayStringConcat(groupArray(t0.string_col), '-') + ELSE arrayStringConcat(groupArray("t0"."string_col"), '-') END AS "GroupConcat(string_col, '-')" -FROM functional_alltypes AS t0 \ No newline at end of file +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_hash/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_hash/out.sql index 5f84917b8eb9..bfd18d927565 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_hash/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_hash/out.sql @@ -1,3 +1,3 @@ SELECT - sipHash64(t0.string_col) AS "Hash(string_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + sipHash64("t0"."string_col") AS "Hash(string_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bigint_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bigint_col/out.sql index b2e0d4507c4f..32ed9ce5dec7 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bigint_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bigint_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.bigint_col -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."bigint_col" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git 
a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bool_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bool_col/out.sql index d967873daf1f..581ad8820c60 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bool_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/bool_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.bool_col -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/date_string_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/date_string_col/out.sql index c8ad0f838a31..95fc17b5c1b5 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/date_string_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/date_string_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.date_string_col -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."date_string_col" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/double_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/double_col/out.sql index f97b0b7e0747..6e208faf9e4b 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/double_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/double_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.double_col -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."double_col" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/float_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/float_col/out.sql index 33277148af85..011938c86330 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/float_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/float_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.float_col -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."float_col" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/id/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/id/out.sql index b4012dbb377d..e162ed46b465 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/id/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/id/out.sql @@ -1,3 +1,3 @@ SELECT - t0.id -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."id" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/int_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/int_col/out.sql index 6b3541821ed4..ad1282f2b06b 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/int_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/int_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."int_col" +FROM 
"functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/month/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/month/out.sql index d0eb5143c2b4..8cf6f8be9ab1 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/month/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/month/out.sql @@ -1,3 +1,3 @@ SELECT - t0.month -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."month" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/smallint_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/smallint_col/out.sql index dfcd8e0c0149..9d35742155b4 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/smallint_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/smallint_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.smallint_col -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."smallint_col" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/string_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/string_col/out.sql index cfe88fb96a8c..11a14ce0716c 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/string_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/string_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.string_col -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."string_col" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/timestamp_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/timestamp_col/out.sql index 860302ea8039..cffb9b1135c6 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/timestamp_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/timestamp_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.timestamp_col -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."timestamp_col" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/tinyint_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/tinyint_col/out.sql index c9f057e3aa11..8eeeecefaf66 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/tinyint_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/tinyint_col/out.sql @@ -1,3 +1,3 @@ SELECT - t0.tinyint_col -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."tinyint_col" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/year/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/year/out.sql index 5295b8fc6a8d..0afd5e4842d5 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/year/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_noop_cast/year/out.sql @@ -1,3 +1,3 @@ SELECT - t0.year 
-FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."year" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_find/out1.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_find/out1.sql index fb3ba7be84f7..0647f5bfb1c6 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_find/out1.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_find/out1.sql @@ -1,3 +1,3 @@ SELECT - locate(t0.string_col, 'a') - 1 AS "StringFind(string_col, 'a')" -FROM functional_alltypes AS t0 \ No newline at end of file + locate("t0"."string_col", 'a') - 1 AS "StringFind(string_col, 'a')" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_find/out2.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_find/out2.sql index f80e7854276b..36e1ffb4878c 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_find/out2.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_find/out2.sql @@ -1,3 +1,3 @@ SELECT - locate(t0.string_col, t0.string_col) - 1 AS "StringFind(string_col, string_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + locate("t0"."string_col", "t0"."string_col") - 1 AS "StringFind(string_col, string_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_find_in_set/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_find_in_set/out.sql index 36d0b0d247ca..a418947dff5e 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_find_in_set/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_find_in_set/out.sql @@ -1,3 +1,3 @@ SELECT - indexOf(['a', 'b', 'c'], t0.string_col) - 1 AS "FindInSet(string_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + indexOf(['a', 'b', 'c'], "t0"."string_col") - 1 AS "FindInSet(string_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out1.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out1.sql index 8323a166f3d4..f01d4b40fac4 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out1.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out1.sql @@ -1,3 +1,3 @@ SELECT - t0.string_col LIKE 'foo%' AS "StringSQLLike(string_col, 'foo%')" -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."string_col" LIKE 'foo%' AS "StringSQLLike(string_col, 'foo%')" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out2.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out2.sql index 8ec21cad88cf..89813e81d3b0 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out2.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out2.sql @@ -1,3 +1,3 @@ SELECT - t0.string_col LIKE 'foo%' OR t0.string_col LIKE '%bar' AS 
"Or(StringSQLLike(string_col, 'foo%'), StringSQLLike(string_col, '%bar'))" -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."string_col" LIKE 'foo%' OR "t0"."string_col" LIKE '%bar' AS "Or(StringSQLLike(string_col, 'foo%'), StringSQLLike(string_col, '%bar'))" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_substring/out1.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_substring/out1.sql index c63c6f5051b3..191676d95a33 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_substring/out1.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_substring/out1.sql @@ -1,7 +1,7 @@ SELECT CASE WHEN 2 >= 0 - THEN SUBSTRING(t0.string_col, 2 + 1) - ELSE SUBSTRING(t0.string_col, LENGTH(t0.string_col) + 2 + 1) + THEN SUBSTRING("t0"."string_col", 2 + 1) + ELSE SUBSTRING("t0"."string_col", LENGTH("t0"."string_col") + 2 + 1) END AS "Substring(string_col, 2)" -FROM functional_alltypes AS t0 \ No newline at end of file +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_substring/out2.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_substring/out2.sql index c121d8cb9bda..d45d241e8840 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_substring/out2.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_substring/out2.sql @@ -1,7 +1,7 @@ SELECT CASE WHEN 0 >= 0 - THEN SUBSTRING(t0.string_col, 0 + 1, 3) - ELSE SUBSTRING(t0.string_col, LENGTH(t0.string_col) + 0 + 1, 3) + THEN SUBSTRING("t0"."string_col", 0 + 1, 3) + ELSE SUBSTRING("t0"."string_col", LENGTH("t0"."string_col") + 0 + 1, 3) END AS "Substring(string_col, 0, 3)" -FROM functional_alltypes AS t0 \ No newline at end of file +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_timestamp_cast/out1.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_timestamp_cast/out1.sql index 1491cfdc2a67..8c45bbd7d6d7 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_timestamp_cast/out1.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_timestamp_cast/out1.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.timestamp_col AS DATETIME) AS "Cast(timestamp_col, !timestamp)" -FROM functional_alltypes AS t0 \ No newline at end of file + CAST("t0"."timestamp_col" AS DATETIME) AS "Cast(timestamp_col, !timestamp)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_timestamp_cast/out2.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_timestamp_cast/out2.sql index d6f185cd8c53..daa10c3b5e60 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_timestamp_cast/out2.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_timestamp_cast/out2.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.int_col AS DATETIME) AS "Cast(int_col, !timestamp)" -FROM functional_alltypes AS t0 \ No newline at end of file + CAST("t0"."int_col" AS DATETIME) AS "Cast(int_col, !timestamp)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git 
a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_timestamp_from_integer/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_timestamp_from_integer/out.sql index 155a4cd77e5f..1022c7539dbd 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_timestamp_from_integer/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_timestamp_from_integer/out.sql @@ -1,3 +1,3 @@ SELECT - toDateTime(t0.int_col) AS "TimestampFromUNIX(int_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + toDateTime("t0"."int_col") AS "TimestampFromUNIX(int_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/abs/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/abs/out.sql index 9053504cff85..55df560b2fac 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/abs/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/abs/out.sql @@ -1,3 +1,3 @@ SELECT - ABS(t0.double_col) AS "Abs(double_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + ABS("t0"."double_col") AS "Abs(double_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/ceil/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/ceil/out.sql index fafc564ab456..09cbb2e254bb 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/ceil/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/ceil/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(CEIL(t0.double_col) AS Nullable(Int64)) AS "Ceil(double_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + CAST(CEIL("t0"."double_col") AS Nullable(Int64)) AS "Ceil(double_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/exp/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/exp/out.sql index 4d8fe27b3e8e..c11261c44dbd 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/exp/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/exp/out.sql @@ -1,3 +1,3 @@ SELECT - EXP(t0.double_col) AS "Exp(double_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + EXP("t0"."double_col") AS "Exp(double_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/log/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/log/out.sql index 82a9852fce2e..32ff0c4948e6 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/log/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/log/out.sql @@ -1,3 +1,3 @@ SELECT - LN(t0.double_col) AS "Log(double_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + LN("t0"."double_col") AS "Log(double_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git 
a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/log10/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/log10/out.sql index 8fa391142aca..c862dded81ed 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/log10/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/log10/out.sql @@ -1,3 +1,3 @@ SELECT - LOG10(t0.double_col) AS "Log10(double_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + LOG10("t0"."double_col") AS "Log10(double_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/log2/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/log2/out.sql index 8c0c7345d907..cebe0f41f850 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/log2/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/log2/out.sql @@ -1,3 +1,3 @@ SELECT - LOG2(t0.double_col) AS "Log2(double_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + LOG2("t0"."double_col") AS "Log2(double_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/round/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/round/out.sql index ca5418a19a6c..8e25278c7b9a 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/round/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/round/out.sql @@ -1,3 +1,3 @@ SELECT - ROUND(t0.double_col) AS "Round(double_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + ROUND("t0"."double_col") AS "Round(double_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/round_0/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/round_0/out.sql index d3d0e5b8d3f3..0540e27c45a9 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/round_0/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/round_0/out.sql @@ -1,3 +1,3 @@ SELECT - ROUND(t0.double_col, 0) AS "Round(double_col, 0)" -FROM functional_alltypes AS t0 \ No newline at end of file + ROUND("t0"."double_col", 0) AS "Round(double_col, 0)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/round_2/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/round_2/out.sql index 546248ea6602..86e64f8f88bf 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/round_2/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/round_2/out.sql @@ -1,3 +1,3 @@ SELECT - ROUND(t0.double_col, 2) AS "Round(double_col, 2)" -FROM functional_alltypes AS t0 \ No newline at end of file + ROUND("t0"."double_col", 2) AS "Round(double_col, 2)" +FROM "functional_alltypes" AS "t0" \ No 
newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/sign/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/sign/out.sql index 1081419665a0..10d56c154fa0 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/sign/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/sign/out.sql @@ -1,3 +1,3 @@ SELECT - intDivOrZero(t0.double_col, ABS(t0.double_col)) AS "Sign(double_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + intDivOrZero("t0"."double_col", ABS("t0"."double_col")) AS "Sign(double_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/sqrt/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/sqrt/out.sql index 22d3ea0b7053..2e4139a99d1c 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/sqrt/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/sqrt/out.sql @@ -1,3 +1,3 @@ SELECT - SQRT(t0.double_col) AS "Sqrt(double_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + SQRT("t0"."double_col") AS "Sqrt(double_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_literals/test_string_numeric_boolean_literals/false/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_literals/test_string_numeric_boolean_literals/false/out.sql index ebdf5892bd88..3384069fe9ef 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_literals/test_string_numeric_boolean_literals/false/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_literals/test_string_numeric_boolean_literals/false/out.sql @@ -1,2 +1,2 @@ SELECT - FALSE AS False \ No newline at end of file + FALSE AS "False" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_literals/test_string_numeric_boolean_literals/true/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_literals/test_string_numeric_boolean_literals/true/out.sql index 8cafff945927..0e4959c5be70 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_literals/test_string_numeric_boolean_literals/true/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_literals/test_string_numeric_boolean_literals/true/out.sql @@ -1,2 +1,2 @@ SELECT - TRUE AS True \ No newline at end of file + TRUE AS "True" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_between/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_between/out.sql index f385a5073f4a..d36d7dbfe47a 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_between/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_between/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col BETWEEN 0 AND 10 AS "Between(int_col, 0, 10)" -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."int_col" BETWEEN 0 AND 10 AS "Between(int_col, 0, 10)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/add/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/add/out.sql index 
e68b7ab92f5b..f861b63ca850 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/add/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/add/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col + t0.tinyint_col AS "Add(int_col, tinyint_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."int_col" + "t0"."tinyint_col" AS "Add(int_col, tinyint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/eq/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/eq/out.sql index fcb1bd0800dc..594a301ee86c 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/eq/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/eq/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col = t0.tinyint_col AS "Equals(int_col, tinyint_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."int_col" = "t0"."tinyint_col" AS "Equals(int_col, tinyint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/ge/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/ge/out.sql index 19a415c58ebf..f54ffa08d8d5 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/ge/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/ge/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col >= t0.tinyint_col AS "GreaterEqual(int_col, tinyint_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."int_col" >= "t0"."tinyint_col" AS "GreaterEqual(int_col, tinyint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/gt/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/gt/out.sql index 2bb6220a6ff3..9a3e27a030f6 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/gt/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/gt/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col > t0.tinyint_col AS "Greater(int_col, tinyint_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."int_col" > "t0"."tinyint_col" AS "Greater(int_col, tinyint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/le/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/le/out.sql index 078ecd833ad4..d8c830904dfc 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/le/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/le/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col <= t0.tinyint_col AS "LessEqual(int_col, tinyint_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."int_col" <= "t0"."tinyint_col" AS "LessEqual(int_col, tinyint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git 
a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/lt/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/lt/out.sql index e7f933028170..cefda5d4ae87 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/lt/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/lt/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col < t0.tinyint_col AS "Less(int_col, tinyint_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."int_col" < "t0"."tinyint_col" AS "Less(int_col, tinyint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/mul/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/mul/out.sql index e1389c6da13b..ee9e7b4aa15b 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/mul/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/mul/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col * t0.tinyint_col AS "Multiply(int_col, tinyint_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."int_col" * "t0"."tinyint_col" AS "Multiply(int_col, tinyint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/ne/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/ne/out.sql index b640e3569c95..69f73bd5e823 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/ne/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/ne/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col <> t0.tinyint_col AS "NotEquals(int_col, tinyint_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."int_col" <> "t0"."tinyint_col" AS "NotEquals(int_col, tinyint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/pow/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/pow/out.sql index 1eb784222e9c..2b99f1bca330 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/pow/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/pow/out.sql @@ -1,3 +1,3 @@ SELECT - POWER(t0.int_col, t0.tinyint_col) AS "Power(int_col, tinyint_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + POWER("t0"."int_col", "t0"."tinyint_col") AS "Power(int_col, tinyint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/sub/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/sub/out.sql index de39ee04558f..ad2506f3d9f7 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/sub/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/sub/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col - t0.tinyint_col AS "Subtract(int_col, tinyint_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."int_col" - 
"t0"."tinyint_col" AS "Subtract(int_col, tinyint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/truediv/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/truediv/out.sql index cb51f9f8c6a1..ef67e8fbca04 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/truediv/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_operators/truediv/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col / t0.tinyint_col AS "Divide(int_col, tinyint_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."int_col" / "t0"."tinyint_col" AS "Divide(int_col, tinyint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda0/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda0/out.sql index 8133e2fc1ec5..dbe622a75a1a 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda0/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda0/out.sql @@ -1,5 +1,5 @@ SELECT ( - t0.int_col + t0.tinyint_col - ) + t0.double_col AS "Add(Add(int_col, tinyint_col), double_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."int_col" + "t0"."tinyint_col" + ) + "t0"."double_col" AS "Add(Add(int_col, tinyint_col), double_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda1/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda1/out.sql index 3c61319c349c..c0b915d5412f 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda1/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda1/out.sql @@ -1,3 +1,3 @@ SELECT - LN(t0.int_col) + t0.double_col AS "Add(Log(int_col), double_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + LN("t0"."int_col") + "t0"."double_col" AS "Add(Log(int_col), double_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda2/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda2/out.sql index 08d6cd257a99..32f86e31d105 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda2/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda2/out.sql @@ -1,5 +1,5 @@ SELECT - t0.tinyint_col + -( - t0.int_col + t0.double_col + "t0"."tinyint_col" + -( + "t0"."int_col" + "t0"."double_col" ) AS "Add(tinyint_col, Negate(Add(int_col, double_col)))" -FROM functional_alltypes AS t0 \ No newline at end of file +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_negate/bool_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_negate/bool_col/out.sql index f99229d255c1..4ed2a27cb186 100644 --- 
a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_negate/bool_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_negate/bool_col/out.sql @@ -1,5 +1,5 @@ SELECT NOT ( - t0.bool_col + "t0"."bool_col" ) AS "Not(bool_col)" -FROM functional_alltypes AS t0 \ No newline at end of file +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_negate/float_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_negate/float_col/out.sql index a0bba803ac5e..bd9115f001e4 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_negate/float_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_negate/float_col/out.sql @@ -1,5 +1,5 @@ SELECT -( - t0.float_col + "t0"."float_col" ) AS "Negate(float_col)" -FROM functional_alltypes AS t0 \ No newline at end of file +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_negate/int_col/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_negate/int_col/out.sql index 0f6ccfcd6d63..0a970f06c50c 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_negate/int_col/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_negate/int_col/out.sql @@ -1,5 +1,5 @@ SELECT -( - t0.int_col + "t0"."int_col" ) AS "Negate(int_col)" -FROM functional_alltypes AS t0 \ No newline at end of file +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_search_case/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_search_case/out.sql index 550ad6ad9278..11569fc15843 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_search_case/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_search_case/out.sql @@ -1,9 +1,9 @@ SELECT CASE - WHEN t0.float_col > 0 - THEN t0.int_col * 2 - WHEN t0.float_col < 0 - THEN t0.int_col + WHEN "t0"."float_col" > 0 + THEN "t0"."int_col" * 2 + WHEN "t0"."float_col" < 0 + THEN "t0"."int_col" ELSE 0 END AS "SearchedCase(0)" -FROM functional_alltypes AS t0 \ No newline at end of file +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_simple_case/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_simple_case/out.sql index 2864ba8200b4..66d7aca5a83f 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_simple_case/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_simple_case/out.sql @@ -1,3 +1,3 @@ SELECT - CASE t0.string_col WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS "SimpleCase(string_col, 'default')" -FROM functional_alltypes AS t0 \ No newline at end of file + CASE "t0"."string_col" WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS "SimpleCase(string_col, 'default')" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql index a62f4b1f78bc..9598809b7217 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql +++ 
b/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql @@ -1,5 +1,5 @@ SELECT - t0.id IN (SELECT - arrayJoin(t1.ids) AS ids - FROM way_view AS t1) AS "InSubquery(id)" -FROM node_view AS t0 \ No newline at end of file + "t0"."id" IN (SELECT + arrayJoin("t1"."ids") AS "ids" + FROM "way_view" AS "t1") AS "InSubquery(id)" +FROM "node_view" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_array_expr_projection/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_array_expr_projection/out.sql index 034aab28ceef..54d3826d1847 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_array_expr_projection/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_array_expr_projection/out.sql @@ -1,10 +1,10 @@ SELECT - CAST(t1.string_col AS Nullable(Float64)) AS "Cast(string_col, float64)" + CAST("t1"."string_col" AS Nullable(Float64)) AS "Cast(string_col, float64)" FROM ( SELECT - t0.string_col, - COUNT(*) AS count - FROM functional_alltypes AS t0 + "t0"."string_col", + COUNT(*) AS "count" + FROM "functional_alltypes" AS "t0" GROUP BY - t0.string_col -) AS t1 \ No newline at end of file + "t0"."string_col" +) AS "t1" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql index ef5ec770f49a..4e106ac768a3 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql @@ -1,18 +1,18 @@ SELECT - t4.a, - t4.b, - t4.c, - t4.d, - t4.c / ( - t4.a - t4.b - ) AS e + "t4"."a", + "t4"."b", + "t4"."c", + "t4"."d", + "t4"."c" / ( + "t4"."a" - "t4"."b" + ) AS "e" FROM ( SELECT - t2.a, - t2.b, - t3.c, - t3.d - FROM s AS t2 - INNER JOIN t AS t3 - ON t2.a = t3.c -) AS t4 \ No newline at end of file + "t2"."a", + "t2"."b", + "t3"."c", + "t3"."d" + FROM "s" AS "t2" + INNER JOIN "t" AS "t3" + ON "t2"."a" = "t3"."c" +) AS "t4" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_count_name/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_count_name/out.sql index 6edcaf0c84a9..818766ac3f33 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_count_name/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_count_name/out.sql @@ -1,9 +1,9 @@ SELECT - t0.a, + "t0"."a", COALESCE(countIf(NOT ( - t0.b - )), 0) AS A, - COALESCE(countIf(t0.b), 0) AS B -FROM t AS t0 + "t0"."b" + )), 0) AS "A", + COALESCE(countIf("t0"."b"), 0) AS "B" +FROM "t" AS "t0" GROUP BY - t0.a \ No newline at end of file + "t0"."a" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_ifelse_use_if/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_ifelse_use_if/out.sql index 73b7283aba5f..5909a4ea1a1b 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_ifelse_use_if/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_ifelse_use_if/out.sql @@ -1,3 +1,3 @@ SELECT - CASE WHEN t0.float_col > 0 THEN t0.int_col ELSE t0.bigint_col END AS "IfElse(Greater(float_col, 0), int_col, bigint_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + CASE WHEN "t0"."float_col" > 0 THEN "t0"."int_col" ELSE "t0"."bigint_col" END AS 
"IfElse(Greater(float_col, 0), int_col, bigint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql index 1bd6720ed390..bda298346094 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql @@ -1,17 +1,17 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month -FROM functional_alltypes AS t0 + "t0"."id", + "t0"."bool_col", + "t0"."tinyint_col", + "t0"."smallint_col", + "t0"."int_col", + "t0"."bigint_col", + "t0"."float_col", + "t0"."double_col", + "t0"."date_string_col", + "t0"."string_col", + "t0"."timestamp_col", + "t0"."year", + "t0"."month" +FROM "functional_alltypes" AS "t0" WHERE - t0.string_col IN ('foo', 'bar') \ No newline at end of file + "t0"."string_col" IN ('foo', 'bar') \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql index 85fd1cae375d..0612bf8ebe05 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql @@ -1,19 +1,19 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month -FROM functional_alltypes AS t0 + "t0"."id", + "t0"."bool_col", + "t0"."tinyint_col", + "t0"."smallint_col", + "t0"."int_col", + "t0"."bigint_col", + "t0"."float_col", + "t0"."double_col", + "t0"."date_string_col", + "t0"."string_col", + "t0"."timestamp_col", + "t0"."year", + "t0"."month" +FROM "functional_alltypes" AS "t0" WHERE NOT ( - t0.string_col IN ('foo', 'bar') + "t0"."string_col" IN ('foo', 'bar') ) \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isnull_case_expr_rewrite_failure/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isnull_case_expr_rewrite_failure/out.sql index 3029dd2e2fb3..0a5733558a19 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isnull_case_expr_rewrite_failure/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isnull_case_expr_rewrite_failure/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(CASE WHEN isNull(t0.string_col) THEN 1 ELSE 0 END) AS "Sum(IfElse(IsNull(string_col), 1, 0))" -FROM functional_alltypes AS t0 \ No newline at end of file + SUM(CASE WHEN isNull("t0"."string_col") THEN 1 ELSE 0 END) AS "Sum(IfElse(IsNull(string_col), 1, 0))" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql index 728987548b00..66518891122e 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql @@ -1,17 
+1,17 @@ SELECT - t1.id, - t1.bool_col, - t1.tinyint_col, - t1.smallint_col, - t1.int_col, - t1.bigint_col, - t1.float_col, - t1.double_col, - t1.date_string_col, - t1.string_col, - t1.timestamp_col, - t1.year, - t1.month -FROM functional_alltypes AS t1 -INNER JOIN functional_alltypes AS t3 - ON t1.id = t3.id \ No newline at end of file + "t1"."id", + "t1"."bool_col", + "t1"."tinyint_col", + "t1"."smallint_col", + "t1"."int_col", + "t1"."bigint_col", + "t1"."float_col", + "t1"."double_col", + "t1"."date_string_col", + "t1"."string_col", + "t1"."timestamp_col", + "t1"."year", + "t1"."month" +FROM "functional_alltypes" AS "t1" +INNER JOIN "functional_alltypes" AS "t3" + ON "t1"."id" = "t3"."id" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql index 2ae649a0ea76..eae60372b99a 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql @@ -1,17 +1,17 @@ SELECT - t1.key, + "t1"."key", SUM(( ( - t1.value + 1 + "t1"."value" + 1 ) + 2 - ) + 3) AS abc + ) + 3) AS "abc" FROM ( SELECT - t0.key, - t0.value - FROM t0 AS t0 + "t0"."key", + "t0"."value" + FROM "t0" AS "t0" WHERE - t0.value = 42 -) AS t1 + "t0"."value" = 42 +) AS "t1" GROUP BY - t1.key \ No newline at end of file + "t1"."key" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql index d22a599a88a7..7a163b9542da 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql @@ -1,17 +1,17 @@ SELECT - t1.key, + "t1"."key", SUM(( ( - t1.value + 1 + "t1"."value" + 1 ) + 2 - ) + 3) AS foo + ) + 3) AS "foo" FROM ( SELECT - t0.key, - t0.value - FROM t0 AS t0 + "t0"."key", + "t0"."value" + FROM "t0" AS "t0" WHERE - t0.value = 42 -) AS t1 + "t0"."value" = 42 +) AS "t1" GROUP BY - t1.key \ No newline at end of file + "t1"."key" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_physical_table_reference_translate/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_physical_table_reference_translate/out.sql index cdcc673f9d1c..67f2cfc5ea25 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_physical_table_reference_translate/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_physical_table_reference_translate/out.sql @@ -1,3 +1,3 @@ SELECT * -FROM functional_alltypes \ No newline at end of file +FROM "functional_alltypes" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_self_reference_simple/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_self_reference_simple/out.sql index 99d5c76e03f3..6d497849fa7e 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_self_reference_simple/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_self_reference_simple/out.sql @@ -1,3 +1,3 @@ SELECT * -FROM functional_alltypes AS t0 \ No newline at end of file +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git 
a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql index a1bdbc4f3fc3..7ea322ab3e40 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql @@ -1,26 +1,26 @@ SELECT - t2.playerID, - t2.yearID, - t2.stint, - t2.teamID, - t2.lgID, - t2.G, - t2.AB, - t2.R, - t2.H, - t2.X2B, - t2.X3B, - t2.HR, - t2.RBI, - t2.SB, - t2.CS, - t2.BB, - t2.SO, - t2.IBB, - t2.HBP, - t2.SH, - t2.SF, - t2.GIDP -FROM batting AS t2 -ANY JOIN awards_players AS t3 - ON t2.playerID = t3.awardID \ No newline at end of file + "t2"."playerID", + "t2"."yearID", + "t2"."stint", + "t2"."teamID", + "t2"."lgID", + "t2"."G", + "t2"."AB", + "t2"."R", + "t2"."H", + "t2"."X2B", + "t2"."X3B", + "t2"."HR", + "t2"."RBI", + "t2"."SB", + "t2"."CS", + "t2"."BB", + "t2"."SO", + "t2"."IBB", + "t2"."HBP", + "t2"."SH", + "t2"."SF", + "t2"."GIDP" +FROM "batting" AS "t2" +ANY JOIN "awards_players" AS "t3" + ON "t2"."playerID" = "t3"."awardID" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql index 651c9ca46694..99c2c8ed05ba 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql @@ -1,26 +1,26 @@ SELECT - t2.playerID, - t2.yearID, - t2.stint, - t2.teamID, - t2.lgID, - t2.G, - t2.AB, - t2.R, - t2.H, - t2.X2B, - t2.X3B, - t2.HR, - t2.RBI, - t2.SB, - t2.CS, - t2.BB, - t2.SO, - t2.IBB, - t2.HBP, - t2.SH, - t2.SF, - t2.GIDP -FROM batting AS t2 -LEFT ANY JOIN awards_players AS t3 - ON t2.playerID = t3.awardID \ No newline at end of file + "t2"."playerID", + "t2"."yearID", + "t2"."stint", + "t2"."teamID", + "t2"."lgID", + "t2"."G", + "t2"."AB", + "t2"."R", + "t2"."H", + "t2"."X2B", + "t2"."X3B", + "t2"."HR", + "t2"."RBI", + "t2"."SB", + "t2"."CS", + "t2"."BB", + "t2"."SO", + "t2"."IBB", + "t2"."HBP", + "t2"."SH", + "t2"."SF", + "t2"."GIDP" +FROM "batting" AS "t2" +LEFT ANY JOIN "awards_players" AS "t3" + ON "t2"."playerID" = "t3"."awardID" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql index ebcca144d254..b0e5b8e38295 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql @@ -1,26 +1,26 @@ SELECT - t2.playerID, - t2.yearID, - t2.stint, - t2.teamID, - t2.lgID, - t2.G, - t2.AB, - t2.R, - t2.H, - t2.X2B, - t2.X3B, - t2.HR, - t2.RBI, - t2.SB, - t2.CS, - t2.BB, - t2.SO, - t2.IBB, - t2.HBP, - t2.SH, - t2.SF, - t2.GIDP -FROM batting AS t2 -INNER JOIN awards_players AS t3 - ON t2.playerID = t3.awardID \ No newline at end of file + "t2"."playerID", + "t2"."yearID", + "t2"."stint", + "t2"."teamID", + "t2"."lgID", + "t2"."G", + "t2"."AB", + 
"t2"."R", + "t2"."H", + "t2"."X2B", + "t2"."X3B", + "t2"."HR", + "t2"."RBI", + "t2"."SB", + "t2"."CS", + "t2"."BB", + "t2"."SO", + "t2"."IBB", + "t2"."HBP", + "t2"."SH", + "t2"."SF", + "t2"."GIDP" +FROM "batting" AS "t2" +INNER JOIN "awards_players" AS "t3" + ON "t2"."playerID" = "t3"."awardID" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql index 5ae2ee1998b1..3933c87710b6 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql @@ -1,26 +1,26 @@ SELECT - t2.playerID, - t2.yearID, - t2.stint, - t2.teamID, - t2.lgID, - t2.G, - t2.AB, - t2.R, - t2.H, - t2.X2B, - t2.X3B, - t2.HR, - t2.RBI, - t2.SB, - t2.CS, - t2.BB, - t2.SO, - t2.IBB, - t2.HBP, - t2.SH, - t2.SF, - t2.GIDP -FROM batting AS t2 -LEFT OUTER JOIN awards_players AS t3 - ON t2.playerID = t3.awardID \ No newline at end of file + "t2"."playerID", + "t2"."yearID", + "t2"."stint", + "t2"."teamID", + "t2"."lgID", + "t2"."G", + "t2"."AB", + "t2"."R", + "t2"."H", + "t2"."X2B", + "t2"."X3B", + "t2"."HR", + "t2"."RBI", + "t2"."SB", + "t2"."CS", + "t2"."BB", + "t2"."SO", + "t2"."IBB", + "t2"."HBP", + "t2"."SH", + "t2"."SF", + "t2"."GIDP" +FROM "batting" AS "t2" +LEFT OUTER JOIN "awards_players" AS "t3" + ON "t2"."playerID" = "t3"."awardID" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql index 43bddb923f0a..71a2337a7448 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql @@ -1,26 +1,26 @@ SELECT - t2.playerID, - t2.yearID, - t2.stint, - t2.teamID, - t2.lgID, - t2.G, - t2.AB, - t2.R, - t2.H, - t2.X2B, - t2.X3B, - t2.HR, - t2.RBI, - t2.SB, - t2.CS, - t2.BB, - t2.SO, - t2.IBB, - t2.HBP, - t2.SH, - t2.SF, - t2.GIDP -FROM batting AS t2 -ANY JOIN awards_players AS t3 - ON t2.playerID = t3.playerID \ No newline at end of file + "t2"."playerID", + "t2"."yearID", + "t2"."stint", + "t2"."teamID", + "t2"."lgID", + "t2"."G", + "t2"."AB", + "t2"."R", + "t2"."H", + "t2"."X2B", + "t2"."X3B", + "t2"."HR", + "t2"."RBI", + "t2"."SB", + "t2"."CS", + "t2"."BB", + "t2"."SO", + "t2"."IBB", + "t2"."HBP", + "t2"."SH", + "t2"."SF", + "t2"."GIDP" +FROM "batting" AS "t2" +ANY JOIN "awards_players" AS "t3" + ON "t2"."playerID" = "t3"."playerID" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql index 5586b8b01ee0..7181dbe8cc83 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql @@ -1,26 +1,26 @@ SELECT - t2.playerID, - t2.yearID, - t2.stint, - t2.teamID, - t2.lgID, - t2.G, - t2.AB, - t2.R, - t2.H, - 
t2.X2B, - t2.X3B, - t2.HR, - t2.RBI, - t2.SB, - t2.CS, - t2.BB, - t2.SO, - t2.IBB, - t2.HBP, - t2.SH, - t2.SF, - t2.GIDP -FROM batting AS t2 -LEFT ANY JOIN awards_players AS t3 - ON t2.playerID = t3.playerID \ No newline at end of file + "t2"."playerID", + "t2"."yearID", + "t2"."stint", + "t2"."teamID", + "t2"."lgID", + "t2"."G", + "t2"."AB", + "t2"."R", + "t2"."H", + "t2"."X2B", + "t2"."X3B", + "t2"."HR", + "t2"."RBI", + "t2"."SB", + "t2"."CS", + "t2"."BB", + "t2"."SO", + "t2"."IBB", + "t2"."HBP", + "t2"."SH", + "t2"."SF", + "t2"."GIDP" +FROM "batting" AS "t2" +LEFT ANY JOIN "awards_players" AS "t3" + ON "t2"."playerID" = "t3"."playerID" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql index f611516b394e..fc1eeca36630 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql @@ -1,26 +1,26 @@ SELECT - t2.playerID, - t2.yearID, - t2.stint, - t2.teamID, - t2.lgID, - t2.G, - t2.AB, - t2.R, - t2.H, - t2.X2B, - t2.X3B, - t2.HR, - t2.RBI, - t2.SB, - t2.CS, - t2.BB, - t2.SO, - t2.IBB, - t2.HBP, - t2.SH, - t2.SF, - t2.GIDP -FROM batting AS t2 -INNER JOIN awards_players AS t3 - ON t2.playerID = t3.playerID \ No newline at end of file + "t2"."playerID", + "t2"."yearID", + "t2"."stint", + "t2"."teamID", + "t2"."lgID", + "t2"."G", + "t2"."AB", + "t2"."R", + "t2"."H", + "t2"."X2B", + "t2"."X3B", + "t2"."HR", + "t2"."RBI", + "t2"."SB", + "t2"."CS", + "t2"."BB", + "t2"."SO", + "t2"."IBB", + "t2"."HBP", + "t2"."SH", + "t2"."SF", + "t2"."GIDP" +FROM "batting" AS "t2" +INNER JOIN "awards_players" AS "t3" + ON "t2"."playerID" = "t3"."playerID" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql index c820c7e05b88..e3d44d128a61 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql @@ -1,26 +1,26 @@ SELECT - t2.playerID, - t2.yearID, - t2.stint, - t2.teamID, - t2.lgID, - t2.G, - t2.AB, - t2.R, - t2.H, - t2.X2B, - t2.X3B, - t2.HR, - t2.RBI, - t2.SB, - t2.CS, - t2.BB, - t2.SO, - t2.IBB, - t2.HBP, - t2.SH, - t2.SF, - t2.GIDP -FROM batting AS t2 -LEFT OUTER JOIN awards_players AS t3 - ON t2.playerID = t3.playerID \ No newline at end of file + "t2"."playerID", + "t2"."yearID", + "t2"."stint", + "t2"."teamID", + "t2"."lgID", + "t2"."G", + "t2"."AB", + "t2"."R", + "t2"."H", + "t2"."X2B", + "t2"."X3B", + "t2"."HR", + "t2"."RBI", + "t2"."SB", + "t2"."CS", + "t2"."BB", + "t2"."SO", + "t2"."IBB", + "t2"."HBP", + "t2"."SH", + "t2"."SF", + "t2"."GIDP" +FROM "batting" AS "t2" +LEFT OUTER JOIN "awards_players" AS "t3" + ON "t2"."playerID" = "t3"."playerID" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql index 86d975c44589..d7dbc74ba50b 100644 --- 
a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql @@ -1,21 +1,21 @@ SELECT - SUM(t1.float_col) AS "Sum(float_col)" + SUM("t1"."float_col") AS "Sum(float_col)" FROM ( SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month - FROM functional_alltypes AS t0 + "t0"."id", + "t0"."bool_col", + "t0"."tinyint_col", + "t0"."smallint_col", + "t0"."int_col", + "t0"."bigint_col", + "t0"."float_col", + "t0"."double_col", + "t0"."date_string_col", + "t0"."string_col", + "t0"."timestamp_col", + "t0"."year", + "t0"."month" + FROM "functional_alltypes" AS "t0" WHERE - t0.int_col > 0 -) AS t1 \ No newline at end of file + "t0"."int_col" > 0 +) AS "t1" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql index d2bf6243fdea..bafeb1ea3fae 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql @@ -1,28 +1,28 @@ SELECT - t2.string_col + "t2"."string_col" FROM ( SELECT - t1.string_col, - SUM(t1.float_col) AS total + "t1"."string_col", + SUM("t1"."float_col") AS "total" FROM ( SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month - FROM functional_alltypes AS t0 + "t0"."id", + "t0"."bool_col", + "t0"."tinyint_col", + "t0"."smallint_col", + "t0"."int_col", + "t0"."bigint_col", + "t0"."float_col", + "t0"."double_col", + "t0"."date_string_col", + "t0"."string_col", + "t0"."timestamp_col", + "t0"."year", + "t0"."month" + FROM "functional_alltypes" AS "t0" WHERE - t0.int_col > 0 - ) AS t1 + "t0"."int_col" > 0 + ) AS "t1" GROUP BY - t1.string_col -) AS t2 \ No newline at end of file + "t1"."string_col" +) AS "t2" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_timestamp_extract_field/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_timestamp_extract_field/out.sql index 9336d08bda8d..07b42d623f25 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_timestamp_extract_field/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_timestamp_extract_field/out.sql @@ -1,8 +1,8 @@ SELECT - toYear(t0.timestamp_col) AS year, - toMonth(t0.timestamp_col) AS month, - toDayOfMonth(t0.timestamp_col) AS day, - toHour(t0.timestamp_col) AS hour, - toMinute(t0.timestamp_col) AS minute, - toSecond(t0.timestamp_col) AS second -FROM functional_alltypes AS t0 \ No newline at end of file + toYear("t0"."timestamp_col") AS "year", + toMonth("t0"."timestamp_col") AS "month", + toDayOfMonth("t0"."timestamp_col") AS "day", + toHour("t0"."timestamp_col") AS "hour", + toMinute("t0"."timestamp_col") AS "minute", + toSecond("t0"."timestamp_col") AS "second" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql 
index cb651e1f7369..6a0067715eb6 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql @@ -1,19 +1,19 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month -FROM functional_alltypes AS t0 + "t0"."id", + "t0"."bool_col", + "t0"."tinyint_col", + "t0"."smallint_col", + "t0"."int_col", + "t0"."bigint_col", + "t0"."float_col", + "t0"."double_col", + "t0"."date_string_col", + "t0"."string_col", + "t0"."timestamp_col", + "t0"."year", + "t0"."month" +FROM "functional_alltypes" AS "t0" WHERE - t0.float_col > 0 AND t0.int_col < ( - t0.float_col * 2 + "t0"."float_col" > 0 AND "t0"."int_col" < ( + "t0"."float_col" * 2 ) \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql index a27f8a736dc5..ce6fd8adb053 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql @@ -1,17 +1,17 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month -FROM functional_alltypes AS t0 + "t0"."id", + "t0"."bool_col", + "t0"."tinyint_col", + "t0"."smallint_col", + "t0"."int_col", + "t0"."bigint_col", + "t0"."float_col", + "t0"."double_col", + "t0"."date_string_col", + "t0"."string_col", + "t0"."timestamp_col", + "t0"."year", + "t0"."month" +FROM "functional_alltypes" AS "t0" WHERE - t0.int_col > 0 AND t0.float_col BETWEEN 0 AND 1 \ No newline at end of file + "t0"."int_col" > 0 AND "t0"."float_col" BETWEEN 0 AND 1 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_timestamp/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_timestamp/out.sql index cee980322a7b..2650818b44ad 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_timestamp/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_timestamp/out.sql @@ -1,6 +1,6 @@ SELECT - t0.uuid, - minIf(t0.ts, t0.search_level = 1) AS min_date -FROM t AS t0 + "t0"."uuid", + minIf("t0"."ts", "t0"."search_level" = 1) AS "min_date" +FROM "t" AS "t0" GROUP BY - t0.uuid \ No newline at end of file + "t0"."uuid" \ No newline at end of file diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index e66b0b99763e..d0bcb68a0386 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -491,7 +491,7 @@ def _from_url(self, url: str, **kwargs) -> BaseBackend: """ url = urlparse(url) - database = url.path[1:] or ":memory:" + database = url.path or ":memory:" query_params = parse_qs(url.query) for name, value in query_params.items(): diff --git a/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint16/out.sql b/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint16/out.sql index abb420080b20..2acbaa8d5f12 100644 --- a/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint16/out.sql +++ 
b/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint16/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.a AS USMALLINT) AS "Cast(a, uint16)" -FROM t AS t0 \ No newline at end of file + CAST("t0"."a" AS USMALLINT) AS "Cast(a, uint16)" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint32/out.sql b/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint32/out.sql index b2ec0d726884..a48e232ecd8e 100644 --- a/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint32/out.sql +++ b/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint32/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.a AS UINTEGER) AS "Cast(a, uint32)" -FROM t AS t0 \ No newline at end of file + CAST("t0"."a" AS UINTEGER) AS "Cast(a, uint32)" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint64/out.sql b/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint64/out.sql index 6cefd3bb478b..18926c3384c4 100644 --- a/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint64/out.sql +++ b/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint64/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.a AS UBIGINT) AS "Cast(a, uint64)" -FROM t AS t0 \ No newline at end of file + CAST("t0"."a" AS UBIGINT) AS "Cast(a, uint64)" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint8/out.sql b/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint8/out.sql index dae9dbdc41cb..d6c4e6f31517 100644 --- a/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint8/out.sql +++ b/ibis/backends/duckdb/tests/snapshots/test_datatypes/test_cast_uints/uint8/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(t0.a AS UTINYINT) AS "Cast(a, uint8)" -FROM t AS t0 \ No newline at end of file + CAST("t0"."a" AS UTINYINT) AS "Cast(a, uint8)" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql index e68c65813913..a7503ad3c708 100644 --- a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql @@ -1,3 +1,3 @@ SELECT - ST_DWITHIN(t0.geom, t0.geom, CAST(3.0 AS DOUBLE)) AS tmp -FROM t AS t0 \ No newline at end of file + ST_DWITHIN("t0"."geom", "t0"."geom", CAST(3.0 AS DOUBLE)) AS "tmp" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/as_text/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/as_text/out.sql index 7da710b2dceb..c3309623704a 100644 --- a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/as_text/out.sql +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/as_text/out.sql @@ -1,3 +1,3 @@ SELECT - ST_ASTEXT(t0.geom) AS tmp -FROM t AS t0 \ No newline at end of file + ST_ASTEXT("t0"."geom") AS "tmp" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/n_points/out.sql 
b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/n_points/out.sql index bf8ba88ffde2..e51ac61f7af2 100644 --- a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/n_points/out.sql +++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_unary_snapshot/n_points/out.sql @@ -1,3 +1,3 @@ SELECT - ST_NPOINTS(t0.geom) AS tmp -FROM t AS t0 \ No newline at end of file + ST_NPOINTS("t0"."geom") AS "tmp" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/sqlite/__init__.py b/ibis/backends/sqlite/__init__.py index bc12542b3b70..906137627f49 100644 --- a/ibis/backends/sqlite/__init__.py +++ b/ibis/backends/sqlite/__init__.py @@ -111,7 +111,7 @@ def _from_url(self, url: str, **kwargs): """ url = urlparse(url) - database = url.path[1:] or ":memory:" + database = url.path or ":memory:" return self.connect(database=database, **kwargs) def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/bigquery/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/bigquery/out.sql index 499888a1d390..0f8ec722632b 100644 --- a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/bigquery/out.sql @@ -1,7 +1,7 @@ -WITH foo AS ( +WITH `foo` AS ( SELECT * - FROM `ibis-gbq`.ibis_gbq_testing.test_bigquery_temp_mem_t_for_cte AS t0 + FROM `ibis-gbq`.`ibis_gbq_testing`.`test_bigquery_temp_mem_t_for_cte` AS `t0` ) SELECT COUNT(*) AS `x` diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/clickhouse/out.sql index 65c261d51aed..86a20dda1d70 100644 --- a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/clickhouse/out.sql @@ -1,7 +1,7 @@ -WITH foo AS ( +WITH "foo" AS ( SELECT * - FROM test_clickhouse_temp_mem_t_for_cte AS t0 + FROM "test_clickhouse_temp_mem_t_for_cte" AS "t0" ) SELECT COUNT(*) AS "x" diff --git a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/duckdb/out.sql b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/duckdb/out.sql index 274da8b2928b..51c412222fac 100644 --- a/ibis/backends/tests/snapshots/test_dot_sql/test_cte/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_dot_sql/test_cte/duckdb/out.sql @@ -1,7 +1,7 @@ -WITH foo AS ( +WITH "foo" AS ( SELECT * - FROM test_duckdb_temp_mem_t_for_cte AS t0 + FROM "test_duckdb_temp_mem_t_for_cte" AS "t0" ) SELECT COUNT(*) AS "x" diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql index fca431bc4c45..64a6e78e8bf3 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql @@ -1,42 +1,42 @@ -WITH t6 AS ( +WITH "t6" AS ( SELECT - t5.street, - ROW_NUMBER() OVER (ORDER BY t5.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" FROM ( SELECT - t2.street, - t2.key + "t2"."street", + "t2"."key" FROM ( SELECT - t0.street, - ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key - 
FROM data AS t0 - ) AS t2 + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t2" INNER JOIN ( SELECT - t1.key + "t1"."key" FROM ( SELECT - t0.street, - ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key - FROM data AS t0 - ) AS t1 - ) AS t4 - ON t2.key = t4.key - ) AS t5 -), t1 AS ( + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t4" + ON "t2"."key" = "t4"."key" + ) AS "t5" +), "t1" AS ( SELECT - t0.street, - ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key - FROM data AS t0 + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" ) SELECT - t8.street, - t8.key -FROM t6 AS t8 + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" INNER JOIN ( SELECT - t7.key - FROM t6 AS t7 -) AS t10 - ON t8.key = t10.key \ No newline at end of file + "t7"."key" + FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql index 2e5f7d14030f..0ceff9007707 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql @@ -1,42 +1,42 @@ -WITH t6 AS ( +WITH "t6" AS ( SELECT - t5.street, - ROW_NUMBER() OVER (ORDER BY t5.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS "key" FROM ( SELECT - t2.street, - t2.key + "t2"."street", + "t2"."key" FROM ( SELECT - t0.street, - ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key - FROM data AS t0 - ) AS t2 + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS "key" + FROM "data" AS "t0" + ) AS "t2" INNER JOIN ( SELECT - t1.key + "t1"."key" FROM ( SELECT - t0.street, - ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key - FROM data AS t0 - ) AS t1 - ) AS t4 - ON t2.key = t4.key - ) AS t5 -), t1 AS ( + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t4" + ON "t2"."key" = "t4"."key" + ) AS "t5" +), "t1" AS ( SELECT - t0.street, - ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key - FROM data AS t0 + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS "key" + FROM "data" AS "t0" ) SELECT - t8.street, - t8.key -FROM t6 AS t8 + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" INNER JOIN ( SELECT - t7.key - FROM t6 AS t7 -) AS t10 - ON t8.key = t10.key \ No newline at end of 
file + "t7"."key" + FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/risingwave/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/risingwave/out.sql new file mode 100644 index 000000000000..64a6e78e8bf3 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/risingwave/out.sql @@ -0,0 +1,42 @@ +WITH "t6" AS ( + SELECT + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM ( + SELECT + "t2"."street", + "t2"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t4" + ON "t2"."key" = "t4"."key" + ) AS "t5" +), "t1" AS ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" +) +SELECT + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" +INNER JOIN ( + SELECT + "t7"."key" + FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/sqlite/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/sqlite/out.sql index 96d6c839c2c0..9989afbf4aec 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/sqlite/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/sqlite/out.sql @@ -1,32 +1,42 @@ -WITH t0 AS ( +WITH "t6" AS ( SELECT - t5.street AS street, - ROW_NUMBER() OVER (ORDER BY t5.street ASC) - 1 AS "key" - FROM data AS t5 -), t1 AS ( + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM ( + SELECT + "t2"."street", + "t2"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t4" + ON "t2"."key" = "t4"."key" + ) AS "t5" +), "t1" AS ( SELECT - t0."key" AS "key" - FROM t0 -), t2 AS ( - SELECT - t0.street AS street, - t0."key" AS "key" - FROM t0 - JOIN t1 - ON t0."key" = t1."key" -), t3 AS ( - SELECT - t2.street AS street, - ROW_NUMBER() OVER (ORDER BY t2.street ASC) - 1 AS "key" - FROM t2 -), t4 AS ( - SELECT - t3."key" AS "key" - FROM t3 + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" ) SELECT - t3.street, - t3."key" -FROM t3 -JOIN t4 - ON t3."key" = t4."key" \ No newline at end of file + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" +INNER JOIN ( + SELECT + "t7"."key" + FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git 
a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/bigquery/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/bigquery/out.sql index 09e7f95d3580..b26705406420 100644 --- a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/bigquery/out.sql @@ -1,5 +1,5 @@ SELECT - t0.id, - t0.bool_col -FROM `ibis-gbq`.ibis_gbq_testing.functional_alltypes AS t0 + `t0`.`id`, + `t0`.`bool_col` +FROM `ibis-gbq`.`ibis_gbq_testing`.`functional_alltypes` AS `t0` LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/clickhouse/out.sql index f26c12ac78c8..b309cd65374d 100644 --- a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/clickhouse/out.sql @@ -1,5 +1,5 @@ SELECT - t0.id, - t0.bool_col -FROM functional_alltypes AS t0 + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/duckdb/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/duckdb/out.sql index f26c12ac78c8..b309cd65374d 100644 --- a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/duckdb/out.sql @@ -1,5 +1,5 @@ SELECT - t0.id, - t0.bool_col -FROM functional_alltypes AS t0 + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/bigquery/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/bigquery/out.sql index 09e7f95d3580..b26705406420 100644 --- a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/bigquery/out.sql @@ -1,5 +1,5 @@ SELECT - t0.id, - t0.bool_col -FROM `ibis-gbq`.ibis_gbq_testing.functional_alltypes AS t0 + `t0`.`id`, + `t0`.`bool_col` +FROM `ibis-gbq`.`ibis_gbq_testing`.`functional_alltypes` AS `t0` LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/clickhouse/out.sql index f26c12ac78c8..b309cd65374d 100644 --- a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/clickhouse/out.sql @@ -1,5 +1,5 @@ SELECT - t0.id, - t0.bool_col -FROM functional_alltypes AS t0 + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/duckdb/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/duckdb/out.sql index f26c12ac78c8..b309cd65374d 100644 --- a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/duckdb/out.sql @@ -1,5 +1,5 @@ SELECT - t0.id, - t0.bool_col -FROM functional_alltypes 
AS t0 + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/bigquery/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/bigquery/out.sql index 7601954ae202..56100fb38053 100644 --- a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/bigquery/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(t0.bigint_col) AS Sum_bigint_col -FROM `ibis-gbq`.ibis_gbq_testing.functional_alltypes AS t0 \ No newline at end of file + SUM(`t0`.`bigint_col`) AS `Sum_bigint_col` +FROM `ibis-gbq`.`ibis_gbq_testing`.`functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/clickhouse/out.sql index eb7db2731364..6bd0ba8c995d 100644 --- a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/clickhouse/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(t0.bigint_col) AS "Sum(bigint_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + SUM("t0"."bigint_col") AS "Sum(bigint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/duckdb/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/duckdb/out.sql index eb7db2731364..6bd0ba8c995d 100644 --- a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/duckdb/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(t0.bigint_col) AS "Sum(bigint_col)" -FROM functional_alltypes AS t0 \ No newline at end of file + SUM("t0"."bigint_col") AS "Sum(bigint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/bigquery/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/bigquery/out.sql index 1f1906f84e03..62279f6d2e1b 100644 --- a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/bigquery/out.sql @@ -2,9 +2,9 @@ SELECT * FROM ( SELECT - t0.id, - t0.bool_col - FROM `ibis-gbq`.ibis_gbq_testing.functional_alltypes AS t0 + `t0`.`id`, + `t0`.`bool_col` + FROM `ibis-gbq`.`ibis_gbq_testing`.`functional_alltypes` AS `t0` LIMIT 10 -) AS t2 +) AS `t2` LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/clickhouse/out.sql index 88b2af3a2cc3..97338646649f 100644 --- a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/clickhouse/out.sql @@ -2,9 +2,9 @@ SELECT * FROM ( SELECT - t0.id, - t0.bool_col - FROM functional_alltypes AS t0 + "t0"."id", + "t0"."bool_col" + FROM "functional_alltypes" AS "t0" LIMIT 10 -) AS t2 +) AS "t2" 
LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/duckdb/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/duckdb/out.sql index 88b2af3a2cc3..97338646649f 100644 --- a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/duckdb/out.sql @@ -2,9 +2,9 @@ SELECT * FROM ( SELECT - t0.id, - t0.bool_col - FROM functional_alltypes AS t0 + "t0"."id", + "t0"."bool_col" + FROM "functional_alltypes" AS "t0" LIMIT 10 -) AS t2 +) AS "t2" LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_join/test_complex_join_agg/out.sql b/ibis/backends/tests/snapshots/test_join/test_complex_join_agg/out.sql index c454d75c63aa..07089341f0a2 100644 --- a/ibis/backends/tests/snapshots/test_join/test_complex_join_agg/out.sql +++ b/ibis/backends/tests/snapshots/test_join/test_complex_join_agg/out.sql @@ -1,17 +1,17 @@ SELECT - t3.key1 AS key1, - AVG(t3.value1 - t3.value2) AS avg_diff + "t4"."key1", + AVG("t4"."value1" - "t4"."value2") AS "avg_diff" FROM ( SELECT - t0.value1 AS value1, - t0.key1 AS key1, - t0.key2 AS key2, - t1.value2 AS value2, - t1.key1 AS key1_right, - t1.key4 AS key4 - FROM table1 AS t0 - LEFT OUTER JOIN table2 AS t1 - ON t0.key1 = t1.key1 -) AS t3 + "t2"."value1", + "t2"."key1", + "t2"."key2", + "t3"."value2", + "t3"."key1" AS "key1_right", + "t3"."key4" + FROM "table1" AS "t2" + LEFT OUTER JOIN "table2" AS "t3" + ON "t2"."key1" = "t3"."key1" +) AS "t4" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql index d0b7a174d49a..3cccc7356173 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql @@ -1,20 +1,20 @@ -WITH t1 AS ( +WITH "t1" AS ( SELECT - t0.key - FROM leaf AS t0 + "t0"."key" + FROM "leaf" AS "t0" WHERE TRUE ) SELECT - t3.key -FROM t1 AS t3 -INNER JOIN t1 AS t4 - ON t3.key = t4.key + "t3"."key" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" INNER JOIN ( SELECT - t3.key - FROM t1 AS t3 - INNER JOIN t1 AS t4 - ON t3.key = t4.key -) AS t6 - ON t3.key = t6.key \ No newline at end of file + "t3"."key" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +) AS "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql index d0b7a174d49a..3cccc7356173 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql @@ -1,20 +1,20 @@ -WITH t1 AS ( +WITH "t1" AS ( SELECT - t0.key - FROM leaf AS t0 + "t0"."key" + FROM "leaf" AS "t0" WHERE TRUE ) SELECT - t3.key -FROM t1 AS t3 -INNER JOIN t1 AS t4 - ON t3.key = t4.key + "t3"."key" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" INNER JOIN ( SELECT - t3.key - FROM t1 AS t3 - INNER JOIN t1 AS t4 - ON t3.key = t4.key -) AS t6 - ON t3.key = t6.key \ No newline at end of file + "t3"."key" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = 
"t4"."key" +) AS "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/risingwave/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/risingwave/out.sql new file mode 100644 index 000000000000..3cccc7356173 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/risingwave/out.sql @@ -0,0 +1,20 @@ +WITH "t1" AS ( + SELECT + "t0"."key" + FROM "leaf" AS "t0" + WHERE + TRUE +) +SELECT + "t3"."key" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +INNER JOIN ( + SELECT + "t3"."key" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +) AS "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/sqlite/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/sqlite/out.sql index 14bef4d230ef..3cccc7356173 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/sqlite/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/sqlite/out.sql @@ -1,22 +1,20 @@ -WITH t0 AS ( +WITH "t1" AS ( SELECT - t4."key" AS "key" - FROM leaf AS t4 + "t0"."key" + FROM "leaf" AS "t0" WHERE - 1 = 1 -), t1 AS ( - SELECT - t0."key" AS "key" - FROM t0 -), t2 AS ( - SELECT - t0."key" AS "key" - FROM t0 - JOIN t1 - ON t0."key" = t1."key" + TRUE ) SELECT - t2."key" -FROM t2 -JOIN t2 AS t3 - ON t2."key" = t3."key" \ No newline at end of file + "t3"."key" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +INNER JOIN ( + SELECT + "t3"."key" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +) AS "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/bigquery/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/bigquery/out.sql index fc16f2428d16..ac006b1d5f25 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/bigquery/out.sql @@ -1,5 +1,5 @@ SELECT - CASE t0.continent + CASE `t0`.`continent` WHEN 'NA' THEN 'North America' WHEN 'SA' @@ -15,8 +15,8 @@ SELECT WHEN 'AN' THEN 'Antarctica' ELSE 'Unknown continent' - END AS cont, - SUM(t0.population) AS total_pop -FROM countries AS t0 + END AS `cont`, + SUM(`t0`.`population`) AS `total_pop` +FROM `countries` AS `t0` GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/clickhouse/out.sql index 0b9554f4c048..3abd937a1e28 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/clickhouse/out.sql @@ -1,5 +1,5 @@ SELECT - CASE t0.continent + CASE "t0"."continent" WHEN 'NA' THEN 'North America' WHEN 'SA' @@ -15,11 +15,11 @@ SELECT WHEN 'AN' THEN 'Antarctica' ELSE 'Unknown continent' - END AS cont, - SUM(t0.population) AS total_pop -FROM countries AS t0 + END AS "cont", + SUM("t0"."population") AS "total_pop" +FROM "countries" AS "t0" GROUP BY - CASE t0.continent + CASE "t0"."continent" WHEN 'NA' THEN 'North America' WHEN 'SA' diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/duckdb/out.sql 
b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/duckdb/out.sql index fc16f2428d16..d3969647c9ea 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/duckdb/out.sql @@ -1,5 +1,5 @@ SELECT - CASE t0.continent + CASE "t0"."continent" WHEN 'NA' THEN 'North America' WHEN 'SA' @@ -15,8 +15,8 @@ SELECT WHEN 'AN' THEN 'Antarctica' ELSE 'Unknown continent' - END AS cont, - SUM(t0.population) AS total_pop -FROM countries AS t0 + END AS "cont", + SUM("t0"."population") AS "total_pop" +FROM "countries" AS "t0" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/bigquery/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/bigquery/out.sql index 9e6bcbdd13af..ab4017d56b0f 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/bigquery/out.sql @@ -1,9 +1,9 @@ SELECT - t0.x IN ( + `t0`.`x` IN ( SELECT - t0.x - FROM t AS t0 + `t0`.`x` + FROM `t` AS `t0` WHERE - t0.x > 2 - ) AS InSubquery_x -FROM t AS t0 \ No newline at end of file + `t0`.`x` > 2 + ) AS `InSubquery_x` +FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql index cd122964c87e..c1611d8cecc3 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql @@ -1,9 +1,9 @@ SELECT - t0.x IN ( + "t0"."x" IN ( SELECT - t0.x - FROM t AS t0 + "t0"."x" + FROM "t" AS "t0" WHERE - t0.x > 2 + "t0"."x" > 2 ) AS "InSubquery(x)" -FROM t AS t0 \ No newline at end of file +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql index dd1d25118977..fa4484d15c6a 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql @@ -1,9 +1,9 @@ SELECT - t0.x IN ( + "t0"."x" IN ( SELECT - t0.x - FROM t AS t0 + "t0"."x" + FROM "t" AS "t0" WHERE - t0.x > CAST(2 AS TINYINT) + "t0"."x" > CAST(2 AS TINYINT) ) AS "InSubquery(x)" -FROM t AS t0 \ No newline at end of file +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/bigquery/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/bigquery/out.sql index a5ee8fb42f14..31cedc71e60b 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/bigquery/out.sql @@ -1,81 +1,81 @@ -WITH t5 AS ( +WITH `t5` AS ( SELECT - t4.field_of_study, - ANY_VALUE(t4.diff) AS diff + `t4`.`field_of_study`, + ANY_VALUE(`t4`.`diff`) AS `diff` FROM ( SELECT - t3.field_of_study, - t3.years, - t3.degrees, - t3.earliest_degrees, - t3.latest_degrees, - t3.latest_degrees - t3.earliest_degrees AS diff + `t3`.`field_of_study`, + `t3`.`years`, + `t3`.`degrees`, + `t3`.`earliest_degrees`, + `t3`.`latest_degrees`, + `t3`.`latest_degrees` - `t3`.`earliest_degrees` AS `diff` FROM ( SELECT - t2.field_of_study, - t2.years, - t2.degrees, - first_value(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED 
PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, - last_value(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees + `t2`.`field_of_study`, + `t2`.`years`, + `t2`.`degrees`, + first_value(`t2`.`degrees`) OVER (PARTITION BY `t2`.`field_of_study` ORDER BY `t2`.`years` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `earliest_degrees`, + last_value(`t2`.`degrees`) OVER (PARTITION BY `t2`.`field_of_study` ORDER BY `t2`.`years` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `latest_degrees` FROM ( SELECT - t1.field_of_study, - t1.__pivoted__.years AS years, - t1.__pivoted__.degrees AS degrees + `t1`.`field_of_study`, + `t1`.`__pivoted__`.`years` AS `years`, + `t1`.`__pivoted__`.`degrees` AS `degrees` FROM ( SELECT - t0.field_of_study, - IF(pos = pos_2, __pivoted__, NULL) AS __pivoted__ - FROM humanities AS t0 + `t0`.`field_of_study`, + IF(pos = pos_2, `__pivoted__`, NULL) AS `__pivoted__` + FROM `humanities` AS `t0` CROSS JOIN UNNEST(GENERATE_ARRAY( 0, GREATEST( ARRAY_LENGTH( - [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] + [STRUCT('1970-71' AS `years`, `t0`.`1970-71` AS `degrees`), STRUCT('1975-76' AS `years`, `t0`.`1975-76` AS `degrees`), STRUCT('1980-81' AS `years`, `t0`.`1980-81` AS `degrees`), STRUCT('1985-86' AS `years`, `t0`.`1985-86` AS `degrees`), STRUCT('1990-91' AS `years`, `t0`.`1990-91` AS `degrees`), STRUCT('1995-96' AS `years`, `t0`.`1995-96` AS `degrees`), STRUCT('2000-01' AS `years`, `t0`.`2000-01` AS `degrees`), STRUCT('2005-06' AS `years`, `t0`.`2005-06` AS `degrees`), STRUCT('2010-11' AS `years`, `t0`.`2010-11` AS `degrees`), STRUCT('2011-12' AS `years`, `t0`.`2011-12` AS `degrees`), STRUCT('2012-13' AS `years`, `t0`.`2012-13` AS `degrees`), STRUCT('2013-14' AS `years`, `t0`.`2013-14` AS `degrees`), STRUCT('2014-15' AS `years`, `t0`.`2014-15` AS `degrees`), STRUCT('2015-16' AS `years`, `t0`.`2015-16` AS `degrees`), STRUCT('2016-17' AS `years`, `t0`.`2016-17` AS `degrees`), STRUCT('2017-18' AS `years`, `t0`.`2017-18` AS `degrees`), STRUCT('2018-19' AS `years`, `t0`.`2018-19` AS `degrees`), STRUCT('2019-20' AS `years`, `t0`.`2019-20` AS `degrees`)] ) ) - 1 )) AS pos - CROSS JOIN UNNEST([STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), 
STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)]) AS __pivoted__ WITH OFFSET AS pos_2 + CROSS JOIN UNNEST([STRUCT('1970-71' AS `years`, `t0`.`1970-71` AS `degrees`), STRUCT('1975-76' AS `years`, `t0`.`1975-76` AS `degrees`), STRUCT('1980-81' AS `years`, `t0`.`1980-81` AS `degrees`), STRUCT('1985-86' AS `years`, `t0`.`1985-86` AS `degrees`), STRUCT('1990-91' AS `years`, `t0`.`1990-91` AS `degrees`), STRUCT('1995-96' AS `years`, `t0`.`1995-96` AS `degrees`), STRUCT('2000-01' AS `years`, `t0`.`2000-01` AS `degrees`), STRUCT('2005-06' AS `years`, `t0`.`2005-06` AS `degrees`), STRUCT('2010-11' AS `years`, `t0`.`2010-11` AS `degrees`), STRUCT('2011-12' AS `years`, `t0`.`2011-12` AS `degrees`), STRUCT('2012-13' AS `years`, `t0`.`2012-13` AS `degrees`), STRUCT('2013-14' AS `years`, `t0`.`2013-14` AS `degrees`), STRUCT('2014-15' AS `years`, `t0`.`2014-15` AS `degrees`), STRUCT('2015-16' AS `years`, `t0`.`2015-16` AS `degrees`), STRUCT('2016-17' AS `years`, `t0`.`2016-17` AS `degrees`), STRUCT('2017-18' AS `years`, `t0`.`2017-18` AS `degrees`), STRUCT('2018-19' AS `years`, `t0`.`2018-19` AS `degrees`), STRUCT('2019-20' AS `years`, `t0`.`2019-20` AS `degrees`)]) AS `__pivoted__` WITH OFFSET AS pos_2 WHERE pos = pos_2 OR ( pos > ( ARRAY_LENGTH( - [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] + [STRUCT('1970-71' AS `years`, `t0`.`1970-71` AS `degrees`), STRUCT('1975-76' AS `years`, `t0`.`1975-76` AS `degrees`), STRUCT('1980-81' AS `years`, `t0`.`1980-81` AS `degrees`), STRUCT('1985-86' AS `years`, `t0`.`1985-86` AS `degrees`), STRUCT('1990-91' AS `years`, `t0`.`1990-91` AS `degrees`), STRUCT('1995-96' AS `years`, `t0`.`1995-96` AS `degrees`), STRUCT('2000-01' AS `years`, `t0`.`2000-01` AS `degrees`), STRUCT('2005-06' AS `years`, `t0`.`2005-06` AS `degrees`), STRUCT('2010-11' AS `years`, `t0`.`2010-11` AS `degrees`), STRUCT('2011-12' AS `years`, `t0`.`2011-12` AS `degrees`), STRUCT('2012-13' AS `years`, `t0`.`2012-13` AS `degrees`), STRUCT('2013-14' AS `years`, `t0`.`2013-14` AS `degrees`), STRUCT('2014-15' AS `years`, `t0`.`2014-15` AS `degrees`), STRUCT('2015-16' AS `years`, 
`t0`.`2015-16` AS `degrees`), STRUCT('2016-17' AS `years`, `t0`.`2016-17` AS `degrees`), STRUCT('2017-18' AS `years`, `t0`.`2017-18` AS `degrees`), STRUCT('2018-19' AS `years`, `t0`.`2018-19` AS `degrees`), STRUCT('2019-20' AS `years`, `t0`.`2019-20` AS `degrees`)] ) - 1 ) AND pos_2 = ( ARRAY_LENGTH( - [STRUCT('1970-71' AS years, t0.`1970-71` AS degrees), STRUCT('1975-76' AS years, t0.`1975-76` AS degrees), STRUCT('1980-81' AS years, t0.`1980-81` AS degrees), STRUCT('1985-86' AS years, t0.`1985-86` AS degrees), STRUCT('1990-91' AS years, t0.`1990-91` AS degrees), STRUCT('1995-96' AS years, t0.`1995-96` AS degrees), STRUCT('2000-01' AS years, t0.`2000-01` AS degrees), STRUCT('2005-06' AS years, t0.`2005-06` AS degrees), STRUCT('2010-11' AS years, t0.`2010-11` AS degrees), STRUCT('2011-12' AS years, t0.`2011-12` AS degrees), STRUCT('2012-13' AS years, t0.`2012-13` AS degrees), STRUCT('2013-14' AS years, t0.`2013-14` AS degrees), STRUCT('2014-15' AS years, t0.`2014-15` AS degrees), STRUCT('2015-16' AS years, t0.`2015-16` AS degrees), STRUCT('2016-17' AS years, t0.`2016-17` AS degrees), STRUCT('2017-18' AS years, t0.`2017-18` AS degrees), STRUCT('2018-19' AS years, t0.`2018-19` AS degrees), STRUCT('2019-20' AS years, t0.`2019-20` AS degrees)] + [STRUCT('1970-71' AS `years`, `t0`.`1970-71` AS `degrees`), STRUCT('1975-76' AS `years`, `t0`.`1975-76` AS `degrees`), STRUCT('1980-81' AS `years`, `t0`.`1980-81` AS `degrees`), STRUCT('1985-86' AS `years`, `t0`.`1985-86` AS `degrees`), STRUCT('1990-91' AS `years`, `t0`.`1990-91` AS `degrees`), STRUCT('1995-96' AS `years`, `t0`.`1995-96` AS `degrees`), STRUCT('2000-01' AS `years`, `t0`.`2000-01` AS `degrees`), STRUCT('2005-06' AS `years`, `t0`.`2005-06` AS `degrees`), STRUCT('2010-11' AS `years`, `t0`.`2010-11` AS `degrees`), STRUCT('2011-12' AS `years`, `t0`.`2011-12` AS `degrees`), STRUCT('2012-13' AS `years`, `t0`.`2012-13` AS `degrees`), STRUCT('2013-14' AS `years`, `t0`.`2013-14` AS `degrees`), STRUCT('2014-15' AS `years`, `t0`.`2014-15` AS `degrees`), STRUCT('2015-16' AS `years`, `t0`.`2015-16` AS `degrees`), STRUCT('2016-17' AS `years`, `t0`.`2016-17` AS `degrees`), STRUCT('2017-18' AS `years`, `t0`.`2017-18` AS `degrees`), STRUCT('2018-19' AS `years`, `t0`.`2018-19` AS `degrees`), STRUCT('2019-20' AS `years`, `t0`.`2019-20` AS `degrees`)] ) - 1 ) ) - ) AS t1 - ) AS t2 - ) AS t3 - ) AS t4 + ) AS `t1` + ) AS `t2` + ) AS `t3` + ) AS `t4` GROUP BY 1 ) SELECT - t11.field_of_study, - t11.diff + `t11`.`field_of_study`, + `t11`.`diff` FROM ( SELECT - t6.field_of_study, - t6.diff - FROM t5 AS t6 + `t6`.`field_of_study`, + `t6`.`diff` + FROM `t5` AS `t6` ORDER BY - t6.diff DESC + `t6`.`diff` DESC LIMIT 10 UNION ALL SELECT - t6.field_of_study, - t6.diff - FROM t5 AS t6 + `t6`.`field_of_study`, + `t6`.`diff` + FROM `t5` AS `t6` WHERE - t6.diff < 0 + `t6`.`diff` < 0 ORDER BY - t6.diff ASC NULLS LAST + `t6`.`diff` ASC NULLS LAST LIMIT 10 -) AS t11 \ No newline at end of file +) AS `t11` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql index 55466dbdea2c..1ca9740613df 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql @@ -1,60 +1,60 @@ -WITH t5 AS ( +WITH "t5" AS ( SELECT - t4.field_of_study, - any(t4.diff) AS diff + "t4"."field_of_study", + any("t4"."diff") AS "diff" FROM ( SELECT - 
t3.field_of_study, - t3.years, - t3.degrees, - t3.earliest_degrees, - t3.latest_degrees, - t3.latest_degrees - t3.earliest_degrees AS diff + "t3"."field_of_study", + "t3"."years", + "t3"."degrees", + "t3"."earliest_degrees", + "t3"."latest_degrees", + "t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff" FROM ( SELECT - t2.field_of_study, - t2.years, - t2.degrees, - any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, - anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees + "t2"."field_of_study", + "t2"."years", + "t2"."degrees", + any("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees", + anyLast("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees" FROM ( SELECT - t1.field_of_study, - CAST(t1.__pivoted__.1 AS Nullable(String)) AS years, - CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees + "t1"."field_of_study", + CAST("t1"."__pivoted__".1 AS Nullable(String)) AS "years", + CAST("t1"."__pivoted__".2 AS Nullable(Int64)) AS "degrees" FROM ( SELECT - t0.field_of_study, + "t0"."field_of_study", arrayJoin( - [CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))] - ) AS __pivoted__ - FROM humanities AS t0 - ) AS t1 - ) AS t2 - ) AS t3 - ) AS t4 + [CAST(tuple('1970-71', "t0"."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', "t0"."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', "t0"."1980-81") AS Tuple(years Nullable(String), degrees 
Nullable(Int64))), CAST(tuple('1985-86', "t0"."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', "t0"."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', "t0"."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', "t0"."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', "t0"."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', "t0"."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', "t0"."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', "t0"."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', "t0"."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', "t0"."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', "t0"."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', "t0"."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', "t0"."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', "t0"."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', "t0"."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))] + ) AS "__pivoted__" + FROM "humanities" AS "t0" + ) AS "t1" + ) AS "t2" + ) AS "t3" + ) AS "t4" GROUP BY - t4.field_of_study + "t4"."field_of_study" ) SELECT - t11.field_of_study, - t11.diff + "t11"."field_of_study", + "t11"."diff" FROM ( SELECT - t6.field_of_study, - t6.diff - FROM t5 AS t6 + "t6"."field_of_study", + "t6"."diff" + FROM "t5" AS "t6" ORDER BY - t6.diff DESC + "t6"."diff" DESC LIMIT 10 UNION ALL SELECT - t6.field_of_study, - t6.diff - FROM t5 AS t6 + "t6"."field_of_study", + "t6"."diff" + FROM "t5" AS "t6" WHERE - t6.diff < 0 + "t6"."diff" < 0 ORDER BY - t6.diff ASC + "t6"."diff" ASC LIMIT 10 -) AS t11 \ No newline at end of file +) AS "t11" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql index 52d685293884..997a167e6479 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql @@ -1,60 +1,60 @@ -WITH t5 AS ( +WITH "t5" AS ( SELECT - t4.field_of_study, - FIRST(t4.diff) AS diff + "t4"."field_of_study", + FIRST("t4"."diff") AS "diff" FROM ( SELECT - t3.field_of_study, - t3.years, - t3.degrees, - t3.earliest_degrees, - t3.latest_degrees, - t3.latest_degrees - t3.earliest_degrees AS diff + "t3"."field_of_study", + "t3"."years", + "t3"."degrees", + "t3"."earliest_degrees", + "t3"."latest_degrees", + "t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff" FROM ( SELECT - t2.field_of_study, - t2.years, - t2.degrees, - FIRST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, - LAST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees + "t2"."field_of_study", + "t2"."years", + "t2"."degrees", + FIRST("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC 
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees", + LAST("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees" FROM ( SELECT - t1.field_of_study, - t1.__pivoted__.years AS years, - t1.__pivoted__.degrees AS degrees + "t1"."field_of_study", + "t1"."__pivoted__"."years" AS "years", + "t1"."__pivoted__"."degrees" AS "degrees" FROM ( SELECT - t0.field_of_study, + "t0"."field_of_study", UNNEST( - [{'years': '1970-71', 'degrees': t0."1970-71"}, {'years': '1975-76', 'degrees': t0."1975-76"}, {'years': '1980-81', 'degrees': t0."1980-81"}, {'years': '1985-86', 'degrees': t0."1985-86"}, {'years': '1990-91', 'degrees': t0."1990-91"}, {'years': '1995-96', 'degrees': t0."1995-96"}, {'years': '2000-01', 'degrees': t0."2000-01"}, {'years': '2005-06', 'degrees': t0."2005-06"}, {'years': '2010-11', 'degrees': t0."2010-11"}, {'years': '2011-12', 'degrees': t0."2011-12"}, {'years': '2012-13', 'degrees': t0."2012-13"}, {'years': '2013-14', 'degrees': t0."2013-14"}, {'years': '2014-15', 'degrees': t0."2014-15"}, {'years': '2015-16', 'degrees': t0."2015-16"}, {'years': '2016-17', 'degrees': t0."2016-17"}, {'years': '2017-18', 'degrees': t0."2017-18"}, {'years': '2018-19', 'degrees': t0."2018-19"}, {'years': '2019-20', 'degrees': t0."2019-20"}] - ) AS __pivoted__ - FROM humanities AS t0 - ) AS t1 - ) AS t2 - ) AS t3 - ) AS t4 + [{'years': '1970-71', 'degrees': "t0"."1970-71"}, {'years': '1975-76', 'degrees': "t0"."1975-76"}, {'years': '1980-81', 'degrees': "t0"."1980-81"}, {'years': '1985-86', 'degrees': "t0"."1985-86"}, {'years': '1990-91', 'degrees': "t0"."1990-91"}, {'years': '1995-96', 'degrees': "t0"."1995-96"}, {'years': '2000-01', 'degrees': "t0"."2000-01"}, {'years': '2005-06', 'degrees': "t0"."2005-06"}, {'years': '2010-11', 'degrees': "t0"."2010-11"}, {'years': '2011-12', 'degrees': "t0"."2011-12"}, {'years': '2012-13', 'degrees': "t0"."2012-13"}, {'years': '2013-14', 'degrees': "t0"."2013-14"}, {'years': '2014-15', 'degrees': "t0"."2014-15"}, {'years': '2015-16', 'degrees': "t0"."2015-16"}, {'years': '2016-17', 'degrees': "t0"."2016-17"}, {'years': '2017-18', 'degrees': "t0"."2017-18"}, {'years': '2018-19', 'degrees': "t0"."2018-19"}, {'years': '2019-20', 'degrees': "t0"."2019-20"}] + ) AS "__pivoted__" + FROM "humanities" AS "t0" + ) AS "t1" + ) AS "t2" + ) AS "t3" + ) AS "t4" GROUP BY 1 ) SELECT - t11.field_of_study, - t11.diff + "t11"."field_of_study", + "t11"."diff" FROM ( SELECT - t6.field_of_study, - t6.diff - FROM t5 AS t6 + "t6"."field_of_study", + "t6"."diff" + FROM "t5" AS "t6" ORDER BY - t6.diff DESC + "t6"."diff" DESC LIMIT 10 UNION ALL SELECT - t6.field_of_study, - t6.diff - FROM t5 AS t6 + "t6"."field_of_study", + "t6"."diff" + FROM "t5" AS "t6" WHERE - t6.diff < CAST(0 AS TINYINT) + "t6"."diff" < CAST(0 AS TINYINT) ORDER BY - t6.diff ASC + "t6"."diff" ASC LIMIT 10 -) AS t11 \ No newline at end of file +) AS "t11" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/bigquery-date/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/bigquery-date/out.sql index 9fac0141f2b3..81b7f00cff51 100644 --- a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/bigquery-date/out.sql +++ b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/bigquery-date/out.sql @@ -1,2 +1,2 @@ SELECT - CAST('2023-04-07' AS DATE) AS `datetime_date_2023_ 4_ 7` \ 
No newline at end of file + DATE(2023, 4, 7) AS `datetime_date_2023_ 4_ 7` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/bigquery-timestamp/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/bigquery-timestamp/out.sql index 16d7e15ea1c2..2dc7b53b2103 100644 --- a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/bigquery-timestamp/out.sql +++ b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/bigquery-timestamp/out.sql @@ -1,2 +1,2 @@ SELECT - CAST('2023-04-07 04:05:06.230136' AS DATETIME) AS `datetime_datetime_2023_ 4_ 7_ 4_ 5_ 6_ 230136` \ No newline at end of file + datetime('2023-04-07T04:05:06.230136') AS `datetime_datetime_2023_ 4_ 7_ 4_ 5_ 6_ 230136` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/clickhouse-timestamp/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/clickhouse-timestamp/out.sql index 4a134cb9473a..e5586cf6d91f 100644 --- a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/clickhouse-timestamp/out.sql +++ b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/clickhouse-timestamp/out.sql @@ -1,2 +1,2 @@ SELECT - toDateTime64('2023-04-07T04:05:06.230136', 6) AS "datetime.datetime(2023, 4, 7, 4, 5, 6, 230136)" \ No newline at end of file + parseDateTime64BestEffort('2023-04-07T04:05:06.230136', 6) AS "datetime.datetime(2023, 4, 7, 4, 5, 6, 230136)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/datafusion-date/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/datafusion-date/out.sql new file mode 100644 index 000000000000..917dd77ad76f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/datafusion-date/out.sql @@ -0,0 +1,2 @@ +SELECT + DATE_TRUNC('DAY', '2023-04-07') AS "datetime.date(2023, 4, 7)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/datafusion-timestamp/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/datafusion-timestamp/out.sql new file mode 100644 index 000000000000..42e6d3e8cc94 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/datafusion-timestamp/out.sql @@ -0,0 +1,2 @@ +SELECT + ARROW_CAST('2023-04-07 04:05:06.230136', 'Timestamp(Microsecond, None)') AS "datetime.datetime(2023, 4, 7, 4, 5, 6, 230136)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/druid-date/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/druid-date/out.sql new file mode 100644 index 000000000000..7dd4e139e3fd --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/druid-date/out.sql @@ -0,0 +1,2 @@ +SELECT + DATE_FROM_PARTS(2023, 4, 7) AS "datetime.date(2023, 4, 7)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/druid-timestamp/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/druid-timestamp/out.sql new file mode 100644 index 000000000000..3add9bb0ad1c --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/druid-timestamp/out.sql @@ -0,0 +1,2 @@ +SELECT + CAST('2023-04-07T04:05:06.230136' AS TIMESTAMP) AS 
"datetime.datetime(2023, 4, 7, 4, 5, 6, 230136)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/duckdb-timestamp/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/duckdb-timestamp/out.sql index 3add9bb0ad1c..3e36a03d4c62 100644 --- a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/duckdb-timestamp/out.sql +++ b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/duckdb-timestamp/out.sql @@ -1,2 +1,2 @@ SELECT - CAST('2023-04-07T04:05:06.230136' AS TIMESTAMP) AS "datetime.datetime(2023, 4, 7, 4, 5, 6, 230136)" \ No newline at end of file + MAKE_TIMESTAMP(2023, 4, 7, 4, 5, 6.230136) AS "datetime.datetime(2023, 4, 7, 4, 5, 6, 230136)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/exasol-date/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/exasol-date/out.sql new file mode 100644 index 000000000000..fd66b4fee3cb --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/exasol-date/out.sql @@ -0,0 +1,2 @@ +SELECT + CAST('2023-04-07' AS DATE) AS "datetime_date(2023, 4, 7)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/exasol-timestamp/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/exasol-timestamp/out.sql new file mode 100644 index 000000000000..6e447fb277d7 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/exasol-timestamp/out.sql @@ -0,0 +1,2 @@ +SELECT + CAST('2023-04-07 04:05:06.230' AS TIMESTAMP) AS "datetime_datetime(2023, 4, 7, 4, 5, 6, 230136)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/mssql-date/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/mssql-date/out.sql index 4636132576f6..184d125e696e 100644 --- a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/mssql-date/out.sql +++ b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/mssql-date/out.sql @@ -1,2 +1,2 @@ SELECT - DATEFROMPARTS(2023, 4, 7) AS "datetime.date(2023, 4, 7)" \ No newline at end of file + DATEFROMPARTS(2023, 4, 7) AS [datetime.date(2023, 4, 7)] \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/mssql-timestamp/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/mssql-timestamp/out.sql index e2fe11d84ee9..384bef1d80fb 100644 --- a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/mssql-timestamp/out.sql +++ b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/mssql-timestamp/out.sql @@ -1,2 +1,2 @@ SELECT - DATETIME2FROMPARTS(2023, 4, 7, 4, 5, 6, 230136, 6) AS "datetime.datetime(2023, 4, 7, 4, 5, 6, 230136)" \ No newline at end of file + DATETIME2FROMPARTS(2023, 4, 7, 4, 5, 6, 230136, 6) AS [datetime.datetime(2023, 4, 7, 4, 5, 6, 230136)] \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/oracle-date/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/oracle-date/out.sql index 73b72c7b2ab9..c2b4de850a75 100644 --- a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/oracle-date/out.sql +++ 
b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/oracle-date/out.sql @@ -1,3 +1,2 @@ SELECT - TO_DATE('2023-04-07', 'FXYYYY-MM-DD') AS "datetime.date(2023, 4, 7)" -FROM DUAL \ No newline at end of file + TO_DATE('2023-04-07', 'FXYYYY-MM-DD') AS "datetime.date(2023, 4, 7)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/oracle-timestamp/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/oracle-timestamp/out.sql index 4459c360b49e..891de4604dc9 100644 --- a/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/oracle-timestamp/out.sql +++ b/ibis/backends/tests/snapshots/test_temporal/test_temporal_literal_sql/oracle-timestamp/out.sql @@ -1,3 +1,2 @@ SELECT - TO_TIMESTAMP('2023-04-07,04:05:06.230136', 'FXYYYY-MM-DD,HH24:MI:SS.FF6') AS "datetime.datetime(2023, 4, 7, 4, 5, 6, 230136)" -FROM DUAL \ No newline at end of file + TO_TIMESTAMP('2023-04-07T04:05:06.230136', 'YYYY-MM-DD"T"HH24:MI:SS.FF6') AS "datetime.datetime(2023, 4, 7, 4, 5, 6, 230136)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-bigquery/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-bigquery/out.sql index 917b7bb475f4..22a61be84c29 100644 --- a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-bigquery/out.sql @@ -1,2 +1,2 @@ SELECT - CAST('04:05:06' AS TIME) AS `datetime_time_4_ 5_ 6` \ No newline at end of file + TIME(4, 5, 6) AS `datetime_time_4_ 5_ 6` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-clickhouse/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-clickhouse/out.sql new file mode 100644 index 000000000000..2b9d3ef5da1e --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-clickhouse/out.sql @@ -0,0 +1,2 @@ +SELECT + CAST('04:05:06' AS Nullable(TIME)) AS "datetime.time(4, 5, 6)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-datafusion/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-datafusion/out.sql new file mode 100644 index 000000000000..eaa35ee21d81 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-datafusion/out.sql @@ -0,0 +1,2 @@ +SELECT + CAST('04:05:06' AS TIME) AS "datetime.time(4, 5, 6)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-druid/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-druid/out.sql new file mode 100644 index 000000000000..eaa35ee21d81 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-druid/out.sql @@ -0,0 +1,2 @@ +SELECT + CAST('04:05:06' AS TIME) AS "datetime.time(4, 5, 6)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-exasol/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-exasol/out.sql new file mode 100644 index 000000000000..38076fc94a2f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-exasol/out.sql @@ -0,0 +1,2 @@ +SELECT + CAST('04:05:06' AS TIME) AS "datetime_time(4, 5, 6)" \ No newline at end of file diff --git 
a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-impala/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-impala/out.sql new file mode 100644 index 000000000000..379517f88186 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-impala/out.sql @@ -0,0 +1,2 @@ +SELECT + CAST('04:05:06' AS TIMESTAMP) AS `datetime.time(4, 5, 6)` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-mssql/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-mssql/out.sql index 1081342a8f7b..0010d9f01874 100644 --- a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-mssql/out.sql +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-mssql/out.sql @@ -1,2 +1,2 @@ SELECT - TIMEFROMPARTS(4, 5, 6, 0, 0) AS "datetime.time(4, 5, 6)" \ No newline at end of file + TIMEFROMPARTS(4, 5, 6, 0, 0) AS [datetime.time(4, 5, 6)] \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-oracle/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-oracle/out.sql new file mode 100644 index 000000000000..f3d6c27bf4c6 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-oracle/out.sql @@ -0,0 +1,2 @@ +SELECT + TO_TIMESTAMP('04:05:06', 'YYYY-MM-DD"T"HH24:MI:SS.FF6') AS "datetime.time(4, 5, 6)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-postgres/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-postgres/out.sql index fab689db2dde..47d684023da8 100644 --- a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-postgres/out.sql +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-postgres/out.sql @@ -1,2 +1,2 @@ SELECT - MAKE_TIME(4, 5, 6.0) AS "datetime.time(4, 5, 6)" \ No newline at end of file + MAKE_TIME(CAST(4 AS INT), CAST(5 AS INT), CAST(6.0 AS DOUBLE PRECISION)) AS "datetime.time(4, 5, 6)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-sqlite/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-sqlite/out.sql index 399b76c6254d..0a95157910a6 100644 --- a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-sqlite/out.sql +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/0-sqlite/out.sql @@ -1,2 +1,2 @@ SELECT - '04:05:06.000000' AS "datetime.time(4, 5, 6)" \ No newline at end of file + '04:05:06' AS "datetime.time(4, 5, 6)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-bigquery/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-bigquery/out.sql index bad9e484fcb9..e94cc480ee9c 100644 --- a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-bigquery/out.sql @@ -1,2 +1,2 @@ SELECT - CAST('04:05:06.234567' AS TIME) AS `datetime_time_4_ 5_ 6_ 234567` \ No newline at end of file + TIME(4, 5, 6) AS `datetime_time_4_ 5_ 6_ 234567` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-clickhouse/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-clickhouse/out.sql new file mode 
100644 index 000000000000..41473b7fd96b --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-clickhouse/out.sql @@ -0,0 +1,2 @@ +SELECT + CAST('04:05:06.234567' AS Nullable(TIME)) AS "datetime.time(4, 5, 6, 234567)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-datafusion/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-datafusion/out.sql new file mode 100644 index 000000000000..af024e962bf1 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-datafusion/out.sql @@ -0,0 +1,2 @@ +SELECT + CAST('04:05:06.234567' AS TIME) AS "datetime.time(4, 5, 6, 234567)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-druid/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-druid/out.sql new file mode 100644 index 000000000000..af024e962bf1 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-druid/out.sql @@ -0,0 +1,2 @@ +SELECT + CAST('04:05:06.234567' AS TIME) AS "datetime.time(4, 5, 6, 234567)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-exasol/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-exasol/out.sql new file mode 100644 index 000000000000..5e300b8ade04 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-exasol/out.sql @@ -0,0 +1,2 @@ +SELECT + CAST('04:05:06.234567' AS TIME) AS "datetime_time(4, 5, 6, 234567)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-impala/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-impala/out.sql new file mode 100644 index 000000000000..e216ee9a485f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-impala/out.sql @@ -0,0 +1,2 @@ +SELECT + CAST('04:05:06.234567' AS TIMESTAMP) AS `datetime.time(4, 5, 6, 234567)` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-mssql/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-mssql/out.sql index bd1d495137b7..819cd2c669bd 100644 --- a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-mssql/out.sql +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-mssql/out.sql @@ -1,2 +1,2 @@ SELECT - TIMEFROMPARTS(4, 5, 6, 234567, 0) AS "datetime.time(4, 5, 6, 234567)" \ No newline at end of file + TIMEFROMPARTS(4, 5, 6, 0, 0) AS [datetime.time(4, 5, 6, 234567)] \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-oracle/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-oracle/out.sql new file mode 100644 index 000000000000..5af4117fd8e1 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-oracle/out.sql @@ -0,0 +1,2 @@ +SELECT + TO_TIMESTAMP('04:05:06.234567', 'YYYY-MM-DD"T"HH24:MI:SS.FF6') AS "datetime.time(4, 5, 6, 234567)" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-postgres/out.sql b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-postgres/out.sql index 
ac6ca0601526..1302de8ffaff 100644 --- a/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-postgres/out.sql +++ b/ibis/backends/tests/snapshots/test_temporal/test_time_literal_sql/234567-postgres/out.sql @@ -1,2 +1,2 @@ SELECT - MAKE_TIME(4, 5, 6.234567) AS "datetime.time(4, 5, 6, 234567)" \ No newline at end of file + MAKE_TIME(CAST(4 AS INT), CAST(5 AS INT), CAST(6.234567 AS DOUBLE PRECISION)) AS "datetime.time(4, 5, 6, 234567)" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql index 4a8b9ef4cb44..c7745e622b92 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql @@ -1,19 +1,19 @@ SELECT - t0.a, - t0.b -FROM my_table AS t0 + "t0"."a", + "t0"."b" +FROM "my_table" AS "t0" WHERE - t0.a < CAST(100 AS TINYINT) - AND t0.a = ( + "t0"."a" < CAST(100 AS TINYINT) + AND "t0"."a" = ( SELECT - MAX(t1.a) AS "Max(a)" + MAX("t1"."a") AS "Max(a)" FROM ( SELECT - t0.a, - t0.b - FROM my_table AS t0 + "t0"."a", + "t0"."b" + FROM "my_table" AS "t0" WHERE - t0.a < CAST(100 AS TINYINT) - ) AS t1 + "t0"."a" < CAST(100 AS TINYINT) + ) AS "t1" ) - AND t0.b = 'a' \ No newline at end of file + AND "t0"."b" = 'a' \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql index 37c5668f835b..6ccba780d897 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql @@ -1,18 +1,18 @@ SELECT - t0.a, - t0.b * CAST(2 AS TINYINT) AS b2 -FROM my_table AS t0 + "t0"."a", + "t0"."b" * CAST(2 AS TINYINT) AS "b2" +FROM "my_table" AS "t0" WHERE - t0.a < CAST(100 AS TINYINT) - AND t0.a = ( + "t0"."a" < CAST(100 AS TINYINT) + AND "t0"."a" = ( SELECT - MAX(t1.a) AS "Max(a)" + MAX("t1"."a") AS "Max(a)" FROM ( SELECT - t0.a, - t0.b * CAST(2 AS TINYINT) AS b2 - FROM my_table AS t0 + "t0"."a", + "t0"."b" * CAST(2 AS TINYINT) AS "b2" + FROM "my_table" AS "t0" WHERE - t0.a < CAST(100 AS TINYINT) - ) AS t1 + "t0"."a" < CAST(100 AS TINYINT) + ) AS "t1" ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql index 37c5668f835b..6ccba780d897 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql @@ -1,18 +1,18 @@ SELECT - t0.a, - t0.b * CAST(2 AS TINYINT) AS b2 -FROM my_table AS t0 + "t0"."a", + "t0"."b" * CAST(2 AS TINYINT) AS "b2" +FROM "my_table" AS "t0" WHERE - t0.a < CAST(100 AS TINYINT) - AND t0.a = ( + "t0"."a" < CAST(100 AS TINYINT) + AND "t0"."a" = ( SELECT - MAX(t1.a) AS "Max(a)" + MAX("t1"."a") AS "Max(a)" FROM ( SELECT - t0.a, - t0.b * CAST(2 AS TINYINT) AS b2 - FROM my_table AS t0 + "t0"."a", + "t0"."b" * CAST(2 AS TINYINT) AS "b2" + FROM "my_table" AS "t0" WHERE - t0.a < CAST(100 AS TINYINT) - ) AS t1 + "t0"."a" < CAST(100 AS TINYINT) + ) AS "t1" ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql 
b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql index d38aa10366c4..0913c0727447 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql @@ -2,6 +2,6 @@ SELECT DISTINCT * FROM ( SELECT - t0.string_col - FROM functional_alltypes AS t0 -) AS t1 \ No newline at end of file + "t0"."string_col" + FROM "functional_alltypes" AS "t0" +) AS "t1" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql index 1e1635a607bf..2e86d41c48ea 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col + CAST(4 AS TINYINT) AS "Add(int_col, 4)" -FROM int_col_table AS t0 \ No newline at end of file + "t0"."int_col" + CAST(4 AS TINYINT) AS "Add(int_col, 4)" +FROM "int_col_table" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql index 0237f96353c1..26c4292ded48 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql @@ -1,3 +1,3 @@ SELECT - t0.int_col + CAST(4 AS TINYINT) AS foo -FROM int_col_table AS t0 \ No newline at end of file + "t0"."int_col" + CAST(4 AS TINYINT) AS "foo" +FROM "int_col_table" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql index 1203afe54f42..d7306679c60a 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql @@ -1,24 +1,24 @@ SELECT - t1.string_col, - COUNT(DISTINCT t1.int_col) AS nunique + "t1"."string_col", + COUNT(DISTINCT "t1"."int_col") AS "nunique" FROM ( SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month - FROM functional_alltypes AS t0 + "t0"."id", + "t0"."bool_col", + "t0"."tinyint_col", + "t0"."smallint_col", + "t0"."int_col", + "t0"."bigint_col", + "t0"."float_col", + "t0"."double_col", + "t0"."date_string_col", + "t0"."string_col", + "t0"."timestamp_col", + "t0"."year", + "t0"."month" + FROM "functional_alltypes" AS "t0" WHERE - t0.bigint_col > CAST(0 AS TINYINT) -) AS t1 + "t0"."bigint_col" > CAST(0 AS TINYINT) +) AS "t1" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql index 65eba6a3a4c8..a3eb8d979926 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql @@ -1,17 +1,17 @@ SELECT - t3.key + "t3"."key" FROM ( SELECT - t0.string_col AS key, - CAST(t0.float_col AS DOUBLE) AS 
value - FROM functional_alltypes AS t0 + "t0"."string_col" AS "key", + CAST("t0"."float_col" AS DOUBLE) AS "value" + FROM "functional_alltypes" AS "t0" WHERE - t0.int_col > CAST(0 AS TINYINT) + "t0"."int_col" > CAST(0 AS TINYINT) EXCEPT SELECT - t0.string_col AS key, - t0.double_col AS value - FROM functional_alltypes AS t0 + "t0"."string_col" AS "key", + "t0"."double_col" AS "value" + FROM "functional_alltypes" AS "t0" WHERE - t0.int_col <= CAST(0 AS TINYINT) -) AS t3 \ No newline at end of file + "t0"."int_col" <= CAST(0 AS TINYINT) +) AS "t3" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql index 0043337a96ee..99aff4bed43c 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql @@ -1,21 +1,21 @@ SELECT - t2.b, - t2.sum + "t2"."b", + "t2"."sum" FROM ( SELECT - t1.b, - SUM(t1.a) AS sum, - MAX(t1.a) AS "Max(a)" + "t1"."b", + SUM("t1"."a") AS "sum", + MAX("t1"."a") AS "Max(a)" FROM ( SELECT - t0.a, - t0.b - FROM t AS t0 + "t0"."a", + "t0"."b" + FROM "t" AS "t0" WHERE - t0.b = 'm' - ) AS t1 + "t0"."b" = 'm' + ) AS "t1" GROUP BY 1 -) AS t2 +) AS "t2" WHERE - t2."Max(a)" = CAST(2 AS TINYINT) \ No newline at end of file + "t2"."Max(a)" = CAST(2 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql index 6691834ce1b5..80fa982abf7a 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql @@ -1,14 +1,14 @@ SELECT - t1.string_col, - t1."CountStar()" + "t1"."string_col", + "t1"."CountStar()" FROM ( SELECT - t0.string_col, + "t0"."string_col", COUNT(*) AS "CountStar()", - MAX(t0.double_col) AS "Max(double_col)" - FROM functional_alltypes AS t0 + MAX("t0"."double_col") AS "Max(double_col)" + FROM "functional_alltypes" AS "t0" GROUP BY 1 -) AS t1 +) AS "t1" WHERE - t1."Max(double_col)" = CAST(1 AS TINYINT) \ No newline at end of file + "t1"."Max(double_col)" = CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql index 7c3cbc2fdf12..f9f8dec98698 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql @@ -1,17 +1,17 @@ SELECT - t3.key + "t3"."key" FROM ( SELECT - t0.string_col AS key, - CAST(t0.float_col AS DOUBLE) AS value - FROM functional_alltypes AS t0 + "t0"."string_col" AS "key", + CAST("t0"."float_col" AS DOUBLE) AS "value" + FROM "functional_alltypes" AS "t0" WHERE - t0.int_col > CAST(0 AS TINYINT) + "t0"."int_col" > CAST(0 AS TINYINT) INTERSECT SELECT - t0.string_col AS key, - t0.double_col AS value - FROM functional_alltypes AS t0 + "t0"."string_col" AS "key", + "t0"."double_col" AS "value" + FROM "functional_alltypes" AS "t0" WHERE - t0.int_col <= CAST(0 AS TINYINT) -) AS t3 \ No newline at end of file + "t0"."int_col" <= CAST(0 AS TINYINT) +) AS "t3" \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql index cbb6ac1079a3..bc9d9115d855 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql @@ -1,7 +1,7 @@ SELECT - t0.string_col, - COUNT(DISTINCT t0.int_col) AS int_card, - COUNT(DISTINCT t0.smallint_col) AS smallint_card -FROM functional_alltypes AS t0 + "t0"."string_col", + COUNT(DISTINCT "t0"."int_col") AS "int_card", + COUNT(DISTINCT "t0"."smallint_col") AS "smallint_card" +FROM "functional_alltypes" AS "t0" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql index 45fa51a79cba..cd5a9be3059b 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql @@ -1,19 +1,19 @@ SELECT - t0.double_col, - t0.string_col, - t0.int_col, - t0.float_col -FROM functional_alltypes AS t0 + "t0"."double_col", + "t0"."string_col", + "t0"."int_col", + "t0"."float_col" +FROM "functional_alltypes" AS "t0" WHERE - t0.double_col > CAST(3.14 AS DOUBLE) - AND CONTAINS(t0.string_col, 'foo') + "t0"."double_col" > CAST(3.14 AS DOUBLE) + AND CONTAINS("t0"."string_col", 'foo') AND ( ( ( - t0.int_col - CAST(1 AS TINYINT) + "t0"."int_col" - CAST(1 AS TINYINT) ) = CAST(0 AS TINYINT) ) OR ( - t0.float_col <= CAST(1.34 AS DOUBLE) + "t0"."float_col" <= CAST(1.34 AS DOUBLE) ) ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql index c1410ecdac47..d3733046981b 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql @@ -1,18 +1,18 @@ SELECT - t0.a, - t0.b -FROM my_table AS t0 + "t0"."a", + "t0"."b" +FROM "my_table" AS "t0" WHERE - t0.a < CAST(100 AS TINYINT) - AND t0.a = ( + "t0"."a" < CAST(100 AS TINYINT) + AND "t0"."a" = ( SELECT - MAX(t1.a) AS "Max(a)" + MAX("t1"."a") AS "Max(a)" FROM ( SELECT - t0.a, - t0.b - FROM my_table AS t0 + "t0"."a", + "t0"."b" + FROM "my_table" AS "t0" WHERE - t0.a < CAST(100 AS TINYINT) - ) AS t1 + "t0"."a" < CAST(100 AS TINYINT) + ) AS "t1" ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql index 7ceaa3d33e79..01918d46ff36 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql @@ -1,19 +1,19 @@ SELECT - COUNT(t2.foo) AS "Count(foo)" + COUNT("t2"."foo") AS "Count(foo)" FROM ( SELECT - t1.string_col, - SUM(t1.float_col) AS foo + "t1"."string_col", + SUM("t1"."float_col") AS "foo" FROM ( SELECT - t0.float_col, - t0.timestamp_col, - t0.int_col, - t0.string_col - FROM alltypes AS t0 + "t0"."float_col", + "t0"."timestamp_col", + "t0"."int_col", + "t0"."string_col" + FROM "alltypes" AS "t0" WHERE - t0.timestamp_col < MAKE_TIMESTAMP(2014, 1, 1, 0, 0, 0.0) - ) AS t1 + 
"t0"."timestamp_col" < MAKE_TIMESTAMP(2014, 1, 1, 0, 0, 0.0) + ) AS "t1" GROUP BY 1 -) AS t2 \ No newline at end of file +) AS "t2" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql index 4d00f47c36b0..fff41a520ec4 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql @@ -1,18 +1,18 @@ SELECT - t3.key, - t3.value + "t3"."key", + "t3"."value" FROM ( SELECT - t0.string_col AS key, - CAST(t0.float_col AS DOUBLE) AS value - FROM functional_alltypes AS t0 + "t0"."string_col" AS "key", + CAST("t0"."float_col" AS DOUBLE) AS "value" + FROM "functional_alltypes" AS "t0" WHERE - t0.int_col > CAST(0 AS TINYINT) + "t0"."int_col" > CAST(0 AS TINYINT) EXCEPT SELECT - t0.string_col AS key, - t0.double_col AS value - FROM functional_alltypes AS t0 + "t0"."string_col" AS "key", + "t0"."double_col" AS "value" + FROM "functional_alltypes" AS "t0" WHERE - t0.int_col <= CAST(0 AS TINYINT) -) AS t3 \ No newline at end of file + "t0"."int_col" <= CAST(0 AS TINYINT) +) AS "t3" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql index 483b4fef6f49..098405dd6f82 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql @@ -2,7 +2,7 @@ SELECT DISTINCT * FROM ( SELECT - t0.string_col, - t0.int_col - FROM functional_alltypes AS t0 -) AS t1 \ No newline at end of file + "t0"."string_col", + "t0"."int_col" + FROM "functional_alltypes" AS "t0" +) AS "t1" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql index db148ea33a9f..f5ce6636c22b 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql @@ -1,19 +1,19 @@ SELECT - t5.a + "t5"."a" FROM ( SELECT - t4.a + "t4"."a" FROM ( SELECT - t0.a, - t0.b, - MAKE_TIMESTAMP(2018, 1, 1, 0, 0, 0.0) AS the_date - FROM t AS t0 + "t0"."a", + "t0"."b", + MAKE_TIMESTAMP(2018, 1, 1, 0, 0, 0.0) AS "the_date" + FROM "t" AS "t0" WHERE - t0.c = MAKE_TIMESTAMP(2018, 1, 1, 0, 0, 0.0) - ) AS t4 - INNER JOIN s AS t2 - ON t4.b = t2.b -) AS t5 + "t0"."c" = MAKE_TIMESTAMP(2018, 1, 1, 0, 0, 0.0) + ) AS "t4" + INNER JOIN "s" AS "t2" + ON "t4"."b" = "t2"."b" +) AS "t5" WHERE - t5.a < CAST(1.0 AS DOUBLE) \ No newline at end of file + "t5"."a" < CAST(1.0 AS DOUBLE) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql index 72a05ef24e92..d7b9f61d48dc 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql @@ -1,18 +1,18 @@ SELECT - t3.key, - t3.value + "t3"."key", + "t3"."value" FROM ( SELECT - t0.string_col AS key, - CAST(t0.float_col AS DOUBLE) AS value - FROM functional_alltypes AS t0 + "t0"."string_col" AS "key", + CAST("t0"."float_col" AS DOUBLE) 
AS "value" + FROM "functional_alltypes" AS "t0" WHERE - t0.int_col > CAST(0 AS TINYINT) + "t0"."int_col" > CAST(0 AS TINYINT) INTERSECT SELECT - t0.string_col AS key, - t0.double_col AS value - FROM functional_alltypes AS t0 + "t0"."string_col" AS "key", + "t0"."double_col" AS "value" + FROM "functional_alltypes" AS "t0" WHERE - t0.int_col <= CAST(0 AS TINYINT) -) AS t3 \ No newline at end of file + "t0"."int_col" <= CAST(0 AS TINYINT) +) AS "t3" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql index 0bf62fa423da..cf49db40dbea 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql @@ -1,18 +1,18 @@ SELECT - t3.key, - t3.value + "t3"."key", + "t3"."value" FROM ( SELECT - t0.string_col AS key, - CAST(t0.float_col AS DOUBLE) AS value - FROM functional_alltypes AS t0 + "t0"."string_col" AS "key", + CAST("t0"."float_col" AS DOUBLE) AS "value" + FROM "functional_alltypes" AS "t0" WHERE - t0.int_col > CAST(0 AS TINYINT) + "t0"."int_col" > CAST(0 AS TINYINT) UNION SELECT - t0.string_col AS key, - t0.double_col AS value - FROM functional_alltypes AS t0 + "t0"."string_col" AS "key", + "t0"."double_col" AS "value" + FROM "functional_alltypes" AS "t0" WHERE - t0.int_col <= CAST(0 AS TINYINT) -) AS t3 \ No newline at end of file + "t0"."int_col" <= CAST(0 AS TINYINT) +) AS "t3" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql index 2afa8074bb0c..3a8f09ded485 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql @@ -1,20 +1,20 @@ -WITH t1 AS ( +WITH "t1" AS ( SELECT - t0.a, - t0.b - FROM t AS t0 + "t0"."a", + "t0"."b" + FROM "t" AS "t0" ORDER BY - t0.b ASC + "t0"."b" ASC ) SELECT - t3.a, - t3.b + "t3"."a", + "t3"."b" FROM ( SELECT * - FROM t1 AS t2 + FROM "t1" AS "t2" UNION ALL SELECT * - FROM t1 AS t2 -) AS t3 \ No newline at end of file + FROM "t1" AS "t2" +) AS "t3" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql index b2fb8620109b..b57144bd31c9 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql @@ -1,17 +1,17 @@ SELECT - t3.key + "t3"."key" FROM ( SELECT - t0.string_col AS key, - CAST(t0.float_col AS DOUBLE) AS value - FROM functional_alltypes AS t0 + "t0"."string_col" AS "key", + CAST("t0"."float_col" AS DOUBLE) AS "value" + FROM "functional_alltypes" AS "t0" WHERE - t0.int_col > CAST(0 AS TINYINT) + "t0"."int_col" > CAST(0 AS TINYINT) UNION ALL SELECT - t0.string_col AS key, - t0.double_col AS value - FROM functional_alltypes AS t0 + "t0"."string_col" AS "key", + "t0"."double_col" AS "value" + FROM "functional_alltypes" AS "t0" WHERE - t0.int_col <= CAST(0 AS TINYINT) -) AS t3 \ No newline at end of file + "t0"."int_col" <= CAST(0 AS TINYINT) +) AS "t3" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql index 889345c4621d..efe7f2b9b631 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql @@ -2,12 +2,12 @@ SELECT COUNT(*) AS "CountStar()" FROM ( SELECT - t3.n_nationkey, - t3.n_name, - t3.n_regionkey, - t3.n_comment, - t2.r_name AS region - FROM tpch_region AS t2 - INNER JOIN tpch_nation AS t3 - ON t2.r_regionkey = t3.n_regionkey -) AS t4 \ No newline at end of file + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment", + "t2"."r_name" AS "region" + FROM "tpch_region" AS "t2" + INNER JOIN "tpch_nation" AS "t3" + ON "t2"."r_regionkey" = "t3"."n_regionkey" +) AS "t4" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql index 47945167c00a..bb81eda40239 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql @@ -1,13 +1,13 @@ SELECT - t1.foo_id, - t1.total + "t1"."foo_id", + "t1"."total" FROM ( SELECT - t0.foo_id, - SUM(t0.f) AS total - FROM star1 AS t0 + "t0"."foo_id", + SUM("t0"."f") AS "total" + FROM "star1" AS "t0" GROUP BY 1 -) AS t1 +) AS "t1" WHERE - t1.total > CAST(10 AS TINYINT) \ No newline at end of file + "t1"."total" > CAST(10 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql index 35e4fe0adc24..65116af02a75 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql @@ -1,14 +1,14 @@ SELECT - t1.foo_id, - t1.total + "t1"."foo_id", + "t1"."total" FROM ( SELECT - t0.foo_id, - SUM(t0.f) AS total, + "t0"."foo_id", + SUM("t0"."f") AS "total", COUNT(*) AS "CountStar()" - FROM star1 AS t0 + FROM "star1" AS "t0" GROUP BY 1 -) AS t1 +) AS "t1" WHERE - t1."CountStar()" > CAST(100 AS TINYINT) \ No newline at end of file + "t1"."CountStar()" > CAST(100 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql index 0a483c1315c4..eb990a67c04d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql @@ -1,16 +1,16 @@ SELECT - t4.foo_id, - SUM(t4.value1) AS total + "t4"."foo_id", + SUM("t4"."value1") AS "total" FROM ( SELECT - t2.c, - t2.f, - t2.foo_id, - t2.bar_id, - t3.value1 - FROM star1 AS t2 - INNER JOIN star2 AS t3 - ON t2.foo_id = t3.foo_id -) AS t4 + "t2"."c", + "t2"."f", + "t2"."foo_id", + "t2"."bar_id", + "t3"."value1" + FROM "star1" AS "t2" + INNER JOIN "star2" AS "t3" + ON "t2"."foo_id" = "t3"."foo_id" +) AS "t4" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql index 82d666c54c3f..808c9de5c6fc 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql @@ -1,23 +1,23 @@ SELECT - t1.g, - SUM(t1.foo) AS "foo total" + "t1"."g", + SUM("t1"."foo") AS "foo total" FROM ( SELECT - t0.a, - t0.b, - t0.c, - t0.d, - t0.e, - t0.f, - t0.g, - t0.h, - t0.i, - t0.j, - t0.k, - t0.a + t0.b AS foo - FROM alltypes AS t0 + "t0"."a", + "t0"."b", + "t0"."c", + "t0"."d", + "t0"."e", + "t0"."f", + "t0"."g", + "t0"."h", + "t0"."i", + "t0"."j", + "t0"."k", + "t0"."a" + "t0"."b" AS "foo" + FROM "alltypes" AS "t0" WHERE - t0.f > CAST(0 AS TINYINT) AND t0.g = 'bar' -) AS t1 + "t0"."f" > CAST(0 AS TINYINT) AND "t0"."g" = 'bar' +) AS "t1" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql index 32772c5a969d..84b289f68fc3 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql @@ -1,25 +1,25 @@ SELECT - t1.g, - SUM(t1.foo) AS "foo total" + "t1"."g", + SUM("t1"."foo") AS "foo total" FROM ( SELECT - t0.a, - t0.b, - t0.c, - t0.d, - t0.e, - t0.f, - t0.g, - t0.h, - t0.i, - t0.j, - t0.k, - t0.a + t0.b AS foo - FROM alltypes AS t0 + "t0"."a", + "t0"."b", + "t0"."c", + "t0"."d", + "t0"."e", + "t0"."f", + "t0"."g", + "t0"."h", + "t0"."i", + "t0"."j", + "t0"."k", + "t0"."a" + "t0"."b" AS "foo" + FROM "alltypes" AS "t0" WHERE - t0.f > CAST(0 AS TINYINT) AND ( - t0.a + t0.b + "t0"."f" > CAST(0 AS TINYINT) AND ( + "t0"."a" + "t0"."b" ) < CAST(10 AS TINYINT) -) AS t1 +) AS "t1" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql index 7407d5fcf78b..802747adcc8c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql @@ -1,16 +1,16 @@ SELECT - t0.a, - t0.b, - t0.c, - t0.d, - t0.e, - t0.f, - t0.g, - t0.h, - t0.i, - t0.j, - t0.k, - t0.a + t0.b AS foo -FROM alltypes AS t0 + "t0"."a", + "t0"."b", + "t0"."c", + "t0"."d", + "t0"."e", + "t0"."f", + "t0"."g", + "t0"."h", + "t0"."i", + "t0"."j", + "t0"."k", + "t0"."a" + "t0"."b" AS "foo" +FROM "alltypes" AS "t0" WHERE - t0.f > CAST(0 AS TINYINT) AND t0.g = 'bar' \ No newline at end of file + "t0"."f" > CAST(0 AS TINYINT) AND "t0"."g" = 'bar' \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql index 0917ea29aae1..f41268dc8f89 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql @@ -1,16 +1,16 @@ SELECT - t0.a, - t0.b, - t0.c, - t0.d, - t0.e, - 
t0.f, - t0.g, - t0.h, - t0.i, - t0.j, - t0.k, - t0.a + t0.b AS foo -FROM alltypes AS t0 + "t0"."a", + "t0"."b", + "t0"."c", + "t0"."d", + "t0"."e", + "t0"."f", + "t0"."g", + "t0"."h", + "t0"."i", + "t0"."j", + "t0"."k", + "t0"."a" + "t0"."b" AS "foo" +FROM "alltypes" AS "t0" WHERE - t0.f > CAST(0 AS TINYINT) \ No newline at end of file + "t0"."f" > CAST(0 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql index f2ef0d0d3f67..0b4227109539 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql @@ -1,8 +1,8 @@ SELECT - t2.c, - t2.f, - t2.foo_id, - t2.bar_id -FROM star1 AS t2 -ANTI JOIN star2 AS t3 - ON t2.foo_id = t3.foo_id \ No newline at end of file + "t2"."c", + "t2"."f", + "t2"."foo_id", + "t2"."bar_id" +FROM "star1" AS "t2" +ANTI JOIN "star2" AS "t3" + ON "t2"."foo_id" = "t3"."foo_id" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql index b110ecf4f3b8..3dea0500f604 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql @@ -1,9 +1,9 @@ SELECT - t0.dest, - t0.origin, - t0.arrdelay -FROM airlines AS t0 + "t0"."dest", + "t0"."origin", + "t0"."arrdelay" +FROM "airlines" AS "t0" WHERE ( - CAST(t0.dest AS BIGINT) = CAST(0 AS TINYINT) + CAST("t0"."dest" AS BIGINT) = CAST(0 AS TINYINT) ) = TRUE \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql index 173b6323b243..49071225a5db 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql @@ -1,23 +1,23 @@ SELECT - t2.arrdelay, - t2.dest, - t2.dest_avg, - t2.dev + "t2"."arrdelay", + "t2"."dest", + "t2"."dest_avg", + "t2"."dev" FROM ( SELECT - t1.arrdelay, - t1.dest, - AVG(t1.arrdelay) OVER (PARTITION BY t1.dest ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS dest_avg, - t1.arrdelay - AVG(t1.arrdelay) OVER (PARTITION BY t1.dest ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS dev + "t1"."arrdelay", + "t1"."dest", + AVG("t1"."arrdelay") OVER (PARTITION BY "t1"."dest" ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "dest_avg", + "t1"."arrdelay" - AVG("t1"."arrdelay") OVER (PARTITION BY "t1"."dest" ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "dev" FROM ( SELECT - t0.arrdelay, - t0.dest - FROM airlines AS t0 - ) AS t1 -) AS t2 + "t0"."arrdelay", + "t0"."dest" + FROM "airlines" AS "t0" + ) AS "t1" +) AS "t2" WHERE - NOT t2.dev IS NULL + NOT "t2"."dev" IS NULL ORDER BY - t2.dev DESC + "t2"."dev" DESC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql index f498893a25cb..190eb7c08f1a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql +++ 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql @@ -1,43 +1,43 @@ SELECT - t3.c_name, - t5.r_name, - t4.n_name -FROM tpch_customer AS t3 -INNER JOIN tpch_nation AS t4 - ON t3.c_nationkey = t4.n_nationkey -INNER JOIN tpch_region AS t5 - ON t4.n_regionkey = t5.r_regionkey + "t3"."c_name", + "t5"."r_name", + "t4"."n_name" +FROM "tpch_customer" AS "t3" +INNER JOIN "tpch_nation" AS "t4" + ON "t3"."c_nationkey" = "t4"."n_nationkey" +INNER JOIN "tpch_region" AS "t5" + ON "t4"."n_regionkey" = "t5"."r_regionkey" SEMI JOIN ( SELECT - t7.n_name, - t7."Sum(Cast(c_acctbal, float64))" + "t7"."n_name", + "t7"."Sum(Cast(c_acctbal, float64))" FROM ( SELECT - t6.n_name, - SUM(CAST(t6.c_acctbal AS DOUBLE)) AS "Sum(Cast(c_acctbal, float64))" + "t6"."n_name", + SUM(CAST("t6"."c_acctbal" AS DOUBLE)) AS "Sum(Cast(c_acctbal, float64))" FROM ( SELECT - t3.c_custkey, - t3.c_name, - t3.c_address, - t3.c_nationkey, - t3.c_phone, - t3.c_acctbal, - t3.c_mktsegment, - t3.c_comment, - t4.n_name, - t5.r_name - FROM tpch_customer AS t3 - INNER JOIN tpch_nation AS t4 - ON t3.c_nationkey = t4.n_nationkey - INNER JOIN tpch_region AS t5 - ON t4.n_regionkey = t5.r_regionkey - ) AS t6 + "t3"."c_custkey", + "t3"."c_name", + "t3"."c_address", + "t3"."c_nationkey", + "t3"."c_phone", + "t3"."c_acctbal", + "t3"."c_mktsegment", + "t3"."c_comment", + "t4"."n_name", + "t5"."r_name" + FROM "tpch_customer" AS "t3" + INNER JOIN "tpch_nation" AS "t4" + ON "t3"."c_nationkey" = "t4"."n_nationkey" + INNER JOIN "tpch_region" AS "t5" + ON "t4"."n_regionkey" = "t5"."r_regionkey" + ) AS "t6" GROUP BY 1 - ) AS t7 + ) AS "t7" ORDER BY - t7."Sum(Cast(c_acctbal, float64))" DESC + "t7"."Sum(Cast(c_acctbal, float64))" DESC LIMIT 10 -) AS t10 - ON t4.n_name = t10.n_name \ No newline at end of file +) AS "t10" + ON "t4"."n_name" = "t10"."n_name" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql index 4b25c0cd6d1e..3f14a3c53b88 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql @@ -1,21 +1,21 @@ SELECT - CASE t0.g WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS col1, + CASE "t0"."g" WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS "col1", CASE - WHEN t0.g = 'foo' + WHEN "t0"."g" = 'foo' THEN 'bar' - WHEN t0.g = 'baz' - THEN t0.g + WHEN "t0"."g" = 'baz' + THEN "t0"."g" ELSE CAST(NULL AS TEXT) - END AS col2, - t0.a, - t0.b, - t0.c, - t0.d, - t0.e, - t0.f, - t0.g, - t0.h, - t0.i, - t0.j, - t0.k -FROM alltypes AS t0 \ No newline at end of file + END AS "col2", + "t0"."a", + "t0"."b", + "t0"."c", + "t0"."d", + "t0"."e", + "t0"."f", + "t0"."g", + "t0"."h", + "t0"."i", + "t0"."j", + "t0"."k" +FROM "alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql index df7349fd3de0..03e2f6c0473f 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql @@ -2,38 +2,38 @@ SELECT * FROM ( SELECT - t1.city, - t1."Count(city)" + "t1"."city", + "t1"."Count(city)" FROM ( SELECT - 
t0.city, - COUNT(t0.city) AS "Count(city)" - FROM tbl AS t0 + "t0"."city", + COUNT("t0"."city") AS "Count(city)" + FROM "tbl" AS "t0" GROUP BY 1 - ) AS t1 + ) AS "t1" ORDER BY - t1."Count(city)" DESC + "t1"."Count(city)" DESC LIMIT 10 -) AS t3 +) AS "t3" LIMIT 5 OFFSET ( SELECT COUNT(*) + CAST(-5 AS TINYINT) FROM ( SELECT - t1.city, - t1."Count(city)" + "t1"."city", + "t1"."Count(city)" FROM ( SELECT - t0.city, - COUNT(t0.city) AS "Count(city)" - FROM tbl AS t0 + "t0"."city", + COUNT("t0"."city") AS "Count(city)" + FROM "tbl" AS "t0" GROUP BY 1 - ) AS t1 + ) AS "t1" ORDER BY - t1."Count(city)" DESC + "t1"."Count(city)" DESC LIMIT 10 - ) AS t3 + ) AS "t3" ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql index 57f027a897ec..f0f9981d0e25 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql @@ -1,14 +1,14 @@ SELECT - t4.diag, - t4.status + "t4"."diag", + "t4"."status" FROM ( SELECT - CAST(t0.diag + CAST(1 AS TINYINT) AS INT) AS diag, - t0.status - FROM aids2_one AS t0 + CAST("t0"."diag" + CAST(1 AS TINYINT) AS INT) AS "diag", + "t0"."status" + FROM "aids2_one" AS "t0" UNION ALL SELECT - CAST(t1.diag + CAST(1 AS TINYINT) AS INT) AS diag, - t1.status - FROM aids2_two AS t1 -) AS t4 \ No newline at end of file + CAST("t1"."diag" + CAST(1 AS TINYINT) AS INT) AS "diag", + "t1"."status" + FROM "aids2_two" AS "t1" +) AS "t4" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql index 6578a858c971..6d2946c5d8fb 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql @@ -1,26 +1,26 @@ SELECT - t2.key1, - SUM(t2.total) AS total + "t2"."key1", + SUM("t2"."total") AS "total" FROM ( SELECT - t1.key1, - t1.key2, - SUM(t1.total) AS total + "t1"."key1", + "t1"."key2", + SUM("t1"."total") AS "total" FROM ( SELECT - t0.key1, - t0.key2, - t0.key3, - SUM(t0.value) AS total - FROM foo_table AS t0 + "t0"."key1", + "t0"."key2", + "t0"."key3", + SUM("t0"."value") AS "total" + FROM "foo_table" AS "t0" GROUP BY 1, 2, 3 - ) AS t1 + ) AS "t1" GROUP BY 1, 2 -) AS t2 +) AS "t2" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql index 8114e54d3695..f03acf1df5fa 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql @@ -1,3 +1,3 @@ SELECT - SUFFIX(t0.foo_id, 'foo') AS tmp -FROM star1 AS t0 \ No newline at end of file + SUFFIX("t0"."foo_id", 'foo') AS "tmp" +FROM "star1" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_exists_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_exists_subquery/out.sql index 89f8c66d24dc..dcb94c24b5a1 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_exists_subquery/out.sql +++ 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_exists_subquery/out.sql @@ -1,13 +1,13 @@ SELECT - t0.key1, - t0.key2, - t0.value1 -FROM t1 AS t0 + "t0"."key1", + "t0"."key2", + "t0"."value1" +FROM "t1" AS "t0" WHERE EXISTS( SELECT CAST(1 AS TINYINT) AS "1" - FROM t2 AS t1 + FROM "t2" AS "t1" WHERE - t0.key1 = t1.key1 + "t0"."key1" = "t1"."key1" ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql index bffa5a6720b0..fff249c2693e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql @@ -1,14 +1,14 @@ SELECT - t0.session_id, - t0.user_id, - t0.event_type, - t0.ts -FROM events AS t0 + "t0"."session_id", + "t0"."user_id", + "t0"."event_type", + "t0"."ts" +FROM "events" AS "t0" WHERE EXISTS( SELECT CAST(1 AS TINYINT) AS "1" - FROM purchases AS t1 + FROM "purchases" AS "t1" WHERE - t1.ts > '2015-08-15' AND t0.user_id = t1.user_id + "t1"."ts" > '2015-08-15' AND "t0"."user_id" = "t1"."user_id" ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql index 918ff235ee2a..85a4bc48e639 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql @@ -1,7 +1,7 @@ SELECT - t0.color -FROM t AS t0 + "t0"."color" +FROM "t" AS "t0" WHERE - LOWER(t0.color) LIKE '%de%' - AND CONTAINS(LOWER(t0.color), 'de') - AND REGEXP_MATCHES(LOWER(t0.color), '.*ge.*') \ No newline at end of file + LOWER("t0"."color") LIKE '%de%' + AND CONTAINS(LOWER("t0"."color"), 'de') + AND REGEXP_MATCHES(LOWER("t0"."color"), '.*ge.*') \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql index 68396dd92fca..89976ca29622 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql @@ -1,32 +1,32 @@ -WITH t1 AS ( +WITH "t1" AS ( SELECT - t0.region, - t0.kind, - SUM(t0.amount) AS total - FROM purchases AS t0 + "t0"."region", + "t0"."kind", + SUM("t0"."amount") AS "total" + FROM "purchases" AS "t0" GROUP BY 1, 2 ) SELECT - t5.region, - t5.total - t6.total AS diff + "t5"."region", + "t5"."total" - "t6"."total" AS "diff" FROM ( SELECT - t2.region, - t2.kind, - t2.total - FROM t1 AS t2 + "t2"."region", + "t2"."kind", + "t2"."total" + FROM "t1" AS "t2" WHERE - t2.kind = 'foo' -) AS t5 + "t2"."kind" = 'foo' +) AS "t5" INNER JOIN ( SELECT - t2.region, - t2.kind, - t2.total - FROM t1 AS t2 + "t2"."region", + "t2"."kind", + "t2"."total" + FROM "t1" AS "t2" WHERE - t2.kind = 'bar' -) AS t6 - ON t5.region = t6.region \ No newline at end of file + "t2"."kind" = 'bar' +) AS "t6" + ON "t5"."region" = "t6"."region" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql index 
4f1205ba038e..9b99151402d1 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql @@ -1,23 +1,23 @@ SELECT - t0.c, - t0.f, - t0.foo_id, - t0.bar_id -FROM star1 AS t0 + "t0"."c", + "t0"."f", + "t0"."foo_id", + "t0"."bar_id" +FROM "star1" AS "t0" WHERE - t0.f > LN( + "t0"."f" > LN( ( SELECT - AVG(t1.f) AS "Mean(f)" + AVG("t1"."f") AS "Mean(f)" FROM ( SELECT - t0.c, - t0.f, - t0.foo_id, - t0.bar_id - FROM star1 AS t0 + "t0"."c", + "t0"."f", + "t0"."foo_id", + "t0"."bar_id" + FROM "star1" AS "t0" WHERE - t0.foo_id = 'foo' - ) AS t1 + "t0"."foo_id" = 'foo' + ) AS "t1" ) ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql index 640d7f8d09a4..a42ab6964031 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql @@ -1,25 +1,25 @@ SELECT - t0.c, - t0.f, - t0.foo_id, - t0.bar_id -FROM star1 AS t0 + "t0"."c", + "t0"."f", + "t0"."foo_id", + "t0"."bar_id" +FROM "star1" AS "t0" WHERE - t0.f > ( + "t0"."f" > ( LN( ( SELECT - AVG(t1.f) AS "Mean(f)" + AVG("t1"."f") AS "Mean(f)" FROM ( SELECT - t0.c, - t0.f, - t0.foo_id, - t0.bar_id - FROM star1 AS t0 + "t0"."c", + "t0"."f", + "t0"."foo_id", + "t0"."bar_id" + FROM "star1" AS "t0" WHERE - t0.foo_id = 'foo' - ) AS t1 + "t0"."foo_id" = 'foo' + ) AS "t1" ) ) + CAST(1 AS TINYINT) ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql index bc1d0cc45118..79d8cee79e9d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql @@ -1,7 +1,7 @@ SELECT - t0.foo, - t0.bar, - t0.value, - t0.foo + t0.bar AS baz, - t0.foo * CAST(2 AS TINYINT) AS qux -FROM tbl AS t0 \ No newline at end of file + "t0"."foo", + "t0"."bar", + "t0"."value", + "t0"."foo" + "t0"."bar" AS "baz", + "t0"."foo" * CAST(2 AS TINYINT) AS "qux" +FROM "tbl" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql index 4f8cda85cc5c..d91e991fa6d7 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql @@ -1,9 +1,9 @@ SELECT - t0.foo, - t0.bar, - t0.value, - t0.foo + t0.bar AS baz, - t0.foo * CAST(2 AS TINYINT) AS qux -FROM tbl AS t0 + "t0"."foo", + "t0"."bar", + "t0"."value", + "t0"."foo" + "t0"."bar" AS "baz", + "t0"."foo" * CAST(2 AS TINYINT) AS "qux" +FROM "tbl" AS "t0" WHERE - t0.value > CAST(0 AS TINYINT) \ No newline at end of file + "t0"."value" > CAST(0 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql index 
f768122da94c..33c608e128ff 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql @@ -1,4 +1,4 @@ SELECT - t0.date AS else, - t0.explain AS join -FROM table AS t0 \ No newline at end of file + "t0"."date" AS "else", + "t0"."explain" AS "join" +FROM "table" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql index dc33ad4a62ff..4d92015cc1bd 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql @@ -1,7 +1,7 @@ SELECT - t0.x + CAST(1 AS TINYINT) AS x -FROM t AS t0 + "t0"."x" + CAST(1 AS TINYINT) AS "x" +FROM "t" AS "t0" WHERE ( - t0.x + CAST(1 AS TINYINT) + "t0"."x" + CAST(1 AS TINYINT) ) > CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql index 2fd5fe2ddc07..11159dee0f92 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql @@ -1,5 +1,5 @@ SELECT - CAST(1 AS TINYINT) AS a -FROM t AS t0 + CAST(1 AS TINYINT) AS "a" +FROM "t" AS "t0" WHERE CAST(1 AS TINYINT) > CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql index 396306956a81..bbfe6934de2d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql @@ -1,21 +1,21 @@ SELECT - t4.key1, - t4.key2, - t4.value1, - t5.value2, - t9.value3, - t9.value4 -FROM first AS t4 -INNER JOIN second AS t5 - ON t4.key1 = t5.key1 + "t4"."key1", + "t4"."key2", + "t4"."value1", + "t5"."value2", + "t9"."value3", + "t9"."value4" +FROM "first" AS "t4" +INNER JOIN "second" AS "t5" + ON "t4"."key1" = "t5"."key1" INNER JOIN ( SELECT - t6.key2, - t6.key3, - t6.value3, - t7.value4 - FROM third AS t6 - INNER JOIN fourth AS t7 - ON t6.key3 = t7.key3 -) AS t9 - ON t4.key2 = t9.key2 \ No newline at end of file + "t6"."key2", + "t6"."key3", + "t6"."value3", + "t7"."value4" + FROM "third" AS "t6" + INNER JOIN "fourth" AS "t7" + ON "t6"."key3" = "t7"."key3" +) AS "t9" + ON "t4"."key2" = "t9"."key2" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql index 879cd074a3ac..cf8bb36c3c11 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql @@ -1,28 +1,28 @@ SELECT - t4.value_a, - t5.value_b + "t4"."value_a", + "t5"."value_b" FROM ( SELECT - t0.year, - t0.month, - t0.day, - t0.value_a - FROM a 
AS t0 + "t0"."year", + "t0"."month", + "t0"."day", + "t0"."value_a" + FROM "a" AS "t0" WHERE - t0.year = CAST(2016 AS SMALLINT) - AND t0.month = CAST(2 AS TINYINT) - AND t0.day = CAST(29 AS TINYINT) -) AS t4 + "t0"."year" = CAST(2016 AS SMALLINT) + AND "t0"."month" = CAST(2 AS TINYINT) + AND "t0"."day" = CAST(29 AS TINYINT) +) AS "t4" LEFT OUTER JOIN ( SELECT - t1.year, - t1.month, - t1.day, - t1.value_b - FROM b AS t1 + "t1"."year", + "t1"."month", + "t1"."day", + "t1"."value_b" + FROM "b" AS "t1" WHERE - t1.year = CAST(2016 AS SMALLINT) - AND t1.month = CAST(2 AS TINYINT) - AND t1.day = CAST(29 AS TINYINT) -) AS t5 - ON t4.year = t5.year AND t4.month = t5.month AND t4.day = t5.day \ No newline at end of file + "t1"."year" = CAST(2016 AS SMALLINT) + AND "t1"."month" = CAST(2 AS TINYINT) + AND "t1"."day" = CAST(29 AS TINYINT) +) AS "t5" + ON "t4"."year" = "t5"."year" AND "t4"."month" = "t5"."month" AND "t4"."day" = "t5"."day" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql index 28ef4cadbb22..03ed7e8d464c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql @@ -1,21 +1,21 @@ SELECT - t3.n_nationkey, - t3.n_name, - t3.n_regionkey, - t3.n_comment, - t4.r_regionkey, - t4.r_name, - t4.r_comment, - t5.c_custkey, - t5.c_name, - t5.c_address, - t5.c_nationkey, - t5.c_phone, - t5.c_acctbal, - t5.c_mktsegment, - t5.c_comment -FROM tpch_nation AS t3 -INNER JOIN tpch_region AS t4 - ON t3.n_regionkey = t4.r_regionkey -INNER JOIN tpch_customer AS t5 - ON t3.n_nationkey = t5.c_nationkey \ No newline at end of file + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment", + "t4"."r_regionkey", + "t4"."r_name", + "t4"."r_comment", + "t5"."c_custkey", + "t5"."c_name", + "t5"."c_address", + "t5"."c_nationkey", + "t5"."c_phone", + "t5"."c_acctbal", + "t5"."c_mktsegment", + "t5"."c_comment" +FROM "tpch_nation" AS "t3" +INNER JOIN "tpch_region" AS "t4" + ON "t3"."n_regionkey" = "t4"."r_regionkey" +INNER JOIN "tpch_customer" AS "t5" + ON "t3"."n_nationkey" = "t5"."c_nationkey" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql index ccaf14d42229..e5c3c2f2aa77 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql @@ -1,17 +1,17 @@ SELECT - t5.c_custkey, - t5.c_name, - t5.c_address, - t5.c_nationkey, - t5.c_phone, - t5.c_acctbal, - t5.c_mktsegment, - t5.c_comment, - t3.n_nationkey, - t3.n_name AS nation, - t4.r_name AS region -FROM tpch_nation AS t3 -INNER JOIN tpch_region AS t4 - ON t3.n_regionkey = t4.r_regionkey -INNER JOIN tpch_customer AS t5 - ON t3.n_nationkey = t5.c_nationkey \ No newline at end of file + "t5"."c_custkey", + "t5"."c_name", + "t5"."c_address", + "t5"."c_nationkey", + "t5"."c_phone", + "t5"."c_acctbal", + "t5"."c_mktsegment", + "t5"."c_comment", + "t3"."n_nationkey", + "t3"."n_name" AS "nation", + "t4"."r_name" AS "region" +FROM "tpch_nation" AS "t3" +INNER JOIN "tpch_region" AS "t4" + ON "t3"."n_regionkey" = "t4"."r_regionkey" +INNER JOIN 
"tpch_customer" AS "t5" + ON "t3"."n_nationkey" = "t5"."c_nationkey" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql index cf540b867fb9..cdc5697c2d3d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql @@ -1,31 +1,31 @@ SELECT - t5.on, - t5.by, - t5.on_right, - t5.by_right, - t5.val + "t5"."on", + "t5"."by", + "t5"."on_right", + "t5"."by_right", + "t5"."val" FROM ( SELECT - t2.on, - t2.by, - t3.on AS on_right, - t3.by AS by_right, - t3.val - FROM left AS t2 - LEFT OUTER JOIN right AS t3 - ON t2.by = t3.by -) AS t5 + "t2"."on", + "t2"."by", + "t3"."on" AS "on_right", + "t3"."by" AS "by_right", + "t3"."val" + FROM "left" AS "t2" + LEFT OUTER JOIN "right" AS "t3" + ON "t2"."by" = "t3"."by" +) AS "t5" WHERE - t5.on_right = ( + "t5"."on_right" = ( SELECT - MAX(t4.on) AS "Max(on)" + MAX("t4"."on") AS "Max(on)" FROM ( SELECT - t1.on, - t1.by, - t1.val - FROM right AS t1 + "t1"."on", + "t1"."by", + "t1"."val" + FROM "right" AS "t1" WHERE - t1.by = t0.by AND t1.on <= t0.on - ) AS t4 + "t1"."by" = "t0"."by" AND "t1"."on" <= "t0"."on" + ) AS "t4" ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql index 1ea81d25a312..c27a7b3da739 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql @@ -1,13 +1,13 @@ SELECT - t4.c, - t4.f, - t4.foo_id, - t4.bar_id + "t4"."c", + "t4"."f", + "t4"."foo_id", + "t4"."bar_id" FROM ( SELECT * - FROM star1 AS t0 + FROM "star1" AS "t0" LIMIT 100 -) AS t4 -INNER JOIN star2 AS t3 - ON t4.foo_id = t3.foo_id \ No newline at end of file +) AS "t4" +INNER JOIN "star2" AS "t3" + ON "t4"."foo_id" = "t3"."foo_id" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql index 12799aaebd5a..ca9b89c74630 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql @@ -1,23 +1,23 @@ -WITH t1 AS ( +WITH "t1" AS ( SELECT * - FROM functional_alltypes AS t0 + FROM "functional_alltypes" AS "t0" LIMIT 100 ) SELECT - t3.id, - t3.bool_col, - t3.tinyint_col, - t3.smallint_col, - t3.int_col, - t3.bigint_col, - t3.float_col, - t3.double_col, - t3.date_string_col, - t3.string_col, - t3.timestamp_col, - t3.year, - t3.month -FROM t1 AS t3 -INNER JOIN t1 AS t5 + "t3"."id", + "t3"."bool_col", + "t3"."tinyint_col", + "t3"."smallint_col", + "t3"."int_col", + "t3"."bigint_col", + "t3"."float_col", + "t3"."double_col", + "t3"."date_string_col", + "t3"."string_col", + "t3"."timestamp_col", + "t3"."year", + "t3"."month" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t5" ON TRUE \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql index 
bb7c1b27ee01..2cda81f9a69c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql @@ -2,33 +2,33 @@ SELECT COUNT(*) AS "CountStar()" FROM ( SELECT - t1.id, - t1.bool_col, - t1.tinyint_col, - t1.smallint_col, - t1.int_col, - t1.bigint_col, - t1.float_col, - t1.double_col, - t1.date_string_col, - t1.string_col, - t1.timestamp_col, - t1.year, - t1.month, - t3.id AS id_right, - t3.bool_col AS bool_col_right, - t3.tinyint_col AS tinyint_col_right, - t3.smallint_col AS smallint_col_right, - t3.int_col AS int_col_right, - t3.bigint_col AS bigint_col_right, - t3.float_col AS float_col_right, - t3.double_col AS double_col_right, - t3.date_string_col AS date_string_col_right, - t3.string_col AS string_col_right, - t3.timestamp_col AS timestamp_col_right, - t3.year AS year_right, - t3.month AS month_right - FROM functional_alltypes AS t1 - INNER JOIN functional_alltypes AS t3 - ON t1.tinyint_col < EXTRACT(minute FROM t3.timestamp_col) -) AS t4 \ No newline at end of file + "t1"."id", + "t1"."bool_col", + "t1"."tinyint_col", + "t1"."smallint_col", + "t1"."int_col", + "t1"."bigint_col", + "t1"."float_col", + "t1"."double_col", + "t1"."date_string_col", + "t1"."string_col", + "t1"."timestamp_col", + "t1"."year", + "t1"."month", + "t3"."id" AS "id_right", + "t3"."bool_col" AS "bool_col_right", + "t3"."tinyint_col" AS "tinyint_col_right", + "t3"."smallint_col" AS "smallint_col_right", + "t3"."int_col" AS "int_col_right", + "t3"."bigint_col" AS "bigint_col_right", + "t3"."float_col" AS "float_col_right", + "t3"."double_col" AS "double_col_right", + "t3"."date_string_col" AS "date_string_col_right", + "t3"."string_col" AS "string_col_right", + "t3"."timestamp_col" AS "timestamp_col_right", + "t3"."year" AS "year_right", + "t3"."month" AS "month_right" + FROM "functional_alltypes" AS "t1" + INNER JOIN "functional_alltypes" AS "t3" + ON "t1"."tinyint_col" < EXTRACT(minute FROM "t3"."timestamp_col") +) AS "t4" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql index fd5640b69685..2dba0c8f36ec 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql @@ -1,22 +1,22 @@ SELECT - t4.id AS left_id, - t4.desc AS left_desc, - t5.id AS right_id, - t5.desc AS right_desc + "t4"."id" AS "left_id", + "t4"."desc" AS "left_desc", + "t5"."id" AS "right_id", + "t5"."desc" AS "right_desc" FROM ( SELECT - t0.id, - t0.desc - FROM foo AS t0 + "t0"."id", + "t0"."desc" + FROM "foo" AS "t0" WHERE - t0.id < CAST(2 AS TINYINT) -) AS t4 + "t0"."id" < CAST(2 AS TINYINT) +) AS "t4" LEFT OUTER JOIN ( SELECT - t1.id, - t1.desc - FROM bar AS t1 + "t1"."id", + "t1"."desc" + FROM "bar" AS "t1" WHERE - t1.id < CAST(3 AS TINYINT) -) AS t5 - ON t4.id = t5.id AND t4.desc = t5.desc \ No newline at end of file + "t1"."id" < CAST(3 AS TINYINT) +) AS "t5" + ON "t4"."id" = "t5"."id" AND "t4"."desc" = "t5"."desc" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql index 4d414f398697..cc050d3028c4 100644 --- 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql @@ -1,12 +1,12 @@ SELECT - t3.c, - t3.f, - t3.foo_id, - t3.bar_id, - t4.value1, - t5.value2 -FROM star1 AS t3 -LEFT OUTER JOIN star2 AS t4 - ON t3.foo_id = t4.foo_id -INNER JOIN star3 AS t5 - ON t3.bar_id = t5.bar_id \ No newline at end of file + "t3"."c", + "t3"."f", + "t3"."foo_id", + "t3"."bar_id", + "t4"."value1", + "t5"."value2" +FROM "star1" AS "t3" +LEFT OUTER JOIN "star2" AS "t4" + ON "t3"."foo_id" = "t4"."foo_id" +INNER JOIN "star3" AS "t5" + ON "t3"."bar_id" = "t5"."bar_id" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql index 7f7d28627c2f..266babfbbfeb 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql @@ -3,7 +3,7 @@ SELECT FROM ( SELECT * - FROM functional_alltypes AS t0 + FROM "functional_alltypes" AS "t0" LIMIT 20 -) AS t1 +) AS "t1" LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql index 315083748ef8..45c3e7cfc10a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql @@ -1,14 +1,14 @@ SELECT - t2.string_col, - t2.nrows + "t2"."string_col", + "t2"."nrows" FROM ( SELECT - t0.string_col, - COUNT(*) AS nrows - FROM functional_alltypes AS t0 + "t0"."string_col", + COUNT(*) AS "nrows" + FROM "functional_alltypes" AS "t0" GROUP BY 1 LIMIT 5 -) AS t2 +) AS "t2" ORDER BY - t2.string_col ASC \ No newline at end of file + "t2"."string_col" ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql index c93dbd9ab8dc..4c7c35d3c442 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql @@ -1,7 +1,7 @@ SELECT - t0.a, - t0.b, - t0.c -FROM foo AS t0 + "t0"."a", + "t0"."b", + "t0"."c" +FROM "foo" AS "t0" WHERE - t0.a > CAST(0 AS TINYINT) \ No newline at end of file + "t0"."a" > CAST(0 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql index fdc7cf00bcd2..40ef73b9ea30 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql @@ -1,12 +1,12 @@ SELECT - t0.job, - t0.dept_id, - t0.year, - t0.y -FROM foo AS t0 + "t0"."job", + "t0"."dept_id", + "t0"."year", + "t0"."y" +FROM "foo" AS "t0" WHERE - t0.y > ( + "t0"."y" > ( SELECT - MAX(t1.x) AS "Max(x)" - FROM bar AS t1 + MAX("t1"."x") AS "Max(x)" + FROM "bar" AS "t1" ) \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql index dbf4aadac203..34e180525498 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql @@ -1,6 +1,6 @@ SELECT - t0.foo_id, - SUM(t0.f) AS total -FROM star1 AS t0 + "t0"."foo_id", + SUM("t0"."f") AS "total" +FROM "star1" AS "t0" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql index be9b430bd3d3..c5b36f1f25d4 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql @@ -1,8 +1,8 @@ SELECT - t0.foo_id, - t0.bar_id, - SUM(t0.f) AS total -FROM star1 AS t0 + "t0"."foo_id", + "t0"."bar_id", + SUM("t0"."f") AS "total" +FROM "star1" AS "t0" GROUP BY 1, 2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql index a924af63f39d..c6776a20fb0a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql @@ -1,3 +1,3 @@ SELECT COUNT(*) AS "CountStar()" -FROM star1 AS t0 \ No newline at end of file +FROM "star1" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql index 31e87b57f3ea..7ce2d03a7aec 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql @@ -1,9 +1,9 @@ SELECT - t0.c, - t0.f, - t0.foo_id, - t0.bar_id -FROM star1 AS t0 + "t0"."c", + "t0"."f", + "t0"."foo_id", + "t0"."bar_id" +FROM "star1" AS "t0" WHERE - t0.f > CAST(0 AS TINYINT) + "t0"."f" > CAST(0 AS TINYINT) LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql index 2b6d0fe52716..651d57bbc4c8 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql @@ -1,4 +1,4 @@ SELECT * -FROM star1 AS t0 +FROM "star1" AS "t0" LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql index ab4dd6df7158..6a53e9d7fec3 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql @@ -1,13 +1,13 
@@ SELECT - t1.c, - t1.f, - t1.foo_id, - t1.bar_id + "t1"."c", + "t1"."f", + "t1"."foo_id", + "t1"."bar_id" FROM ( SELECT * - FROM star1 AS t0 + FROM "star1" AS "t0" LIMIT 10 -) AS t1 +) AS "t1" WHERE - t1.f > CAST(0 AS TINYINT) \ No newline at end of file + "t1"."f" > CAST(0 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql index 3c71bda9b962..0f11f2394924 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql @@ -1,5 +1,5 @@ SELECT * -FROM star1 AS t0 +FROM "star1" AS "t0" LIMIT 10 OFFSET 5 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql index bb666f269b2a..b5450abae2a7 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql @@ -1,8 +1,8 @@ SELECT - t0.c, - t0.f, - t0.foo_id, - t0.bar_id -FROM star1 AS t0 + "t0"."c", + "t0"."f", + "t0"."foo_id", + "t0"."bar_id" +FROM "star1" AS "t0" ORDER BY - t0.f ASC \ No newline at end of file + "t0"."f" ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql index f223e15ca36b..9b15a47d945c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql @@ -1,3 +1,3 @@ SELECT * -FROM alltypes \ No newline at end of file +FROM "alltypes" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql index 3d676c32001a..52fe23dbf9e4 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql @@ -1,20 +1,20 @@ SELECT - t2.r_name, - t6.n_name -FROM tpch_region AS t2 -INNER JOIN tpch_nation AS t3 - ON t2.r_regionkey = t3.n_regionkey + "t2"."r_name", + "t6"."n_name" +FROM "tpch_region" AS "t2" +INNER JOIN "tpch_nation" AS "t3" + ON "t2"."r_regionkey" = "t3"."n_regionkey" INNER JOIN ( SELECT - t2.r_regionkey, - t2.r_name, - t2.r_comment, - t3.n_nationkey, - t3.n_name, - t3.n_regionkey, - t3.n_comment - FROM tpch_region AS t2 - INNER JOIN tpch_nation AS t3 - ON t2.r_regionkey = t3.n_regionkey -) AS t6 - ON t2.r_regionkey = t6.r_regionkey \ No newline at end of file + "t2"."r_regionkey", + "t2"."r_name", + "t2"."r_comment", + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment" + FROM "tpch_region" AS "t2" + INNER JOIN "tpch_nation" AS "t3" + ON "t2"."r_regionkey" = "t3"."n_regionkey" +) AS "t6" + ON "t2"."r_regionkey" = "t6"."r_regionkey" \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql index 802ea0aad1c6..3bcfd0d04c11 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql @@ -1,8 +1,8 @@ SELECT - t2.c, - t2.f, - t2.foo_id, - t2.bar_id -FROM star1 AS t2 -SEMI JOIN star2 AS t3 - ON t2.foo_id = t3.foo_id \ No newline at end of file + "t2"."c", + "t2"."f", + "t2"."foo_id", + "t2"."bar_id" +FROM "star1" AS "t2" +SEMI JOIN "star2" AS "t3" + ON "t2"."foo_id" = "t3"."foo_id" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql index 56f5488cdde3..540e9df3ccb0 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql @@ -1,8 +1,8 @@ SELECT - t2.c, - t2.f, - t2.foo_id, - t2.bar_id -FROM star1 AS t2 -INNER JOIN star2 AS t3 - ON t2.foo_id = t3.foo_id \ No newline at end of file + "t2"."c", + "t2"."f", + "t2"."foo_id", + "t2"."bar_id" +FROM "star1" AS "t2" +INNER JOIN "star2" AS "t3" + ON "t2"."foo_id" = "t3"."foo_id" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql index 59916704c75d..733df816e0c2 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql @@ -1,8 +1,8 @@ SELECT - t2.c, - t2.f, - t2.foo_id, - t2.bar_id -FROM star1 AS t2 -INNER JOIN star2 AS t3 - ON t2.foo_id = t3.foo_id AND t2.bar_id = t3.foo_id \ No newline at end of file + "t2"."c", + "t2"."f", + "t2"."foo_id", + "t2"."bar_id" +FROM "star1" AS "t2" +INNER JOIN "star2" AS "t3" + ON "t2"."foo_id" = "t3"."foo_id" AND "t2"."bar_id" = "t3"."foo_id" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql index 0b9cd8c00921..6fe0cfaafa6b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql @@ -1,8 +1,8 @@ SELECT - t2.c, - t2.f, - t2.foo_id, - t2.bar_id -FROM star1 AS t2 -LEFT OUTER JOIN star2 AS t3 - ON t2.foo_id = t3.foo_id \ No newline at end of file + "t2"."c", + "t2"."f", + "t2"."foo_id", + "t2"."bar_id" +FROM "star1" AS "t2" +LEFT OUTER JOIN "star2" AS "t3" + ON "t2"."foo_id" = "t3"."foo_id" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql index 91950bc952c5..39e54c234e2d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql @@ -1,8 +1,8 @@ SELECT - t2.c, - t2.f, - t2.foo_id, - t2.bar_id -FROM star1 AS t2 -FULL OUTER JOIN star2 AS t3 - ON t2.foo_id = t3.foo_id \ No newline at end of file + "t2"."c", + "t2"."f", + "t2"."foo_id", + "t2"."bar_id" +FROM "star1" AS "t2" +FULL OUTER JOIN 
"star2" AS "t3" + ON "t2"."foo_id" = "t3"."foo_id" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql index 5877da115f7b..b32c0c3a5a02 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql @@ -1,12 +1,12 @@ SELECT - t1.b, - COUNT(*) AS b_count + "t1"."b", + COUNT(*) AS "b_count" FROM ( SELECT - t0.b - FROM t AS t0 + "t0"."b" + FROM "t" AS "t0" ORDER BY - t0.a ASC -) AS t1 + "t0"."a" ASC +) AS "t1" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql index b40324745e05..0471b87d4908 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql @@ -1,12 +1,12 @@ SELECT - t1.b, - COUNT(*) AS b_count + "t1"."b", + COUNT(*) AS "b_count" FROM ( SELECT - t0.b - FROM t AS t0 + "t0"."b" + FROM "t" AS "t0" ORDER BY - t0.b ASC -) AS t1 + "t0"."b" ASC +) AS "t1" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql index 05528dd5f869..717ada6a7cac 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql @@ -1,3 +1,3 @@ SELECT - STARTS_WITH(t0.foo_id, 'foo') AS tmp -FROM star1 AS t0 \ No newline at end of file + STARTS_WITH("t0"."foo_id", 'foo') AS "tmp" +FROM "star1" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql index d400eab10c82..6c353f9e889a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql @@ -1,76 +1,76 @@ SELECT - t8.c_custkey, - t8.c_name, - t8.c_address, - t8.c_nationkey, - t8.c_phone, - t8.c_acctbal, - t8.c_mktsegment, - t8.c_comment, - t8.region, - t8.amount, - t8.odate + "t8"."c_custkey", + "t8"."c_name", + "t8"."c_address", + "t8"."c_nationkey", + "t8"."c_phone", + "t8"."c_acctbal", + "t8"."c_mktsegment", + "t8"."c_comment", + "t8"."region", + "t8"."amount", + "t8"."odate" FROM ( SELECT - t6.c_custkey, - t6.c_name, - t6.c_address, - t6.c_nationkey, - t6.c_phone, - t6.c_acctbal, - t6.c_mktsegment, - t6.c_comment, - t4.r_name AS region, - t7.o_totalprice AS amount, - CAST(t7.o_orderdate AS TIMESTAMP) AS odate - FROM tpch_region AS t4 - INNER JOIN tpch_nation AS t5 - ON t4.r_regionkey = t5.n_regionkey - INNER JOIN tpch_customer AS t6 - ON t6.c_nationkey = t5.n_nationkey - INNER JOIN tpch_orders AS t7 - ON t7.o_custkey = t6.c_custkey -) AS t8 + "t6"."c_custkey", + "t6"."c_name", + "t6"."c_address", + "t6"."c_nationkey", + "t6"."c_phone", + "t6"."c_acctbal", + 
"t6"."c_mktsegment", + "t6"."c_comment", + "t4"."r_name" AS "region", + "t7"."o_totalprice" AS "amount", + CAST("t7"."o_orderdate" AS TIMESTAMP) AS "odate" + FROM "tpch_region" AS "t4" + INNER JOIN "tpch_nation" AS "t5" + ON "t4"."r_regionkey" = "t5"."n_regionkey" + INNER JOIN "tpch_customer" AS "t6" + ON "t6"."c_nationkey" = "t5"."n_nationkey" + INNER JOIN "tpch_orders" AS "t7" + ON "t7"."o_custkey" = "t6"."c_custkey" +) AS "t8" WHERE - t8.amount > ( + "t8"."amount" > ( SELECT - AVG(t10.amount) AS "Mean(amount)" + AVG("t10"."amount") AS "Mean(amount)" FROM ( SELECT - t9.c_custkey, - t9.c_name, - t9.c_address, - t9.c_nationkey, - t9.c_phone, - t9.c_acctbal, - t9.c_mktsegment, - t9.c_comment, - t9.region, - t9.amount, - t9.odate + "t9"."c_custkey", + "t9"."c_name", + "t9"."c_address", + "t9"."c_nationkey", + "t9"."c_phone", + "t9"."c_acctbal", + "t9"."c_mktsegment", + "t9"."c_comment", + "t9"."region", + "t9"."amount", + "t9"."odate" FROM ( SELECT - t6.c_custkey, - t6.c_name, - t6.c_address, - t6.c_nationkey, - t6.c_phone, - t6.c_acctbal, - t6.c_mktsegment, - t6.c_comment, - t4.r_name AS region, - t7.o_totalprice AS amount, - CAST(t7.o_orderdate AS TIMESTAMP) AS odate - FROM tpch_region AS t4 - INNER JOIN tpch_nation AS t5 - ON t4.r_regionkey = t5.n_regionkey - INNER JOIN tpch_customer AS t6 - ON t6.c_nationkey = t5.n_nationkey - INNER JOIN tpch_orders AS t7 - ON t7.o_custkey = t6.c_custkey - ) AS t9 + "t6"."c_custkey", + "t6"."c_name", + "t6"."c_address", + "t6"."c_nationkey", + "t6"."c_phone", + "t6"."c_acctbal", + "t6"."c_mktsegment", + "t6"."c_comment", + "t4"."r_name" AS "region", + "t7"."o_totalprice" AS "amount", + CAST("t7"."o_orderdate" AS TIMESTAMP) AS "odate" + FROM "tpch_region" AS "t4" + INNER JOIN "tpch_nation" AS "t5" + ON "t4"."r_regionkey" = "t5"."n_regionkey" + INNER JOIN "tpch_customer" AS "t6" + ON "t6"."c_nationkey" = "t5"."n_nationkey" + INNER JOIN "tpch_orders" AS "t7" + ON "t7"."o_custkey" = "t6"."c_custkey" + ) AS "t9" WHERE - t9.region = t8.region - ) AS t10 + "t9"."region" = "t8"."region" + ) AS "t10" ) LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql index 404caac50463..215ec506d232 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql @@ -1,12 +1,12 @@ SELECT - t0.c, - t0.f, - t0.foo_id, - t0.bar_id -FROM star1 AS t0 + "t0"."c", + "t0"."f", + "t0"."foo_id", + "t0"."bar_id" +FROM "star1" AS "t0" WHERE - t0.f > ( + "t0"."f" > ( SELECT - AVG(t0.f) AS "Mean(f)" - FROM star1 AS t0 + AVG("t0"."f") AS "Mean(f)" + FROM "star1" AS "t0" ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql index fdbdef535ce4..6b73bf2aee0c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql @@ -1,21 +1,21 @@ SELECT - t0.c, - t0.f, - t0.foo_id, - t0.bar_id -FROM star1 AS t0 + "t0"."c", + "t0"."f", + "t0"."foo_id", + "t0"."bar_id" +FROM "star1" AS "t0" WHERE - t0.f > ( + "t0"."f" > ( SELECT - AVG(t1.f) AS "Mean(f)" + AVG("t1"."f") 
AS "Mean(f)" FROM ( SELECT - t0.c, - t0.f, - t0.foo_id, - t0.bar_id - FROM star1 AS t0 + "t0"."c", + "t0"."f", + "t0"."foo_id", + "t0"."bar_id" + FROM "star1" AS "t0" WHERE - t0.foo_id = 'foo' - ) AS t1 + "t0"."foo_id" = 'foo' + ) AS "t1" ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql index 62595b73b907..6c8b142b59cb 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql @@ -1,49 +1,49 @@ -WITH t5 AS ( +WITH "t5" AS ( SELECT - t2.a, - t2.g, - t2.metric + "t2"."a", + "t2"."g", + "t2"."metric" FROM ( SELECT - t0.a, - t0.g, - SUM(t0.f) AS metric - FROM alltypes AS t0 + "t0"."a", + "t0"."g", + SUM("t0"."f") AS "metric" + FROM "alltypes" AS "t0" GROUP BY 1, 2 - ) AS t2 + ) AS "t2" INNER JOIN ( SELECT - t0.a, - t0.g, - SUM(t0.f) AS metric - FROM alltypes AS t0 + "t0"."a", + "t0"."g", + SUM("t0"."f") AS "metric" + FROM "alltypes" AS "t0" GROUP BY 1, 2 - ) AS t4 - ON t2.g = t4.g -), t1 AS ( + ) AS "t4" + ON "t2"."g" = "t4"."g" +), "t1" AS ( SELECT - t0.a, - t0.g, - SUM(t0.f) AS metric - FROM alltypes AS t0 + "t0"."a", + "t0"."g", + SUM("t0"."f") AS "metric" + FROM "alltypes" AS "t0" GROUP BY 1, 2 ) SELECT - t8.a, - t8.g, - t8.metric + "t8"."a", + "t8"."g", + "t8"."metric" FROM ( SELECT * - FROM t5 AS t6 + FROM "t5" AS "t6" UNION ALL SELECT * - FROM t5 AS t7 -) AS t8 \ No newline at end of file + FROM "t5" AS "t7" +) AS "t8" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql index 029dae462da1..4039868d3eac 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql @@ -1,31 +1,31 @@ -WITH t1 AS ( +WITH "t1" AS ( SELECT - t0.g, - t0.a, - t0.b, - SUM(t0.f) AS total - FROM alltypes AS t0 + "t0"."g", + "t0"."a", + "t0"."b", + SUM("t0"."f") AS "total" + FROM "alltypes" AS "t0" GROUP BY 1, 2, 3 ) SELECT - t6.g, - MAX(t6.total - t6.total_right) AS metric + "t6"."g", + MAX("t6"."total" - "t6"."total_right") AS "metric" FROM ( SELECT - t3.g, - t3.a, - t3.b, - t3.total, - t5.g AS g_right, - t5.a AS a_right, - t5.b AS b_right, - t5.total AS total_right - FROM t1 AS t3 - INNER JOIN t1 AS t5 - ON t3.a = t5.b -) AS t6 + "t3"."g", + "t3"."a", + "t3"."b", + "t3"."total", + "t5"."g" AS "g_right", + "t5"."a" AS "a_right", + "t5"."b" AS "b_right", + "t5"."total" AS "total_right" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t5" + ON "t3"."a" = "t5"."b" +) AS "t6" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql index 54240af94422..ce336c1dbd99 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql @@ -1,38 +1,38 @@ -WITH t1 AS ( +WITH "t1" AS ( SELECT - t0.dest, - t0.origin, - t0.arrdelay - FROM airlines AS t0 + "t0"."dest", + "t0"."origin", + "t0"."arrdelay" + FROM "airlines" AS "t0" WHERE - t0.dest IN ('ORD', 'JFK', 'SFO') + "t0"."dest" IN 
('ORD', 'JFK', 'SFO') ) SELECT - t8.origin, + "t8"."origin", COUNT(*) AS "CountStar()" FROM ( SELECT - t3.dest, - t3.origin, - t3.arrdelay - FROM t1 AS t3 + "t3"."dest", + "t3"."origin", + "t3"."arrdelay" + FROM "t1" AS "t3" SEMI JOIN ( SELECT - t4.dest, - t4."Mean(arrdelay)" + "t4"."dest", + "t4"."Mean(arrdelay)" FROM ( SELECT - t2.dest, - AVG(t2.arrdelay) AS "Mean(arrdelay)" - FROM t1 AS t2 + "t2"."dest", + AVG("t2"."arrdelay") AS "Mean(arrdelay)" + FROM "t1" AS "t2" GROUP BY 1 - ) AS t4 + ) AS "t4" ORDER BY - t4."Mean(arrdelay)" DESC + "t4"."Mean(arrdelay)" DESC LIMIT 10 - ) AS t7 - ON t3.dest = t7.dest -) AS t8 + ) AS "t7" + ON "t3"."dest" = "t7"."dest" +) AS "t8" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql index 6d86baa0190c..054cc889c755 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql @@ -1,24 +1,24 @@ SELECT - t1.foo, - t1.bar, - t1.city, - t1.v1, - t1.v2 -FROM tbl AS t1 + "t1"."foo", + "t1"."bar", + "t1"."city", + "t1"."v1", + "t1"."v2" +FROM "tbl" AS "t1" SEMI JOIN ( SELECT - t2.city, - t2."Mean(v2)" + "t2"."city", + "t2"."Mean(v2)" FROM ( SELECT - t0.city, - AVG(t0.v2) AS "Mean(v2)" - FROM tbl AS t0 + "t0"."city", + AVG("t0"."v2") AS "Mean(v2)" + FROM "tbl" AS "t0" GROUP BY 1 - ) AS t2 + ) AS "t2" ORDER BY - t2."Mean(v2)" DESC + "t2"."Mean(v2)" DESC LIMIT 10 -) AS t5 - ON t1.city = t5.city \ No newline at end of file +) AS "t5" + ON "t1"."city" = "t5"."city" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql index 53e239ad98c8..d6c6b84b848b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql @@ -1,24 +1,24 @@ SELECT - t1.foo, - t1.bar, - t1.city, - t1.v1, - t1.v2 -FROM tbl AS t1 + "t1"."foo", + "t1"."bar", + "t1"."city", + "t1"."v1", + "t1"."v2" +FROM "tbl" AS "t1" SEMI JOIN ( SELECT - t2.city, - t2."Count(city)" + "t2"."city", + "t2"."Count(city)" FROM ( SELECT - t0.city, - COUNT(t0.city) AS "Count(city)" - FROM tbl AS t0 + "t0"."city", + COUNT("t0"."city") AS "Count(city)" + FROM "tbl" AS "t0" GROUP BY 1 - ) AS t2 + ) AS "t2" ORDER BY - t2."Count(city)" DESC + "t2"."Count(city)" DESC LIMIT 10 -) AS t5 - ON t1.city = t5.city \ No newline at end of file +) AS "t5" + ON "t1"."city" = "t5"."city" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql index b81d1c1f474a..e9c68902a83c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql @@ -1,50 +1,50 @@ SELECT - t3.c_custkey, - t3.c_name, - t3.c_address, - t3.c_nationkey, - t3.c_phone, - t3.c_acctbal, - t3.c_mktsegment, - t3.c_comment, - t4.n_name, - t5.r_name -FROM tpch_customer AS t3 -INNER JOIN tpch_nation AS t4 - ON t3.c_nationkey = t4.n_nationkey -INNER JOIN tpch_region AS t5 - ON t4.n_regionkey = t5.r_regionkey + "t3"."c_custkey", + "t3"."c_name", + "t3"."c_address", + 
"t3"."c_nationkey", + "t3"."c_phone", + "t3"."c_acctbal", + "t3"."c_mktsegment", + "t3"."c_comment", + "t4"."n_name", + "t5"."r_name" +FROM "tpch_customer" AS "t3" +INNER JOIN "tpch_nation" AS "t4" + ON "t3"."c_nationkey" = "t4"."n_nationkey" +INNER JOIN "tpch_region" AS "t5" + ON "t4"."n_regionkey" = "t5"."r_regionkey" SEMI JOIN ( SELECT - t7.n_name, - t7."Sum(c_acctbal)" + "t7"."n_name", + "t7"."Sum(c_acctbal)" FROM ( SELECT - t6.n_name, - SUM(t6.c_acctbal) AS "Sum(c_acctbal)" + "t6"."n_name", + SUM("t6"."c_acctbal") AS "Sum(c_acctbal)" FROM ( SELECT - t3.c_custkey, - t3.c_name, - t3.c_address, - t3.c_nationkey, - t3.c_phone, - t3.c_acctbal, - t3.c_mktsegment, - t3.c_comment, - t4.n_name, - t5.r_name - FROM tpch_customer AS t3 - INNER JOIN tpch_nation AS t4 - ON t3.c_nationkey = t4.n_nationkey - INNER JOIN tpch_region AS t5 - ON t4.n_regionkey = t5.r_regionkey - ) AS t6 + "t3"."c_custkey", + "t3"."c_name", + "t3"."c_address", + "t3"."c_nationkey", + "t3"."c_phone", + "t3"."c_acctbal", + "t3"."c_mktsegment", + "t3"."c_comment", + "t4"."n_name", + "t5"."r_name" + FROM "tpch_customer" AS "t3" + INNER JOIN "tpch_nation" AS "t4" + ON "t3"."c_nationkey" = "t4"."n_nationkey" + INNER JOIN "tpch_region" AS "t5" + ON "t4"."n_regionkey" = "t5"."r_regionkey" + ) AS "t6" GROUP BY 1 - ) AS t7 + ) AS "t7" ORDER BY - t7."Sum(c_acctbal)" DESC + "t7"."Sum(c_acctbal)" DESC LIMIT 10 -) AS t10 - ON t4.n_name = t10.n_name \ No newline at end of file +) AS "t10" + ON "t4"."n_name" = "t10"."n_name" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql index fe46767993fe..10470883609e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql @@ -1,14 +1,14 @@ SELECT - t1.dest, - t1."Mean(arrdelay)" + "t1"."dest", + "t1"."Mean(arrdelay)" FROM ( SELECT - t0.dest, - AVG(t0.arrdelay) AS "Mean(arrdelay)" - FROM airlines AS t0 + "t0"."dest", + AVG("t0"."arrdelay") AS "Mean(arrdelay)" + FROM "airlines" AS "t0" GROUP BY 1 -) AS t1 +) AS "t1" ORDER BY - t1."Mean(arrdelay)" DESC + "t1"."Mean(arrdelay)" DESC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql index 6a6108dcf40c..bf5316341bdf 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql @@ -1,32 +1,32 @@ -WITH t9 AS ( +WITH "t9" AS ( SELECT - t8.region, - EXTRACT(year FROM t8.odate) AS year, - CAST(SUM(t8.amount) AS DOUBLE) AS total + "t8"."region", + EXTRACT(year FROM "t8"."odate") AS "year", + CAST(SUM("t8"."amount") AS DOUBLE) AS "total" FROM ( SELECT - t4.r_name AS region, - t5.n_name AS nation, - t7.o_totalprice AS amount, - CAST(t7.o_orderdate AS TIMESTAMP) AS odate - FROM tpch_region AS t4 - INNER JOIN tpch_nation AS t5 - ON t4.r_regionkey = t5.n_regionkey - INNER JOIN tpch_customer AS t6 - ON t6.c_nationkey = t5.n_nationkey - INNER JOIN tpch_orders AS t7 - ON t7.o_custkey = t6.c_custkey - ) AS t8 + "t4"."r_name" AS "region", + "t5"."n_name" AS "nation", + "t7"."o_totalprice" AS "amount", + CAST("t7"."o_orderdate" AS TIMESTAMP) AS "odate" + FROM "tpch_region" AS "t4" + INNER JOIN 
"tpch_nation" AS "t5" + ON "t4"."r_regionkey" = "t5"."n_regionkey" + INNER JOIN "tpch_customer" AS "t6" + ON "t6"."c_nationkey" = "t5"."n_nationkey" + INNER JOIN "tpch_orders" AS "t7" + ON "t7"."o_custkey" = "t6"."c_custkey" + ) AS "t8" GROUP BY 1, 2 ) SELECT - t11.region, - t11.year, - t11.total - t13.total AS yoy_change -FROM t9 AS t11 -INNER JOIN t9 AS t13 - ON t11.year = ( - t13.year - CAST(1 AS TINYINT) + "t11"."region", + "t11"."year", + "t11"."total" - "t13"."total" AS "yoy_change" +FROM "t9" AS "t11" +INNER JOIN "t9" AS "t13" + ON "t11"."year" = ( + "t13"."year" - CAST(1 AS TINYINT) ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql index 102f8d7fc152..e66e28b99984 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql @@ -2,25 +2,25 @@ SELECT COUNT(*) AS "CountStar()" FROM ( SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month - FROM functional_alltypes AS t0 + "t0"."id", + "t0"."bool_col", + "t0"."tinyint_col", + "t0"."smallint_col", + "t0"."int_col", + "t0"."bigint_col", + "t0"."float_col", + "t0"."double_col", + "t0"."date_string_col", + "t0"."string_col", + "t0"."timestamp_col", + "t0"."year", + "t0"."month" + FROM "functional_alltypes" AS "t0" WHERE - t0.timestamp_col < ( + "t0"."timestamp_col" < ( MAKE_TIMESTAMP(2010, 1, 1, 0, 0, 0.0) + INTERVAL '3' MONTH ) - AND t0.timestamp_col < ( + AND "t0"."timestamp_col" < ( CAST(CURRENT_TIMESTAMP AS TIMESTAMP) + INTERVAL '10' DAY ) -) AS t1 \ No newline at end of file +) AS "t1" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql index e38225fffe4e..0ebb0d67033b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql @@ -1,19 +1,19 @@ SELECT - t4.c, - t4.f, - t4.foo_id, - t4.bar_id, - t4.diff + "t4"."c", + "t4"."f", + "t4"."foo_id", + "t4"."bar_id", + "t4"."diff" FROM ( SELECT - t2.c, - t2.f, - t2.foo_id, - t2.bar_id, - t2.f - t3.value1 AS diff - FROM star1 AS t2 - INNER JOIN star2 AS t3 - ON t2.foo_id = t3.foo_id -) AS t4 + "t2"."c", + "t2"."f", + "t2"."foo_id", + "t2"."bar_id", + "t2"."f" - "t3"."value1" AS "diff" + FROM "star1" AS "t2" + INNER JOIN "star2" AS "t3" + ON "t2"."foo_id" = "t3"."foo_id" +) AS "t4" WHERE - t4.diff > CAST(1 AS TINYINT) \ No newline at end of file + "t4"."diff" > CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql index 34036b117531..1cf616302cf5 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql @@ -1,15 +1,16 @@ SELECT - t0.a, - t0.b, - t0.c, - t0.d, - t0.e, - t0.f, - t0.g, - t0.h, - t0.i, - t0.j, - t0.k -FROM alltypes AS t0 + "t0"."a", + 
"t0"."b", + "t0"."c", + "t0"."d", + "t0"."e", + "t0"."f", + "t0"."g", + "t0"."h", + "t0"."i", + "t0"."j", + "t0"."k" +FROM "alltypes" AS "t0" WHERE - t0.a > CAST(0 AS TINYINT) AND t0.f BETWEEN CAST(0 AS TINYINT) AND CAST(1 AS TINYINT) \ No newline at end of file + "t0"."a" > CAST(0 AS TINYINT) + AND "t0"."f" BETWEEN CAST(0 AS TINYINT) AND CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql index 338494c9c8e1..e9dcc1060340 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql @@ -1,21 +1,21 @@ SELECT - t4.c, - t4.f, - t4.foo_id, - t4.bar_id, - t4.value1, - t4.value3 + "t4"."c", + "t4"."f", + "t4"."foo_id", + "t4"."bar_id", + "t4"."value1", + "t4"."value3" FROM ( SELECT - t2.c, - t2.f, - t2.foo_id, - t2.bar_id, - t3.value1, - t3.value3 - FROM star1 AS t2 - INNER JOIN star2 AS t3 - ON t2.foo_id = t3.foo_id -) AS t4 + "t2"."c", + "t2"."f", + "t2"."foo_id", + "t2"."bar_id", + "t3"."value1", + "t3"."value3" + FROM "star1" AS "t2" + INNER JOIN "star2" AS "t3" + ON "t2"."foo_id" = "t3"."foo_id" +) AS "t4" WHERE - t4.f > CAST(0 AS TINYINT) AND t4.value3 < CAST(1000 AS SMALLINT) \ No newline at end of file + "t4"."f" > CAST(0 AS TINYINT) AND "t4"."value3" < CAST(1000 AS SMALLINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_count/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_count/out.sql index 35e4fe0adc24..65116af02a75 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_count/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_count/out.sql @@ -1,14 +1,14 @@ SELECT - t1.foo_id, - t1.total + "t1"."foo_id", + "t1"."total" FROM ( SELECT - t0.foo_id, - SUM(t0.f) AS total, + "t0"."foo_id", + SUM("t0"."f") AS "total", COUNT(*) AS "CountStar()" - FROM star1 AS t0 + FROM "star1" AS "t0" GROUP BY 1 -) AS t1 +) AS "t1" WHERE - t1."CountStar()" > CAST(100 AS TINYINT) \ No newline at end of file + "t1"."CountStar()" > CAST(100 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_sum/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_sum/out.sql index 47945167c00a..bb81eda40239 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_sum/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_sum/out.sql @@ -1,13 +1,13 @@ SELECT - t1.foo_id, - t1.total + "t1"."foo_id", + "t1"."total" FROM ( SELECT - t0.foo_id, - SUM(t0.f) AS total - FROM star1 AS t0 + "t0"."foo_id", + SUM("t0"."f") AS "total" + FROM "star1" AS "t0" GROUP BY 1 -) AS t1 +) AS "t1" WHERE - t1.total > CAST(10 AS TINYINT) \ No newline at end of file + "t1"."total" > CAST(10 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/single/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/single/out.sql index dbf4aadac203..34e180525498 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/single/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/single/out.sql @@ -1,6 +1,6 @@ SELECT - t0.foo_id, - SUM(t0.f) AS total -FROM star1 AS t0 + "t0"."foo_id", + SUM("t0"."f") AS "total" +FROM 
"star1" AS "t0" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/two/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/two/out.sql index be9b430bd3d3..c5b36f1f25d4 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/two/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/two/out.sql @@ -1,8 +1,8 @@ SELECT - t0.foo_id, - t0.bar_id, - SUM(t0.f) AS total -FROM star1 AS t0 + "t0"."foo_id", + "t0"."bar_id", + SUM("t0"."f") AS "total" +FROM "star1" AS "t0" GROUP BY 1, 2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_between/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_between/out.sql index c4f686443cee..af7f94c32409 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_between/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_between/out.sql @@ -1,3 +1,3 @@ SELECT - t0.double_col BETWEEN CAST(5 AS TINYINT) AND CAST(10 AS TINYINT) AS tmp -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."double_col" BETWEEN CAST(5 AS TINYINT) AND CAST(10 AS TINYINT) AS "tmp" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/and/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/and/out.sql index d2e722d4fa18..a288a6c325f4 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/and/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/and/out.sql @@ -1,7 +1,8 @@ SELECT ( - t0.double_col > CAST(0 AS TINYINT) - ) AND ( - t0.double_col < CAST(5 AS TINYINT) - ) AS tmp -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."double_col" > CAST(0 AS TINYINT) + ) + AND ( + "t0"."double_col" < CAST(5 AS TINYINT) + ) AS "tmp" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/or/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/or/out.sql index 4b0542464299..8e7e85a06eef 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/or/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/or/out.sql @@ -1,7 +1,8 @@ SELECT ( - t0.double_col < CAST(0 AS TINYINT) - ) OR ( - t0.double_col > CAST(5 AS TINYINT) - ) AS tmp -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."double_col" < CAST(0 AS TINYINT) + ) + OR ( + "t0"."double_col" > CAST(5 AS TINYINT) + ) AS "tmp" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_coalesce/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_coalesce/out.sql index df5a9329fcb4..978ba8ae9e67 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_coalesce/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_coalesce/out.sql @@ -1,7 +1,11 @@ SELECT COALESCE( - CASE WHEN t0.double_col > CAST(30 AS TINYINT) THEN t0.double_col ELSE NULL END, + CASE + WHEN "t0"."double_col" > CAST(30 AS TINYINT) + THEN "t0"."double_col" + ELSE NULL + END, NULL, - t0.float_col - ) AS tmp -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."float_col" + ) AS "tmp" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/eq/out.sql 
b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/eq/out.sql index 2ad44306e1cd..a21e83164c3e 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/eq/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/eq/out.sql @@ -1,3 +1,3 @@ SELECT - t0.double_col = CAST(5 AS TINYINT) AS tmp -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."double_col" = CAST(5 AS TINYINT) AS "tmp" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ge/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ge/out.sql index 8b722a819754..3dd5fffe7afe 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ge/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ge/out.sql @@ -1,3 +1,3 @@ SELECT - t0.double_col >= CAST(5 AS TINYINT) AS tmp -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."double_col" >= CAST(5 AS TINYINT) AS "tmp" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/gt/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/gt/out.sql index ca8c8d134d60..d1b071460150 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/gt/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/gt/out.sql @@ -1,3 +1,3 @@ SELECT - t0.double_col > CAST(5 AS TINYINT) AS tmp -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."double_col" > CAST(5 AS TINYINT) AS "tmp" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/le/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/le/out.sql index 53bf7d0d2dbb..b09e7d427b7a 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/le/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/le/out.sql @@ -1,3 +1,3 @@ SELECT - t0.double_col <= CAST(5 AS TINYINT) AS tmp -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."double_col" <= CAST(5 AS TINYINT) AS "tmp" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/lt/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/lt/out.sql index 627be1840789..44e538076f9d 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/lt/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/lt/out.sql @@ -1,3 +1,3 @@ SELECT - t0.double_col < CAST(5 AS TINYINT) AS tmp -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."double_col" < CAST(5 AS TINYINT) AS "tmp" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ne/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ne/out.sql index 685a418a8eb4..e9b9c60dcac8 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ne/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ne/out.sql @@ -1,3 +1,3 @@ SELECT - t0.double_col <> CAST(5 AS TINYINT) AS tmp -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."double_col" <> CAST(5 AS TINYINT) AS "tmp" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_sql/test_cte_factor_distinct_but_equal/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_cte_factor_distinct_but_equal/out.sql index 1aa27939686a..47a9c20dfbed 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_cte_factor_distinct_but_equal/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_cte_factor_distinct_but_equal/out.sql @@ -1,20 +1,20 @@ SELECT - t3.g, - t3.metric + "t3"."g", + "t3"."metric" FROM ( SELECT - t0.g, - SUM(t0.f) AS metric - FROM alltypes AS t0 + "t0"."g", + SUM("t0"."f") AS "metric" + FROM "alltypes" AS "t0" GROUP BY 1 -) AS t3 +) AS "t3" INNER JOIN ( SELECT - t1.g, - SUM(t1.f) AS metric - FROM alltypes AS t1 + "t1"."g", + SUM("t1"."f") AS "metric" + FROM "alltypes" AS "t1" GROUP BY 1 -) AS t6 - ON t3.g = t6.g \ No newline at end of file +) AS "t6" + ON "t3"."g" = "t6"."g" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/count_distinct/out.sql index 37382bcf5149..402fb340b779 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/count_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/count_distinct/out.sql @@ -1,3 +1,3 @@ SELECT - COUNT(DISTINCT t0.int_col) AS nunique -FROM functional_alltypes AS t0 \ No newline at end of file + COUNT(DISTINCT "t0"."int_col") AS "nunique" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/group_by_count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/group_by_count_distinct/out.sql index 14b6c6d83477..c46d3552f709 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/group_by_count_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/group_by_count_distinct/out.sql @@ -1,6 +1,6 @@ SELECT - t0.string_col, - COUNT(DISTINCT t0.int_col) AS nunique -FROM functional_alltypes AS t0 + "t0"."string_col", + COUNT(DISTINCT "t0"."int_col") AS "nunique" +FROM "functional_alltypes" AS "t0" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/projection_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/projection_distinct/out.sql index 483b4fef6f49..098405dd6f82 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/projection_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/projection_distinct/out.sql @@ -2,7 +2,7 @@ SELECT DISTINCT * FROM ( SELECT - t0.string_col, - t0.int_col - FROM functional_alltypes AS t0 -) AS t1 \ No newline at end of file + "t0"."string_col", + "t0"."int_col" + FROM "functional_alltypes" AS "t0" +) AS "t1" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/single_column_projection_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/single_column_projection_distinct/out.sql index d38aa10366c4..0913c0727447 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/single_column_projection_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/single_column_projection_distinct/out.sql @@ -2,6 +2,6 @@ SELECT DISTINCT * FROM ( SELECT - t0.string_col - FROM functional_alltypes AS t0 -) AS t1 \ No newline at end of file + "t0"."string_col" + FROM "functional_alltypes" AS "t0" +) AS "t1" \ No newline at end 
of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/table_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/table_distinct/out.sql index dd4c570ec517..22cb0aee7152 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/table_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/table_distinct/out.sql @@ -1,3 +1,3 @@ SELECT DISTINCT * -FROM functional_alltypes AS t0 \ No newline at end of file +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e1.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e1.sql index b0be257b254f..f62872c94e58 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e1.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e1.sql @@ -1,13 +1,13 @@ SELECT - t0.key1, - t0.key2, - t0.value1 -FROM foo_t AS t0 + "t0"."key1", + "t0"."key2", + "t0"."value1" +FROM "foo_t" AS "t0" WHERE EXISTS( SELECT CAST(1 AS TINYINT) AS "1" - FROM bar_t AS t1 + FROM "bar_t" AS "t1" WHERE - t0.key1 = t1.key1 + "t0"."key1" = "t1"."key1" ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e2.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e2.sql index f397c2b7251e..0493676fd923 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e2.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e2.sql @@ -1,17 +1,17 @@ SELECT - t0.key1, - t0.key2, - t0.value1 -FROM foo_t AS t0 + "t0"."key1", + "t0"."key2", + "t0"."value1" +FROM "foo_t" AS "t0" WHERE EXISTS( SELECT CAST(1 AS TINYINT) AS "1" - FROM bar_t AS t1 + FROM "bar_t" AS "t1" WHERE ( - t0.key1 = t1.key1 + "t0"."key1" = "t1"."key1" ) AND ( - t1.key2 = 'foo' + "t1"."key2" = 'foo' ) ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_filter_group_by_agg_with_same_name/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_filter_group_by_agg_with_same_name/out.sql index c0ba260a78bb..da36f3727637 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_filter_group_by_agg_with_same_name/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_filter_group_by_agg_with_same_name/out.sql @@ -1,13 +1,13 @@ SELECT - t1.int_col, - t1.bigint_col + "t1"."int_col", + "t1"."bigint_col" FROM ( SELECT - t0.int_col, - SUM(t0.bigint_col) AS bigint_col - FROM t AS t0 + "t0"."int_col", + SUM("t0"."bigint_col") AS "bigint_col" + FROM "t" AS "t0" GROUP BY 1 -) AS t1 +) AS "t1" WHERE - t1.bigint_col = CAST(60 AS TINYINT) \ No newline at end of file + "t1"."bigint_col" = CAST(60 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_gh_1045/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_gh_1045/out.sql index ebf908ac0397..c192b87e763e 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_gh_1045/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_gh_1045/out.sql @@ -1,37 +1,37 @@ SELECT - t5.t1_id1, - t5.t1_val1, - t9.id3, - t9.val2, - t9.dt, - t9.t3_val2, - t9.id2a, - t9.id2b, - t9.val2_right + "t5"."t1_id1", + "t5"."t1_val1", + "t9"."id3", + "t9"."val2", + "t9"."dt", + "t9"."t3_val2", + "t9"."id2a", + "t9"."id2b", + "t9"."val2_right" FROM ( SELECT - t0.id1 AS t1_id1, - t0.val1 AS t1_val1 - FROM test1 AS t0 -) AS t5 + "t0"."id1" AS "t1_id1", + "t0"."val1" AS "t1_val1" + FROM "test1" AS "t0" +) AS "t5" LEFT OUTER JOIN ( SELECT - t7.id3, - t7.val2, - t7.dt, - 
t7.t3_val2, - t3.id2a, - t3.id2b, - t3.val2 AS val2_right + "t7"."id3", + "t7"."val2", + "t7"."dt", + "t7"."t3_val2", + "t3"."id2a", + "t3"."id2b", + "t3"."val2" AS "val2_right" FROM ( SELECT - CAST(t1.id3 AS BIGINT) AS id3, - t1.val2, - t1.dt, - CAST(t1.id3 AS BIGINT) AS t3_val2 - FROM test3 AS t1 - ) AS t7 - INNER JOIN test2 AS t3 - ON t3.id2b = t7.id3 -) AS t9 - ON t5.t1_id1 = t9.id2a \ No newline at end of file + CAST("t1"."id3" AS BIGINT) AS "id3", + "t1"."val2", + "t1"."dt", + CAST("t1"."id3" AS BIGINT) AS "t3_val2" + FROM "test3" AS "t1" + ) AS "t7" + INNER JOIN "test2" AS "t3" + ON "t3"."id2b" = "t7"."id3" +) AS "t9" + ON "t5"."t1_id1" = "t9"."id2a" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/isnull/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/isnull/out.sql index d06c0383bb09..415c2932e0be 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/isnull/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/isnull/out.sql @@ -1,3 +1,3 @@ SELECT - t0.double_col IS NULL AS tmp -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."double_col" IS NULL AS "tmp" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql index f33c3466083a..567d20198fcb 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql @@ -1,3 +1,3 @@ SELECT - NOT t0.double_col IS NULL AS tmp -FROM functional_alltypes AS t0 \ No newline at end of file + NOT "t0"."double_col" IS NULL AS "tmp" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_join_just_materialized/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_join_just_materialized/out.sql index 28ef4cadbb22..03ed7e8d464c 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_join_just_materialized/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_join_just_materialized/out.sql @@ -1,21 +1,21 @@ SELECT - t3.n_nationkey, - t3.n_name, - t3.n_regionkey, - t3.n_comment, - t4.r_regionkey, - t4.r_name, - t4.r_comment, - t5.c_custkey, - t5.c_name, - t5.c_address, - t5.c_nationkey, - t5.c_phone, - t5.c_acctbal, - t5.c_mktsegment, - t5.c_comment -FROM tpch_nation AS t3 -INNER JOIN tpch_region AS t4 - ON t3.n_regionkey = t4.r_regionkey -INNER JOIN tpch_customer AS t5 - ON t3.n_nationkey = t5.c_nationkey \ No newline at end of file + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment", + "t4"."r_regionkey", + "t4"."r_name", + "t4"."r_comment", + "t5"."c_custkey", + "t5"."c_name", + "t5"."c_address", + "t5"."c_nationkey", + "t5"."c_phone", + "t5"."c_acctbal", + "t5"."c_mktsegment", + "t5"."c_comment" +FROM "tpch_nation" AS "t3" +INNER JOIN "tpch_region" AS "t4" + ON "t3"."n_regionkey" = "t4"."r_regionkey" +INNER JOIN "tpch_customer" AS "t5" + ON "t3"."n_nationkey" = "t5"."c_nationkey" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner/out.sql index 9289a835e37a..38f8b0cac8d1 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner/out.sql +++ 
b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner/out.sql @@ -1,11 +1,11 @@ SELECT - t2.r_regionkey, - t2.r_name, - t2.r_comment, - t3.n_nationkey, - t3.n_name, - t3.n_regionkey, - t3.n_comment -FROM tpch_region AS t2 -INNER JOIN tpch_nation AS t3 - ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file + "t2"."r_regionkey", + "t2"."r_name", + "t2"."r_comment", + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment" +FROM "tpch_region" AS "t2" +INNER JOIN "tpch_nation" AS "t3" + ON "t2"."r_regionkey" = "t3"."n_regionkey" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner_select/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner_select/out.sql index 38534295064f..62b8b3a02a94 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner_select/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner_select/out.sql @@ -1,8 +1,8 @@ SELECT - t3.n_nationkey, - t3.n_name, - t3.n_regionkey, - t3.n_comment -FROM tpch_region AS t2 -INNER JOIN tpch_nation AS t3 - ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment" +FROM "tpch_region" AS "t2" +INNER JOIN "tpch_nation" AS "t3" + ON "t2"."r_regionkey" = "t3"."n_regionkey" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left/out.sql index 7048d19d0ba4..83cb5797fa83 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left/out.sql @@ -1,11 +1,11 @@ SELECT - t2.r_regionkey, - t2.r_name, - t2.r_comment, - t3.n_nationkey, - t3.n_name, - t3.n_regionkey, - t3.n_comment -FROM tpch_region AS t2 -LEFT OUTER JOIN tpch_nation AS t3 - ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file + "t2"."r_regionkey", + "t2"."r_name", + "t2"."r_comment", + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment" +FROM "tpch_region" AS "t2" +LEFT OUTER JOIN "tpch_nation" AS "t3" + ON "t2"."r_regionkey" = "t3"."n_regionkey" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left_select/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left_select/out.sql index 26c408b5be1a..ae780af31792 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left_select/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left_select/out.sql @@ -1,8 +1,8 @@ SELECT - t3.n_nationkey, - t3.n_name, - t3.n_regionkey, - t3.n_comment -FROM tpch_region AS t2 -LEFT OUTER JOIN tpch_nation AS t3 - ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment" +FROM "tpch_region" AS "t2" +LEFT OUTER JOIN "tpch_nation" AS "t3" + ON "t2"."r_regionkey" = "t3"."n_regionkey" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer/out.sql index f14ac5c0d92e..77ab8eedab04 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer/out.sql @@ -1,11 +1,11 @@ SELECT - t2.r_regionkey, - t2.r_name, - t2.r_comment, - t3.n_nationkey, - t3.n_name, - t3.n_regionkey, - t3.n_comment -FROM tpch_region AS t2 
-FULL OUTER JOIN tpch_nation AS t3 - ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file + "t2"."r_regionkey", + "t2"."r_name", + "t2"."r_comment", + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment" +FROM "tpch_region" AS "t2" +FULL OUTER JOIN "tpch_nation" AS "t3" + ON "t2"."r_regionkey" = "t3"."n_regionkey" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer_select/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer_select/out.sql index 1b339a3f247b..e1538231f19f 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer_select/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer_select/out.sql @@ -1,8 +1,8 @@ SELECT - t3.n_nationkey, - t3.n_name, - t3.n_regionkey, - t3.n_comment -FROM tpch_region AS t2 -FULL OUTER JOIN tpch_nation AS t3 - ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment" +FROM "tpch_region" AS "t2" +FULL OUTER JOIN "tpch_nation" AS "t3" + ON "t2"."r_regionkey" = "t3"."n_regionkey" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn0/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn0/out.sql index 2b6d0fe52716..651d57bbc4c8 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn0/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn0/out.sql @@ -1,4 +1,4 @@ SELECT * -FROM star1 AS t0 +FROM "star1" AS "t0" LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn1/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn1/out.sql index 3c71bda9b962..0f11f2394924 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn1/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn1/out.sql @@ -1,5 +1,5 @@ SELECT * -FROM star1 AS t0 +FROM "star1" AS "t0" LIMIT 10 OFFSET 5 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_limit_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_filter/out.sql index 31e87b57f3ea..7ce2d03a7aec 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_limit_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_filter/out.sql @@ -1,9 +1,9 @@ SELECT - t0.c, - t0.f, - t0.foo_id, - t0.bar_id -FROM star1 AS t0 + "t0"."c", + "t0"."f", + "t0"."foo_id", + "t0"."bar_id" +FROM "star1" AS "t0" WHERE - t0.f > CAST(0 AS TINYINT) + "t0"."f" > CAST(0 AS TINYINT) LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_limit_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_subquery/out.sql index ab4dd6df7158..6a53e9d7fec3 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_limit_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_subquery/out.sql @@ -1,13 +1,13 @@ SELECT - t1.c, - t1.f, - t1.foo_id, - t1.bar_id + "t1"."c", + "t1"."f", + "t1"."foo_id", + "t1"."bar_id" FROM ( SELECT * - FROM star1 AS t0 + FROM "star1" AS "t0" LIMIT 10 -) AS t1 +) AS "t1" WHERE - t1.f > CAST(0 AS TINYINT) \ No newline at end of file + "t1"."f" > CAST(0 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql 
b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql index c297a58cd8f8..b28c9eb2ed58 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql @@ -1,24 +1,24 @@ SELECT - t5.foo_id, - t5.total, - t5.value1 + "t5"."foo_id", + "t5"."total", + "t5"."value1" FROM ( SELECT - t4.foo_id, - t4.total, - t2.value1 + "t4"."foo_id", + "t4"."total", + "t2"."value1" FROM ( SELECT - t0.foo_id, - SUM(t0.f) AS total - FROM star1 AS t0 + "t0"."foo_id", + SUM("t0"."f") AS "total" + FROM "star1" AS "t0" GROUP BY 1 - ) AS t4 - INNER JOIN star2 AS t2 - ON t4.foo_id = t2.foo_id -) AS t5 + ) AS "t4" + INNER JOIN "star2" AS "t2" + ON "t4"."foo_id" = "t2"."foo_id" +) AS "t5" WHERE - t5.total > CAST(100 AS TINYINT) + "t5"."total" > CAST(100 AS TINYINT) ORDER BY - t5.total DESC \ No newline at end of file + "t5"."total" DESC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql index ad0a5d35f89d..94f254f3a207 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql @@ -1,20 +1,20 @@ SELECT - t4.x1, - t4.y1, - t5.x2, - t9.x3, - t9.y2, - t9.x4 -FROM t1 AS t4 -INNER JOIN t2 AS t5 - ON t4.x1 = t5.x2 + "t4"."x1", + "t4"."y1", + "t5"."x2", + "t9"."x3", + "t9"."y2", + "t9"."x4" +FROM "t1" AS "t4" +INNER JOIN "t2" AS "t5" + ON "t4"."x1" = "t5"."x2" INNER JOIN ( SELECT - t6.x3, - t6.y2, - t7.x4 - FROM t3 AS t6 - INNER JOIN t4 AS t7 - ON t6.x3 = t7.x4 -) AS t9 - ON t4.y1 = t9.y2 \ No newline at end of file + "t6"."x3", + "t6"."y2", + "t7"."x4" + FROM "t3" AS "t6" + INNER JOIN "t4" AS "t7" + ON "t6"."x3" = "t7"."x4" +) AS "t9" + ON "t4"."y1" = "t9"."y2" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_mutate_filter_join_no_cross_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_mutate_filter_join_no_cross_join/out.sql index 82946b9a13bc..293c1b4efd77 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_mutate_filter_join_no_cross_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_mutate_filter_join_no_cross_join/out.sql @@ -1,5 +1,5 @@ SELECT - t0.person_id -FROM person AS t0 + "t0"."person_id" +FROM "person" AS "t0" WHERE CAST(400 AS SMALLINT) <= CAST(40 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_named_expr/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_named_expr/out.sql index 66e751eda132..90abf9858067 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_named_expr/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_named_expr/out.sql @@ -1,3 +1,3 @@ SELECT - t0.double_col * CAST(2 AS TINYINT) AS foo -FROM functional_alltypes AS t0 \ No newline at end of file + "t0"."double_col" * CAST(2 AS TINYINT) AS "foo" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_negate/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_negate/out.sql index 3ea20ad88c56..e248e7ce636e 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_negate/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_negate/out.sql @@ -1,5 +1,5 @@ SELECT NOT ( - t0.double_col > CAST(0 AS TINYINT) - ) AS tmp -FROM functional_alltypes AS t0 \ No 
newline at end of file + "t0"."double_col" > CAST(0 AS TINYINT) + ) AS "tmp" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql index 8cb242edd156..699792bed260 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql @@ -1,37 +1,41 @@ SELECT - t6.ancestor_node_sort_order, - t6.n + "t6"."ancestor_node_sort_order", + "t6"."n" FROM ( SELECT - t5.ancestor_node_sort_order, - CAST(1 AS TINYINT) AS n + "t5"."ancestor_node_sort_order", + CAST(1 AS TINYINT) AS "n" FROM ( SELECT - t2.product_id, - t4.ancestor_level_name, - t4.ancestor_level_number, - t4.ancestor_node_sort_order, - t4.descendant_node_natural_key, - t4.product_level_name - FROM facts AS t2 + "t2"."product_id", + "t4"."ancestor_level_name", + "t4"."ancestor_level_number", + "t4"."ancestor_node_sort_order", + "t4"."descendant_node_natural_key", + "t4"."product_level_name" + FROM "facts" AS "t2" INNER JOIN ( SELECT - t1.ancestor_level_name, - t1.ancestor_level_number, - t1.ancestor_node_sort_order, - t1.descendant_node_natural_key, + "t1"."ancestor_level_name", + "t1"."ancestor_level_number", + "t1"."ancestor_node_sort_order", + "t1"."descendant_node_natural_key", CONCAT( - LPAD('-', ( - t1.ancestor_level_number - CAST(1 AS TINYINT) - ) * CAST(7 AS TINYINT), '-'), - t1.ancestor_level_name - ) AS product_level_name - FROM products AS t1 - ) AS t4 - ON t2.product_id = t4.descendant_node_natural_key - ) AS t5 + LPAD( + '-', + ( + "t1"."ancestor_level_number" - CAST(1 AS TINYINT) + ) * CAST(7 AS TINYINT), + '-' + ), + "t1"."ancestor_level_name" + ) AS "product_level_name" + FROM "products" AS "t1" + ) AS "t4" + ON "t2"."product_id" = "t4"."descendant_node_natural_key" + ) AS "t5" GROUP BY 1 -) AS t6 +) AS "t6" ORDER BY - t6.ancestor_node_sort_order ASC \ No newline at end of file + "t6"."ancestor_node_sort_order" ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql index a94638f06f0b..a53781106951 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql @@ -1,61 +1,61 @@ SELECT - t13.customer_id, - t13.first_name, - t13.last_name, - t13.first_order, - t13.most_recent_order, - t13.number_of_orders, - t11.total_amount AS customer_lifetime_value + "t13"."customer_id", + "t13"."first_name", + "t13"."last_name", + "t13"."first_order", + "t13"."most_recent_order", + "t13"."number_of_orders", + "t11"."total_amount" AS "customer_lifetime_value" FROM ( SELECT - t10.customer_id, - t10.first_name, - t10.last_name, - t10.first_order, - t10.most_recent_order, - t10.number_of_orders + "t10"."customer_id", + "t10"."first_name", + "t10"."last_name", + "t10"."first_order", + "t10"."most_recent_order", + "t10"."number_of_orders" FROM ( SELECT - t3.customer_id, - t3.first_name, - t3.last_name, - t7.customer_id AS customer_id_right, - t7.first_order, - t7.most_recent_order, - t7.number_of_orders - FROM customers AS t3 + "t3"."customer_id", + "t3"."first_name", + "t3"."last_name", + "t7"."customer_id" AS "customer_id_right", + "t7"."first_order", + "t7"."most_recent_order", + "t7"."number_of_orders" + FROM "customers" AS "t3" LEFT OUTER JOIN ( SELECT - 
t2.customer_id, - MIN(t2.order_date) AS first_order, - MAX(t2.order_date) AS most_recent_order, - COUNT(t2.order_id) AS number_of_orders - FROM orders AS t2 + "t2"."customer_id", + MIN("t2"."order_date") AS "first_order", + MAX("t2"."order_date") AS "most_recent_order", + COUNT("t2"."order_id") AS "number_of_orders" + FROM "orders" AS "t2" GROUP BY 1 - ) AS t7 - ON t3.customer_id = t7.customer_id - ) AS t10 -) AS t13 + ) AS "t7" + ON "t3"."customer_id" = "t7"."customer_id" + ) AS "t10" +) AS "t13" LEFT OUTER JOIN ( SELECT - t8.customer_id, - SUM(t8.amount) AS total_amount + "t8"."customer_id", + SUM("t8"."amount") AS "total_amount" FROM ( SELECT - t4.payment_id, - t4.order_id, - t4.payment_method, - t4.amount, - t5.order_id AS order_id_right, - t5.customer_id, - t5.order_date, - t5.status - FROM payments AS t4 - LEFT OUTER JOIN orders AS t5 - ON t4.order_id = t5.order_id - ) AS t8 + "t4"."payment_id", + "t4"."order_id", + "t4"."payment_method", + "t4"."amount", + "t5"."order_id" AS "order_id_right", + "t5"."customer_id", + "t5"."order_date", + "t5"."status" + FROM "payments" AS "t4" + LEFT OUTER JOIN "orders" AS "t5" + ON "t4"."order_id" = "t5"."order_id" + ) AS "t8" GROUP BY 1 -) AS t11 - ON t13.customer_id = t11.customer_id \ No newline at end of file +) AS "t11" + ON "t13"."customer_id" = "t11"."customer_id" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cross_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cross_join/out.sql index fc3f08afa72b..bf95ad4de09b 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cross_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cross_join/out.sql @@ -1,16 +1,16 @@ SELECT - t3.id, - t3.personal, - t3.family, - t4.taken, - t4.person, - t4.quant, - t4.reading, - t5.id AS id_right, - t5.site, - t5.dated -FROM person AS t3 -INNER JOIN survey AS t4 - ON t3.id = t4.person -INNER JOIN visited AS t5 - ON t5.id = t4.taken \ No newline at end of file + "t3"."id", + "t3"."personal", + "t3"."family", + "t4"."taken", + "t4"."person", + "t4"."quant", + "t4"."reading", + "t5"."id" AS "id_right", + "t5"."site", + "t5"."dated" +FROM "person" AS "t3" +INNER JOIN "survey" AS "t4" + ON "t3"."id" = "t4"."person" +INNER JOIN "visited" AS "t5" + ON "t5"."id" = "t4"."taken" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_not_exists/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_not_exists/out.sql index 548a1efef2ec..49e1de6dee7f 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_not_exists/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_not_exists/out.sql @@ -1,15 +1,15 @@ SELECT - t0.key1, - t0.key2, - t0.value1 -FROM foo_t AS t0 + "t0"."key1", + "t0"."key2", + "t0"."value1" +FROM "foo_t" AS "t0" WHERE NOT ( EXISTS( SELECT CAST(1 AS TINYINT) AS "1" - FROM bar_t AS t1 + FROM "bar_t" AS "t1" WHERE - t0.key1 = t1.key1 + "t0"."key1" = "t1"."key1" ) ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/column/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/column/out.sql index bb666f269b2a..b5450abae2a7 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/column/out.sql @@ -1,8 +1,8 @@ SELECT - t0.c, - t0.f, - t0.foo_id, - t0.bar_id -FROM star1 AS t0 + "t0"."c", + "t0"."f", + "t0"."foo_id", + "t0"."bar_id" +FROM "star1" AS "t0" ORDER BY - t0.f ASC \ No 
newline at end of file + "t0"."f" ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/random/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/random/out.sql index 356b091282c5..57217205415c 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/random/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/random/out.sql @@ -1,8 +1,8 @@ SELECT - t0.c, - t0.f, - t0.foo_id, - t0.bar_id -FROM star1 AS t0 + "t0"."c", + "t0"."f", + "t0"."foo_id", + "t0"."bar_id" +FROM "star1" AS "t0" ORDER BY RANDOM() ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by_expr/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by_expr/out.sql index 99a46813f652..1cd433cac8ee 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by_expr/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by_expr/out.sql @@ -1,8 +1,8 @@ SELECT - t0.a, - t0.b -FROM t AS t0 + "t0"."a", + "t0"."b" +FROM "t" AS "t0" WHERE - t0.a = CAST(1 AS TINYINT) + "t0"."a" = CAST(1 AS TINYINT) ORDER BY - CONCAT(t0.b, 'a') ASC \ No newline at end of file + CONCAT("t0"."b", 'a') ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_searched_case/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_searched_case/out.sql index 31c40c343501..1bbe6d29ebd7 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_searched_case/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_searched_case/out.sql @@ -1,9 +1,9 @@ SELECT CASE - WHEN t0.f > CAST(0 AS TINYINT) - THEN t0.d * CAST(2 AS TINYINT) - WHEN t0.c < CAST(0 AS TINYINT) - THEN t0.a * CAST(2 AS TINYINT) + WHEN "t0"."f" > CAST(0 AS TINYINT) + THEN "t0"."d" * CAST(2 AS TINYINT) + WHEN "t0"."c" < CAST(0 AS TINYINT) + THEN "t0"."a" * CAST(2 AS TINYINT) ELSE CAST(NULL AS BIGINT) - END AS tmp -FROM alltypes AS t0 \ No newline at end of file + END AS "tmp" +FROM "alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/anti.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/anti.sql index c598c1264a74..45e3247059df 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/anti.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/anti.sql @@ -1,25 +1,25 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month -FROM functional_alltypes AS t0 + "t0"."id", + "t0"."bool_col", + "t0"."tinyint_col", + "t0"."smallint_col", + "t0"."int_col", + "t0"."bigint_col", + "t0"."float_col", + "t0"."double_col", + "t0"."date_string_col", + "t0"."string_col", + "t0"."timestamp_col", + "t0"."year", + "t0"."month" +FROM "functional_alltypes" AS "t0" WHERE NOT ( EXISTS( SELECT CAST(1 AS TINYINT) AS "1" - FROM functional_alltypes AS t1 + FROM "functional_alltypes" AS "t1" WHERE - t0.string_col = t1.string_col + "t0"."string_col" = "t1"."string_col" ) ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/semi.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/semi.sql index 16089afced58..2be1572284b0 100644 --- 
a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/semi.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/semi.sql @@ -1,23 +1,23 @@ SELECT - t0.id, - t0.bool_col, - t0.tinyint_col, - t0.smallint_col, - t0.int_col, - t0.bigint_col, - t0.float_col, - t0.double_col, - t0.date_string_col, - t0.string_col, - t0.timestamp_col, - t0.year, - t0.month -FROM functional_alltypes AS t0 + "t0"."id", + "t0"."bool_col", + "t0"."tinyint_col", + "t0"."smallint_col", + "t0"."int_col", + "t0"."bigint_col", + "t0"."float_col", + "t0"."double_col", + "t0"."date_string_col", + "t0"."string_col", + "t0"."timestamp_col", + "t0"."year", + "t0"."month" +FROM "functional_alltypes" AS "t0" WHERE EXISTS( SELECT CAST(1 AS TINYINT) AS "1" - FROM functional_alltypes AS t1 + FROM "functional_alltypes" AS "t1" WHERE - t0.string_col = t1.string_col + "t0"."string_col" = "t1"."string_col" ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_join/out.sql index e9c93029c637..ac92348404fd 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_join/out.sql @@ -1,8 +1,8 @@ SELECT - t1.c, - t1.f, - t1.foo_id, - t1.bar_id -FROM star1 AS t1 -INNER JOIN star1 AS t3 - ON t1.foo_id = t3.bar_id \ No newline at end of file + "t1"."c", + "t1"."f", + "t1"."foo_id", + "t1"."bar_id" +FROM "star1" AS "t1" +INNER JOIN "star1" AS "t3" + ON "t1"."foo_id" = "t3"."bar_id" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_simple_case/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_simple_case/out.sql index 3575c8d6e653..a3261990855f 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_simple_case/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_simple_case/out.sql @@ -1,3 +1,3 @@ SELECT - CASE t0.g WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS tmp -FROM alltypes AS t0 \ No newline at end of file + CASE "t0"."g" WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS "tmp" +FROM "alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_sort_aggregation_translation_failure/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_sort_aggregation_translation_failure/out.sql index e8f9420b4263..9a054603df9c 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_sort_aggregation_translation_failure/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_sort_aggregation_translation_failure/out.sql @@ -1,13 +1,13 @@ SELECT - t1.string_col, - t1.foo + "t1"."string_col", + "t1"."foo" FROM ( SELECT - t0.string_col, - MAX(t0.double_col) AS foo - FROM functional_alltypes AS t0 + "t0"."string_col", + MAX("t0"."double_col") AS "foo" + FROM "functional_alltypes" AS "t0" GROUP BY 1 -) AS t1 +) AS "t1" ORDER BY - t1.foo DESC \ No newline at end of file + "t1"."foo" DESC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_subquery_aliased/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_subquery_aliased/out.sql index e0f8941f3527..e59c73676247 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_subquery_aliased/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_subquery_aliased/out.sql @@ -1,14 +1,14 @@ SELECT - t4.foo_id, - t4.total, - t2.value1 
+ "t4"."foo_id", + "t4"."total", + "t2"."value1" FROM ( SELECT - t0.foo_id, - SUM(t0.f) AS total - FROM star1 AS t0 + "t0"."foo_id", + SUM("t0"."f") AS "total" + FROM "star1" AS "t0" GROUP BY 1 -) AS t4 -INNER JOIN star2 AS t2 - ON t4.foo_id = t2.foo_id \ No newline at end of file +) AS "t4" +INNER JOIN "star2" AS "t2" + ON "t4"."foo_id" = "t2"."foo_id" \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql index ee5a1da42bd3..dd64692ebe63 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql @@ -1,21 +1,21 @@ SELECT - t0.job, - t0.dept_id, - t0.year, - t0.y -FROM foo AS t0 + "t0"."job", + "t0"."dept_id", + "t0"."year", + "t0"."y" +FROM "foo" AS "t0" WHERE - t0.y > ( + "t0"."y" > ( SELECT - AVG(t2.y) AS "Mean(y)" + AVG("t2"."y") AS "Mean(y)" FROM ( SELECT - t1.job, - t1.dept_id, - t1.year, - t1.y - FROM foo AS t1 + "t1"."job", + "t1"."dept_id", + "t1"."year", + "t1"."y" + FROM "foo" AS "t1" WHERE - t0.dept_id = t1.dept_id - ) AS t2 + "t0"."dept_id" = "t1"."dept_id" + ) AS "t2" ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql index f2e028c28d23..893b51610688 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql @@ -1,31 +1,31 @@ SELECT - t7.p_partkey, - t7.ps_supplycost + "t7"."p_partkey", + "t7"."ps_supplycost" FROM ( SELECT - t3.p_partkey, - t4.ps_supplycost - FROM part AS t3 - INNER JOIN partsupp AS t4 - ON t3.p_partkey = t4.ps_partkey -) AS t7 + "t3"."p_partkey", + "t4"."ps_supplycost" + FROM "part" AS "t3" + INNER JOIN "partsupp" AS "t4" + ON "t3"."p_partkey" = "t4"."ps_partkey" +) AS "t7" WHERE - t7.ps_supplycost = ( + "t7"."ps_supplycost" = ( SELECT - MIN(t9.ps_supplycost) AS "Min(ps_supplycost)" + MIN("t9"."ps_supplycost") AS "Min(ps_supplycost)" FROM ( SELECT - t8.ps_partkey, - t8.ps_supplycost + "t8"."ps_partkey", + "t8"."ps_supplycost" FROM ( SELECT - t5.ps_partkey, - t5.ps_supplycost - FROM partsupp AS t5 - INNER JOIN supplier AS t6 - ON t6.s_suppkey = t5.ps_suppkey - ) AS t8 + "t5"."ps_partkey", + "t5"."ps_supplycost" + FROM "partsupp" AS "t5" + INNER JOIN "supplier" AS "t6" + ON "t6"."s_suppkey" = "t5"."ps_suppkey" + ) AS "t8" WHERE - t8.ps_partkey = t7.p_partkey - ) AS t9 + "t8"."ps_partkey" = "t7"."p_partkey" + ) AS "t9" ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/out.sql index e1914ac959bd..fa090401f5ad 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/out.sql @@ -1,10 +1,10 @@ SELECT - t0.c, - t0.f, - t0.foo_id, - t0.bar_id -FROM star1 AS t0 + "t0"."c", + "t0"."f", + "t0"."foo_id", + "t0"."bar_id" +FROM "star1" AS "t0" WHERE - t0.f > CAST(0 AS TINYINT) AND t0.c < ( - t0.f * CAST(2 AS TINYINT) + "t0"."f" > CAST(0 AS TINYINT) AND "t0"."c" < ( + "t0"."f" * CAST(2 AS TINYINT) ) \ No newline at end of file 
diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_uncorrelated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_uncorrelated_subquery/out.sql index d64e69ba3894..719644e39bda 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_where_uncorrelated_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_uncorrelated_subquery/out.sql @@ -1,12 +1,12 @@ SELECT - t0.job, - t0.dept_id, - t0.year, - t0.y -FROM foo AS t0 + "t0"."job", + "t0"."dept_id", + "t0"."year", + "t0"."y" +FROM "foo" AS "t0" WHERE - t0.job IN ( + "t0"."job" IN ( SELECT - t1.job - FROM bar AS t1 + "t1"."job" + FROM "bar" AS "t1" ) \ No newline at end of file diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index fad799de5e3b..8d0b06e85cb0 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -2639,11 +2639,6 @@ def test_temporal_literal_sql(value, dialect, snapshot): - {"pyspark", "impala", "clickhouse", "oracle", *_NO_SQLGLOT_DIALECT} ), *no_sqlglot_dialect, - *[ - param("impala", marks=no_time_type), - param("clickhouse", marks=no_time_type), - param("oracle", marks=no_time_type), - ], ], ) @pytest.mark.parametrize("micros", [0, 234567]) diff --git a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql index 953b4dfeefc4..bb68526ff92e 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql @@ -1,62 +1,62 @@ SELECT - t2.l_returnflag, - t2.l_linestatus, - t2.sum_qty, - t2.sum_base_price, - t2.sum_disc_price, - t2.sum_charge, - t2.avg_qty, - t2.avg_price, - t2.avg_disc, - t2.count_order + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."sum_qty", + "t2"."sum_base_price", + "t2"."sum_disc_price", + "t2"."sum_charge", + "t2"."avg_qty", + "t2"."avg_price", + "t2"."avg_disc", + "t2"."count_order" FROM ( SELECT - t1.l_returnflag, - t1.l_linestatus, - SUM(t1.l_quantity) AS sum_qty, - SUM(t1.l_extendedprice) AS sum_base_price, - SUM(t1.l_extendedprice * ( - CAST(1 AS TINYINT) - t1.l_discount - )) AS sum_disc_price, + "t1"."l_returnflag", + "t1"."l_linestatus", + SUM("t1"."l_quantity") AS "sum_qty", + SUM("t1"."l_extendedprice") AS "sum_base_price", + SUM("t1"."l_extendedprice" * ( + CAST(1 AS TINYINT) - "t1"."l_discount" + )) AS "sum_disc_price", SUM( ( - t1.l_extendedprice * ( - CAST(1 AS TINYINT) - t1.l_discount + "t1"."l_extendedprice" * ( + CAST(1 AS TINYINT) - "t1"."l_discount" ) ) * ( - t1.l_tax + CAST(1 AS TINYINT) + "t1"."l_tax" + CAST(1 AS TINYINT) ) - ) AS sum_charge, - AVG(t1.l_quantity) AS avg_qty, - AVG(t1.l_extendedprice) AS avg_price, - AVG(t1.l_discount) AS avg_disc, - COUNT(*) AS count_order + ) AS "sum_charge", + AVG("t1"."l_quantity") AS "avg_qty", + AVG("t1"."l_extendedprice") AS "avg_price", + AVG("t1"."l_discount") AS "avg_disc", + COUNT(*) AS "count_order" FROM ( SELECT - t0.l_orderkey, - t0.l_partkey, - t0.l_suppkey, - t0.l_linenumber, - t0.l_quantity, - t0.l_extendedprice, - t0.l_discount, - t0.l_tax, - t0.l_returnflag, - t0.l_linestatus, - t0.l_shipdate, - t0.l_commitdate, - t0.l_receiptdate, - t0.l_shipinstruct, - t0.l_shipmode, - t0.l_comment - FROM lineitem AS t0 + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + "t0"."l_quantity", + "t0"."l_extendedprice", + "t0"."l_discount", + "t0"."l_tax", + "t0"."l_returnflag", + 
"t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" WHERE - t0.l_shipdate <= MAKE_DATE(1998, 9, 2) - ) AS t1 + "t0"."l_shipdate" <= MAKE_DATE(1998, 9, 2) + ) AS "t1" GROUP BY 1, 2 -) AS t2 +) AS "t2" ORDER BY - t2.l_returnflag ASC, - t2.l_linestatus ASC \ No newline at end of file + "t2"."l_returnflag" ASC, + "t2"."l_linestatus" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql index eb30c07672f4..b27b44cd0c05 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql @@ -1,116 +1,116 @@ SELECT - t14.s_acctbal, - t14.s_name, - t14.n_name, - t14.p_partkey, - t14.p_mfgr, - t14.s_address, - t14.s_phone, - t14.s_comment + "t14"."s_acctbal", + "t14"."s_name", + "t14"."n_name", + "t14"."p_partkey", + "t14"."p_mfgr", + "t14"."s_address", + "t14"."s_phone", + "t14"."s_comment" FROM ( SELECT - t5.p_partkey, - t5.p_name, - t5.p_mfgr, - t5.p_brand, - t5.p_type, - t5.p_size, - t5.p_container, - t5.p_retailprice, - t5.p_comment, - t6.ps_partkey, - t6.ps_suppkey, - t6.ps_availqty, - t6.ps_supplycost, - t6.ps_comment, - t8.s_suppkey, - t8.s_name, - t8.s_address, - t8.s_nationkey, - t8.s_phone, - t8.s_acctbal, - t8.s_comment, - t10.n_nationkey, - t10.n_name, - t10.n_regionkey, - t10.n_comment, - t12.r_regionkey, - t12.r_name, - t12.r_comment - FROM part AS t5 - INNER JOIN partsupp AS t6 - ON t5.p_partkey = t6.ps_partkey - INNER JOIN supplier AS t8 - ON t8.s_suppkey = t6.ps_suppkey - INNER JOIN nation AS t10 - ON t8.s_nationkey = t10.n_nationkey - INNER JOIN region AS t12 - ON t10.n_regionkey = t12.r_regionkey -) AS t14 + "t5"."p_partkey", + "t5"."p_name", + "t5"."p_mfgr", + "t5"."p_brand", + "t5"."p_type", + "t5"."p_size", + "t5"."p_container", + "t5"."p_retailprice", + "t5"."p_comment", + "t6"."ps_partkey", + "t6"."ps_suppkey", + "t6"."ps_availqty", + "t6"."ps_supplycost", + "t6"."ps_comment", + "t8"."s_suppkey", + "t8"."s_name", + "t8"."s_address", + "t8"."s_nationkey", + "t8"."s_phone", + "t8"."s_acctbal", + "t8"."s_comment", + "t10"."n_nationkey", + "t10"."n_name", + "t10"."n_regionkey", + "t10"."n_comment", + "t12"."r_regionkey", + "t12"."r_name", + "t12"."r_comment" + FROM "part" AS "t5" + INNER JOIN "partsupp" AS "t6" + ON "t5"."p_partkey" = "t6"."ps_partkey" + INNER JOIN "supplier" AS "t8" + ON "t8"."s_suppkey" = "t6"."ps_suppkey" + INNER JOIN "nation" AS "t10" + ON "t8"."s_nationkey" = "t10"."n_nationkey" + INNER JOIN "region" AS "t12" + ON "t10"."n_regionkey" = "t12"."r_regionkey" +) AS "t14" WHERE - t14.p_size = CAST(15 AS TINYINT) - AND t14.p_type LIKE '%BRASS' - AND t14.r_name = 'EUROPE' - AND t14.ps_supplycost = ( + "t14"."p_size" = CAST(15 AS TINYINT) + AND "t14"."p_type" LIKE '%BRASS' + AND "t14"."r_name" = 'EUROPE' + AND "t14"."ps_supplycost" = ( SELECT - MIN(t16.ps_supplycost) AS "Min(ps_supplycost)" + MIN("t16"."ps_supplycost") AS "Min(ps_supplycost)" FROM ( SELECT - t15.ps_partkey, - t15.ps_suppkey, - t15.ps_availqty, - t15.ps_supplycost, - t15.ps_comment, - t15.s_suppkey, - t15.s_name, - t15.s_address, - t15.s_nationkey, - t15.s_phone, - t15.s_acctbal, - t15.s_comment, - t15.n_nationkey, - t15.n_name, - t15.n_regionkey, - t15.n_comment, - t15.r_regionkey, - t15.r_name, - t15.r_comment + "t15"."ps_partkey", + 
"t15"."ps_suppkey", + "t15"."ps_availqty", + "t15"."ps_supplycost", + "t15"."ps_comment", + "t15"."s_suppkey", + "t15"."s_name", + "t15"."s_address", + "t15"."s_nationkey", + "t15"."s_phone", + "t15"."s_acctbal", + "t15"."s_comment", + "t15"."n_nationkey", + "t15"."n_name", + "t15"."n_regionkey", + "t15"."n_comment", + "t15"."r_regionkey", + "t15"."r_name", + "t15"."r_comment" FROM ( SELECT - t7.ps_partkey, - t7.ps_suppkey, - t7.ps_availqty, - t7.ps_supplycost, - t7.ps_comment, - t9.s_suppkey, - t9.s_name, - t9.s_address, - t9.s_nationkey, - t9.s_phone, - t9.s_acctbal, - t9.s_comment, - t11.n_nationkey, - t11.n_name, - t11.n_regionkey, - t11.n_comment, - t13.r_regionkey, - t13.r_name, - t13.r_comment - FROM partsupp AS t7 - INNER JOIN supplier AS t9 - ON t9.s_suppkey = t7.ps_suppkey - INNER JOIN nation AS t11 - ON t9.s_nationkey = t11.n_nationkey - INNER JOIN region AS t13 - ON t11.n_regionkey = t13.r_regionkey - ) AS t15 + "t7"."ps_partkey", + "t7"."ps_suppkey", + "t7"."ps_availqty", + "t7"."ps_supplycost", + "t7"."ps_comment", + "t9"."s_suppkey", + "t9"."s_name", + "t9"."s_address", + "t9"."s_nationkey", + "t9"."s_phone", + "t9"."s_acctbal", + "t9"."s_comment", + "t11"."n_nationkey", + "t11"."n_name", + "t11"."n_regionkey", + "t11"."n_comment", + "t13"."r_regionkey", + "t13"."r_name", + "t13"."r_comment" + FROM "partsupp" AS "t7" + INNER JOIN "supplier" AS "t9" + ON "t9"."s_suppkey" = "t7"."ps_suppkey" + INNER JOIN "nation" AS "t11" + ON "t9"."s_nationkey" = "t11"."n_nationkey" + INNER JOIN "region" AS "t13" + ON "t11"."n_regionkey" = "t13"."r_regionkey" + ) AS "t15" WHERE - t15.r_name = 'EUROPE' AND t14.p_partkey = t15.ps_partkey - ) AS t16 + "t15"."r_name" = 'EUROPE' AND "t14"."p_partkey" = "t15"."ps_partkey" + ) AS "t16" ) ORDER BY - t14.s_acctbal DESC, - t14.n_name ASC, - t14.s_name ASC, - t14.p_partkey ASC + "t14"."s_acctbal" DESC, + "t14"."n_name" ASC, + "t14"."s_name" ASC, + "t14"."p_partkey" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql index f1cd6fd9a332..c25bca3164a5 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql @@ -1,103 +1,103 @@ SELECT - t8.l_orderkey, - t8.revenue, - t8.o_orderdate, - t8.o_shippriority + "t8"."l_orderkey", + "t8"."revenue", + "t8"."o_orderdate", + "t8"."o_shippriority" FROM ( SELECT - t7.l_orderkey, - t7.o_orderdate, - t7.o_shippriority, - SUM(t7.l_extendedprice * ( - CAST(1 AS TINYINT) - t7.l_discount - )) AS revenue + "t7"."l_orderkey", + "t7"."o_orderdate", + "t7"."o_shippriority", + SUM("t7"."l_extendedprice" * ( + CAST(1 AS TINYINT) - "t7"."l_discount" + )) AS "revenue" FROM ( SELECT - t6.c_custkey, - t6.c_name, - t6.c_address, - t6.c_nationkey, - t6.c_phone, - t6.c_acctbal, - t6.c_mktsegment, - t6.c_comment, - t6.o_orderkey, - t6.o_custkey, - t6.o_orderstatus, - t6.o_totalprice, - t6.o_orderdate, - t6.o_orderpriority, - t6.o_clerk, - t6.o_shippriority, - t6.o_comment, - t6.l_orderkey, - t6.l_partkey, - t6.l_suppkey, - t6.l_linenumber, - t6.l_quantity, - t6.l_extendedprice, - t6.l_discount, - t6.l_tax, - t6.l_returnflag, - t6.l_linestatus, - t6.l_shipdate, - t6.l_commitdate, - t6.l_receiptdate, - t6.l_shipinstruct, - t6.l_shipmode, - t6.l_comment + "t6"."c_custkey", + "t6"."c_name", + "t6"."c_address", + "t6"."c_nationkey", + "t6"."c_phone", + "t6"."c_acctbal", + "t6"."c_mktsegment", + 
"t6"."c_comment", + "t6"."o_orderkey", + "t6"."o_custkey", + "t6"."o_orderstatus", + "t6"."o_totalprice", + "t6"."o_orderdate", + "t6"."o_orderpriority", + "t6"."o_clerk", + "t6"."o_shippriority", + "t6"."o_comment", + "t6"."l_orderkey", + "t6"."l_partkey", + "t6"."l_suppkey", + "t6"."l_linenumber", + "t6"."l_quantity", + "t6"."l_extendedprice", + "t6"."l_discount", + "t6"."l_tax", + "t6"."l_returnflag", + "t6"."l_linestatus", + "t6"."l_shipdate", + "t6"."l_commitdate", + "t6"."l_receiptdate", + "t6"."l_shipinstruct", + "t6"."l_shipmode", + "t6"."l_comment" FROM ( SELECT - t3.c_custkey, - t3.c_name, - t3.c_address, - t3.c_nationkey, - t3.c_phone, - t3.c_acctbal, - t3.c_mktsegment, - t3.c_comment, - t4.o_orderkey, - t4.o_custkey, - t4.o_orderstatus, - t4.o_totalprice, - t4.o_orderdate, - t4.o_orderpriority, - t4.o_clerk, - t4.o_shippriority, - t4.o_comment, - t5.l_orderkey, - t5.l_partkey, - t5.l_suppkey, - t5.l_linenumber, - t5.l_quantity, - t5.l_extendedprice, - t5.l_discount, - t5.l_tax, - t5.l_returnflag, - t5.l_linestatus, - t5.l_shipdate, - t5.l_commitdate, - t5.l_receiptdate, - t5.l_shipinstruct, - t5.l_shipmode, - t5.l_comment - FROM customer AS t3 - INNER JOIN orders AS t4 - ON t3.c_custkey = t4.o_custkey - INNER JOIN lineitem AS t5 - ON t5.l_orderkey = t4.o_orderkey - ) AS t6 + "t3"."c_custkey", + "t3"."c_name", + "t3"."c_address", + "t3"."c_nationkey", + "t3"."c_phone", + "t3"."c_acctbal", + "t3"."c_mktsegment", + "t3"."c_comment", + "t4"."o_orderkey", + "t4"."o_custkey", + "t4"."o_orderstatus", + "t4"."o_totalprice", + "t4"."o_orderdate", + "t4"."o_orderpriority", + "t4"."o_clerk", + "t4"."o_shippriority", + "t4"."o_comment", + "t5"."l_orderkey", + "t5"."l_partkey", + "t5"."l_suppkey", + "t5"."l_linenumber", + "t5"."l_quantity", + "t5"."l_extendedprice", + "t5"."l_discount", + "t5"."l_tax", + "t5"."l_returnflag", + "t5"."l_linestatus", + "t5"."l_shipdate", + "t5"."l_commitdate", + "t5"."l_receiptdate", + "t5"."l_shipinstruct", + "t5"."l_shipmode", + "t5"."l_comment" + FROM "customer" AS "t3" + INNER JOIN "orders" AS "t4" + ON "t3"."c_custkey" = "t4"."o_custkey" + INNER JOIN "lineitem" AS "t5" + ON "t5"."l_orderkey" = "t4"."o_orderkey" + ) AS "t6" WHERE - t6.c_mktsegment = 'BUILDING' - AND t6.o_orderdate < MAKE_DATE(1995, 3, 15) - AND t6.l_shipdate > MAKE_DATE(1995, 3, 15) - ) AS t7 + "t6"."c_mktsegment" = 'BUILDING' + AND "t6"."o_orderdate" < MAKE_DATE(1995, 3, 15) + AND "t6"."l_shipdate" > MAKE_DATE(1995, 3, 15) + ) AS "t7" GROUP BY 1, 2, 3 -) AS t8 +) AS "t8" ORDER BY - t8.revenue DESC, - t8.o_orderdate ASC + "t8"."revenue" DESC, + "t8"."o_orderdate" ASC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql index 77ba19f9cc07..8131f812a2a8 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql @@ -1,39 +1,40 @@ SELECT - t4.o_orderpriority, - t4.order_count + "t4"."o_orderpriority", + "t4"."order_count" FROM ( SELECT - t3.o_orderpriority, - COUNT(*) AS order_count + "t3"."o_orderpriority", + COUNT(*) AS "order_count" FROM ( SELECT - t0.o_orderkey, - t0.o_custkey, - t0.o_orderstatus, - t0.o_totalprice, - t0.o_orderdate, - t0.o_orderpriority, - t0.o_clerk, - t0.o_shippriority, - t0.o_comment - FROM orders AS t0 + "t0"."o_orderkey", + "t0"."o_custkey", + "t0"."o_orderstatus", + "t0"."o_totalprice", + "t0"."o_orderdate", + 
"t0"."o_orderpriority", + "t0"."o_clerk", + "t0"."o_shippriority", + "t0"."o_comment" + FROM "orders" AS "t0" WHERE EXISTS( SELECT CAST(1 AS TINYINT) AS "1" - FROM lineitem AS t1 + FROM "lineitem" AS "t1" WHERE ( - t1.l_orderkey = t0.o_orderkey - ) AND ( - t1.l_commitdate < t1.l_receiptdate + "t1"."l_orderkey" = "t0"."o_orderkey" + ) + AND ( + "t1"."l_commitdate" < "t1"."l_receiptdate" ) ) - AND t0.o_orderdate >= MAKE_DATE(1993, 7, 1) - AND t0.o_orderdate < MAKE_DATE(1993, 10, 1) - ) AS t3 + AND "t0"."o_orderdate" >= MAKE_DATE(1993, 7, 1) + AND "t0"."o_orderdate" < MAKE_DATE(1993, 10, 1) + ) AS "t3" GROUP BY 1 -) AS t4 +) AS "t4" ORDER BY - t4.o_orderpriority ASC \ No newline at end of file + "t4"."o_orderpriority" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql index 2ee7ce67f6ed..4cbff7141c68 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql @@ -1,129 +1,129 @@ SELECT - t14.n_name, - t14.revenue + "t14"."n_name", + "t14"."revenue" FROM ( SELECT - t13.n_name, - SUM(t13.l_extendedprice * ( - CAST(1 AS TINYINT) - t13.l_discount - )) AS revenue + "t13"."n_name", + SUM("t13"."l_extendedprice" * ( + CAST(1 AS TINYINT) - "t13"."l_discount" + )) AS "revenue" FROM ( SELECT - t12.c_custkey, - t12.c_name, - t12.c_address, - t12.c_nationkey, - t12.c_phone, - t12.c_acctbal, - t12.c_mktsegment, - t12.c_comment, - t12.o_orderkey, - t12.o_custkey, - t12.o_orderstatus, - t12.o_totalprice, - t12.o_orderdate, - t12.o_orderpriority, - t12.o_clerk, - t12.o_shippriority, - t12.o_comment, - t12.l_orderkey, - t12.l_partkey, - t12.l_suppkey, - t12.l_linenumber, - t12.l_quantity, - t12.l_extendedprice, - t12.l_discount, - t12.l_tax, - t12.l_returnflag, - t12.l_linestatus, - t12.l_shipdate, - t12.l_commitdate, - t12.l_receiptdate, - t12.l_shipinstruct, - t12.l_shipmode, - t12.l_comment, - t12.s_suppkey, - t12.s_name, - t12.s_address, - t12.s_nationkey, - t12.s_phone, - t12.s_acctbal, - t12.s_comment, - t12.n_nationkey, - t12.n_name, - t12.n_regionkey, - t12.n_comment, - t12.r_regionkey, - t12.r_name, - t12.r_comment + "t12"."c_custkey", + "t12"."c_name", + "t12"."c_address", + "t12"."c_nationkey", + "t12"."c_phone", + "t12"."c_acctbal", + "t12"."c_mktsegment", + "t12"."c_comment", + "t12"."o_orderkey", + "t12"."o_custkey", + "t12"."o_orderstatus", + "t12"."o_totalprice", + "t12"."o_orderdate", + "t12"."o_orderpriority", + "t12"."o_clerk", + "t12"."o_shippriority", + "t12"."o_comment", + "t12"."l_orderkey", + "t12"."l_partkey", + "t12"."l_suppkey", + "t12"."l_linenumber", + "t12"."l_quantity", + "t12"."l_extendedprice", + "t12"."l_discount", + "t12"."l_tax", + "t12"."l_returnflag", + "t12"."l_linestatus", + "t12"."l_shipdate", + "t12"."l_commitdate", + "t12"."l_receiptdate", + "t12"."l_shipinstruct", + "t12"."l_shipmode", + "t12"."l_comment", + "t12"."s_suppkey", + "t12"."s_name", + "t12"."s_address", + "t12"."s_nationkey", + "t12"."s_phone", + "t12"."s_acctbal", + "t12"."s_comment", + "t12"."n_nationkey", + "t12"."n_name", + "t12"."n_regionkey", + "t12"."n_comment", + "t12"."r_regionkey", + "t12"."r_name", + "t12"."r_comment" FROM ( SELECT - t6.c_custkey, - t6.c_name, - t6.c_address, - t6.c_nationkey, - t6.c_phone, - t6.c_acctbal, - t6.c_mktsegment, - t6.c_comment, - t7.o_orderkey, - t7.o_custkey, - t7.o_orderstatus, - t7.o_totalprice, - t7.o_orderdate, - 
t7.o_orderpriority, - t7.o_clerk, - t7.o_shippriority, - t7.o_comment, - t8.l_orderkey, - t8.l_partkey, - t8.l_suppkey, - t8.l_linenumber, - t8.l_quantity, - t8.l_extendedprice, - t8.l_discount, - t8.l_tax, - t8.l_returnflag, - t8.l_linestatus, - t8.l_shipdate, - t8.l_commitdate, - t8.l_receiptdate, - t8.l_shipinstruct, - t8.l_shipmode, - t8.l_comment, - t9.s_suppkey, - t9.s_name, - t9.s_address, - t9.s_nationkey, - t9.s_phone, - t9.s_acctbal, - t9.s_comment, - t10.n_nationkey, - t10.n_name, - t10.n_regionkey, - t10.n_comment, - t11.r_regionkey, - t11.r_name, - t11.r_comment - FROM customer AS t6 - INNER JOIN orders AS t7 - ON t6.c_custkey = t7.o_custkey - INNER JOIN lineitem AS t8 - ON t8.l_orderkey = t7.o_orderkey - INNER JOIN supplier AS t9 - ON t8.l_suppkey = t9.s_suppkey - INNER JOIN nation AS t10 - ON t6.c_nationkey = t9.s_nationkey AND t9.s_nationkey = t10.n_nationkey - INNER JOIN region AS t11 - ON t10.n_regionkey = t11.r_regionkey - ) AS t12 + "t6"."c_custkey", + "t6"."c_name", + "t6"."c_address", + "t6"."c_nationkey", + "t6"."c_phone", + "t6"."c_acctbal", + "t6"."c_mktsegment", + "t6"."c_comment", + "t7"."o_orderkey", + "t7"."o_custkey", + "t7"."o_orderstatus", + "t7"."o_totalprice", + "t7"."o_orderdate", + "t7"."o_orderpriority", + "t7"."o_clerk", + "t7"."o_shippriority", + "t7"."o_comment", + "t8"."l_orderkey", + "t8"."l_partkey", + "t8"."l_suppkey", + "t8"."l_linenumber", + "t8"."l_quantity", + "t8"."l_extendedprice", + "t8"."l_discount", + "t8"."l_tax", + "t8"."l_returnflag", + "t8"."l_linestatus", + "t8"."l_shipdate", + "t8"."l_commitdate", + "t8"."l_receiptdate", + "t8"."l_shipinstruct", + "t8"."l_shipmode", + "t8"."l_comment", + "t9"."s_suppkey", + "t9"."s_name", + "t9"."s_address", + "t9"."s_nationkey", + "t9"."s_phone", + "t9"."s_acctbal", + "t9"."s_comment", + "t10"."n_nationkey", + "t10"."n_name", + "t10"."n_regionkey", + "t10"."n_comment", + "t11"."r_regionkey", + "t11"."r_name", + "t11"."r_comment" + FROM "customer" AS "t6" + INNER JOIN "orders" AS "t7" + ON "t6"."c_custkey" = "t7"."o_custkey" + INNER JOIN "lineitem" AS "t8" + ON "t8"."l_orderkey" = "t7"."o_orderkey" + INNER JOIN "supplier" AS "t9" + ON "t8"."l_suppkey" = "t9"."s_suppkey" + INNER JOIN "nation" AS "t10" + ON "t6"."c_nationkey" = "t9"."s_nationkey" AND "t9"."s_nationkey" = "t10"."n_nationkey" + INNER JOIN "region" AS "t11" + ON "t10"."n_regionkey" = "t11"."r_regionkey" + ) AS "t12" WHERE - t12.r_name = 'ASIA' - AND t12.o_orderdate >= MAKE_DATE(1994, 1, 1) - AND t12.o_orderdate < MAKE_DATE(1995, 1, 1) - ) AS t13 + "t12"."r_name" = 'ASIA' + AND "t12"."o_orderdate" >= MAKE_DATE(1994, 1, 1) + AND "t12"."o_orderdate" < MAKE_DATE(1995, 1, 1) + ) AS "t13" GROUP BY 1 -) AS t14 +) AS "t14" ORDER BY - t14.revenue DESC \ No newline at end of file + "t14"."revenue" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql index eea01a0277a6..6b845f769470 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql @@ -1,27 +1,27 @@ SELECT - SUM(t1.l_extendedprice * t1.l_discount) AS revenue + SUM("t1"."l_extendedprice" * "t1"."l_discount") AS "revenue" FROM ( SELECT - t0.l_orderkey, - t0.l_partkey, - t0.l_suppkey, - t0.l_linenumber, - t0.l_quantity, - t0.l_extendedprice, - t0.l_discount, - t0.l_tax, - t0.l_returnflag, - t0.l_linestatus, - t0.l_shipdate, - t0.l_commitdate, - 
t0.l_receiptdate, - t0.l_shipinstruct, - t0.l_shipmode, - t0.l_comment - FROM lineitem AS t0 + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + "t0"."l_quantity", + "t0"."l_extendedprice", + "t0"."l_discount", + "t0"."l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" WHERE - t0.l_shipdate >= MAKE_DATE(1994, 1, 1) - AND t0.l_shipdate < MAKE_DATE(1995, 1, 1) - AND t0.l_discount BETWEEN CAST(0.05 AS DOUBLE) AND CAST(0.07 AS DOUBLE) - AND t0.l_quantity < CAST(24 AS TINYINT) -) AS t1 \ No newline at end of file + "t0"."l_shipdate" >= MAKE_DATE(1994, 1, 1) + AND "t0"."l_shipdate" < MAKE_DATE(1995, 1, 1) + AND "t0"."l_discount" BETWEEN CAST(0.05 AS DOUBLE) AND CAST(0.07 AS DOUBLE) + AND "t0"."l_quantity" < CAST(24 AS TINYINT) +) AS "t1" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql index 1f0d06b91d10..fca7ba9abc97 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql @@ -1,71 +1,71 @@ SELECT - t14.supp_nation, - t14.cust_nation, - t14.l_year, - t14.revenue + "t14"."supp_nation", + "t14"."cust_nation", + "t14"."l_year", + "t14"."revenue" FROM ( SELECT - t13.supp_nation, - t13.cust_nation, - t13.l_year, - SUM(t13.volume) AS revenue + "t13"."supp_nation", + "t13"."cust_nation", + "t13"."l_year", + SUM("t13"."volume") AS "revenue" FROM ( SELECT - t12.supp_nation, - t12.cust_nation, - t12.l_shipdate, - t12.l_extendedprice, - t12.l_discount, - t12.l_year, - t12.volume + "t12"."supp_nation", + "t12"."cust_nation", + "t12"."l_shipdate", + "t12"."l_extendedprice", + "t12"."l_discount", + "t12"."l_year", + "t12"."volume" FROM ( SELECT - t9.n_name AS supp_nation, - t11.n_name AS cust_nation, - t6.l_shipdate, - t6.l_extendedprice, - t6.l_discount, - EXTRACT(year FROM t6.l_shipdate) AS l_year, - t6.l_extendedprice * ( - CAST(1 AS TINYINT) - t6.l_discount - ) AS volume - FROM supplier AS t5 - INNER JOIN lineitem AS t6 - ON t5.s_suppkey = t6.l_suppkey - INNER JOIN orders AS t7 - ON t7.o_orderkey = t6.l_orderkey - INNER JOIN customer AS t8 - ON t8.c_custkey = t7.o_custkey - INNER JOIN nation AS t9 - ON t5.s_nationkey = t9.n_nationkey - INNER JOIN nation AS t11 - ON t8.c_nationkey = t11.n_nationkey - ) AS t12 + "t9"."n_name" AS "supp_nation", + "t11"."n_name" AS "cust_nation", + "t6"."l_shipdate", + "t6"."l_extendedprice", + "t6"."l_discount", + EXTRACT(year FROM "t6"."l_shipdate") AS "l_year", + "t6"."l_extendedprice" * ( + CAST(1 AS TINYINT) - "t6"."l_discount" + ) AS "volume" + FROM "supplier" AS "t5" + INNER JOIN "lineitem" AS "t6" + ON "t5"."s_suppkey" = "t6"."l_suppkey" + INNER JOIN "orders" AS "t7" + ON "t7"."o_orderkey" = "t6"."l_orderkey" + INNER JOIN "customer" AS "t8" + ON "t8"."c_custkey" = "t7"."o_custkey" + INNER JOIN "nation" AS "t9" + ON "t5"."s_nationkey" = "t9"."n_nationkey" + INNER JOIN "nation" AS "t11" + ON "t8"."c_nationkey" = "t11"."n_nationkey" + ) AS "t12" WHERE ( ( ( - t12.cust_nation = 'FRANCE' + "t12"."cust_nation" = 'FRANCE' ) AND ( - t12.supp_nation = 'GERMANY' + "t12"."supp_nation" = 'GERMANY' ) ) OR ( ( - t12.cust_nation = 'GERMANY' + "t12"."cust_nation" = 'GERMANY' ) AND ( - t12.supp_nation = 'FRANCE' + "t12"."supp_nation" = 'FRANCE' ) ) ) - 
AND t12.l_shipdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) - ) AS t13 + AND "t12"."l_shipdate" BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) + ) AS "t13" GROUP BY 1, 2, 3 -) AS t14 +) AS "t14" ORDER BY - t14.supp_nation ASC, - t14.cust_nation ASC, - t14.l_year ASC \ No newline at end of file + "t14"."supp_nation" ASC, + "t14"."cust_nation" ASC, + "t14"."l_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql index a06154f0383f..e2a61a509543 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql @@ -1,52 +1,52 @@ SELECT - t18.o_year, - t18.mkt_share + "t18"."o_year", + "t18"."mkt_share" FROM ( SELECT - t17.o_year, - SUM(t17.nation_volume) / SUM(t17.volume) AS mkt_share + "t17"."o_year", + SUM("t17"."nation_volume") / SUM("t17"."volume") AS "mkt_share" FROM ( SELECT - t16.o_year, - t16.volume, - t16.nation, - t16.r_name, - t16.o_orderdate, - t16.p_type, - CASE WHEN t16.nation = 'BRAZIL' THEN t16.volume ELSE CAST(0 AS TINYINT) END AS nation_volume + "t16"."o_year", + "t16"."volume", + "t16"."nation", + "t16"."r_name", + "t16"."o_orderdate", + "t16"."p_type", + CASE WHEN "t16"."nation" = 'BRAZIL' THEN "t16"."volume" ELSE CAST(0 AS TINYINT) END AS "nation_volume" FROM ( SELECT - EXTRACT(year FROM t10.o_orderdate) AS o_year, - t8.l_extendedprice * ( - CAST(1 AS TINYINT) - t8.l_discount - ) AS volume, - t15.n_name AS nation, - t14.r_name, - t10.o_orderdate, - t7.p_type - FROM part AS t7 - INNER JOIN lineitem AS t8 - ON t7.p_partkey = t8.l_partkey - INNER JOIN supplier AS t9 - ON t9.s_suppkey = t8.l_suppkey - INNER JOIN orders AS t10 - ON t8.l_orderkey = t10.o_orderkey - INNER JOIN customer AS t11 - ON t10.o_custkey = t11.c_custkey - INNER JOIN nation AS t12 - ON t11.c_nationkey = t12.n_nationkey - INNER JOIN region AS t14 - ON t12.n_regionkey = t14.r_regionkey - INNER JOIN nation AS t15 - ON t9.s_nationkey = t15.n_nationkey - ) AS t16 + EXTRACT(year FROM "t10"."o_orderdate") AS "o_year", + "t8"."l_extendedprice" * ( + CAST(1 AS TINYINT) - "t8"."l_discount" + ) AS "volume", + "t15"."n_name" AS "nation", + "t14"."r_name", + "t10"."o_orderdate", + "t7"."p_type" + FROM "part" AS "t7" + INNER JOIN "lineitem" AS "t8" + ON "t7"."p_partkey" = "t8"."l_partkey" + INNER JOIN "supplier" AS "t9" + ON "t9"."s_suppkey" = "t8"."l_suppkey" + INNER JOIN "orders" AS "t10" + ON "t8"."l_orderkey" = "t10"."o_orderkey" + INNER JOIN "customer" AS "t11" + ON "t10"."o_custkey" = "t11"."c_custkey" + INNER JOIN "nation" AS "t12" + ON "t11"."c_nationkey" = "t12"."n_nationkey" + INNER JOIN "region" AS "t14" + ON "t12"."n_regionkey" = "t14"."r_regionkey" + INNER JOIN "nation" AS "t15" + ON "t9"."s_nationkey" = "t15"."n_nationkey" + ) AS "t16" WHERE - t16.r_name = 'AMERICA' - AND t16.o_orderdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) - AND t16.p_type = 'ECONOMY ANODIZED STEEL' - ) AS t17 + "t16"."r_name" = 'AMERICA' + AND "t16"."o_orderdate" BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) + AND "t16"."p_type" = 'ECONOMY ANODIZED STEEL' + ) AS "t17" GROUP BY 1 -) AS t18 +) AS "t18" ORDER BY - t18.o_year ASC \ No newline at end of file + "t18"."o_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql 
b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql index b146d5c6cea8..7ea2a643d044 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql @@ -1,49 +1,49 @@ SELECT - t14.nation, - t14.o_year, - t14.sum_profit + "t14"."nation", + "t14"."o_year", + "t14"."sum_profit" FROM ( SELECT - t13.nation, - t13.o_year, - SUM(t13.amount) AS sum_profit + "t13"."nation", + "t13"."o_year", + SUM("t13"."amount") AS "sum_profit" FROM ( SELECT - t12.amount, - t12.o_year, - t12.nation, - t12.p_name + "t12"."amount", + "t12"."o_year", + "t12"."nation", + "t12"."p_name" FROM ( SELECT ( - t6.l_extendedprice * ( - CAST(1 AS TINYINT) - t6.l_discount + "t6"."l_extendedprice" * ( + CAST(1 AS TINYINT) - "t6"."l_discount" ) ) - ( - t8.ps_supplycost * t6.l_quantity - ) AS amount, - EXTRACT(year FROM t10.o_orderdate) AS o_year, - t11.n_name AS nation, - t9.p_name - FROM lineitem AS t6 - INNER JOIN supplier AS t7 - ON t7.s_suppkey = t6.l_suppkey - INNER JOIN partsupp AS t8 - ON t8.ps_suppkey = t6.l_suppkey AND t8.ps_partkey = t6.l_partkey - INNER JOIN part AS t9 - ON t9.p_partkey = t6.l_partkey - INNER JOIN orders AS t10 - ON t10.o_orderkey = t6.l_orderkey - INNER JOIN nation AS t11 - ON t7.s_nationkey = t11.n_nationkey - ) AS t12 + "t8"."ps_supplycost" * "t6"."l_quantity" + ) AS "amount", + EXTRACT(year FROM "t10"."o_orderdate") AS "o_year", + "t11"."n_name" AS "nation", + "t9"."p_name" + FROM "lineitem" AS "t6" + INNER JOIN "supplier" AS "t7" + ON "t7"."s_suppkey" = "t6"."l_suppkey" + INNER JOIN "partsupp" AS "t8" + ON "t8"."ps_suppkey" = "t6"."l_suppkey" AND "t8"."ps_partkey" = "t6"."l_partkey" + INNER JOIN "part" AS "t9" + ON "t9"."p_partkey" = "t6"."l_partkey" + INNER JOIN "orders" AS "t10" + ON "t10"."o_orderkey" = "t6"."l_orderkey" + INNER JOIN "nation" AS "t11" + ON "t7"."s_nationkey" = "t11"."n_nationkey" + ) AS "t12" WHERE - t12.p_name LIKE '%green%' - ) AS t13 + "t12"."p_name" LIKE '%green%' + ) AS "t13" GROUP BY 1, 2 -) AS t14 +) AS "t14" ORDER BY - t14.nation ASC, - t14.o_year DESC \ No newline at end of file + "t14"."nation" ASC, + "t14"."o_year" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql index 33986b7a70e9..94d73f54c8dc 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql @@ -1,115 +1,115 @@ SELECT - t10.c_custkey, - t10.c_name, - t10.revenue, - t10.c_acctbal, - t10.n_name, - t10.c_address, - t10.c_phone, - t10.c_comment + "t10"."c_custkey", + "t10"."c_name", + "t10"."revenue", + "t10"."c_acctbal", + "t10"."n_name", + "t10"."c_address", + "t10"."c_phone", + "t10"."c_comment" FROM ( SELECT - t9.c_custkey, - t9.c_name, - t9.c_acctbal, - t9.n_name, - t9.c_address, - t9.c_phone, - t9.c_comment, - SUM(t9.l_extendedprice * ( - CAST(1 AS TINYINT) - t9.l_discount - )) AS revenue + "t9"."c_custkey", + "t9"."c_name", + "t9"."c_acctbal", + "t9"."n_name", + "t9"."c_address", + "t9"."c_phone", + "t9"."c_comment", + SUM("t9"."l_extendedprice" * ( + CAST(1 AS TINYINT) - "t9"."l_discount" + )) AS "revenue" FROM ( SELECT - t8.c_custkey, - t8.c_name, - t8.c_address, - t8.c_nationkey, - t8.c_phone, - t8.c_acctbal, - t8.c_mktsegment, - t8.c_comment, - t8.o_orderkey, - t8.o_custkey, - t8.o_orderstatus, - t8.o_totalprice, - t8.o_orderdate, - 
t8.o_orderpriority, - t8.o_clerk, - t8.o_shippriority, - t8.o_comment, - t8.l_orderkey, - t8.l_partkey, - t8.l_suppkey, - t8.l_linenumber, - t8.l_quantity, - t8.l_extendedprice, - t8.l_discount, - t8.l_tax, - t8.l_returnflag, - t8.l_linestatus, - t8.l_shipdate, - t8.l_commitdate, - t8.l_receiptdate, - t8.l_shipinstruct, - t8.l_shipmode, - t8.l_comment, - t8.n_nationkey, - t8.n_name, - t8.n_regionkey, - t8.n_comment + "t8"."c_custkey", + "t8"."c_name", + "t8"."c_address", + "t8"."c_nationkey", + "t8"."c_phone", + "t8"."c_acctbal", + "t8"."c_mktsegment", + "t8"."c_comment", + "t8"."o_orderkey", + "t8"."o_custkey", + "t8"."o_orderstatus", + "t8"."o_totalprice", + "t8"."o_orderdate", + "t8"."o_orderpriority", + "t8"."o_clerk", + "t8"."o_shippriority", + "t8"."o_comment", + "t8"."l_orderkey", + "t8"."l_partkey", + "t8"."l_suppkey", + "t8"."l_linenumber", + "t8"."l_quantity", + "t8"."l_extendedprice", + "t8"."l_discount", + "t8"."l_tax", + "t8"."l_returnflag", + "t8"."l_linestatus", + "t8"."l_shipdate", + "t8"."l_commitdate", + "t8"."l_receiptdate", + "t8"."l_shipinstruct", + "t8"."l_shipmode", + "t8"."l_comment", + "t8"."n_nationkey", + "t8"."n_name", + "t8"."n_regionkey", + "t8"."n_comment" FROM ( SELECT - t4.c_custkey, - t4.c_name, - t4.c_address, - t4.c_nationkey, - t4.c_phone, - t4.c_acctbal, - t4.c_mktsegment, - t4.c_comment, - t5.o_orderkey, - t5.o_custkey, - t5.o_orderstatus, - t5.o_totalprice, - t5.o_orderdate, - t5.o_orderpriority, - t5.o_clerk, - t5.o_shippriority, - t5.o_comment, - t6.l_orderkey, - t6.l_partkey, - t6.l_suppkey, - t6.l_linenumber, - t6.l_quantity, - t6.l_extendedprice, - t6.l_discount, - t6.l_tax, - t6.l_returnflag, - t6.l_linestatus, - t6.l_shipdate, - t6.l_commitdate, - t6.l_receiptdate, - t6.l_shipinstruct, - t6.l_shipmode, - t6.l_comment, - t7.n_nationkey, - t7.n_name, - t7.n_regionkey, - t7.n_comment - FROM customer AS t4 - INNER JOIN orders AS t5 - ON t4.c_custkey = t5.o_custkey - INNER JOIN lineitem AS t6 - ON t6.l_orderkey = t5.o_orderkey - INNER JOIN nation AS t7 - ON t4.c_nationkey = t7.n_nationkey - ) AS t8 + "t4"."c_custkey", + "t4"."c_name", + "t4"."c_address", + "t4"."c_nationkey", + "t4"."c_phone", + "t4"."c_acctbal", + "t4"."c_mktsegment", + "t4"."c_comment", + "t5"."o_orderkey", + "t5"."o_custkey", + "t5"."o_orderstatus", + "t5"."o_totalprice", + "t5"."o_orderdate", + "t5"."o_orderpriority", + "t5"."o_clerk", + "t5"."o_shippriority", + "t5"."o_comment", + "t6"."l_orderkey", + "t6"."l_partkey", + "t6"."l_suppkey", + "t6"."l_linenumber", + "t6"."l_quantity", + "t6"."l_extendedprice", + "t6"."l_discount", + "t6"."l_tax", + "t6"."l_returnflag", + "t6"."l_linestatus", + "t6"."l_shipdate", + "t6"."l_commitdate", + "t6"."l_receiptdate", + "t6"."l_shipinstruct", + "t6"."l_shipmode", + "t6"."l_comment", + "t7"."n_nationkey", + "t7"."n_name", + "t7"."n_regionkey", + "t7"."n_comment" + FROM "customer" AS "t4" + INNER JOIN "orders" AS "t5" + ON "t4"."c_custkey" = "t5"."o_custkey" + INNER JOIN "lineitem" AS "t6" + ON "t6"."l_orderkey" = "t5"."o_orderkey" + INNER JOIN "nation" AS "t7" + ON "t4"."c_nationkey" = "t7"."n_nationkey" + ) AS "t8" WHERE - t8.o_orderdate >= MAKE_DATE(1993, 10, 1) - AND t8.o_orderdate < MAKE_DATE(1994, 1, 1) - AND t8.l_returnflag = 'R' - ) AS t9 + "t8"."o_orderdate" >= MAKE_DATE(1993, 10, 1) + AND "t8"."o_orderdate" < MAKE_DATE(1994, 1, 1) + AND "t8"."l_returnflag" = 'R' + ) AS "t9" GROUP BY 1, 2, @@ -118,7 +118,7 @@ FROM ( 5, 6, 7 -) AS t10 +) AS "t10" ORDER BY - t10.revenue DESC + "t10"."revenue" DESC LIMIT 20 \ No newline at end of file 
diff --git a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql index 594e5a7db6bd..3f94c814bcf5 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql @@ -1,109 +1,109 @@ SELECT - t8.ps_partkey, - t8.value + "t8"."ps_partkey", + "t8"."value" FROM ( SELECT - t7.ps_partkey, - SUM(t7.ps_supplycost * t7.ps_availqty) AS value + "t7"."ps_partkey", + SUM("t7"."ps_supplycost" * "t7"."ps_availqty") AS "value" FROM ( SELECT - t6.ps_partkey, - t6.ps_suppkey, - t6.ps_availqty, - t6.ps_supplycost, - t6.ps_comment, - t6.s_suppkey, - t6.s_name, - t6.s_address, - t6.s_nationkey, - t6.s_phone, - t6.s_acctbal, - t6.s_comment, - t6.n_nationkey, - t6.n_name, - t6.n_regionkey, - t6.n_comment + "t6"."ps_partkey", + "t6"."ps_suppkey", + "t6"."ps_availqty", + "t6"."ps_supplycost", + "t6"."ps_comment", + "t6"."s_suppkey", + "t6"."s_name", + "t6"."s_address", + "t6"."s_nationkey", + "t6"."s_phone", + "t6"."s_acctbal", + "t6"."s_comment", + "t6"."n_nationkey", + "t6"."n_name", + "t6"."n_regionkey", + "t6"."n_comment" FROM ( SELECT - t3.ps_partkey, - t3.ps_suppkey, - t3.ps_availqty, - t3.ps_supplycost, - t3.ps_comment, - t4.s_suppkey, - t4.s_name, - t4.s_address, - t4.s_nationkey, - t4.s_phone, - t4.s_acctbal, - t4.s_comment, - t5.n_nationkey, - t5.n_name, - t5.n_regionkey, - t5.n_comment - FROM partsupp AS t3 - INNER JOIN supplier AS t4 - ON t3.ps_suppkey = t4.s_suppkey - INNER JOIN nation AS t5 - ON t5.n_nationkey = t4.s_nationkey - ) AS t6 + "t3"."ps_partkey", + "t3"."ps_suppkey", + "t3"."ps_availqty", + "t3"."ps_supplycost", + "t3"."ps_comment", + "t4"."s_suppkey", + "t4"."s_name", + "t4"."s_address", + "t4"."s_nationkey", + "t4"."s_phone", + "t4"."s_acctbal", + "t4"."s_comment", + "t5"."n_nationkey", + "t5"."n_name", + "t5"."n_regionkey", + "t5"."n_comment" + FROM "partsupp" AS "t3" + INNER JOIN "supplier" AS "t4" + ON "t3"."ps_suppkey" = "t4"."s_suppkey" + INNER JOIN "nation" AS "t5" + ON "t5"."n_nationkey" = "t4"."s_nationkey" + ) AS "t6" WHERE - t6.n_name = 'GERMANY' - ) AS t7 + "t6"."n_name" = 'GERMANY' + ) AS "t7" GROUP BY 1 -) AS t8 +) AS "t8" WHERE - t8.value > ( + "t8"."value" > ( ( SELECT - SUM(t7.ps_supplycost * t7.ps_availqty) AS "Sum(Multiply(ps_supplycost, ps_availqty))" + SUM("t7"."ps_supplycost" * "t7"."ps_availqty") AS "Sum(Multiply(ps_supplycost, ps_availqty))" FROM ( SELECT - t6.ps_partkey, - t6.ps_suppkey, - t6.ps_availqty, - t6.ps_supplycost, - t6.ps_comment, - t6.s_suppkey, - t6.s_name, - t6.s_address, - t6.s_nationkey, - t6.s_phone, - t6.s_acctbal, - t6.s_comment, - t6.n_nationkey, - t6.n_name, - t6.n_regionkey, - t6.n_comment + "t6"."ps_partkey", + "t6"."ps_suppkey", + "t6"."ps_availqty", + "t6"."ps_supplycost", + "t6"."ps_comment", + "t6"."s_suppkey", + "t6"."s_name", + "t6"."s_address", + "t6"."s_nationkey", + "t6"."s_phone", + "t6"."s_acctbal", + "t6"."s_comment", + "t6"."n_nationkey", + "t6"."n_name", + "t6"."n_regionkey", + "t6"."n_comment" FROM ( SELECT - t3.ps_partkey, - t3.ps_suppkey, - t3.ps_availqty, - t3.ps_supplycost, - t3.ps_comment, - t4.s_suppkey, - t4.s_name, - t4.s_address, - t4.s_nationkey, - t4.s_phone, - t4.s_acctbal, - t4.s_comment, - t5.n_nationkey, - t5.n_name, - t5.n_regionkey, - t5.n_comment - FROM partsupp AS t3 - INNER JOIN supplier AS t4 - ON t3.ps_suppkey = t4.s_suppkey - INNER JOIN nation AS t5 - ON t5.n_nationkey = t4.s_nationkey - ) AS t6 + 
"t3"."ps_partkey", + "t3"."ps_suppkey", + "t3"."ps_availqty", + "t3"."ps_supplycost", + "t3"."ps_comment", + "t4"."s_suppkey", + "t4"."s_name", + "t4"."s_address", + "t4"."s_nationkey", + "t4"."s_phone", + "t4"."s_acctbal", + "t4"."s_comment", + "t5"."n_nationkey", + "t5"."n_name", + "t5"."n_regionkey", + "t5"."n_comment" + FROM "partsupp" AS "t3" + INNER JOIN "supplier" AS "t4" + ON "t3"."ps_suppkey" = "t4"."s_suppkey" + INNER JOIN "nation" AS "t5" + ON "t5"."n_nationkey" = "t4"."s_nationkey" + ) AS "t6" WHERE - t6.n_name = 'GERMANY' - ) AS t7 + "t6"."n_name" = 'GERMANY' + ) AS "t7" ) * CAST(0.0001 AS DOUBLE) ) ORDER BY - t8.value DESC \ No newline at end of file + "t8"."value" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql index ab4f275c250c..ffef3be8f840 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql @@ -1,95 +1,95 @@ SELECT - t6.l_shipmode, - t6.high_line_count, - t6.low_line_count + "t6"."l_shipmode", + "t6"."high_line_count", + "t6"."low_line_count" FROM ( SELECT - t5.l_shipmode, + "t5"."l_shipmode", SUM( - CASE t5.o_orderpriority + CASE "t5"."o_orderpriority" WHEN '1-URGENT' THEN CAST(1 AS TINYINT) WHEN '2-HIGH' THEN CAST(1 AS TINYINT) ELSE CAST(0 AS TINYINT) END - ) AS high_line_count, + ) AS "high_line_count", SUM( - CASE t5.o_orderpriority + CASE "t5"."o_orderpriority" WHEN '1-URGENT' THEN CAST(0 AS TINYINT) WHEN '2-HIGH' THEN CAST(0 AS TINYINT) ELSE CAST(1 AS TINYINT) END - ) AS low_line_count + ) AS "low_line_count" FROM ( SELECT - t4.o_orderkey, - t4.o_custkey, - t4.o_orderstatus, - t4.o_totalprice, - t4.o_orderdate, - t4.o_orderpriority, - t4.o_clerk, - t4.o_shippriority, - t4.o_comment, - t4.l_orderkey, - t4.l_partkey, - t4.l_suppkey, - t4.l_linenumber, - t4.l_quantity, - t4.l_extendedprice, - t4.l_discount, - t4.l_tax, - t4.l_returnflag, - t4.l_linestatus, - t4.l_shipdate, - t4.l_commitdate, - t4.l_receiptdate, - t4.l_shipinstruct, - t4.l_shipmode, - t4.l_comment + "t4"."o_orderkey", + "t4"."o_custkey", + "t4"."o_orderstatus", + "t4"."o_totalprice", + "t4"."o_orderdate", + "t4"."o_orderpriority", + "t4"."o_clerk", + "t4"."o_shippriority", + "t4"."o_comment", + "t4"."l_orderkey", + "t4"."l_partkey", + "t4"."l_suppkey", + "t4"."l_linenumber", + "t4"."l_quantity", + "t4"."l_extendedprice", + "t4"."l_discount", + "t4"."l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment" FROM ( SELECT - t2.o_orderkey, - t2.o_custkey, - t2.o_orderstatus, - t2.o_totalprice, - t2.o_orderdate, - t2.o_orderpriority, - t2.o_clerk, - t2.o_shippriority, - t2.o_comment, - t3.l_orderkey, - t3.l_partkey, - t3.l_suppkey, - t3.l_linenumber, - t3.l_quantity, - t3.l_extendedprice, - t3.l_discount, - t3.l_tax, - t3.l_returnflag, - t3.l_linestatus, - t3.l_shipdate, - t3.l_commitdate, - t3.l_receiptdate, - t3.l_shipinstruct, - t3.l_shipmode, - t3.l_comment - FROM orders AS t2 - INNER JOIN lineitem AS t3 - ON t2.o_orderkey = t3.l_orderkey - ) AS t4 + "t2"."o_orderkey", + "t2"."o_custkey", + "t2"."o_orderstatus", + "t2"."o_totalprice", + "t2"."o_orderdate", + "t2"."o_orderpriority", + "t2"."o_clerk", + "t2"."o_shippriority", + "t2"."o_comment", + "t3"."l_orderkey", + "t3"."l_partkey", + "t3"."l_suppkey", + "t3"."l_linenumber", 
+ "t3"."l_quantity", + "t3"."l_extendedprice", + "t3"."l_discount", + "t3"."l_tax", + "t3"."l_returnflag", + "t3"."l_linestatus", + "t3"."l_shipdate", + "t3"."l_commitdate", + "t3"."l_receiptdate", + "t3"."l_shipinstruct", + "t3"."l_shipmode", + "t3"."l_comment" + FROM "orders" AS "t2" + INNER JOIN "lineitem" AS "t3" + ON "t2"."o_orderkey" = "t3"."l_orderkey" + ) AS "t4" WHERE - t4.l_shipmode IN ('MAIL', 'SHIP') - AND t4.l_commitdate < t4.l_receiptdate - AND t4.l_shipdate < t4.l_commitdate - AND t4.l_receiptdate >= MAKE_DATE(1994, 1, 1) - AND t4.l_receiptdate < MAKE_DATE(1995, 1, 1) - ) AS t5 + "t4"."l_shipmode" IN ('MAIL', 'SHIP') + AND "t4"."l_commitdate" < "t4"."l_receiptdate" + AND "t4"."l_shipdate" < "t4"."l_commitdate" + AND "t4"."l_receiptdate" >= MAKE_DATE(1994, 1, 1) + AND "t4"."l_receiptdate" < MAKE_DATE(1995, 1, 1) + ) AS "t5" GROUP BY 1 -) AS t6 +) AS "t6" ORDER BY - t6.l_shipmode ASC \ No newline at end of file + "t6"."l_shipmode" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql index c020de0fbaec..be357166968e 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql @@ -1,45 +1,46 @@ SELECT - t6.c_count, - t6.custdist + "t6"."c_count", + "t6"."custdist" FROM ( SELECT - t5.c_count, - COUNT(*) AS custdist + "t5"."c_count", + COUNT(*) AS "custdist" FROM ( SELECT - t4.c_custkey, - COUNT(t4.o_orderkey) AS c_count + "t4"."c_custkey", + COUNT("t4"."o_orderkey") AS "c_count" FROM ( SELECT - t2.c_custkey, - t2.c_name, - t2.c_address, - t2.c_nationkey, - t2.c_phone, - t2.c_acctbal, - t2.c_mktsegment, - t2.c_comment, - t3.o_orderkey, - t3.o_custkey, - t3.o_orderstatus, - t3.o_totalprice, - t3.o_orderdate, - t3.o_orderpriority, - t3.o_clerk, - t3.o_shippriority, - t3.o_comment - FROM customer AS t2 - LEFT OUTER JOIN orders AS t3 - ON t2.c_custkey = t3.o_custkey AND NOT ( - t3.o_comment LIKE '%special%requests%' + "t2"."c_custkey", + "t2"."c_name", + "t2"."c_address", + "t2"."c_nationkey", + "t2"."c_phone", + "t2"."c_acctbal", + "t2"."c_mktsegment", + "t2"."c_comment", + "t3"."o_orderkey", + "t3"."o_custkey", + "t3"."o_orderstatus", + "t3"."o_totalprice", + "t3"."o_orderdate", + "t3"."o_orderpriority", + "t3"."o_clerk", + "t3"."o_shippriority", + "t3"."o_comment" + FROM "customer" AS "t2" + LEFT OUTER JOIN "orders" AS "t3" + ON "t2"."c_custkey" = "t3"."o_custkey" + AND NOT ( + "t3"."o_comment" LIKE '%special%requests%' ) - ) AS t4 + ) AS "t4" GROUP BY 1 - ) AS t5 + ) AS "t5" GROUP BY 1 -) AS t6 +) AS "t6" ORDER BY - t6.custdist DESC, - t6.c_count DESC \ No newline at end of file + "t6"."custdist" DESC, + "t6"."c_count" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql index 2e411aa24794..513eb4a5a6de 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql @@ -2,74 +2,75 @@ SELECT ( SUM( CASE - WHEN t5.p_type LIKE 'PROMO%' - THEN t5.l_extendedprice * ( - CAST(1 AS TINYINT) - t5.l_discount + WHEN "t5"."p_type" LIKE 'PROMO%' + THEN "t5"."l_extendedprice" * ( + CAST(1 AS TINYINT) - "t5"."l_discount" ) ELSE CAST(0 AS TINYINT) END ) * CAST(100 AS TINYINT) - ) / SUM(t5.l_extendedprice * ( - CAST(1 AS 
TINYINT) - t5.l_discount - )) AS promo_revenue + ) / SUM("t5"."l_extendedprice" * ( + CAST(1 AS TINYINT) - "t5"."l_discount" + )) AS "promo_revenue" FROM ( SELECT - t4.l_orderkey, - t4.l_partkey, - t4.l_suppkey, - t4.l_linenumber, - t4.l_quantity, - t4.l_extendedprice, - t4.l_discount, - t4.l_tax, - t4.l_returnflag, - t4.l_linestatus, - t4.l_shipdate, - t4.l_commitdate, - t4.l_receiptdate, - t4.l_shipinstruct, - t4.l_shipmode, - t4.l_comment, - t4.p_partkey, - t4.p_name, - t4.p_mfgr, - t4.p_brand, - t4.p_type, - t4.p_size, - t4.p_container, - t4.p_retailprice, - t4.p_comment + "t4"."l_orderkey", + "t4"."l_partkey", + "t4"."l_suppkey", + "t4"."l_linenumber", + "t4"."l_quantity", + "t4"."l_extendedprice", + "t4"."l_discount", + "t4"."l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment", + "t4"."p_partkey", + "t4"."p_name", + "t4"."p_mfgr", + "t4"."p_brand", + "t4"."p_type", + "t4"."p_size", + "t4"."p_container", + "t4"."p_retailprice", + "t4"."p_comment" FROM ( SELECT - t2.l_orderkey, - t2.l_partkey, - t2.l_suppkey, - t2.l_linenumber, - t2.l_quantity, - t2.l_extendedprice, - t2.l_discount, - t2.l_tax, - t2.l_returnflag, - t2.l_linestatus, - t2.l_shipdate, - t2.l_commitdate, - t2.l_receiptdate, - t2.l_shipinstruct, - t2.l_shipmode, - t2.l_comment, - t3.p_partkey, - t3.p_name, - t3.p_mfgr, - t3.p_brand, - t3.p_type, - t3.p_size, - t3.p_container, - t3.p_retailprice, - t3.p_comment - FROM lineitem AS t2 - INNER JOIN part AS t3 - ON t2.l_partkey = t3.p_partkey - ) AS t4 + "t2"."l_orderkey", + "t2"."l_partkey", + "t2"."l_suppkey", + "t2"."l_linenumber", + "t2"."l_quantity", + "t2"."l_extendedprice", + "t2"."l_discount", + "t2"."l_tax", + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."l_shipdate", + "t2"."l_commitdate", + "t2"."l_receiptdate", + "t2"."l_shipinstruct", + "t2"."l_shipmode", + "t2"."l_comment", + "t3"."p_partkey", + "t3"."p_name", + "t3"."p_mfgr", + "t3"."p_brand", + "t3"."p_type", + "t3"."p_size", + "t3"."p_container", + "t3"."p_retailprice", + "t3"."p_comment" + FROM "lineitem" AS "t2" + INNER JOIN "part" AS "t3" + ON "t2"."l_partkey" = "t3"."p_partkey" + ) AS "t4" WHERE - t4.l_shipdate >= MAKE_DATE(1995, 9, 1) AND t4.l_shipdate < MAKE_DATE(1995, 10, 1) -) AS t5 \ No newline at end of file + "t4"."l_shipdate" >= MAKE_DATE(1995, 9, 1) + AND "t4"."l_shipdate" < MAKE_DATE(1995, 10, 1) +) AS "t5" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql index 3a5449a2361c..78c88fcb0e00 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql @@ -1,103 +1,105 @@ SELECT - t6.s_suppkey, - t6.s_name, - t6.s_address, - t6.s_phone, - t6.total_revenue + "t6"."s_suppkey", + "t6"."s_name", + "t6"."s_address", + "t6"."s_phone", + "t6"."total_revenue" FROM ( SELECT - t2.s_suppkey, - t2.s_name, - t2.s_address, - t2.s_nationkey, - t2.s_phone, - t2.s_acctbal, - t2.s_comment, - t5.l_suppkey, - t5.total_revenue - FROM supplier AS t2 + "t2"."s_suppkey", + "t2"."s_name", + "t2"."s_address", + "t2"."s_nationkey", + "t2"."s_phone", + "t2"."s_acctbal", + "t2"."s_comment", + "t5"."l_suppkey", + "t5"."total_revenue" + FROM "supplier" AS "t2" INNER JOIN ( SELECT - t3.l_suppkey, - SUM(t3.l_extendedprice * ( - CAST(1 AS TINYINT) - t3.l_discount 
- )) AS total_revenue + "t3"."l_suppkey", + SUM("t3"."l_extendedprice" * ( + CAST(1 AS TINYINT) - "t3"."l_discount" + )) AS "total_revenue" FROM ( SELECT - t1.l_orderkey, - t1.l_partkey, - t1.l_suppkey, - t1.l_linenumber, - t1.l_quantity, - t1.l_extendedprice, - t1.l_discount, - t1.l_tax, - t1.l_returnflag, - t1.l_linestatus, - t1.l_shipdate, - t1.l_commitdate, - t1.l_receiptdate, - t1.l_shipinstruct, - t1.l_shipmode, - t1.l_comment - FROM lineitem AS t1 + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + "t1"."l_quantity", + "t1"."l_extendedprice", + "t1"."l_discount", + "t1"."l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" WHERE - t1.l_shipdate >= MAKE_DATE(1996, 1, 1) AND t1.l_shipdate < MAKE_DATE(1996, 4, 1) - ) AS t3 + "t1"."l_shipdate" >= MAKE_DATE(1996, 1, 1) + AND "t1"."l_shipdate" < MAKE_DATE(1996, 4, 1) + ) AS "t3" GROUP BY 1 - ) AS t5 - ON t2.s_suppkey = t5.l_suppkey -) AS t6 + ) AS "t5" + ON "t2"."s_suppkey" = "t5"."l_suppkey" +) AS "t6" WHERE - t6.total_revenue = ( + "t6"."total_revenue" = ( SELECT - MAX(t6.total_revenue) AS "Max(total_revenue)" + MAX("t6"."total_revenue") AS "Max(total_revenue)" FROM ( SELECT - t2.s_suppkey, - t2.s_name, - t2.s_address, - t2.s_nationkey, - t2.s_phone, - t2.s_acctbal, - t2.s_comment, - t5.l_suppkey, - t5.total_revenue - FROM supplier AS t2 + "t2"."s_suppkey", + "t2"."s_name", + "t2"."s_address", + "t2"."s_nationkey", + "t2"."s_phone", + "t2"."s_acctbal", + "t2"."s_comment", + "t5"."l_suppkey", + "t5"."total_revenue" + FROM "supplier" AS "t2" INNER JOIN ( SELECT - t3.l_suppkey, - SUM(t3.l_extendedprice * ( - CAST(1 AS TINYINT) - t3.l_discount - )) AS total_revenue + "t3"."l_suppkey", + SUM("t3"."l_extendedprice" * ( + CAST(1 AS TINYINT) - "t3"."l_discount" + )) AS "total_revenue" FROM ( SELECT - t1.l_orderkey, - t1.l_partkey, - t1.l_suppkey, - t1.l_linenumber, - t1.l_quantity, - t1.l_extendedprice, - t1.l_discount, - t1.l_tax, - t1.l_returnflag, - t1.l_linestatus, - t1.l_shipdate, - t1.l_commitdate, - t1.l_receiptdate, - t1.l_shipinstruct, - t1.l_shipmode, - t1.l_comment - FROM lineitem AS t1 + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + "t1"."l_quantity", + "t1"."l_extendedprice", + "t1"."l_discount", + "t1"."l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" WHERE - t1.l_shipdate >= MAKE_DATE(1996, 1, 1) AND t1.l_shipdate < MAKE_DATE(1996, 4, 1) - ) AS t3 + "t1"."l_shipdate" >= MAKE_DATE(1996, 1, 1) + AND "t1"."l_shipdate" < MAKE_DATE(1996, 4, 1) + ) AS "t3" GROUP BY 1 - ) AS t5 - ON t2.s_suppkey = t5.l_suppkey - ) AS t6 + ) AS "t5" + ON "t2"."s_suppkey" = "t5"."l_suppkey" + ) AS "t6" ) ORDER BY - t6.s_suppkey ASC \ No newline at end of file + "t6"."s_suppkey" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql index b6491dc0efa5..af66a4764073 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql @@ -1,73 +1,73 @@ SELECT - t8.p_brand, - t8.p_type, - t8.p_size, - t8.supplier_cnt + "t8"."p_brand", + 
"t8"."p_type", + "t8"."p_size", + "t8"."supplier_cnt" FROM ( SELECT - t7.p_brand, - t7.p_type, - t7.p_size, - COUNT(DISTINCT t7.ps_suppkey) AS supplier_cnt + "t7"."p_brand", + "t7"."p_type", + "t7"."p_size", + COUNT(DISTINCT "t7"."ps_suppkey") AS "supplier_cnt" FROM ( SELECT - t6.ps_partkey, - t6.ps_suppkey, - t6.ps_availqty, - t6.ps_supplycost, - t6.ps_comment, - t6.p_partkey, - t6.p_name, - t6.p_mfgr, - t6.p_brand, - t6.p_type, - t6.p_size, - t6.p_container, - t6.p_retailprice, - t6.p_comment + "t6"."ps_partkey", + "t6"."ps_suppkey", + "t6"."ps_availqty", + "t6"."ps_supplycost", + "t6"."ps_comment", + "t6"."p_partkey", + "t6"."p_name", + "t6"."p_mfgr", + "t6"."p_brand", + "t6"."p_type", + "t6"."p_size", + "t6"."p_container", + "t6"."p_retailprice", + "t6"."p_comment" FROM ( SELECT - t3.ps_partkey, - t3.ps_suppkey, - t3.ps_availqty, - t3.ps_supplycost, - t3.ps_comment, - t4.p_partkey, - t4.p_name, - t4.p_mfgr, - t4.p_brand, - t4.p_type, - t4.p_size, - t4.p_container, - t4.p_retailprice, - t4.p_comment - FROM partsupp AS t3 - INNER JOIN part AS t4 - ON t4.p_partkey = t3.ps_partkey - ) AS t6 + "t3"."ps_partkey", + "t3"."ps_suppkey", + "t3"."ps_availqty", + "t3"."ps_supplycost", + "t3"."ps_comment", + "t4"."p_partkey", + "t4"."p_name", + "t4"."p_mfgr", + "t4"."p_brand", + "t4"."p_type", + "t4"."p_size", + "t4"."p_container", + "t4"."p_retailprice", + "t4"."p_comment" + FROM "partsupp" AS "t3" + INNER JOIN "part" AS "t4" + ON "t4"."p_partkey" = "t3"."ps_partkey" + ) AS "t6" WHERE - t6.p_brand <> 'Brand#45' + "t6"."p_brand" <> 'Brand#45' AND NOT ( - t6.p_type LIKE 'MEDIUM POLISHED%' + "t6"."p_type" LIKE 'MEDIUM POLISHED%' ) - AND t6.p_size IN (CAST(49 AS TINYINT), CAST(14 AS TINYINT), CAST(23 AS TINYINT), CAST(45 AS TINYINT), CAST(19 AS TINYINT), CAST(3 AS TINYINT), CAST(36 AS TINYINT), CAST(9 AS TINYINT)) + AND "t6"."p_size" IN (CAST(49 AS TINYINT), CAST(14 AS TINYINT), CAST(23 AS TINYINT), CAST(45 AS TINYINT), CAST(19 AS TINYINT), CAST(3 AS TINYINT), CAST(36 AS TINYINT), CAST(9 AS TINYINT)) AND NOT ( - t6.ps_suppkey IN ( + "t6"."ps_suppkey" IN ( SELECT - t2.s_suppkey - FROM supplier AS t2 + "t2"."s_suppkey" + FROM "supplier" AS "t2" WHERE - t2.s_comment LIKE '%Customer%Complaints%' + "t2"."s_comment" LIKE '%Customer%Complaints%' ) ) - ) AS t7 + ) AS "t7" GROUP BY 1, 2, 3 -) AS t8 +) AS "t8" ORDER BY - t8.supplier_cnt DESC, - t8.p_brand ASC, - t8.p_type ASC, - t8.p_size ASC \ No newline at end of file + "t8"."supplier_cnt" DESC, + "t8"."p_brand" ASC, + "t8"."p_type" ASC, + "t8"."p_size" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql index 378aac0f97ee..8ded6c1fcdb8 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql @@ -1,92 +1,92 @@ SELECT - SUM(t7.l_extendedprice) / CAST(7.0 AS DOUBLE) AS avg_yearly + SUM("t7"."l_extendedprice") / CAST(7.0 AS DOUBLE) AS "avg_yearly" FROM ( SELECT - t4.l_orderkey, - t4.l_partkey, - t4.l_suppkey, - t4.l_linenumber, - t4.l_quantity, - t4.l_extendedprice, - t4.l_discount, - t4.l_tax, - t4.l_returnflag, - t4.l_linestatus, - t4.l_shipdate, - t4.l_commitdate, - t4.l_receiptdate, - t4.l_shipinstruct, - t4.l_shipmode, - t4.l_comment, - t4.p_partkey, - t4.p_name, - t4.p_mfgr, - t4.p_brand, - t4.p_type, - t4.p_size, - t4.p_container, - t4.p_retailprice, - t4.p_comment + "t4"."l_orderkey", + "t4"."l_partkey", + 
"t4"."l_suppkey", + "t4"."l_linenumber", + "t4"."l_quantity", + "t4"."l_extendedprice", + "t4"."l_discount", + "t4"."l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment", + "t4"."p_partkey", + "t4"."p_name", + "t4"."p_mfgr", + "t4"."p_brand", + "t4"."p_type", + "t4"."p_size", + "t4"."p_container", + "t4"."p_retailprice", + "t4"."p_comment" FROM ( SELECT - t2.l_orderkey, - t2.l_partkey, - t2.l_suppkey, - t2.l_linenumber, - t2.l_quantity, - t2.l_extendedprice, - t2.l_discount, - t2.l_tax, - t2.l_returnflag, - t2.l_linestatus, - t2.l_shipdate, - t2.l_commitdate, - t2.l_receiptdate, - t2.l_shipinstruct, - t2.l_shipmode, - t2.l_comment, - t3.p_partkey, - t3.p_name, - t3.p_mfgr, - t3.p_brand, - t3.p_type, - t3.p_size, - t3.p_container, - t3.p_retailprice, - t3.p_comment - FROM lineitem AS t2 - INNER JOIN part AS t3 - ON t3.p_partkey = t2.l_partkey - ) AS t4 + "t2"."l_orderkey", + "t2"."l_partkey", + "t2"."l_suppkey", + "t2"."l_linenumber", + "t2"."l_quantity", + "t2"."l_extendedprice", + "t2"."l_discount", + "t2"."l_tax", + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."l_shipdate", + "t2"."l_commitdate", + "t2"."l_receiptdate", + "t2"."l_shipinstruct", + "t2"."l_shipmode", + "t2"."l_comment", + "t3"."p_partkey", + "t3"."p_name", + "t3"."p_mfgr", + "t3"."p_brand", + "t3"."p_type", + "t3"."p_size", + "t3"."p_container", + "t3"."p_retailprice", + "t3"."p_comment" + FROM "lineitem" AS "t2" + INNER JOIN "part" AS "t3" + ON "t3"."p_partkey" = "t2"."l_partkey" + ) AS "t4" WHERE - t4.p_brand = 'Brand#23' - AND t4.p_container = 'MED BOX' - AND t4.l_quantity < ( + "t4"."p_brand" = 'Brand#23' + AND "t4"."p_container" = 'MED BOX' + AND "t4"."l_quantity" < ( ( SELECT - AVG(t5.l_quantity) AS "Mean(l_quantity)" + AVG("t5"."l_quantity") AS "Mean(l_quantity)" FROM ( SELECT - t0.l_orderkey, - t0.l_partkey, - t0.l_suppkey, - t0.l_linenumber, - t0.l_quantity, - t0.l_extendedprice, - t0.l_discount, - t0.l_tax, - t0.l_returnflag, - t0.l_linestatus, - t0.l_shipdate, - t0.l_commitdate, - t0.l_receiptdate, - t0.l_shipinstruct, - t0.l_shipmode, - t0.l_comment - FROM lineitem AS t0 + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + "t0"."l_quantity", + "t0"."l_extendedprice", + "t0"."l_discount", + "t0"."l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" WHERE - t0.l_partkey = t4.p_partkey - ) AS t5 + "t0"."l_partkey" = "t4"."p_partkey" + ) AS "t5" ) * CAST(0.2 AS DOUBLE) ) -) AS t7 \ No newline at end of file +) AS "t7" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql index 27ca9fa730fd..f15310f578b4 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql @@ -1,118 +1,118 @@ SELECT - t10.c_name, - t10.c_custkey, - t10.o_orderkey, - t10.o_orderdate, - t10.o_totalprice, - t10.sum_qty + "t10"."c_name", + "t10"."c_custkey", + "t10"."o_orderkey", + "t10"."o_orderdate", + "t10"."o_totalprice", + "t10"."sum_qty" FROM ( SELECT - t9.c_name, - t9.c_custkey, - t9.o_orderkey, - t9.o_orderdate, - t9.o_totalprice, - SUM(t9.l_quantity) AS sum_qty + "t9"."c_name", + 
"t9"."c_custkey", + "t9"."o_orderkey", + "t9"."o_orderdate", + "t9"."o_totalprice", + SUM("t9"."l_quantity") AS "sum_qty" FROM ( SELECT - t7.c_custkey, - t7.c_name, - t7.c_address, - t7.c_nationkey, - t7.c_phone, - t7.c_acctbal, - t7.c_mktsegment, - t7.c_comment, - t7.o_orderkey, - t7.o_custkey, - t7.o_orderstatus, - t7.o_totalprice, - t7.o_orderdate, - t7.o_orderpriority, - t7.o_clerk, - t7.o_shippriority, - t7.o_comment, - t7.l_orderkey, - t7.l_partkey, - t7.l_suppkey, - t7.l_linenumber, - t7.l_quantity, - t7.l_extendedprice, - t7.l_discount, - t7.l_tax, - t7.l_returnflag, - t7.l_linestatus, - t7.l_shipdate, - t7.l_commitdate, - t7.l_receiptdate, - t7.l_shipinstruct, - t7.l_shipmode, - t7.l_comment + "t7"."c_custkey", + "t7"."c_name", + "t7"."c_address", + "t7"."c_nationkey", + "t7"."c_phone", + "t7"."c_acctbal", + "t7"."c_mktsegment", + "t7"."c_comment", + "t7"."o_orderkey", + "t7"."o_custkey", + "t7"."o_orderstatus", + "t7"."o_totalprice", + "t7"."o_orderdate", + "t7"."o_orderpriority", + "t7"."o_clerk", + "t7"."o_shippriority", + "t7"."o_comment", + "t7"."l_orderkey", + "t7"."l_partkey", + "t7"."l_suppkey", + "t7"."l_linenumber", + "t7"."l_quantity", + "t7"."l_extendedprice", + "t7"."l_discount", + "t7"."l_tax", + "t7"."l_returnflag", + "t7"."l_linestatus", + "t7"."l_shipdate", + "t7"."l_commitdate", + "t7"."l_receiptdate", + "t7"."l_shipinstruct", + "t7"."l_shipmode", + "t7"."l_comment" FROM ( SELECT - t3.c_custkey, - t3.c_name, - t3.c_address, - t3.c_nationkey, - t3.c_phone, - t3.c_acctbal, - t3.c_mktsegment, - t3.c_comment, - t4.o_orderkey, - t4.o_custkey, - t4.o_orderstatus, - t4.o_totalprice, - t4.o_orderdate, - t4.o_orderpriority, - t4.o_clerk, - t4.o_shippriority, - t4.o_comment, - t5.l_orderkey, - t5.l_partkey, - t5.l_suppkey, - t5.l_linenumber, - t5.l_quantity, - t5.l_extendedprice, - t5.l_discount, - t5.l_tax, - t5.l_returnflag, - t5.l_linestatus, - t5.l_shipdate, - t5.l_commitdate, - t5.l_receiptdate, - t5.l_shipinstruct, - t5.l_shipmode, - t5.l_comment - FROM customer AS t3 - INNER JOIN orders AS t4 - ON t3.c_custkey = t4.o_custkey - INNER JOIN lineitem AS t5 - ON t4.o_orderkey = t5.l_orderkey - ) AS t7 + "t3"."c_custkey", + "t3"."c_name", + "t3"."c_address", + "t3"."c_nationkey", + "t3"."c_phone", + "t3"."c_acctbal", + "t3"."c_mktsegment", + "t3"."c_comment", + "t4"."o_orderkey", + "t4"."o_custkey", + "t4"."o_orderstatus", + "t4"."o_totalprice", + "t4"."o_orderdate", + "t4"."o_orderpriority", + "t4"."o_clerk", + "t4"."o_shippriority", + "t4"."o_comment", + "t5"."l_orderkey", + "t5"."l_partkey", + "t5"."l_suppkey", + "t5"."l_linenumber", + "t5"."l_quantity", + "t5"."l_extendedprice", + "t5"."l_discount", + "t5"."l_tax", + "t5"."l_returnflag", + "t5"."l_linestatus", + "t5"."l_shipdate", + "t5"."l_commitdate", + "t5"."l_receiptdate", + "t5"."l_shipinstruct", + "t5"."l_shipmode", + "t5"."l_comment" + FROM "customer" AS "t3" + INNER JOIN "orders" AS "t4" + ON "t3"."c_custkey" = "t4"."o_custkey" + INNER JOIN "lineitem" AS "t5" + ON "t4"."o_orderkey" = "t5"."l_orderkey" + ) AS "t7" WHERE - t7.o_orderkey IN ( + "t7"."o_orderkey" IN ( SELECT - t6.l_orderkey + "t6"."l_orderkey" FROM ( SELECT - t2.l_orderkey, - SUM(t2.l_quantity) AS qty_sum - FROM lineitem AS t2 + "t2"."l_orderkey", + SUM("t2"."l_quantity") AS "qty_sum" + FROM "lineitem" AS "t2" GROUP BY 1 - ) AS t6 + ) AS "t6" WHERE - t6.qty_sum > CAST(300 AS SMALLINT) + "t6"."qty_sum" > CAST(300 AS SMALLINT) ) - ) AS t9 + ) AS "t9" GROUP BY 1, 2, 3, 4, 5 -) AS t10 +) AS "t10" ORDER BY - t10.o_totalprice DESC, - t10.o_orderdate ASC 
+ "t10"."o_totalprice" DESC, + "t10"."o_orderdate" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql index b33da3fd86a6..4243b5454981 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql @@ -1,65 +1,65 @@ SELECT - SUM(t5.l_extendedprice * ( - CAST(1 AS TINYINT) - t5.l_discount - )) AS revenue + SUM("t5"."l_extendedprice" * ( + CAST(1 AS TINYINT) - "t5"."l_discount" + )) AS "revenue" FROM ( SELECT - t4.l_orderkey, - t4.l_partkey, - t4.l_suppkey, - t4.l_linenumber, - t4.l_quantity, - t4.l_extendedprice, - t4.l_discount, - t4.l_tax, - t4.l_returnflag, - t4.l_linestatus, - t4.l_shipdate, - t4.l_commitdate, - t4.l_receiptdate, - t4.l_shipinstruct, - t4.l_shipmode, - t4.l_comment, - t4.p_partkey, - t4.p_name, - t4.p_mfgr, - t4.p_brand, - t4.p_type, - t4.p_size, - t4.p_container, - t4.p_retailprice, - t4.p_comment + "t4"."l_orderkey", + "t4"."l_partkey", + "t4"."l_suppkey", + "t4"."l_linenumber", + "t4"."l_quantity", + "t4"."l_extendedprice", + "t4"."l_discount", + "t4"."l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment", + "t4"."p_partkey", + "t4"."p_name", + "t4"."p_mfgr", + "t4"."p_brand", + "t4"."p_type", + "t4"."p_size", + "t4"."p_container", + "t4"."p_retailprice", + "t4"."p_comment" FROM ( SELECT - t2.l_orderkey, - t2.l_partkey, - t2.l_suppkey, - t2.l_linenumber, - t2.l_quantity, - t2.l_extendedprice, - t2.l_discount, - t2.l_tax, - t2.l_returnflag, - t2.l_linestatus, - t2.l_shipdate, - t2.l_commitdate, - t2.l_receiptdate, - t2.l_shipinstruct, - t2.l_shipmode, - t2.l_comment, - t3.p_partkey, - t3.p_name, - t3.p_mfgr, - t3.p_brand, - t3.p_type, - t3.p_size, - t3.p_container, - t3.p_retailprice, - t3.p_comment - FROM lineitem AS t2 - INNER JOIN part AS t3 - ON t3.p_partkey = t2.l_partkey - ) AS t4 + "t2"."l_orderkey", + "t2"."l_partkey", + "t2"."l_suppkey", + "t2"."l_linenumber", + "t2"."l_quantity", + "t2"."l_extendedprice", + "t2"."l_discount", + "t2"."l_tax", + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."l_shipdate", + "t2"."l_commitdate", + "t2"."l_receiptdate", + "t2"."l_shipinstruct", + "t2"."l_shipmode", + "t2"."l_comment", + "t3"."p_partkey", + "t3"."p_name", + "t3"."p_mfgr", + "t3"."p_brand", + "t3"."p_type", + "t3"."p_size", + "t3"."p_container", + "t3"."p_retailprice", + "t3"."p_comment" + FROM "lineitem" AS "t2" + INNER JOIN "part" AS "t3" + ON "t3"."p_partkey" = "t2"."l_partkey" + ) AS "t4" WHERE ( ( @@ -69,24 +69,24 @@ FROM ( ( ( ( - t4.p_brand = 'Brand#12' + "t4"."p_brand" = 'Brand#12' ) - AND t4.p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + AND "t4"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') ) AND ( - t4.l_quantity >= CAST(1 AS TINYINT) + "t4"."l_quantity" >= CAST(1 AS TINYINT) ) ) AND ( - t4.l_quantity <= CAST(11 AS TINYINT) + "t4"."l_quantity" <= CAST(11 AS TINYINT) ) ) - AND t4.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(5 AS TINYINT) + AND "t4"."p_size" BETWEEN CAST(1 AS TINYINT) AND CAST(5 AS TINYINT) ) - AND t4.l_shipmode IN ('AIR', 'AIR REG') + AND "t4"."l_shipmode" IN ('AIR', 'AIR REG') ) AND ( - t4.l_shipinstruct = 'DELIVER IN PERSON' + "t4"."l_shipinstruct" = 'DELIVER IN PERSON' ) ) OR ( @@ -96,24 +96,24 @@ FROM ( ( ( ( - t4.p_brand = 
'Brand#23' + "t4"."p_brand" = 'Brand#23' ) - AND t4.p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + AND "t4"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') ) AND ( - t4.l_quantity >= CAST(10 AS TINYINT) + "t4"."l_quantity" >= CAST(10 AS TINYINT) ) ) AND ( - t4.l_quantity <= CAST(20 AS TINYINT) + "t4"."l_quantity" <= CAST(20 AS TINYINT) ) ) - AND t4.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(10 AS TINYINT) + AND "t4"."p_size" BETWEEN CAST(1 AS TINYINT) AND CAST(10 AS TINYINT) ) - AND t4.l_shipmode IN ('AIR', 'AIR REG') + AND "t4"."l_shipmode" IN ('AIR', 'AIR REG') ) AND ( - t4.l_shipinstruct = 'DELIVER IN PERSON' + "t4"."l_shipinstruct" = 'DELIVER IN PERSON' ) ) ) @@ -124,24 +124,24 @@ FROM ( ( ( ( - t4.p_brand = 'Brand#34' + "t4"."p_brand" = 'Brand#34' ) - AND t4.p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + AND "t4"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') ) AND ( - t4.l_quantity >= CAST(20 AS TINYINT) + "t4"."l_quantity" >= CAST(20 AS TINYINT) ) ) AND ( - t4.l_quantity <= CAST(30 AS TINYINT) + "t4"."l_quantity" <= CAST(30 AS TINYINT) ) ) - AND t4.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(15 AS TINYINT) + AND "t4"."p_size" BETWEEN CAST(1 AS TINYINT) AND CAST(15 AS TINYINT) ) - AND t4.l_shipmode IN ('AIR', 'AIR REG') + AND "t4"."l_shipmode" IN ('AIR', 'AIR REG') ) AND ( - t4.l_shipinstruct = 'DELIVER IN PERSON' + "t4"."l_shipinstruct" = 'DELIVER IN PERSON' ) ) -) AS t5 \ No newline at end of file +) AS "t5" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql index 87dee39630d3..792ea4e34b2b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql @@ -1,68 +1,68 @@ SELECT - t9.s_name, - t9.s_address + "t9"."s_name", + "t9"."s_address" FROM ( SELECT - t5.s_suppkey, - t5.s_name, - t5.s_address, - t5.s_nationkey, - t5.s_phone, - t5.s_acctbal, - t5.s_comment, - t6.n_nationkey, - t6.n_name, - t6.n_regionkey, - t6.n_comment - FROM supplier AS t5 - INNER JOIN nation AS t6 - ON t5.s_nationkey = t6.n_nationkey -) AS t9 + "t5"."s_suppkey", + "t5"."s_name", + "t5"."s_address", + "t5"."s_nationkey", + "t5"."s_phone", + "t5"."s_acctbal", + "t5"."s_comment", + "t6"."n_nationkey", + "t6"."n_name", + "t6"."n_regionkey", + "t6"."n_comment" + FROM "supplier" AS "t5" + INNER JOIN "nation" AS "t6" + ON "t5"."s_nationkey" = "t6"."n_nationkey" +) AS "t9" WHERE - t9.n_name = 'CANADA' - AND t9.s_suppkey IN ( + "t9"."n_name" = 'CANADA' + AND "t9"."s_suppkey" IN ( SELECT - t1.ps_suppkey - FROM partsupp AS t1 + "t1"."ps_suppkey" + FROM "partsupp" AS "t1" WHERE - t1.ps_partkey IN ( + "t1"."ps_partkey" IN ( SELECT - t3.p_partkey - FROM part AS t3 + "t3"."p_partkey" + FROM "part" AS "t3" WHERE - t3.p_name LIKE 'forest%' + "t3"."p_name" LIKE 'forest%' ) - AND t1.ps_availqty > ( + AND "t1"."ps_availqty" > ( ( SELECT - SUM(t8.l_quantity) AS "Sum(l_quantity)" + SUM("t8"."l_quantity") AS "Sum(l_quantity)" FROM ( SELECT - t4.l_orderkey, - t4.l_partkey, - t4.l_suppkey, - t4.l_linenumber, - t4.l_quantity, - t4.l_extendedprice, - t4.l_discount, - t4.l_tax, - t4.l_returnflag, - t4.l_linestatus, - t4.l_shipdate, - t4.l_commitdate, - t4.l_receiptdate, - t4.l_shipinstruct, - t4.l_shipmode, - t4.l_comment - FROM lineitem AS t4 + "t4"."l_orderkey", + "t4"."l_partkey", + "t4"."l_suppkey", + "t4"."l_linenumber", + "t4"."l_quantity", + 
"t4"."l_extendedprice", + "t4"."l_discount", + "t4"."l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment" + FROM "lineitem" AS "t4" WHERE - t4.l_partkey = t1.ps_partkey - AND t4.l_suppkey = t1.ps_suppkey - AND t4.l_shipdate >= MAKE_DATE(1994, 1, 1) - AND t4.l_shipdate < MAKE_DATE(1995, 1, 1) - ) AS t8 + "t4"."l_partkey" = "t1"."ps_partkey" + AND "t4"."l_suppkey" = "t1"."ps_suppkey" + AND "t4"."l_shipdate" >= MAKE_DATE(1994, 1, 1) + AND "t4"."l_shipdate" < MAKE_DATE(1995, 1, 1) + ) AS "t8" ) * CAST(0.5 AS DOUBLE) ) ) ORDER BY - t9.s_name ASC \ No newline at end of file + "t9"."s_name" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql index 3e5527999c44..45ba92b3a4a0 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql @@ -1,74 +1,76 @@ SELECT - t14.s_name, - t14.numwait + "t14"."s_name", + "t14"."numwait" FROM ( SELECT - t13.s_name, - COUNT(*) AS numwait + "t13"."s_name", + COUNT(*) AS "numwait" FROM ( SELECT - t10.l1_orderkey, - t10.o_orderstatus, - t10.l_receiptdate, - t10.l_commitdate, - t10.l1_suppkey, - t10.s_name, - t10.n_name + "t10"."l1_orderkey", + "t10"."o_orderstatus", + "t10"."l_receiptdate", + "t10"."l_commitdate", + "t10"."l1_suppkey", + "t10"."s_name", + "t10"."n_name" FROM ( SELECT - t5.l_orderkey AS l1_orderkey, - t8.o_orderstatus, - t5.l_receiptdate, - t5.l_commitdate, - t5.l_suppkey AS l1_suppkey, - t4.s_name, - t9.n_name - FROM supplier AS t4 - INNER JOIN lineitem AS t5 - ON t4.s_suppkey = t5.l_suppkey - INNER JOIN orders AS t8 - ON t8.o_orderkey = t5.l_orderkey - INNER JOIN nation AS t9 - ON t4.s_nationkey = t9.n_nationkey - ) AS t10 + "t5"."l_orderkey" AS "l1_orderkey", + "t8"."o_orderstatus", + "t5"."l_receiptdate", + "t5"."l_commitdate", + "t5"."l_suppkey" AS "l1_suppkey", + "t4"."s_name", + "t9"."n_name" + FROM "supplier" AS "t4" + INNER JOIN "lineitem" AS "t5" + ON "t4"."s_suppkey" = "t5"."l_suppkey" + INNER JOIN "orders" AS "t8" + ON "t8"."o_orderkey" = "t5"."l_orderkey" + INNER JOIN "nation" AS "t9" + ON "t4"."s_nationkey" = "t9"."n_nationkey" + ) AS "t10" WHERE - t10.o_orderstatus = 'F' - AND t10.l_receiptdate > t10.l_commitdate - AND t10.n_name = 'SAUDI ARABIA' + "t10"."o_orderstatus" = 'F' + AND "t10"."l_receiptdate" > "t10"."l_commitdate" + AND "t10"."n_name" = 'SAUDI ARABIA' AND EXISTS( SELECT CAST(1 AS TINYINT) AS "1" - FROM lineitem AS t6 + FROM "lineitem" AS "t6" WHERE ( - t6.l_orderkey = t10.l1_orderkey - ) AND ( - t6.l_suppkey <> t10.l1_suppkey + "t6"."l_orderkey" = "t10"."l1_orderkey" + ) + AND ( + "t6"."l_suppkey" <> "t10"."l1_suppkey" ) ) AND NOT ( EXISTS( SELECT CAST(1 AS TINYINT) AS "1" - FROM lineitem AS t7 + FROM "lineitem" AS "t7" WHERE ( ( - t7.l_orderkey = t10.l1_orderkey - ) AND ( - t7.l_suppkey <> t10.l1_suppkey + "t7"."l_orderkey" = "t10"."l1_orderkey" + ) + AND ( + "t7"."l_suppkey" <> "t10"."l1_suppkey" ) ) AND ( - t7.l_receiptdate > t7.l_commitdate + "t7"."l_receiptdate" > "t7"."l_commitdate" ) ) ) - ) AS t13 + ) AS "t13" GROUP BY 1 -) AS t14 +) AS "t14" ORDER BY - t14.numwait DESC, - t14.s_name ASC + "t14"."numwait" DESC, + "t14"."s_name" ASC LIMIT 100 \ No newline at end of file diff --git 
a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql index 25009decccf0..9926fb04be9e 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql @@ -1,68 +1,68 @@ SELECT - t6.cntrycode, - t6.numcust, - t6.totacctbal + "t6"."cntrycode", + "t6"."numcust", + "t6"."totacctbal" FROM ( SELECT - t5.cntrycode, - COUNT(*) AS numcust, - SUM(t5.c_acctbal) AS totacctbal + "t5"."cntrycode", + COUNT(*) AS "numcust", + SUM("t5"."c_acctbal") AS "totacctbal" FROM ( SELECT CASE WHEN ( CAST(0 AS TINYINT) + 1 ) >= 1 - THEN SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) - ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1 + LENGTH(t0.c_phone), CAST(2 AS TINYINT)) - END AS cntrycode, - t0.c_acctbal - FROM customer AS t0 + THEN SUBSTRING("t0"."c_phone", CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) + ELSE SUBSTRING("t0"."c_phone", CAST(0 AS TINYINT) + 1 + LENGTH("t0"."c_phone"), CAST(2 AS TINYINT)) + END AS "cntrycode", + "t0"."c_acctbal" + FROM "customer" AS "t0" WHERE CASE WHEN ( CAST(0 AS TINYINT) + 1 ) >= 1 - THEN SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) - ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1 + LENGTH(t0.c_phone), CAST(2 AS TINYINT)) + THEN SUBSTRING("t0"."c_phone", CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) + ELSE SUBSTRING("t0"."c_phone", CAST(0 AS TINYINT) + 1 + LENGTH("t0"."c_phone"), CAST(2 AS TINYINT)) END IN ('13', '31', '23', '29', '30', '18', '17') - AND t0.c_acctbal > ( + AND "t0"."c_acctbal" > ( SELECT - AVG(t3.c_acctbal) AS "Mean(c_acctbal)" + AVG("t3"."c_acctbal") AS "Mean(c_acctbal)" FROM ( SELECT - t0.c_custkey, - t0.c_name, - t0.c_address, - t0.c_nationkey, - t0.c_phone, - t0.c_acctbal, - t0.c_mktsegment, - t0.c_comment - FROM customer AS t0 + "t0"."c_custkey", + "t0"."c_name", + "t0"."c_address", + "t0"."c_nationkey", + "t0"."c_phone", + "t0"."c_acctbal", + "t0"."c_mktsegment", + "t0"."c_comment" + FROM "customer" AS "t0" WHERE - t0.c_acctbal > CAST(0.0 AS DOUBLE) + "t0"."c_acctbal" > CAST(0.0 AS DOUBLE) AND CASE WHEN ( CAST(0 AS TINYINT) + 1 ) >= 1 - THEN SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) - ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1 + LENGTH(t0.c_phone), CAST(2 AS TINYINT)) + THEN SUBSTRING("t0"."c_phone", CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) + ELSE SUBSTRING("t0"."c_phone", CAST(0 AS TINYINT) + 1 + LENGTH("t0"."c_phone"), CAST(2 AS TINYINT)) END IN ('13', '31', '23', '29', '30', '18', '17') - ) AS t3 + ) AS "t3" ) AND NOT ( EXISTS( SELECT CAST(1 AS TINYINT) AS "1" - FROM orders AS t1 + FROM "orders" AS "t1" WHERE - t1.o_custkey = t0.c_custkey + "t1"."o_custkey" = "t0"."c_custkey" ) ) - ) AS t5 + ) AS "t5" GROUP BY 1 -) AS t6 +) AS "t6" ORDER BY - t6.cntrycode ASC \ No newline at end of file + "t6"."cntrycode" ASC \ No newline at end of file From 5c421cac8226f6de220289e07aee775e7db00141 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Sun, 4 Feb 2024 17:12:22 +0100 Subject: [PATCH 161/161] fix(ir): support converting limit(1) inputs to scalar subqueries --- docs/posts/ibis-duckdb-geospatial/index.qmd | 2 +- .../ibis-duckdb-geospatial/nyc_data.db.wal | 0 ibis/backends/base/sqlglot/rewrites.py | 17 +++---- ibis/backends/pandas/rewrites.py | 39 ++++++--------- ibis/expr/operations/relations.py | 40 ++++++++++++--- ibis/expr/rewrites.py | 30 +++++++++--- 
ibis/expr/tests/test_newrels.py | 49 +++++++++++++++++-- ibis/expr/types/arrays.py | 1 - ibis/expr/types/relations.py | 22 +++++++-- 9 files changed, 140 insertions(+), 60 deletions(-) create mode 100644 docs/posts/ibis-duckdb-geospatial/nyc_data.db.wal diff --git a/docs/posts/ibis-duckdb-geospatial/index.qmd b/docs/posts/ibis-duckdb-geospatial/index.qmd index 575b25fdbd57..bc3f793677fa 100644 --- a/docs/posts/ibis-duckdb-geospatial/index.qmd +++ b/docs/posts/ibis-duckdb-geospatial/index.qmd @@ -83,7 +83,7 @@ Notice that the last column has a `geometry` type, and in this case it contains each subway station. Let's grab the entry for the Broad St subway station. ```{python} -broad_station = subway_stations.filter(subway_stations.NAME == "Broad St") +broad_station = subway_stations.filter(subway_stations.NAME == "Broad St").limit(1) broad_station ``` diff --git a/docs/posts/ibis-duckdb-geospatial/nyc_data.db.wal b/docs/posts/ibis-duckdb-geospatial/nyc_data.db.wal new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ibis/backends/base/sqlglot/rewrites.py b/ibis/backends/base/sqlglot/rewrites.py index 05140a76158c..a74d40667725 100644 --- a/ibis/backends/base/sqlglot/rewrites.py +++ b/ibis/backends/base/sqlglot/rewrites.py @@ -20,6 +20,7 @@ from ibis.common.graph import Graph from ibis.common.patterns import Object, Pattern, _, replace from ibis.common.typing import VarTuple # noqa: TCH001 +from ibis.expr.operations.relations import Simple from ibis.expr.rewrites import d, p, replace_parameter from ibis.expr.schema import Schema @@ -31,19 +32,9 @@ @public -class CTE(ops.Relation): +class CTE(Simple): """Common table expression.""" - parent: ops.Relation - - @attribute - def schema(self): - return self.parent.schema - - @attribute - def values(self): - return self.parent.values - @public class Select(ops.Relation): @@ -62,6 +53,10 @@ def values(self): def schema(self): return Schema({k: v.dtype for k, v in self.selections.items()}) + @attribute + def singlerow(self): + return self.parent.singlerow + @public class Window(ops.Value): diff --git a/ibis/backends/pandas/rewrites.py b/ibis/backends/pandas/rewrites.py index 63f93c830f2c..1e21e5c108ee 100644 --- a/ibis/backends/pandas/rewrites.py +++ b/ibis/backends/pandas/rewrites.py @@ -10,21 +10,18 @@ from ibis.common.collections import FrozenDict from ibis.common.patterns import replace from ibis.common.typing import VarTuple # noqa: TCH001 +from ibis.expr.operations.relations import Simple from ibis.expr.rewrites import replace_parameter from ibis.expr.schema import Schema from ibis.util import gen_name -class PandasRelation(ops.Relation): - pass - - class PandasValue(ops.Value): pass @public -class PandasRename(PandasRelation): +class PandasRename(ops.Relation): parent: ops.Relation mapping: FrozenDict[str, str] @@ -45,22 +42,18 @@ def schema(self): {self.mapping[name]: dtype for name, dtype in self.parent.schema.items()} ) + @attribute + def singlerow(self): + return self.parent.singlerow + @public -class PandasResetIndex(PandasRelation): +class PandasResetIndex(Simple): parent: ops.Relation - @attribute - def values(self): - return self.parent.values - - @attribute - def schema(self): - return self.parent.schema - @public -class PandasJoin(PandasRelation): +class PandasJoin(ops.Relation): left: ops.Relation right: ops.Relation left_on: VarTuple[ops.Value] @@ -84,7 +77,7 @@ class PandasAsofJoin(PandasJoin): @public -class PandasAggregate(PandasRelation): +class PandasAggregate(ops.Relation): parent: ops.Relation groups: 
FrozenDict[str, ops.Field] metrics: FrozenDict[str, ops.Reduction] @@ -97,21 +90,17 @@ def values(self): def schema(self): return Schema({k: v.dtype for k, v in self.values.items()}) + @attribute + def singlerow(self): + return not self.groups + @public -class PandasLimit(PandasRelation): +class PandasLimit(Simple): parent: ops.Relation n: ops.Relation offset: ops.Relation - @attribute - def values(self): - return self.parent.values - - @attribute - def schema(self): - return self.parent.schema - @public class PandasScalarSubquery(PandasValue): diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index d520a4527634..e1d1570ecb09 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -29,6 +29,10 @@ @public class Relation(Node, Coercible): + """Whether the relation is guaranteed to have a single row.""" + + singlerow = False + @classmethod def __coerce__(cls, value): from ibis.expr.types import TableExpr @@ -127,14 +131,15 @@ def dtype(self): @public class ScalarSubquery(Subquery): - def __init__(self, rel): - from ibis.expr.operations import Reduction + shape = ds.scalar + singlerow = True + def __init__(self, rel): super().__init__(rel=rel) - if not isinstance(self.value, Reduction): + if not rel.singlerow: raise IntegrityError( - f"Subquery {self.value!r} is not a reduction, only " - "reductions can be used as scalar subqueries" + "Scalar subquery must have a single row. Either use a reduction " + "or limit the number of rows in the subquery using `.limit(1)`" ) @@ -182,6 +187,11 @@ def __init__(self, parent, values): def schema(self): return Schema({k: v.dtype for k, v in self.values.items()}) + @attribute + def singlerow(self): + # TODO(kszucs): also check that values doesn't contain Unnest + return self.parent.singlerow + class Simple(Relation): parent: Relation @@ -194,6 +204,10 @@ def values(self): def schema(self): return self.parent.schema + @attribute + def singlerow(self): + return self.parent.singlerow + # TODO(kszucs): remove in favor of View @public @@ -282,10 +296,10 @@ class Filter(Simple): predicates: VarTuple[Value[dt.Boolean]] def __init__(self, parent, predicates): - from ibis.expr.rewrites import ReductionLike + from ibis.expr.rewrites import ScalarLike for pred in predicates: - if pred.find(ReductionLike, filter=Value): + if pred.find(ScalarLike, filter=Value): raise IntegrityError( f"Cannot add {pred!r} to filter, it is a reduction which " "must be converted to a scalar subquery first" @@ -304,6 +318,10 @@ class Limit(Simple): n: typing.Union[int, Scalar[dt.Integer], None] = None offset: typing.Union[int, Scalar[dt.Integer]] = 0 + @attribute + def singlerow(self): + return self.n == 1 + @public class Aggregate(Relation): @@ -328,6 +346,10 @@ def values(self): def schema(self): return Schema({k: v.dtype for k, v in self.values.items()}) + @attribute + def singlerow(self): + return not self.groups + @public class Set(Relation): @@ -438,6 +460,10 @@ class DummyTable(Relation): def schema(self): return Schema({k: v.dtype for k, v in self.values.items()}) + @attribute + def singlerow(self): + return all(value.shape.is_scalar() for value in self.values.values()) + @public class FillNa(Simple): diff --git a/ibis/expr/rewrites.py b/ibis/expr/rewrites.py index 8b1ea87de95d..6f9c3172c6ed 100644 --- a/ibis/expr/rewrites.py +++ b/ibis/expr/rewrites.py @@ -10,7 +10,7 @@ import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis.common.deferred import Item, _, deferred, var -from 
ibis.common.exceptions import ExpressionError +from ibis.common.exceptions import ExpressionError, RelationError from ibis.common.patterns import Check, pattern, replace from ibis.util import Namespace @@ -98,8 +98,12 @@ def rewrite_sample(_): @replace(p.Analytic) def project_wrap_analytic(_, rel): - # Wrap analytic functions in a window function - return ops.WindowFunction(_, ops.RowsWindowFrame(rel)) + if _.relations == {rel}: + # Wrap analytic functions in a window function + return ops.WindowFunction(_, ops.RowsWindowFrame(rel)) + else: + # TODO(kszucs): cover this with tests + raise RelationError("Analytic function depends on multiple tables") @replace(p.Reduction) @@ -110,8 +114,9 @@ def project_wrap_reduction(_, rel): # it into a window function of `rel` return ops.WindowFunction(_, ops.RowsWindowFrame(rel)) else: - # 1. The reduction doesn't depend on any table, constructed from - # scalar values, so turn it into a scalar subquery. + # 1. The reduction doesn't depend only on `rel` but either constructed + # from scalar values or depends on other relations, so turn it into + # a scalar subquery. # 2. The reduction is originating from `rel` and other tables, # so this is a correlated scalar subquery. # 3. The reduction is originating entirely from other tables, @@ -119,21 +124,30 @@ def project_wrap_reduction(_, rel): return ops.ScalarSubquery(_.to_expr().as_table()) +@replace(p.Field(p.Relation(singlerow=True))) +def project_wrap_scalar_field(_, rel): + if _.relations == {rel}: + return _ + else: + return ops.ScalarSubquery(_.to_expr().as_table()) + + def rewrite_project_input(value, relation): # we need to detect reductions which are either turned into window functions # or scalar subqueries depending on whether they are originating from the # relation return value.replace( - project_wrap_analytic | project_wrap_reduction, + project_wrap_analytic | project_wrap_reduction | project_wrap_scalar_field, filter=p.Value & ~p.WindowFunction, context={"rel": relation}, ) -ReductionLike = p.Reduction | p.Field(p.Aggregate(groups={})) +ScalarLike = p.Reduction | p.Field(p.Relation(singlerow=True)) +# ReductionLike = p.Reduction | p.Field(p.Aggregate(groups={})) -@replace(ReductionLike) +@replace(ScalarLike) def filter_wrap_reduction(_): # Wrap reductions or fields referencing an aggregation without a group by - # which are scalar fields - in a scalar subquery. 
In the latter case we diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py index 24c9661d6e6c..1699d04756fa 100644 --- a/ibis/expr/tests/test_newrels.py +++ b/ibis/expr/tests/test_newrels.py @@ -147,10 +147,25 @@ def test_subquery_integrity_check(): with pytest.raises(IntegrityError, match=msg): ops.ScalarSubquery(t) - agg = t.agg(t.a.sum() + 1) - msg = "is not a reduction" + col = t.select(t.a) + msg = "Scalar subquery must have a single row" with pytest.raises(IntegrityError, match=msg): - ops.ScalarSubquery(agg) + ops.ScalarSubquery(col) + + agg = t.agg(t.a.sum() + 1) + sub = ops.ScalarSubquery(agg) + assert isinstance(sub, ops.ScalarSubquery) + assert sub.shape.is_scalar() + assert sub.dtype.is_int64() + + +# TODO(kszucs): raise a warning about deprecating the use of `to_array` +def test_value_to_array_creates_subquery(): + expr = t.int_col.sum().as_table().to_array() + op = expr.op() + assert op.shape.is_scalar() + assert op.dtype.is_int64() + assert isinstance(op, ops.ScalarSubquery) def test_select_turns_scalar_reduction_into_subquery(): @@ -162,7 +177,7 @@ def test_select_turns_scalar_reduction_into_subquery(): assert t1.op() == expected -def test_select_scalar_foreign_scalar_reduction_into_subquery(): +def test_select_turns_foreign_field_reduction_into_subquery(): t1 = t.filter(t.bool_col) t2 = t.select(summary=t1.int_col.sum()) subquery = ops.ScalarSubquery(t1.int_col.sum().as_table()) @@ -180,6 +195,32 @@ def test_select_turns_value_with_multiple_parents_into_subquery(): assert t1.op() == expected +def test_select_turns_singlerow_relation_field_into_scalar_subquery(): + v = ibis.table(name="v", schema={"a": "int64", "b": "string"}) + + # other is from the same relation + expr = t.select(t.int_col, other=v.limit(1).a) + expected = Project( + parent=t, + values={ + "int_col": t.int_col, + "other": ops.ScalarSubquery(v.limit(1).a.as_table()), + }, + ) + assert expr.op() == expected + + # other is from a different relation + expr = t.select(t.int_col, other=t.limit(1).int_col) + expected = Project( + parent=t, + values={ + "int_col": t.int_col, + "other": ops.ScalarSubquery(t.limit(1).int_col.as_table()), + }, + ) + assert expr.op() == expected + + def test_mutate(): proj = t.select(t, other=t.int_col + 1) expected = Project( diff --git a/ibis/expr/types/arrays.py b/ibis/expr/types/arrays.py index 923e01f44dcc..b8ab3e26c29a 100644 --- a/ibis/expr/types/arrays.py +++ b/ibis/expr/types/arrays.py @@ -6,7 +6,6 @@ import ibis.expr.operations as ops from ibis.expr.types.generic import Column, Scalar, Value, literal -from ibis.expr.types.typing import V from ibis.common.deferred import deferrable if TYPE_CHECKING: diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index 7db999d5aeca..c8a903066506 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -1137,9 +1137,9 @@ def aggregate( metrics = unwrap_aliases(metrics) having = unwrap_aliases(having) - groups = dereference_values(self.op(), groups) - metrics = dereference_values(self.op(), metrics) - having = dereference_values(self.op(), having) + groups = dereference_values(node, groups) + metrics = dereference_values(node, metrics) + having = dereference_values(node, having) # the user doesn't need to specify the metrics used in the having clause # explicitly, we implicitly add them to the metrics list by looking for @@ -1816,6 +1816,22 @@ def intersect(self, table: Table, *rest: Table, distinct: bool = True) -> Table: node = ops.Intersection(node, table, 
distinct=distinct) return node.to_expr().select(self.columns) + def to_array(self) -> ir.Column: + """View a single column table as an array. + + Returns + ------- + Value + A single column view of a table + """ + schema = self.schema() + if len(schema) != 1: + raise com.ExpressionError( + "Table must have exactly one column when viewed as array" + ) + + return ops.ScalarSubquery(self).to_expr() + def mutate(self, *exprs: Sequence[ir.Expr] | None, **mutations: ir.Value) -> Table: """Add columns to a table expression.
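
Note on the behaviour introduced above: the hunks replace the old "only reductions may be scalar subqueries" rule with a `singlerow` flag carried by every relation, so any relation that is provably one-row can feed `ops.ScalarSubquery`. A minimal sketch of the intended behaviour, assuming this patch is applied (the table and column names below are hypothetical, not taken from the patch):

    import ibis
    import ibis.expr.operations as ops
    from ibis.common.exceptions import IntegrityError

    t = ibis.table(name="t", schema={"a": "int64", "b": "string"})

    # A reduction still qualifies: an aggregate with no group keys is single-row.
    sub = ops.ScalarSubquery(t.a.sum().as_table())
    assert sub.shape.is_scalar()
    assert sub.dtype.is_int64()

    # A relation pinned to one row via .limit(1) now qualifies as well.
    sub = ops.ScalarSubquery(t.select(t.a).limit(1))
    assert sub.dtype.is_int64()

    # Anything that may return more than one row is still rejected.
    try:
        ops.ScalarSubquery(t.select(t.a))
        raise AssertionError("expected IntegrityError")
    except IntegrityError:
        pass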
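
The rewrite rule `project_wrap_scalar_field` and the reinstated `Table.to_array` build on the same flag: a field pulled out of a single-row relation becomes a scalar subquery when projected against a different parent. A hedged usage sketch, again assuming this patch (tables and columns are made up, mirroring the patch's own tests):

    import ibis
    import ibis.expr.operations as ops

    t = ibis.table(name="t", schema={"int_col": "int64", "bool_col": "boolean"})
    v = ibis.table(name="v", schema={"a": "int64"})

    # Projecting a column of a single-row relation against another table is
    # rewritten into a scalar subquery by project_wrap_scalar_field.
    expr = t.select(t.int_col, other=v.limit(1).a)
    assert isinstance(expr.op().values["other"], ops.ScalarSubquery)

    # Table.to_array() views a one-column table as that same scalar subquery.
    total = t.int_col.sum().as_table().to_array()
    assert isinstance(total.op(), ops.ScalarSubquery)
    assert total.op().dtype.is_int64()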