From 433bb75ba37e34ed386bfa5341e9189fb8b55ec8 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 19 Jan 2024 13:09:09 -0500 Subject: [PATCH] refactor(exasol): port to sqlglot (#8032) This PR ports exasol to sqlglot instead of sqlalchemy. --- .github/workflows/ibis-backends.yml | 72 +-- ci/schema/exasol.sql | 40 +- ibis/backends/base/sqlglot/datatypes.py | 76 +++ ibis/backends/conftest.py | 1 - ibis/backends/exasol/__init__.py | 446 ++++++++++++------ ibis/backends/exasol/compiler.py | 231 ++++++++- ibis/backends/exasol/converter.py | 38 ++ ibis/backends/exasol/datatypes.py | 26 - ibis/backends/exasol/registry.py | 46 -- ibis/backends/exasol/tests/conftest.py | 27 +- .../test_default_limit/exasol/out.sql | 5 + .../test_disable_query_limit/exasol/out.sql | 5 + .../exasol/out.sql | 3 + .../test_respect_set_limit/exasol/out.sql | 10 + .../test_group_by_has_index/exasol/out.sql | 22 + .../test_sql/test_isin_bug/exasol/out.sql | 9 + ibis/backends/tests/test_aggregation.py | 15 +- ibis/backends/tests/test_asof_join.py | 2 + ibis/backends/tests/test_binary.py | 3 +- ibis/backends/tests/test_dot_sql.py | 20 +- ibis/backends/tests/test_export.py | 22 +- ibis/backends/tests/test_generic.py | 125 +++-- ibis/backends/tests/test_join.py | 28 +- ibis/backends/tests/test_numeric.py | 99 +--- ibis/backends/tests/test_sql.py | 12 +- ibis/backends/tests/test_string.py | 7 +- ibis/backends/tests/test_temporal.py | 160 +++---- ibis/backends/tests/test_window.py | 38 +- poetry.lock | 41 +- pyproject.toml | 6 +- requirements-dev.txt | 6 +- 31 files changed, 986 insertions(+), 655 deletions(-) create mode 100644 ibis/backends/exasol/converter.py delete mode 100644 ibis/backends/exasol/datatypes.py delete mode 100644 ibis/backends/exasol/registry.py create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_isin_bug/exasol/out.sql diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index a78d1ae83a68b..8bdc3c74db5a4 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -162,13 +162,13 @@ jobs: # - oracle # services: # - oracle - # - name: exasol - # title: Exasol - # serial: true - # extras: - # - exasol - # services: - # - exasol + - name: exasol + title: Exasol + serial: true + extras: + - exasol + services: + - exasol # - name: flink # title: Flink # serial: true @@ -299,21 +299,21 @@ jobs: # - flink # - os: windows-latest # backend: - # name: exasol - # title: Exasol - # serial: true - # extras: - # - exasol - # services: - # - exasol - # - os: windows-latest - # backend: # name: risingwave # title: Risingwave # services: # - risingwave # extras: # - risingwave + - os: windows-latest + backend: + name: exasol + title: Exasol + serial: true + extras: + - exasol + services: + - exasol steps: - name: update and install system dependencies if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null @@ -612,46 +612,6 @@ jobs: with: flags: backend,pyspark,${{ runner.os 
}},python-${{ steps.install_python.outputs.python-version }} - # gen_lockfile_sqlalchemy2: - # name: Generate Poetry Lockfile for SQLAlchemy 2 - # runs-on: ubuntu-latest - # steps: - # - name: checkout - # uses: actions/checkout@v4 - # - # - name: install python - # uses: actions/setup-python@v5 - # with: - # python-version: "3.11" - # - # - run: python -m pip install --upgrade pip 'poetry==1.7.1' - # - # - name: remove deps that are not compatible with sqlalchemy 2 - # run: poetry remove sqlalchemy-exasol - # - # - name: add sqlalchemy 2 - # run: poetry add --lock --optional 'sqlalchemy>=2,<3' - # - # - name: checkout the lock file - # run: git checkout poetry.lock - # - # - name: lock with no updates - # # poetry add is aggressive and will update other dependencies like - # # numpy and pandas so we keep the pyproject.toml edits and then relock - # # without updating anything except the requested versions - # run: poetry lock --no-update - # - # - name: check the sqlalchemy version - # run: poetry show sqlalchemy --no-ansi | grep version | cut -d ':' -f2- | sed 's/ //g' | grep -P '^2\.' - # - # - name: upload deps file - # uses: actions/upload-artifact@v3 - # with: - # name: deps - # path: | - # pyproject.toml - # poetry.lock - # test_backends_sqlalchemy2: # name: SQLAlchemy 2 ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }} # runs-on: ${{ matrix.os }} diff --git a/ci/schema/exasol.sql b/ci/schema/exasol.sql index 856b059e74077..7d40fd94c7981 100644 --- a/ci/schema/exasol.sql +++ b/ci/schema/exasol.sql @@ -1,7 +1,7 @@ DROP SCHEMA IF EXISTS EXASOL CASCADE; CREATE SCHEMA EXASOL; -CREATE OR REPLACE TABLE EXASOL.diamonds +CREATE OR REPLACE TABLE EXASOL."diamonds" ( "carat" DOUBLE, "cut" VARCHAR(256), @@ -15,13 +15,13 @@ CREATE OR REPLACE TABLE EXASOL.diamonds "z" DOUBLE ); -CREATE OR REPLACE TABLE EXASOL.batting +CREATE OR REPLACE TABLE EXASOL."batting" ( "playerID" VARCHAR(256), "yearID" BIGINT, "stint" BIGINT, "teamID" VARCHAR(256), - "logID" VARCHAR(256), + "lgID" VARCHAR(256), "G" BIGINT, "AB" BIGINT, "R" BIGINT, @@ -41,22 +41,22 @@ CREATE OR REPLACE TABLE EXASOL.batting "GIDP" BIGINT ); -CREATE OR REPLACE TABLE EXASOL.awards_players +CREATE OR REPLACE TABLE EXASOL."awards_players" ( - "playerId" VARCHAR(256), + "playerID" VARCHAR(256), "awardID" VARCHAR(256), - "yearID" VARCHAR(256), - "logID" VARCHAR(256), + "yearID" BIGINT, + "lgID" VARCHAR(256), "tie" VARCHAR(256), "notest" VARCHAR(256) ); -CREATE OR REPLACE TABLE EXASOL.functional_alltypes +CREATE OR REPLACE TABLE EXASOL."functional_alltypes" ( "id" INTEGER, "bool_col" BOOLEAN, "tinyint_col" SHORTINT, - "small_int" SMALLINT, + "smallint_col" SMALLINT, "int_col" INTEGER, "bigint_col" BIGINT, "float_col" FLOAT, @@ -69,7 +69,21 @@ CREATE OR REPLACE TABLE EXASOL.functional_alltypes ); -IMPORT INTO EXASOL.diamonds FROM LOCAL CSV FILE '/data/diamonds.csv' COLUMN SEPARATOR = ',' SKIP = 1; -IMPORT INTO EXASOL.batting FROM LOCAL CSV FILE '/data/batting.csv' COLUMN SEPARATOR = ',' SKIP = 1; -IMPORT INTO EXASOL.awards_players FROM LOCAL CSV FILE '/data/awards_players.csv' COLUMN SEPARATOR = ',' SKIP = 1; -IMPORT INTO EXASOL.functional_alltypes FROM LOCAL CSV FILE '/data/functional_alltypes.csv' COLUMN SEPARATOR = ',' SKIP = 1; +IMPORT INTO EXASOL."diamonds" FROM LOCAL CSV FILE '/data/diamonds.csv' COLUMN SEPARATOR = ',' SKIP = 1; +IMPORT INTO EXASOL."batting" FROM LOCAL CSV FILE '/data/batting.csv' COLUMN SEPARATOR = ',' SKIP = 1; +IMPORT INTO EXASOL."awards_players" FROM LOCAL CSV FILE '/data/awards_players.csv' 
COLUMN SEPARATOR = ',' SKIP = 1; +IMPORT INTO EXASOL."functional_alltypes" FROM LOCAL CSV FILE '/data/functional_alltypes.csv' COLUMN SEPARATOR = ',' SKIP = 1; + +CREATE OR REPLACE TABLE EXASOL."win" +( + "g" VARCHAR(1), + "x" BIGINT, + "y" BIGINT +); + +INSERT INTO "win" VALUES + ('a', 0, 3), + ('a', 1, 2), + ('a', 2, 0), + ('a', 3, 1), + ('a', 4, 1); diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index 4928c4d9523ca..273295f83b2f9 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -738,3 +738,79 @@ def _from_ibis_Int64(cls, dtype: dt.Int64) -> NoReturn: raise com.UnsupportedBackendType( "int64 is not a supported input or output type in BigQuery UDFs; use float64 instead" ) + + +class ExasolType(SqlglotType): + dialect = "exasol" + + default_temporal_scale = 3 + + default_decimal_precision = 18 + default_decimal_scale = 0 + + @classmethod + def _from_ibis_String(cls, dtype: dt.String) -> sge.DataType: + return sge.DataType( + this=sge.DataType.Type.VARCHAR, + expressions=[sge.DataTypeParam(this=sge.convert(2_000_000))], + ) + + @classmethod + def _from_sqlglot_DECIMAL( + cls, + precision: sge.DataTypeParam | None = None, + scale: sge.DataTypeParam | None = None, + ) -> dt.Decimal: + if precision is None: + precision = cls.default_decimal_precision + else: + precision = int(precision.this.this) + + if scale is None: + scale = cls.default_decimal_scale + else: + scale = int(scale.this.this) + + if not scale: + if 0 < precision <= 3: + return dt.Int8(nullable=cls.default_nullable) + elif 3 < precision <= 9: + return dt.Int16(nullable=cls.default_nullable) + elif 9 < precision <= 18: + return dt.Int32(nullable=cls.default_nullable) + elif 18 < precision <= 36: + return dt.Int64(nullable=cls.default_nullable) + else: + raise com.UnsupportedBackendType( + "Decimal precision is too large; Exasol supports precision up to 36." 
+ ) + return dt.Decimal(precision, scale, nullable=cls.default_nullable) + + @classmethod + def _from_ibis_Array(cls, dtype: dt.Array) -> NoReturn: + raise com.UnsupportedBackendType("Arrays not supported in Exasol") + + @classmethod + def _from_ibis_Map(cls, dtype: dt.Map) -> NoReturn: + raise com.UnsupportedBackendType("Maps not supported in Exasol") + + @classmethod + def _from_ibis_Struct(cls, dtype: dt.Struct) -> NoReturn: + raise com.UnsupportedBackendType("Structs not supported in Exasol") + + @classmethod + def _from_ibis_Timestamp(cls, dtype: dt.Timestamp) -> sge.DataType: + code = typecode.TIMESTAMP if dtype.timezone is None else typecode.TIMESTAMPTZ + return sge.DataType(this=code) + + @classmethod + def _from_sqlglot_ARRAY(cls, value_type: sge.DataType) -> NoReturn: + raise com.UnsupportedBackendType("Arrays not supported in Exasol") + + @classmethod + def _from_sqlglot_MAP(cls, key: sge.DataType, value: sge.DataType) -> NoReturn: + raise com.UnsupportedBackendType("Maps not supported in Exasol") + + @classmethod + def _from_sqlglot_STRUCT(cls, *cols: sge.ColumnDef) -> NoReturn: + raise com.UnsupportedBackendType("Structs not supported in Exasol") diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index ad4b8712de8a6..b10f902818e03 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -548,7 +548,6 @@ def ddl_con(ddl_backend): @pytest.fixture( params=_get_backends_to_test( keep=( - "exasol", "mssql", "oracle", "risingwave", diff --git a/ibis/backends/exasol/__init__.py b/ibis/backends/exasol/__init__.py index d00f9b7f9c96e..715fa3d6b7e4d 100644 --- a/ibis/backends/exasol/__init__.py +++ b/ibis/backends/exasol/__init__.py @@ -1,44 +1,65 @@ from __future__ import annotations +import atexit +import contextlib import re -import warnings -from collections import ChainMap -from contextlib import contextmanager from typing import TYPE_CHECKING, Any +from urllib.parse import parse_qs, urlparse -import sqlalchemy as sa +import pyexasol import sqlglot as sg +import sqlglot.expressions as sge +import ibis +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +import ibis.expr.schema as sch +import ibis.expr.types as ir from ibis import util -from ibis.backends.base.sql.alchemy import AlchemyCanCreateSchema, BaseAlchemyBackend -from ibis.backends.base.sqlglot.datatypes import PostgresType +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.base.sqlglot.compiler import STAR, C from ibis.backends.exasol.compiler import ExasolCompiler if TYPE_CHECKING: - from collections.abc import Iterable, MutableMapping + from collections.abc import Iterable, Mapping + + import pandas as pd + import pyarrow as pa from ibis.backends.base import BaseBackend - from ibis.expr import datatypes as dt + +# strip trailing encodings e.g., UTF8 +_VARCHAR_REGEX = re.compile(r"^(VARCHAR(?:\(\d+\)))?(?:\s+.+)?$") -class Backend(BaseAlchemyBackend, AlchemyCanCreateSchema): +class Backend(SQLGlotBackend): name = "exasol" - compiler = ExasolCompiler + compiler = ExasolCompiler() supports_temporary_tables = False supports_create_or_replace = False supports_in_memory_tables = False supports_python_udfs = False + @property + def version(self) -> str: + # https://stackoverflow.com/a/67500385 + query = ( + sg.select("param_value") + .from_(sg.table("EXA_METADATA", catalog="SYS")) + .where(C.param_name.eq("databaseProductVersion")) + ) + with self._safe_raw_sql(query) as result: + [(version,)] = result.fetchall() + 
return version + def do_connect( self, user: str, password: str, host: str = "localhost", port: int = 8563, - schema: str | None = None, - encryption: bool = True, - certificate_validation: bool = True, - encoding: str = "en_US.UTF-8", + **kwargs: Any, ) -> None: """Create an Ibis client connected to an Exasol database. @@ -52,130 +73,286 @@ def do_connect( Hostname to connect to (default: "localhost"). port Port number to connect to (default: 8563) - schema - Database schema to open, if `None`, no schema will be opened. - encryption - Enables/disables transport layer encryption (default: True). - certificate_validation - Enables/disables certificate validation (default: True). - encoding - The encoding format (default: "en_US.UTF-8"). + kwargs + Additional keyword arguments passed to `pyexasol.connect`. """ - options = [ - "SSLCertificate=SSL_VERIFY_NONE" if not certificate_validation else "", - f"ENCRYPTION={'yes' if encryption else 'no'}", - f"CONNECTIONCALL={encoding}", - ] - url_template = ( - "exa+websocket://{user}:{password}@{host}:{port}/{schema}?{options}" - ) - url = sa.engine.url.make_url( - url_template.format( - user=user, - password=password, - host=host, - port=port, - schema=schema, - options="&".join(options), + if kwargs.pop("quote_ident", None) is not None: + raise com.UnsupportedArgumentError( + "Setting `quote_ident` to anything other than `True` is not supported. " + "Ibis requires all identifiers to be quoted to work correctly." ) - ) - engine = sa.create_engine(url, poolclass=sa.pool.StaticPool) - super().do_connect(engine) - - def _convert_kwargs(self, kwargs: MutableMapping) -> None: - def convert_sqla_to_ibis(keyword_arguments): - sqla_to_ibis = {"tls": "encryption", "username": "user"} - for sqla_kwarg, ibis_kwarg in sqla_to_ibis.items(): - if sqla_kwarg in keyword_arguments: - keyword_arguments[ibis_kwarg] = keyword_arguments.pop(sqla_kwarg) - - def filter_kwargs(keyword_arguments): - allowed_parameters = [ - "user", - "password", - "host", - "port", - "schema", - "encryption", - "certificate", - "encoding", - ] - to_be_removed = [ - key for key in keyword_arguments if key not in allowed_parameters - ] - for parameter_name in to_be_removed: - del keyword_arguments[parameter_name] - convert_sqla_to_ibis(kwargs) - filter_kwargs(kwargs) + self.con = pyexasol.connect( + dsn=f"{host}:{port}", + user=user, + password=password, + quote_ident=True, + **kwargs, + ) + self._temp_views = set() def _from_url(self, url: str, **kwargs) -> BaseBackend: """Construct an ibis backend from a SQLAlchemy-conforming URL.""" - kwargs = ChainMap(kwargs) - _, new_kwargs = self.inspector.dialect.create_connect_args(url) - kwargs = kwargs.new_child(new_kwargs) - kwargs = dict(kwargs) + url = urlparse(url) + query_params = parse_qs(url.query) + kwargs = { + "user": url.username, + "password": url.password, + "schema": url.path[1:] or None, + "host": url.hostname, + "port": url.port, + } | kwargs + + for name, value in query_params.items(): + if len(value) > 1: + kwargs[name] = value + elif len(value) == 1: + kwargs[name] = value[0] + else: + raise com.IbisError(f"Invalid URL parameter: {name}") + self._convert_kwargs(kwargs) return self.connect(**kwargs) - @property - def inspector(self): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=sa.exc.RemovedIn20Warning) - return super().inspector - - @contextmanager + @contextlib.contextmanager def begin(self): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=sa.exc.RemovedIn20Warning) 
- with super().begin() as con: - yield con + # pyexasol doesn't have a cursor method + con = self.con + try: + yield con + except Exception: + con.rollback() + raise + else: + con.commit() + + @contextlib.contextmanager + def _safe_raw_sql(self, query: str, *args, **kwargs): + with contextlib.suppress(AttributeError): + query = query.sql(dialect=self.compiler.dialect) + + with self.begin() as cur: + yield cur.execute(query, *args, **kwargs) def list_tables(self, like=None, database=None): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=sa.exc.RemovedIn20Warning) - return super().list_tables(like=like, database=database) + tables = sg.select("table_name").from_( + sg.table("EXA_ALL_TABLES", catalog="SYS") + ) + views = sg.select(sg.column("view_name").as_("table_name")).from_( + sg.table("EXA_ALL_VIEWS", catalog="SYS") + ) - def _get_sqla_table( - self, - name: str, - autoload: bool = True, - **kwargs: Any, - ) -> sa.Table: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=sa.exc.RemovedIn20Warning) - return super()._get_sqla_table(name=name, autoload=autoload, **kwargs) + if database is not None: + tables = tables.where(sg.column("table_schema").eq(sge.convert(database))) + views = views.where(sg.column("view_schema").eq(sge.convert(database))) + + query = sg.union(tables, views) + + with self._safe_raw_sql(query) as con: + tables = con.fetchall() + + return self._filter_with_like([table for (table,) in tables], like=like) + + def get_schema( + self, table_name: str, schema: str | None = None, database: str | None = None + ) -> sch.Schema: + name_type_pairs = self._metadata( + sg.select(STAR) + .from_( + sg.table( + table_name, db=schema, catalog=database, quoted=self.compiler.quoted + ) + ) + .sql(self.compiler.dialect) + ) + return sch.Schema.from_tuples(name_type_pairs) + + def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: + import pandas as pd + + from ibis.backends.exasol.converter import ExasolPandasData + + df = pd.DataFrame.from_records(cursor, columns=schema.names, coerce_float=True) + df = ExasolPandasData.convert_table(df, schema) + return df def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]: - table = sg.table(util.gen_name("exasol_metadata")) + table = sg.table(util.gen_name("exasol_metadata"), quoted=self.compiler.quoted) + dialect = self.compiler.dialect create_view = sg.exp.Create( - kind="VIEW", this=table, expression=sg.parse_one(query, dialect="postgres") + kind="VIEW", + this=table, + expression=sg.parse_one(query, dialect=dialect), ) drop_view = sg.exp.Drop(kind="VIEW", this=table) - describe = sg.exp.Describe(this=table).sql(dialect="postgres") - # strip trailing encodings e.g., UTF8 - varchar_regex = re.compile(r"^(VARCHAR(?:\(\d+\)))?(?:\s+.+)?$") - with self.begin() as con: - con.exec_driver_sql(create_view.sql(dialect="postgres")) + describe = sg.exp.Describe(this=table) + with self._safe_raw_sql(create_view): try: yield from ( ( name, - PostgresType.from_string(varchar_regex.sub(r"\1", typ)), + self.compiler.type_mapper.from_string( + _VARCHAR_REGEX.sub(r"\1", typ) + ), ) - for name, typ, *_ in con.exec_driver_sql(describe) + for name, typ, *_ in self.con.execute( + describe.sql(dialect=dialect) + ).fetchall() ) finally: - con.exec_driver_sql(drop_view.sql(dialect="postgres")) + self.con.execute(drop_view.sql(dialect=dialect)) - @property - def current_schema(self) -> str: - return self._scalar_query(sa.select(sa.text("CURRENT_SCHEMA"))) + def 
_register_in_memory_table(self, op: ops.InMemoryTable) -> None:
+        schema = op.schema
+        if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
+            raise com.IbisTypeError(
+                "Exasol cannot yet reliably handle `null` typed columns; "
+                f"got null typed columns: {null_columns}"
+            )
+
+        # only register if we haven't already done so
+        if (name := op.name) not in self.list_tables():
+            quoted = self.compiler.quoted
+            column_defs = [
+                sg.exp.ColumnDef(
+                    this=sg.to_identifier(colname, quoted=quoted),
+                    kind=self.compiler.type_mapper.from_ibis(typ),
+                    constraints=(
+                        None
+                        if typ.nullable
+                        else [
+                            sg.exp.ColumnConstraint(
+                                kind=sg.exp.NotNullColumnConstraint()
+                            )
+                        ]
+                    ),
+                )
+                for colname, typ in schema.items()
+            ]
+
+            ident = sg.to_identifier(name, quoted=quoted)
+            create_stmt = sg.exp.Create(
+                kind="TABLE",
+                this=sg.exp.Schema(this=ident, expressions=column_defs),
+            )
+            create_stmt_sql = create_stmt.sql(self.name)
+
+            df = op.data.to_frame()
+            with self._safe_raw_sql(create_stmt_sql):
+                self.con.import_from_pandas(df, name)
+
+            atexit.register(self._clean_up_tmp_table, ident)
+
+    def _clean_up_tmp_table(self, ident: sge.Identifier) -> None:
+        with self._safe_raw_sql(
+            sge.Drop(kind="TABLE", this=ident, force=True, cascade=True)
+        ):
+            pass
+
+    def create_table(
+        self,
+        name: str,
+        obj: pd.DataFrame | pa.Table | ir.Table | None = None,
+        *,
+        schema: sch.Schema | None = None,
+        database: str | None = None,
+        overwrite: bool = False,
+    ) -> ir.Table:
+        """Create a table in Exasol.
+
+        Parameters
+        ----------
+        name
+            Name of the table to create
+        obj
+            The data with which to populate the table; optional, but at least
+            one of `obj` or `schema` must be specified
+        schema
+            The schema of the table to create; optional, but at least one of
+            `obj` or `schema` must be specified
+        database
+            The database in which to create the table; optional
+        overwrite
+            If `True`, replace the table if it already exists, otherwise fail
+            if the table exists
+        """
+        if obj is None and schema is None:
+            raise ValueError("Either `obj` or `schema` must be specified")
+
+        if database is not None and database != self.current_database:
+            raise com.UnsupportedOperationError(
+                "Creating tables in other databases is not supported by Exasol"
+            )
+        else:
+            database = None
+
+        quoted = self.compiler.quoted
+
+        if obj is not None:
+            if not isinstance(obj, ir.Expr):
+                table = ibis.memtable(obj)
+            else:
+                table = obj
+
+            self._run_pre_execute_hooks(table)
+
+            query = self._to_sqlglot(table)
+        else:
+            query = None
+
+        type_mapper = self.compiler.type_mapper
+        column_defs = [
+            sge.ColumnDef(
+                this=sg.to_identifier(colname, quoted=quoted),
+                kind=type_mapper.from_ibis(typ),
+                constraints=(
+                    None
+                    if typ.nullable
+                    else [sge.ColumnConstraint(kind=sge.NotNullColumnConstraint())]
+                ),
+            )
+            for colname, typ in (schema or table.schema()).items()
+        ]
+
+        if overwrite:
+            temp_name = util.gen_name(f"{self.name}_table")
+        else:
+            temp_name = name
+
+        table = sg.table(temp_name, catalog=database, quoted=quoted)
+        target = sge.Schema(this=table, expressions=column_defs)
+
+        create_stmt = sge.Create(kind="TABLE", this=target)
+
+        this = sg.table(name, catalog=database, quoted=quoted)
+        with self._safe_raw_sql(create_stmt):
+            if query is not None:
+                self.con.execute(
+                    sge.Insert(this=table, expression=query).sql(self.name)
+                )
+
+            if overwrite:
+                self.con.execute(
+                    sge.Drop(kind="TABLE", this=this, exists=True).sql(self.name)
+                )
+                self.con.execute(
+                    f"RENAME TABLE {table.sql(self.name)} TO 
{this.sql(self.name)}" + ) + + if schema is None: + return self.table(name, database=database) + + # preserve the input schema if it was provided + return ops.DatabaseTable( + name, schema=schema, source=self, namespace=ops.Namespace(database=database) + ).to_expr() @property - def current_database(self) -> str: - return None + def current_schema(self) -> str: + with self._safe_raw_sql("SELECT CURRENT_SCHEMA") as cur: + [(schema,)] = cur.fetchall() + return schema def drop_schema( self, name: str, database: str | None = None, force: bool = False @@ -184,11 +361,9 @@ def drop_schema( raise NotImplementedError( "`database` argument is not supported for the Exasol backend" ) - drop_schema = sg.exp.Drop( - kind="SCHEMA", this=sg.to_identifier(name), exists=force - ) + drop_schema = sg.exp.Drop(kind="SCHEMA", this=name, exists=force) with self.begin() as con: - con.exec_driver_sql(drop_schema.sql(dialect="postgres")) + con.execute(drop_schema.sql(dialect=self.compiler.dialect)) def create_schema( self, name: str, database: str | None = None, force: bool = False @@ -197,20 +372,15 @@ def create_schema( raise NotImplementedError( "`database` argument is not supported for the Exasol backend" ) - create_schema = sg.exp.Create( - kind="SCHEMA", this=sg.to_identifier(name), exists=force - ) + create_schema = sg.exp.Create(kind="SCHEMA", this=name, exists=force) + open_schema = self.current_schema with self.begin() as con: - open_schema = self.current_schema - con.exec_driver_sql(create_schema.sql(dialect="postgres")) + con.execute(create_schema.sql(dialect=self.compiler.dialect)) # Exasol implicitly opens the created schema, therefore we need to restore # the previous context. - action = ( - sa.text(f"OPEN SCHEMA {open_schema}") - if open_schema - else sa.text(f"CLOSE SCHEMA {name}") + con.execute( + f"OPEN SCHEMA {open_schema}" if open_schema else f"CLOSE SCHEMA {name}" ) - con.exec_driver_sql(action) def list_schemas( self, like: str | None = None, database: str | None = None @@ -220,15 +390,25 @@ def list_schemas( "`database` argument is not supported for the Exasol backend" ) - schema, table = "SYS", "EXA_SCHEMAS" - sch = sa.table( - table, - sa.column("schema_name", sa.TEXT()), - schema=schema, - ) + query = sg.select("schema_name").from_(sg.table("EXA_SCHEMAS", catalog="SYS")) - query = sa.select(sch.c.schema_name) + with self._safe_raw_sql(query) as con: + schemas = con.fetchall() + return self._filter_with_like([schema for (schema,) in schemas], like=like) - with self.begin() as con: - schemas = list(con.execute(query).scalars()) - return self._filter_with_like(schemas, like=like) + def _cursor_batches( + self, + expr: ir.Expr, + params: Mapping[ir.Scalar, Any] | None = None, + limit: int | str | None = None, + chunk_size: int = 1 << 20, + ) -> Iterable[list]: + self._run_pre_execute_hooks(expr) + + dtypes = expr.as_table().schema().values() + + with self._safe_raw_sql( + self.compile(expr, limit=limit, params=params) + ) as cursor: + while batch := cursor.fetchmany(chunk_size): + yield (tuple(map(dt.normalize, dtypes, row)) for row in batch) diff --git a/ibis/backends/exasol/compiler.py b/ibis/backends/exasol/compiler.py index d4e5fcc6d114e..efbde277b9715 100644 --- a/ibis/backends/exasol/compiler.py +++ b/ibis/backends/exasol/compiler.py @@ -1,24 +1,225 @@ from __future__ import annotations -import sqlalchemy as sa +import contextlib +from functools import singledispatchmethod -from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator -from ibis.backends.exasol 
import registry -from ibis.backends.exasol.datatypes import ExasolSQLType +import sqlglot.expressions as sge +from sqlglot.dialects import Postgres +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis.backends.base.sqlglot.compiler import NULL, SQLGlotCompiler +from ibis.backends.base.sqlglot.datatypes import ExasolType +from ibis.backends.base.sqlglot.rewrites import ( + exclude_unsupported_window_frame_from_ops, + exclude_unsupported_window_frame_from_row_number, + rewrite_empty_order_by_window, +) +from ibis.common.patterns import replace +from ibis.expr.rewrites import p, rewrite_sample, y -class ExasolExprTranslator(AlchemyExprTranslator): - _registry = registry.create() - _rewrites = AlchemyExprTranslator._rewrites.copy() - _integer_to_timestamp = sa.func.from_unixtime - _dialect_name = "exa.websocket" - native_json_type = False - type_mapper = ExasolSQLType +def _interval(self, e): + """Work around Exasol's inability to handle string literals in INTERVAL syntax.""" + arg = e.args["this"].this + with contextlib.suppress(AttributeError): + arg = arg.sql(self.dialect) + res = f"INTERVAL '{arg}' {e.args['unit']}" + return res -rewrites = ExasolExprTranslator.rewrites +# Is postgres the best dialect to inherit from? +class Exasol(Postgres): + """The exasol dialect.""" -class ExasolCompiler(AlchemyCompiler): - translator_class = ExasolExprTranslator - support_values_syntax_in_select = False + class Generator(Postgres.Generator): + TRANSFORMS = Postgres.Generator.TRANSFORMS.copy() | { + sge.Interval: _interval, + } + + TYPE_MAPPING = Postgres.Generator.TYPE_MAPPING.copy() | { + sge.DataType.Type.TIMESTAMPTZ: "TIMESTAMP WITH LOCAL TIME ZONE", + } + + +@replace(p.WindowFunction(p.MinRank | p.DenseRank, y @ p.WindowFrame(start=None))) +def exclude_unsupported_window_frame_from_rank(_, y): + return ops.Subtract( + _.copy(frame=y.copy(start=None, end=0, order_by=y.order_by or (ops.NULL,))), 1 + ) + + +class ExasolCompiler(SQLGlotCompiler): + __slots__ = () + + dialect = "exasol" + type_mapper = ExasolType + quoted = True + rewrites = ( + rewrite_sample, + exclude_unsupported_window_frame_from_ops, + exclude_unsupported_window_frame_from_rank, + exclude_unsupported_window_frame_from_row_number, + rewrite_empty_order_by_window, + *SQLGlotCompiler.rewrites, + ) + + @staticmethod + def _minimize_spec(start, end, spec): + if ( + start is None + and isinstance(getattr(end, "value", None), ops.Literal) + and end.value.value == 0 + and end.following + ): + return None + return spec + + def _aggregate(self, funcname: str, *args, where): + func = self.f[funcname] + if where is not None: + args = tuple(self.if_(where, arg, NULL) for arg in args) + return func(*args) + + @staticmethod + def _gen_valid_name(name: str) -> str: + """Exasol does not allow dots in quoted column names.""" + return name.replace(".", "_") + + @singledispatchmethod + def visit_node(self, op, **kw): + return super().visit_node(op, **kw) + + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_date(): + return self.cast(value.isoformat(), dtype) + elif dtype.is_timestamp(): + val = value.replace(tzinfo=None).isoformat(sep=" ", timespec="milliseconds") + return self.cast(val, dtype) + elif dtype.is_array() or dtype.is_struct() or dtype.is_map(): + raise com.UnsupportedBackendType( + f"{type(dtype).__name__}s are not supported in Exasol" + ) + elif dtype.is_uuid(): + return sge.convert(str(value)) + return super().visit_NonNullLiteral(op, value=value, 
dtype=dtype) + + @visit_node.register(ops.Date) + def visit_Date(self, op, *, arg): + return self.cast(arg, dt.date) + + @visit_node.register(ops.StartsWith) + def visit_StartsWith(self, op, *, arg, start): + return self.f.left(arg, self.f.length(start)).eq(start) + + @visit_node.register(ops.EndsWith) + def visit_EndsWith(self, op, *, arg, end): + return self.f.right(arg, self.f.length(end)).eq(end) + + @visit_node.register(ops.StringFind) + def visit_StringFind(self, op, *, arg, substr, start, end): + return self.f.locate(substr, arg, (start if start is not None else 0) + 1) + + @visit_node.register(ops.StringSQLILike) + def visit_StringSQLILike(self, op, *, arg, pattern, escape): + return self.f.upper(arg).like(self.f.upper(pattern)) + + @visit_node.register(ops.StringContains) + def visit_StringContains(self, op, *, haystack, needle): + return self.f.locate(needle, haystack) > 0 + + @visit_node.register(ops.ExtractSecond) + def visit_ExtractSecond(self, op, *, arg): + return self.f.floor(self.cast(self.f.extract(self.v.second, arg), op.dtype)) + + @visit_node.register(ops.AnalyticVectorizedUDF) + @visit_node.register(ops.ApproxMedian) + @visit_node.register(ops.Arbitrary) + @visit_node.register(ops.ArgMax) + @visit_node.register(ops.ArgMin) + @visit_node.register(ops.ArrayCollect) + @visit_node.register(ops.ArrayDistinct) + @visit_node.register(ops.ArrayFilter) + @visit_node.register(ops.ArrayFlatten) + @visit_node.register(ops.ArrayIntersect) + @visit_node.register(ops.ArrayMap) + @visit_node.register(ops.ArraySort) + @visit_node.register(ops.ArrayStringJoin) + @visit_node.register(ops.ArrayUnion) + @visit_node.register(ops.ArrayZip) + @visit_node.register(ops.BitwiseNot) + @visit_node.register(ops.Covariance) + @visit_node.register(ops.CumeDist) + @visit_node.register(ops.DateAdd) + @visit_node.register(ops.DateDelta) + @visit_node.register(ops.DateSub) + @visit_node.register(ops.DateFromYMD) + @visit_node.register(ops.DayOfWeekIndex) + @visit_node.register(ops.DayOfWeekName) + @visit_node.register(ops.ElementWiseVectorizedUDF) + @visit_node.register(ops.ExtractDayOfYear) + @visit_node.register(ops.ExtractEpochSeconds) + @visit_node.register(ops.ExtractQuarter) + @visit_node.register(ops.ExtractWeekOfYear) + @visit_node.register(ops.First) + @visit_node.register(ops.IntervalFromInteger) + @visit_node.register(ops.IsInf) + @visit_node.register(ops.IsNan) + @visit_node.register(ops.Last) + @visit_node.register(ops.Levenshtein) + @visit_node.register(ops.Median) + @visit_node.register(ops.MultiQuantile) + @visit_node.register(ops.Quantile) + @visit_node.register(ops.ReductionVectorizedUDF) + @visit_node.register(ops.RegexExtract) + @visit_node.register(ops.RegexReplace) + @visit_node.register(ops.RegexSearch) + @visit_node.register(ops.RegexSplit) + @visit_node.register(ops.RowID) + @visit_node.register(ops.StandardDev) + @visit_node.register(ops.Strftime) + @visit_node.register(ops.StringJoin) + @visit_node.register(ops.StringSplit) + @visit_node.register(ops.StringToTimestamp) + @visit_node.register(ops.TimeDelta) + @visit_node.register(ops.TimestampAdd) + @visit_node.register(ops.TimestampBucket) + @visit_node.register(ops.TimestampDelta) + @visit_node.register(ops.TimestampDiff) + @visit_node.register(ops.TimestampNow) + @visit_node.register(ops.TimestampSub) + @visit_node.register(ops.TimestampTruncate) + @visit_node.register(ops.TypeOf) + @visit_node.register(ops.Unnest) + @visit_node.register(ops.Variance) + def visit_Undefined(self, op, **_): + raise 
com.OperationNotDefinedError(type(op).__name__) + + @visit_node.register(ops.CountDistinctStar) + def visit_Unsupported(self, op, **_): + raise com.UnsupportedOperationError(type(op).__name__) + + +_SIMPLE_OPS = { + ops.Log10: "log10", + ops.Modulus: "mod", + ops.All: "min", + ops.Any: "max", +} + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @ExasolCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) + + else: + + @ExasolCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) + + setattr(ExasolCompiler, f"visit_{_op.__name__}", _fmt) diff --git a/ibis/backends/exasol/converter.py b/ibis/backends/exasol/converter.py new file mode 100644 index 0000000000000..fb7e83dc712ec --- /dev/null +++ b/ibis/backends/exasol/converter.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +import datetime + +from ibis.formats.pandas import PandasData + + +class ExasolPandasData(PandasData): + @classmethod + def convert_String(cls, s, dtype, pandas_type): + if s.dtype != "object": + return s.map(str) + else: + return s + + @classmethod + def convert_Interval(cls, s, dtype, pandas_dtype): + def parse_timedelta(value): + # format is '(+|-)days hour:minute:second.millisecond' + days, rest = value.split(" ", 1) + hms, millis = rest.split(".", 1) + hours, minutes, seconds = hms.split(":") + return datetime.timedelta( + days=int(days), + hours=int(hours), + minutes=int(minutes), + seconds=int(seconds), + milliseconds=int(millis), + ) + + if s.dtype == "int64": + # exasol can return intervals as the number of integer days (e.g., + # from subtraction of two dates) + # + # TODO: investigate whether days are the only interval ever + # returned as integers + return s.map(lambda days: datetime.timedelta(days=days)) + return s.map(parse_timedelta, na_action="ignore") diff --git a/ibis/backends/exasol/datatypes.py b/ibis/backends/exasol/datatypes.py deleted file mode 100644 index afc13c9d78963..0000000000000 --- a/ibis/backends/exasol/datatypes.py +++ /dev/null @@ -1,26 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -import sqlalchemy.types as sa_types - -from ibis.backends.base.sql.alchemy.datatypes import AlchemyType - -if TYPE_CHECKING: - import ibis.expr.datatypes as dt - - -class ExasolSQLType(AlchemyType): - dialect = "exa.websocket" - - @classmethod - def from_ibis(cls, dtype: dt.DataType) -> sa_types.TypeEngine: - if dtype.is_string(): - # see also: https://docs.exasol.com/db/latest/sql_references/data_types/datatypesoverview.htm - MAX_VARCHAR_SIZE = 2_000_000 - return sa_types.VARCHAR(MAX_VARCHAR_SIZE) - return super().from_ibis(dtype) - - @classmethod - def to_ibis(cls, typ: sa_types.TypeEngine, nullable: bool = True) -> dt.DataType: - return super().to_ibis(typ, nullable=nullable) diff --git a/ibis/backends/exasol/registry.py b/ibis/backends/exasol/registry.py deleted file mode 100644 index 5c23f3996662b..0000000000000 --- a/ibis/backends/exasol/registry.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import annotations - -import sqlalchemy as sa - -import ibis.expr.operations as ops - -# used for literal translate -from ibis.backends.base.sql.alchemy import ( - fixed_arity, - sqlalchemy_operation_registry, -) - - -class _String: - @staticmethod - def find(t, op): - args = [t.translate(op.substr), t.translate(op.arg)] - if (start 
:= op.start) is not None: - args.append(t.translate(start) + 1) - return sa.func.locate(*args) - 1 - - @staticmethod - def translate(t, op): - func = fixed_arity(sa.func.translate, 3) - return func(t, op) - - -class _Registry: - _unsupported = {ops.StringJoin} - - _supported = { - ops.Translate: _String.translate, - ops.StringFind: _String.find, - } - - @classmethod - def create(cls): - registry = sqlalchemy_operation_registry.copy() - registry = {k: v for k, v in registry.items() if k not in cls._unsupported} - registry.update(cls._supported) - return registry - - -def create(): - """Create an operation registry for an Exasol backend.""" - return _Registry.create() diff --git a/ibis/backends/exasol/tests/conftest.py b/ibis/backends/exasol/tests/conftest.py index 35d3c6b04c7ff..f6389b0336d7b 100644 --- a/ibis/backends/exasol/tests/conftest.py +++ b/ibis/backends/exasol/tests/conftest.py @@ -4,6 +4,8 @@ import subprocess from typing import TYPE_CHECKING +import sqlglot as sg + import ibis from ibis.backends.tests.base import ( ServiceBackendTest, @@ -40,19 +42,24 @@ class TestConf(ServiceBackendTest): service_name = "exasol" supports_tpch = False force_sort = True - deps = "sqlalchemy", "sqlalchemy_exasol", "pyexasol" + deps = ("pyexasol",) @staticmethod def connect(*, tmpdir, worker_id, **kw: Any): - kwargs = { - "user": EXASOL_USER, - "password": EXASOL_PASS, - "host": EXASOL_HOST, - "port": EXASOL_PORT, - "schema": IBIS_TEST_EXASOL_DB, - "certificate_validation": False, - } - return ibis.exasol.connect(**kwargs) + return ibis.exasol.connect( + user=EXASOL_USER, + password=EXASOL_PASS, + host=EXASOL_HOST, + port=EXASOL_PORT, + **kw, + ) + + def postload(self, **kw: Any): + self.connection = self.connect(schema=IBIS_TEST_EXASOL_DB, **kw) + + @staticmethod + def format_table(name: str) -> str: + return sg.to_identifier(name, quoted=True).sql("exasol") @property def test_files(self) -> Iterable[Path]: diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/exasol/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/exasol/out.sql new file mode 100644 index 0000000000000..b309cd65374d5 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/exasol/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/exasol/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/exasol/out.sql new file mode 100644 index 0000000000000..b309cd65374d5 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/exasol/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/exasol/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/exasol/out.sql new file mode 100644 index 0000000000000..6bd0ba8c995d3 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/exasol/out.sql @@ -0,0 +1,3 @@ +SELECT + SUM("t0"."bigint_col") AS "Sum(bigint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/exasol/out.sql 
b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/exasol/out.sql new file mode 100644 index 0000000000000..97338646649f0 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/exasol/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + "t0"."id", + "t0"."bool_col" + FROM "functional_alltypes" AS "t0" + LIMIT 10 +) AS "t2" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/exasol/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/exasol/out.sql new file mode 100644 index 0000000000000..d3969647c9ea1 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/exasol/out.sql @@ -0,0 +1,22 @@ +SELECT + CASE "t0"."continent" + WHEN 'NA' + THEN 'North America' + WHEN 'SA' + THEN 'South America' + WHEN 'EU' + THEN 'Europe' + WHEN 'AF' + THEN 'Africa' + WHEN 'AS' + THEN 'Asia' + WHEN 'OC' + THEN 'Oceania' + WHEN 'AN' + THEN 'Antarctica' + ELSE 'Unknown continent' + END AS "cont", + SUM("t0"."population") AS "total_pop" +FROM "countries" AS "t0" +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/exasol/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/exasol/out.sql new file mode 100644 index 0000000000000..c1611d8cecc33 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/exasol/out.sql @@ -0,0 +1,9 @@ +SELECT + "t0"."x" IN ( + SELECT + "t0"."x" + FROM "t" AS "t0" + WHERE + "t0"."x" > 2 + ) AS "InSubquery(x)" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 52649199d79bf..0d0586a30f9b9 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -769,7 +769,7 @@ def mean_and_std(v): lambda _: slice(None), marks=pytest.mark.notimpl( ["exasol"], - raises=(com.OperationNotDefinedError, ExaQueryError, sa.exc.DBAPIError), + raises=(com.OperationNotDefinedError, ExaQueryError), strict=False, ), id="no_cond", @@ -849,9 +849,7 @@ def test_reduction_ops( raises=com.OperationNotDefinedError, reason="no one has attempted implementation yet", ) -@pytest.mark.notimpl( - ["exasol"], raises=(sa.exc.DBAPIError, com.UnsupportedOperationError) -) +@pytest.mark.notimpl(["exasol"], raises=com.UnsupportedOperationError) def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): table = alltypes[["int_col", "double_col", "string_col"]] expr = table.nunique(where=ibis_cond(table)) @@ -920,12 +918,11 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): "sqlite", "druid", "oracle", - "exasol", ], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( - ["mysql", "impala"], raises=com.UnsupportedBackendType + ["mysql", "impala", "exasol"], raises=com.UnsupportedBackendType ), pytest.mark.notyet( ["snowflake"], @@ -1153,8 +1150,7 @@ def test_quantile( ), ], ) -@pytest.mark.notimpl(["mssql"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["exasol"], raises=AttributeError) +@pytest.mark.notimpl(["mssql", "exasol"], raises=com.OperationNotDefinedError) def test_corr_cov( con, batting, @@ -1597,8 +1593,9 @@ def test_grouped_case(backend, con): @pytest.mark.notimpl( - ["datafusion", "mssql", "polars", "exasol"], raises=com.OperationNotDefinedError + ["datafusion", "mssql", "polars"], raises=com.OperationNotDefinedError ) +@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) 
@pytest.mark.broken( ["dask"], reason="Dask does not windowize this operation correctly", diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index e7563587f7f64..3b71ebe883465 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -91,6 +91,7 @@ def time_keyed_right(time_keyed_df2): "druid", "impala", "bigquery", + "exasol", ] ) def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): @@ -127,6 +128,7 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op "druid", "impala", "bigquery", + "exasol", ] ) def test_keyed_asof_join_with_tolerance( diff --git a/ibis/backends/tests/test_binary.py b/ibis/backends/tests/test_binary.py index c3dfe99654247..3559741a493de 100644 --- a/ibis/backends/tests/test_binary.py +++ b/ibis/backends/tests/test_binary.py @@ -3,7 +3,6 @@ import contextlib import pytest -import sqlalchemy.exc import ibis import ibis.common.exceptions as com @@ -29,7 +28,7 @@ @pytest.mark.notimpl( ["exasol"], "Exasol does not have native support for a binary data type.", - raises=sqlalchemy.exc.StatementError, + raises=NotImplementedError, ) def test_binary_literal(con, backend): expr = ibis.literal(b"A") diff --git a/ibis/backends/tests/test_dot_sql.py b/ibis/backends/tests/test_dot_sql.py index 90c14a598da02..6d7fa5d4c08c0 100644 --- a/ibis/backends/tests/test_dot_sql.py +++ b/ibis/backends/tests/test_dot_sql.py @@ -24,6 +24,7 @@ _NAMES = { "bigquery": "ibis_gbq_testing.functional_alltypes", + "exasol": '"functional_alltypes"', } @@ -38,17 +39,20 @@ ], ) def test_con_dot_sql(backend, con, schema): - alltypes = con.table("functional_alltypes") + alltypes = backend.functional_alltypes # pull out the quoted name - name = _NAMES.get(con.name, alltypes.op().name) + name = _NAMES.get(con.name, "functional_alltypes") + quoted = getattr(getattr(con, "compiler", None), "quoted", True) + dialect = _IBIS_TO_SQLGLOT_DIALECT.get(con.name, con.name) + cols = [ + sg.column("string_col", quoted=quoted).as_("s", quoted=quoted).sql(dialect), + (sg.column("double_col", quoted=quoted) + 1.0) + .as_("new_col", quoted=quoted) + .sql(dialect), + ] t = ( con.sql( - f""" - SELECT - string_col as s, - double_col + 1.0 AS new_col - FROM {name} - """, + f"SELECT {', '.join(cols)} FROM {name}", schema=schema, ) .group_by("s") # group by a column from SQL diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index b1ec4c4bff283..1760cf8de4612 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -13,6 +13,7 @@ from ibis.backends.tests.errors import ( DuckDBNotImplementedException, DuckDBParserException, + ExaQueryError, MySQLOperationalError, PyDeltaTableError, PyDruidProgrammingError, @@ -97,7 +98,6 @@ def test_empty_column_to_pyarrow(limit, awards_players): @pytest.mark.parametrize("limit", no_limit) -@pytest.mark.notimpl(["exasol"], raises=AttributeError) def test_empty_scalar_to_pyarrow(limit, awards_players): expr = awards_players.filter(awards_players.awardID == "DEADBEEF").yearID.sum() array = expr.to_pyarrow(limit=limit) @@ -105,7 +105,6 @@ def test_empty_scalar_to_pyarrow(limit, awards_players): @pytest.mark.parametrize("limit", no_limit) -@pytest.mark.notimpl(["exasol"], raises=AttributeError) def test_scalar_to_pyarrow_scalar(limit, awards_players): scalar = awards_players.yearID.sum().to_pyarrow(limit=limit) assert isinstance(scalar, pa.Scalar) @@ -209,7 +208,9 @@ def 
test_table_to_parquet(tmp_path, backend, awards_players): df = pd.read_parquet(outparquet) - backend.assert_frame_equal(awards_players.to_pandas(), df) + backend.assert_frame_equal( + awards_players.to_pandas().fillna(pd.NA), df.fillna(pd.NA) + ) @pytest.mark.notimpl( @@ -224,7 +225,9 @@ def test_table_to_parquet_writer_kwargs(version, tmp_path, backend, awards_playe df = pd.read_parquet(outparquet) - backend.assert_frame_equal(awards_players.to_pandas(), df) + backend.assert_frame_equal( + awards_players.to_pandas().fillna(pd.NA), df.fillna(pd.NA) + ) md = pa.parquet.read_metadata(outparquet) @@ -297,7 +300,7 @@ def test_memtable_to_file(tmp_path, con, ftype, monkeypatch): assert outfile.is_file() -@pytest.mark.notimpl(["exasol"]) +@pytest.mark.notimpl(["flink"]) def test_table_to_csv(tmp_path, backend, awards_players): outcsv = tmp_path / "out.csv" @@ -311,7 +314,7 @@ def test_table_to_csv(tmp_path, backend, awards_players): backend.assert_frame_equal(awards_players.to_pandas(), df) -@pytest.mark.notimpl(["exasol"]) +@pytest.mark.notimpl(["flink"]) @pytest.mark.notimpl( ["duckdb"], reason="cannot inline WriteOptions objects", @@ -337,12 +340,12 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): id="decimal128", marks=[ pytest.mark.notyet(["flink"], raises=NotImplementedError), - pytest.mark.notyet(["exasol"], raises=sa.exc.DBAPIError), pytest.mark.notyet( ["risingwave"], raises=sa.exc.DBAPIError, reason="Feature is not yet implemented: unsupported data type: NUMERIC(38,9)", ), + pytest.mark.notyet(["exasol"], raises=ExaQueryError), ], ), param( @@ -362,12 +365,13 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): raises=(PySparkParseException, PySparkArithmeticException), reason="precision is out of range", ), - pytest.mark.notyet(["exasol"], raises=sa.exc.DBAPIError), pytest.mark.notyet( ["risingwave"], raises=sa.exc.DBAPIError, reason="Feature is not yet implemented: unsupported data type: NUMERIC(76,38)", ), + pytest.mark.notyet(["flink"], raises=NotImplementedError), + pytest.mark.notyet(["exasol"], raises=ExaQueryError), ], ), ], @@ -495,7 +499,6 @@ def test_to_pandas_batches_empty_table(backend, con): param( None, marks=[ - pytest.mark.notimpl(["exasol"], raises=sa.exc.CompileError), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -520,7 +523,6 @@ def test_to_pandas_batches_nonempty_table(backend, con, n): param( None, marks=[ - pytest.mark.notimpl(["exasol"], raises=sa.exc.CompileError), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index c92f65090918d..5f4e353d16410 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -72,7 +72,6 @@ def test_null_literal(con, backend): } -@pytest.mark.notimpl(["exasol"]) def test_boolean_literal(con, backend): expr = ibis.literal(False, type=dt.boolean) result = con.execute(expr) @@ -106,32 +105,34 @@ def test_scalar_fillna_nullif(con, expr, expected): @pytest.mark.parametrize( - ("col", "filt"), + ("col", "value", "filt"), [ param( "nan_col", - _.nan_col.isnan(), - marks=pytest.mark.notimpl(["mysql", "sqlite"]), + ibis.literal(np.nan), + methodcaller("isnan"), + marks=[ + pytest.mark.notimpl(["mysql", "sqlite", "druid"]), + pytest.mark.notyet( + ["exasol"], + raises=ExaQueryError, + reason="no way to test for nan-ness", + ), + ], id="nan_col", ), param( - "none_col", - _.none_col.isnull(), - 
marks=[pytest.mark.notimpl(["mysql"])], - id="none_col", + "none_col", ibis.NA.cast("float64"), methodcaller("isnull"), id="none_col" ), ], ) -@pytest.mark.notimpl(["mssql", "druid", "oracle"]) +@pytest.mark.notimpl(["mssql", "oracle"]) @pytest.mark.notyet(["flink"], "NaN is not supported in Flink SQL", raises=ValueError) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError, strict=False) -def test_isna(backend, alltypes, col, filt): - table = alltypes.select( - nan_col=ibis.literal(np.nan), none_col=ibis.NA.cast("float64") - ) +def test_isna(backend, alltypes, col, value, filt): + table = alltypes.select(**{col: value}) df = table.execute() - result = table[filt].execute().reset_index(drop=True) + result = table[filt(table[col])].execute().reset_index(drop=True) expected = df[df[col].isna()].reset_index(drop=True) backend.assert_frame_equal(result, expected) @@ -569,10 +570,6 @@ def test_order_by_random(alltypes): raises=PyDruidProgrammingError, reason="Druid only supports trivial unions", ) -@pytest.mark.notyet( - ["exasol"], - raises=AssertionError, -) def test_table_info(alltypes): expr = alltypes.info() df = expr.execute() @@ -592,18 +589,8 @@ def test_table_info(alltypes): @pytest.mark.parametrize( ("ibis_op", "pandas_op"), [ - param( - _.string_col.isin([]), - lambda df: df.string_col.isin([]), - marks=pytest.mark.notimpl(["exasol"], raises=ExaQueryError), - id="isin", - ), - param( - _.string_col.notin([]), - lambda df: ~df.string_col.isin([]), - marks=pytest.mark.notimpl(["exasol"], raises=ExaQueryError), - id="notin", - ), + param(_.string_col.isin([]), lambda df: df.string_col.isin([]), id="isin"), + param(_.string_col.notin([]), lambda df: ~df.string_col.isin([]), id="notin"), param( (_.string_col.length() * 1).isin([1]), lambda df: (df.string_col.str.len() * 1).isin([1]), @@ -674,7 +661,6 @@ def test_isin_notin_column_expr(backend, alltypes, df, ibis_op, pandas_op): param(False, True, neg, id="false_negate"), ], ) -@pytest.mark.notimpl(["exasol"]) def test_logical_negation_literal(con, expr, expected, op): assert con.execute(op(ibis.literal(expr)).name("tmp")) == expected @@ -827,7 +813,7 @@ def test_int_scalar(alltypes): assert result.dtype == np.int32 -@pytest.mark.notimpl(["dask", "datafusion", "pandas", "polars", "druid", "exasol"]) +@pytest.mark.notimpl(["dask", "datafusion", "pandas", "polars", "druid"]) @pytest.mark.notyet( ["clickhouse"], reason="https://github.com/ClickHouse/ClickHouse/issues/6697" ) @@ -871,12 +857,12 @@ def test_typeof(con): @pytest.mark.notimpl(["datafusion", "druid"]) @pytest.mark.notimpl(["pyspark"], condition=is_older_than("pyspark", "3.5.0")) @pytest.mark.notyet(["dask", "mssql"], reason="not supported by the backend") -@pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) @pytest.mark.broken( ["risingwave"], raises=sa.exc.InternalError, reason="https://github.com/risingwavelabs/risingwave/issues/1343", ) +@pytest.mark.notyet(["exasol"], raises=ExaQueryError, reason="not supported by exasol") def test_isin_uncorrelated( backend, batting, awards_players, batting_df, awards_players_df ): @@ -896,7 +882,7 @@ def test_isin_uncorrelated( @pytest.mark.broken(["polars"], reason="incorrect answer") -@pytest.mark.notimpl(["druid", "exasol"]) +@pytest.mark.notimpl(["druid"]) @pytest.mark.notyet(["dask"], reason="not supported by the backend") def test_isin_uncorrelated_filter( backend, batting, awards_players, batting_df, awards_players_df @@ -921,7 +907,14 @@ def test_isin_uncorrelated_filter( "dtype", [ "bool", - "bytes", + param( 
+ "bytes", + marks=[ + pytest.mark.notyet( + ["exasol"], raises=ExaQueryError, reason="no binary type" + ) + ], + ), "str", "int", "float", @@ -933,7 +926,14 @@ def test_isin_uncorrelated_filter( "float64", "timestamp", "date", - "time", + param( + "time", + marks=[ + pytest.mark.notyet( + ["exasol"], raises=ExaQueryError, reason="no time type" + ) + ], + ), ], ) def test_literal_na(con, dtype): @@ -942,8 +942,7 @@ def test_literal_na(con, dtype): assert pd.isna(result) -@pytest.mark.notimpl(["exasol"]) -def test_memtable_bool_column(backend, con): +def test_memtable_bool_column(con): data = [True, False, True] t = ibis.memtable({"a": data}) assert Counter(con.execute(t.a)) == Counter(data) @@ -1352,7 +1351,6 @@ def hash_256(col): "risingwave", "snowflake", "sqlite", - "exasol", ] ) @pytest.mark.parametrize( @@ -1370,6 +1368,7 @@ def hash_256(col): pytest.mark.notyet(["duckdb", "impala"], reason="casts to NULL"), pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), pytest.mark.notyet(["trino"], raises=TrinoUserError), + pytest.mark.notyet(["exasol"], raises=ExaQueryError), pytest.mark.broken( ["druid"], reason="casts to 1672531200000 (millisecond)" ), @@ -1512,10 +1511,6 @@ def test_try_cast_func(con, from_val, to_type, func): slice(None, None), lambda t: t.count().to_pandas(), marks=[ - pytest.mark.notyet( - ["exasol"], - raises=sa.exc.CompileError, - ), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -1529,10 +1524,6 @@ def test_try_cast_func(con, from_val, to_type, func): slice(0, None), lambda t: t.count().to_pandas(), marks=[ - pytest.mark.notyet( - ["exasol"], - raises=sa.exc.CompileError, - ), pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -1563,8 +1554,13 @@ def test_try_cast_func(con, from_val, to_type, func): pytest.mark.never( ["impala"], raises=ImpalaHiveServer2Error, - reason="impala doesn't support OFFSET without ORDER BY", - ) + reason="doesn't support OFFSET without ORDER BY", + ), + pytest.mark.notyet( + ["exasol"], + raises=ExaQueryError, + reason="doesn't support OFFSET without ORDER BY", + ), ], ), param( @@ -1582,10 +1578,7 @@ def test_try_cast_func(con, from_val, to_type, func): raises=sa.exc.CompileError, reason="mssql doesn't support OFFSET without LIMIT", ), - pytest.mark.notyet( - ["exasol"], - raises=sa.exc.CompileError, - ), + pytest.mark.notyet(["exasol"], raises=ExaQueryError), pytest.mark.never( ["impala"], raises=ImpalaHiveServer2Error, @@ -1607,8 +1600,13 @@ def test_try_cast_func(con, from_val, to_type, func): pytest.mark.never( ["impala"], raises=ImpalaHiveServer2Error, - reason="impala doesn't support OFFSET without ORDER BY", - ) + reason="doesn't support OFFSET without ORDER BY", + ), + pytest.mark.notyet( + ["exasol"], + raises=ExaQueryError, + reason="doesn't support OFFSET without ORDER BY", + ), ], ), param( @@ -1621,10 +1619,7 @@ def test_try_cast_func(con, from_val, to_type, func): raises=sa.exc.CompileError, reason="mssql doesn't support OFFSET without LIMIT", ), - pytest.mark.notyet( - ["exasol"], - raises=sa.exc.DBAPIError, - ), + pytest.mark.notyet(["exasol"], raises=ExaQueryError), pytest.mark.notyet( ["impala"], raises=ImpalaHiveServer2Error, @@ -1693,10 +1688,7 @@ def test_static_table_slice(backend, slc, expected_count_fn): raises=sa.exc.InternalError, reason="risingwave doesn't support limit/offset", ) -@pytest.mark.notimpl( - ["exasol"], - raises=sa.exc.CompileError, -) +@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.notyet( ["clickhouse"], 
    raises=ClickHouseDatabaseError,
@@ -1746,7 +1738,7 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn):
     raises=TrinoUserError,
     reason="backend doesn't support dynamic limit/offset",
 )
-@pytest.mark.notimpl(["exasol"], raises=sa.exc.CompileError)
+@pytest.mark.notimpl(["exasol"], raises=ExaQueryError)
 @pytest.mark.notyet(
     ["clickhouse"],
     raises=ClickHouseDatabaseError,
@@ -1800,7 +1792,6 @@ def test_dynamic_table_slice_with_computed_offset(backend):
         "flink",
         "polars",
         "snowflake",
-        "exasol",
     ]
 )
 @pytest.mark.notimpl(
@@ -1829,7 +1820,6 @@ def test_sample(backend):
         "flink",
         "polars",
         "snowflake",
-        "exasol",
     ]
 )
 @pytest.mark.notimpl(
@@ -1893,7 +1883,6 @@ def test_substitute(backend):
     ["dask", "pandas", "polars"], raises=NotImplementedError, reason="not a SQL backend"
 )
 @pytest.mark.notimpl(["flink"], reason="no sqlglot dialect", raises=ValueError)
-@pytest.mark.notimpl(["exasol"], raises=ValueError, reason="unknown dialect")
 @pytest.mark.notimpl(
     ["risingwave"],
     raises=ValueError,
diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py
index 6eed7380d2f51..2104893321ef7 100644
--- a/ibis/backends/tests/test_join.py
+++ b/ibis/backends/tests/test_join.py
@@ -5,7 +5,6 @@
 import numpy as np
 import pandas as pd
 import pytest
-import sqlalchemy as sa
 from packaging.version import parse as vparse
 from pytest import param
 
@@ -43,7 +42,14 @@ def check_eq(left, right, how, **kwargs):
     [
         "inner",
         "left",
-        "right",
+        param(
+            "right",
+            marks=[
+                pytest.mark.broken(
+                    ["exasol"], raises=AssertionError, reason="results don't match"
+                )
+            ],
+        ),
         param(
             "outer",
             # TODO: mysql will likely never support full outer join
@@ -55,12 +61,14 @@ def check_eq(left, right, how, **kwargs):
                     + ["sqlite"] * (vparse(sqlite3.sqlite_version) < vparse("3.39"))
                 ),
                 pytest.mark.xfail_version(datafusion=["datafusion<31"]),
+                pytest.mark.broken(
+                    ["exasol"], raises=AssertionError, reason="results don't match"
+                ),
             ],
         ),
     ],
 )
 @pytest.mark.notimpl(["druid"])
-@pytest.mark.notimpl(["exasol"], raises=AttributeError)
 def test_mutating_join(backend, batting, awards_players, how):
     left = batting[batting.yearID == 2015]
     right = awards_players[awards_players.lgID == "NL"].drop("yearID", "lgID")
@@ -109,7 +117,7 @@ def test_mutating_join(backend, batting, awards_players, how):
 
 
 @pytest.mark.parametrize("how", ["semi", "anti"])
-@pytest.mark.notimpl(["dask", "druid", "exasol"])
+@pytest.mark.notimpl(["dask", "druid"])
 @pytest.mark.notyet(["flink"], reason="Flink doesn't support semi joins or anti joins")
 def test_filtering_join(backend, batting, awards_players, how):
     left = batting[batting.yearID == 2015]
@@ -139,7 +147,6 @@ def test_filtering_join(backend, batting, awards_players, how):
     backend.assert_frame_equal(result, expected, check_like=True)
 
 
-@pytest.mark.notimpl(["exasol"], raises=com.IbisTypeError)
 def test_join_then_filter_no_column_overlap(awards_players, batting):
     left = batting[batting.yearID == 2015]
     year = left.yearID.name("year")
@@ -152,7 +159,6 @@ def test_join_then_filter_no_column_overlap(awards_players, batting):
     assert not q.execute().empty
 
 
-@pytest.mark.notimpl(["exasol"], raises=com.IbisTypeError)
 def test_mutate_then_join_no_column_overlap(batting, awards_players):
     left = batting.mutate(year=batting.yearID).filter(lambda t: t.year == 2015)
     left = left["year", "RBI"]
@@ -175,7 +181,6 @@ def test_mutate_then_join_no_column_overlap(batting, awards_players):
         param(lambda left, right: left.join(right, "year", how="semi"), id="how_semi"),
     ],
 )
-@pytest.mark.notimpl(["exasol"],
raises=com.IbisTypeError) def test_semi_join_topk(batting, awards_players, func): batting = batting.mutate(year=batting.yearID) left = func(batting, batting.year.topk(5)).select("year", "RBI") @@ -198,7 +203,7 @@ def test_join_with_pandas(batting, awards_players): assert df.yearID.nunique() == 7 -@pytest.mark.notimpl(["dask", "exasol"]) +@pytest.mark.notimpl(["dask"]) def test_join_with_pandas_non_null_typed_columns(batting, awards_players): batting_filt = batting[lambda t: t.yearID < 1900][["yearID"]] awards_players_filt = awards_players[lambda t: t.yearID < 1900][ @@ -271,10 +276,6 @@ def test_join_with_pandas_non_null_typed_columns(batting, awards_players): raises=TypeError, reason="dask doesn't support join predicates", ) -@pytest.mark.notimpl( - ["exasol"], - raises=com.IbisTypeError, -) def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_value): n = 5 @@ -299,9 +300,6 @@ def test_join_with_trivial_predicate(awards_players, predicate, how, pandas_valu ) -@pytest.mark.notimpl( - ["exasol"], raises=sa.exc.NoSuchTableError, reason="`win` table isn't loaded" -) @pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError) @pytest.mark.notimpl(["flink"], reason="`win` table isn't loaded") @pytest.mark.parametrize( diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index cc4b54067519b..01b41d73e4f55 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -191,10 +191,6 @@ "Expected np.float16 instance", raises=ArrowNotImplementedError, ), - pytest.mark.notimpl( - ["exasol"], - raises=ExaQueryError, - ), ], id="float16", ), @@ -212,12 +208,6 @@ "risingwave": "numeric", "flink": "FLOAT NOT NULL", }, - marks=[ - pytest.mark.notimpl( - ["exasol"], - raises=ExaQueryError, - ), - ], id="float32", ), param( @@ -234,12 +224,6 @@ "risingwave": "numeric", "flink": "DOUBLE NOT NULL", }, - marks=[ - pytest.mark.notimpl( - ["exasol"], - raises=ExaQueryError, - ), - ], id="float64", ), ], @@ -265,6 +249,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "sqlite": 1.1, "trino": decimal.Decimal("1.1"), "dask": decimal.Decimal("1.1"), + "exasol": decimal.Decimal("1"), "duckdb": decimal.Decimal("1.1"), "risingwave": 1.1, "impala": decimal.Decimal("1"), @@ -281,6 +266,7 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": "NUMERIC", "snowflake": "DECIMAL", + "exasol": "DECIMAL(18,0)", "sqlite": "real", "impala": "DECIMAL(9,0)", "trino": "decimal(18,3)", @@ -290,10 +276,9 @@ def test_numeric_literal(con, backend, expr, expected_types): "flink": "DECIMAL(38, 18) NOT NULL", }, marks=[ - pytest.mark.notimpl(["exasol"], raises=ExaQueryError), pytest.mark.notimpl( ["clickhouse"], - "Unsupported precision. Supported values: [1 : 76]. 
Current value: None",
+                    reason="precision must be specified; clickhouse doesn't have a default",
                     raises=NotImplementedError,
                 ),
             ],
@@ -464,6 +449,7 @@ def test_numeric_literal(con, backend, expr, expected_types):
                     raises=SnowflakeProgrammingError,
                 ),
                 pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest),
+                pytest.mark.notyet(["exasol"], raises=ExaQueryError),
             ],
             id="decimal-infinity+",
         ),
@@ -540,6 +526,7 @@ def test_numeric_literal(con, backend, expr, expected_types):
                     reason="can't cast infinity to decimal",
                 ),
                 pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest),
+                pytest.mark.notyet(["exasol"], raises=ExaQueryError),
             ],
             id="decimal-infinity-",
         ),
@@ -628,6 +615,7 @@ def test_numeric_literal(con, backend, expr, expected_types):
                     reason="can't cast nan to decimal",
                 ),
                 pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest),
+                pytest.mark.notyet(["exasol"], raises=ExaQueryError),
             ],
             id="decimal-NaN",
         ),
@@ -767,33 +755,13 @@ def test_isnan_isinf(
                 ["datafusion"], raises=com.OperationNotDefinedError
             ),
         ),
-        param(
-            L(5.5).round(),
-            6.0,
-            id="round",
-        ),
-        param(
-            L(5.556).round(2),
-            5.56,
-            id="round-digits",
-        ),
+        param(L(5.5).round(), 6.0, id="round"),
+        param(L(5.556).round(2), 5.56, id="round-digits"),
         param(L(5.556).ceil(), 6.0, id="ceil"),
         param(L(5.556).floor(), 5.0, id="floor"),
-        param(
-            L(5.556).exp(),
-            math.exp(5.556),
-            id="exp",
-        ),
-        param(
-            L(5.556).sign(),
-            1,
-            id="sign-pos",
-        ),
-        param(
-            L(-5.556).sign(),
-            -1,
-            id="sign-neg",
-        ),
+        param(L(5.556).exp(), math.exp(5.556), id="exp"),
+        param(L(5.556).sign(), 1, id="sign-pos"),
+        param(L(-5.556).sign(), -1, id="sign-neg"),
         param(
             L(0).sign(),
             0,
@@ -810,10 +778,6 @@ def test_isnan_isinf(
             math.log(5.556, 2),
             id="log-base",
             marks=[
-                pytest.mark.notimpl(
-                    ["exasol"],
-                    raises=com.OperationNotDefinedError,
-                ),
                 pytest.mark.notimpl(
                     ["risingwave"],
                     raises=sa.exc.InternalError,
@@ -827,15 +791,11 @@ def test_isnan_isinf(
             math.log(5.556),
             id="ln",
         ),
         param(
             L(5.556).log2(),
             math.log(5.556, 2),
             id="log2",
             marks=[
-                pytest.mark.notimpl(
-                    ["exasol"],
-                    raises=com.OperationNotDefinedError,
-                ),
                 pytest.mark.notimpl(
                     ["risingwave"],
                     raises=sa.exc.InternalError,
@@ -866,6 +826,9 @@ def test_isnan_isinf(
             marks=pytest.mark.notimpl(["exasol"], raises=ExaQueryError),
             id="mod",
         ),
+        param(L(5.556).log10(), math.log10(5.556), id="log10"),
+        param(L(5.556).radians(), math.radians(5.556), id="radians"),
+        param(L(5.556).degrees(), math.degrees(5.556), id="degrees"),
     ],
 )
 def test_math_functions_literals(con, expr, expected):
@@ -998,7 +963,6 @@ def test_simple_math_functions_columns(
         lambda t: t.double_col.add(1).log(2),
         lambda t: np.log2(t.double_col + 1),
         marks=[
-            pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError),
             pytest.mark.notimpl(
                 ["risingwave"],
                 raises=sa.exc.InternalError,
@@ -1016,7 +980,6 @@ def test_simple_math_functions_columns(
     param(
         lambda t: t.double_col.add(1).log10(),
         lambda t: np.log10(t.double_col + 1),
-        marks=pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError),
         id="log10",
     ),
     param(
@@ -1031,7 +994,6 @@ def test_simple_math_functions_columns(
         ),
         id="log_base_bigint",
         marks=[
-            pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError),
             pytest.mark.notimpl(
                 ["datafusion"], raises=com.OperationNotDefinedError
             ),
@@ -1133,11 +1095,12 @@ def test_backend_specific_numerics(backend, con, df, alltypes, expr_fn, expected
         operator.mul,
         operator.truediv,
         operator.floordiv,
-
operator.pow, + param( + operator.pow, marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)] + ), ], ids=lambda op: op.__name__, ) -@pytest.mark.notimpl(["exasol"], raises=AttributeError) def test_binary_arithmetic_operations(backend, alltypes, df, op): smallint_col = alltypes.smallint_col + 1 # make it nonzero smallint_series = df.smallint_col + 1 @@ -1155,7 +1118,6 @@ def test_binary_arithmetic_operations(backend, alltypes, df, op): backend.assert_series_equal(result, expected, check_exact=False) -@pytest.mark.notimpl(["exasol"], raises=AttributeError) def test_mod(backend, alltypes, df): expr = operator.mod(alltypes.smallint_col, alltypes.smallint_col + 1).name("tmp") @@ -1182,7 +1144,6 @@ def test_mod(backend, alltypes, df): "Cannot apply '%' to arguments of type ' % '. Supported form(s): ' % ", raises=Py4JError, ) -@pytest.mark.notimpl(["exasol"], raises=AttributeError) def test_floating_mod(backend, alltypes, df): expr = operator.mod(alltypes.double_col, alltypes.smallint_col + 1).name("tmp") @@ -1339,7 +1300,7 @@ def test_floating_mod(backend, alltypes, df): @pytest.mark.notyet(["mssql"], raises=(sa.exc.OperationalError, sa.exc.DataError)) @pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) @pytest.mark.notyet(["postgres"], raises=PsycoPg2DivisionByZero) -@pytest.mark.notimpl(["exasol"], raises=(sa.exc.DBAPIError, com.IbisTypeError)) +@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) def test_divide_by_zero(backend, alltypes, df, column, denominator): expr = alltypes[column] / denominator result = expr.name("tmp").execute() @@ -1455,13 +1416,7 @@ def test_random(con): [ param(lambda x: x.clip(lower=0), lambda x: x.clip(lower=0), id="lower-int"), param( - lambda x: x.clip(lower=0.0), - lambda x: x.clip(lower=0.0), - marks=pytest.mark.notimpl( - "exasol", - raises=ExaQueryError, - ), - id="lower-float", + lambda x: x.clip(lower=0.0), lambda x: x.clip(lower=0.0), id="lower-float" ), param(lambda x: x.clip(upper=0), lambda x: x.clip(upper=0), id="upper-int"), param( @@ -1482,10 +1437,6 @@ def test_random(con): param( lambda x: x.clip(lower=0, upper=1.0), lambda x: x.clip(lower=0, upper=1.0), - marks=pytest.mark.notimpl( - "exasol", - raises=ExaQueryError, - ), id="lower-upper-float", ), param( @@ -1509,7 +1460,7 @@ def test_clip(backend, alltypes, df, ibis_func, pandas_func): backend.assert_series_equal(result, expected, check_names=False) -@pytest.mark.notimpl(["polars", "exasol"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["druid"], raises=PyDruidProgrammingError, @@ -1623,9 +1574,8 @@ def test_bitwise_scalars(con, op, left, right): assert result == expected -@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["datafusion", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) -@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @flink_no_bitwise def test_bitwise_not_scalar(con): expr = ~L(2) @@ -1634,9 +1584,8 @@ def test_bitwise_not_scalar(con): assert result == expected -@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["datafusion", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) -@pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) @flink_no_bitwise def test_bitwise_not_col(backend, alltypes, df): expr = (~alltypes.int_col).name("tmp") diff --git 
a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 74db927d928d3..8b17e69a6930b 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -11,22 +11,22 @@ sa = pytest.importorskip("sqlalchemy") sg = pytest.importorskip("sqlglot") -pytestmark = pytest.mark.notimpl(["flink", "exasol", "risingwave"]) +pytestmark = pytest.mark.notimpl(["flink", "risingwave"]) simple_literal = param(ibis.literal(1), id="simple_literal") array_literal = param( ibis.array([1]), marks=[ pytest.mark.never( - ["mysql", "mssql", "oracle", "impala", "sqlite"], - raises=exc.OperationNotDefinedError, + ["mysql", "mssql", "oracle", "impala", "sqlite", "exasol"], + raises=(exc.OperationNotDefinedError, exc.UnsupportedBackendType), reason="arrays not supported in the backend", ), ], id="array_literal", ) no_structs = pytest.mark.never( - ["impala", "mysql", "sqlite", "mssql"], + ["impala", "mysql", "sqlite", "mssql", "exasol"], raises=(NotImplementedError, sa.exc.CompileError, exc.UnsupportedBackendType), reason="structs not supported in the backend", ) @@ -117,7 +117,9 @@ def test_isin_bug(con, snapshot): raises=NotImplementedError, ) @pytest.mark.notyet( - ["datafusion"], reason="no unnest support", raises=exc.OperationNotDefinedError + ["datafusion", "exasol"], + reason="no unnest support", + raises=exc.OperationNotDefinedError, ) @pytest.mark.notyet( ["sqlite", "mysql", "druid", "impala", "mssql"], reason="no unnest support upstream" diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index 0ba91bb67050c..55559a2efb1da 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -961,13 +961,14 @@ def test_capitalize(con): ["dask", "pandas", "polars", "oracle", "flink"], raises=com.OperationNotDefinedError ) @pytest.mark.notyet( - ["mssql", "sqlite", "exasol"], - reason="no arrays", - raises=com.OperationNotDefinedError, + ["mssql", "sqlite"], reason="no arrays", raises=com.OperationNotDefinedError ) @pytest.mark.never( ["mysql"], raises=com.OperationNotDefinedError, reason="no array support" ) +@pytest.mark.never( + ["exasol"], raises=com.UnsupportedBackendType, reason="no array support" +) @pytest.mark.notimpl( ["impala"], raises=com.UnsupportedBackendType, reason="no array support" ) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 005bea8ad1052..5643369051f7f 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -47,7 +47,6 @@ raises=AttributeError, reason="Can only use .dt accessor with datetimelike values", ) -@pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) def test_date_extract(backend, alltypes, df, attr, expr_fn): expr = getattr(expr_fn(alltypes.timestamp_col), attr)() expected = getattr(df.timestamp_col.dt, attr).astype("int32") @@ -60,13 +59,9 @@ def test_date_extract(backend, alltypes, df, attr, expr_fn): @pytest.mark.parametrize( "attr", [ - param( - "year", marks=[pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError)] - ), - param( - "month", marks=[pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError)] - ), - param("day", marks=[pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError)]), + "year", + "month", + "day", param( "day_of_year", marks=[ @@ -80,24 +75,26 @@ def test_date_extract(backend, alltypes, df, attr, expr_fn): "quarter", marks=[ pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError), - pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError), + 
pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), + "hour", + "minute", param( - "hour", marks=[pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError)] - ), - param( - "minute", marks=[pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError)] - ), - param( - "second", marks=[pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError)] + "second", + marks=[ + pytest.mark.broken( + ["exasol"], + raises=AssertionError, + reason="seems like exasol might be rounding", + ) + ], ), ], ) -@pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["druid"], - raises=AttributeError, + raises=(AttributeError, com.OperationNotDefinedError), reason="AttributeError: 'StringColumn' object has no attribute 'X'", ) def test_timestamp_extract(backend, alltypes, df, attr): @@ -113,42 +110,12 @@ def test_timestamp_extract(backend, alltypes, df, attr): @pytest.mark.parametrize( ("func", "expected"), [ - param( - methodcaller("year"), - 2015, - id="year", - marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], - ), - param( - methodcaller("month"), - 9, - id="month", - marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], - ), - param( - methodcaller("day"), - 1, - id="day", - marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], - ), - param( - methodcaller("hour"), - 14, - id="hour", - marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], - ), - param( - methodcaller("minute"), - 48, - id="minute", - marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], - ), - param( - methodcaller("second"), - 5, - id="second", - marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], - ), + param(methodcaller("year"), 2015, id="year"), + param(methodcaller("month"), 9, id="month"), + param(methodcaller("day"), 1, id="day"), + param(methodcaller("hour"), 14, id="hour"), + param(methodcaller("minute"), 48, id="minute"), + param(methodcaller("second"), 5, id="second"), param( methodcaller("millisecond"), 359, @@ -262,13 +229,12 @@ def test_timestamp_extract_epoch_seconds(backend, alltypes, df): backend.assert_series_equal(result, expected) -@pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["oracle", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["druid"], raises=AttributeError, reason="'StringColumn' object has no attribute 'week_of_year'", ) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_timestamp_extract_week_of_year(backend, alltypes, df): expr = alltypes.timestamp_col.week_of_year().name("tmp") result = expr.execute() @@ -344,7 +310,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): param( "W", marks=[ - pytest.mark.broken(["sqlite"], raises=AssertionError), + pytest.mark.broken(["sqlite", "exasol"], raises=AssertionError), pytest.mark.notimpl(["mysql"], raises=com.UnsupportedOperationError), pytest.mark.broken( ["polars"], @@ -480,7 +446,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): reason="attempt to calculate the remainder with a divisor of zero", ), pytest.mark.notimpl( - ["flink"], + ["flink", "exasol"], raises=com.UnsupportedOperationError, reason=" unit is not supported in timestamp truncate", ), @@ -488,13 +454,12 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): ), ], ) -@pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["oracle", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.broken( 
["druid"], raises=AttributeError, reason="AttributeError: 'StringColumn' object has no attribute 'truncate'", ) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_timestamp_truncate(backend, alltypes, df, unit): expr = alltypes.timestamp_col.truncate(unit).name("tmp") @@ -565,6 +530,11 @@ def test_timestamp_truncate(backend, alltypes, df, unit): "Timestamp truncation is not supported in Flink" ), ), + pytest.mark.broken( + ["exasol"], + raises=AssertionError, + reason="behavior is different than expected", + ), ], ), ], @@ -581,7 +551,6 @@ def test_timestamp_truncate(backend, alltypes, df, unit): raises=AttributeError, reason="AttributeError: 'StringColumn' object has no attribute 'date'", ) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_date_truncate(backend, alltypes, df, unit): expr = alltypes.timestamp_col.date().truncate(unit).name("tmp") @@ -848,8 +817,7 @@ def convert_to_offset(x): id="timestamp-add-interval", marks=[ pytest.mark.notimpl( - ["sqlite"], - raises=com.OperationNotDefinedError, + ["sqlite", "exasol"], raises=com.OperationNotDefinedError ), pytest.mark.notimpl( ["druid"], @@ -872,6 +840,7 @@ def convert_to_offset(x): "snowflake", "sqlite", "bigquery", + "exasol" ], raises=com.OperationNotDefinedError, ), @@ -898,6 +867,7 @@ def convert_to_offset(x): "polars", "snowflake", "bigquery", + "exasol" ], raises=com.OperationNotDefinedError, ), @@ -921,8 +891,7 @@ def convert_to_offset(x): reason="unsupported operand type(s) for -: 'StringColumn' and 'IntervalScalar'", ), pytest.mark.notimpl( - ["sqlite"], - raises=com.OperationNotDefinedError, + ["sqlite", "exasol"], raises=com.OperationNotDefinedError ), ], ), @@ -941,6 +910,7 @@ def convert_to_offset(x): raises=AttributeError, reason="'StringColumn' object has no attribute 'date'", ), + pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), param( @@ -958,6 +928,7 @@ def convert_to_offset(x): raises=AttributeError, reason="'StringColumn' object has no attribute 'date'", ), + pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), param( @@ -998,6 +969,7 @@ def convert_to_offset(x): raises=Exception, reason="pyarrow.lib.ArrowInvalid: Casting from duration[us] to duration[s] would lose data", ), + pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), param( @@ -1040,7 +1012,6 @@ def convert_to_offset(x): ], ) @pytest.mark.notimpl(["mssql", "oracle"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn): expr = expr_fn(alltypes, backend).name("tmp") expected = expected_fn(df, backend) @@ -1284,7 +1255,6 @@ def test_temporal_binop_pandas_timedelta( raises=AttributeError, reason="Can only use .dt accessor with datetimelike values", ) -@pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) def test_timestamp_comparison_filter(backend, con, alltypes, df, func_name): ts = pd.Timestamp("20100302", tz="UTC").to_pydatetime() @@ -1842,14 +1812,13 @@ def test_now_from_projection(alltypes): } -@pytest.mark.notimpl(["pandas", "dask"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["pandas", "dask", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["druid"], raises=PyDruidProgrammingError, reason="SQL parse failed" ) @pytest.mark.notimpl( ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00936 missing expression" ) -@pytest.mark.notimpl(["exasol"], 
raises=ExaQueryError) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -1880,13 +1849,13 @@ def test_date_literal(con, backend): @pytest.mark.notimpl( - ["pandas", "dask", "pyspark", "mysql"], raises=com.OperationNotDefinedError + ["pandas", "dask", "pyspark", "mysql", "exasol"], + raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00904: MAKE TIMESTAMP invalid" ) @pytest.mark.notyet(["impala"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -1905,7 +1874,8 @@ def test_timestamp_literal(con, backend): @pytest.mark.notimpl( - ["pandas", "mysql", "dask", "pyspark"], raises=com.OperationNotDefinedError + ["pandas", "mysql", "dask", "pyspark", "exasol"], + raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( ["sqlite"], @@ -1950,7 +1920,6 @@ def test_timestamp_literal(con, backend): ", , , )" ), ) -@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -1980,10 +1949,11 @@ def test_timestamp_with_timezone_literal(con, timezone, expected): ["pandas", "datafusion", "dask", "pyspark", "polars", "mysql"], raises=com.OperationNotDefinedError, ) -@pytest.mark.notyet(["clickhouse", "impala"], raises=com.OperationNotDefinedError) +@pytest.mark.notyet( + ["clickhouse", "impala", "exasol"], raises=com.OperationNotDefinedError +) @pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -2124,7 +2094,7 @@ def test_interval_literal(con, backend): assert con.execute(expr.typeof()) == INTERVAL_BACKEND_TYPES[backend_name] -@pytest.mark.notimpl(["pandas", "dask"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["pandas", "dask", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["druid"], raises=AttributeError, @@ -2133,7 +2103,6 @@ def test_interval_literal(con, backend): @pytest.mark.broken( ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00936: missing expression" ) -@pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -2150,7 +2119,8 @@ def test_date_column_from_ymd(backend, con, alltypes, df): @pytest.mark.notimpl( - ["pandas", "dask", "pyspark", "mysql"], raises=com.OperationNotDefinedError + ["pandas", "dask", "pyspark", "mysql", "exasol"], + raises=com.OperationNotDefinedError, ) @pytest.mark.broken( ["druid"], @@ -2161,7 +2131,6 @@ def test_date_column_from_ymd(backend, con, alltypes, df): ["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00904 make timestamp invalid" ) @pytest.mark.notyet(["impala"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) @pytest.mark.notimpl( ["risingwave"], raises=sa.exc.InternalError, @@ -2224,16 +2193,10 @@ def test_timestamp_extract_milliseconds_with_big_value(con): @pytest.mark.notimpl( ["datafusion"], raises=Exception, - reason=( - "This feature is not implemented: Unsupported CAST from Int32 to Timestamp(Nanosecond, None)" - ), + reason="Unsupported CAST from Int32 to Timestamp(Nanosecond, None)", ) -@pytest.mark.notimpl( - ["oracle"], - raises=sa.exc.DatabaseError, - reason="ORA-00932", -) -@pytest.mark.notimpl(["exasol"], 
raises=sa.exc.DBAPIError) +@pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError, reason="ORA-00932") +@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) def test_integer_cast_to_timestamp_column(backend, alltypes, df): expr = alltypes.int_col.cast("timestamp") expected = pd.to_datetime(df.int_col, unit="s").rename(expr.get_name()) @@ -2242,7 +2205,7 @@ def test_integer_cast_to_timestamp_column(backend, alltypes, df): @pytest.mark.notimpl(["oracle"], raises=sa.exc.DatabaseError) -@pytest.mark.notimpl(["exasol"], raises=sa.exc.DBAPIError) +@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) def test_integer_cast_to_timestamp_scalar(alltypes, df): expr = alltypes.int_col.min().cast("timestamp") result = expr.execute() @@ -2344,7 +2307,6 @@ def test_timestamp_date_comparison(backend, alltypes, df, left_fn, right_fn): reason="Casting from timestamp[s] to timestamp[ns] would result in out of bounds timestamp: 81953424000", raises=ArrowInvalid, ) -@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) def test_large_timestamp(con): huge_timestamp = datetime.datetime(year=4567, month=1, day=1) expr = ibis.timestamp("4567-01-01 00:00:00") @@ -2377,6 +2339,7 @@ def test_large_timestamp(con): reason="time_parse truncates to milliseconds", raises=AssertionError, ), + pytest.mark.notimpl(["exasol"], raises=AssertionError), ], ), param( @@ -2428,6 +2391,7 @@ def test_large_timestamp(con): raises=sa.exc.InternalError, reason="Parse error: timestamp without time zone Can't cast string to timestamp (expected format is YYYY-MM-DD HH:MM:SS[.D+{up to 6 digits}] or YYYY-MM-DD HH:MM or YYYY-MM-DD or ISO 8601 format)", ), + pytest.mark.notimpl(["exasol"], raises=AssertionError), ], ), ], @@ -2437,7 +2401,6 @@ def test_large_timestamp(con): raises=sa.exc.DatabaseError, reason="ORA-01843: invalid month was specified", ) -@pytest.mark.notimpl(["exasol"], raises=ExaQueryError) def test_timestamp_precision_output(con, ts, scale, unit): dtype = dt.Timestamp(scale=scale) expr = ibis.literal(ts).cast(dtype) @@ -2576,10 +2539,7 @@ def test_delta(con, start, end, unit, expected): {"seconds": 2}, "2s", marks=[ - pytest.mark.notimpl( - ["datafusion"], - raises=com.OperationNotDefinedError, - ), + pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) ], id="seconds", ), @@ -2587,10 +2547,7 @@ def test_delta(con, start, end, unit, expected): {"minutes": 5}, "300s", marks=[ - pytest.mark.notimpl( - ["datafusion"], - raises=com.OperationNotDefinedError, - ), + pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) ], id="minutes", ), @@ -2598,10 +2555,7 @@ def test_delta(con, start, end, unit, expected): {"hours": 2}, "2h", marks=[ - pytest.mark.notimpl( - ["datafusion"], - raises=com.OperationNotDefinedError, - ), + pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) ], id="hours", ), diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 4378d1d4dd849..ccccc9571ad66 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -14,6 +14,7 @@ import ibis.expr.datatypes as dt from ibis.backends.tests.errors import ( ClickHouseDatabaseError, + ExaQueryError, GoogleBadRequest, ImpalaHiveServer2Error, MySQLOperationalError, @@ -24,17 +25,9 @@ from ibis.legacy.udf.vectorized import analytic, reduction pytestmark = [ - pytest.mark.notimpl( - ["exasol"], - raises=( - sa.exc.ProgrammingError, - sa.exc.NoSuchTableError, - com.OperationNotDefinedError, - ), - ), pytest.mark.notimpl( ["druid"], 
raises=(com.OperationNotDefinedError, PyDruidProgrammingError) - ), + ) ] @@ -163,7 +156,9 @@ def calc_zscore(s): lambda t: t.id.rank(method="min") / t.id.transform(len), id="cume_dist", marks=[ - pytest.mark.notyet(["clickhouse"], raises=com.OperationNotDefinedError), + pytest.mark.notyet( + ["clickhouse", "exasol"], raises=com.OperationNotDefinedError + ), pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl(["dask"], raises=NotImplementedError), pytest.mark.notimpl( @@ -208,13 +203,19 @@ def calc_zscore(s): lambda t, win: t.float_col.first().over(win), lambda t: t.float_col.transform("first"), id="first", - marks=pytest.mark.notimpl(["dask"], raises=NotImplementedError), + marks=[ + pytest.mark.notimpl(["dask"], raises=NotImplementedError), + pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), + ], ), param( lambda t, win: t.float_col.last().over(win), lambda t: t.float_col.transform("last"), id="last", - marks=pytest.mark.notimpl(["dask"], raises=NotImplementedError), + marks=[ + pytest.mark.notimpl(["dask"], raises=NotImplementedError), + pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), + ], ), param( lambda t, win: t.double_col.nth(3).over(win), @@ -430,6 +431,7 @@ def test_grouped_bounded_expanding_window( "snowflake", "datafusion", "trino", + "exasol", ], raises=com.OperationNotDefinedError, ), @@ -589,6 +591,7 @@ def test_grouped_bounded_preceding_window(backend, alltypes, df, window_fn): "snowflake", "trino", "datafusion", + "exasol", ], raises=com.OperationNotDefinedError, ), @@ -785,6 +788,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): "snowflake", "trino", "datafusion", + "exasol", ], raises=com.OperationNotDefinedError, ), @@ -816,6 +820,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): "snowflake", "trino", "datafusion", + "exasol", ], raises=com.OperationNotDefinedError, ), @@ -941,6 +946,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): "snowflake", "trino", "datafusion", + "exasol", ], raises=com.OperationNotDefinedError, ), @@ -973,6 +979,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): "snowflake", "trino", "datafusion", + "exasol", ], raises=com.OperationNotDefinedError, ), @@ -1164,7 +1171,7 @@ def test_mutate_window_filter(backend, alltypes): backend.assert_frame_equal(res, sol, check_dtype=False) -@pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["polars", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl( ["flink"], raises=Exception, @@ -1226,6 +1233,11 @@ def test_first_last(backend): raises=sa.exc.InternalError, reason="sql parser error: Expected literal int, found: INTERVAL at line:1, column:99", ) +@pytest.mark.broken( + ["exasol"], + raises=ExaQueryError, + reason="database can't handle UTC timestamps in DataFrames", +) def test_range_expression_bounds(backend): t = ibis.memtable( { diff --git a/poetry.lock b/poetry.lock index 2bc9f08730abe..199a9f1083914 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4691,6 +4691,7 @@ files = [ [package.dependencies] packaging = "*" +pandas = {version = "*", optional = true, markers = "extra == \"pandas\""} pyopenssl = "*" rsa = "*" websocket-client = ">=1.0.1" @@ -6352,40 +6353,6 @@ postgresql-psycopg2cffi = ["psycopg2cffi"] pymysql = ["pymysql", "pymysql (<1)"] sqlcipher = ["sqlcipher3_binary"] -[[package]] -name = "sqlalchemy-exasol" -version = "4.6.3" -description = "EXASOL dialect for SQLAlchemy" -optional = true 
-python-versions = ">=3.8,<4.0" -files = [ - {file = "sqlalchemy_exasol-4.6.3-py3-none-any.whl", hash = "sha256:d524d14bd84935087fb4e9fed273c1b5f6d23f0008ef3460a0278aa332e646ea"}, - {file = "sqlalchemy_exasol-4.6.3.tar.gz", hash = "sha256:03a424886cc90480a2127ca0531779e8b0a415d4b113d85dd23025d6c0b52cd3"}, -] - -[package.dependencies] -packaging = ">=21.3" -pyexasol = ">=0.25.1,<0.26.0" -pyodbc = ">=4.0.34,<6" -sqlalchemy = ">=1.4,<2" - -[package.extras] -turbodbc = ["turbodbc (==4.5.4)"] - -[[package]] -name = "sqlalchemy-risingwave" -version = "1.0.0" -description = "RisingWave dialect for SQLAlchemy" -optional = true -python-versions = "*" -files = [ - {file = "sqlalchemy-risingwave-1.0.0.tar.gz", hash = "sha256:856a3c44b98cba34d399c3cc9785a74896caca152b3685d87553e4210e3e07a4"}, - {file = "sqlalchemy_risingwave-1.0.0-py3-none-any.whl", hash = "sha256:c733365abc38e88f4d23d83713cfc3f21c0b0d3c81210cbc2f569b49a912ba08"}, -] - -[package.dependencies] -SQLAlchemy = ">=1.4,<2" - [[package]] name = "sqlalchemy-views" version = "0.3.2" @@ -7332,7 +7299,7 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\ cffi = ["cffi (>=1.11)"] [extras] -all = ["black", "clickhouse-connect", "dask", "datafusion", "db-dtypes", "deltalake", "duckdb", "geopandas", "google-cloud-bigquery", "google-cloud-bigquery-storage", "graphviz", "impyla", "oracledb", "packaging", "pins", "polars", "psycopg2", "pydata-google-auth", "pydruid", "pymysql", "pyodbc", "pyspark", "regex", "shapely", "snowflake-connector-python", "sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views", "trino"] +all = ["black", "clickhouse-connect", "dask", "datafusion", "db-dtypes", "deltalake", "duckdb", "geopandas", "google-cloud-bigquery", "google-cloud-bigquery-storage", "graphviz", "impyla", "oracledb", "packaging", "pins", "polars", "psycopg2", "pydata-google-auth", "pydruid", "pyexasol", "pymysql", "pyodbc", "pyspark", "regex", "shapely", "snowflake-connector-python", "sqlalchemy", "sqlalchemy-views", "trino"] bigquery = ["db-dtypes", "google-cloud-bigquery", "google-cloud-bigquery-storage", "pydata-google-auth"] clickhouse = ["clickhouse-connect"] dask = ["dask", "regex"] @@ -7342,7 +7309,7 @@ deltalake = ["deltalake"] druid = ["pydruid"] duckdb = ["duckdb"] examples = ["pins"] -exasol = ["sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views"] +exasol = ["pyexasol"] flink = [] geospatial = ["geopandas", "shapely"] impala = ["impyla"] @@ -7362,4 +7329,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "1939863bb76e53c0c8a1575ffe8fd2e035e6768ac21682fe12a9e640ffe3ade1" +content-hash = "3fcc813731a54acc626f4e5d124030eeeff9ce304dd2851b16dfdf89ab529d01" diff --git a/pyproject.toml b/pyproject.toml index 53d58ee328f62..69b029a66af09 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,6 +77,7 @@ polars = { version = ">=0.19.3,<1", optional = true } psycopg2 = { version = ">=2.8.4,<3", optional = true } pydata-google-auth = { version = ">=1.4.0,<2", optional = true } pydruid = { version = ">=0.6.5,<1", optional = true } +pyexasol = { version = ">=0.25.2,<1", optional = true, extras = ["pandas"] } pymysql = { version = ">=1,<2", optional = true } pyodbc = { version = ">=4.0.39,<6", optional = true } pyspark = { version = ">=3,<4", optional = true } @@ -87,7 +88,6 @@ shapely = { version = ">=2,<3", optional = true } # issues with versions <3.0.2 snowflake-connector-python = { version = ">=3.0.2,<4,!=3.3.0b1", optional = true } sqlalchemy = { version = ">=1.4,<3", 
optional = true } -sqlalchemy-exasol = { version = ">=4.6.0", optional = true } sqlalchemy-views = { version = ">=0.3.1,<1", optional = true } sqlalchemy-risingwave = { version = ">=1.0.0,<2", optional = true } trino = { version = ">=0.321,<1", optional = true } @@ -161,6 +161,7 @@ all = [ "psycopg2", "pydata-google-auth", "pydruid", + "pyexasol", "pymysql", "pyodbc", "pyspark", @@ -168,7 +169,6 @@ all = [ "shapely", "snowflake-connector-python", "sqlalchemy", - "sqlalchemy-exasol", "sqlalchemy-views", "sqlalchemy-risingwave", "trino", @@ -184,7 +184,7 @@ dask = ["dask", "regex"] datafusion = ["datafusion"] druid = ["pydruid"] duckdb = ["duckdb"] -exasol = ["sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views"] +exasol = ["pyexasol"] flink = [] geospatial = ["geopandas", "shapely"] impala = ["impyla"] diff --git a/requirements-dev.txt b/requirements-dev.txt index b4ad86ee9fb2a..53a3afa070418 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -170,14 +170,14 @@ py4j==0.10.9.7 ; python_version >= "3.9" and python_version < "4.0" pyarrow-hotfix==0.6 ; python_version >= "3.9" and python_version < "4.0" pyarrow==15.0.0 ; python_version >= "3.9" and python_version < "4.0" pyasn1-modules==0.3.0 ; python_version >= "3.9" and python_version < "4.0" -pyasn1==0.5.1 ; python_version >= "3.9" and python_version < "4.0" +pyasn1==0.5.1 ; python_version >= "3.9" and python_version < "4" pycparser==2.21 ; python_version >= "3.9" and python_version < "4.0" pydantic-core==2.16.1 ; python_version >= "3.10" and python_version < "3.13" pydantic==2.6.0 ; python_version >= "3.10" and python_version < "3.13" pydata-google-auth==1.8.2 ; python_version >= "3.9" and python_version < "4.0" pydeps==1.12.17 ; python_version >= "3.9" and python_version < "4.0" pydruid==0.6.6 ; python_version >= "3.9" and python_version < "4.0" -pyexasol==0.25.2 ; python_version >= "3.9" and python_version < "4.0" +pyexasol[pandas]==0.25.2 ; python_version >= "3.9" and python_version < "4.0" pygments==2.17.2 ; python_version >= "3.9" and python_version < "4.0" pyinstrument==4.6.2 ; python_version >= "3.9" and python_version < "4.0" pyjwt==2.8.0 ; python_version >= "3.9" and python_version < "4.0" @@ -234,8 +234,6 @@ snowflake-connector-python==3.6.0 ; python_version >= "3.9" and python_version < sortedcontainers==2.4.0 ; python_version >= "3.9" and python_version < "4.0" soupsieve==2.5 ; python_version >= "3.10" and python_version < "3.13" sphobjinv==2.3.1 ; python_version >= "3.10" and python_version < "3.13" -sqlalchemy-exasol==4.6.3 ; python_version >= "3.9" and python_version < "4.0" -sqlalchemy-risingwave==1.0.0 ; python_version >= "3.9" and python_version < "4.0" sqlalchemy-views==0.3.2 ; python_version >= "3.9" and python_version < "4.0" sqlalchemy==1.4.51 ; python_version >= "3.9" and python_version < "4.0" sqlglot==20.11.0 ; python_version >= "3.9" and python_version < "4.0"
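
For reference, a minimal usage sketch of the ported backend. With the `exasol` extra
now resolving to `pyexasol[pandas]`, connections go through pyexasol's native
websocket driver rather than SQLAlchemy/ODBC. The connection keywords below
(`user`, `password`, `host`, `port`, `schema`) and the `sys`/`exasol` credentials
follow the CI conventions from ci/schema/exasol.sql and the Exasol docker defaults;
treat them as illustrative assumptions rather than the exact `do_connect` signature:

    import ibis

    # Connect to a local Exasol instance on its default client port (8563);
    # credentials and schema mirror the test-suite setup and should be
    # adjusted for a real deployment.
    con = ibis.exasol.connect(
        user="sys",
        password="exasol",
        host="localhost",
        port=8563,
        schema="EXASOL",
    )

    # Identifiers are quoted by the sqlglot compiler, so the lowercase
    # table names created by the CI schema resolve as-is.
    t = con.table("diamonds")
    print(t.head(5).execute())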