From 4a484b59b4a6ea2a2c7d6b5790c001634280236e Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Tue, 5 Sep 2023 14:39:32 -0400 Subject: [PATCH] feat(duckdb): use new TypeMapper class for parsing dtypes --- ibis/backends/duckdb/__init__.py | 23 +++++++------ ibis/backends/duckdb/datatypes.py | 56 ------------------------------- 2 files changed, 12 insertions(+), 67 deletions(-) diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index ea33944458233..6c6aeb0b53484 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -11,30 +11,32 @@ from pathlib import Path from typing import TYPE_CHECKING, Any, Iterable, Iterator, Mapping, MutableMapping -import duckdb -import pyarrow as pa -import sqlglot as sg -import toolz -from packaging.version import parse as vparse - import ibis import ibis.common.exceptions as exc import ibis.expr.datatypes as dt import ibis.expr.operations as ops import ibis.expr.schema as sch import ibis.expr.types as ir +import pyarrow as pa +import sqlglot as sg +import toolz from ibis import util from ibis.backends.base import CanCreateSchema from ibis.backends.base.sql import BaseBackend +from ibis.backends.base.sqlglot.datatypes import DuckDBType from ibis.backends.duckdb.compiler import translate -from ibis.backends.duckdb.datatypes import parse, serialize, DuckDBType +from ibis.backends.duckdb.datatypes import serialize from ibis.expr.operations.relations import PandasDataFrameProxy from ibis.expr.operations.udf import InputType -from ibis.formats.pyarrow import PyArrowData from ibis.formats.pandas import PandasData +from ibis.formats.pyarrow import PyArrowData +from packaging.version import parse as vparse + +import duckdb if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Mapping, MutableMapping + import ibis.expr.operations as ops import pandas as pd import torch @@ -247,7 +249,7 @@ def get_schema(self, table_name: str, database: str | None = None) -> sch.Schema names, 
types, *_ = results.fetch_arrow_table() names = names.to_pylist() types = types.to_pylist() - return sch.Schema(dict(zip(names, map(parse, types)))) + return sch.Schema(dict(zip(names, map(DuckDBType.from_string, types)))) def list_databases(self, like: str | None = None) -> list[str]: result = self.raw_sql("PRAGMA database_list;") @@ -1146,8 +1148,7 @@ def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: map(as_py, rows["column_type"]), map(as_py, rows["null"]), ): - ibis_type = parse(type) - # ibis_type = DuckDBType.from_string(type, nullable=nullable) + ibis_type = DuckDBType.from_string(type) yield name, ibis_type.copy(nullable=null.lower() == "yes") def _register_in_memory_tables(self, expr: ir.Expr) -> None: diff --git a/ibis/backends/duckdb/datatypes.py b/ibis/backends/duckdb/datatypes.py index 80102f5b53b42..f8abd82bd5319 100644 --- a/ibis/backends/duckdb/datatypes.py +++ b/ibis/backends/duckdb/datatypes.py @@ -2,63 +2,7 @@ import functools -import sqlalchemy.dialects.postgresql as psql - import ibis.expr.datatypes as dt -from ibis.backends.base.sql.alchemy.datatypes import AlchemyType -from ibis.backends.base.sqlglot.datatypes import DuckDBType as SqlglotDuckdbType - -# _from_duckdb_types = { -# psql.BYTEA: dt.Binary, -# psql.UUID: dt.UUID, -# ducktypes.TinyInteger: dt.Int8, -# ducktypes.SmallInteger: dt.Int16, -# ducktypes.Integer: dt.Int32, -# ducktypes.BigInteger: dt.Int64, -# ducktypes.HugeInteger: dt.Decimal(38, 0), -# ducktypes.UInt8: dt.UInt8, -# ducktypes.UTinyInteger: dt.UInt8, -# ducktypes.UInt16: dt.UInt16, -# ducktypes.USmallInteger: dt.UInt16, -# ducktypes.UInt32: dt.UInt32, -# ducktypes.UInteger: dt.UInt32, -# ducktypes.UInt64: dt.UInt64, -# ducktypes.UBigInteger: dt.UInt64, -# } - -# _to_duckdb_types = { -# dt.UUID: psql.UUID, -# dt.Int8: ducktypes.TinyInteger, -# dt.Int16: ducktypes.SmallInteger, -# dt.Int32: ducktypes.Integer, -# dt.Int64: ducktypes.BigInteger, -# dt.UInt8: ducktypes.UTinyInteger, -#
dt.UInt16: ducktypes.USmallInteger, -# dt.UInt32: ducktypes.UInteger, -# dt.UInt64: ducktypes.UBigInteger, -# } - - -class DuckDBType(AlchemyType): - dialect = "duckdb" - - @classmethod - def to_ibis(cls, typ, nullable=True): - if dtype := _from_duckdb_types.get(type(typ)): - return dtype(nullable=nullable) - else: - return super().to_ibis(typ, nullable=nullable) - - @classmethod - def from_ibis(cls, dtype): - if typ := _to_duckdb_types.get(type(dtype)): - return typ - else: - return super().from_ibis(dtype) - - @classmethod - def from_string(cls, type_string, nullable=True): - return SqlglotDuckdbType.from_string(type_string, nullable=nullable) @functools.singledispatch