diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index c1570f207539..210670164316 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -27,7 +27,6 @@ from ibis.backends.base.sqlglot import C, F from ibis.backends.duckdb.compiler import DuckDBSQLCompiler from ibis.backends.duckdb.datatypes import DuckDBType -from ibis.expr.operations.relations import PandasDataFrameProxy from ibis.expr.operations.udf import InputType from ibis.formats.pandas import PandasData @@ -1171,10 +1170,6 @@ def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: yield name, ibis_type def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: - # in theory we could use pandas dataframes, but when using dataframes - # with pyarrow datatypes later reads of this data segfault - import pandas as pd - schema = op.schema if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]: raise exc.IbisTypeError( @@ -1184,21 +1179,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: # only register if we haven't already done so if (name := op.name) not in self.list_tables(): - if isinstance(data := op.data, PandasDataFrameProxy): - table = data.to_frame() - - # convert to object string dtypes because duckdb is either - # 1. extremely slow to register DataFrames with not-pyarrow - # string dtypes - # 2. broken for string[pyarrow] dtypes (segfault) - if conversions := { - colname: "str" - for colname, col in table.items() - if isinstance(col.dtype, pd.StringDtype) - }: - table = table.astype(conversions) - else: - table = data.to_pyarrow(schema) + table = op.data.to_pyarrow(schema) # register creates a transaction, and we can't nest transactions so # we create a function to encapsulate the whole shebang @@ -1206,10 +1187,7 @@ def _register(name, table): with self.begin() as con: con.connection.register(name, table) - try: - _register(name, table) - except duckdb.NotImplementedException: - _register(name, data.to_pyarrow(schema)) + _register(name, table) def _get_temp_view_definition( self, name: str, definition: sa.sql.compiler.Compiled diff --git a/ibis/backends/duckdb/tests/test_register.py b/ibis/backends/duckdb/tests/test_register.py index 816b08903242..50c1682e4264 100644 --- a/ibis/backends/duckdb/tests/test_register.py +++ b/ibis/backends/duckdb/tests/test_register.py @@ -367,3 +367,11 @@ def test_register_filesystem_gcs(con): ) assert band_members.count().to_pyarrow() + + +def test_memtable_null_column_parquet_dtype_roundtrip(con, tmp_path): + before = ibis.memtable({"a": [None, None, None]}, schema={"a": "string"}) + before.to_parquet(tmp_path / "tmp.parquet") + after = ibis.read_parquet(tmp_path / "tmp.parquet") + + assert before.a.type() == after.a.type()