From e1197b294169592cd16d48faa95b6222818a061a Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Wed, 18 Dec 2024 05:57:11 -0500 Subject: [PATCH] refactor(duckdb): remove the `read_in_memory` method from the duckdb backend BREAKING CHANGE: The `read_in_memory` method is removed from the duckdb backend. Use `ibis.memtable` instead. --- ibis/backends/duckdb/__init__.py | 37 --------------------- ibis/backends/duckdb/tests/test_io.py | 48 ++++++--------------------- 2 files changed, 10 insertions(+), 75 deletions(-) diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index fc93cc53b309..eb280e021aeb 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -808,43 +808,6 @@ def _read_parquet_pyarrow_dataset( # by the time we execute against this so we register it # explicitly. - @util.deprecated( - instead="Pass in-memory data to `memtable` instead.", - as_of="9.1", - removed_in="10.0", - ) - def read_in_memory( - self, - source: pd.DataFrame - | pa.Table - | pa.RecordBatchReader - | pl.DataFrame - | pl.LazyFrame, - table_name: str | None = None, - ) -> ir.Table: - """Register an in-memory table object in the current database. - - Supported objects include pandas DataFrame, a Polars - DataFrame/LazyFrame, or a PyArrow Table or RecordBatchReader. - - Parameters - ---------- - source - The data source. - table_name - An optional name to use for the created table. This defaults to - a sequentially generated name. - - Returns - ------- - ir.Table - The just-registered table - - """ - table_name = table_name or util.gen_name("read_in_memory") - _read_in_memory(source, table_name, self) - return self.table(table_name) - def read_delta( self, source_table: str, diff --git a/ibis/backends/duckdb/tests/test_io.py b/ibis/backends/duckdb/tests/test_io.py index ca7c35ad2eae..439e26bcdeca 100644 --- a/ibis/backends/duckdb/tests/test_io.py +++ b/ibis/backends/duckdb/tests/test_io.py @@ -275,21 +275,6 @@ def test_attach_sqlite(data_dir, tmp_path): assert dt.String(nullable=True) in set(types) -def test_re_read_in_memory_overwrite(con): - df_pandas_1 = pd.DataFrame({"a": ["a"], "b": [1], "d": ["hi"]}) - df_pandas_2 = pd.DataFrame({"a": [1], "c": [1.4]}) - - with pytest.warns(FutureWarning, match="memtable"): - table = con.read_in_memory(df_pandas_1, table_name="df") - assert len(table.columns) == 3 - assert table.schema() == ibis.schema([("a", "str"), ("b", "int"), ("d", "str")]) - - with pytest.warns(FutureWarning, match="memtable"): - table = con.read_in_memory(df_pandas_2, table_name="df") - assert len(table.columns) == 2 - assert table.schema() == ibis.schema([("a", "int"), ("c", "float")]) - - def test_memtable_with_nullable_dtypes(con): data = pd.DataFrame( { @@ -381,37 +366,24 @@ def test_s3_403_fallback(con, httpserver, monkeypatch): def test_register_numpy_str(con): data = pd.DataFrame({"a": [np.str_("xyz"), None]}) - with pytest.warns(FutureWarning, match="memtable"): - result = con.read_in_memory(data) - tm.assert_frame_equal(result.execute(), data) + result = ibis.memtable(data) + tm.assert_frame_equal(con.execute(result), data) -def test_register_recordbatchreader_warns(con): +def test_memtable_recordbatchreader_raises(con): table = pa.Table.from_batches( - [ - pa.RecordBatch.from_pydict({"x": [1, 2]}), - pa.RecordBatch.from_pydict({"x": [3, 4]}), - ] + map(pa.RecordBatch.from_pydict, [{"x": [1, 2]}, {"x": [3, 4]}]) ) reader = table.to_reader() - sol = table.to_pandas() - with pytest.warns(FutureWarning, match="memtable"): - t = con.read_in_memory(reader) - # First execute is fine - res = t.execute() - tm.assert_frame_equal(res, sol) + with pytest.raises(TypeError): + ibis.memtable(reader) - # Later executes warn - with pytest.warns(UserWarning, match="RecordBatchReader"): - t.limit(2).execute() + t = ibis.memtable(reader.read_all()) - # Re-registering over the name with a new reader is fine - reader = table.to_reader() - with pytest.warns(FutureWarning, match="memtable"): - t = con.read_in_memory(reader, table_name=t.get_name()) - res = t.execute() - tm.assert_frame_equal(res, sol) + # First execute is fine + res = con.execute(t) + tm.assert_frame_equal(res, table.to_pandas()) def test_csv_with_slash_n_null(con, tmp_path):