Skip to content

Commit

Permalink
feat(duckdb): add support for read_mysql (#8656)
Browse files Browse the repository at this point in the history
  • Loading branch information
ncclementi authored Mar 20, 2024
1 parent 8a789c0 commit 4ea4a1d
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 1 deletion.
43 changes: 42 additions & 1 deletion ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import ast
import contextlib
import os
import urllib
import warnings
from operator import itemgetter
from pathlib import Path
Expand Down Expand Up @@ -948,7 +949,6 @@ def list_tables(
)
.sql(self.name, pretty=True)
)

out = self.con.execute(sql).fetch_arrow_table()

return self._filter_with_like(out[col].to_pylist(), like)
Expand Down Expand Up @@ -988,6 +988,47 @@ def read_postgres(

return self.table(table_name)

def read_mysql(
    self,
    *,
    uri: str,
    catalog: str,
    table_name: str | None = None,
) -> ir.Table:
    """Register a table from a MySQL instance into a DuckDB table.

    Parameters
    ----------
    uri
        A mysql URI of the form `mysql://user:password@host:port/database`.
        Components that are absent from the URI are omitted from the
        connection string handed to DuckDB.
    catalog
        User-defined alias given to the MySQL database that is being attached
        to DuckDB
    table_name
        The table to read

    Returns
    -------
    ir.Table
        The just-registered table.

    Raises
    ------
    ValueError
        If `table_name` is not provided.
    """
    # A bare `import urllib` does not guarantee that the `parse` submodule
    # has been loaded, so import it explicitly where it is used.
    import urllib.parse

    # Validate before doing any work (parsing, loading extensions).
    if table_name is None:
        raise ValueError("`table_name` is required when registering a mysql table")

    parsed = urllib.parse.urlparse(uri)
    database = parsed.path.strip("/")

    self._load_extensions(["mysql"])

    # Only forward the components actually present in the URI; formatting a
    # missing one would otherwise inject the literal text "None"
    # (e.g. `port=None`) into the connection string.
    options = {
        "host": parsed.hostname,
        "user": parsed.username,
        "password": parsed.password,
        "port": parsed.port,
        "database": database,
    }
    connection = " ".join(
        f"{key}={value}"
        for key, value in options.items()
        if value is not None and value != ""
    )
    # The connection string is embedded in a single-quoted SQL literal, so
    # any single quote inside it has to be doubled.
    connection = connection.replace("'", "''")

    query_con = f"ATTACH '{connection}' AS {catalog} (TYPE mysql)"

    # The context manager is entered only to execute the ATTACH statement.
    with self._safe_raw_sql(query_con):
        pass

    return self.table(table_name, schema=database, database=catalog)

def read_sqlite(self, path: str | Path, table_name: str | None = None) -> ir.Table:
"""Register a table from a SQLite database into a DuckDB table.
Expand Down
35 changes: 35 additions & 0 deletions ibis/backends/duckdb/tests/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,13 +178,48 @@ def pgurl(): # pragma: no cover
os.environ.get("DUCKDB_POSTGRES") is None, reason="avoiding CI shenanigans"
)
def test_read_postgres(con, pgurl):  # pragma: no cover
    """Read the `duckdb_test` postgres table through DuckDB and check it has rows."""
    # This test is skipped in CI so that no postgres container has to be
    # started just for it. To run it locally, set the env variable to True
    # and bring a postgres container up before running the test.
    uri = f"postgres://{pgurl.username}:{pgurl.password}@{pgurl.host}:{pgurl.port}"
    table = con.read_postgres(uri, table_name="duckdb_test")
    assert table.count().execute()


@pytest.fixture(scope="session")
def mysqlurl():  # pragma: no cover
    """Create the `duckdb_test` table in MySQL and yield the backend's `con`.

    The table is dropped again once the fixture is torn down.
    """
    backend = ibis.mysql.connect(
        user="ibis",
        password="ibis",
        database="ibis_testing",
    )

    frame = pd.DataFrame(
        {
            "x": [1.0, 2.0, 3.0, 1.0],
            "y": ["a", "b", "c", "a"],
        }
    )
    table_schema = ibis.schema({"x": "float64", "y": "str"})

    backend.create_table("duckdb_test", frame, schema=table_schema, overwrite=True)
    yield backend.con
    backend.drop_table("duckdb_test", force=True)


@pytest.mark.skipif(
    os.environ.get("DUCKDB_MYSQL") is None, reason="avoiding CI shenanigans"
)
def test_read_mysql(con, mysqlurl):  # pragma: no cover
    """Read the `duckdb_test` MySQL table through DuckDB and check it has rows."""
    # This test is skipped in CI so that no mysql container has to be started
    # just for it. To run it locally, set the env variable to True and bring
    # a mysql container up before running the test.

    # TODO(ncclementi) replace with mysqlurl.host once this is fixed:
    # https://github.com/duckdb/duckdb_mysql/issues/44
    hostname = "127.0.0.1"

    user = mysqlurl.user.decode()
    password = mysqlurl.password.decode()
    uri = f"mysql://{user}:{password}@{hostname}:{mysqlurl.port}/ibis_testing"

    table = con.read_mysql(uri=uri, catalog="mysqldb", table_name="duckdb_test")
    assert table.count().execute()


@pytest.mark.xfail(
LINUX and SANDBOXED,
reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
Expand Down

0 comments on commit 4ea4a1d

Please sign in to comment.