Skip to content

Commit

Permalink
chore: remove sqlalchemy from the codebase
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Feb 3, 2024
1 parent e68000c commit 5dd75af
Show file tree
Hide file tree
Showing 73 changed files with 738 additions and 4,023 deletions.
1 change: 0 additions & 1 deletion .envrc
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,3 @@ watch_file poetry-overrides.nix

export CLOUDSDK_ACTIVE_CONFIG_NAME=ibis-gbq
export GOOGLE_CLOUD_PROJECT="$CLOUDSDK_ACTIVE_CONFIG_NAME"
export SQLALCHEMY_WARN_20=1
3 changes: 0 additions & 3 deletions .github/workflows/ibis-backends.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ concurrency:
env:
FORCE_COLOR: "1"
ODBCSYSINI: "${{ github.workspace }}/ci/odbc"
SQLALCHEMY_WARN_20: "1"
HYPOTHESIS_PROFILE: "ci"

jobs:
Expand Down Expand Up @@ -426,8 +425,6 @@ jobs:
test_backends_min_version:
name: ${{ matrix.backend.title }} Min Version ${{ matrix.os }} python-${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
env:
SQLALCHEMY_WARN_20: "1"
strategy:
fail-fast: false
matrix:
Expand Down
3 changes: 0 additions & 3 deletions .github/workflows/nix-skip-helper.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,5 @@ jobs:
- "3.9"
- "3.10"
- "3.11"
include:
- os: macos-latest
python-version: "3.10"
steps:
- run: echo "No build required"
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ result-*

# tutorial data
geography.db
geography.duckdb

# build artifacts
ci/udf/.ninja_deps
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ Download the SQLite database from the `ibis-tutorial-data` GCS (Google Cloud
Storage) bucket, then connect to it using ibis.

```bash
curl -LsS -o geography.db 'https://storage.googleapis.com/ibis-tutorial-data/geography.db'
curl -LsSO 'https://storage.googleapis.com/ibis-tutorial-data/geography.duckdb'
```

Connect to the database and show the available tables
Expand All @@ -130,7 +130,7 @@ Connect to the database and show the available tables
>>> import ibis
>>> from ibis import _
>>> ibis.options.interactive = True
>>> con = ibis.sqlite.connect("geography.db")
>>> con = ibis.duckdb.connect("geography.duckdb")
>>> con.tables
Tables
------
Expand All @@ -147,7 +147,7 @@ Choose the `countries` table and preview its first few rows
┏━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃ iso_alpha2 ┃ iso_alpha3 ┃ iso_numeric ┃ fips ┃ name ┃ capital ┃ area_km2 ┃ population ┃ continent ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━┩
│ string │ string │ int32 │ string │ string │ string │ float64 │ int32 │ string │
│ string │ string │ int64 │ string │ string │ string │ float64 │ int64 │ string │
├────────────┼────────────┼─────────────┼────────┼──────────────────────┼──────────────────┼──────────┼────────────┼───────────┤
│ AD │ AND │ 20 │ AN │ Andorra │ Andorra la Vella │ 468.0 │ 84000 │ EU │
│ AE │ ARE │ 784 │ AE │ United Arab Emirates │ Abu Dhabi │ 82880.0 │ 4975593 │ AS │
Expand All @@ -170,7 +170,7 @@ Show the 5 least populous countries in Asia
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓
┃ name ┃ population ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩
│ string │ int32 │
│ string │ int64 │
├────────────────────────────────┼────────────┤
│ Cocos [Keeling] Islands │ 628 │
│ British Indian Ocean Territory │ 4000 │
Expand Down
87 changes: 35 additions & 52 deletions ci/make_geography_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,72 +16,56 @@
from __future__ import annotations

import argparse
import datetime
import json
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING, Any

import requests
import sqlalchemy as sa
import toolz

import ibis

if TYPE_CHECKING:
from collections.abc import Mapping

SCHEMAS = {
"countries": [
("iso_alpha2", sa.TEXT),
("iso_alpha3", sa.TEXT),
("iso_numeric", sa.INT),
("fips", sa.TEXT),
("name", sa.TEXT),
("capital", sa.TEXT),
("area_km2", sa.REAL),
("population", sa.INT),
("continent", sa.TEXT),
],
"gdp": [
("country_code", sa.TEXT),
("year", sa.INT),
("value", sa.REAL),
],
"independence": [
("country_code", sa.TEXT),
("independence_date", sa.DATE),
("independence_from", sa.TEXT),
],
}

POST_PARSE_FUNCTIONS = {
"independence": lambda row: toolz.assoc(
row,
"independence_date",
datetime.datetime.fromisoformat(row["independence_date"]).date(),
)
"countries": {
"iso_alpha2": "string",
"iso_alpha3": "string",
"iso_numeric": "int",
"fips": "string",
"name": "string",
"capital": "string",
"area_km2": "float",
"population": "int",
"continent": "string",
},
"gdp": {
"country_code": "string",
"year": "int",
"value": "float",
},
"independence": {
"country_code": "string",
"independence_date": "date",
"independence_from": "string",
},
}


def make_geography_db(
data: Mapping[str, Any],
con: sa.engine.Engine,
data: Mapping[str, Any], con: ibis.backends.duckdb.Backend
) -> None:
metadata = sa.MetaData(bind=con)

with con.begin() as bind:
with tempfile.TemporaryDirectory() as d:
for table_name, schema in SCHEMAS.items():
table = sa.Table(
table_name,
metadata,
*(sa.Column(col_name, col_type) for col_name, col_type in schema),
ibis_schema = ibis.schema(schema)
cols = ibis_schema.names
path = Path(d, f"{table_name}.jsonl")
path.write_text(
"\n".join(json.dumps(dict(zip(cols, row))) for row in data[table_name])
)
table_columns = table.c.keys()
post_parse = POST_PARSE_FUNCTIONS.get(table_name, toolz.identity)

table.drop(bind=bind, checkfirst=True)
table.create(bind=bind)
bind.execute(
table.insert().values(),
[post_parse(dict(zip(table_columns, row))) for row in data[table_name]],
con.create_table(
table_name, obj=con.read_json(path), schema=ibis_schema, overwrite=True
)


Expand Down Expand Up @@ -109,9 +93,8 @@ def main() -> None:
response = requests.get(args.input_data_url)
response.raise_for_status()
input_data = response.json()
db_path = Path(args.output_directory).joinpath("geography.db")
con = sa.create_engine(f"sqlite:///{db_path}")
make_geography_db(input_data, con)
db_path = Path(args.output_directory).joinpath("geography.duckdb")
make_geography_db(input_data, ibis.duckdb.connect(db_path))
print(db_path) # noqa: T201


Expand Down
30 changes: 15 additions & 15 deletions ci/schema/snowflake.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE OR REPLACE TABLE diamonds (
CREATE OR REPLACE TABLE "diamonds" (
"carat" FLOAT,
"cut" TEXT,
"color" TEXT,
Expand All @@ -11,7 +11,7 @@ CREATE OR REPLACE TABLE diamonds (
"z" FLOAT
);

CREATE OR REPLACE TABLE astronauts (
CREATE OR REPLACE TABLE "astronauts" (
"id" BIGINT,
"number" BIGINT,
"nationwide_number" BIGINT,
Expand All @@ -38,7 +38,7 @@ CREATE OR REPLACE TABLE astronauts (
"total_eva_hrs" FLOAT
);

CREATE OR REPLACE TABLE batting (
CREATE OR REPLACE TABLE "batting" (
"playerID" TEXT,
"yearID" BIGINT,
"stint" BIGINT,
Expand All @@ -63,7 +63,7 @@ CREATE OR REPLACE TABLE batting (
"GIDP" BIGINT
);

CREATE OR REPLACE TABLE awards_players (
CREATE OR REPLACE TABLE "awards_players" (
"playerID" TEXT,
"awardID" TEXT,
"yearID" BIGINT,
Expand All @@ -72,7 +72,7 @@ CREATE OR REPLACE TABLE awards_players (
"notes" TEXT
);

CREATE OR REPLACE TABLE functional_alltypes (
CREATE OR REPLACE TABLE "functional_alltypes" (
"id" INTEGER,
"bool_col" BOOLEAN,
"tinyint_col" SMALLINT,
Expand All @@ -88,7 +88,7 @@ CREATE OR REPLACE TABLE functional_alltypes (
"month" INTEGER
);

CREATE OR REPLACE TABLE array_types (
CREATE OR REPLACE TABLE "array_types" (
"x" ARRAY,
"y" ARRAY,
"z" ARRAY,
Expand All @@ -97,24 +97,24 @@ CREATE OR REPLACE TABLE array_types (
"multi_dim" ARRAY
);

INSERT INTO array_types ("x", "y", "z", "grouper", "scalar_column", "multi_dim")
INSERT INTO "array_types" ("x", "y", "z", "grouper", "scalar_column", "multi_dim")
SELECT [1, 2, 3], ['a', 'b', 'c'], [1.0, 2.0, 3.0], 'a', 1.0, [[], [1, 2, 3], NULL] UNION
SELECT [4, 5], ['d', 'e'], [4.0, 5.0], 'a', 2.0, [] UNION
SELECT [6, NULL], ['f', NULL], [6.0, NULL], 'a', 3.0, [NULL, [], NULL] UNION
SELECT [NULL, 1, NULL], [NULL, 'a', NULL], [], 'b', 4.0, [[1], [2], [], [3, 4, 5]] UNION
SELECT [2, NULL, 3], ['b', NULL, 'c'], NULL, 'b', 5.0, NULL UNION
SELECT [4, NULL, NULL, 5], ['d', NULL, NULL, 'e'], [4.0, NULL, NULL, 5.0], 'c', 6.0, [[1, 2, 3]];

CREATE OR REPLACE TABLE map ("idx" BIGINT, "kv" OBJECT);
CREATE OR REPLACE TABLE "map" ("idx" BIGINT, "kv" OBJECT);

INSERT INTO map ("idx", "kv")
INSERT INTO "map" ("idx", "kv")
SELECT 1, object_construct('a', 1, 'b', 2, 'c', 3) UNION
SELECT 2, object_construct('d', 4, 'e', 5, 'f', 6);


CREATE OR REPLACE TABLE struct ("abc" OBJECT);
CREATE OR REPLACE TABLE "struct" ("abc" OBJECT);

INSERT INTO struct ("abc")
INSERT INTO "struct" ("abc")
SELECT {'a': 1.0, 'b': 'banana', 'c': 2} UNION
SELECT {'a': 2.0, 'b': 'apple', 'c': 3} UNION
SELECT {'a': 3.0, 'b': 'orange', 'c': 4} UNION
Expand All @@ -123,18 +123,18 @@ INSERT INTO struct ("abc")
SELECT NULL UNION
SELECT {'a': 3.0, 'b': 'orange', 'c': NULL};

CREATE OR REPLACE TABLE json_t ("js" VARIANT);
CREATE OR REPLACE TABLE "json_t" ("js" VARIANT);

INSERT INTO json_t ("js")
INSERT INTO "json_t" ("js")
SELECT parse_json('{"a": [1,2,3,4], "b": 1}') UNION
SELECT parse_json('{"a":null,"b":2}') UNION
SELECT parse_json('{"a":"foo", "c":null}') UNION
SELECT parse_json('null') UNION
SELECT parse_json('[42,47,55]') UNION
SELECT parse_json('[]');

CREATE OR REPLACE TABLE win ("g" TEXT, "x" BIGINT NOT NULL, "y" BIGINT);
INSERT INTO win VALUES
CREATE OR REPLACE TABLE "win" ("g" TEXT, "x" BIGINT NOT NULL, "y" BIGINT);
INSERT INTO "win" VALUES
('a', 0, 3),
('a', 1, 2),
('a', 2, 0),
Expand Down
30 changes: 15 additions & 15 deletions docs/backends/app/backend_info_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,25 +45,25 @@ def support_matrix_df():
def backends_info_df():
return pd.DataFrame(
{
"bigquery": ["string", "sql"],
"clickhouse": ["string", "sql"],
"bigquery": ["sql"],
"clickhouse": ["sql"],
"dask": ["dataframe"],
"datafusion": ["sql"],
"druid": ["sqlalchemy", "sql"],
"duckdb": ["sqlalchemy", "sql"],
"exasol": ["sqlalchemy", "sql"],
"flink": ["string", "sql"],
"impala": ["string", "sql"],
"mssql": ["sqlalchemy", "sql"],
"mysql": ["sqlalchemy", "sql"],
"oracle": ["sqlalchemy", "sql"],
"druid": ["sql"],
"duckdb": ["sql"],
"exasol": ["sql"],
"flink": ["sql"],
"impala": ["sql"],
"mssql": ["sql"],
"mysql": ["sql"],
"oracle": ["sql"],
"pandas": ["dataframe"],
"polars": ["dataframe"],
"postgres": ["sqlalchemy", "sql"],
"pyspark": ["dataframe"],
"snowflake": ["sqlalchemy", "sql"],
"sqlite": ["sqlalchemy", "sql"],
"trino": ["sqlalchemy", "sql"],
"postgres": ["sql"],
"pyspark": ["sql"],
"snowflake": ["sql"],
"sqlite": ["sql"],
"trino": ["sql"],
}.items(),
columns=["backend_name", "categories"],
)
Expand Down
Loading

0 comments on commit 5dd75af

Please sign in to comment.