Chore: pre-commit autoupdate
pre-commit-ci[bot] authored and ModeSevenIndustrialSolutions committed Sep 30, 2024
1 parent 31709b3 commit 88a7106
Showing 12 changed files with 572 additions and 228 deletions.
8 changes: 3 additions & 5 deletions notebooks/00_vault_cleanup.ipynb
@@ -27,10 +27,6 @@
}
],
"source": [
"import os\n",
"import pathlib\n",
"\n",
"import trino\n",
"import osc_ingest_trino as osc\n",
"\n",
"ingest_catalog = \"osc_datacommons_dev\"\n",
@@ -88,7 +84,9 @@
}
],
"source": [
"engine_dev = osc.attach_trino_engine(verbose=True, catalog=ingest_catalog, schema=ingest_schema)"
"engine_dev = osc.attach_trino_engine(\n",
" verbose=True, catalog=ingest_catalog, schema=ingest_schema\n",
")"
]
},
{
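For context, the cell touched in this hunk attaches a SQLAlchemy engine to Trino through osc-ingest-tools. A minimal standalone sketch of the same call follows; the schema name is a hypothetical placeholder, since the real value is set in a cell outside the visible hunk, and the helper is assumed to pick up the usual TRINO_* connection settings from the environment.

```python
import osc_ingest_trino as osc

ingest_catalog = "osc_datacommons_dev"  # as shown in the hunk above
ingest_schema = "demo_dv"  # hypothetical placeholder; the real value is set elsewhere in the notebook

# Assumed to read TRINO_* connection settings from the environment
engine_dev = osc.attach_trino_engine(
    verbose=True, catalog=ingest_catalog, schema=ingest_schema
)
```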
52 changes: 42 additions & 10 deletions notebooks/01a_vault_ingest.ipynb
@@ -30,7 +30,9 @@
"# Load some standard environment variables from a dot-env file, if it exists.\n",
"# If no such file can be found, does not fail, and so allows these environment vars to\n",
"# be populated in some other way\n",
"dotenv_dir = os.environ.get(\"CREDENTIAL_DOTENV_DIR\", os.environ.get(\"PWD\", \"/opt/app-root/src\"))\n",
"dotenv_dir = os.environ.get(\n",
" \"CREDENTIAL_DOTENV_DIR\", os.environ.get(\"PWD\", \"/opt/app-root/src\")\n",
")\n",
"dotenv_path = pathlib.Path(dotenv_dir) / \"credentials.env\"\n",
"if os.path.exists(dotenv_path):\n",
" load_dotenv(dotenv_path=dotenv_path, override=True)"
@@ -57,7 +59,9 @@
"print(\"Initializing Dev tables\")\n",
"\n",
"sqlstring = \"trino://{user}@{host}:{port}/\".format(\n",
" user=os.environ[\"TRINO_USER_USER1\"], host=os.environ[\"TRINO_HOST\"], port=os.environ[\"TRINO_PORT\"]\n",
" user=os.environ[\"TRINO_USER_USER1\"],\n",
" host=os.environ[\"TRINO_HOST\"],\n",
" port=os.environ[\"TRINO_PORT\"],\n",
")\n",
"\n",
"ingest_catalog = \"osc_datacommons_dev\"\n",
@@ -161,7 +165,11 @@
"source": [
"# loading excel doesn't work from 'get_object' mode\n",
"\n",
"s3.download_file(os.environ[\"DEMO1_S3_BUCKET\"], \"itr-demo-data/ITR_company_data_minimum_required.xlsx\", \"/tmp/t.xlsx\")\n",
"s3.download_file(\n",
" os.environ[\"DEMO1_S3_BUCKET\"],\n",
" \"itr-demo-data/ITR_company_data_minimum_required.xlsx\",\n",
" \"/tmp/t.xlsx\",\n",
")\n",
"\n",
"# load the raw file from the bucket\n",
"xls = pd.ExcelFile(\"/tmp/t.xlsx\")\n",
@@ -187,7 +195,7 @@
"tablename = f\"{itr_prefix}fundamental_data\"\n",
"\n",
"# get this sheet, and assess data types\n",
"df = pd.read_excel(xls, f\"fundamental_data\").convert_dtypes()\n",
"df = pd.read_excel(xls, \"fundamental_data\").convert_dtypes()\n",
"\n",
"# rename columns to forms that sql will handle\n",
"osc.enforce_sql_column_names(df, inplace=True)\n",
@@ -267,7 +275,11 @@
"# in live data platform there will need to be policies and mechanisms for either\n",
"# appending new data, or overwriting old data, or saving off conditioned by a versioning number\n",
"# this is a data governance topic\n",
"osc._do_sql(f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)"
"osc._do_sql(\n",
" f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\",\n",
" engine,\n",
" verbose=True,\n",
")"
]
},
{
@@ -322,7 +334,11 @@
],
"source": [
"# test that we can get data\n",
"qres = osc._do_sql(f\"select country from {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)"
"qres = osc._do_sql(\n",
" f\"select country from {ingest_catalog}.{ingest_schema}.{tablename}\",\n",
" engine,\n",
" verbose=True,\n",
")"
]
},
{
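Each of the remaining hunks in this file repeats the same two-step pattern that was just reformatted: drop any previous copy of the target table, then run a small test SELECT to confirm the re-ingested table is queryable. A minimal sketch of both steps with osc-ingest-tools; the schema and table names are hypothetical placeholders, and the query result is captured as qres exactly as in the notebook.

```python
import osc_ingest_trino as osc

ingest_catalog = "osc_datacommons_dev"  # as used throughout the diff
ingest_schema = "demo_dv"               # hypothetical placeholder
tablename = "itr_fundamental_data"      # hypothetical placeholder

# Engine attached via osc-ingest-tools, assumed to read TRINO_* settings from the environment
engine = osc.attach_trino_engine(
    verbose=True, catalog=ingest_catalog, schema=ingest_schema
)

# Step 1: drop the old table (if any) so the ingest starts clean; how old data is
# versioned or retained is a data-governance decision, as the notebook comments note.
osc._do_sql(
    f"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}",
    engine,
    verbose=True,
)

# ... ingest of the conditioned DataFrame happens here (outside the visible hunks) ...

# Step 2: test that we can get data back from the new table
qres = osc._do_sql(
    f"select country from {ingest_catalog}.{ingest_schema}.{tablename}",
    engine,
    verbose=True,
)
```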
@@ -439,7 +455,11 @@
"# in live data platform there will need to be policies and mechanisms for either\n",
"# appending new data, or overwriting old data, or saving off conditioned by a versioning number\n",
"# this is a data governance topic\n",
"osc._do_sql(f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)"
"osc._do_sql(\n",
" f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\",\n",
" engine,\n",
" verbose=True,\n",
")"
]
},
{
@@ -494,7 +514,11 @@
],
"source": [
"# test that we can get data\n",
"qres = osc._do_sql(f\"select y2020 from {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)"
"qres = osc._do_sql(\n",
" f\"select y2020 from {ingest_catalog}.{ingest_schema}.{tablename}\",\n",
" engine,\n",
" verbose=True,\n",
")"
]
},
{
@@ -543,7 +567,11 @@
"# in live data platform there will need to be policies and mechanisms for either\n",
"# appending new data, or overwriting old data, or saving off conditioned by a versioning number\n",
"# this is a data governance topic\n",
"osc._do_sql(f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)"
"osc._do_sql(\n",
" f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\",\n",
" engine,\n",
" verbose=True,\n",
")"
]
},
{
@@ -627,7 +655,11 @@
"# in live data platform there will need to be policies and mechanisms for either\n",
"# appending new data, or overwriting old data, or saving off conditioned by a versioning number\n",
"# this is a data governance topic\n",
"osc._do_sql(f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)"
"osc._do_sql(\n",
" f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\",\n",
" engine,\n",
" verbose=True,\n",
")"
]
},
{