Skip to content

Commit

Permalink
Merge pull request #44 from DalgoT4D/39-test-cases-for-dbt-automation
Browse files Browse the repository at this point in the history
39 test cases for dbt automation
  • Loading branch information
fatchat authored Nov 6, 2023
2 parents a57c15c + 9b7aeb8 commit d590848
Show file tree
Hide file tree
Showing 40 changed files with 2,342 additions and 157 deletions.
6 changes: 6 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[run]
omit=tests/*
*/__init__.py
setup.py
seeds/*
scripts/*
73 changes: 73 additions & 0 deletions .github/workflows/dbt_automation_pkg.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: DBT Automation Package

on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

jobs:
tests:
env:
TEST_PG_DBHOST: ${{ secrets.TEST_PG_DBHOST }}
TEST_PG_DBPORT: ${{ secrets.TEST_PG_DBPORT }}
TEST_PG_DBUSER: ${{ secrets.TEST_PG_DBUSER }}
TEST_PG_DBPASSWORD: ${{ secrets.TEST_PG_DBPASSWORD }}
TEST_PG_DBNAME: ${{ secrets.TEST_PG_DBNAME }}
TEST_PG_DBSCHEMA_SRC: ${{ secrets.TEST_PG_DBSCHEMA_SRC }}

TEST_BG_SERVICEJSON: ${{ secrets.TEST_BG_SERVICEJSON }}
TEST_BG_LOCATION: ${{ secrets.TEST_BG_LOCATION }}
TEST_BG_DATASET_SRC: ${{ secrets.TEST_BG_DATASET_SRC }}

runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]

steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Create known_hosts file
run: |
mkdir -p ~/.ssh
touch ~/.ssh/known_hosts
- name: Add remote host key to known_hosts
run: ssh-keyscan ${{ secrets.SSH_SERVERIP }} >> ~/.ssh/known_hosts

- name: Login to the jump server and port forward to connect to the postgres warehouse
run: |
eval `ssh-agent -s`
ssh-add - <<< "${{ secrets.SSH_PRIVATE_KEY }}"
ssh -L 5432:${{ secrets.SSH_HOST }}:5432 -f -N ddp@${{ secrets.SSH_SERVERIP }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
python setup.py install
- name: Seed data in test warehouse
run: |
python seeds/seed.py --warehouse postgres
python seeds/seed.py --warehouse bigquery
- name: Run tests and collect coverage
run: |
pytest -s --cov=.
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: true
verbose: true
19 changes: 19 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ venv/
workspace/
*.yaml
.venv
.vscode
logs/

# Distribution / packaging
.Python
Expand All @@ -24,3 +26,20 @@ share/python-wheels/
*.egg
MANIFEST
*.json
!sample_sheet1.json
!sample_sheet2.json

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
pytest.ini
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,9 @@ Example usage:
python flattenairbyte.py --project-dir workspace/lahi


# Setting up the test environment

- Create a `pytest.ini` file and add the test warehouse credentials.
- Create a `dbconnection.env` file and add the test warehouse credentials. The test warehouse credentials will be used to seed data.
- Seed the sample data by running ```python seeds/seed.py --warehouse <postgres or bigquery>```
- Run pytest ```pytest tests/ -c pytest.ini -s``` in your local virtual environment
25 changes: 25 additions & 0 deletions dbconnection.env.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
DBHOST=
DBPORT=
DBUSER=
DBPASSWORD=
DBNAME=

DBT_PROJECT_DIR=

# for bigquery
GOOGLE_APPLICATION_CREDENTIALS=<path_to_service_account_json_creds_file>
BIGQUERY_LOCATION=<location of the project>

# sample data seed credentials
# postgres
TEST_PG_DBHOST=
TEST_PG_DBPORT=
TEST_PG_DBUSER=
TEST_PG_DBPASSWORD=
TEST_PG_DBNAME=
TEST_PG_DBSCHEMA_SRC=

# bigquery
TEST_BG_SERVICEJSON=
TEST_BG_LOCATION=
TEST_BG_DATASET_SRC=
26 changes: 0 additions & 26 deletions dbt_automation/assets/flatten_json.sql

This file was deleted.

2 changes: 2 additions & 0 deletions dbt_automation/operations/arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def arithmetic(config: dict, warehouse, project_dir: str):
dbt_code += "{{dbt_utils.safe_add(["
for operand in operands:
dbt_code += f"'{str(operand)}',"
dbt_code = dbt_code[:-1]
dbt_code += "])}}"
dbt_code += f" AS {output_col_name} "

Expand All @@ -54,6 +55,7 @@ def arithmetic(config: dict, warehouse, project_dir: str):
dbt_code += "{{dbt_utils.safe_subtract(["
for operand in operands:
dbt_code += f"'{str(operand)}',"
dbt_code = dbt_code[:-1]
dbt_code += "])}}"
dbt_code += f" AS {output_col_name} "

Expand Down
2 changes: 1 addition & 1 deletion dbt_automation/operations/castdatatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def cast_datatypes(config: dict, warehouse, project_dir: str):
dbtproject = dbtProject(project_dir)
dbtproject.ensure_models_dir(dest_schema)

union_code = "{{ config(materialized='table',) }}\n"
union_code = f"{{{{ config(materialized='table',schema='{dest_schema}') }}}}\n"

columns = config["columns"]
columnnames = [c["columnname"] for c in columns]
Expand Down
2 changes: 1 addition & 1 deletion dbt_automation/operations/coalescecolumns.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def coalesce_columns(config: dict, warehouse, project_dir: str):
dbtproject = dbtProject(project_dir)
dbtproject.ensure_models_dir(dest_schema)

union_code = "{{ config(materialized='table',) }}\n"
union_code = f"{{{{ config(materialized='table', schema='{dest_schema}') }}}}\n"

columns = config["columns"]
columnnames = [c["columnname"] for c in columns]
Expand Down
2 changes: 1 addition & 1 deletion dbt_automation/operations/droprenamecolumns.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def rename_columns(config: dict, warehouse, project_dir: str):
dbtproject = dbtProject(project_dir)
dbtproject.ensure_models_dir(dest_schema)

model_code = '{{ config(materialized="table") }}\n\n'
model_code = '{{ config(materialized="table", schema="' + dest_schema + '") }}\n\n'
exclude_cols = ",".join([f'"{col}"' for col in columns.keys()])
model_code += f'SELECT {{{{ dbt_utils.star(from=ref("{input_name}"), except=[{exclude_cols}]) }}}}, '

Expand Down
14 changes: 9 additions & 5 deletions dbt_automation/operations/flattenairbyte.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def flatten_operation(config: dict, warehouse, project_dir: str):
# and the .sql model
model_sql = mk_dbtmodel(
warehouse,
DEST_SCHEMA,
source["name"], # pass the source in the yaml file
modelname,
zip(json_fields, sql_columns),
Expand All @@ -80,18 +81,21 @@ def flatten_operation(config: dict, warehouse, project_dir: str):


# ================================================================================
def mk_dbtmodel(warehouse, sourcename: str, srctablename: str, columntuples: list):
def mk_dbtmodel(
warehouse, dest_schema: str, sourcename: str, srctablename: str, columntuples: list
):
"""create the .sql model for this table"""

dbtmodel = """
{{
dbtmodel = f"""
{{{{
config(
materialized='table',
schema='{dest_schema}',
indexes=[
{'columns': ['_airbyte_ab_id'], 'type': 'hash'}
{{'columns': ['_airbyte_ab_id'], 'type': 'hash'}}
]
)
}}
}}}}
"""
dbtmodel += "SELECT _airbyte_ab_id "
dbtmodel += "\n"
Expand Down
2 changes: 1 addition & 1 deletion dbt_automation/operations/mergetables.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def union_tables(config, warehouse, project_dir):
relations += f"ref('{tablename}'),"
relations = relations[:-1]
relations += "]"
union_code = "{{ config(materialized='table',) }}\n"
union_code = f"{{{{ config(materialized='table',schema='{dest_schema}') }}}}\n"
# pylint:disable=consider-using-f-string
union_code += "{{ dbt_utils.union_relations("
union_code += f"relations={relations}"
Expand Down
4 changes: 3 additions & 1 deletion dbt_automation/operations/regexextraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ def regex_extraction(config: dict, warehouse, project_dir: str):
dbtproject = dbtProject(project_dir)
dbtproject.ensure_models_dir(dest_schema)

model_code = '{{ config(materialized="table") }}\n\nSELECT '
model_code = (
f"{{{{ config(materialized='table', schema='{dest_schema}') }}}}\n\nSELECT "
)

for col_name, regex in columns.items():
if warehouse.name == "postgres":
Expand Down
Loading

0 comments on commit d590848

Please sign in to comment.