Skip to content

Commit

Permalink
Add mock_pipeline_test to GHA
Browse files Browse the repository at this point in the history
  • Loading branch information
danielvdende committed Sep 11, 2023
1 parent 52fd668 commit ea746e5
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 19 deletions.
69 changes: 51 additions & 18 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,68 @@ on:
push

jobs:
lint:
# lint:
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3
#
# - uses: actions/setup-python@v4
# with:
# python-version: '3.10'
#
# - name: Install flake8
# run: pip install flake8
#
# - name: Run flake8
# run: flake8 dags/
#
# integrity_test:
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3
#
# - uses: actions/setup-python@v4
# with:
# python-version: '3.10'
#
# - name: Install integrity test requirements
# run: pip install -r integrity_tests/requirements.txt
#
# - name: Initialize Airflow DB
# run: airflow db init
#
# - name: Run integrity tests
# run: coverage run -m pytest integrity_tests/*

mock_pipeline_test:
runs-on: ubuntu-latest
# needs:
# - lint
# - integrity_test
steps:
- uses: actions/checkout@v3

- uses: actions/setup-python@v4
with:
python-version: '3.10'

- name: Install flake8
run: pip install flake8
# Installs the native build prerequisites (SASL headers and a C toolchain)
# before the Python requirements are installed.
# The package index is refreshed first: the index baked into a hosted runner
# image can be stale, which makes `apt-get install` fail with 404s.
- name: Install system dependencies
  run: |
    sudo apt-get update
    sudo apt-get install -y libsasl2-dev build-essential

- name: Run flake8
run: flake8 dags/
- name: Install mock_pipeline test requirements
run: pip install -r mock_pipeline_requirements.txt

integrity_test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
# Runs the Spark data generator and points its warehouse at ./spark-warehouse,
# relative to the step's working directory.
# NOTE(review): the mock_pipeline dbt target in dags/dbt/profiles.yml uses an
# absolute warehouse path under the runner's checkout directory; this relative
# path must resolve to that same directory for dbt to see the generated data.
- name: Generate mock-pipeline data
run: spark-submit --name spark-data-generate dags/spark/generate_data.py --warehouse-path ./spark-warehouse

- uses: actions/setup-python@v4
with:
python-version: '3.10'
# Diagnostic step: lists the working directory, then the spark-warehouse and
# metastore_db directories. Because the commands are chained with &&, the step
# fails if either directory is missing.
- name: debug
run: ls -lah && ls -lah spark-warehouse && ls -lah metastore_db

- name: Install integrity test requirements
run: pip install -r integrity_tests/requirements.txt
# Builds the dbt models against the mock_pipeline target.
# Runs from dags/dbt, the directory this step sets as its working directory.
- name: Run dbt
  working-directory: dags/dbt
  run: dbt run --target mock_pipeline

- name: Initialize Airflow DB
run: airflow db init
# Executes the dbt tests against the mock_pipeline target, from the same
# dags/dbt working directory as the preceding `dbt run` step.
- name: Run dbt tests
  working-directory: dags/dbt
  run: dbt test --target mock_pipeline

- name: Run integrity tests
run: coverage run -m pytest integrity_tests/*
11 changes: 11 additions & 0 deletions dags/dbt/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,14 @@ transactions:
"spark.sql.warehouse.dir": "/opt/airflow/spark-warehouse"
"spark.sql.parquet.compression.codec": "gzip"
"spark.hadoop.javax.jdo.option.ConnectionURL": "jdbc:derby:;databaseName=/opt/airflow/metastore_db;create=true"

mock_pipeline:
type: spark
host: localhost
method: session
schema: bank
server_side_parameters:
"spark.databricks.delta.schema.autoMerge.enabled": "True"
"spark.sql.warehouse.dir": "/home/runner/work/data-testing-with-airflow/data-testing-with-airflow/spark-warehouse"  # Path when running on the GitHub Actions runner
"spark.sql.parquet.compression.codec": "gzip"
"spark.hadoop.javax.jdo.option.ConnectionURL": "jdbc:derby:;databaseName=/home/runner/work/data-testing-with-airflow/data-testing-with-airflow/metastore_db;create=true"
8 changes: 7 additions & 1 deletion dags/spark/generate_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import argparse

from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, DoubleType, StringType, DateType, BooleanType
from random import uniform, sample, randint
Expand Down Expand Up @@ -87,8 +89,12 @@ def run_job(spark):


if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Generate data')
parser.add_argument('--warehouse-path', default='/opt/airflow/spark-warehouse')

args = parser.parse_args()
spark = SparkSession.builder \
.config('spark.sql.warehouse.dir', '/opt/airflow/spark-warehouse') \
.config('spark.sql.warehouse.dir', args.warehouse_path) \
.config('spark.sql.parquet.compression.codec', 'gzip') \
.enableHiveSupport() \
.getOrCreate()
Expand Down
3 changes: 3 additions & 0 deletions mock_pipeline_requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
dbt-spark[PyHive]==1.7.0b1
dbt-core==1.7.0b1
pyspark==3.4.0

0 comments on commit ea746e5

Please sign in to comment.