Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Użyj sling do replikacji #3799

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 68 additions & 30 deletions .github/workflows/bi-transfer_pola_backend_to_bq.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,17 @@ env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

GCP_PROJECT_ID: pola-bi-looker
GCP_PROJECT_NUMBER: 354540873199
GCP_PROJECT_NUMBER: "354540873199"
GCP_REGION: europe-west3
GCP_BUCKET_NAME: pola-app_pola-backend_postgres_csv-files
GCP_IDENTITY_POOL: github
GCP_IDENTITY_PROVIDER: pola-backend-repo

HEROKU_API_KEY: ${{ secrets.HEROKU_API_KEY }}
jobs:
deploy-bi:
name: "Transfer PostgreSQL to BQ"
runs-on: ubuntu-latest
env:
HEROKU_API_KEY: ${{ secrets.HEROKU_API_KEY }}
steps:
- name: Set dynamic job variable
shell: python
Expand Down Expand Up @@ -88,12 +87,21 @@ jobs:
export_environment_variables: true
create_credentials_file: true
- name: 'Set up Cloud SDK'
uses: 'google-github-actions/setup-gcloud@v2'
uses: 'google-github-actions/setup-gcloud@v2'
- name: 'Use gcloud CLI'
run: 'gcloud info'
- name: 'Use gcloud CLI'
run: 'gcloud auth list --filter=status:ACTIVE --format="value(account)"'
- name: 'Use gcloud CLI'
run: 'gcloud auth list'

# - name: Debug OIDC Claims
# uses: 'github/actions-oidc-debugger@main'
# with:
# audience: 'https://github.com/github'
- name: Install Heroku CLI
run: |
curl https://cli-assets.heroku.com/install.sh | sh
env:
HEROKU_API_KEY: ${{ secrets.HEROKU_API_KEY }}
- name: Retrieve PostgreSQL credentials
run: |
DATABASE_URL=$(heroku config:get DATABASE_URL --app "${HEROKU_APP}")
Expand All @@ -103,37 +111,67 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: '3.9'
- name: Copy file to bucket
run: |
set -x;
date | gcloud storage cp - "gs://${GCP_BUCKET_NAME}/date.txt"
gcloud storage cat "gs://${GCP_BUCKET_NAME}/date.txt"
gcloud storage ls "gs://${GCP_BUCKET_NAME}/"
- name: "Install dependencies"
run: pip install google-cloud-bigquery google-cloud-storage psycopg2-binary
- name: Run transfer script
env:
TABLE_NAMES: |
ai_pics_aiattachment,
ai_pics_aipics,
bi_companies_by_query_group,
bi_companies_with_count_group,
bi_new_product_by_hour,
bi_popular_not_verified_products,
bi_product_by_time,
bi_queries_by_time,
bi_queries_stats_intervals,
bi_stats_queries_uq_users_by_week,
company_brand,
company_company,
gpc_brick,
gpc_class,
gpc_family,
gpc_segment,
pola_query,
pola_searchquery,
pola_stats,
product_product,
report_attachment,
report_report,
users_user
ai_pics_aiattachment
run: |
python ./pola-bi/postgres_to_bigquery.py --verbose all \
--database-url "${DATABASE_URL}" \
--table-names "${TABLE_NAMES}" \
--staging-url "gs://${GCP_BUCKET_NAME}/" \
--dataset-id "${GCP_BIGQUERY_DATASET}" \
--dataset-id "${GCP_BIGQUERY_DATASET}"
- name: Set up GoLang
uses: actions/setup-go@v3
with:
go-version: "1.21"
cache: false

- name: "Checkout"
uses: actions/checkout@v4
with:
fetch-depth: 2
repository: slingdata-io/sling-cli
path: sling
ref: bigquery-openid

- run: |
cd sling
go mod edit -dropreplace='github.com/flarco/g' go.mod
go mod edit -dropreplace='github.com/slingdata-io/sling' go.mod
go mod edit -droprequire='github.com/slingdata-io/sling' go.mod
go mod tidy
go get -u golang.org/x/oauth2
go get -u cloud.google.com/go
go get -u cloud.google.com/go/bigquery
go get -u cloud.google.com/go/bigtable
go get -u cloud.google.com/go/storage
go build -o /usr/local/bin/sling cmd/sling/*.go
cd ..

- name: Run transfer script
run: |
set -x;
sling conns set MY_BIGQUERY \
type=bigquery \
project="${GCP_PROJECT_ID}" \
dataset="${GCP_BIGQUERY_DATASET}" \
gc_bucket="${GCP_BUCKET_NAME}" \
location="${GCP_REGION}"
sling conns test MY_BIGQUERY
export "MY_POSTGRES=${DATABASE_URL}"
sling conns test MY_POSTGRES

# TODO: Run replication
# export "GOOGLE_APPLICATION_CREDENTIALS=$(./scripts/gcloud_generate_temp_creds.sh)"
# gcloud storage ls "gs://${GCP_BUCKET_NAME}/"
# cat "$(./scripts/gcloud_generate_temp_creds.sh)"
# export "GOOGLE_APPLICATION_CREDENTIALS=$(./scripts/gcloud_generate_temp_creds.sh)"
33 changes: 33 additions & 0 deletions pola-bi/sling-data/replication.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
# Sling replication config: copy pola-backend Postgres tables into BigQuery.
# Connections MY_POSTGRES / MY_BIGQUERY are defined by the caller
# (CI workflow or run_local_sling.sh).
source: MY_POSTGRES
target: MY_BIGQUERY

defaults:
  # Recreate each target table from scratch on every run.
  mode: full-refresh
  # Land every stream as raw__<table> in the target dataset.
  object: '{target_schema}.raw__{stream_table}'
  primary_key: [id]

streams:
  public.ai_pics_aiattachment:
  public.ai_pics_aipics:
  # BI aggregate tables carry no `id` column, so drop the default key.
  public.bi_*:
    primary_key: []
  public.company_brand:
  public.company_company:
  public.gpc_*:
  # Load only rows with an id greater than the last replicated one.
  public.pola_query:
    mode: incremental
    update_key: id
  public.pola_searchquery:
  public.pola_stats:
  public.product_product:
  public.report_attachment:
  public.report_report:
  # Exclude the sensitive password column from replication.
  public.users_user:
    select: ["-password"]

env:
  # Adds the _sling_loaded_at timestamp column
  SLING_LOADED_AT_COLUMN: true

  # Allows creating empty tables
  SLING_ALLOW_EMPTY: true
14 changes: 14 additions & 0 deletions pola-bi/sling-data/run_local_sling.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Run sling locally against the development Postgres database and the
# pola-bi-looker BigQuery project. All arguments are forwarded to sling, e.g.:
#   ./run_local_sling.sh run -r pola-bi/sling-data/replication.yaml

set -euo pipefail

# Local development database. A value exported by the caller takes
# precedence; the default below is only used when MY_POSTGRES is unset.
export MY_POSTGRES="${MY_POSTGRES:-postgresql://pola_app:pola_app@localhost:5432/pola_app?sslmode=disable}"

# Register (or refresh) the BigQuery connection used by replication.yaml.
sling conns set MY_BIGQUERY \
  type=bigquery \
  project=pola-bi-looker \
  dataset=pola_backend__local \
  gc_bucket=pola-app_pola-backend_postgres_csv-files \
  location=europe-west3

sling "${@}"
20 changes: 20 additions & 0 deletions scripts/gcloud_generate_temp_creds.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Build a temporary "authorized_user" Google credentials file from the
# active gcloud session and print its path on stdout.
# Intended usage:
#   export GOOGLE_APPLICATION_CREDENTIALS="$(scripts/gcloud_generate_temp_creds.sh)"

# Fail fast on mktemp/jq/redirect errors so a bogus path is never printed.
# NOTE(review): failures inside the $(...) argument substitutions below do
# NOT abort under `set -e`; jq still runs with empty strings in that case.
set -euo pipefail

# mktemp creates the prefix file; the .json sibling is what we write to.
GCP_CREDENTIAL_FILE="$(mktemp -t gcp-credentials-XXXXXX).json"

# Token refreshing SHOULD work via refresh_token/client_id/client_secret,
# but it is not tested. The refresh token may be unavailable for some login
# types, hence the `|| true`.
# NOTE(review): the standard authorized_user ADC file has no "token" key —
# confirm the consumer actually reads it, or drop it.
jq -n \
  --arg access_token "$(gcloud auth print-access-token)" \
  --arg refresh_token "$(gcloud auth print-refresh-token || true)" \
  --arg gcp_client_id "$(gcloud config get auth/client_id)" \
  --arg gcp_client_secret "$(gcloud config get auth/client_secret)" \
  '{
    type: "authorized_user",
    token: $access_token,
    refresh_token: $refresh_token,
    client_id: $gcp_client_id,
    client_secret: $gcp_client_secret
  }' > "${GCP_CREDENTIAL_FILE}"

echo "${GCP_CREDENTIAL_FILE}"
Loading