From f23d3722ae206132f1945589b012040552f2ba7c Mon Sep 17 00:00:00 2001 From: Jehangir Amjad Date: Mon, 22 Jan 2024 06:50:58 -0800 Subject: [PATCH 1/7] updates to the import automation scripts --- import-automation/executor/Dockerfile | 5 ++++- .../executor/app/executor/cloud_scheduler.py | 2 +- import-automation/executor/gke/README.md | 7 +++++-- import-automation/executor/gke/configure_gke.sh | 15 +++++++-------- import-automation/executor/requirements.txt | 1 + .../executor/test/cloud_scheduler_test.py | 2 +- 6 files changed, 19 insertions(+), 13 deletions(-) diff --git a/import-automation/executor/Dockerfile b/import-automation/executor/Dockerfile index 35253c9173..1bba3650c7 100644 --- a/import-automation/executor/Dockerfile +++ b/import-automation/executor/Dockerfile @@ -16,7 +16,10 @@ FROM python:3.11.4 -RUN apt upgrade +RUN apt-get update \ + && apt-get -y upgrade \ + && apt-get -y autoremove \ + && rm -rf /var/lib/apt/lists/* WORKDIR /workspace ADD requirements.txt /workspace/requirements.txt diff --git a/import-automation/executor/app/executor/cloud_scheduler.py b/import-automation/executor/app/executor/cloud_scheduler.py index 42bd5c332b..50e8793048 100644 --- a/import-automation/executor/app/executor/cloud_scheduler.py +++ b/import-automation/executor/app/executor/cloud_scheduler.py @@ -26,7 +26,7 @@ from google.protobuf import json_format from google.api_core.exceptions import AlreadyExists, NotFound -GKE_SERVICE_DOMAIN = os.getenv('GKE_SERVICE_DOMAIN', 'import.datacommons.dev') +GKE_SERVICE_DOMAIN = os.getenv('GKE_SERVICE_DOMAIN', 'importautomation.datacommons.org') GKE_CALLER_SERVICE_ACCOUNT = os.getenv('CLOUD_SCHEDULER_CALLER_SA') GKE_OAUTH_AUDIENCE = os.getenv('CLOUD_SCHEDULER_CALLER_OAUTH_AUDIENCE') diff --git a/import-automation/executor/gke/README.md b/import-automation/executor/gke/README.md index ddd9418a47..a01268a761 100644 --- a/import-automation/executor/gke/README.md +++ b/import-automation/executor/gke/README.md @@ -26,8 +26,11 @@ Follow ## (One Time) Setup GKE -1. Update OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET in "gke/configure_gke.sh". -2. Run `./gke/configure_gke.sh`. +1. Update the PROJECT_ID in "gke/configure_gke.sh", if needed. + +2. Update OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET in "gke/configure_gke.sh". + +3. Run `./gke/configure_gke.sh`. ## Deployment diff --git a/import-automation/executor/gke/configure_gke.sh b/import-automation/executor/gke/configure_gke.sh index dc753e58b4..9a7ce35130 100755 --- a/import-automation/executor/gke/configure_gke.sh +++ b/import-automation/executor/gke/configure_gke.sh @@ -14,7 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -PROJECT_ID=datcom-data +# Edit the PROJECT_ID below. +PROJECT_ID=datcom-import-automation gcloud config set project $PROJECT_ID @@ -40,11 +41,9 @@ kubectl create namespace import-automation \ kubectl create serviceaccount --namespace import-automation import-automation-ksa \ --dry-run=client -o yaml | kubectl apply -f - -gcloud iam service-accounts add-iam-policy-binding \ - --project $PROJECT_ID \ - --role roles/iam.workloadIdentityUser \ - --member "serviceAccount:$PROJECT_ID.svc.id.goog[import-automation/import-automation-ksa]" \ - $PROJECT_ID@appspot.gserviceaccount.com +gcloud projects add-iam-policy-binding $PROJECT_ID \ + --role=roles/iam.workloadIdentityUser \ + --member="serviceAccount:$PROJECT_ID.svc.id.goog[import-automation/import-automation-ksa]" kubectl annotate serviceaccount \ --namespace import-automation \ @@ -53,8 +52,8 @@ kubectl annotate serviceaccount \ iam.gke.io/gcp-service-account=$PROJECT_ID@appspot.gserviceaccount.com # Set the oauth env vars before running the script -# export OAUTH_CLIENT_ID= -# export OAUTH_CLIENT_SECRET= +# export OAUTH_CLIENT_ID=251280076183-ivh5hjgshftv3rgo4mc03t3vbkgdj3at.apps.googleusercontent.com +# export OAUTH_CLIENT_SECRET=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 kubectl -n import-automation create secret generic import-automation-iap-secret \ --from-literal=client_id=$OAUTH_CLIENT_ID \ --from-literal=client_secret=$OAUTH_CLIENT_SECRET diff --git a/import-automation/executor/requirements.txt b/import-automation/executor/requirements.txt index 6c15640481..d6ad7fd584 100644 --- a/import-automation/executor/requirements.txt +++ b/import-automation/executor/requirements.txt @@ -9,3 +9,4 @@ flask gunicorn pytz absl-py +libwebp-dev>=1.3.2 # To fix a zero-day vulnerability (CVE-2023-4863): https://snyk.io/blog/find-and-fix-webp-vulnerability-cve-2023-4863/ \ No newline at end of file diff --git a/import-automation/executor/test/cloud_scheduler_test.py b/import-automation/executor/test/cloud_scheduler_test.py index 71446a7372..468249bedb 100644 --- a/import-automation/executor/test/cloud_scheduler_test.py +++ b/import-automation/executor/test/cloud_scheduler_test.py @@ -84,7 +84,7 @@ def test_http_job_request(self): 'seconds': 60 * 30 }, 'http_target': { - 'uri': 'https://import.datacommons.dev/update', + 'uri': 'https://importautomation.datacommons.org/update', 'http_method': 'POST', 'headers': { 'Content-Type': 'application/json', From efb1a1cb890dcd0a7bc23f6739a7a3ca65323144 Mon Sep 17 00:00:00 2001 From: Jehangir Amjad Date: Mon, 22 Jan 2024 06:58:43 -0800 Subject: [PATCH 2/7] removing client id and secret --- import-automation/executor/gke/configure_gke.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/import-automation/executor/gke/configure_gke.sh b/import-automation/executor/gke/configure_gke.sh index 9a7ce35130..27735ab04d 100755 --- a/import-automation/executor/gke/configure_gke.sh +++ b/import-automation/executor/gke/configure_gke.sh @@ -52,8 +52,8 @@ kubectl annotate serviceaccount \ iam.gke.io/gcp-service-account=$PROJECT_ID@appspot.gserviceaccount.com # Set the oauth env vars before running the script -# export OAUTH_CLIENT_ID=251280076183-ivh5hjgshftv3rgo4mc03t3vbkgdj3at.apps.googleusercontent.com -# export OAUTH_CLIENT_SECRET=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 +export OAUTH_CLIENT_ID= +export OAUTH_CLIENT_SECRET= kubectl -n import-automation create secret generic import-automation-iap-secret \ --from-literal=client_id=$OAUTH_CLIENT_ID \ --from-literal=client_secret=$OAUTH_CLIENT_SECRET From 2dd6e137144fadaaadfcd780196f229b17402c66 Mon Sep 17 00:00:00 2001 From: Jehangir Amjad Date: Mon, 22 Jan 2024 07:36:54 -0800 Subject: [PATCH 3/7] updating test file to avoid test failures --- import-automation/executor/test/integration_test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/import-automation/executor/test/integration_test.py b/import-automation/executor/test/integration_test.py index 1297548772..ca50860050 100644 --- a/import-automation/executor/test/integration_test.py +++ b/import-automation/executor/test/integration_test.py @@ -24,10 +24,11 @@ NUM_LINES_TO_CHECK = 50 CONFIGS = { - 'github_repo_owner_username': os.environ['GITHUB_AUTH_USERNAME'], + # TODO: read the params from the environment. + 'github_repo_owner_username': 'datacommonsorg', # os.environ['GITHUB_AUTH_USERNAME'], 'github_repo_name': 'data-demo', - 'github_auth_username': 'intrepiditee', - 'github_auth_access_token': os.environ['GITHUB_AUTH_ACCESS_TOKEN'] + 'github_auth_username': 'datacommons-bot@google.com', + 'github_auth_access_token': '' # os.environ['GITHUB_AUTH_ACCESS_TOKEN'] } From a45805058ad91beeb63637c3192d2b200034bc5b Mon Sep 17 00:00:00 2001 From: Jehangir Amjad Date: Mon, 22 Jan 2024 07:48:59 -0800 Subject: [PATCH 4/7] lint fixes --- import-automation/executor/app/executor/cloud_scheduler.py | 3 ++- import-automation/executor/test/integration_test.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/import-automation/executor/app/executor/cloud_scheduler.py b/import-automation/executor/app/executor/cloud_scheduler.py index 50e8793048..3a205f8f99 100644 --- a/import-automation/executor/app/executor/cloud_scheduler.py +++ b/import-automation/executor/app/executor/cloud_scheduler.py @@ -26,7 +26,8 @@ from google.protobuf import json_format from google.api_core.exceptions import AlreadyExists, NotFound -GKE_SERVICE_DOMAIN = os.getenv('GKE_SERVICE_DOMAIN', 'importautomation.datacommons.org') +GKE_SERVICE_DOMAIN = os.getenv('GKE_SERVICE_DOMAIN', + 'importautomation.datacommons.org') GKE_CALLER_SERVICE_ACCOUNT = os.getenv('CLOUD_SCHEDULER_CALLER_SA') GKE_OAUTH_AUDIENCE = os.getenv('CLOUD_SCHEDULER_CALLER_OAUTH_AUDIENCE') diff --git a/import-automation/executor/test/integration_test.py b/import-automation/executor/test/integration_test.py index ca50860050..4df4bd9bcb 100644 --- a/import-automation/executor/test/integration_test.py +++ b/import-automation/executor/test/integration_test.py @@ -25,10 +25,11 @@ CONFIGS = { # TODO: read the params from the environment. - 'github_repo_owner_username': 'datacommonsorg', # os.environ['GITHUB_AUTH_USERNAME'], + 'github_repo_owner_username': + 'datacommonsorg', # os.environ['GITHUB_AUTH_USERNAME'], 'github_repo_name': 'data-demo', 'github_auth_username': 'datacommons-bot@google.com', - 'github_auth_access_token': '' # os.environ['GITHUB_AUTH_ACCESS_TOKEN'] + 'github_auth_access_token': '' # os.environ['GITHUB_AUTH_ACCESS_TOKEN'] } From bae1b5e8ef97529b65541e4859b1f12f57056f60 Mon Sep 17 00:00:00 2001 From: Jehangir Amjad Date: Mon, 22 Jan 2024 08:35:50 -0800 Subject: [PATCH 5/7] relying on setting the params in the environment --- import-automation/executor/app/configs.py | 2 +- import-automation/executor/test/integration_test.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/import-automation/executor/app/configs.py b/import-automation/executor/app/configs.py index 14d6e10bb9..37f5621b29 100644 --- a/import-automation/executor/app/configs.py +++ b/import-automation/executor/app/configs.py @@ -34,7 +34,7 @@ class ExecutorConfig: # ID of the Google Cloud project that hosts the executor. The project # needs to enable App Engine and Cloud Scheduler. - gcp_project_id: str = 'google.com:datcom-data' + gcp_project_id: str = 'google.com:datcom-import-automation' # ID of the Google Cloud project that stores generated CSVs and MCFs. The # project needs to enable Cloud Storage and gives the service account the # executor uses sufficient permissions to read and write the bucket below. diff --git a/import-automation/executor/test/integration_test.py b/import-automation/executor/test/integration_test.py index 4df4bd9bcb..95686f2df0 100644 --- a/import-automation/executor/test/integration_test.py +++ b/import-automation/executor/test/integration_test.py @@ -25,11 +25,10 @@ CONFIGS = { # TODO: read the params from the environment. - 'github_repo_owner_username': - 'datacommonsorg', # os.environ['GITHUB_AUTH_USERNAME'], + 'github_repo_owner_username': os.environ['_GITHUB_AUTH_USERNAME'], 'github_repo_name': 'data-demo', 'github_auth_username': 'datacommons-bot@google.com', - 'github_auth_access_token': '' # os.environ['GITHUB_AUTH_ACCESS_TOKEN'] + 'github_auth_access_token': os.environ['_GITHUB_AUTH_ACCESS_TOKEN'] } From 881342302fefc5f9128482c087013eb899b4a431 Mon Sep 17 00:00:00 2001 From: Jehangir Amjad Date: Mon, 22 Jan 2024 10:01:55 -0800 Subject: [PATCH 6/7] updates after reviewer comments --- import-automation/executor/app/configs.py | 2 +- import-automation/executor/gke/README.md | 11 ++++++---- .../executor/gke/configure_gke.sh | 21 +++++++++++++++---- .../executor/test/integration_test.py | 8 ++++--- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/import-automation/executor/app/configs.py b/import-automation/executor/app/configs.py index 37f5621b29..2b36baede3 100644 --- a/import-automation/executor/app/configs.py +++ b/import-automation/executor/app/configs.py @@ -34,7 +34,7 @@ class ExecutorConfig: # ID of the Google Cloud project that hosts the executor. The project # needs to enable App Engine and Cloud Scheduler. - gcp_project_id: str = 'google.com:datcom-import-automation' + gcp_project_id: str = 'datcom-import-automation' # ID of the Google Cloud project that stores generated CSVs and MCFs. The # project needs to enable Cloud Storage and gives the service account the # executor uses sufficient permissions to read and write the bucket below. diff --git a/import-automation/executor/gke/README.md b/import-automation/executor/gke/README.md index a01268a761..92c5bf3d76 100644 --- a/import-automation/executor/gke/README.md +++ b/import-automation/executor/gke/README.md @@ -26,11 +26,14 @@ Follow ## (One Time) Setup GKE -1. Update the PROJECT_ID in "gke/configure_gke.sh", if needed. - -2. Update OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET in "gke/configure_gke.sh". +1. Set the PROJECT_ID, OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET environment variables in "gke/configure_gke.sh", e.g. +``` +export PROJECT_ID= +export PROJECT_ID= +export PROJECT_ID= +``` -3. Run `./gke/configure_gke.sh`. +2. Run `./gke/configure_gke.sh`. The script will error out if the environment variables in (1) are not set. ## Deployment diff --git a/import-automation/executor/gke/configure_gke.sh b/import-automation/executor/gke/configure_gke.sh index 27735ab04d..7e8ee9f41a 100755 --- a/import-automation/executor/gke/configure_gke.sh +++ b/import-automation/executor/gke/configure_gke.sh @@ -14,7 +14,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Edit the PROJECT_ID below. +# Verify that the required environment variables are set. +if [ -z "$PROJECT_ID" ] +then + echo "\$PROJECT_ID must be set and cannot be empty." + exit 1 +fi +if [ -z "$OAUTH_CLIENT_ID" ] +then + echo "\$OAUTH_CLIENT_ID must be set and cannot be empty." + exit 1 +fi +if [ -z "$OAUTH_CLIENT_SECRET" ] +then + echo "\$OAUTH_CLIENT_SECRET must be set and cannot be empty." + exit 1 +fi + PROJECT_ID=datcom-import-automation gcloud config set project $PROJECT_ID @@ -51,9 +67,6 @@ kubectl annotate serviceaccount \ import-automation-ksa \ iam.gke.io/gcp-service-account=$PROJECT_ID@appspot.gserviceaccount.com -# Set the oauth env vars before running the script -export OAUTH_CLIENT_ID= -export OAUTH_CLIENT_SECRET= kubectl -n import-automation create secret generic import-automation-iap-secret \ --from-literal=client_id=$OAUTH_CLIENT_ID \ --from-literal=client_secret=$OAUTH_CLIENT_SECRET diff --git a/import-automation/executor/test/integration_test.py b/import-automation/executor/test/integration_test.py index 95686f2df0..ea10db4c0d 100644 --- a/import-automation/executor/test/integration_test.py +++ b/import-automation/executor/test/integration_test.py @@ -24,10 +24,12 @@ NUM_LINES_TO_CHECK = 50 CONFIGS = { - # TODO: read the params from the environment. - 'github_repo_owner_username': os.environ['_GITHUB_AUTH_USERNAME'], + # The GitHub params belong to the public Data Commons gmail account. + # Auth tokens, user name and other details can be found in the inbox + # and in the inbox of teammates. + 'github_repo_owner_username': os.environ['_GITHUB_REPO_OWNER_USERNAME'], 'github_repo_name': 'data-demo', - 'github_auth_username': 'datacommons-bot@google.com', + 'github_auth_username': os.environ['_GITHUB_AUTH_USERNAME'], 'github_auth_access_token': os.environ['_GITHUB_AUTH_ACCESS_TOKEN'] } From 3ca06d820218979ea28ce33330c36cde6f5d3a75 Mon Sep 17 00:00:00 2001 From: Jehangir Amjad Date: Mon, 22 Jan 2024 10:11:33 -0800 Subject: [PATCH 7/7] reviewer comments --- import-automation/executor/gke/README.md | 4 ++-- import-automation/executor/gke/configure_gke.sh | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/import-automation/executor/gke/README.md b/import-automation/executor/gke/README.md index 92c5bf3d76..73a9f0b889 100644 --- a/import-automation/executor/gke/README.md +++ b/import-automation/executor/gke/README.md @@ -29,8 +29,8 @@ Follow 1. Set the PROJECT_ID, OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET environment variables in "gke/configure_gke.sh", e.g. ``` export PROJECT_ID= -export PROJECT_ID= -export PROJECT_ID= +export OAUTH_CLIENT_ID= +export OAUTH_CLIENT_SECRET= ``` 2. Run `./gke/configure_gke.sh`. The script will error out if the environment variables in (1) are not set. diff --git a/import-automation/executor/gke/configure_gke.sh b/import-automation/executor/gke/configure_gke.sh index 7e8ee9f41a..dea858b270 100755 --- a/import-automation/executor/gke/configure_gke.sh +++ b/import-automation/executor/gke/configure_gke.sh @@ -31,8 +31,6 @@ then exit 1 fi -PROJECT_ID=datcom-import-automation - gcloud config set project $PROJECT_ID # Create GKE cluster