From f60c8b3ca349aad2a7862c0d7ab2dd4f75e2b899 Mon Sep 17 00:00:00 2001 From: Jehangir Amjad <1021616+jehangiramjad@users.noreply.github.com> Date: Thu, 8 Feb 2024 14:19:17 +0500 Subject: [PATCH] [Import Automation] Supporting the prod project and distinguish between different GCP projects (#980) * supporting the prod project, distinguish between GCP project for config file (datcom) and GCP project where the import executor is running in * lint fixes --- import-automation/executor/README.md | 12 +++++------ .../executor/schedule_update_import.py | 21 ++++++++++++------- .../executor/schedule_update_import.sh | 8 +++---- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/import-automation/executor/README.md b/import-automation/executor/README.md index 9cf052b429..a96e6a4dbe 100644 --- a/import-automation/executor/README.md +++ b/import-automation/executor/README.md @@ -81,16 +81,16 @@ Run `./schedule_update_import.sh --help` for usage. To schedule an import to run as a cron job on the GCP Cloud Scheduler, do the following: ``` -Run `./schedule_update_import.sh -s <config_project_id> <import_path>` +Run `./schedule_update_import.sh -s <gcp_project_id> <import_path>` ``` -`<config_project_id>` is the GCP project id where the config file is stored, e.g. `datcom-import-automation`. +`<gcp_project_id>` is the GCP project id where the import executor is run from, e.g. `datcom-import-automation-prod`. `<import_path>` is the path to the import (relative to the root directory of the `data` repo), with the name of the import provided with a colon, e.g. `scripts/us_usda/quickstats:UsdaAgSurvey`. Example invocation: ``` -Run `./schedule_update_import.sh -s datcom-import-automation scripts/us_usda/quickstats:UsdaAgSurvey` +Run `./schedule_update_import.sh -s datcom-import-automation-prod scripts/us_usda/quickstats:UsdaAgSurvey` ``` The script will log the name of the Cloud Scheduler job and a url for all the jobs on the scheduler. Please verify that all the job metadata was updated as expected. 
@@ -106,16 +106,16 @@ Once the script runs to completion, the data directory's latest update is printe To execute an Update locally, do the following: ``` -Run `./schedule_update_import.sh -u <config_project_id> <import_path>` +Run `./schedule_update_import.sh -u <gcp_project_id> <import_path>` ``` -`<config_project_id>` is the GCP project id where the config file is stored, e.g. `datcom-import-automation`. +`<gcp_project_id>` is the GCP project id where the import executor is run from, e.g. `datcom-import-automation-prod`. `<import_path>` is the path to the import (relative to the root directory of the `data` repo), with the name of the import provided with a colon, e.g. `scripts/us_usda/quickstats:UsdaAgSurvey`. Example invocation: ``` -Run `./schedule_update_import.sh -u datcom-import-automation scripts/us_usda/quickstats:UsdaAgSurvey` +Run `./schedule_update_import.sh -u datcom-import-automation-prod scripts/us_usda/quickstats:UsdaAgSurvey` ``` diff --git a/import-automation/executor/schedule_update_import.py b/import-automation/executor/schedule_update_import.py index b033360844..b6b8a62737 100644 --- a/import-automation/executor/schedule_update_import.py +++ b/import-automation/executor/schedule_update_import.py @@ -37,7 +37,10 @@ _FLAGS = flags.FLAGS flags.DEFINE_string('mode', '', 'Options: update or schedule.') -flags.DEFINE_string('config_project_id', '', 'GCS Project for the config file.') +flags.DEFINE_string('gke_project_id', '', + 'GCP Project where import executor runs.') +flags.DEFINE_string('config_project_id', 'datcom-204919', + 'GCS Project for the config file.') flags.DEFINE_string('config_bucket', 'import-automation-configs', 'GCS bucket name for the config file.') flags.DEFINE_string('config_filename', 'configs.json', @@ -94,14 +97,14 @@ def _override_configs(filename: str, def _get_cloud_config(filename: str) -> Dict: logging.info('Getting cloud config.') - project_id = _FLAGS.config_project_id + config_project_id = _FLAGS.config_project_id bucket_name = _FLAGS.config_bucket logging.info( - f'\nProject ID: {project_id}\nBucket: {bucket_name}\nConfig 
Filename: {filename}' + f'\nProject ID: {config_project_id}\nBucket: {bucket_name}\nConfig Filename: {filename}' ) - bucket = storage.Client(project_id).bucket(bucket_name, - user_project=project_id) + bucket = storage.Client(config_project_id).bucket( + bucket_name, user_project=config_project_id) blob = bucket.blob(filename) config_dict = json.loads(blob.download_as_string(client=None)) return config_dict @@ -261,8 +264,8 @@ def main(_): mode = _FLAGS.mode absolute_import_path = _FLAGS.absolute_import_path - if not _FLAGS.config_project_id: - raise Exception("Flag: config_project_if must be provided.") + if not _FLAGS.gke_project_id: + raise Exception("Flag: gke_project_id must be provided.") if not mode or (mode not in ['update', 'schedule']): raise Exception('Flag: mode must be set to \'update\' or \'schedule\'') @@ -278,6 +281,7 @@ def main(_): repo_dir = cwd.split("data")[0] + "data" logging.info(f'{mode} called with the following:') logging.info(f'Config Project ID: {_FLAGS.config_project_id}') + logging.info(f'GKE (Import Executor) Project ID: {_FLAGS.gke_project_id}') logging.info(f'Import: {absolute_import_path}') logging.info(f'Repo root directory: {repo_dir}') @@ -287,6 +291,9 @@ def main(_): config_dict = _get_cloud_config(_FLAGS.config_filename) cfg = configs.ExecutorConfig(**config_dict['configs']) + # Update the GCP project id to use with the configs. + cfg.gcp_project_id = _FLAGS.gke_project_id + logging.info( f'Updating any config fields from local file: {_CONFIG_OVERRIDE_FILE}.') cfg = _override_configs(_CONFIG_OVERRIDE_FILE, cfg) diff --git a/import-automation/executor/schedule_update_import.sh b/import-automation/executor/schedule_update_import.sh index 9961405174..4ca0e24bb0 100755 --- a/import-automation/executor/schedule_update_import.sh +++ b/import-automation/executor/schedule_update_import.sh @@ -14,8 +14,8 @@ # limitations under the License. 
function help { - echo "#Usage: -us <config_project_id> <import_path>" - echo "## <config_project_id> is the GCP project ID where the config file is located." + echo "#Usage: -us <gcp_project_id> <import_path>" + echo "## <gcp_project_id> is the GCP project ID where the import executor is running." echo "## Update an import specified by <import_path>, e.g. scripts/us_usda/quickstats:UsdaAgSurvey" exit 1 } @@ -37,13 +37,13 @@ while getopts us OPTION; do esac done -CONFIG_PROJECT_ID=$2 +GKE_PROJECT_ID=$2 IMPORT_PATH=$3 python3 -m venv .env . .env/bin/activate pip3 install --disable-pip-version-check -r requirements.txt -python3 -m schedule_update_import --config_project_id=$CONFIG_PROJECT_ID --mode=$MODE --absolute_import_path=$IMPORT_PATH +python3 -m schedule_update_import --gke_project_id=$GKE_PROJECT_ID --mode=$MODE --absolute_import_path=$IMPORT_PATH deactivate \ No newline at end of file