From f60c8b3ca349aad2a7862c0d7ab2dd4f75e2b899 Mon Sep 17 00:00:00 2001 From: Jehangir Amjad <1021616+jehangiramjad@users.noreply.github.com> Date: Thu, 8 Feb 2024 14:19:17 +0500 Subject: [PATCH] [Import Automation] Supporting the prod project and distinguish between different GCP projects (#980) * supporting the prod project, distinguish between GCP project for config file (datcom) and GCP project where the import executor is running in * lint fixes --- import-automation/executor/README.md | 12 +++++------ .../executor/schedule_update_import.py | 21 ++++++++++++------- .../executor/schedule_update_import.sh | 8 +++---- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/import-automation/executor/README.md b/import-automation/executor/README.md index 9cf052b429..a96e6a4dbe 100644 --- a/import-automation/executor/README.md +++ b/import-automation/executor/README.md @@ -81,16 +81,16 @@ Run `./schedule_update_import.sh --help` for usage. To schedule an import to run as a cron job on the GCP Cloud Scheduler, do the following: ``` -Run `./schedule_update_import.sh -s <config_project_id> <import_path>` +Run `./schedule_update_import.sh -s <gcp_project_id> <import_path>` ``` -`<config_project_id>` is the GCP project id where the config file is stored, e.g. `datcom-import-automation`. +`<gcp_project_id>` is the GCP project id where the import executor is run from, e.g. `datcom-import-automation-prod`. `<import_path>` is the path to the import (relative to the root directory of the `data` repo), with the name of the import provided with a colon, e.g. `scripts/us_usda/quickstats:UsdaAgSurvey`. Example invocation: ``` -Run `./schedule_update_import.sh -s datcom-import-automation scripts/us_usda/quickstats:UsdaAgSurvey` +Run `./schedule_update_import.sh -s datcom-import-automation-prod scripts/us_usda/quickstats:UsdaAgSurvey` ``` The script will log the name of the Cloud Scheduler job and a url for all the jobs on the scheduler. Please verify that all the job metadata was updated as expected. 
@@ -106,16 +106,16 @@ Once the script runs to completion, the data directory's latest update is printe To execute an Update locally, do the following: ``` -Run `./schedule_update_import.sh -u <config_project_id> <import_path>` +Run `./schedule_update_import.sh -u <gcp_project_id> <import_path>` ``` -`<config_project_id>` is the GCP project id where the config file is stored, e.g. `datcom-import-automation`. +`<gcp_project_id>` is the GCP project id where the import executor is run from, e.g. `datcom-import-automation-prod`. `<import_path>` is the path to the import (relative to the root directory of the `data` repo), with the name of the import provided with a colon, e.g. `scripts/us_usda/quickstats:UsdaAgSurvey`. Example invocation: ``` -Run `./schedule_update_import.sh -u datcom-import-automation scripts/us_usda/quickstats:UsdaAgSurvey` +Run `./schedule_update_import.sh -u datcom-import-automation-prod scripts/us_usda/quickstats:UsdaAgSurvey` ``` diff --git a/import-automation/executor/schedule_update_import.py b/import-automation/executor/schedule_update_import.py index b033360844..b6b8a62737 100644 --- a/import-automation/executor/schedule_update_import.py +++ b/import-automation/executor/schedule_update_import.py @@ -37,7 +37,10 @@ _FLAGS = flags.FLAGS flags.DEFINE_string('mode', '', 'Options: update or schedule.') -flags.DEFINE_string('config_project_id', '', 'GCS Project for the config file.') +flags.DEFINE_string('gke_project_id', '', + 'GCP Project where import executor runs.') +flags.DEFINE_string('config_project_id', 'datcom-204919', + 'GCS Project for the config file.') flags.DEFINE_string('config_bucket', 'import-automation-configs', 'GCS bucket name for the config file.') flags.DEFINE_string('config_filename', 'configs.json', @@ -94,14 +97,14 @@ def _override_configs(filename: str, def _get_cloud_config(filename: str) -> Dict: logging.info('Getting cloud config.') - project_id = _FLAGS.config_project_id + config_project_id = _FLAGS.config_project_id bucket_name = _FLAGS.config_bucket logging.info( - f'\nProject ID: {project_id}\nBucket: {bucket_name}\nConfig 
Filename: {filename}' + f'\nProject ID: {config_project_id}\nBucket: {bucket_name}\nConfig Filename: {filename}' ) - bucket = storage.Client(project_id).bucket(bucket_name, - user_project=project_id) + bucket = storage.Client(config_project_id).bucket( + bucket_name, user_project=config_project_id) blob = bucket.blob(filename) config_dict = json.loads(blob.download_as_string(client=None)) return config_dict @@ -261,8 +264,8 @@ def main(_): mode = _FLAGS.mode absolute_import_path = _FLAGS.absolute_import_path - if not _FLAGS.config_project_id: - raise Exception("Flag: config_project_if must be provided.") + if not _FLAGS.gke_project_id: + raise Exception("Flag: gke_project_id must be provided.") if not mode or (mode not in ['update', 'schedule']): raise Exception('Flag: mode must be set to \'update\' or \'schedule\'') @@ -278,6 +281,7 @@ def main(_): repo_dir = cwd.split("data")[0] + "data" logging.info(f'{mode} called with the following:') logging.info(f'Config Project ID: {_FLAGS.config_project_id}') + logging.info(f'GKE (Import Executor) Project ID: {_FLAGS.gke_project_id}') logging.info(f'Import: {absolute_import_path}') logging.info(f'Repo root directory: {repo_dir}') @@ -287,6 +291,9 @@ def main(_): config_dict = _get_cloud_config(_FLAGS.config_filename) cfg = configs.ExecutorConfig(**config_dict['configs']) + # Update the GCP project id to use with the configs. + cfg.gcp_project_id = _FLAGS.gke_project_id + logging.info( f'Updating any config fields from local file: {_CONFIG_OVERRIDE_FILE}.') cfg = _override_configs(_CONFIG_OVERRIDE_FILE, cfg) diff --git a/import-automation/executor/schedule_update_import.sh b/import-automation/executor/schedule_update_import.sh index 9961405174..4ca0e24bb0 100755 --- a/import-automation/executor/schedule_update_import.sh +++ b/import-automation/executor/schedule_update_import.sh @@ -14,8 +14,8 @@ # limitations under the License. 
function help { - echo "#Usage: -us <config_project_id> <import_path>" - echo "## <config_project_id> is the GCP project ID where the config file is located." + echo "#Usage: -us <gcp_project_id> <import_path>" + echo "## <gcp_project_id> is the GCP project ID where the import executor is running." echo "## Update an import specified by <import_path>, e.g. scripts/us_usda/quickstats:UsdaAgSurvey" exit 1 } @@ -37,13 +37,13 @@ while getopts us OPTION; do esac done -CONFIG_PROJECT_ID=$2 +GKE_PROJECT_ID=$2 IMPORT_PATH=$3 python3 -m venv .env . .env/bin/activate pip3 install --disable-pip-version-check -r requirements.txt -python3 -m schedule_update_import --config_project_id=$CONFIG_PROJECT_ID --mode=$MODE --absolute_import_path=$IMPORT_PATH +python3 -m schedule_update_import --gke_project_id=$GKE_PROJECT_ID --mode=$MODE --absolute_import_path=$IMPORT_PATH deactivate \ No newline at end of file