Skip to content

Commit

Permalink
Merge branch 'master' into placehealth_refresh
Browse files Browse the repository at this point in the history
  • Loading branch information
ajaits authored Feb 8, 2024
2 parents c7948c3 + f60c8b3 commit 0e739b5
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 17 deletions.
12 changes: 6 additions & 6 deletions import-automation/executor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,16 +81,16 @@ Run `./schedule_update_import.sh --help` for usage.
To schedule an import to run as a cron job on the GCP Cloud Scheduler, do the following:

```
Run `./schedule_update_import.sh -s <config_project_id> <path_to_import>`
Run `./schedule_update_import.sh -s <gke_project_id> <path_to_import>`
```

`<config_project_id>` is the GCP project id where the config file is stored, e.g. `datcom-import-automation`.
`<gke_project_id>` is the GCP project id where the import executor runs, e.g. `datcom-import-automation-prod`.
`<path_to_import>` is the path to the import (relative to the root directory of the `data` repo), with the name of the import provided with a colon, e.g. `scripts/us_usda/quickstats:UsdaAgSurvey`.

Example invocation:

```
Run `./schedule_update_import.sh -s datcom-import-automation scripts/us_usda/quickstats:UsdaAgSurvey`
Run `./schedule_update_import.sh -s datcom-import-automation-prod scripts/us_usda/quickstats:UsdaAgSurvey`
```

The script will log the name of the Cloud Scheduler job and a URL for all the jobs on the scheduler. Please verify that all the job metadata was updated as expected.
Expand All @@ -106,16 +106,16 @@ Once the script runs to completion, the data directory's latest update is printe
To execute an Update locally, do the following:

```
Run `./schedule_update_import.sh -u <config_project_id> <path_to_import>`
Run `./schedule_update_import.sh -u <gke_project_id> <path_to_import>`
```

`<config_project_id>` is the GCP project id where the config file is stored, e.g. `datcom-import-automation`.
`<gke_project_id>` is the GCP project id where the import executor runs, e.g. `datcom-import-automation-prod`.
`<path_to_import>` is the path to the import (relative to the root directory of the `data` repo), with the name of the import provided with a colon, e.g. `scripts/us_usda/quickstats:UsdaAgSurvey`.

Example invocation:

```
Run `./schedule_update_import.sh -u datcom-import-automation scripts/us_usda/quickstats:UsdaAgSurvey`
Run `./schedule_update_import.sh -u datcom-import-automation-prod scripts/us_usda/quickstats:UsdaAgSurvey`
```


Expand Down
21 changes: 14 additions & 7 deletions import-automation/executor/schedule_update_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@
_FLAGS = flags.FLAGS

flags.DEFINE_string('mode', '', 'Options: update or schedule.')
flags.DEFINE_string('config_project_id', '', 'GCS Project for the config file.')
flags.DEFINE_string('gke_project_id', '',
'GCP Project where import executor runs.')
flags.DEFINE_string('config_project_id', 'datcom-204919',
'GCS Project for the config file.')
flags.DEFINE_string('config_bucket', 'import-automation-configs',
'GCS bucket name for the config file.')
flags.DEFINE_string('config_filename', 'configs.json',
Expand Down Expand Up @@ -94,14 +97,14 @@ def _override_configs(filename: str,

def _get_cloud_config(filename: str) -> Dict:
logging.info('Getting cloud config.')
project_id = _FLAGS.config_project_id
config_project_id = _FLAGS.config_project_id
bucket_name = _FLAGS.config_bucket
logging.info(
f'\nProject ID: {project_id}\nBucket: {bucket_name}\nConfig Filename: {filename}'
f'\nProject ID: {config_project_id}\nBucket: {bucket_name}\nConfig Filename: {filename}'
)

bucket = storage.Client(project_id).bucket(bucket_name,
user_project=project_id)
bucket = storage.Client(config_project_id).bucket(
bucket_name, user_project=config_project_id)
blob = bucket.blob(filename)
config_dict = json.loads(blob.download_as_string(client=None))
return config_dict
Expand Down Expand Up @@ -261,8 +264,8 @@ def main(_):
mode = _FLAGS.mode
absolute_import_path = _FLAGS.absolute_import_path

if not _FLAGS.config_project_id:
raise Exception("Flag: config_project_if must be provided.")
if not _FLAGS.gke_project_id:
raise Exception("Flag: gke_project_id must be provided.")

if not mode or (mode not in ['update', 'schedule']):
raise Exception('Flag: mode must be set to \'update\' or \'schedule\'')
Expand All @@ -278,6 +281,7 @@ def main(_):
repo_dir = cwd.split("data")[0] + "data"
logging.info(f'{mode} called with the following:')
logging.info(f'Config Project ID: {_FLAGS.config_project_id}')
logging.info(f'GKE (Import Executor) Project ID: {_FLAGS.gke_project_id}')
logging.info(f'Import: {absolute_import_path}')
logging.info(f'Repo root directory: {repo_dir}')

Expand All @@ -287,6 +291,9 @@ def main(_):
config_dict = _get_cloud_config(_FLAGS.config_filename)
cfg = configs.ExecutorConfig(**config_dict['configs'])

# Update the GCP project id to use with the configs.
cfg.gcp_project_id = _FLAGS.gke_project_id

logging.info(
f'Updating any config fields from local file: {_CONFIG_OVERRIDE_FILE}.')
cfg = _override_configs(_CONFIG_OVERRIDE_FILE, cfg)
Expand Down
8 changes: 4 additions & 4 deletions import-automation/executor/schedule_update_import.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
# limitations under the License.

function help {
echo "#Usage: -us <config_project_id> <absolute_import_path>"
echo "## <config_project_id> is the GCP project ID where the config file is located."
echo "#Usage: -us <gke_project_id> <absolute_import_path>"
echo "## <gke_project_id> is the GCP project ID where the import executor is running in."
echo "## Update an import specified by <absolute_import_path>, e.g. scripts/us_usda/quickstats:UsdaAgSurvey" exit 1
}

Expand All @@ -37,13 +37,13 @@ while getopts us OPTION; do
esac
done

CONFIG_PROJECT_ID=$2
GKE_PROJECT_ID=$2
IMPORT_PATH=$3

python3 -m venv .env
. .env/bin/activate
pip3 install --disable-pip-version-check -r requirements.txt

python3 -m schedule_update_import --config_project_id=$CONFIG_PROJECT_ID --mode=$MODE --absolute_import_path=$IMPORT_PATH
python3 -m schedule_update_import --gke_project_id=$GKE_PROJECT_ID --mode=$MODE --absolute_import_path=$IMPORT_PATH

deactivate

0 comments on commit 0e739b5

Please sign in to comment.