Skip to content

Commit

Permalink
removing the option to provide user script args as a flag
Browse files Browse the repository at this point in the history
  • Loading branch information
jehangiramjad committed Feb 4, 2024
1 parent 9edd8ac commit d251fc8
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 30 deletions.
5 changes: 2 additions & 3 deletions import-automation/executor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,18 @@ Once the script runs to completion, the data directory's latest update is printe
Ensure this script is executed from the directory which contains `schedule_update_import.sh`, i.e. from `/data/import-automation/executor`.

```
Run `./schedule_update_import.sh -u <config_project_id> <path_to_import> <import_script_args>`
Run `./schedule_update_import.sh -u <config_project_id> <path_to_import>`
```

Run `./schedule_update_import.sh --help` for usage.

`<config_project_id>` is the GCP project id where the config file is stored, e.g. `datcom-import-automation`.
`<path_to_import>` is the path to the import (relative to the root directory of the `data` repo), with the name of the import provided with a colon, e.g. `scripts/us_usda/quickstats:UsdaAgSurvey`.
`import_script_args` is one string (enclosed by quotes) representing all the command line args to be provided to the import script, e.g. `"--start_year=2023 --another_flag=val"`.

Example invocation:

```
Run `./schedule_update_import.sh -u datcom-import-automation scripts/us_usda/quickstats:UsdaAgSurvey "--start_year=2023"`
Run `./schedule_update_import.sh -u datcom-import-automation scripts/us_usda/quickstats:UsdaAgSurvey`
```

## Local Executor [should be deprecated soon]
Expand Down
21 changes: 0 additions & 21 deletions import-automation/executor/schedule_update_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,6 @@
'<path_to_directory_relative_to_repository_root>:<import_name>.'
'Example: scripts/us_usda/quickstats:UsdaAgSurvey')

flags.DEFINE_string(
'import_script_args', '',
'One string of command line args for the import script,'
'e.g. "--flag1=value1 --flag2=value2"')

_FLAGS(sys.argv)

logging.basicConfig(level=logging.INFO)
Expand Down Expand Up @@ -117,7 +112,6 @@ def _print_fileupload_results(cfg: configs.ExecutorConfig,

def update(cfg: configs.ExecutorConfig,
absolute_import_path: str,
import_script_args: List[str] = [],
local_repo_dir: str = "") -> import_executor.ExecutionResult:
"""Executes an update on the specified import.
Expand All @@ -136,20 +130,13 @@ def update(cfg: configs.ExecutorConfig,
<path_to_directory_relative_to_repository_root>:<import_name>
example:
scripts/us_usda/quickstats:UsdaAgSurvey
import_script_args: a list of strings, each to be used as a command
line arg for the import script,
e.g. ['--flag1=value1', '--flag2=value2'].
local_repo_dir: the full path to the GitHub repository on local. The
path should be provided to the root directory of the repo,
e.g. `<base_path_on_disk>/data`.
Returns:
An import_executor.ExecutionResult object.
"""
# Update the configs with user script args, if provided.
if import_script_args:
cfg.user_script_args = import_script_args

executor = import_executor.ImportExecutor(
uploader=file_uploader.GCSFileUploader(
project_id=cfg.gcs_project_id,
Expand Down Expand Up @@ -181,20 +168,13 @@ def main(_):
'Flag: absolute_import_path is invalid. Path should be like:'
'scripts/us_usda/quickstats:UsdaAgSurvey')

# Converting string to list
args_list = import_script_args.split(' ')
if type(args_list) != type([]):
raise Exception(
'Flag: import_script_args could not be parsed into a list.')

# Get the root repo directory (data). Assumption is that this script is being
# called from a path within the data repo.
cwd = os.getcwd()
repo_dir = cwd.split("data")[0] + "data"
logging.info(f'{mode} called with the following:')
logging.info(f'Config Project ID: {_FLAGS.config_project_id}')
logging.info(f'Import: {absolute_import_path}')
logging.info(f'Import script args: {args_list}')
logging.info(f'Repo root directory: {repo_dir}')

# TODO: allow overriding/updating config params from a local config file as well.
Expand All @@ -206,7 +186,6 @@ def main(_):
res = dataclasses.asdict(
update(cfg,
absolute_import_path,
import_script_args=args_list,
local_repo_dir=repo_dir))
logging.info("*************************************************")
logging.info("*********** Update Complete. ********************")
Expand Down
9 changes: 3 additions & 6 deletions import-automation/executor/schedule_update_import.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,9 @@
# limitations under the License.

function help {
echo "#Usage: -us <config_project_id> <absolute_import_path> <import_script_args>"
echo "#Usage: -us <config_project_id> <absolute_import_path>"
echo "## <config_project_id> is the GCP project ID where the config file is located."
echo "## Update an import specified by <absolute_import_path>, e.g. scripts/us_usda/quickstats:UsdaAgSurvey"
echo "## Provide all args for the import script as one string, i.e. with quotes, e.g. \"--flag1=val1 --flag2=val2\""
exit 1
echo "## Update an import specified by <absolute_import_path>, e.g. scripts/us_usda/quickstats:UsdaAgSurvey"
exit 1
}

if [[ $# -le 1 ]]; then
Expand All @@ -41,12 +39,11 @@ done

CONFIG_PROJECT_ID=$2
IMPORT_PATH=$3
IMPORT_SCRIPT_ARGS=$4

python3 -m venv .env
. .env/bin/activate
pip3 install --disable-pip-version-check -r requirements.txt

python3 -m schedule_update_import --config_project_id=$CONFIG_PROJECT_ID --mode=$MODE --absolute_import_path=$IMPORT_PATH --import_script_args="$IMPORT_SCRIPT_ARGS"
python3 -m schedule_update_import --config_project_id=$CONFIG_PROJECT_ID --mode=$MODE --absolute_import_path=$IMPORT_PATH
deactivate

0 comments on commit d251fc8

Please sign in to comment.