From 083c34461ecd0feb8eb087ed9147988d1d704c55 Mon Sep 17 00:00:00 2001
From: Chen Haifeng
Date: Sun, 17 Apr 2022 13:40:35 +0800
Subject: [PATCH] Use standard aws storage configuration names (#210)

---
 README.md                                     |  6 ++---
 example/cluster/aws/example-docker.yaml       |  6 ++---
 example/cluster/aws/example-standard.yaml     |  6 ++---
 python/cloudtik/core/config-schema.json       | 12 ++++-----
 .../cloudtik/providers/_private/aws/config.py |  6 ++---
 .../providers/_private/aws/node_provider.py   | 10 +++----
 .../cloudtik/providers/_private/aws/utils.py  | 18 ++++++-------
 python/cloudtik/providers/aws/defaults.yaml   |  4 +--
 python/cloudtik/runtime/spark/scripts.py      | 26 +++++++++----------
 runtime/spark/scripts/configure.sh            | 24 ++++++++---------
 10 files changed, 59 insertions(+), 59 deletions(-)

diff --git a/README.md b/README.md
index 8cb93cf48..cfd5b06c5 100644
--- a/README.md
+++ b/README.md
@@ -105,10 +105,10 @@ provider:
     type: aws
     region: us-west-2
     # S3 configurations for storage
-    aws_s3a_storage:
+    aws_s3_storage:
         s3.bucket: your_s3_bucket
-        fs.s3a.access.key: your_s3_access_key
-        fs.s3a.secret.key: your_s3_secret_key
+        s3.access.key.id: your_s3_access_key_id
+        s3.secret.access.key: your_s3_secret_access_key
 
 auth:
     ssh_user: ubuntu
diff --git a/example/cluster/aws/example-docker.yaml b/example/cluster/aws/example-docker.yaml
index 5a1e82376..c82a9534b 100644
--- a/example/cluster/aws/example-docker.yaml
+++ b/example/cluster/aws/example-docker.yaml
@@ -16,10 +16,10 @@ provider:
     type: aws
     region: us-west-2
     # S3 configurations for storage
-    aws_s3a_storage:
+    aws_s3_storage:
         s3.bucket: your_s3_bucket
-        fs.s3a.access.key: your_s3_access_key
-        fs.s3a.secret.key: your_s3_secret_key
+        s3.access.key.id: your_s3_access_key_id
+        s3.secret.access.key: your_s3_secret_access_key
 
 auth:
     ssh_user: ubuntu
diff --git a/example/cluster/aws/example-standard.yaml b/example/cluster/aws/example-standard.yaml
index 898b44ac9..5a062df61 100644
--- a/example/cluster/aws/example-standard.yaml
+++ b/example/cluster/aws/example-standard.yaml
@@ -12,10 +12,10 @@ provider:
     type: aws
     region: us-west-2
     # S3 configurations for storage
-    aws_s3a_storage:
+    aws_s3_storage:
         s3.bucket: your_s3_bucket
-        fs.s3a.access.key: your_s3_access_key
-        fs.s3a.secret.key: your_s3_secret_key
+        s3.access.key.id: your_s3_access_key_id
+        s3.secret.access.key: your_s3_secret_access_key
 
 auth:
     ssh_user: ubuntu
diff --git a/python/cloudtik/core/config-schema.json b/python/cloudtik/core/config-schema.json
index 0c21fac67..cb946ed77 100644
--- a/python/cloudtik/core/config-schema.json
+++ b/python/cloudtik/core/config-schema.json
@@ -224,22 +224,22 @@
                 }
             }
         },
-        "aws_s3a_storage": {
+        "aws_s3_storage": {
             "type": "object",
-            "description": "use s3a",
+            "description": "AWS S3 storage configurations",
             "additionalProperties": false,
             "properties": {
                 "s3.bucket": {
                     "type": "string",
                     "description": "the s3 bucket name"
                 },
-                "fs.s3a.access.key": {
+                "s3.access.key.id": {
                     "type": "string",
-                    "description": "access key of s3a"
+                    "description": "access key id of s3"
                 },
-                "fs.s3a.secret.key": {
+                "s3.secret.access.key": {
                     "type": "string",
-                    "description": "secret key of s3a"
+                    "description": "secret access key of s3"
                 }
             }
         },
diff --git a/python/cloudtik/providers/_private/aws/config.py b/python/cloudtik/providers/_private/aws/config.py
index e7b4b27b7..2f8655f0b 100644
--- a/python/cloudtik/providers/_private/aws/config.py
+++ b/python/cloudtik/providers/_private/aws/config.py
@@ -1902,11 +1902,11 @@ def _security_groups_in_network_config(config: Dict[str, Any]) \
 
 
 def verify_s3_storage(provider_config: Dict[str, Any]):
-    s3_storage = provider_config["aws_s3a_storage"]
+    s3_storage = provider_config["aws_s3_storage"]
     s3 = boto3.client(
         's3',
-        aws_access_key_id=s3_storage["fs.s3a.access.key"],
-        aws_secret_access_key=s3_storage["fs.s3a.secret.key"]
+        aws_access_key_id=s3_storage["s3.access.key.id"],
+        aws_secret_access_key=s3_storage["s3.secret.access.key"]
     )
 
     try:
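Note: the hunk above ends just as the try block opens, so the verification body itself is not shown in this patch. A minimal sketch of how such a check typically completes with boto3, assuming head_bucket is the probe call; the helper name and the error handling here are illustrative, not part of the patch:

    import boto3
    from botocore.exceptions import ClientError

    def verify_s3_storage_sketch(provider_config):
        # Read the renamed keys exactly as verify_s3_storage does above.
        s3_storage = provider_config["aws_s3_storage"]
        s3 = boto3.client(
            's3',
            aws_access_key_id=s3_storage["s3.access.key.id"],
            aws_secret_access_key=s3_storage["s3.secret.access.key"]
        )
        try:
            # head_bucket cheaply checks both the credentials and the
            # bucket's existence; failures surface as ClientError.
            s3.head_bucket(Bucket=s3_storage["s3.bucket"])
        except ClientError as e:
            raise RuntimeError(
                "S3 storage verification failed: {}".format(e)) from e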
diff --git a/python/cloudtik/providers/_private/aws/node_provider.py b/python/cloudtik/providers/_private/aws/node_provider.py
index 82e7497d6..03c182a88 100644
--- a/python/cloudtik/providers/_private/aws/node_provider.py
+++ b/python/cloudtik/providers/_private/aws/node_provider.py
@@ -18,7 +18,7 @@
 from cloudtik.providers._private.aws.config import bootstrap_aws, bootstrap_aws_from_workspace, verify_s3_storage
 from cloudtik.providers._private.aws.utils import boto_exception_handler, \
-    resource_cache, client_cache, get_aws_s3a_config, get_boto_error_code
+    resource_cache, client_cache, get_aws_s3_config, get_boto_error_code
 from cloudtik.providers._private.utils import validate_config_dict
 
 logger = logging.getLogger(__name__)
 
@@ -123,7 +123,7 @@ def __init__(self, provider_config, cluster_name):
         self.cached_nodes = {}
 
     def with_provider_environment_variables(self):
-        return get_aws_s3a_config(self.provider_config)
+        return get_aws_s3_config(self.provider_config)
 
     def non_terminated_nodes(self, tag_filters):
         # Note that these filters are acceptable because they are set on
@@ -681,9 +681,9 @@ def validate_config(
 def validate_storage_config(
         provider_config: Dict[str, Any]) -> None:
     config_dict = {
-        "s3.bucket": provider_config.get("aws_s3a_storage", {}).get("s3.bucket"),
-        "fs.s3a.access.key": provider_config.get("aws_s3a_storage", {}).get("fs.s3a.access.key"),
-        "fs.s3a.secret.key": provider_config.get("aws_s3a_storage", {}).get("fs.s3a.secret.key")
+        "s3.bucket": provider_config.get("aws_s3_storage", {}).get("s3.bucket"),
+        "s3.access.key.id": provider_config.get("aws_s3_storage", {}).get("s3.access.key.id"),
+        "s3.secret.access.key": provider_config.get("aws_s3_storage", {}).get("s3.secret.access.key")
     }
 
     validate_config_dict(provider_config["type"], config_dict)
diff --git a/python/cloudtik/providers/_private/aws/utils.py b/python/cloudtik/providers/_private/aws/utils.py
index 8a59842f5..473f9479b 100644
--- a/python/cloudtik/providers/_private/aws/utils.py
+++ b/python/cloudtik/providers/_private/aws/utils.py
@@ -139,19 +139,19 @@ def __exit__(self, type, value, tb):
     return ExceptionHandlerContextManager()
 
 
-def get_aws_s3a_config(provider_config):
+def get_aws_s3_config(provider_config):
     config_dict = {}
 
-    s3_bucket = provider_config.get("aws_s3a_storage", {}).get("s3.bucket")
+    s3_bucket = provider_config.get("aws_s3_storage", {}).get("s3.bucket")
     if s3_bucket:
-        config_dict["AWS_S3A_BUCKET"] = s3_bucket
+        config_dict["AWS_S3_BUCKET"] = s3_bucket
 
-    s3_access_key = provider_config.get("aws_s3a_storage", {}).get("fs.s3a.access.key")
-    if s3_access_key:
-        config_dict["FS_S3A_ACCESS_KEY"] = s3_access_key
+    s3_access_key_id = provider_config.get("aws_s3_storage", {}).get("s3.access.key.id")
+    if s3_access_key_id:
+        config_dict["AWS_S3_ACCESS_KEY_ID"] = s3_access_key_id
 
-    s3_secret_key = provider_config.get("aws_s3a_storage", {}).get("fs.s3a.secret.key")
-    if s3_secret_key:
-        config_dict["FS_S3A_SECRET_KEY"] = s3_secret_key
+    s3_secret_access_key = provider_config.get("aws_s3_storage", {}).get("s3.secret.access.key")
+    if s3_secret_access_key:
+        config_dict["AWS_S3_SECRET_ACCESS_KEY"] = s3_secret_access_key
 
     return config_dict
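For reference, the renamed get_aws_s3_config now maps the standard configuration keys to standard environment variable names. A short usage sketch, using the placeholder values from the example YAMLs:

    from cloudtik.providers._private.aws.utils import get_aws_s3_config

    # Provider config snippet matching the new schema above.
    provider_config = {
        "type": "aws",
        "aws_s3_storage": {
            "s3.bucket": "your_s3_bucket",
            "s3.access.key.id": "your_s3_access_key_id",
            "s3.secret.access.key": "your_s3_secret_access_key",
        },
    }

    env = get_aws_s3_config(provider_config)
    # env == {"AWS_S3_BUCKET": "your_s3_bucket",
    #         "AWS_S3_ACCESS_KEY_ID": "your_s3_access_key_id",
    #         "AWS_S3_SECRET_ACCESS_KEY": "your_s3_secret_access_key"}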
diff --git a/python/cloudtik/providers/aws/defaults.yaml b/python/cloudtik/providers/aws/defaults.yaml
index 09a468df5..a4cc86733 100644
--- a/python/cloudtik/providers/aws/defaults.yaml
+++ b/python/cloudtik/providers/aws/defaults.yaml
@@ -87,9 +87,9 @@ setup_commands:
 head_setup_commands:
     - pip install 'boto3>=1.4.8'  # 1.4.8 adds InstanceMarketOptions
     - cloudtik-spark install --head --provider=aws
-    - cloudtik-spark configure --head --provider=aws --aws_s3a_bucket=$AWS_S3A_BUCKET --s3a_access_key=$FS_S3A_ACCESS_KEY --s3a_secret_key=$FS_S3A_SECRET_KEY
+    - cloudtik-spark configure --head --provider=aws --aws_s3_bucket=$AWS_S3_BUCKET --aws_s3_access_key_id=$AWS_S3_ACCESS_KEY_ID --aws_s3_secret_access_key=$AWS_S3_SECRET_ACCESS_KEY
 
 # Custom commands that will be run on worker nodes after common setup.
 worker_setup_commands:
     - cloudtik-spark install --provider=aws
-    - cloudtik-spark configure --provider=aws --head_address=$CLOUDTIK_HEAD_IP --aws_s3a_bucket=$AWS_S3A_BUCKET --s3a_access_key=$FS_S3A_ACCESS_KEY --s3a_secret_key=$FS_S3A_SECRET_KEY
+    - cloudtik-spark configure --provider=aws --head_address=$CLOUDTIK_HEAD_IP --aws_s3_bucket=$AWS_S3_BUCKET --aws_s3_access_key_id=$AWS_S3_ACCESS_KEY_ID --aws_s3_secret_access_key=$AWS_S3_SECRET_ACCESS_KEY
diff --git a/python/cloudtik/runtime/spark/scripts.py b/python/cloudtik/runtime/spark/scripts.py
index 142499ae8..1e07f346b 100644
--- a/python/cloudtik/runtime/spark/scripts.py
+++ b/python/cloudtik/runtime/spark/scripts.py
@@ -92,23 +92,23 @@ def install(head, provider, script_args):
     default="",
     help="the head ip ")
 @click.option(
-    '--aws_s3a_bucket',
+    '--aws_s3_bucket',
     required=False,
     type=str,
     default="",
-    help="the bucket name of s3a")
+    help="the bucket name of s3")
 @click.option(
-    '--s3a_access_key',
+    '--aws_s3_access_key_id',
     required=False,
     type=str,
     default="",
-    help="the access key of s3a")
+    help="the access key id of s3")
 @click.option(
-    '--s3a_secret_key',
+    '--aws_s3_secret_access_key',
     required=False,
     type=str,
     default="",
-    help="the secret key of s3a")
+    help="the secret access key of s3")
 @click.option(
     '--project_id',
     required=False,
@@ -164,7 +164,7 @@ def install(head, provider, script_args):
     default="",
     help="azure storage account access key")
 @click.argument("script_args", nargs=-1)
-def configure(head, provider, head_address, aws_s3a_bucket, s3a_access_key, s3a_secret_key, project_id, gcp_gcs_bucket,
+def configure(head, provider, head_address, aws_s3_bucket, aws_s3_access_key_id, aws_s3_secret_access_key, project_id, gcp_gcs_bucket,
               fs_gs_auth_service_account_email, fs_gs_auth_service_account_private_key_id, fs_gs_auth_service_account_private_key,
               azure_storage_kind, azure_storage_account, azure_container, azure_account_key, script_args):
 
@@ -181,12 +181,12 @@ def configure(head, provider, head_address, aws_s3a_bucket, s3a_access_key, s3a_
     if head_address:
         cmds += ["--head_address={}".format(head_address)]
 
-    if aws_s3a_bucket:
-        cmds += ["--aws_s3a_bucket={}".format(aws_s3a_bucket)]
-    if s3a_access_key:
-        cmds += ["--s3a_access_key={}".format(s3a_access_key)]
-    if s3a_secret_key:
-        cmds += ["--s3a_secret_key={}".format(s3a_secret_key)]
+    if aws_s3_bucket:
+        cmds += ["--aws_s3_bucket={}".format(aws_s3_bucket)]
+    if aws_s3_access_key_id:
+        cmds += ["--aws_s3_access_key_id={}".format(aws_s3_access_key_id)]
+    if aws_s3_secret_access_key:
+        cmds += ["--aws_s3_secret_access_key={}".format(aws_s3_secret_access_key)]
 
     if project_id:
         cmds += ["--project_id={}".format(project_id)]
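The option rename forces the matching parameter rename in configure(), because click derives the parameter name from the long option. A stripped-down sketch of that wiring, keeping only the three S3 options (everything else from the real command is omitted for brevity):

    import click

    @click.command()
    @click.option('--aws_s3_bucket', required=False, type=str, default="",
                  help="the bucket name of s3")
    @click.option('--aws_s3_access_key_id', required=False, type=str, default="",
                  help="the access key id of s3")
    @click.option('--aws_s3_secret_access_key', required=False, type=str, default="",
                  help="the secret access key of s3")
    def configure(aws_s3_bucket, aws_s3_access_key_id, aws_s3_secret_access_key):
        # click maps --aws_s3_bucket to the aws_s3_bucket parameter, which is
        # why renaming the option renames the function argument as well.
        cmds = []
        if aws_s3_bucket:
            cmds += ["--aws_s3_bucket={}".format(aws_s3_bucket)]
        click.echo(" ".join(cmds))

    if __name__ == '__main__':
        configure()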
diff --git a/runtime/spark/scripts/configure.sh b/runtime/spark/scripts/configure.sh
index d1703d340..f925e1d4b 100644
--- a/runtime/spark/scripts/configure.sh
+++ b/runtime/spark/scripts/configure.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-args=$(getopt -a -o h::p: -l head::,head_address::,provider:,aws_s3a_bucket::,s3a_access_key::,s3a_secret_key::,project_id::,gcp_gcs_bucket::,fs_gs_auth_service_account_email::,fs_gs_auth_service_account_private_key_id::,fs_gs_auth_service_account_private_key::,azure_storage_kind::,azure_storage_account::,azure_container::,azure_account_key:: -- "$@")
+args=$(getopt -a -o h::p: -l head::,head_address::,provider:,aws_s3_bucket::,aws_s3_access_key_id::,aws_s3_secret_access_key::,project_id::,gcp_gcs_bucket::,fs_gs_auth_service_account_email::,fs_gs_auth_service_account_private_key_id::,fs_gs_auth_service_account_private_key::,azure_storage_kind::,azure_storage_account::,azure_container::,azure_account_key:: -- "$@")
 eval set -- "${args}"
 
 IS_HEAD_NODE=false
@@ -19,16 +19,16 @@ do
     provider=$2
     shift
     ;;
-    --aws_s3a_bucket)
-    AWS_S3A_BUCKET=$2
+    --aws_s3_bucket)
+    AWS_S3_BUCKET=$2
     shift
     ;;
-    --s3a_access_key)
-    FS_S3A_ACCESS_KEY=$2
+    --aws_s3_access_key_id)
+    AWS_S3_ACCESS_KEY_ID=$2
     shift
     ;;
-    --s3a_secret_key)
-    FS_S3A_SECRET_KEY=$2
+    --aws_s3_secret_access_key)
+    AWS_S3_SECRET_ACCESS_KEY=$2
     shift
     ;;
     --project_id)
@@ -123,15 +123,15 @@ function set_resources_for_spark() {
 }
 
 function update_config_for_aws() {
-    sed -i "s#{%aws.s3a.bucket%}#${AWS_S3A_BUCKET}#g" `grep "{%aws.s3a.bucket%}" -rl ./`
-    sed -i "s#{%fs.s3a.access.key%}#${FS_S3A_ACCESS_KEY}#g" `grep "{%fs.s3a.access.key%}" -rl ./`
-    sed -i "s#{%fs.s3a.secret.key%}#${FS_S3A_SECRET_KEY}#g" `grep "{%fs.s3a.secret.key%}" -rl ./`
+    sed -i "s#{%aws.s3a.bucket%}#${AWS_S3_BUCKET}#g" `grep "{%aws.s3a.bucket%}" -rl ./`
+    sed -i "s#{%fs.s3a.access.key%}#${AWS_S3_ACCESS_KEY_ID}#g" `grep "{%fs.s3a.access.key%}" -rl ./`
+    sed -i "s#{%fs.s3a.secret.key%}#${AWS_S3_SECRET_ACCESS_KEY}#g" `grep "{%fs.s3a.secret.key%}" -rl ./`
 
     # event log dir
-    if [ -z "${AWS_S3A_BUCKET}" ]; then
+    if [ -z "${AWS_S3_BUCKET}" ]; then
         event_log_dir="file:///tmp/spark-events"
     else
-        event_log_dir="s3a://${AWS_S3A_BUCKET}/shared/spark-events"
+        event_log_dir="s3a://${AWS_S3_BUCKET}/shared/spark-events"
     fi
     sed -i "s!{%spark.eventLog.dir%}!${event_log_dir}!g" `grep "{%spark.eventLog.dir%}" -rl ./`
 }
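Note that the template placeholders ({%aws.s3a.bucket%}, {%fs.s3a.access.key%}, {%fs.s3a.secret.key%}) keep their s3a names: only the shell variables feeding them were renamed, presumably because the rendered files are Hadoop/Spark configurations that still use the fs.s3a.* property names. A rough Python equivalent of what the three sed commands do, for illustration only (the helper and its call site are hypothetical, not part of the patch):

    import os

    # Renamed environment variables feeding the unchanged s3a placeholders.
    SUBSTITUTIONS = {
        "{%aws.s3a.bucket%}": os.environ.get("AWS_S3_BUCKET", ""),
        "{%fs.s3a.access.key%}": os.environ.get("AWS_S3_ACCESS_KEY_ID", ""),
        "{%fs.s3a.secret.key%}": os.environ.get("AWS_S3_SECRET_ACCESS_KEY", ""),
    }

    def render_template(path):
        # Equivalent of: sed -i "s#<placeholder>#<value>#g" <path>
        with open(path) as f:
            text = f.read()
        for placeholder, value in SUBSTITUTIONS.items():
            text = text.replace(placeholder, value)
        with open(path, "w") as f:
            f.write(text)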