
Commit

Use standard aws storage configuration names (#210)
jerrychenhf authored Apr 17, 2022
1 parent d8045fa commit 083c344
Showing 10 changed files with 59 additions and 59 deletions.
README.md (6 changes: 3 additions & 3 deletions)

@@ -105,10 +105,10 @@ provider:
     type: aws
     region: us-west-2
     # S3 configurations for storage
-    aws_s3a_storage:
+    aws_s3_storage:
         s3.bucket: your_s3_bucket
-        fs.s3a.access.key: your_s3_access_key
-        fs.s3a.secret.key: your_s3_secret_key
+        s3.access.key.id: your_s3_access_key_id
+        s3.secret.access.key: your_s3_secret_access_key

 auth:
     ssh_user: ubuntu
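
For anyone carrying an existing cluster config across this commit, the rename is mechanical: one section name and two key names change, in the README and in both example YAMLs below alike. A minimal migration sketch, assuming a provider config already parsed into a dict; migrate_aws_storage_config and KEY_RENAMES are illustrative names, not CloudTik APIs:

# Hypothetical migration helper: illustrates the renames in this commit.
# Not part of CloudTik; the names here are made up for the example.
SECTION_RENAME = ("aws_s3a_storage", "aws_s3_storage")
KEY_RENAMES = {
    "fs.s3a.access.key": "s3.access.key.id",
    "fs.s3a.secret.key": "s3.secret.access.key",
}


def migrate_aws_storage_config(provider_config: dict) -> dict:
    """Return a copy of provider_config using the new storage names."""
    old_section, new_section = SECTION_RENAME
    migrated = dict(provider_config)
    storage = migrated.pop(old_section, None)
    if storage is not None:
        migrated[new_section] = {
            KEY_RENAMES.get(key, key): value for key, value in storage.items()
        }
    return migrated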
example/cluster/aws/example-docker.yaml (6 changes: 3 additions & 3 deletions)

@@ -16,10 +16,10 @@ provider:
     type: aws
     region: us-west-2
     # S3 configurations for storage
-    aws_s3a_storage:
+    aws_s3_storage:
         s3.bucket: your_s3_bucket
-        fs.s3a.access.key: your_s3_access_key
-        fs.s3a.secret.key: your_s3_secret_key
+        s3.access.key.id: your_s3_access_key_id
+        s3.secret.access.key: your_s3_secret_access_key

 auth:
     ssh_user: ubuntu
example/cluster/aws/example-standard.yaml (6 changes: 3 additions & 3 deletions)

@@ -12,10 +12,10 @@ provider:
     type: aws
     region: us-west-2
    # S3 configurations for storage
-    aws_s3a_storage:
+    aws_s3_storage:
         s3.bucket: your_s3_bucket
-        fs.s3a.access.key: your_s3_access_key
-        fs.s3a.secret.key: your_s3_secret_key
+        s3.access.key.id: your_s3_access_key_id
+        s3.secret.access.key: your_s3_secret_access_key

 auth:
     ssh_user: ubuntu
python/cloudtik/core/config-schema.json (12 changes: 6 additions & 6 deletions)

@@ -224,22 +224,22 @@
             }
         }
     },
-    "aws_s3a_storage": {
+    "aws_s3_storage": {
         "type": "object",
-        "description": "use s3a",
+        "description": "AWS S3 storage configurations",
         "additionalProperties": false,
         "properties": {
             "s3.bucket": {
                 "type": "string",
                 "description": "the s3 bucket name"
             },
-            "fs.s3a.access.key": {
+            "s3.access.key.id": {
                 "type": "string",
-                "description": "access key of s3a"
+                "description": "access key id of s3"
             },
-            "fs.s3a.secret.key": {
+            "s3.secret.access.key": {
                 "type": "string",
-                "description": "secret key of s3a"
+                "description": "secret access key of s3"
             }
         }
     },
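
Because the schema sets additionalProperties to false, a config that still uses the old fs.s3a.* keys now fails validation rather than being silently ignored. A quick sketch of that behavior, assuming the third-party jsonschema package and inlining only the aws_s3_storage fragment above:

import jsonschema  # third-party: pip install jsonschema

# The aws_s3_storage fragment from config-schema.json, inlined for the demo.
aws_s3_storage_schema = {
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "s3.bucket": {"type": "string"},
        "s3.access.key.id": {"type": "string"},
        "s3.secret.access.key": {"type": "string"},
    },
}

# New-style keys validate cleanly.
jsonschema.validate(
    instance={"s3.bucket": "my-bucket", "s3.access.key.id": "AKIAEXAMPLE"},
    schema=aws_s3_storage_schema,
)

# Old-style keys are rejected because additionalProperties is false.
try:
    jsonschema.validate(
        instance={"fs.s3a.access.key": "AKIAEXAMPLE"},
        schema=aws_s3_storage_schema,
    )
except jsonschema.ValidationError as err:
    print("rejected:", err.message)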
python/cloudtik/providers/_private/aws/config.py (6 changes: 3 additions & 3 deletions)

@@ -1902,11 +1902,11 @@ def _security_groups_in_network_config(config: Dict[str, Any]) \


 def verify_s3_storage(provider_config: Dict[str, Any]):
-    s3_storage = provider_config["aws_s3a_storage"]
+    s3_storage = provider_config["aws_s3_storage"]
     s3 = boto3.client(
         's3',
-        aws_access_key_id=s3_storage["fs.s3a.access.key"],
-        aws_secret_access_key=s3_storage["fs.s3a.secret.key"]
+        aws_access_key_id=s3_storage["s3.access.key.id"],
+        aws_secret_access_key=s3_storage["s3.secret.access.key"]
     )

     try:
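
The hunk is truncated at the try block, so the actual probe is not visible here. As a standalone sketch only: a common way to verify credentials and bucket reachability is a head_bucket call, which is an assumption on my part, not necessarily what verify_s3_storage does.

import boto3
from botocore.exceptions import ClientError


def verify_s3_storage_sketch(provider_config: dict) -> None:
    # Same client construction as the diff above, using the renamed keys.
    s3_storage = provider_config["aws_s3_storage"]
    s3 = boto3.client(
        's3',
        aws_access_key_id=s3_storage["s3.access.key.id"],
        aws_secret_access_key=s3_storage["s3.secret.access.key"]
    )
    try:
        # Assumed probe: a cheap existence/permission check on the bucket.
        s3.head_bucket(Bucket=s3_storage["s3.bucket"])
    except ClientError as err:
        raise RuntimeError("S3 storage verification failed: {}".format(err))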
python/cloudtik/providers/_private/aws/node_provider.py (10 changes: 5 additions & 5 deletions)

@@ -18,7 +18,7 @@

 from cloudtik.providers._private.aws.config import bootstrap_aws, bootstrap_aws_from_workspace, verify_s3_storage
 from cloudtik.providers._private.aws.utils import boto_exception_handler, \
-    resource_cache, client_cache, get_aws_s3a_config, get_boto_error_code
+    resource_cache, client_cache, get_aws_s3_config, get_boto_error_code
 from cloudtik.providers._private.utils import validate_config_dict

 logger = logging.getLogger(__name__)

@@ -123,7 +123,7 @@ def __init__(self, provider_config, cluster_name):
         self.cached_nodes = {}

     def with_provider_environment_variables(self):
-        return get_aws_s3a_config(self.provider_config)
+        return get_aws_s3_config(self.provider_config)

     def non_terminated_nodes(self, tag_filters):
         # Note that these filters are acceptable because they are set on

@@ -681,9 +681,9 @@ def validate_config(
 def validate_storage_config(
         provider_config: Dict[str, Any]) -> None:
     config_dict = {
-        "s3.bucket": provider_config.get("aws_s3a_storage", {}).get("s3.bucket"),
-        "fs.s3a.access.key": provider_config.get("aws_s3a_storage", {}).get("fs.s3a.access.key"),
-        "fs.s3a.secret.key": provider_config.get("aws_s3a_storage", {}).get("fs.s3a.secret.key")
+        "s3.bucket": provider_config.get("aws_s3_storage", {}).get("s3.bucket"),
+        "s3.access.key.id": provider_config.get("aws_s3_storage", {}).get("s3.access.key.id"),
+        "s3.secret.access.key": provider_config.get("aws_s3_storage", {}).get("s3.secret.access.key")
     }

     validate_config_dict(provider_config["type"], config_dict)
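
validate_config_dict itself is imported from providers._private.utils and its body is not part of this diff. Purely as a labeled assumption, a checker of this shape would explain the call above: collect the renamed keys and raise if any required value is unset.

from typing import Any, Dict


def validate_config_dict_sketch(provider_type: str,
                                config_dict: Dict[str, Any]) -> None:
    # Assumed behavior only; the real validate_config_dict is not shown here.
    missing = [key for key, value in config_dict.items() if not value]
    if missing:
        raise ValueError("Missing {} storage config values: {}".format(
            provider_type, ", ".join(missing)))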
python/cloudtik/providers/_private/aws/utils.py (18 changes: 9 additions & 9 deletions)

@@ -139,19 +139,19 @@ def __exit__(self, type, value, tb):
     return ExceptionHandlerContextManager()


-def get_aws_s3a_config(provider_config):
+def get_aws_s3_config(provider_config):
     config_dict = {}
-    s3_bucket = provider_config.get("aws_s3a_storage", {}).get("s3.bucket")
+    s3_bucket = provider_config.get("aws_s3_storage", {}).get("s3.bucket")
     if s3_bucket:
-        config_dict["AWS_S3A_BUCKET"] = s3_bucket
+        config_dict["AWS_S3_BUCKET"] = s3_bucket

-    s3_access_key = provider_config.get("aws_s3a_storage", {}).get("fs.s3a.access.key")
-    if s3_access_key:
-        config_dict["FS_S3A_ACCESS_KEY"] = s3_access_key
+    s3_access_key_id = provider_config.get("aws_s3_storage", {}).get("s3.access.key.id")
+    if s3_access_key_id:
+        config_dict["AWS_S3_ACCESS_KEY_ID"] = s3_access_key_id

-    s3_secret_key = provider_config.get("aws_s3a_storage", {}).get("fs.s3a.secret.key")
-    if s3_secret_key:
-        config_dict["FS_S3A_SECRET_KEY"] = s3_secret_key
+    s3_secret_access_key = provider_config.get("aws_s3_storage", {}).get("s3.secret.access.key")
+    if s3_secret_access_key:
+        config_dict["AWS_S3_SECRET_ACCESS_KEY"] = s3_secret_access_key

     return config_dict
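
The mapping from the renamed YAML keys to the environment variables consumed later by the setup commands is now one-to-one. For example, with get_aws_s3_config from the diff above in scope (the credential values are placeholders):

provider_config = {
    "type": "aws",
    "aws_s3_storage": {
        "s3.bucket": "my-bucket",
        "s3.access.key.id": "AKIAEXAMPLE",
        "s3.secret.access.key": "wJalrEXAMPLE",
    },
}

# Each configured value becomes one provider environment variable.
print(get_aws_s3_config(provider_config))
# {'AWS_S3_BUCKET': 'my-bucket',
#  'AWS_S3_ACCESS_KEY_ID': 'AKIAEXAMPLE',
#  'AWS_S3_SECRET_ACCESS_KEY': 'wJalrEXAMPLE'}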
python/cloudtik/providers/aws/defaults.yaml (4 changes: 2 additions & 2 deletions)

@@ -87,9 +87,9 @@ setup_commands:
 head_setup_commands:
     - pip install 'boto3>=1.4.8' # 1.4.8 adds InstanceMarketOptions
     - cloudtik-spark install --head --provider=aws
-    - cloudtik-spark configure --head --provider=aws --aws_s3a_bucket=$AWS_S3A_BUCKET --s3a_access_key=$FS_S3A_ACCESS_KEY --s3a_secret_key=$FS_S3A_SECRET_KEY
+    - cloudtik-spark configure --head --provider=aws --aws_s3_bucket=$AWS_S3_BUCKET --aws_s3_access_key_id=$AWS_S3_ACCESS_KEY_ID --aws_s3_secret_access_key=$AWS_S3_SECRET_ACCESS_KEY

 # Custom commands that will be run on worker nodes after common setup.
 worker_setup_commands:
     - cloudtik-spark install --provider=aws
-    - cloudtik-spark configure --provider=aws --head_address=$CLOUDTIK_HEAD_IP --aws_s3a_bucket=$AWS_S3A_BUCKET --s3a_access_key=$FS_S3A_ACCESS_KEY --s3a_secret_key=$FS_S3A_SECRET_KEY
+    - cloudtik-spark configure --provider=aws --head_address=$CLOUDTIK_HEAD_IP --aws_s3_bucket=$AWS_S3_BUCKET --aws_s3_access_key_id=$AWS_S3_ACCESS_KEY_ID --aws_s3_secret_access_key=$AWS_S3_SECRET_ACCESS_KEY
python/cloudtik/runtime/spark/scripts.py (26 changes: 13 additions & 13 deletions)

@@ -92,23 +92,23 @@ def install(head, provider, script_args):
     default="",
     help="the head ip ")
 @click.option(
-    '--aws_s3a_bucket',
+    '--aws_s3_bucket',
     required=False,
     type=str,
     default="",
-    help="the bucket name of s3a")
+    help="the bucket name of s3")
 @click.option(
-    '--s3a_access_key',
+    '--aws_s3_access_key_id',
     required=False,
     type=str,
     default="",
-    help="the access key of s3a")
+    help="the access key id of s3")
 @click.option(
-    '--s3a_secret_key',
+    '--aws_s3_secret_access_key',
     required=False,
     type=str,
     default="",
-    help="the secret key of s3a")
+    help="the secret access key of s3")
 @click.option(
     '--project_id',
     required=False,

@@ -164,7 +164,7 @@ def install(head, provider, script_args):
     default="",
     help="azure storage account access key")
 @click.argument("script_args", nargs=-1)
-def configure(head, provider, head_address, aws_s3a_bucket, s3a_access_key, s3a_secret_key, project_id, gcp_gcs_bucket,
+def configure(head, provider, head_address, aws_s3_bucket, aws_s3_access_key_id, aws_s3_secret_access_key, project_id, gcp_gcs_bucket,
               fs_gs_auth_service_account_email, fs_gs_auth_service_account_private_key_id,
               fs_gs_auth_service_account_private_key, azure_storage_kind, azure_storage_account, azure_container,
               azure_account_key, script_args):

@@ -181,12 +181,12 @@ def configure(head, provider, head_address, aws_s3a_bucket, s3a_access_key, s3a_
     if head_address:
         cmds += ["--head_address={}".format(head_address)]

-    if aws_s3a_bucket:
-        cmds += ["--aws_s3a_bucket={}".format(aws_s3a_bucket)]
-    if s3a_access_key:
-        cmds += ["--s3a_access_key={}".format(s3a_access_key)]
-    if s3a_secret_key:
-        cmds += ["--s3a_secret_key={}".format(s3a_secret_key)]
+    if aws_s3_bucket:
+        cmds += ["--aws_s3_bucket={}".format(aws_s3_bucket)]
+    if aws_s3_access_key_id:
+        cmds += ["--aws_s3_access_key_id={}".format(aws_s3_access_key_id)]
+    if aws_s3_secret_access_key:
+        cmds += ["--aws_s3_secret_access_key={}".format(aws_s3_secret_access_key)]

     if project_id:
         cmds += ["--project_id={}".format(project_id)]
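
The renamed options can be exercised with click's test runner. A sketch, assuming the import path below; note that a real invocation would go on to run the underlying configure.sh, so this is not side-effect free:

from click.testing import CliRunner

from cloudtik.runtime.spark.scripts import configure  # assumed import path

runner = CliRunner()
# Demonstrates the renamed flags; the values are placeholders.
result = runner.invoke(configure, [
    "--head",
    "--provider=aws",
    "--aws_s3_bucket=my-bucket",
    "--aws_s3_access_key_id=AKIAEXAMPLE",
    "--aws_s3_secret_access_key=wJalrEXAMPLE",
])
print(result.exit_code, result.output)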
runtime/spark/scripts/configure.sh (24 changes: 12 additions & 12 deletions)

@@ -1,6 +1,6 @@
 #!/bin/bash

-args=$(getopt -a -o h::p: -l head::,head_address::,provider:,aws_s3a_bucket::,s3a_access_key::,s3a_secret_key::,project_id::,gcp_gcs_bucket::,fs_gs_auth_service_account_email::,fs_gs_auth_service_account_private_key_id::,fs_gs_auth_service_account_private_key::,azure_storage_kind::,azure_storage_account::,azure_container::,azure_account_key:: -- "$@")
+args=$(getopt -a -o h::p: -l head::,head_address::,provider:,aws_s3_bucket::,aws_s3_access_key_id::,aws_s3_secret_access_key::,project_id::,gcp_gcs_bucket::,fs_gs_auth_service_account_email::,fs_gs_auth_service_account_private_key_id::,fs_gs_auth_service_account_private_key::,azure_storage_kind::,azure_storage_account::,azure_container::,azure_account_key:: -- "$@")
 eval set -- "${args}"

 IS_HEAD_NODE=false

@@ -19,16 +19,16 @@ do
         provider=$2
         shift
         ;;
-    --aws_s3a_bucket)
-        AWS_S3A_BUCKET=$2
+    --aws_s3_bucket)
+        AWS_S3_BUCKET=$2
         shift
         ;;
-    --s3a_access_key)
-        FS_S3A_ACCESS_KEY=$2
+    --aws_s3_access_key_id)
+        AWS_S3_ACCESS_KEY_ID=$2
         shift
         ;;
-    --s3a_secret_key)
-        FS_S3A_SECRET_KEY=$2
+    --aws_s3_secret_access_key)
+        AWS_S3_SECRET_ACCESS_KEY=$2
         shift
         ;;
     --project_id)

@@ -123,15 +123,15 @@ function set_resources_for_spark() {
 }

 function update_config_for_aws() {
-    sed -i "s#{%aws.s3a.bucket%}#${AWS_S3A_BUCKET}#g" `grep "{%aws.s3a.bucket%}" -rl ./`
-    sed -i "s#{%fs.s3a.access.key%}#${FS_S3A_ACCESS_KEY}#g" `grep "{%fs.s3a.access.key%}" -rl ./`
-    sed -i "s#{%fs.s3a.secret.key%}#${FS_S3A_SECRET_KEY}#g" `grep "{%fs.s3a.secret.key%}" -rl ./`
+    sed -i "s#{%aws.s3a.bucket%}#${AWS_S3_BUCKET}#g" `grep "{%aws.s3a.bucket%}" -rl ./`
+    sed -i "s#{%fs.s3a.access.key%}#${AWS_S3_ACCESS_KEY_ID}#g" `grep "{%fs.s3a.access.key%}" -rl ./`
+    sed -i "s#{%fs.s3a.secret.key%}#${AWS_S3_SECRET_ACCESS_KEY}#g" `grep "{%fs.s3a.secret.key%}" -rl ./`

     # event log dir
-    if [ -z "${AWS_S3A_BUCKET}" ]; then
+    if [ -z "${AWS_S3_BUCKET}" ]; then
         event_log_dir="file:///tmp/spark-events"
     else
-        event_log_dir="s3a://${AWS_S3A_BUCKET}/shared/spark-events"
+        event_log_dir="s3a://${AWS_S3_BUCKET}/shared/spark-events"
     fi
     sed -i "s!{%spark.eventLog.dir%}!${event_log_dir}!g" `grep "{%spark.eventLog.dir%}" -rl ./`
 }
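
Note that only the shell variables are renamed here; the {%fs.s3a.*%} template placeholders keep their s3a names because the generated Hadoop/Spark configs still target the s3a filesystem. The effect of the sed loop, sketched in Python with hypothetical file handling and placeholder values:

import pathlib

# Values that configure.sh receives via the renamed AWS_S3_* variables.
substitutions = {
    "{%aws.s3a.bucket%}": "my-bucket",         # ${AWS_S3_BUCKET}
    "{%fs.s3a.access.key%}": "AKIAEXAMPLE",    # ${AWS_S3_ACCESS_KEY_ID}
    "{%fs.s3a.secret.key%}": "wJalrEXAMPLE",   # ${AWS_S3_SECRET_ACCESS_KEY}
}

# Rough equivalent of `sed -i ... $(grep ... -rl ./)`: rewrite every file
# under the current directory that contains one of the placeholders.
for path in pathlib.Path(".").rglob("*"):
    if not path.is_file():
        continue
    text = path.read_text(errors="ignore")
    if any(placeholder in text for placeholder in substitutions):
        for placeholder, value in substitutions.items():
            text = text.replace(placeholder, value)
        path.write_text(text)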
