From ea881b46b0a93fac8f1012bfc27af2e5234c605c Mon Sep 17 00:00:00 2001 From: Jay Thomason Date: Wed, 9 Oct 2024 18:13:40 -0700 Subject: [PATCH 1/2] aws: use IMDSv2 in zone shell cmd (#4052) * aws: use IMDSv2 in zone shell cmd Usage of IMDSv2 is considered a best practice for security. see: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html This change was tested manually on an ec2 instance using a test script. * fix formatting * fix whitespace * prefer imdsv2 in aws template for node config --- sky/clouds/aws.py | 5 ++++- sky/templates/aws-ray.yml.j2 | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/sky/clouds/aws.py b/sky/clouds/aws.py index be1ecce0350..2207a977f25 100644 --- a/sky/clouds/aws.py +++ b/sky/clouds/aws.py @@ -299,7 +299,10 @@ def get_zone_shell_cmd(cls) -> Optional[str]: # The command for getting the current zone is from: # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-identity-documents.html # pylint: disable=line-too-long command_str = ( - 'curl -s http://169.254.169.254/latest/dynamic/instance-identity/document' # pylint: disable=line-too-long + 'TOKEN=`curl -X PUT "http://169.254.169.254/latest/api/token" ' + '-H "X-aws-ec2-metadata-token-ttl-seconds: 21600"` && ' + 'curl -H "X-aws-ec2-metadata-token: $TOKEN" -s ' + 'http://169.254.169.254/latest/dynamic/instance-identity/document' f' | {constants.SKY_PYTHON_CMD} -u -c "import sys, json; ' 'print(json.load(sys.stdin)[\'availabilityZone\'])"') return command_str diff --git a/sky/templates/aws-ray.yml.j2 b/sky/templates/aws-ray.yml.j2 index 6afdf381cc0..11c3c3e1a3c 100644 --- a/sky/templates/aws-ray.yml.j2 +++ b/sky/templates/aws-ray.yml.j2 @@ -131,6 +131,9 @@ available_node_types: - Key: {{ label_key }} Value: {{ label_value|tojson }} {%- endfor %} + # Use IMDSv2 + MetadataOptions: + HttpTokens: required head_node_type: ray.head.default From 5491cf3e3e3945e5a9938df583e4155cff90d765 Mon Sep 17 00:00:00 2001 From: Tian 
Xia Date: Wed, 9 Oct 2024 22:01:27 -0700 Subject: [PATCH 2/2] [K8s] Add user hash to the kind config for multi-user system permission issue (#4045) * [K8s] Remove the kind config after `sky local down` for multi-user system permission issue * upd * fix * resolve comments --- sky/cli.py | 3 ++- sky/utils/kubernetes/create_cluster.sh | 13 ++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index c538c99aeb3..093db23adbf 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -5097,7 +5097,8 @@ def _deploy_local_cluster(gpus: bool): # Get directory of script and run it from there cwd = os.path.dirname(os.path.abspath(up_script_path)) - run_command = up_script_path + ' --gpus' if gpus else up_script_path + run_command = up_script_path + f' {common_utils.get_user_hash()}' + run_command = run_command + ' --gpus' if gpus else run_command run_command = shlex.split(run_command) # Setup logging paths diff --git a/sky/utils/kubernetes/create_cluster.sh b/sky/utils/kubernetes/create_cluster.sh index 52bbd1804e8..7c5c4cea57f 100755 --- a/sky/utils/kubernetes/create_cluster.sh +++ b/sky/utils/kubernetes/create_cluster.sh @@ -12,9 +12,11 @@ IMAGE_GPU="us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot-gpu:l PORT_RANGE_START=30000 PORT_RANGE_END=30100 +USER_HASH=$1 + # Check for GPU flag ENABLE_GPUS=false -if [[ "$1" == "--gpus" ]]; then +if [[ "$2" == "--gpus" ]]; then ENABLE_GPUS=true fi @@ -88,16 +90,17 @@ if kind get clusters | grep -q skypilot; then fi # Generate cluster YAML -echo "Generating /tmp/skypilot-kind.yaml" +YAML_PATH="/tmp/skypilot-kind-$USER_HASH.yaml" +echo "Generating $YAML_PATH" # Add GPUs flag to the generate_kind_config.py command if GPUs are enabled if $ENABLE_GPUS; then - python -m sky.utils.kubernetes.generate_kind_config --path /tmp/skypilot-kind.yaml --port-start ${PORT_RANGE_START} --port-end ${PORT_RANGE_END} --gpus + python -m sky.utils.kubernetes.generate_kind_config --path $YAML_PATH --port-start 
${PORT_RANGE_START} --port-end ${PORT_RANGE_END} --gpus else - python -m sky.utils.kubernetes.generate_kind_config --path /tmp/skypilot-kind.yaml --port-start ${PORT_RANGE_START} --port-end ${PORT_RANGE_END} + python -m sky.utils.kubernetes.generate_kind_config --path $YAML_PATH --port-start ${PORT_RANGE_START} --port-end ${PORT_RANGE_END} fi -kind create cluster --config /tmp/skypilot-kind.yaml --name skypilot +kind create cluster --config $YAML_PATH --name skypilot echo "Kind cluster created."