diff --git a/ann_benchmark_gcp.sh b/ann_benchmark_gcp.sh
index 9e215936..90d275ef 100755
--- a/ann_benchmark_gcp.sh
+++ b/ann_benchmark_gcp.sh
@@ -18,8 +18,26 @@ function cleanup {
 }
 trap cleanup EXIT
 
-echo "sleeping 30s for ssh to be ready"
-sleep 30
+# Poll until the VM accepts SSH connections, giving up after 5 minutes.
+# SECONDS is a bash builtin that advances on its own once assigned, so it is
+# only reset here and never incremented by hand.
+echo "Waiting for SSH to be ready..."
+SECONDS=0
+timeout=300
+ssh_ready=false
+while [ "$SECONDS" -lt "$timeout" ]; do
+  if gcloud compute ssh --zone "$ZONE" "$instance" --command="echo SSH is ready" &>/dev/null; then
+    ssh_ready=true
+    break
+  fi
+  echo "SSH not ready, retrying in 5 seconds..."
+  sleep 5
+done
+
+if [ "$ssh_ready" != true ]; then
+  echo "Timeout: VM is not SSH'able after ${timeout} seconds"
+  exit 1
+fi
 
 gcloud compute scp --zone $ZONE --recurse install_docker_ubuntu.sh "$instance:~"
 gcloud compute ssh --zone $ZONE $instance -- 'sh install_docker_ubuntu.sh'
diff --git a/ann_benchmark_quantization_aws.sh b/ann_benchmark_quantization_aws.sh
index 7ba24abc..9d61d568 100755
--- a/ann_benchmark_quantization_aws.sh
+++ b/ann_benchmark_quantization_aws.sh
@@ -33,12 +33,83 @@ instance_id=$(aws ec2 run-instances --image-id $ami --count 1 --instance-type $M
 echo "instance ready: $instance_id"
 
 function cleanup() {
-  aws ec2 terminate-instances --instance-ids "$instance_id" --region "$region" | jq
-  aws ec2 wait instance-terminated --instance-ids "$instance_id" --region "$region"
-  aws ec2 delete-key-pair --key-name "$key_id" --region "$region" | jq
-  aws ec2 delete-security-group --group-id "$group_id" --region "$region" | jq
+  # Tear down the EC2 instance, key pair and security group referenced by
+  # instance_id, key_id and group_id. A failing step is reported but never
+  # stops the remaining steps, so one stuck resource does not leak the others.
+  set +e # Continue cleanup even if individual commands fail
+  local cleanup_failed=0 status="" description attempt deleted=false
+  local timeout=300
+
+  if [ -n "$instance_id" ]; then
+    echo "Terminating instance $instance_id"
+    aws ec2 terminate-instances --instance-ids "$instance_id" --region "$region" | jq || true
+
+    # Poll until the instance reports "terminated". SECONDS is a bash builtin
+    # that advances on its own once assigned, so it is only reset here.
+    echo "Waiting for instance to terminate..."
+    SECONDS=0
+    while [ "$SECONDS" -lt "$timeout" ]; do
+      # Check the aws exit status on its own: inside a pipeline it would be
+      # hidden behind jq's exit status unless pipefail is enabled.
+      if description=$(aws ec2 describe-instances --instance-ids "$instance_id" --region "$region"); then
+        status=$(jq -r '.Reservations[0].Instances[0].State.Name' <<<"$description") || status="error"
+      else
+        status="error"
+      fi
+      if [ "$status" = "terminated" ]; then
+        echo "Instance successfully terminated"
+        break
+      elif [ "$status" = "error" ]; then
+        echo "Instance not found - assuming terminated"
+        break
+      fi
+      echo "Instance status: $status"
+      sleep 5
+    done
+
+    if [ "$status" != "terminated" ] && [ "$status" != "error" ]; then
+      echo "Error: Timeout waiting for instance termination. Please check AWS instances for manual cleanup."
+      cleanup_failed=1
+    fi
+  fi
+
+  if [ -n "$key_id" ]; then
+    echo "Deleting key pair $key_id"
+    aws ec2 delete-key-pair --key-name "$key_id" --region "$region" | jq || true
+    rm -f -- "${key_id}.pem" || true
+  fi
+
+  if [ -n "$group_id" ]; then
+    echo "Deleting security group $group_id"
+    # Retry since deletion might fail if the instance is still terminating.
+    # The aws exit status is tested directly; piping through jq would test
+    # jq's exit status instead unless pipefail is enabled.
+    for attempt in {1..6}; do
+      if aws ec2 delete-security-group --group-id "$group_id" --region "$region"; then
+        deleted=true
+        break
+      fi
+      if [ "$attempt" -lt 6 ]; then
+        echo "Retrying security group deletion in 10 seconds..."
+        sleep 10
+      fi
+    done
+    if [ "$deleted" != true ]; then
+      echo "Error: Could not delete security group $group_id. Please check AWS for manual cleanup."
+    fi
+  fi
+
+  # Report the termination timeout only after every step has been attempted.
+  if [ "$cleanup_failed" -ne 0 ]; then
+    exit 1
+  fi
 }
+# cleanup runs once, from the EXIT trap. INT and TERM are turned into a normal
+# exit so they reach cleanup through EXIT; trapping them (or ERR) with cleanup
+# directly would run it and then resume the script with its resources gone.
 trap cleanup EXIT
+trap 'exit 130' INT
+trap 'exit 143' TERM
 
 dns_name=
 for i in {1..600}; do
diff --git a/ann_benchmark_quantization_gcp.sh b/ann_benchmark_quantization_gcp.sh
index e17a5624..dd2e5f51 100755
--- a/ann_benchmark_quantization_gcp.sh
+++ b/ann_benchmark_quantization_gcp.sh
@@ -18,8 +18,26 @@ function cleanup {
 }
 trap cleanup EXIT
 
-echo "sleeping 30s for ssh to be ready"
-sleep 30
+# Poll until the VM accepts SSH connections, giving up after 5 minutes.
+# SECONDS is a bash builtin that advances on its own once assigned, so it is
+# only reset here and never incremented by hand.
+echo "Waiting for SSH to be ready..."
+SECONDS=0
+timeout=300
+ssh_ready=false
+while [ "$SECONDS" -lt "$timeout" ]; do
+  if gcloud compute ssh --zone "$ZONE" "$instance" --command="echo SSH is ready" &>/dev/null; then
+    ssh_ready=true
+    break
+  fi
+  echo "SSH not ready, retrying in 5 seconds..."
+  sleep 5
+done
+
+if [ "$ssh_ready" != true ]; then
+  echo "Timeout: VM is not SSH'able after ${timeout} seconds"
+  exit 1
+fi
 
 gcloud compute scp --zone $ZONE --recurse install_docker_ubuntu.sh "$instance:~"
 gcloud compute ssh --zone $ZONE $instance -- 'sh install_docker_ubuntu.sh'