Skip to content

Commit

Permalink
Merge pull request #285 from weaviate/jose/clean-gcp-keys
Browse files Browse the repository at this point in the history
Add busy loop to wait for instance to be ssh'able
  • Loading branch information
jfrancoa authored Dec 10, 2024
2 parents eb97c1d + 51c9a2f commit 28763e6
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 9 deletions.
19 changes: 17 additions & 2 deletions ann_benchmark_gcp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,23 @@ function cleanup {
}
# Register cleanup to run whenever the script exits.
trap cleanup EXIT

echo "sleeping 30s for ssh to be ready"
sleep 30
# Poll until the VM accepts SSH connections, giving up after 5 minutes.
# SECONDS is bash's built-in timer: once reset to 0 it advances with wall-clock
# time on its own, so it already accounts for both the sleep and the time each
# SSH attempt takes. Incrementing it by hand would count every sleep twice and
# cut the effective timeout roughly in half.
echo "Waiting for SSH to be ready..."
SECONDS=0
timeout=300
ssh_ready=false
while ((SECONDS < timeout)); do
  if gcloud compute ssh --zone "$ZONE" "$instance" --command="echo SSH is ready" &>/dev/null; then
    ssh_ready=true
    break
  fi
  echo "SSH not ready, retrying in 5 seconds..."
  sleep 5
done

# Decide on an explicit flag rather than re-reading the timer: an attempt that
# succeeds just as the deadline passes must not be reported as a timeout.
if [[ "$ssh_ready" != true ]]; then
  echo "Timeout: VM is not SSH'able after 300 seconds" >&2
  exit 1
fi

# Copy install_docker_ubuntu.sh to the VM's home directory and run it there.
gcloud compute scp --zone "$ZONE" --recurse install_docker_ubuntu.sh "$instance:~"
gcloud compute ssh --zone "$ZONE" "$instance" -- 'sh install_docker_ubuntu.sh'
Expand Down
53 changes: 48 additions & 5 deletions ann_benchmark_quantization_aws.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,55 @@ instance_id=$(aws ec2 run-instances --image-id $ami --count 1 --instance-type $M
echo "instance ready: $instance_id"

# Tear down the AWS resources this script created: the EC2 instance, the key
# pair (plus its local .pem file) and the security group.
#
# Runs as a trap handler, so every step is best-effort: a failure in one step
# never prevents the remaining steps from being attempted.
#
# Globals read: instance_id, key_id, group_id (each step is skipped when its
#               id is empty or unset) and region.
# Outputs:      progress on stdout, failures on stderr.
# Exit status:  exits the shell with status 1 if the instance did not reach
#               the "terminated" state within 300 seconds, but only after the
#               key pair and security group steps have been attempted.
function cleanup() {
  set +e # Continue cleanup even if individual commands fail

  local timeout=300 poll_interval=5 sg_attempts=6
  local wait_started description status attempt sg_output
  local timed_out=false sg_deleted=false

  if [[ -n "${instance_id:-}" ]]; then
    echo "Terminating instance $instance_id"
    aws ec2 terminate-instances --instance-ids "$instance_id" --region "$region" | jq . || true

    # Poll the instance state until it is terminated or the timeout elapses.
    # Elapsed time is measured against bash's self-advancing SECONDS timer
    # without resetting or hand-incrementing it, so the rest of the script's
    # use of SECONDS is unaffected and each sleep is counted only once.
    echo "Waiting for instance to terminate..."
    timed_out=true
    wait_started=$SECONDS
    while ((SECONDS - wait_started < timeout)); do
      # Check the aws exit status on its own: in a plain `aws | jq` pipeline
      # only jq's status is visible, so an aws failure would go unnoticed.
      status="error"
      if description=$(aws ec2 describe-instances --instance-ids "$instance_id" --region "$region"); then
        status=$(jq -r '.Reservations[0].Instances[0].State.Name' <<<"$description") || status="error"
      fi

      if [[ "$status" == "terminated" ]]; then
        echo "Instance successfully terminated"
        timed_out=false
        break
      elif [[ "$status" == "error" ]]; then
        echo "Instance not found - assuming terminated"
        timed_out=false
        break
      fi
      echo "Instance status: $status"
      sleep "$poll_interval"
    done

    if [[ "$timed_out" == true ]]; then
      echo "Error: Timeout waiting for instance termination. Please check AWS instances for manual cleanup." >&2
    fi
  fi

  if [[ -n "${key_id:-}" ]]; then
    echo "Deleting key pair $key_id"
    aws ec2 delete-key-pair --key-name "$key_id" --region "$region" | jq . || true
    rm -f "${key_id}.pem" || true
  fi

  if [[ -n "${group_id:-}" ]]; then
    echo "Deleting security group $group_id"
    # Retry because deletion fails while the instance is still terminating.
    # The aws exit status drives the retry; its output is only pretty-printed.
    for ((attempt = 1; attempt <= sg_attempts; attempt++)); do
      if sg_output=$(aws ec2 delete-security-group --group-id "$group_id" --region "$region"); then
        if [[ -n "$sg_output" ]]; then
          jq . <<<"$sg_output"
        fi
        sg_deleted=true
        break
      fi
      if ((attempt < sg_attempts)); then
        echo "Retrying security group deletion in 10 seconds..."
        sleep 10
      fi
    done
    if [[ "$sg_deleted" != true ]]; then
      echo "Error: Could not delete security group $group_id after $sg_attempts attempts. Please check AWS for manual cleanup." >&2
    fi
  fi

  # Report the termination timeout through the exit status only now, so the
  # key pair and security group were not left behind because of it.
  if [[ "$timed_out" == true ]]; then
    exit 1
  fi
}
# Run cleanup exactly once, whatever ends the script. Only EXIT invokes it
# directly; the signal and error handlers just exit, which in turn fires the
# EXIT trap. A handler that called cleanup itself would return to the
# interrupted script afterwards (with the instance already gone), and cleanup
# would then run a second time on exit.
trap cleanup EXIT
trap 'exit 130' SIGINT  # 128 + SIGINT (2)
trap 'exit 143' SIGTERM # 128 + SIGTERM (15)
trap 'exit $?' ERR      # keep the failing command's status

dns_name=
for i in {1..600}; do
Expand Down
19 changes: 17 additions & 2 deletions ann_benchmark_quantization_gcp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,23 @@ function cleanup {
}
# Register cleanup to run whenever the script exits.
trap cleanup EXIT

echo "sleeping 30s for ssh to be ready"
sleep 30
# Poll until the VM accepts SSH connections, giving up after 5 minutes.
# SECONDS is bash's built-in timer: once reset to 0 it advances with wall-clock
# time on its own, so it already accounts for both the sleep and the time each
# SSH attempt takes. Incrementing it by hand would count every sleep twice and
# cut the effective timeout roughly in half.
echo "Waiting for SSH to be ready..."
SECONDS=0
timeout=300
ssh_ready=false
while ((SECONDS < timeout)); do
  if gcloud compute ssh --zone "$ZONE" "$instance" --command="echo SSH is ready" &>/dev/null; then
    ssh_ready=true
    break
  fi
  echo "SSH not ready, retrying in 5 seconds..."
  sleep 5
done

# Decide on an explicit flag rather than re-reading the timer: an attempt that
# succeeds just as the deadline passes must not be reported as a timeout.
if [[ "$ssh_ready" != true ]]; then
  echo "Timeout: VM is not SSH'able after 300 seconds" >&2
  exit 1
fi

# Copy install_docker_ubuntu.sh to the VM's home directory and run it there.
gcloud compute scp --zone "$ZONE" --recurse install_docker_ubuntu.sh "$instance:~"
gcloud compute ssh --zone "$ZONE" "$instance" -- 'sh install_docker_ubuntu.sh'
Expand Down

0 comments on commit 28763e6

Please sign in to comment.