Skip to content

Commit

Permalink
Handle eventually-consistent PrivateDnsName
Browse files Browse the repository at this point in the history
  • Loading branch information
cartermckinnon committed Aug 29, 2023
1 parent f74b8e6 commit fe4063a
Show file tree
Hide file tree
Showing 5 changed files with 251 additions and 8 deletions.
47 changes: 47 additions & 0 deletions files/bin/private-dns-name
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env bash

# Exit immediately when a command fails.
set -o errexit
# Treat expansion of an unset variable as an error.
set -o nounset
# Trace every command as it runs. Bash writes the trace to stderr by default,
# so it does not mix with the PrivateDnsName this script prints on stdout.
# NOTE(review): pipefail is not enabled, so only the last command of a
# pipeline decides whether errexit fires — confirm this is intended.
set -o xtrace

# Retrieves the PrivateDnsName from EC2 for this instance, waiting until
# it is available if necessary (due to eventual consistency).

# Emits one log line on stderr: an ISO-8601-like timestamp, the
# "[private-dns-name]" tag, and then every argument, separated by spaces.
# stdout is left untouched.
function log {
  # Join with a single space regardless of the caller's IFS.
  local IFS=' '
  # The timestamp is captured inside the `local` declaration on purpose: a
  # failing `date` must not abort the script, same as an inline substitution.
  local -a fields=("$(date '+%Y-%m-%dT%H:%M:%S%z')" "[private-dns-name]" "$@")
  printf '%s\n' "${fields[*]}" >&2
}

# Main flow: look up this instance's ID and region from IMDS, then poll
# `aws ec2 describe-instances` until PrivateDnsName is non-empty or the retry
# budget is exhausted. On success the name is printed on stdout (exit 0);
# otherwise an error is logged and the script exits 1.
#
# Tunables (environment variables, defaults in parentheses):
#   PRIVATE_DNS_NAME_MAX_ATTEMPTS      number of retries after the first call (20)
#   PRIVATE_DNS_NAME_ATTEMPT_INTERVAL  base delay between calls, in seconds (6)
#   PRIVATE_DNS_NAME_JITTER            max +/- seconds added to each delay (1)

INSTANCE_ID=$(imds /latest/meta-data/instance-id)

# The AWS CLI currently constructs the wrong endpoint URL on Local Zones (the
# availability zone group is used instead of the parent region), so the region
# is passed explicitly.
# more info: https://github.com/aws/aws-cli/issues/7043
REGION=$(imds /latest/meta-data/placement/region)

# by default, wait for ~120 seconds (depending on jitter)
PRIVATE_DNS_NAME_MAX_ATTEMPTS=${PRIVATE_DNS_NAME_MAX_ATTEMPTS:-20}
PRIVATE_DNS_NAME_ATTEMPT_INTERVAL=${PRIVATE_DNS_NAME_ATTEMPT_INTERVAL:-6}
PRIVATE_DNS_NAME_JITTER=${PRIVATE_DNS_NAME_JITTER:-1}

log "will make up to ${PRIVATE_DNS_NAME_MAX_ATTEMPTS} attempt(s) every ${PRIVATE_DNS_NAME_ATTEMPT_INTERVAL} second(s) with ${PRIVATE_DNS_NAME_JITTER} second(s) of jitter"

ATTEMPT=0
while true; do
  # `// empty` turns a null/missing PrivateDnsName into no output; without it
  # `jq -r` prints the literal string "null", which would be mistaken for a
  # valid hostname. An empty-string value still yields an empty result.
  PRIVATE_DNS_NAME=$(aws ec2 describe-instances --region "${REGION}" --instance-ids "${INSTANCE_ID}" | jq -r '.Reservations[].Instances[].PrivateDnsName // empty')
  # The budget is checked before the counter is incremented, so the loop
  # sleeps at most MAX_ATTEMPTS times and calls EC2 at most MAX_ATTEMPTS + 1
  # times.
  if [[ -n "${PRIVATE_DNS_NAME}" ]] || [[ "${ATTEMPT}" -ge "${PRIVATE_DNS_NAME_MAX_ATTEMPTS}" ]]; then
    break
  fi
  ATTEMPT=$((ATTEMPT + 1))
  # Pick a random integer in [-JITTER, +JITTER].
  JITTER=$(seq "-${PRIVATE_DNS_NAME_JITTER}" "${PRIVATE_DNS_NAME_JITTER}" | shuf -n1)
  DELAY=$((PRIVATE_DNS_NAME_ATTEMPT_INTERVAL + JITTER))
  # A jitter larger than the interval could produce a negative delay, which
  # makes `sleep` fail and (under errexit) abort the script; clamp to zero.
  if [[ "${DELAY}" -lt 0 ]]; then
    DELAY=0
  fi
  log "WARN: PrivateDnsName is not available, waiting for ${DELAY} seconds..."
  sleep "${DELAY}"
done

if [[ -z "${PRIVATE_DNS_NAME}" ]]; then
  log "ERROR: failed to retrieve PrivateDnsName after ${ATTEMPT} attempts!"
  exit 1
else
  log "INFO: retrieved PrivateDnsName: ${PRIVATE_DNS_NAME}"
  echo "${PRIVATE_DNS_NAME}"
  exit 0
fi
7 changes: 1 addition & 6 deletions files/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -531,12 +531,7 @@ else
# If the VPC has a custom `domain-name` in its DHCP options set, and the VPC has `enableDnsHostnames` set to `true`,
# then /etc/hostname is not the same as EC2's PrivateDnsName.
# The name of the Node object must be equal to EC2's PrivateDnsName for the aws-iam-authenticator to allow this kubelet to manage it.
INSTANCE_ID=$(imds /latest/meta-data/instance-id)
# the AWS CLI currently constructs the wrong endpoint URL on localzones (the availability zone group will be used instead of the parent region)
# more info: https://github.com/aws/aws-cli/issues/7043
REGION=$(imds /latest/meta-data/placement/region)
PRIVATE_DNS_NAME=$(AWS_RETRY_MODE=standard AWS_MAX_ATTEMPTS=10 aws ec2 describe-instances --region $REGION --instance-ids $INSTANCE_ID --query 'Reservations[].Instances[].PrivateDnsName' --output text)
KUBELET_ARGS="$KUBELET_ARGS --hostname-override=$PRIVATE_DNS_NAME"
KUBELET_ARGS="$KUBELET_ARGS --hostname-override=$(private-dns-name)"
fi

KUBELET_ARGS="$KUBELET_ARGS --cloud-provider=$KUBELET_CLOUD_PROVIDER"
Expand Down
32 changes: 32 additions & 0 deletions test/cases/private-dns-name.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env bash

# Test case for the `private-dns-name` helper.
#   1. With the default mocks, the helper must print the expected name.
#   2. With the AWS mock forced to fail, the helper must keep retrying, exit
#      non-zero, and take at least 6 seconds (two waits of 3 seconds, no jitter).

set -o nounset
set -o errexit
set -o pipefail

echo "--> Should fetch PrivateDnsName correctly"
EXPECTED_PRIVATE_DNS_NAME="ip-10-0-0-157.us-east-2.compute.internal"
PRIVATE_DNS_NAME=$(private-dns-name)
if [[ "$PRIVATE_DNS_NAME" != "$EXPECTED_PRIVATE_DNS_NAME" ]]; then
  echo "❌ Test Failed: expected private-dns-name=$EXPECTED_PRIVATE_DNS_NAME but got '${PRIVATE_DNS_NAME}'"
  exit 1
fi

echo "--> Should try to fetch PrivateDnsName until timeout is reached"
# Shrink the retry budget and make every mocked `aws` call fail.
export PRIVATE_DNS_NAME_ATTEMPT_INTERVAL=3 \
  PRIVATE_DNS_NAME_MAX_ATTEMPTS=2 \
  PRIVATE_DNS_NAME_JITTER=0 \
  AWS_MOCK_FAIL=true

START_TIME=$(date '+%s')
# Capture the exit status without tripping errexit.
if private-dns-name; then
  EXIT_CODE=0
else
  EXIT_CODE=$?
fi
STOP_TIME=$(date '+%s')

if ((EXIT_CODE == 0)); then
  echo "❌ Test Failed: expected a non-zero exit code"
  exit 1
fi

ELAPSED_TIME=$((STOP_TIME - START_TIME))
if ((ELAPSED_TIME < 6)); then
  echo "❌ Test Failed: expected 6 seconds to elapse, but got: $ELAPSED_TIME"
  exit 1
fi
19 changes: 17 additions & 2 deletions test/mocks/aws
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,30 @@ SCRIPTPATH="$(

# Mock implementation of the `aws` CLI for the test cases.
#
# Behaviour:
#   - AWS_MOCK_FAIL=true             -> every invocation fails with exit 1.
#   - ec2 describe-instance-types    -> prints
#       ${SCRIPTPATH}/describe-instance-types/<type with '.' replaced by '-'>.json
#   - ec2 describe-instances         -> prints
#       ${SCRIPTPATH}/describe-instances/<instance-id>.json
#   - a missing fixture exits 1; any other command falls through silently.
#
# All diagnostics go to stderr so that stdout only ever carries fixture JSON.
# SCRIPTPATH is assigned earlier in this script, outside this block.
mock_aws() {
  local instance_type instance_id

  echo >&2 "mocking 'aws $*'"

  AWS_MOCK_FAIL=${AWS_MOCK_FAIL:-false}
  if [[ "${AWS_MOCK_FAIL}" == "true" ]]; then
    echo >&2 "failing mocked 'aws $*'"
    exit 1
  fi

  if [[ "${1:-}" == "ec2" ]]; then
    if [[ "${2:-}" == "describe-instance-types" ]]; then
      # Pull the first thing that looks like an instance type (e.g. t3.nano)
      # out of the arguments and map it to its fixture name (t3-nano).
      instance_type=$(printf '%s\n' "$*" | grep -o '[a-z]\+[0-9]\+[a-z]*\.[0-9a-z]\+' | tr '.' '-')
      if [[ -f "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" ]]; then
        cat "${SCRIPTPATH}/describe-instance-types/${instance_type}.json"
        exit 0
      fi
      echo >&2 "instance type not found"
      exit 1
    fi
    if [[ "${2:-}" == "describe-instances" ]]; then
      # Pull the instance ID (i-...) out of the arguments.
      instance_id=$(printf '%s\n' "$*" | grep -o 'i-[a-z0-9]\+')
      echo >&2 "instance-id: $instance_id"
      if [[ -f "${SCRIPTPATH}/describe-instances/${instance_id}.json" ]]; then
        cat "${SCRIPTPATH}/describe-instances/${instance_id}.json"
        exit 0
      fi
      echo >&2 "instance not found"
      exit 1
    fi
  fi
}

mock_aws "$@"
154 changes: 154 additions & 0 deletions test/mocks/describe-instances/i-1234567890abcdef0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
{
"Reservations": [
{
"Groups": [],
"Instances": [
{
"AmiLaunchIndex": 0,
"ImageId": "ami-0abcdef1234567890",
"InstanceId": "i-1234567890abcdef0",
"InstanceType": "t3.nano",
"KeyName": "my-key-pair",
"LaunchTime": "2022-11-15T10:48:59+00:00",
"Monitoring": {
"State": "disabled"
},
"Placement": {
"AvailabilityZone": "us-east-2a",
"GroupName": "",
"Tenancy": "default"
},
"PrivateDnsName": "ip-10-0-0-157.us-east-2.compute.internal",
"PrivateIpAddress": "10.0.0.157",
"ProductCodes": [],
"PublicDnsName": "ec2-34-253-223-13.us-east-2.compute.amazonaws.com",
"PublicIpAddress": "34.253.223.13",
"State": {
"Code": 16,
"Name": "running"
},
"StateTransitionReason": "",
"SubnetId": "subnet-04a636d18e83cfacb",
"VpcId": "vpc-1234567890abcdef0",
"Architecture": "x86_64",
"BlockDeviceMappings": [
{
"DeviceName": "/dev/xvda",
"Ebs": {
"AttachTime": "2022-11-15T10:49:00+00:00",
"DeleteOnTermination": true,
"Status": "attached",
"VolumeId": "vol-02e6ccdca7de29cf2"
}
}
],
"ClientToken": "1234abcd-1234-abcd-1234-d46a8903e9bc",
"EbsOptimized": true,
"EnaSupport": true,
"Hypervisor": "xen",
"IamInstanceProfile": {
"Arn": "arn:aws:iam::111111111111:instance-profile/AmazonSSMRoleForInstancesQuickSetup",
"Id": "111111111111111111111"
},
"NetworkInterfaces": [
{
"Association": {
"IpOwnerId": "amazon",
"PublicDnsName": "ec2-34-253-223-13.us-east-2.compute.amazonaws.com",
"PublicIp": "34.253.223.13"
},
"Attachment": {
"AttachTime": "2022-11-15T10:48:59+00:00",
"AttachmentId": "eni-attach-1234567890abcdefg",
"DeleteOnTermination": true,
"DeviceIndex": 0,
"Status": "attached",
"NetworkCardIndex": 0
},
"Description": "",
"Groups": [
{
"GroupName": "launch-wizard-146",
"GroupId": "sg-1234567890abcdefg"
}
],
"Ipv6Addresses": [],
"MacAddress": "00:11:22:33:44:55",
"NetworkInterfaceId": "eni-1234567890abcdefg",
"OwnerId": "104024344472",
"PrivateDnsName": "ip-10-0-0-157.us-east-2.compute.internal",
"PrivateIpAddress": "10.0.0.157",
"PrivateIpAddresses": [
{
"Association": {
"IpOwnerId": "amazon",
"PublicDnsName": "ec2-34-253-223-13.us-east-2.compute.amazonaws.com",
"PublicIp": "34.253.223.13"
},
"Primary": true,
"PrivateDnsName": "ip-10-0-0-157.us-east-2.compute.internal",
"PrivateIpAddress": "10.0.0.157"
}
],
"SourceDestCheck": true,
"Status": "in-use",
"SubnetId": "subnet-1234567890abcdefg",
"VpcId": "vpc-1234567890abcdefg",
"InterfaceType": "interface"
}
],
"RootDeviceName": "/dev/xvda",
"RootDeviceType": "ebs",
"SecurityGroups": [
{
"GroupName": "launch-wizard-146",
"GroupId": "sg-1234567890abcdefg"
}
],
"SourceDestCheck": true,
"Tags": [
{
"Key": "Name",
"Value": "my-instance"
}
],
"VirtualizationType": "hvm",
"CpuOptions": {
"CoreCount": 1,
"ThreadsPerCore": 2
},
"CapacityReservationSpecification": {
"CapacityReservationPreference": "open"
},
"HibernationOptions": {
"Configured": false
},
"MetadataOptions": {
"State": "applied",
"HttpTokens": "optional",
"HttpPutResponseHopLimit": 1,
"HttpEndpoint": "enabled",
"HttpProtocolIpv6": "disabled",
"InstanceMetadataTags": "enabled"
},
"EnclaveOptions": {
"Enabled": false
},
"PlatformDetails": "Linux/UNIX",
"UsageOperation": "RunInstances",
"UsageOperationUpdateTime": "2022-11-15T10:48:59+00:00",
"PrivateDnsNameOptions": {
"HostnameType": "ip-name",
"EnableResourceNameDnsARecord": true,
"EnableResourceNameDnsAAAARecord": false
},
"MaintenanceOptions": {
"AutoRecovery": "default"
}
}
],
"OwnerId": "111111111111",
"ReservationId": "r-1234567890abcdefg"
}
]
}

0 comments on commit fe4063a

Please sign in to comment.