Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle eventually-consistent PrivateDnsName on 1.26+ #1383

Merged
merged 1 commit into from
Aug 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions files/bin/private-dns-name
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env bash

# Retrieves the PrivateDnsName from EC2 for this instance. The value is
# eventually consistent, so the script waits until it becomes available.
#
# Shell options: stop at the first failing command, treat unset variables as
# errors, and trace every command to stderr.
# NOTE(review): pipefail is not enabled; with it, a failing `aws` call in the
# lookup pipeline would abort the script under errexit instead of being
# retried -- presumably intentional, confirm before adding it.
set -o errexit -o nounset -o xtrace

# Writes one timestamped, tagged diagnostic line to stderr.
# Arguments: the message words; they are joined with single spaces.
# Outputs:   "<timestamp> [private-dns-name] <message>" on stderr, nothing on stdout.
log() {
  local -r tag="[private-dns-name]"
  local stamp
  # A failing `date` must not abort the caller under errexit.
  stamp=$(date '+%Y-%m-%dT%H:%M:%S%z') || true
  echo "${stamp}" "${tag}" "$@" >&2
}

INSTANCE_ID=$(imds /latest/meta-data/instance-id)

# the AWS CLI currently constructs the wrong endpoint URL on localzones (the availability zone group will be used instead of the parent region)
# more info: https://github.com/aws/aws-cli/issues/7043
REGION=$(imds /latest/meta-data/placement/region)

# Both settings can be overridden from the environment.
# by default, wait for 120 seconds (20 retries, 6 seconds apart)
PRIVATE_DNS_NAME_MAX_ATTEMPTS=${PRIVATE_DNS_NAME_MAX_ATTEMPTS:-20}
PRIVATE_DNS_NAME_ATTEMPT_INTERVAL=${PRIVATE_DNS_NAME_ATTEMPT_INTERVAL:-6}

# A non-numeric limit would make the `-ge` test in the loop fail on every
# pass, so the loop would never terminate; reject it up front.
if ! [[ "${PRIVATE_DNS_NAME_MAX_ATTEMPTS}" =~ ^[0-9]+$ ]]; then
  log "ERROR: PRIVATE_DNS_NAME_MAX_ATTEMPTS must be a non-negative integer, got: '${PRIVATE_DNS_NAME_MAX_ATTEMPTS}'"
  exit 1
fi

log "will make up to ${PRIVATE_DNS_NAME_MAX_ATTEMPTS} attempt(s) every ${PRIVATE_DNS_NAME_ATTEMPT_INTERVAL} second(s)"

# ATTEMPT counts the waits performed so far: one lookup is made immediately,
# followed by up to PRIVATE_DNS_NAME_MAX_ATTEMPTS retries.
ATTEMPT=0
while true; do
  # `// empty` turns a missing or null PrivateDnsName into no output; plain
  # `jq -r` would print the literal string "null", which would then be
  # mistaken for a real name. The pipeline's status is jq's, so a failing
  # `aws` call does not trip errexit; its empty output is treated as
  # "not available yet" and retried.
  PRIVATE_DNS_NAME=$(aws ec2 describe-instances --region "${REGION}" --instance-ids "${INSTANCE_ID}" \
    | jq -r '.Reservations[].Instances[].PrivateDnsName // empty')
  # `[` (not `[[`) keeps the comparison strictly decimal, so a limit such as
  # "08" is not rejected as an invalid octal number.
  if [ -n "${PRIVATE_DNS_NAME}" ] || [ "${ATTEMPT}" -ge "${PRIVATE_DNS_NAME_MAX_ATTEMPTS}" ]; then
    break
  fi
  ATTEMPT=$((ATTEMPT + 1))
  log "WARN: PrivateDnsName is not available, waiting for ${PRIVATE_DNS_NAME_ATTEMPT_INTERVAL} seconds..."
  sleep "${PRIVATE_DNS_NAME_ATTEMPT_INTERVAL}"
done

if [ -z "${PRIVATE_DNS_NAME}" ]; then
  log "ERROR: failed to retrieve PrivateDnsName after ${ATTEMPT} attempts!"
  exit 1
fi

log "INFO: retrieved PrivateDnsName: ${PRIVATE_DNS_NAME}"
# Only the name goes to stdout, so callers can capture it with $(private-dns-name).
printf '%s\n' "${PRIVATE_DNS_NAME}"
exit 0
7 changes: 1 addition & 6 deletions files/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -531,12 +531,7 @@ else
# If the VPC has a custom `domain-name` in its DHCP options set, and the VPC has `enableDnsHostnames` set to `true`,
# then /etc/hostname is not the same as EC2's PrivateDnsName.
# The name of the Node object must be equal to EC2's PrivateDnsName for the aws-iam-authenticator to allow this kubelet to manage it.
INSTANCE_ID=$(imds /latest/meta-data/instance-id)
# the AWS CLI currently constructs the wrong endpoint URL on localzones (the availability zone group will be used instead of the parent region)
# more info: https://github.com/aws/aws-cli/issues/7043
REGION=$(imds /latest/meta-data/placement/region)
PRIVATE_DNS_NAME=$(AWS_RETRY_MODE=standard AWS_MAX_ATTEMPTS=10 aws ec2 describe-instances --region $REGION --instance-ids $INSTANCE_ID --query 'Reservations[].Instances[].PrivateDnsName' --output text)
KUBELET_ARGS="$KUBELET_ARGS --hostname-override=$PRIVATE_DNS_NAME"
KUBELET_ARGS="$KUBELET_ARGS --hostname-override=$(private-dns-name)"
fi

KUBELET_ARGS="$KUBELET_ARGS --cloud-provider=$KUBELET_CLOUD_PROVIDER"
Expand Down
31 changes: 31 additions & 0 deletions test/cases/private-dns-name.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env bash

# Test case for the `private-dns-name` command:
#   1. it prints the expected name when the lookup succeeds;
#   2. it exits non-zero, after waiting, when the lookup keeps failing.

set -o nounset
set -o errexit
set -o pipefail

echo "--> Should fetch PrivateDnsName correctly"
EXPECTED_PRIVATE_DNS_NAME="ip-10-0-0-157.us-east-2.compute.internal"
PRIVATE_DNS_NAME=$(private-dns-name)
if [[ "${PRIVATE_DNS_NAME}" != "${EXPECTED_PRIVATE_DNS_NAME}" ]]; then
  echo "❌ Test Failed: expected private-dns-name=$EXPECTED_PRIVATE_DNS_NAME but got '${PRIVATE_DNS_NAME}'"
  exit 1
fi

echo "--> Should try to fetch PrivateDnsName until timeout is reached"
# Shrink the retry budget so the failing run finishes quickly, and ask the
# mocked `aws` command to fail every call.
export AWS_MOCK_FAIL=true
export PRIVATE_DNS_NAME_MAX_ATTEMPTS=2
export PRIVATE_DNS_NAME_ATTEMPT_INTERVAL=3
START_TIME=$(date '+%s')
# Capture the status explicitly; a bare failing command would end the test
# under errexit.
if private-dns-name; then
  EXIT_CODE=0
else
  EXIT_CODE=$?
fi
STOP_TIME=$(date '+%s')
if ((EXIT_CODE == 0)); then
  echo "❌ Test Failed: expected a non-zero exit code"
  exit 1
fi
# Two waits of three seconds each must have elapsed before the command gave up.
ELAPSED_TIME=$((STOP_TIME - START_TIME))
if ((ELAPSED_TIME < 6)); then
  echo "❌ Test Failed: expected 6 seconds to elapse, but got: $ELAPSED_TIME"
  exit 1
fi
19 changes: 17 additions & 2 deletions test/mocks/aws
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,30 @@ SCRIPTPATH="$(

echo >&2 "mocking 'aws $@'"

if [[ $1 == "ec2" ]]; then
AWS_MOCK_FAIL=${AWS_MOCK_FAIL:-false}
if [ "$AWS_MOCK_FAIL" = "true" ]; then
echo >&2 "failing mocked 'aws $@'"
exit 1
fi

if [[ $1 == "ec2" ]]; then
if [[ $2 == "describe-instance-types" ]]; then
instance_type=$(echo "${@}" | grep -o '[a-z]\+[0-9]\+[a-z]*\.[0-9a-z]\+' | tr '.' '-')
if [[ -f "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" ]]; then
cat "${SCRIPTPATH}/describe-instance-types/${instance_type}.json"
exit 0
fi
echo "instance type not found"
echo >&2 "instance type not found"
exit 1
fi
if [[ $2 == "describe-instances" ]]; then
instance_id=$(echo "${@}" | grep -o 'i\-[a-z0-9]\+')
echo >&2 "instance-id: $instance_id"
if [[ -f "${SCRIPTPATH}/describe-instances/${instance_id}.json" ]]; then
cat "${SCRIPTPATH}/describe-instances/${instance_id}.json"
exit 0
fi
echo >&2 "instance not found"
exit 1
fi
fi
154 changes: 154 additions & 0 deletions test/mocks/describe-instances/i-1234567890abcdef0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
{
"Reservations": [
{
"Groups": [],
"Instances": [
{
"AmiLaunchIndex": 0,
"ImageId": "ami-0abcdef1234567890",
"InstanceId": "i-1234567890abcdef0",
"InstanceType": "t3.nano",
"KeyName": "my-key-pair",
"LaunchTime": "2022-11-15T10:48:59+00:00",
"Monitoring": {
"State": "disabled"
},
"Placement": {
"AvailabilityZone": "us-east-2a",
"GroupName": "",
"Tenancy": "default"
},
"PrivateDnsName": "ip-10-0-0-157.us-east-2.compute.internal",
"PrivateIpAddress": "10-0-0-157",
"ProductCodes": [],
"PublicDnsName": "ec2-34-253-223-13.us-east-2.compute.amazonaws.com",
"PublicIpAddress": "34.253.223.13",
"State": {
"Code": 16,
"Name": "running"
},
"StateTransitionReason": "",
"SubnetId": "subnet-04a636d18e83cfacb",
"VpcId": "vpc-1234567890abcdef0",
"Architecture": "x86_64",
"BlockDeviceMappings": [
{
"DeviceName": "/dev/xvda",
"Ebs": {
"AttachTime": "2022-11-15T10:49:00+00:00",
"DeleteOnTermination": true,
"Status": "attached",
"VolumeId": "vol-02e6ccdca7de29cf2"
}
}
],
"ClientToken": "1234abcd-1234-abcd-1234-d46a8903e9bc",
"EbsOptimized": true,
"EnaSupport": true,
"Hypervisor": "xen",
"IamInstanceProfile": {
"Arn": "arn:aws:iam::111111111111:instance-profile/AmazonSSMRoleForInstancesQuickSetup",
"Id": "111111111111111111111"
},
"NetworkInterfaces": [
{
"Association": {
"IpOwnerId": "amazon",
"PublicDnsName": "ec2-34-253-223-13.us-east-2.compute.amazonaws.com",
"PublicIp": "34.253.223.13"
},
"Attachment": {
"AttachTime": "2022-11-15T10:48:59+00:00",
"AttachmentId": "eni-attach-1234567890abcdefg",
"DeleteOnTermination": true,
"DeviceIndex": 0,
"Status": "attached",
"NetworkCardIndex": 0
},
"Description": "",
"Groups": [
{
"GroupName": "launch-wizard-146",
"GroupId": "sg-1234567890abcdefg"
}
],
"Ipv6Addresses": [],
"MacAddress": "00:11:22:33:44:55",
"NetworkInterfaceId": "eni-1234567890abcdefg",
"OwnerId": "104024344472",
"PrivateDnsName": "ip-10-0-0-157.us-east-2.compute.internal",
"PrivateIpAddress": "10-0-0-157",
"PrivateIpAddresses": [
{
"Association": {
"IpOwnerId": "amazon",
"PublicDnsName": "ec2-34-253-223-13.us-east-2.compute.amazonaws.com",
"PublicIp": "34.253.223.13"
},
"Primary": true,
"PrivateDnsName": "ip-10-0-0-157.us-east-2.compute.internal",
"PrivateIpAddress": "10-0-0-157"
}
],
"SourceDestCheck": true,
"Status": "in-use",
"SubnetId": "subnet-1234567890abcdefg",
"VpcId": "vpc-1234567890abcdefg",
"InterfaceType": "interface"
}
],
"RootDeviceName": "/dev/xvda",
"RootDeviceType": "ebs",
"SecurityGroups": [
{
"GroupName": "launch-wizard-146",
"GroupId": "sg-1234567890abcdefg"
}
],
"SourceDestCheck": true,
"Tags": [
{
"Key": "Name",
"Value": "my-instance"
}
],
"VirtualizationType": "hvm",
"CpuOptions": {
"CoreCount": 1,
"ThreadsPerCore": 2
},
"CapacityReservationSpecification": {
"CapacityReservationPreference": "open"
},
"HibernationOptions": {
"Configured": false
},
"MetadataOptions": {
"State": "applied",
"HttpTokens": "optional",
"HttpPutResponseHopLimit": 1,
"HttpEndpoint": "enabled",
"HttpProtocolIpv6": "disabled",
"InstanceMetadataTags": "enabled"
},
"EnclaveOptions": {
"Enabled": false
},
"PlatformDetails": "Linux/UNIX",
"UsageOperation": "RunInstances",
"UsageOperationUpdateTime": "2022-11-15T10:48:59+00:00",
"PrivateDnsNameOptions": {
"HostnameType": "ip-name",
"EnableResourceNameDnsARecord": true,
"EnableResourceNameDnsAAAARecord": false
},
"MaintenanceOptions": {
"AutoRecovery": "default"
}
}
],
"OwnerId": "111111111111",
"ReservationId": "r-1234567890abcdefg"
}
]
}