Skip to content

Commit

Permalink
Search for existing instances by instance ID
Browse files Browse the repository at this point in the history
This changes the behavior of the actuator's `getMachineInstances`
method to first look for a machine's backing instance by the instance
ID in the provider status.  If the instance ID is not set, it falls
back to the previous method of filtering by tags.  This should help
prevent issues where AWS creates the instance, but takes some time to
propagate tag values, which can result in Exists() wrongly returning
false.
  • Loading branch information
bison committed Oct 24, 2019
1 parent a0cc5b5 commit 238a8df
Show file tree
Hide file tree
Showing 3 changed files with 195 additions and 7 deletions.
18 changes: 18 additions & 0 deletions pkg/actuators/machine/actuator.go
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,24 @@ func (a *Actuator) getMachineInstances(cluster *clusterv1.Cluster, machine *mach
return nil, err
}

status := &providerconfigv1.AWSMachineProviderStatus{}
err = a.codec.DecodeProviderStatus(machine.Status.ProviderStatus, status)

// If the status was decoded successfully, and there is a non-empty instance
// ID, search using that, otherwise fallback to filtering based on tags.
if err == nil && status.InstanceID != nil && *status.InstanceID != "" {
i, err := getExistingInstanceByID(*status.InstanceID, client)

if err != nil {
glog.Warningf("%s: Failed to find running instance by id %s: %v",
machine.Name, *status.InstanceID, err)
} else {
glog.Infof("%s: Found instance by id: %s", machine.Name, *status.InstanceID)

return []*ec2.Instance{i}, nil
}
}

return getExistingInstances(machine, client)
}

Expand Down
120 changes: 120 additions & 0 deletions pkg/actuators/machine/actuator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1135,3 +1135,123 @@ func TestCreate(t *testing.T) {
}
}
}

func TestGetMachineInstances(t *testing.T) {
clusterID := "aws-actuator-cluster"
instanceID := "i-02fa4197109214b46"
imageID := "ami-a9acbbd6"

machine, err := stubMachine()
if err != nil {
t.Fatalf("unable to build stub machine: %v", err)
}

codec, err := providerconfigv1.NewCodec()
if err != nil {
t.Fatalf("unable to build codec: %v", err)
}

testCases := []struct {
testcase string
providerStatus providerconfigv1.AWSMachineProviderStatus
awsClientFunc func(*gomock.Controller) awsclient.Client
}{
{
testcase: "empty-status-search-by-tag",
providerStatus: providerconfigv1.AWSMachineProviderStatus{},
awsClientFunc: func(ctrl *gomock.Controller) awsclient.Client {
mockAWSClient := mockaws.NewMockClient(ctrl)

request := &ec2.DescribeInstancesInput{
Filters: []*ec2.Filter{
{
Name: awsTagFilter("Name"),
Values: aws.StringSlice([]string{machine.Name}),
},

clusterFilter(clusterID),

{
Name: aws.String("instance-state-name"),
Values: existingInstanceStates(),
},
},
}

mockAWSClient.EXPECT().DescribeInstances(request).Return(
stubDescribeInstancesOutput(imageID, instanceID),
nil,
).Times(1)

return mockAWSClient
},
},
{
testcase: "has-status-search-by-id",
providerStatus: providerconfigv1.AWSMachineProviderStatus{
InstanceID: aws.String(instanceID),
},
awsClientFunc: func(ctrl *gomock.Controller) awsclient.Client {
mockAWSClient := mockaws.NewMockClient(ctrl)

request := &ec2.DescribeInstancesInput{
Filters: []*ec2.Filter{
{
Name: aws.String("instance-id"),
Values: aws.StringSlice([]string{instanceID}),
},
{
Name: aws.String("instance-state-name"),
Values: existingInstanceStates(),
},
},
}

mockAWSClient.EXPECT().DescribeInstances(request).Return(
stubDescribeInstancesOutput(imageID, instanceID),
nil,
).Times(1)

return mockAWSClient
},
},
}

for _, tc := range testCases {
t.Run(tc.testcase, func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

awsStatusRaw, err := codec.EncodeProviderStatus(&tc.providerStatus)
if err != nil {
t.Errorf("Error encoding ProviderStatus: %v", err)
}

machineCopy := machine.DeepCopy()
machineCopy.Status.ProviderStatus = awsStatusRaw

awsClient := tc.awsClientFunc(ctrl)

params := ActuatorParams{
Codec: codec,
AwsClientBuilder: awsClientBuilderFunc(awsClient),
}

actuator, err := NewActuator(params)
if err != nil {
t.Errorf("Error creating Actuator: %v", err)
}

_, err = actuator.getMachineInstances(nil, machineCopy)
if err != nil {
t.Errorf("Unexpected error from getMachineInstances: %v", err)
}
})
}
}

func awsClientBuilderFunc(c awsclient.Client) awsclient.AwsClientBuilderFuncType {
return func(_ client.Client, _, _, _ string) (awsclient.Client, error) {
return c, nil
}
}
64 changes: 57 additions & 7 deletions pkg/actuators/machine/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,18 @@ import (
awsclient "sigs.k8s.io/cluster-api-provider-aws/pkg/client"
)

// existingInstanceStates returns the list of states an EC2 instance can be in
// while being considered "existing", i.e. mostly anything but "Terminated".
func existingInstanceStates() []*string {
return []*string{
aws.String(ec2.InstanceStateNameRunning),
aws.String(ec2.InstanceStateNamePending),
aws.String(ec2.InstanceStateNameStopped),
aws.String(ec2.InstanceStateNameStopping),
aws.String(ec2.InstanceStateNameShuttingDown),
}
}

// getRunningInstance returns the AWS instance for a given machine. If multiple instances match our machine,
// the most recently launched will be returned. If no instance exists, an error will be returned.
func getRunningInstance(machine *machinev1.Machine, client awsclient.Client) (*ec2.Instance, error) {
Expand Down Expand Up @@ -74,13 +86,51 @@ func getStoppedInstances(machine *machinev1.Machine, client awsclient.Client) ([
}

func getExistingInstances(machine *machinev1.Machine, client awsclient.Client) ([]*ec2.Instance, error) {
return getInstances(machine, client, []*string{
aws.String(ec2.InstanceStateNameRunning),
aws.String(ec2.InstanceStateNamePending),
aws.String(ec2.InstanceStateNameStopped),
aws.String(ec2.InstanceStateNameStopping),
aws.String(ec2.InstanceStateNameShuttingDown),
})
return getInstances(machine, client, existingInstanceStates())
}

func getExistingInstanceByID(id string, client awsclient.Client) (*ec2.Instance, error) {
return getInstanceByID(id, client, existingInstanceStates())
}

// getInstanceByID returns the instance with the given ID if it exists.
func getInstanceByID(id string, client awsclient.Client, instanceStateFilter []*string) (*ec2.Instance, error) {
if id == "" {
return nil, fmt.Errorf("instance-id not specified")
}

requestFilters := []*ec2.Filter{
{
Name: aws.String("instance-id"),
Values: aws.StringSlice([]string{id}),
},
}

if instanceStateFilter != nil {
requestFilters = append(requestFilters, &ec2.Filter{
Name: aws.String("instance-state-name"),
Values: instanceStateFilter,
})
}

request := &ec2.DescribeInstancesInput{Filters: requestFilters}

result, err := client.DescribeInstances(request)
if err != nil {
return nil, err
}

if len(result.Reservations) != 1 {
return nil, fmt.Errorf("found %d reservations for instance-id %s", len(result.Reservations), id)
}

reservation := result.Reservations[0]

if len(reservation.Instances) != 1 {
return nil, fmt.Errorf("found %d instances for instance-id %s", len(reservation.Instances), id)
}

return reservation.Instances[0], nil
}

// getInstances returns all instances that have a tag matching our machine name,
Expand Down

0 comments on commit 238a8df

Please sign in to comment.