From c8e341f22f95c367b72ea0f52305ad899674470a Mon Sep 17 00:00:00 2001 From: Enxebre Date: Thu, 14 Nov 2019 12:38:34 +0100 Subject: [PATCH] Stop retieving instances by state on API call This attemps to mitigate https://bugzilla.redhat.com/show_bug.cgi?id=1772163. Instead of telling the API request to filter by state, we fetch all and discriminate only terminated instances --- pkg/actuators/machine/actuator.go | 10 ++--- pkg/actuators/machine/actuator_test.go | 57 +++++++++++++++++++------- pkg/actuators/machine/stubs.go | 20 +++++++++ pkg/actuators/machine/utils.go | 47 ++++++++++++--------- 4 files changed, 95 insertions(+), 39 deletions(-) diff --git a/pkg/actuators/machine/actuator.go b/pkg/actuators/machine/actuator.go index a958895d8d..a8a438efd8 100644 --- a/pkg/actuators/machine/actuator.go +++ b/pkg/actuators/machine/actuator.go @@ -483,14 +483,14 @@ func (a *Actuator) Update(context context.Context, cluster *clusterv1.Cluster, m return a.updateStatus(machine, newestInstance) } -// Exists determines if the given machine currently exists. For AWS we query for instances in -// running state, with a matching name tag, to determine a match. +// Exists determines if the given machine currently exists. +// A machine which is not terminated is considered as existing. func (a *Actuator) Exists(context context.Context, cluster *clusterv1.Cluster, machine *machinev1.Machine) (bool, error) { glog.Infof("%s: Checking if machine exists", machine.Name) instances, err := a.getMachineInstances(cluster, machine) if err != nil { - glog.Errorf("%s: Error getting running instances: %v", machine.Name, err) + glog.Errorf("%s: Error getting existing instances: %v", machine.Name, err) return false, err } if len(instances) == 0 { @@ -545,13 +545,11 @@ func (a *Actuator) getMachineInstances(cluster *clusterv1.Cluster, machine *mach // ID, search using that, otherwise fallback to filtering based on tags. if err == nil && status.InstanceID != nil && *status.InstanceID != "" { i, err := getExistingInstanceByID(*status.InstanceID, client) - if err != nil { - glog.Warningf("%s: Failed to find running instance by id %s: %v", + glog.Warningf("%s: Failed to find existing instance by id %s: %v", machine.Name, *status.InstanceID, err) } else { glog.Infof("%s: Found instance by id: %s", machine.Name, *status.InstanceID) - return []*ec2.Instance{i}, nil } } diff --git a/pkg/actuators/machine/actuator_test.go b/pkg/actuators/machine/actuator_test.go index 0b83023600..0e65699b5e 100644 --- a/pkg/actuators/machine/actuator_test.go +++ b/pkg/actuators/machine/actuator_test.go @@ -1155,6 +1155,7 @@ func TestGetMachineInstances(t *testing.T) { testcase string providerStatus providerconfigv1.AWSMachineProviderStatus awsClientFunc func(*gomock.Controller) awsclient.Client + exists bool }{ { testcase: "empty-status-search-by-tag", @@ -1170,11 +1171,6 @@ func TestGetMachineInstances(t *testing.T) { }, clusterFilter(clusterID), - - { - Name: aws.String("instance-state-name"), - Values: existingInstanceStates(), - }, }, } @@ -1185,6 +1181,7 @@ func TestGetMachineInstances(t *testing.T) { return mockAWSClient }, + exists: true, }, { testcase: "has-status-search-by-id", @@ -1195,25 +1192,54 @@ func TestGetMachineInstances(t *testing.T) { mockAWSClient := mockaws.NewMockClient(ctrl) request := &ec2.DescribeInstancesInput{ + InstanceIds: aws.StringSlice([]string{instanceID}), + } + + mockAWSClient.EXPECT().DescribeInstances(request).Return( + stubDescribeInstancesOutput(imageID, instanceID), + nil, + ).Times(1) + + return mockAWSClient + }, + exists: true, + }, + { + testcase: "has-status-search-by-id and machine is terminated", + providerStatus: providerconfigv1.AWSMachineProviderStatus{ + InstanceID: aws.String(instanceID), + }, + awsClientFunc: func(ctrl *gomock.Controller) awsclient.Client { + mockAWSClient := mockaws.NewMockClient(ctrl) + + request := &ec2.DescribeInstancesInput{ + InstanceIds: aws.StringSlice([]string{instanceID}), + } + + mockAWSClient.EXPECT().DescribeInstances(request).Return( + stubTerminatedInstanceDescribeInstancesOutput(imageID, instanceID), + nil, + ).Times(1) + + request2 := &ec2.DescribeInstancesInput{ Filters: []*ec2.Filter{ { - Name: aws.String("instance-id"), - Values: aws.StringSlice([]string{instanceID}), - }, - { - Name: aws.String("instance-state-name"), - Values: existingInstanceStates(), + Name: awsTagFilter("Name"), + Values: aws.StringSlice([]string{machine.Name}), }, + + clusterFilter(clusterID), }, } - mockAWSClient.EXPECT().DescribeInstances(request).Return( - stubDescribeInstancesOutput(imageID, instanceID), + mockAWSClient.EXPECT().DescribeInstances(request2).Return( + stubTerminatedInstanceDescribeInstancesOutput(imageID, instanceID), nil, ).Times(1) return mockAWSClient }, + exists: false, }, } @@ -1242,10 +1268,13 @@ func TestGetMachineInstances(t *testing.T) { t.Errorf("Error creating Actuator: %v", err) } - _, err = actuator.getMachineInstances(nil, machineCopy) + instance, err := actuator.getMachineInstances(nil, machineCopy) if err != nil { t.Errorf("Unexpected error from getMachineInstances: %v", err) } + if tc.exists != (instance != nil) { + t.Errorf("Expected instance exists: %t, got instance: %v", tc.exists, instance) + } }) } } diff --git a/pkg/actuators/machine/stubs.go b/pkg/actuators/machine/stubs.go index a2974a41ad..4f0c673470 100644 --- a/pkg/actuators/machine/stubs.go +++ b/pkg/actuators/machine/stubs.go @@ -292,3 +292,23 @@ func stubDescribeInstancesOutput(imageID, instanceID string) *ec2.DescribeInstan }, } } + +func stubTerminatedInstanceDescribeInstancesOutput(imageID, instanceID string) *ec2.DescribeInstancesOutput { + return &ec2.DescribeInstancesOutput{ + Reservations: []*ec2.Reservation{ + { + Instances: []*ec2.Instance{ + { + ImageId: aws.String(imageID), + InstanceId: aws.String(instanceID), + State: &ec2.InstanceState{ + Name: aws.String(ec2.InstanceStateNameTerminated), + Code: aws.Int64(16), + }, + LaunchTime: aws.Time(time.Now()), + }, + }, + }, + }, + } +} diff --git a/pkg/actuators/machine/utils.go b/pkg/actuators/machine/utils.go index 7cb59769bf..8d1298e2c9 100644 --- a/pkg/actuators/machine/utils.go +++ b/pkg/actuators/machine/utils.go @@ -85,36 +85,47 @@ func getStoppedInstances(machine *machinev1.Machine, client awsclient.Client) ([ return getInstances(machine, client, stoppedInstanceStateFilter) } +// getExistingInstances returns all instances not terminated func getExistingInstances(machine *machinev1.Machine, client awsclient.Client) ([]*ec2.Instance, error) { - return getInstances(machine, client, existingInstanceStates()) + instances, err := getInstances(machine, client, nil) + if err != nil { + return nil, err + } + + var existingInstances []*ec2.Instance + for key := range instances { + if (instances[key].State) != nil { + if aws.StringValue(instances[key].State.Name) != ec2.InstanceStateNameTerminated { + existingInstances = append(existingInstances, instances[key]) + } + } + } + return existingInstances, nil } func getExistingInstanceByID(id string, client awsclient.Client) (*ec2.Instance, error) { - return getInstanceByID(id, client, existingInstanceStates()) + instance, err := getInstanceByID(id, client) + if err != nil { + return nil, err + } + if instance.State != nil { + if aws.StringValue(instance.State.Name) == ec2.InstanceStateNameTerminated { + return nil, fmt.Errorf("failed to getExistingInstanceByID for instance-id %s, instance is terminated", id) + } + } + return instance, nil } // getInstanceByID returns the instance with the given ID if it exists. -func getInstanceByID(id string, client awsclient.Client, instanceStateFilter []*string) (*ec2.Instance, error) { +func getInstanceByID(id string, client awsclient.Client) (*ec2.Instance, error) { if id == "" { return nil, fmt.Errorf("instance-id not specified") } - requestFilters := []*ec2.Filter{ - { - Name: aws.String("instance-id"), - Values: aws.StringSlice([]string{id}), - }, - } - - if instanceStateFilter != nil { - requestFilters = append(requestFilters, &ec2.Filter{ - Name: aws.String("instance-state-name"), - Values: instanceStateFilter, - }) + request := &ec2.DescribeInstancesInput{ + InstanceIds: aws.StringSlice([]string{id}), } - request := &ec2.DescribeInstancesInput{Filters: requestFilters} - result, err := client.DescribeInstances(request) if err != nil { return nil, err @@ -136,7 +147,6 @@ func getInstanceByID(id string, client awsclient.Client, instanceStateFilter []* // getInstances returns all instances that have a tag matching our machine name, // and cluster ID. func getInstances(machine *machinev1.Machine, client awsclient.Client, instanceStateFilter []*string) ([]*ec2.Instance, error) { - clusterID, ok := getClusterID(machine) if !ok { return []*ec2.Instance{}, fmt.Errorf("unable to get cluster ID for machine: %q", machine.Name) @@ -149,7 +159,6 @@ func getInstances(machine *machinev1.Machine, client awsclient.Client, instanceS }, clusterFilter(clusterID), } - if instanceStateFilter != nil { requestFilters = append(requestFilters, &ec2.Filter{ Name: aws.String("instance-state-name"),