Skip to content

Commit

Permalink
Make Update correctly consider existing vs running
Browse files Browse the repository at this point in the history
This commit brings the existing vs running logic
of Update to be similar to what we do with Create.

This commit changes getRunningInstances function
to only consider 'running' status as running,
removes 'pending' from valid running statuses.

This commit also removes logic that deletes older
running instances in the unusual case where there
is more than one instance running with same tag
Name.  Any instances, including extras/duplicates
with same tag Name will still be deleted when
machine-object is deleted.
  • Loading branch information
michaelgugino committed Jun 4, 2019
1 parent 57f8907 commit bd70685
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 28 deletions.
54 changes: 27 additions & 27 deletions pkg/actuators/machine/actuator.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ const (
userDataSecretKey = "userData"
ec2InstanceIDNotFoundCode = "InvalidInstanceID.NotFound"
requeueAfterSeconds = 20
requeueAfterFatalSeconds = 180

// MachineCreationSucceeded indicates success for machine creation
MachineCreationSucceeded = "MachineCreationSucceeded"
Expand Down Expand Up @@ -351,17 +352,18 @@ func (a *Actuator) Update(context context.Context, cluster *clusterv1.Cluster, m
glog.Error(errMsg)
return errMsg
}

instances, err := getRunningInstances(machine, client)
// Get all instances not terminated.
existingInstances, err := getExistingInstances(machine, client)
if err != nil {
glog.Errorf("error getting running instances: %v", err)
glog.Errorf("error getting existing instances: %v", err)
return err
}
glog.Infof("found %d instances for machine", len(instances))
existingLen := len(existingInstances)
glog.Infof("found %d existing instances for machine", existingLen)

// Parent controller should prevent this from ever happening by calling Exists and then Create,
// but instance could be deleted between the two calls.
if len(instances) == 0 {
if existingLen == 0 {
glog.Warningf("attempted to update machine but no instances found")

a.handleMachineError(machine, apierrors.UpdateMachine("no instance found, reason unknown"), updateEventAction)
Expand All @@ -370,31 +372,29 @@ func (a *Actuator) Update(context context.Context, cluster *clusterv1.Cluster, m
if err := a.updateStatus(machine, nil); err != nil {
return err
}

errMsg := "attempted to update machine but no instances found"
glog.Error(errMsg)
return fmt.Errorf(errMsg)
}

glog.Info("instance found")

// In very unusual circumstances, there could be more than one machine running matching this
// machine name and cluster ID. In this scenario we will keep the newest, and delete all others.
sortInstances(instances)
if len(instances) > 1 {
err = terminateInstances(client, instances[1:])
// This is an unrecoverable error condition. We should delay to
// minimize unnecessary API calls.
return &clustererror.RequeueAfterError{RequeueAfter: requeueAfterFatalSeconds * time.Second}
}
sortInstances(existingInstances)
runningInstances := getRunningFromInstances(existingInstances)
runningLen := len(runningInstances)
var newestInstance *ec2.Instance
if runningLen > 0 {
// It would be very unusual to have more than one here, but it is
// possible if someone manually provisions a machine with same tag name.
glog.Infof("found %d running instances for machine %v", runningLen, machine.Name)
newestInstance = runningInstances[0]

err = a.updateLoadBalancers(client, machineProviderConfig, newestInstance)
if err != nil {
glog.Errorf("Unable to remove redundant instances: %v", err)
a.handleMachineError(machine, apierrors.CreateMachine("Error updating load balancers: %v", err), updateEventAction)
return err
}
}

newestInstance := instances[0]

err = a.updateLoadBalancers(client, machineProviderConfig, newestInstance)
if err != nil {
a.handleMachineError(machine, apierrors.CreateMachine("Error updating load balancers: %v", err), updateEventAction)
return err
} else {
// Didn't find any running instances, just newest existing one.
// In most cases, there should only be one existing Instance.
newestInstance = existingInstances[0]
}

a.eventRecorder.Eventf(machine, corev1.EventTypeNormal, "Updated", "Updated machine %v", machine.Name)
Expand Down
13 changes: 12 additions & 1 deletion pkg/actuators/machine/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,21 @@ func getRunningInstance(machine *machinev1.Machine, client awsclient.Client) (*e
// getRunningInstances returns all running instances that have a tag matching our machine name,
// and cluster ID.
func getRunningInstances(machine *machinev1.Machine, client awsclient.Client) ([]*ec2.Instance, error) {
runningInstanceStateFilter := []*string{aws.String(ec2.InstanceStateNameRunning), aws.String(ec2.InstanceStateNamePending)}
runningInstanceStateFilter := []*string{aws.String(ec2.InstanceStateNameRunning)}
return getInstances(machine, client, runningInstanceStateFilter)
}

// getRunningFromInstances returns all running instances from a list of instances.
func getRunningFromInstances(instances []*ec2.Instance) []*ec2.Instance {
var runningInstances []*ec2.Instance
for _, instance := range instances {
if *instance.State.Name == ec2.InstanceStateNameRunning {
runningInstances = append(runningInstances, instance)
}
}
return runningInstances
}

// getStoppedInstances returns all stopped instances that have a tag matching our machine name,
// and cluster ID.
func getStoppedInstances(machine *machinev1.Machine, client awsclient.Client) ([]*ec2.Instance, error) {
Expand Down

0 comments on commit bd70685

Please sign in to comment.