Skip to content

Commit

Permalink
Prefers scheduling of pods on newer machines during roll-outs
Browse files Browse the repository at this point in the history
- Optimise pod handling in machine updates
- It prefers scheduling of pods on newer machines by tainting the older machines with prefer-no-schedule
  • Loading branch information
prashanth26 committed Dec 17, 2018
1 parent d156c29 commit 1606ca0
Showing 1 changed file with 104 additions and 2 deletions.
106 changes: 104 additions & 2 deletions pkg/controller/deployment_rolling.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,21 @@ package controller
import (
"fmt"
"sort"
"time"

"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
"github.com/golang/glog"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/util/integer"

"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
"github.com/golang/glog"
)

// Retry tuning used by taintNodesBackingMachineSets when it fetches and
// updates a MachineSet object.
var (
// maxRetryDeadline is added to time.Now() to compute the point after which
// a failed fetch/update is no longer retried and the error is returned.
maxRetryDeadline = 5 * time.Minute
// conflictRetryInterval is how long the retry loop sleeps between attempts.
conflictRetryInterval = 10 * time.Second
)

// rolloutRolling implements the logic for rolling a new machine set.
Expand All @@ -40,6 +50,11 @@ func (dc *controller) rolloutRolling(d *v1alpha1.MachineDeployment, isList []*v1
}
allISs := append(oldISs, newIS)

err = dc.taintNodesBackingMachineSets(oldISs)
if err != nil {
return nil
}

// Scale up, if we can.
scaledUp, err := dc.reconcileNewMachineSet(allISs, newIS, d)
if err != nil {
Expand Down Expand Up @@ -236,3 +251,90 @@ func (dc *controller) scaleDownOldMachineSetsForRollingUpdate(allISs []*v1alpha1

return totalScaledDown, nil
}

// taintNodesBackingMachineSets places the "old-machine-set" PreferNoSchedule
// taint on every node backing the given machine sets, and then marks each
// machine set with the "PreferNoSchedule" annotation.
//
// Machine sets whose "PreferNoSchedule" annotation is already non-empty are
// skipped, as are machines that carry no "node" label.
//
// The annotation is written with a fetch-then-update loop that is retried
// every conflictRetryInterval until maxRetryDeadline has elapsed. The loop
// sleeps between attempts, so this call can block for up to that long per
// machine set.
//
// It returns the first error encountered (selector conversion, machine
// listing/claiming, node tainting, or the fetch/update once the retry deadline
// has passed); machine sets after the failing one are not processed.
func (dc *controller) taintNodesBackingMachineSets(MachineSets []*v1alpha1.MachineSet) error {
	// The same taint is applied to every node, so build it once.
	taint := v1.Taint{
		Key:    "old-machine-set",
		Value:  "True",
		Effect: "PreferNoSchedule",
	}

	for _, machineSet := range MachineSets {
		// A non-empty annotation means this machine set was handled earlier.
		if machineSet.Annotations["PreferNoSchedule"] != "" {
			continue
		}

		// Capture the identity up front. The retry loop below must not depend
		// on whatever object a failed Get hands back.
		name, namespace := machineSet.Name, machineSet.Namespace

		glog.V(3).Infof("Trying to taint MachineSet object %q with prefer-no-schedule to avoid scheduling of pods", name)
		selector, err := metav1.LabelSelectorAsSelector(machineSet.Spec.Selector)
		if err != nil {
			return err
		}

		// List all machines so that machines which no longer match the
		// machine set's selector but still hold a stale controller ref are
		// included.
		// TODO: Do the List and Filter in a single pass, or use an index.
		filteredMachines, err := dc.machineLister.List(labels.Everything())
		if err != nil {
			return err
		}
		// NOTE: filteredMachines point to objects from the cache - if you need
		// to modify them, you need to copy them first.
		filteredMachines, err = dc.claimMachines(machineSet, selector, filteredMachines)
		if err != nil {
			return err
		}

		// Place the PreferNoSchedule taint on the node of every claimed
		// machine to steer new pods away from the older machines.
		for _, machine := range filteredMachines {
			nodeName := machine.Labels["node"]
			if nodeName == "" {
				continue
			}
			if err = AddOrUpdateTaintOnNode(dc.targetCoreClient, nodeName, &taint); err != nil {
				glog.Errorf("Node tainting failed for node: %s, %s", nodeName, err)
				return err
			}
		}

		retryDeadline := time.Now().Add(maxRetryDeadline)
		for {
			// Always work on a freshly fetched object so the update carries
			// the latest resource version.
			latest, err := dc.controlMachineClient.MachineSets(namespace).Get(name, metav1.GetOptions{})
			if err != nil {
				if time.Now().Before(retryDeadline) {
					glog.Warningf("Unable to fetch MachineSet object %s, Error: %+v", name, err)
					time.Sleep(conflictRetryInterval)
					continue
				}
				// Timeout occurred
				glog.Errorf("Unable to fetch MachineSet object %s, Error: %+v", name, err)
				return err
			}

			msCopy := latest.DeepCopy()
			// Writing to a nil map panics; a machine set without any
			// annotations has a nil Annotations map.
			if msCopy.Annotations == nil {
				msCopy.Annotations = map[string]string{}
			}
			msCopy.Annotations["PreferNoSchedule"] = "True"

			_, err = dc.controlMachineClient.MachineSets(namespace).Update(msCopy)
			if err != nil {
				if time.Now().Before(retryDeadline) {
					glog.Warningf("Unable to update MachineSet object %s, Error: %+v", name, err)
					time.Sleep(conflictRetryInterval)
					continue
				}
				// Timeout occurred
				glog.Errorf("Unable to update MachineSet object %s, Error: %+v", name, err)
				return err
			}

			// Break out of loop when update succeeds
			break
		}
		glog.V(2).Infof("Tainted MachineSet object %q with prefer-no-schedule to avoid scheduling of pods", name)
	}

	return nil
}

0 comments on commit 1606ca0

Please sign in to comment.