Skip to content

Commit

Permalink
Prefers scheduling of pods on newer machines during roll-outs
Browse files Browse the repository at this point in the history
- Optimise pod handling in machine updates
- It prefers scheduling of pods on newer machines by tainting the older machines with PreferNoSchedule
  • Loading branch information
prashanth26 committed Jan 14, 2019
1 parent b63dbde commit 410e0dc
Show file tree
Hide file tree
Showing 4 changed files with 230 additions and 9 deletions.
15 changes: 12 additions & 3 deletions pkg/controller/controller_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ import (
machineapi "github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1"
hashutil "github.com/gardener/machine-controller-manager/pkg/util/hash"
taintutils "github.com/gardener/machine-controller-manager/pkg/util/taints"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
Expand Down Expand Up @@ -820,7 +820,7 @@ func AddOrUpdateTaintOnNode(c clientset.Interface, nodeName string, taints ...*v
if !updated {
return nil
}
return PatchNodeTaints(c, nodeName, oldNode, newNode)
return UpdateNodeTaints(c, nodeName, oldNode, newNode)
})
}

Expand Down Expand Up @@ -877,7 +877,7 @@ func RemoveTaintOffNode(c clientset.Interface, nodeName string, node *v1.Node, t
if !updated {
return nil
}
return PatchNodeTaints(c, nodeName, oldNode, newNode)
return UpdateNodeTaints(c, nodeName, oldNode, newNode)
})
}

Expand Down Expand Up @@ -905,6 +905,15 @@ func PatchNodeTaints(c clientset.Interface, nodeName string, oldNode *v1.Node, n
return err
}

// UpdateNodeTaints updates node's taints.
func UpdateNodeTaints(c clientset.Interface, nodeName string, oldNode *v1.Node, newNode *v1.Node) error {
newNodeClone := oldNode.DeepCopy()
newNodeClone.Spec.Taints = newNode.Spec.Taints

_, err := c.Core().Nodes().Update(newNodeClone)
return err
}

// WaitForCacheSync is a wrapper around cache.WaitForCacheSync that generates log messages
// indicating that the controller identified by controllerName is waiting for syncs, followed by
// either a successful or failed sync.
Expand Down
109 changes: 106 additions & 3 deletions pkg/controller/deployment_rollback.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ package controller

import (
"fmt"

"github.com/golang/glog"
"time"

"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
"k8s.io/api/core/v1"
"github.com/golang/glog"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
)

Expand Down Expand Up @@ -58,6 +60,13 @@ func (dc *controller) rollback(d *v1alpha1.MachineDeployment, isList []*v1alpha1
}
if v == *toRevision {
glog.V(4).Infof("Found machine set %q with desired revision %d", is.Name, v)

// Remove PreferNoSchedule taints from nodes which were backing the machineSet
err = dc.removeTaintNodesBackingMachineSets(is)
if err != nil {
glog.Warningf("Failed to remove taints %s off nodes. Error: %s", PreferNoSchedule, err)
}

// rollback by copying podTemplate.Spec from the machine set
// revision number will be incremented during the next getAllMachineSetsAndSyncRevision call
// no-op if the spec matches current deployment's podTemplate.Spec
Expand Down Expand Up @@ -120,3 +129,97 @@ func (dc *controller) updateMachineDeploymentAndClearRollbackTo(d *v1alpha1.Mach
_, err := dc.controlMachineClient.MachineDeployments(d.Namespace).Update(d)
return err
}

// removeTaintNodesBackingMachineSets removes taints from all nodes backing the machineSets
func (dc *controller) removeTaintNodesBackingMachineSets(machineSet *v1alpha1.MachineSet) error {

if machineSet.Annotations[PreferNoSchedule] == "" {
// No taint exists
glog.Warningf("No taint exits on machineSet: %s. Hence not removing.", machineSet.Name)
return nil
}

glog.V(2).Infof("Trying to untaint MachineSet object %q with %s to enable scheduling of pods", machineSet.Name, PreferNoSchedule)
selector, err := metav1.LabelSelectorAsSelector(machineSet.Spec.Selector)
if err != nil {
return err
}

// list all machines to include the machines that don't match the ms`s selector
// anymore but has the stale controller ref.
// TODO: Do the List and Filter in a single pass, or use an index.
filteredMachines, err := dc.machineLister.List(labels.Everything())
if err != nil {
return err
}
// NOTE: filteredMachines are pointing to objects from cache - if you need to
// modify them, you need to copy it first.
filteredMachines, err = dc.claimMachines(machineSet, selector, filteredMachines)
if err != nil {
return err
}

taints := v1.Taint{
Key: PreferNoSchedule,
Value: "True",
Effect: "PreferNoSchedule",
}

// Iterate through all machines and place the PreferNoSchedule taint
// to avoid scheduling on older machines
for _, machine := range filteredMachines {
if machine.Status.Node != "" {
node, err := dc.targetCoreClient.CoreV1().Nodes().Get(machine.Status.Node, metav1.GetOptions{})
if err != nil {
glog.Warningf("Node taint removal failed for node: %s, Error: %s", machine.Status.Node, err)
continue
}

err = RemoveTaintOffNode(
dc.targetCoreClient,
machine.Status.Node,
node,
&taints,
)
if err != nil {
glog.Warningf("Node taint removal failed for node: %s, Error: %s", machine.Status.Node, err)
}
node, err = dc.targetCoreClient.CoreV1().Nodes().Get(machine.Status.Node, metav1.GetOptions{})
}
}

retryDeadline := time.Now().Add(maxRetryDeadline)
for {
machineSet, err = dc.controlMachineClient.MachineSets(machineSet.Namespace).Get(machineSet.Name, metav1.GetOptions{})
if err != nil && time.Now().Before(retryDeadline) {
glog.Warningf("Unable to fetch MachineSet object %s, Error: %+v", machineSet.Name, err)
time.Sleep(conflictRetryInterval)
continue
} else if err != nil {
// Timeout occurred
glog.Errorf("Timeout occurred: Unable to fetch MachineSet object %s, Error: %+v", machineSet.Name, err)
return err
}

msCopy := machineSet.DeepCopy()
delete(msCopy.Annotations, PreferNoSchedule)

machineSet, err = dc.controlMachineClient.MachineSets(msCopy.Namespace).Update(msCopy)

if err != nil && time.Now().Before(retryDeadline) {
glog.Warningf("Unable to update MachineSet object %s, Error: %+v", machineSet.Name, err)
time.Sleep(conflictRetryInterval)
continue
} else if err != nil {
// Timeout occurred
glog.Errorf("Timeout occurred: Unable to update MachineSet object %s, Error: %+v", machineSet.Name, err)
return err
}

// Break out of loop when update succeeds
break
}
glog.V(2).Infof("Removed taint %s from MachineSet object %q", PreferNoSchedule, machineSet.Name)

return nil
}
108 changes: 106 additions & 2 deletions pkg/controller/deployment_rolling.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,21 @@ package controller
import (
"fmt"
"sort"
"time"

"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
"github.com/golang/glog"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/util/integer"

"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
"github.com/golang/glog"
)

var (
maxRetryDeadline = 1 * time.Minute
conflictRetryInterval = 5 * time.Second
)

// rolloutRolling implements the logic for rolling a new machine set.
Expand All @@ -40,6 +50,11 @@ func (dc *controller) rolloutRolling(d *v1alpha1.MachineDeployment, isList []*v1
}
allISs := append(oldISs, newIS)

err = dc.taintNodesBackingMachineSets(oldISs)
if err != nil {
glog.Warningf("Failed to add %s on all nodes. Error: %s", PreferNoSchedule, err)
}

// Scale up, if we can.
scaledUp, err := dc.reconcileNewMachineSet(allISs, newIS, d)
if err != nil {
Expand Down Expand Up @@ -236,3 +251,92 @@ func (dc *controller) scaleDownOldMachineSetsForRollingUpdate(allISs []*v1alpha1

return totalScaledDown, nil
}

// taintNodesBackingMachineSets taints all nodes backing the machineSets
func (dc *controller) taintNodesBackingMachineSets(MachineSets []*v1alpha1.MachineSet) error {

for _, machineSet := range MachineSets {

if machineSet.Annotations[PreferNoSchedule] != "" {
continue
}

glog.V(3).Infof("Trying to taint MachineSet object %q with %s to avoid scheduling of pods", machineSet.Name, PreferNoSchedule)
selector, err := metav1.LabelSelectorAsSelector(machineSet.Spec.Selector)
if err != nil {
return err
}

// list all machines to include the machines that don't match the ms`s selector
// anymore but has the stale controller ref.
// TODO: Do the List and Filter in a single pass, or use an index.
filteredMachines, err := dc.machineLister.List(labels.Everything())
if err != nil {
return err
}
// NOTE: filteredMachines are pointing to objects from cache - if you need to
// modify them, you need to copy it first.
filteredMachines, err = dc.claimMachines(machineSet, selector, filteredMachines)
if err != nil {
return err
}

taints := v1.Taint{
Key: PreferNoSchedule,
Value: "True",
Effect: "PreferNoSchedule",
}

// Iterate through all machines and place the PreferNoSchedule taint
// to avoid scheduling on older machines
for _, machine := range filteredMachines {
if machine.Status.Node != "" {
err = AddOrUpdateTaintOnNode(
dc.targetCoreClient,
machine.Status.Node,
&taints,
)
if err != nil {
glog.Warningf("Node tainting failed for node: %s, %s", machine.Status.Node, err)
}
}
}

retryDeadline := time.Now().Add(maxRetryDeadline)
for {
machineSet, err = dc.controlMachineClient.MachineSets(machineSet.Namespace).Get(machineSet.Name, metav1.GetOptions{})
if err != nil && time.Now().Before(retryDeadline) {
glog.Warningf("Unable to fetch MachineSet object %s, Error: %+v", machineSet.Name, err)
time.Sleep(conflictRetryInterval)
continue
} else if err != nil {
// Timeout occurred
glog.Errorf("Timeout occurred: Unable to fetch MachineSet object %s, Error: %+v", machineSet.Name, err)
return err
}

msCopy := machineSet.DeepCopy()
if msCopy.Annotations == nil {
msCopy.Annotations = make(map[string]string, 0)
}
msCopy.Annotations[PreferNoSchedule] = "True"

_, err = dc.controlMachineClient.MachineSets(msCopy.Namespace).Update(msCopy)
if err != nil && time.Now().Before(retryDeadline) {
glog.Warningf("Unable to update MachineSet object %s, Error: %+v", machineSet.Name, err)
time.Sleep(conflictRetryInterval)
continue
} else if err != nil {
// Timeout occurred
glog.Errorf("Timeout occurred: Unable to update MachineSet object %s, Error: %+v", machineSet.Name, err)
return err
}

// Break out of loop when update succeeds
break
}
glog.V(2).Infof("Tainted MachineSet object %q with %s to avoid scheduling of pods", machineSet.Name, PreferNoSchedule)
}

return nil
}
7 changes: 6 additions & 1 deletion pkg/controller/deployment_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import (
"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
v1alpha1client "github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1"
labelsutil "github.com/gardener/machine-controller-manager/pkg/util/labels"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -106,6 +106,9 @@ const (
// is deployment.spec.replicas + maxSurge. Used by the underlying machine sets to estimate their
// proportions in case the deployment has surge replicas.
MaxReplicasAnnotation = "deployment.kubernetes.io/max-replicas"
// PreferNoSchedule is used to identify machineSet nodes on which PreferNoSchedule taint is added on
// older machineSets during a rolling update
PreferNoSchedule = "deployment.machine.sapcloud.io/prefer-no-schedule"

// RollbackRevisionNotFound is not found rollback event reason
RollbackRevisionNotFound = "DeploymentRollbackRevisionNotFound"
Expand Down Expand Up @@ -343,6 +346,8 @@ var annotationsToSkip = map[string]bool{
RevisionHistoryAnnotation: true,
DesiredReplicasAnnotation: true,
MaxReplicasAnnotation: true,
LastReplicaUpdate: true,
PreferNoSchedule: true,
}

// skipCopyAnnotation returns true if we should skip copying the annotation with the given annotation key
Expand Down

0 comments on commit 410e0dc

Please sign in to comment.