Skip to content

Commit

Permalink
Implement TaintAllNeedUpdate
Browse files Browse the repository at this point in the history
  • Loading branch information
johngmyers committed Dec 6, 2019
1 parent d4a1f10 commit ceb9409
Show file tree
Hide file tree
Showing 4 changed files with 289 additions and 7 deletions.
2 changes: 2 additions & 0 deletions pkg/featureflag/featureflag.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ var (
VSphereCloudProvider = New("VSphereCloudProvider", Bool(false))
// SkipEtcdVersionCheck will bypass the check that etcd-manager is using a supported etcd version
SkipEtcdVersionCheck = New("SkipEtcdVersionCheck", Bool(false))
// ConfigurableRollingUpdate enables the RollingUpdate strategy configuration settings
ConfigurableRollingUpdate = New("ConfigurableRollingUpdate", Bool(false))
)

// FeatureFlag defines a feature flag
Expand Down
6 changes: 6 additions & 0 deletions pkg/instancegroups/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ go_library(
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/json:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/strategicpatch:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
Expand All @@ -37,6 +40,7 @@ go_test(
"//cloudmock/aws/mockautoscaling:go_default_library",
"//pkg/apis/kops:go_default_library",
"//pkg/cloudinstances:go_default_library",
"//pkg/featureflag:go_default_library",
"//pkg/validation:go_default_library",
"//upup/pkg/fi/cloudup/awsup:go_default_library",
"//vendor/github.com/aws/aws-sdk-go/aws:go_default_library",
Expand All @@ -45,5 +49,7 @@ go_test(
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/fake:go_default_library",
"//vendor/k8s.io/client-go/testing:go_default_library",
"//vendor/k8s.io/utils/pointer:go_default_library",
],
)
91 changes: 90 additions & 1 deletion pkg/instancegroups/instancegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ import (
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/json"
"k8s.io/apimachinery/pkg/util/strategicpatch"
"k8s.io/klog"
api "k8s.io/kops/pkg/apis/kops"
"k8s.io/kops/pkg/cloudinstances"
Expand All @@ -34,6 +37,8 @@ import (
"k8s.io/kops/upup/pkg/fi"
)

const rollingUpdateTaintKey = "kops.k8s.io/rolling-update"

// RollingUpdateInstanceGroup is the AWS ASG backing an InstanceGroup.
type RollingUpdateInstanceGroup struct {
// Cloud is the kops cloud provider
Expand Down Expand Up @@ -114,6 +119,7 @@ func (r *RollingUpdateInstanceGroup) RollingUpdate(rollingUpdateData *RollingUpd
return fmt.Errorf("rollingUpdate is missing a k8s client")
}

noneReady := len(r.CloudGroup.Ready) == 0
update := r.CloudGroup.NeedUpdate
if rollingUpdateData.Force {
update = append(update, r.CloudGroup.Ready...)
Expand All @@ -137,7 +143,16 @@ func (r *RollingUpdateInstanceGroup) RollingUpdate(rollingUpdateData *RollingUpd
}
}

for _, u := range update {
settings := resolveSettings(cluster, r.CloudGroup.InstanceGroup)

for uIdx, u := range update {
if featureflag.ConfigurableRollingUpdate.Enabled() && *settings.TaintAllNeedUpdate {
err := r.maybeTaintAllNeedUpdate(update, rollingUpdateData, noneReady, uIdx)
if err != nil {
return err
}
}

instanceId := u.ID

nodeName := ""
Expand Down Expand Up @@ -225,6 +240,80 @@ func (r *RollingUpdateInstanceGroup) RollingUpdate(rollingUpdateData *RollingUpd
return nil
}

// maybeTaintAllNeedUpdate applies the rolling-update taint to every node in update that is still
// schedulable and does not already carry the taint. It is meant to be called on each iteration of
// the rolling-update loop and only acts on one specific iteration:
//
//   - when the group had ready instances (noneReady is false), on the first iteration (uIdx == 0);
//   - when the group had no ready instances, on the second iteration (uIdx == 1), and only if there
//     are at least two members to update.
//
// It does nothing for instance groups whose role is not Node, or when rollingUpdateData.CloudOnly
// is set. A failure to taint a node is returned only when rollingUpdateData.FailOnDrainError is
// set; otherwise it is logged and the remaining nodes are still processed.
func (r *RollingUpdateInstanceGroup) maybeTaintAllNeedUpdate(update []*cloudinstances.CloudInstanceGroupMember, rollingUpdateData *RollingUpdateCluster, noneReady bool, uIdx int) error {
	if r.CloudGroup.InstanceGroup.Spec.Role != api.InstanceGroupRoleNode || rollingUpdateData.CloudOnly {
		return nil
	}

	// Iteration on which the mass-tainting happens. With no ready instances we wait until after
	// one node is deleted and its replacement validates before the mass-cordoning, in case the
	// current spec does not result in usable nodes.
	taintAt := 0
	if noneReady {
		taintAt = 1
	}
	if uIdx != taintAt || len(update) <= taintAt {
		return nil
	}

	// NOTE(review): when noneReady, update[0] has presumably already been replaced by the time
	// this runs, yet it is still considered below — confirm against the caller's loop body.
	var toTaint []*corev1.Node
	for _, u := range update {
		if u.Node == nil || u.Node.Spec.Unschedulable {
			continue
		}
		foundTaint := false
		for _, taint := range u.Node.Spec.Taints {
			if taint.Key == rollingUpdateTaintKey {
				foundTaint = true
				break
			}
		}
		if !foundTaint {
			toTaint = append(toTaint, u.Node)
		}
	}
	if len(toTaint) == 0 {
		return nil
	}

	noun := "nodes"
	if len(toTaint) == 1 {
		noun = "node"
	}
	klog.Infof("Tainting %d %s in %q instancegroup.", len(toTaint), noun, r.CloudGroup.InstanceGroup.Name)
	for _, n := range toTaint {
		if err := r.patchTaint(rollingUpdateData, n); err != nil {
			// Report the node by name; formatting the *corev1.Node itself would dump the whole object.
			if rollingUpdateData.FailOnDrainError {
				return fmt.Errorf("failed to taint node %q: %v", n.Name, err)
			}
			klog.Infof("Ignoring error tainting node %q: %v", n.Name, err)
		}
	}
	return nil
}

// patchTaint appends the rolling-update NoSchedule taint to node and submits the difference to the
// API server as a strategic merge patch through rollingUpdateData.K8sClient.
//
// The taint is added to the passed-in node object itself, so the caller's copy is modified even if
// marshalling, patch creation or the Patch call subsequently fails. Any such error is returned as-is.
func (r *RollingUpdateInstanceGroup) patchTaint(rollingUpdateData *RollingUpdateCluster, node *corev1.Node) error {
	// Snapshot the node before the change so the patch contains only the delta.
	before, err := json.Marshal(node)
	if err != nil {
		return err
	}

	rollingUpdateTaint := corev1.Taint{
		Key:    rollingUpdateTaintKey,
		Effect: corev1.TaintEffectNoSchedule,
	}
	node.Spec.Taints = append(node.Spec.Taints, rollingUpdateTaint)

	after, err := json.Marshal(node)
	if err != nil {
		return err
	}

	patch, err := strategicpatch.CreateTwoWayMergePatch(before, after, node)
	if err != nil {
		return err
	}

	nodes := rollingUpdateData.K8sClient.CoreV1().Nodes()
	if _, err := nodes.Patch(node.Name, types.StrategicMergePatchType, patch); err != nil {
		return err
	}
	return nil
}

// validateClusterWithDuration runs validation.ValidateCluster until either we get positive result or the timeout expires
func (r *RollingUpdateInstanceGroup) validateClusterWithDuration(rollingUpdateData *RollingUpdateCluster, duration time.Duration) error {
// TODO should we expose this to the UI?
Expand Down
Loading

0 comments on commit ceb9409

Please sign in to comment.