From 47ca48a0e1e04e3b2ff487e68c252e12c31ebc1e Mon Sep 17 00:00:00 2001 From: Furkat Gofurov Date: Mon, 14 Dec 2020 13:53:05 +0200 Subject: [PATCH] Add a possibility to mark a specific control plane Machine(s) to be deleted with delete annotation --- api/v1alpha3/common_types.go | 4 ++ controllers/machineset_delete_policy.go | 7 +-- controllers/machineset_delete_policy_test.go | 6 +-- controlplane/kubeadm/controllers/scale.go | 7 ++- .../kubeadm/controllers/scale_test.go | 52 +++++++++++++++++- .../kubeadm/internal/control_plane.go | 11 +++- .../kubeadm/internal/machine_collection.go | 1 + .../20191017-kubeadm-based-control-plane.md | 53 ++++++++++--------- 8 files changed, 107 insertions(+), 34 deletions(-) diff --git a/api/v1alpha3/common_types.go b/api/v1alpha3/common_types.go index bcace3cc3985..37ab39e918e0 100644 --- a/api/v1alpha3/common_types.go +++ b/api/v1alpha3/common_types.go @@ -38,6 +38,10 @@ const ( // on the reconciled object. PausedAnnotation = "cluster.x-k8s.io/paused" + // DeleteMachineAnnotation marks control plane and worker nodes that will be given priority for deletion + // when KCP or a machineset scales down. This annotation is given top priority on all delete policies. + DeleteMachineAnnotation = "cluster.x-k8s.io/delete-machine" + // TemplateClonedFromNameAnnotation is the infrastructure machine annotation that stores the name of the infrastructure template resource // that was cloned for the machine. This annotation is set only during cloning a template. Older/adopted machines will not have this annotation. TemplateClonedFromNameAnnotation = "cluster.x-k8s.io/cloned-from-name" diff --git a/controllers/machineset_delete_policy.go b/controllers/machineset_delete_policy.go index de9ec999bf65..2b284a528d8f 100644 --- a/controllers/machineset_delete_policy.go +++ b/controllers/machineset_delete_policy.go @@ -37,6 +37,7 @@ const ( DeleteNodeAnnotation = "cluster.k8s.io/delete-machine" // DeleteMachineAnnotation marks nodes that will be given priority for deletion // when a machineset scales down. This annotation is given top priority on all delete policies. + // Deprecated: Please use DeleteMachineAnnotation under api/v1alpha3 instead. 
DeleteMachineAnnotation = "cluster.x-k8s.io/delete-machine" mustDelete deletePriority = 100.0 @@ -55,7 +56,7 @@ func oldestDeletePriority(machine *clusterv1.Machine) deletePriority { if machine.ObjectMeta.Annotations != nil && machine.ObjectMeta.Annotations[DeleteNodeAnnotation] != "" { return mustDelete } - if _, ok := machine.ObjectMeta.Annotations[DeleteMachineAnnotation]; ok { + if _, ok := machine.ObjectMeta.Annotations[clusterv1.DeleteMachineAnnotation]; ok { return mustDelete } if machine.Status.NodeRef == nil { @@ -81,7 +82,7 @@ func newestDeletePriority(machine *clusterv1.Machine) deletePriority { if machine.ObjectMeta.Annotations != nil && machine.ObjectMeta.Annotations[DeleteNodeAnnotation] != "" { return mustDelete } - if _, ok := machine.ObjectMeta.Annotations[DeleteMachineAnnotation]; ok { + if _, ok := machine.ObjectMeta.Annotations[clusterv1.DeleteMachineAnnotation]; ok { return mustDelete } if machine.Status.NodeRef == nil { @@ -100,7 +101,7 @@ func randomDeletePolicy(machine *clusterv1.Machine) deletePriority { if machine.ObjectMeta.Annotations != nil && machine.ObjectMeta.Annotations[DeleteNodeAnnotation] != "" { return betterDelete } - if _, ok := machine.ObjectMeta.Annotations[DeleteMachineAnnotation]; ok { + if _, ok := machine.ObjectMeta.Annotations[clusterv1.DeleteMachineAnnotation]; ok { return betterDelete } if machine.Status.NodeRef == nil { diff --git a/controllers/machineset_delete_policy_test.go b/controllers/machineset_delete_policy_test.go index 0a09b05a162e..479b547a5973 100644 --- a/controllers/machineset_delete_policy_test.go +++ b/controllers/machineset_delete_policy_test.go @@ -44,7 +44,7 @@ func TestMachineToDelete(t *testing.T) { Status: clusterv1.MachineStatus{NodeRef: nodeRef}, } deleteMachineWithMachineAnnotation := &clusterv1.Machine{ - ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{DeleteMachineAnnotation: ""}}, + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{clusterv1.DeleteMachineAnnotation: ""}}, Status: clusterv1.MachineStatus{NodeRef: nodeRef}, } deleteMachineWithoutNodeRef := &clusterv1.Machine{} @@ -231,7 +231,7 @@ func TestMachineNewestDelete(t *testing.T) { Status: clusterv1.MachineStatus{NodeRef: nodeRef}, } deleteMachineWithMachineAnnotation := &clusterv1.Machine{ - ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{DeleteMachineAnnotation: ""}, CreationTimestamp: metav1.NewTime(currentTime.Time.AddDate(0, 0, -10))}, + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{clusterv1.DeleteMachineAnnotation: ""}, CreationTimestamp: metav1.NewTime(currentTime.Time.AddDate(0, 0, -10))}, Status: clusterv1.MachineStatus{NodeRef: nodeRef}, } unhealthyMachine := &clusterv1.Machine{ @@ -344,7 +344,7 @@ func TestMachineOldestDelete(t *testing.T) { Status: clusterv1.MachineStatus{NodeRef: nodeRef}, } deleteMachineWithMachineAnnotation := &clusterv1.Machine{ - ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{DeleteMachineAnnotation: ""}, CreationTimestamp: metav1.NewTime(currentTime.Time.AddDate(0, 0, -10))}, + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{clusterv1.DeleteMachineAnnotation: ""}, CreationTimestamp: metav1.NewTime(currentTime.Time.AddDate(0, 0, -10))}, Status: clusterv1.MachineStatus{NodeRef: nodeRef}, } unhealthyMachine := &clusterv1.Machine{ diff --git a/controlplane/kubeadm/controllers/scale.go b/controlplane/kubeadm/controllers/scale.go index 55d5b6bca032..c01114eae604 100644 --- a/controlplane/kubeadm/controllers/scale.go +++ 
b/controlplane/kubeadm/controllers/scale.go @@ -227,7 +227,12 @@ func preflightCheckCondition(kind string, obj conditions.Getter, condition clust func selectMachineForScaleDown(controlPlane *internal.ControlPlane, outdatedMachines internal.FilterableMachineCollection) (*clusterv1.Machine, error) { machines := controlPlane.Machines - if outdatedMachines.Len() > 0 { + switch { + case controlPlane.MachineWithDeleteAnnotation(outdatedMachines).Len() > 0: + machines = controlPlane.MachineWithDeleteAnnotation(outdatedMachines) + case controlPlane.MachineWithDeleteAnnotation(machines).Len() > 0: + machines = controlPlane.MachineWithDeleteAnnotation(machines) + case outdatedMachines.Len() > 0: machines = outdatedMachines } return controlPlane.MachineInFailureDomainWithMostMachines(machines) diff --git a/controlplane/kubeadm/controllers/scale_test.go b/controlplane/kubeadm/controllers/scale_test.go index 5f16fba9d913..734a4ba69f8d 100644 --- a/controlplane/kubeadm/controllers/scale_test.go +++ b/controlplane/kubeadm/controllers/scale_test.go @@ -294,8 +294,12 @@ func TestSelectMachineForScaleDown(t *testing.T) { m3 := machine("machine-3", withFailureDomain("one"), withTimestamp(startDate.Add(-4*time.Hour))) m4 := machine("machine-4", withFailureDomain("two"), withTimestamp(startDate.Add(-time.Hour))) m5 := machine("machine-5", withFailureDomain("two"), withTimestamp(startDate.Add(-2*time.Hour))) + m6 := machine("machine-6", withFailureDomain("two"), withTimestamp(startDate.Add(-7*time.Hour))) + m7 := machine("machine-7", withFailureDomain("two"), withTimestamp(startDate.Add(-5*time.Hour)), withAnnotation("cluster.x-k8s.io/delete-machine")) + m8 := machine("machine-8", withFailureDomain("two"), withTimestamp(startDate.Add(-6*time.Hour)), withAnnotation("cluster.x-k8s.io/delete-machine")) mc3 := internal.NewFilterableMachineCollection(m1, m2, m3, m4, m5) + mc6 := internal.NewFilterableMachineCollection(m6, m7, m8) fd := clusterv1.FailureDomains{ "one": failureDomain(true), "two": failureDomain(true), @@ -313,6 +317,11 @@ func TestSelectMachineForScaleDown(t *testing.T) { return m.Name != "machine-5" }), } + annotatedControlPlane := &internal.ControlPlane{ + KCP: &kcp, + Cluster: &clusterv1.Cluster{Status: clusterv1.ClusterStatus{FailureDomains: fd}}, + Machines: mc6, + } testCases := []struct { name string @@ -322,7 +331,7 @@ func TestSelectMachineForScaleDown(t *testing.T) { expectedMachine clusterv1.Machine }{ { - name: "when there are are machines needing upgrade, it returns the oldest machine in the failure domain with the most machines needing upgrade", + name: "when there are machines needing upgrade, it returns the oldest machine in the failure domain with the most machines needing upgrade", cp: needsUpgradeControlPlane, outDatedMachines: internal.NewFilterableMachineCollection(m5), expectErr: false, @@ -335,6 +344,41 @@ func TestSelectMachineForScaleDown(t *testing.T) { expectErr: false, expectedMachine: clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "machine-3"}}, }, + { + name: "when there is a single machine marked with delete annotation key in machine collection, it returns only that marked machine", + cp: annotatedControlPlane, + outDatedMachines: internal.NewFilterableMachineCollection(m6, m7), + expectErr: false, + expectedMachine: clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "machine-7"}}, + }, + { + name: "when there are machines marked with delete annotation key in machine collection, it returns the oldest marked machine first", + cp: annotatedControlPlane, + 
+			outDatedMachines: internal.NewFilterableMachineCollection(m7, m8),
+			expectErr:        false,
+			expectedMachine:  clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "machine-8"}},
+		},
+		{
+			name:             "when there are annotated machines which are part of the annotatedControlPlane but not in outdatedMachines, it returns the oldest marked machine first",
+			cp:               annotatedControlPlane,
+			outDatedMachines: internal.NewFilterableMachineCollection(),
+			expectErr:        false,
+			expectedMachine:  clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "machine-8"}},
+		},
+		{
+			name:             "when there are outdated machines and one of them has the delete annotation, it returns the annotated machine",
+			cp:               needsUpgradeControlPlane,
+			outDatedMachines: internal.NewFilterableMachineCollection(m7, m3),
+			expectErr:        false,
+			expectedMachine:  clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "machine-7"}},
+		},
+		{
+			name:             "when there is an up-to-date machine with the delete annotation and outdated machines without it, it returns the oldest marked machine first",
+			cp:               upToDateControlPlane,
+			outDatedMachines: internal.NewFilterableMachineCollection(m5, m3, m8, m7, m6, m1, m2),
+			expectErr:        false,
+			expectedMachine:  clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "machine-8"}},
+		},
 	}
 
 	for _, tc := range testCases {
@@ -519,6 +563,12 @@ func withFailureDomain(fd string) machineOpt {
 	}
 }
 
+func withAnnotation(annotation string) machineOpt {
+	return func(m *clusterv1.Machine) {
+		m.ObjectMeta.Annotations = map[string]string{annotation: ""}
+	}
+}
+
 func withTimestamp(t time.Time) machineOpt {
 	return func(m *clusterv1.Machine) {
 		m.CreationTimestamp = metav1.NewTime(t)
diff --git a/controlplane/kubeadm/internal/control_plane.go b/controlplane/kubeadm/internal/control_plane.go
index 983af69d8c9f..fcd2ac1bb049 100644
--- a/controlplane/kubeadm/internal/control_plane.go
+++ b/controlplane/kubeadm/internal/control_plane.go
@@ -137,6 +137,14 @@ func (c *ControlPlane) MachineInFailureDomainWithMostMachines(machines Filterabl
 	return machineToMark, nil
 }
 
+// MachineWithDeleteAnnotation returns the machines that have been annotated with the DeleteMachineAnnotation key.
+func (c *ControlPlane) MachineWithDeleteAnnotation(machines FilterableMachineCollection) FilterableMachineCollection {
+	// See if there are any machines with the DeleteMachineAnnotation key.
+	annotatedMachines := machines.Filter(machinefilters.HasAnnotationKey(clusterv1.DeleteMachineAnnotation))
+	// If there are, return the list of annotated machines.
+	return annotatedMachines
+}
+
 // FailureDomainWithMostMachines returns a fd which exists both in machines and control-plane machines and has the most
 // control-plane machines on it.
 func (c *ControlPlane) FailureDomainWithMostMachines(machines FilterableMachineCollection) *string {
@@ -150,7 +158,6 @@ func (c *ControlPlane) FailureDomainWithMostMachines(machines FilterableMachineC
 		// in the cluster status.
 		return notInFailureDomains.Oldest().Spec.FailureDomain
 	}
-
 	return PickMost(c, machines)
 }
 
@@ -275,7 +282,7 @@ func getInfraResources(ctx context.Context, cl client.Client, machines Filterabl
 	return result, nil
 }
 
-// getInfraResources fetches the kubeadm config for each machine in the collection and returns a map of machine.Name -> KubeadmConfig.
+// getKubeadmConfigs fetches the kubeadm config for each machine in the collection and returns a map of machine.Name -> KubeadmConfig.
func getKubeadmConfigs(ctx context.Context, cl client.Client, machines FilterableMachineCollection) (map[string]*bootstrapv1.KubeadmConfig, error) { result := map[string]*bootstrapv1.KubeadmConfig{} for _, m := range machines { diff --git a/controlplane/kubeadm/internal/machine_collection.go b/controlplane/kubeadm/internal/machine_collection.go index 975aea467218..04ec906a6d30 100644 --- a/controlplane/kubeadm/internal/machine_collection.go +++ b/controlplane/kubeadm/internal/machine_collection.go @@ -97,6 +97,7 @@ func (s FilterableMachineCollection) Len() int { return len(s) } +// newFilteredMachineCollection creates a FilterableMachineCollection from a filtered list of values. func newFilteredMachineCollection(filter machinefilters.Func, machines ...*clusterv1.Machine) FilterableMachineCollection { ss := make(FilterableMachineCollection, len(machines)) for i := range machines { diff --git a/docs/proposals/20191017-kubeadm-based-control-plane.md b/docs/proposals/20191017-kubeadm-based-control-plane.md index 4bab32e96274..94c1be76d832 100644 --- a/docs/proposals/20191017-kubeadm-based-control-plane.md +++ b/docs/proposals/20191017-kubeadm-based-control-plane.md @@ -93,7 +93,7 @@ During 2019 we saw control plane management implementations in each infrastructu bootstrapping was identified as being reimplemented in every infrastructure provider and then extracted into Cluster API Bootstrap Provider Kubeadm (CABPK), we believe we can reduce the redundancy of control plane management across providers and centralize the logic in Cluster API. We also wanted to ensure that any default control plane management that we -for the default implementation would not preclude the use of alternative control plane management solutions. +for the default implementation would not preclude the use of alternative control plane management solutions. ### Goals @@ -173,12 +173,12 @@ With the following validations: - `KubeadmControlPlane.Spec.KubeadmConfigSpec` allows mutations required for supporting following use cases: - Change of imagesRepository/imageTags (with validation of CoreDNS supported upgrade) - Change of node registration options - - Change of pre/post kubeadm commands + - Change of pre/post kubeadm commands - Change of cloud init files And the following defaulting: -- `KubeadmControlPlane.Spec.Replicas: 1` +- `KubeadmControlPlane.Spec.Replicas: 1` #### Modifications required to existing API Types @@ -322,7 +322,7 @@ spec: - Upgrading machines - Scale up operations are blocked based on Etcd and control plane health checks. - See [Health checks](#Health checks) below. -- Scale up operations creates the next machine in the failure domain with the fewest number of machines. +- Scale up operations creates the next machine in the failure domain with the fewest number of machines. ![controlplane-init-6](images/controlplane/controlplane-init-6.png) @@ -331,12 +331,17 @@ spec: - Allow scale down a control plane with stacked etcd to only odd numbers, as per [etcd best practice](https://etcd.io/docs/v3.3.12/faq/#why-an-odd-number-of-cluster-members). - However, allow a control plane using an external etcd cluster to scale down to other numbers such as 2 or 4. -- Scale up operations must not be done in conjunction with: +- Scale down operations must not be done in conjunction with: - Adopting machines - Upgrading machines -- Scale up operations are blocked based on Etcd and control plane health checks. +- Scale down operations are blocked based on Etcd and control plane health checks. 
   - See [Health checks](#Health checks) below.
-- Scale down operations removes the oldest machine in the failure domain that has the most control-plane machines on it
+- Scale down operations removes the oldest machine in the failure domain that has the most control-plane machines on it.
+- Allow scaling down of KCP by marking specific control plane machine(s) for deletion with the delete annotation key. The presence of the annotation changes how machines are selected for scale down, applying the following prioritization logic in descending order:
+  - outdated machines with the delete annotation
+  - machines with the delete annotation
+  - outdated machines
+  - all machines
 
 ![controlplane-init-7](images/controlplane/controlplane-init-7.png)
 
@@ -348,7 +353,7 @@ spec:
 
 ##### KubeadmControlPlane rollout (using create-swap-and-delete)
 
-- Triggered by: 
+- Triggered by:
   - Changes to Version
   - Changes to the kubeadmConfigSpec
   - Changes to the infrastructureRef
@@ -366,13 +371,13 @@ spec:
   - If there is a machine requiring rollout
     - Scale up control plane creating a machine with the new spec
     - Scale down control plane by removing one of the machine that needs rollout (the oldest out-of date machine in the failure domain that has the most control-plane machines on it)
-  
+
 - In order to determine if a Machine to be rolled out, KCP implements the following:
   - The infrastructureRef link used by each machine at creation time is stored in annotations at machine level.
   - The kubeadmConfigSpec used by each machine at creation time is stored in annotations at machine level.
   - If the annotation is not present (machine is either old or adopted), we won't roll out on any possible changes made in KCP's ClusterConfiguration given that we don't have enough information to make a decision. Users should use KCP.Spec.UpgradeAfter field to force a rollout in this case.
-  
+
 - The controller should tolerate the manual or automatic removal of a replica during the upgrade process. A replica that fails during the upgrade may block the completion of the upgrade. Removal or other remedial action may be necessary to allow the upgrade to complete.
 
 ###### Constraints and Assumptions
 
 * Infrastructure templates are expected to be immutable, so infrastructure template contents do not have to hashed in order to detect changes.
-  
+
 ##### Remediation (using delete-and-recreate)
 
-- KCP remediation is triggered by the MachineHealthCheck controller marking a machine for remediation. See 
+- KCP remediation is triggered by the MachineHealthCheck controller marking a machine for remediation. See
   [machine-health-checking proposal](https://github.com/kubernetes-sigs/cluster-api/blob/11485f4f817766c444840d8ea7e4e7d1a6b94cc9/docs/proposals/20191030-machine-health-checking.md) for additional details. When there are multiple machines that are marked for remediation, the oldest one will be remediated first.
-  
+
 - Following rules should be satisfied in order to start remediation
   - The cluster MUST have spec.replicas >= 3, because this is the smallest cluster size that allows any etcd failure tolerance.
   - The number of replicas MUST be equal to or greater than the desired replicas. This rule ensures that when the cluster
-    is missing replicas, we skip remediation and instead perform regular scale up/rollout operations first. 
+    is missing replicas, we skip remediation and instead perform regular scale up/rollout operations first.
   - The cluster MUST have no machines with a deletion timestamp. This rule prevents KCP taking actions while the cluster is in a transitional state.
   - Remediation MUST preserve etcd quorum. This rule ensures that we will not remove a member that would result in etcd losing a majority of members and thus become unable to field new requests.
 
 - When all the conditions for starting remediation are satisfied, KCP temporarily suspend any operation in progress
-  in order to perform remediation. 
+  in order to perform remediation.
 
 - Remediation will be performed by issuing a delete on the unhealthy machine; after deleting the machine, KCP will restore the target number of machines by triggering a scale up (current replicas < desired replicas).
 
 > NOTE: This paragraph describes KCP health checks specifically designed to ensure a kubeadm
-generated control-plane is stable before proceeding with KCP actions like scale up, scale down and rollout. 
+generated control-plane is stable before proceeding with KCP actions like scale up, scale down and rollout.
 KCP health checks are different from the one implemented by the MachineHealthCheck controller.
 
 - Will be used during scaling and upgrade operations.
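
The prioritization introduced by this patch can be read directly from the new `switch` in `selectMachineForScaleDown`. The sketch below restates that selection order with simplified stand-in types rather than the real `clusterv1.Machine`/`FilterableMachineCollection` API, and it collapses the failure-domain tie-breaking into a plain oldest-first sort, so treat it as an illustration of the priority rules only, not as the controller code:

```go
package main

import (
	"fmt"
	"sort"
	"time"
)

// deleteMachineAnnotation mirrors the key added in api/v1alpha3/common_types.go.
const deleteMachineAnnotation = "cluster.x-k8s.io/delete-machine"

// machine is a stripped-down stand-in for clusterv1.Machine, used for illustration only.
type machine struct {
	name        string
	created     time.Time
	annotations map[string]string
	outdated    bool
}

func hasDeleteAnnotation(m machine) bool {
	_, ok := m.annotations[deleteMachineAnnotation]
	return ok
}

// filter returns the machines for which keep returns true.
func filter(machines []machine, keep func(machine) bool) []machine {
	var out []machine
	for _, m := range machines {
		if keep(m) {
			out = append(out, m)
		}
	}
	return out
}

// selectForScaleDown applies the prioritization from the proposal:
// outdated machines with the annotation > annotated machines > outdated machines > all machines.
func selectForScaleDown(all []machine) machine {
	outdated := filter(all, func(m machine) bool { return m.outdated })
	annotatedOutdated := filter(outdated, hasDeleteAnnotation)
	annotated := filter(all, hasDeleteAnnotation)

	candidates := all
	switch {
	case len(annotatedOutdated) > 0:
		candidates = annotatedOutdated
	case len(annotated) > 0:
		candidates = annotated
	case len(outdated) > 0:
		candidates = outdated
	}

	// Oldest candidate wins; the real controller first narrows the choice to the
	// failure domain with the most control plane machines.
	sort.Slice(candidates, func(i, j int) bool {
		return candidates[i].created.Before(candidates[j].created)
	})
	return candidates[0]
}

func main() {
	now := time.Now()
	machines := []machine{
		{name: "machine-1", created: now.Add(-3 * time.Hour)},
		{name: "machine-2", created: now.Add(-2 * time.Hour), outdated: true},
		{name: "machine-3", created: now.Add(-1 * time.Hour), annotations: map[string]string{deleteMachineAnnotation: ""}},
	}
	// Prints "machine-3": an annotated machine outranks an older, merely outdated one.
	fmt.Println(selectForScaleDown(machines).name)
}
```

Marking a machine means setting the `cluster.x-k8s.io/delete-machine` annotation on the Machine object; only the presence of the key is checked, so any value (including an empty one) works, for example `kubectl annotate machine <machine-name> cluster.x-k8s.io/delete-machine=""`. The next scale down or rollout then prefers that machine.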