From e98e2de6e64414e998928ec6a6d51d7deeb4060c Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Sat, 21 Jan 2023 13:55:55 +0100 Subject: [PATCH] Add support for KCP remediation during cluster provisioning Co-authored-by: sbueringer --- .../kubeadm/api/v1alpha3/conversion.go | 7 + .../api/v1alpha3/zz_generated.conversion.go | 2 + .../kubeadm/api/v1alpha4/conversion.go | 17 + .../api/v1alpha4/zz_generated.conversion.go | 17 +- .../v1beta1/kubeadm_control_plane_types.go | 86 ++ .../v1beta1/kubeadm_control_plane_webhook.go | 2 + .../kubeadm_control_plane_webhook_test.go | 5 + .../kubeadmcontrolplanetemplate_types.go | 4 + .../api/v1beta1/zz_generated.deepcopy.go | 57 + ...cluster.x-k8s.io_kubeadmcontrolplanes.yaml | 69 + ...x-k8s.io_kubeadmcontrolplanetemplates.yaml | 49 + .../kubeadm/internal/controllers/helpers.go | 12 + .../internal/controllers/helpers_test.go | 5 + .../internal/controllers/remediation.go | 312 ++++- .../internal/controllers/remediation_test.go | 1128 ++++++++++++++--- .../kubeadm/internal/controllers/status.go | 28 + .../src/developer/providers/v1.3-to-v1.4.md | 2 + .../src/reference/labels_and_annotations.md | 2 + .../machinehealthcheck_controller.go | 20 +- .../machinehealthcheck_targets.go | 38 +- test/e2e/config/docker.yaml | 3 + .../cluster-with-kcp.yaml | 98 ++ .../kustomization.yaml | 2 +- .../cluster-template-kcp-remediation/mhc.yaml | 7 +- test/e2e/kcp_remediations.go | 687 ++++++++++ ...tions_test.go => kcp_remediations_test.go} | 6 +- ...mhc_remediations.go => md_remediations.go} | 62 +- test/e2e/md_remediations_test.go | 36 + test/infrastructure/container/docker.go | 3 +- .../controllers/dockermachine_controller.go | 23 +- 30 files changed, 2483 insertions(+), 306 deletions(-) create mode 100644 test/e2e/data/infrastructure-docker/main/cluster-template-kcp-remediation/cluster-with-kcp.yaml create mode 100644 test/e2e/kcp_remediations.go rename test/e2e/{mhc_remediations_test.go => kcp_remediations_test.go} (83%) rename 
test/e2e/{mhc_remediations.go => md_remediations.go} (61%) create mode 100644 test/e2e/md_remediations_test.go diff --git a/controlplane/kubeadm/api/v1alpha3/conversion.go b/controlplane/kubeadm/api/v1alpha3/conversion.go index 0061e6331a8a..e8fb091e02d0 100644 --- a/controlplane/kubeadm/api/v1alpha3/conversion.go +++ b/controlplane/kubeadm/api/v1alpha3/conversion.go @@ -99,6 +99,13 @@ func (src *KubeadmControlPlane) ConvertTo(dstRaw conversion.Hub) error { dst.Spec.KubeadmConfigSpec.InitConfiguration.NodeRegistration.ImagePullPolicy = restored.Spec.KubeadmConfigSpec.InitConfiguration.NodeRegistration.ImagePullPolicy } + if restored.Spec.RemediationStrategy != nil { + dst.Spec.RemediationStrategy = restored.Spec.RemediationStrategy + } + if restored.Status.LastRemediation != nil { + dst.Status.LastRemediation = restored.Status.LastRemediation + } + return nil } diff --git a/controlplane/kubeadm/api/v1alpha3/zz_generated.conversion.go b/controlplane/kubeadm/api/v1alpha3/zz_generated.conversion.go index 94b5206584e7..ce3672490458 100644 --- a/controlplane/kubeadm/api/v1alpha3/zz_generated.conversion.go +++ b/controlplane/kubeadm/api/v1alpha3/zz_generated.conversion.go @@ -201,6 +201,7 @@ func autoConvert_v1beta1_KubeadmControlPlaneSpec_To_v1alpha3_KubeadmControlPlane // WARNING: in.RolloutBefore requires manual conversion: does not exist in peer-type // WARNING: in.RolloutAfter requires manual conversion: does not exist in peer-type out.RolloutStrategy = (*RolloutStrategy)(unsafe.Pointer(in.RolloutStrategy)) + // WARNING: in.RemediationStrategy requires manual conversion: does not exist in peer-type return nil } @@ -257,6 +258,7 @@ func autoConvert_v1beta1_KubeadmControlPlaneStatus_To_v1alpha3_KubeadmControlPla } else { out.Conditions = nil } + // WARNING: in.LastRemediation requires manual conversion: does not exist in peer-type return nil } diff --git a/controlplane/kubeadm/api/v1alpha4/conversion.go b/controlplane/kubeadm/api/v1alpha4/conversion.go index 
9fae7f390022..1637b272a82c 100644 --- a/controlplane/kubeadm/api/v1alpha4/conversion.go +++ b/controlplane/kubeadm/api/v1alpha4/conversion.go @@ -84,6 +84,13 @@ func (src *KubeadmControlPlane) ConvertTo(dstRaw conversion.Hub) error { dst.Spec.KubeadmConfigSpec.InitConfiguration.NodeRegistration.ImagePullPolicy = restored.Spec.KubeadmConfigSpec.InitConfiguration.NodeRegistration.ImagePullPolicy } + if restored.Spec.RemediationStrategy != nil { + dst.Spec.RemediationStrategy = restored.Spec.RemediationStrategy + } + if restored.Status.LastRemediation != nil { + dst.Status.LastRemediation = restored.Status.LastRemediation + } + return nil } @@ -173,6 +180,10 @@ func (src *KubeadmControlPlaneTemplate) ConvertTo(dstRaw conversion.Hub) error { dst.Spec.Template.Spec.KubeadmConfigSpec.InitConfiguration.NodeRegistration.ImagePullPolicy = restored.Spec.Template.Spec.KubeadmConfigSpec.InitConfiguration.NodeRegistration.ImagePullPolicy } + if restored.Spec.Template.Spec.RemediationStrategy != nil { + dst.Spec.Template.Spec.RemediationStrategy = restored.Spec.Template.Spec.RemediationStrategy + } + return nil } @@ -262,5 +273,11 @@ func Convert_v1beta1_KubeadmControlPlaneMachineTemplate_To_v1alpha4_KubeadmContr func Convert_v1beta1_KubeadmControlPlaneSpec_To_v1alpha4_KubeadmControlPlaneSpec(in *controlplanev1.KubeadmControlPlaneSpec, out *KubeadmControlPlaneSpec, scope apiconversion.Scope) error { // .RolloutBefore was added in v1beta1. + // .RemediationStrategy was added in v1beta1. return autoConvert_v1beta1_KubeadmControlPlaneSpec_To_v1alpha4_KubeadmControlPlaneSpec(in, out, scope) } + +func Convert_v1beta1_KubeadmControlPlaneStatus_To_v1alpha4_KubeadmControlPlaneStatus(in *controlplanev1.KubeadmControlPlaneStatus, out *KubeadmControlPlaneStatus, scope apiconversion.Scope) error { + // .LastRemediation was added in v1beta1. 
+ return autoConvert_v1beta1_KubeadmControlPlaneStatus_To_v1alpha4_KubeadmControlPlaneStatus(in, out, scope) +} diff --git a/controlplane/kubeadm/api/v1alpha4/zz_generated.conversion.go b/controlplane/kubeadm/api/v1alpha4/zz_generated.conversion.go index 83cdb2e4718d..efe50a58635e 100644 --- a/controlplane/kubeadm/api/v1alpha4/zz_generated.conversion.go +++ b/controlplane/kubeadm/api/v1alpha4/zz_generated.conversion.go @@ -77,11 +77,6 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } - if err := s.AddGeneratedConversionFunc((*v1beta1.KubeadmControlPlaneStatus)(nil), (*KubeadmControlPlaneStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1beta1_KubeadmControlPlaneStatus_To_v1alpha4_KubeadmControlPlaneStatus(a.(*v1beta1.KubeadmControlPlaneStatus), b.(*KubeadmControlPlaneStatus), scope) - }); err != nil { - return err - } if err := s.AddGeneratedConversionFunc((*KubeadmControlPlaneTemplate)(nil), (*v1beta1.KubeadmControlPlaneTemplate)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha4_KubeadmControlPlaneTemplate_To_v1beta1_KubeadmControlPlaneTemplate(a.(*KubeadmControlPlaneTemplate), b.(*v1beta1.KubeadmControlPlaneTemplate), scope) }); err != nil { @@ -157,6 +152,11 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddConversionFunc((*v1beta1.KubeadmControlPlaneStatus)(nil), (*KubeadmControlPlaneStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta1_KubeadmControlPlaneStatus_To_v1alpha4_KubeadmControlPlaneStatus(a.(*v1beta1.KubeadmControlPlaneStatus), b.(*KubeadmControlPlaneStatus), scope) + }); err != nil { + return err + } if err := s.AddConversionFunc((*v1beta1.KubeadmControlPlaneTemplateResourceSpec)(nil), (*KubeadmControlPlaneSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { return 
Convert_v1beta1_KubeadmControlPlaneTemplateResourceSpec_To_v1alpha4_KubeadmControlPlaneSpec(a.(*v1beta1.KubeadmControlPlaneTemplateResourceSpec), b.(*KubeadmControlPlaneSpec), scope) }); err != nil { @@ -295,6 +295,7 @@ func autoConvert_v1beta1_KubeadmControlPlaneSpec_To_v1alpha4_KubeadmControlPlane // WARNING: in.RolloutBefore requires manual conversion: does not exist in peer-type out.RolloutAfter = (*v1.Time)(unsafe.Pointer(in.RolloutAfter)) out.RolloutStrategy = (*RolloutStrategy)(unsafe.Pointer(in.RolloutStrategy)) + // WARNING: in.RemediationStrategy requires manual conversion: does not exist in peer-type return nil } @@ -352,14 +353,10 @@ func autoConvert_v1beta1_KubeadmControlPlaneStatus_To_v1alpha4_KubeadmControlPla } else { out.Conditions = nil } + // WARNING: in.LastRemediation requires manual conversion: does not exist in peer-type return nil } -// Convert_v1beta1_KubeadmControlPlaneStatus_To_v1alpha4_KubeadmControlPlaneStatus is an autogenerated conversion function. -func Convert_v1beta1_KubeadmControlPlaneStatus_To_v1alpha4_KubeadmControlPlaneStatus(in *v1beta1.KubeadmControlPlaneStatus, out *KubeadmControlPlaneStatus, s conversion.Scope) error { - return autoConvert_v1beta1_KubeadmControlPlaneStatus_To_v1alpha4_KubeadmControlPlaneStatus(in, out, s) -} - func autoConvert_v1alpha4_KubeadmControlPlaneTemplate_To_v1beta1_KubeadmControlPlaneTemplate(in *KubeadmControlPlaneTemplate, out *v1beta1.KubeadmControlPlaneTemplate, s conversion.Scope) error { out.ObjectMeta = in.ObjectMeta if err := Convert_v1alpha4_KubeadmControlPlaneTemplateSpec_To_v1beta1_KubeadmControlPlaneTemplateSpec(&in.Spec, &out.Spec, s); err != nil { diff --git a/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_types.go b/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_types.go index 823fb123f679..272ffd12a4a1 100644 --- a/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_types.go +++ b/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_types.go @@ -17,6 +17,8 @@ 
limitations under the License. package v1beta1 import ( + "time" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" @@ -49,6 +51,23 @@ const ( // KubeadmClusterConfigurationAnnotation is a machine annotation that stores the json-marshalled string of KCP ClusterConfiguration. // This annotation is used to detect any changes in ClusterConfiguration and trigger machine rollout in KCP. KubeadmClusterConfigurationAnnotation = "controlplane.cluster.x-k8s.io/kubeadm-cluster-configuration" + + // RemediationInProgressAnnotation is used to keep track that a KCP remediation is in progress, and more + // specifically it tracks that the system is in between having deleted an unhealthy machine and recreating its replacement. + // NOTE: if something external to CAPI removes this annotation the system cannot detect the above situation; this can lead to + // failures in updating remediation retry or remediation count (both counters restart from zero). + RemediationInProgressAnnotation = "controlplane.cluster.x-k8s.io/remediation-in-progress" + + // RemediationForAnnotation is used to link a new machine to the unhealthy machine it is replacing; + // please note that in case of retry, when also the remediating machine fails, the system keeps track of + // the first machine of the sequence only. + // NOTE: if something external to CAPI removes this annotation, this can lead to + // failures in updating remediation retry (the counter restarts from zero). + RemediationForAnnotation = "controlplane.cluster.x-k8s.io/remediation-for" + + // DefaultMinHealthyPeriod defines the default minimum period before we consider a remediation on a + // machine unrelated from the previous remediation. + DefaultMinHealthyPeriod = 1 * time.Hour ) // KubeadmControlPlaneSpec defines the desired state of KubeadmControlPlane. 
@@ -91,6 +110,10 @@ type KubeadmControlPlaneSpec struct { // +optional // +kubebuilder:default={type: "RollingUpdate", rollingUpdate: {maxSurge: 1}} RolloutStrategy *RolloutStrategy `json:"rolloutStrategy,omitempty"` + + // The RemediationStrategy that controls how control plane machine remediation happens. + // +optional + RemediationStrategy *RemediationStrategy `json:"remediationStrategy,omitempty"` } // KubeadmControlPlaneMachineTemplate defines the template for Machines @@ -158,6 +181,50 @@ type RollingUpdate struct { MaxSurge *intstr.IntOrString `json:"maxSurge,omitempty"` } +// RemediationStrategy allows to define how control plane machine remediation happens. +type RemediationStrategy struct { + // MaxRetry is the Max number of retries while attempting to remediate an unhealthy machine. + // A retry happens when a machine that was created as a replacement for an unhealthy machine also fails. + // For example, given a control plane with three machines M1, M2, M3: + // + // M1 become unhealthy; remediation happens, and M1-1 is created as a replacement. + // If M1-1 (replacement of M1) has problems while bootstrapping it will become unhealthy, and then be + // remediated; such operation is considered a retry, remediation-retry #1. + // If M1-2 (replacement of M1-1) becomes unhealthy, remediation-retry #2 will happen, etc. + // + // A retry could happen only after RetryPeriod from the previous retry. + // If a machine is marked as unhealthy after MinHealthyPeriod from the previous remediation expired, + // this is not considered a retry anymore because the new issue is assumed unrelated from the previous one. + // + // If not set, the remediation will be retried infinitely. + // +optional + MaxRetry *int32 `json:"maxRetry,omitempty"` + + // RetryPeriod is the duration that KCP should wait before remediating a machine being created as a replacement + // for an unhealthy machine (a retry). + // + // If not set, a retry will happen immediately. 
+ // +optional + RetryPeriod metav1.Duration `json:"retryPeriod,omitempty"` + + // MinHealthyPeriod defines the duration after which KCP will consider any failure to a machine unrelated + // from the previous one. In this case the remediation is not considered a retry anymore, and thus the retry + // counter restarts from 0. For example, assuming MinHealthyPeriod is set to 1h (default) + // + // M1 become unhealthy; remediation happens, and M1-1 is created as a replacement. + // If M1-1 (replacement of M1) has problems within the 1hr after the creation, also + // this machine will be remediated and this operation is considered a retry - a problem related + // to the original issue happened to M1 -. + // + // If instead the problem on M1-1 is happening after MinHealthyPeriod expired, e.g. four days after + // m1-1 has been created as a remediation of M1, the problem on M1-1 is considered unrelated to + // the original issue happened to M1. + // + // If not set, this value is defaulted to 1h. + // +optional + MinHealthyPeriod *metav1.Duration `json:"minHealthyPeriod,omitempty"` +} + // KubeadmControlPlaneStatus defines the observed state of KubeadmControlPlane. type KubeadmControlPlaneStatus struct { // Selector is the label selector in string format to avoid introspection @@ -223,6 +290,25 @@ type KubeadmControlPlaneStatus struct { // Conditions defines current service state of the KubeadmControlPlane. // +optional Conditions clusterv1.Conditions `json:"conditions,omitempty"` + + // LastRemediation stores info about last remediation performed. + // +optional + LastRemediation *LastRemediationStatus `json:"lastRemediation,omitempty"` +} + +// LastRemediationStatus stores info about last remediation performed. +// NOTE: if for any reason information about last remediation are lost, RetryCount is going to restart from 0 and thus +// more remediations than expected might happen. 
+type LastRemediationStatus struct { + // Machine is the machine name of the latest machine being remediated. + Machine string `json:"machine"` + + // Timestamp is when last remediation happened. It is represented in RFC3339 form and is in UTC. + Timestamp metav1.Time `json:"timestamp"` + + // RetryCount used to keep track of remediation retry for the last remediated machine. + // A retry happens when a machine that was created as a replacement for an unhealthy machine also fails. + RetryCount int32 `json:"retryCount"` } // +kubebuilder:object:root=true diff --git a/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook.go b/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook.go index c17ac6049ce5..031115fb6dce 100644 --- a/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook.go +++ b/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook.go @@ -181,6 +181,8 @@ func (in *KubeadmControlPlane) ValidateUpdate(old runtime.Object) error { {spec, "machineTemplate", "nodeDeletionTimeout"}, {spec, "replicas"}, {spec, "version"}, + {spec, "remediationStrategy"}, + {spec, "remediationStrategy", "*"}, {spec, "rolloutAfter"}, {spec, "rolloutBefore"}, {spec, "rolloutBefore", "*"}, diff --git a/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook_test.go b/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook_test.go index ee20890ed807..cb6cdec40787 100644 --- a/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook_test.go +++ b/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook_test.go @@ -410,6 +410,11 @@ func TestKubeadmControlPlaneValidateUpdate(t *testing.T) { validUpdate.Spec.RolloutBefore = &RolloutBefore{ CertificatesExpiryDays: pointer.Int32(14), } + validUpdate.Spec.RemediationStrategy = &RemediationStrategy{ + MaxRetry: pointer.Int32(50), + MinHealthyPeriod: &metav1.Duration{Duration: 10 * time.Hour}, + RetryPeriod: metav1.Duration{Duration: 10 * time.Minute}, + } 
validUpdate.Spec.KubeadmConfigSpec.Format = bootstrapv1.CloudConfig scaleToZero := before.DeepCopy() diff --git a/controlplane/kubeadm/api/v1beta1/kubeadmcontrolplanetemplate_types.go b/controlplane/kubeadm/api/v1beta1/kubeadmcontrolplanetemplate_types.go index 4817b71b18fb..9fab688e84f7 100644 --- a/controlplane/kubeadm/api/v1beta1/kubeadmcontrolplanetemplate_types.go +++ b/controlplane/kubeadm/api/v1beta1/kubeadmcontrolplanetemplate_types.go @@ -91,6 +91,10 @@ type KubeadmControlPlaneTemplateResourceSpec struct { // +optional // +kubebuilder:default={type: "RollingUpdate", rollingUpdate: {maxSurge: 1}} RolloutStrategy *RolloutStrategy `json:"rolloutStrategy,omitempty"` + + // The RemediationStrategy that controls how control plane machine remediation happens. + // +optional + RemediationStrategy *RemediationStrategy `json:"remediationStrategy,omitempty"` } // KubeadmControlPlaneTemplateMachineTemplate defines the template for Machines diff --git a/controlplane/kubeadm/api/v1beta1/zz_generated.deepcopy.go b/controlplane/kubeadm/api/v1beta1/zz_generated.deepcopy.go index 27f9d2ecd98a..5d6d56bccd09 100644 --- a/controlplane/kubeadm/api/v1beta1/zz_generated.deepcopy.go +++ b/controlplane/kubeadm/api/v1beta1/zz_generated.deepcopy.go @@ -143,6 +143,11 @@ func (in *KubeadmControlPlaneSpec) DeepCopyInto(out *KubeadmControlPlaneSpec) { *out = new(RolloutStrategy) (*in).DeepCopyInto(*out) } + if in.RemediationStrategy != nil { + in, out := &in.RemediationStrategy, &out.RemediationStrategy + *out = new(RemediationStrategy) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeadmControlPlaneSpec. 
@@ -175,6 +180,11 @@ func (in *KubeadmControlPlaneStatus) DeepCopyInto(out *KubeadmControlPlaneStatus (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.LastRemediation != nil { + in, out := &in.LastRemediation, &out.LastRemediation + *out = new(LastRemediationStatus) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeadmControlPlaneStatus. @@ -314,6 +324,11 @@ func (in *KubeadmControlPlaneTemplateResourceSpec) DeepCopyInto(out *KubeadmCont *out = new(RolloutStrategy) (*in).DeepCopyInto(*out) } + if in.RemediationStrategy != nil { + in, out := &in.RemediationStrategy, &out.RemediationStrategy + *out = new(RemediationStrategy) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeadmControlPlaneTemplateResourceSpec. @@ -342,6 +357,48 @@ func (in *KubeadmControlPlaneTemplateSpec) DeepCopy() *KubeadmControlPlaneTempla return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LastRemediationStatus) DeepCopyInto(out *LastRemediationStatus) { + *out = *in + in.Timestamp.DeepCopyInto(&out.Timestamp) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LastRemediationStatus. +func (in *LastRemediationStatus) DeepCopy() *LastRemediationStatus { + if in == nil { + return nil + } + out := new(LastRemediationStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *RemediationStrategy) DeepCopyInto(out *RemediationStrategy) { + *out = *in + if in.MaxRetry != nil { + in, out := &in.MaxRetry, &out.MaxRetry + *out = new(int32) + **out = **in + } + out.RetryPeriod = in.RetryPeriod + if in.MinHealthyPeriod != nil { + in, out := &in.MinHealthyPeriod, &out.MinHealthyPeriod + *out = new(v1.Duration) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemediationStrategy. +func (in *RemediationStrategy) DeepCopy() *RemediationStrategy { + if in == nil { + return nil + } + out := new(RemediationStrategy) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RollingUpdate) DeepCopyInto(out *RollingUpdate) { *out = *in diff --git a/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanes.yaml b/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanes.yaml index 2a8d52fb2c6e..52b83f8d68d7 100644 --- a/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanes.yaml +++ b/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanes.yaml @@ -3620,6 +3620,51 @@ spec: required: - infrastructureRef type: object + remediationStrategy: + description: The RemediationStrategy that controls how control plane + machine remediation happens. + properties: + maxRetry: + description: "MaxRetry is the Max number of retries while attempting + to remediate an unhealthy machine. A retry happens when a machine + that was created as a replacement for an unhealthy machine also + fails. For example, given a control plane with three machines + M1, M2, M3: \n M1 become unhealthy; remediation happens, and + M1-1 is created as a replacement. 
If M1-1 (replacement of M1) + has problems while bootstrapping it will become unhealthy, and + then be remediated; such operation is considered a retry, remediation-retry + #1. If M1-2 (replacement of M1-1) becomes unhealthy, remediation-retry + #2 will happen, etc. \n A retry could happen only after RetryPeriod + from the previous retry. If a machine is marked as unhealthy + after MinHealthyPeriod from the previous remediation expired, + this is not considered a retry anymore because the new issue + is assumed unrelated from the previous one. \n If not set, the + remediation will be retried infinitely." + format: int32 + type: integer + minHealthyPeriod: + description: "MinHealthyPeriod defines the duration after which + KCP will consider any failure to a machine unrelated from the + previous one. In this case the remediation is not considered + a retry anymore, and thus the retry counter restarts from 0. + For example, assuming MinHealthyPeriod is set to 1h (default) + \n M1 become unhealthy; remediation happens, and M1-1 is created + as a replacement. If M1-1 (replacement of M1) has problems within + the 1hr after the creation, also this machine will be remediated + and this operation is considered a retry - a problem related + to the original issue happened to M1 -. \n If instead the problem + on M1-1 is happening after MinHealthyPeriod expired, e.g. four + days after m1-1 has been created as a remediation of M1, the + problem on M1-1 is considered unrelated to the original issue + happened to M1. \n If not set, this value is defaulted to 1h." + type: string + retryPeriod: + description: "RetryPeriod is the duration that KCP should wait + before remediating a machine being created as a replacement + for an unhealthy machine (a retry). \n If not set, a retry will + happen immediately." + type: string + type: object replicas: description: Number of desired machines. Defaults to 1. 
When stacked etcd is used only odd numbers are permitted, as per [etcd best practice](https://etcd.io/docs/v3.3.12/faq/#why-an-odd-number-of-cluster-members). @@ -3746,6 +3791,30 @@ spec: description: Initialized denotes whether or not the control plane has the uploaded kubeadm-config configmap. type: boolean + lastRemediation: + description: LastRemediation stores info about last remediation performed. + properties: + machine: + description: Machine is the machine name of the latest machine + being remediated. + type: string + retryCount: + description: RetryCount used to keep track of remediation retry + for the last remediated machine. A retry happens when a machine + that was created as a replacement for an unhealthy machine also + fails. + format: int32 + type: integer + timestamp: + description: Timestamp is when last remediation happened. It is + represented in RFC3339 form and is in UTC. + format: date-time + type: string + required: + - machine + - retryCount + - timestamp + type: object observedGeneration: description: ObservedGeneration is the latest generation observed by the controller. diff --git a/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanetemplates.yaml b/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanetemplates.yaml index 365bf50644cb..bca4dfebb4e7 100644 --- a/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanetemplates.yaml +++ b/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanetemplates.yaml @@ -2382,6 +2382,55 @@ spec: time limitations. type: string type: object + remediationStrategy: + description: The RemediationStrategy that controls how control + plane machine remediation happens. + properties: + maxRetry: + description: "MaxRetry is the Max number of retries while + attempting to remediate an unhealthy machine. 
A retry + happens when a machine that was created as a replacement + for an unhealthy machine also fails. For example, given + a control plane with three machines M1, M2, M3: \n M1 + become unhealthy; remediation happens, and M1-1 is created + as a replacement. If M1-1 (replacement of M1) has problems + while bootstrapping it will become unhealthy, and then + be remediated; such operation is considered a retry, + remediation-retry #1. If M1-2 (replacement of M1-1) + becomes unhealthy, remediation-retry #2 will happen, + etc. \n A retry could happen only after RetryPeriod + from the previous retry. If a machine is marked as unhealthy + after MinHealthyPeriod from the previous remediation + expired, this is not considered a retry anymore because + the new issue is assumed unrelated from the previous + one. \n If not set, the remediation will be retried infinitely." + format: int32 + type: integer + minHealthyPeriod: + description: "MinHealthyPeriod defines the duration after + which KCP will consider any failure to a machine unrelated + from the previous one. In this case the remediation + is not considered a retry anymore, and thus the retry + counter restarts from 0. For example, assuming MinHealthyPeriod + is set to 1h (default) \n M1 become unhealthy; remediation + happens, and M1-1 is created as a replacement. If M1-1 + (replacement of M1) has problems within the 1hr after + the creation, also this machine will be remediated and + this operation is considered a retry - a problem related + to the original issue happened to M1 -. \n If instead + the problem on M1-1 is happening after MinHealthyPeriod + expired, e.g. four days after m1-1 has been created + as a remediation of M1, the problem on M1-1 is considered + unrelated to the original issue happened to M1. \n If + not set, this value is defaulted to 1h." 
+ type: string + retryPeriod: + description: "RetryPeriod is the duration that KCP should + wait before remediating a machine being created as a + replacement for an unhealthy machine (a retry). \n If + not set, a retry will happen immediately." + type: string + type: object rolloutAfter: description: RolloutAfter is a field to indicate a rollout should be performed after the specified time even if no diff --git a/controlplane/kubeadm/internal/controllers/helpers.go b/controlplane/kubeadm/internal/controllers/helpers.go index 9bd59c7b8dac..bae5e7918b2d 100644 --- a/controlplane/kubeadm/internal/controllers/helpers.go +++ b/controlplane/kubeadm/internal/controllers/helpers.go @@ -314,6 +314,13 @@ func (r *KubeadmControlPlaneReconciler) generateMachine(ctx context.Context, kcp }, } + // In case this machine is being created as a consequence of a remediation, then add an annotation + // tracking remediating data. + // NOTE: This is required in order to track remediation retries. + if remediationData, ok := kcp.Annotations[controlplanev1.RemediationInProgressAnnotation]; ok { + machine.Annotations[controlplanev1.RemediationForAnnotation] = remediationData + } + // Machine's bootstrap config may be missing ClusterConfiguration if it is not the first machine in the control plane. // We store ClusterConfiguration as annotation here to detect any changes in KCP ClusterConfiguration and rollout the machine if any. clusterConfig, err := json.Marshal(kcp.Spec.KubeadmConfigSpec.ClusterConfiguration) @@ -331,5 +338,10 @@ func (r *KubeadmControlPlaneReconciler) generateMachine(ctx context.Context, kcp if err := r.Client.Create(ctx, machine); err != nil { return errors.Wrap(err, "failed to create machine") } + + // Remove the annotation tracking that a remediation is in progress (the remediation completed when + // the replacement machine has been created above). 
+ delete(kcp.Annotations, controlplanev1.RemediationInProgressAnnotation) + return nil } diff --git a/controlplane/kubeadm/internal/controllers/helpers_test.go b/controlplane/kubeadm/internal/controllers/helpers_test.go index 53e699925434..39a76fdb6799 100644 --- a/controlplane/kubeadm/internal/controllers/helpers_test.go +++ b/controlplane/kubeadm/internal/controllers/helpers_test.go @@ -512,6 +512,9 @@ func TestKubeadmControlPlaneReconciler_generateMachine(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "testControlPlane", Namespace: cluster.Namespace, + Annotations: map[string]string{ + controlplanev1.RemediationInProgressAnnotation: "foo", + }, }, Spec: controlplanev1.KubeadmControlPlaneSpec{ Version: "v1.16.6", @@ -562,6 +565,8 @@ func TestKubeadmControlPlaneReconciler_generateMachine(t *testing.T) { g.Expect(machine.Namespace).To(Equal(kcp.Namespace)) g.Expect(machine.OwnerReferences).To(HaveLen(1)) g.Expect(machine.OwnerReferences).To(ContainElement(*metav1.NewControllerRef(kcp, controlplanev1.GroupVersion.WithKind("KubeadmControlPlane")))) + g.Expect(machine.Annotations).To(HaveKeyWithValue(controlplanev1.RemediationForAnnotation, "foo")) + g.Expect(kcp.Annotations).ToNot(HaveKey(controlplanev1.RemediationInProgressAnnotation)) g.Expect(machine.Spec).To(Equal(expectedMachineSpec)) // Verify that the machineTemplate.ObjectMeta has been propagated to the Machine. 
diff --git a/controlplane/kubeadm/internal/controllers/remediation.go b/controlplane/kubeadm/internal/controllers/remediation.go index 429f8a2e954a..72a0eb8236aa 100644 --- a/controlplane/kubeadm/internal/controllers/remediation.go +++ b/controlplane/kubeadm/internal/controllers/remediation.go @@ -18,10 +18,14 @@ package controllers import ( "context" + "encoding/json" "fmt" + "time" "github.com/blang/semver" + "github.com/go-logr/logr" "github.com/pkg/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" @@ -31,6 +35,7 @@ import ( controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal" "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" ) @@ -39,6 +44,7 @@ import ( // based on the process described in https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20191017-kubeadm-based-control-plane.md#remediation-using-delete-and-recreate func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.Context, controlPlane *internal.ControlPlane) (ret ctrl.Result, retErr error) { log := ctrl.LoggerFrom(ctx) + reconciliationTime := time.Now().UTC() // Cleanup pending remediation actions not completed for any reasons (e.g. number of current replicas is less or equal to 1) // if the underlying machine is now back to healthy / not deleting. @@ -88,6 +94,16 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C return ctrl.Result{}, nil } + log = log.WithValues("Machine", klog.KObj(machineToBeRemediated), "initialized", controlPlane.KCP.Status.Initialized) + + // Returns if another remediation is in progress but the new Machine is not yet created. 
+ // Note: This condition is checked after we check for unhealthy Machines and if machineToBeRemediated + // is being deleted to avoid unnecessary logs if no further remediation should be done. + if _, ok := controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]; ok { + log.Info("Another remediation is already in progress. Skipping remediation.") + return ctrl.Result{}, nil + } + patchHelper, err := patch.NewHelper(machineToBeRemediated, r.Client) if err != nil { return ctrl.Result{}, err @@ -107,93 +123,220 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C // Before starting remediation, run preflight checks in order to verify it is safe to remediate. // If any of the following checks fails, we'll surface the reason in the MachineOwnerRemediated condition. - log.WithValues("Machine", klog.KObj(machineToBeRemediated)) - desiredReplicas := int(*controlPlane.KCP.Spec.Replicas) - // The cluster MUST have more than one replica, because this is the smallest cluster size that allows any etcd failure tolerance. - if controlPlane.Machines.Len() <= 1 { - log.Info("A control plane machine needs remediation, but the number of current replicas is less or equal to 1. Skipping remediation", "Replicas", controlPlane.Machines.Len()) - conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate if current replicas are less or equal to 1") - return ctrl.Result{}, nil + // Check if KCP is allowed to remediate considering retry limits: + // - Remediation cannot happen because retryPeriod is not yet expired. + // - KCP already reached MaxRetries limit. 
+ remediationInProgressData, canRemediate, err := r.checkRetryLimits(log, machineToBeRemediated, controlPlane, reconciliationTime) + if err != nil { + return ctrl.Result{}, err } - - // The number of replicas MUST be equal to or greater than the desired replicas. This rule ensures that when the cluster - // is missing replicas, we skip remediation and instead perform regular scale up/rollout operations first. - if controlPlane.Machines.Len() < desiredReplicas { - log.Info("A control plane machine needs remediation, but the current number of replicas is lower that expected. Skipping remediation", "Replicas", desiredReplicas, "CurrentReplicas", controlPlane.Machines.Len()) - conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP waiting for having at least %d control plane machines before triggering remediation", desiredReplicas) + if !canRemediate { + // NOTE: log lines and conditions surfacing why it is not possible to remediate are set by checkRetryLimits. return ctrl.Result{}, nil } - // The cluster MUST have no machines with a deletion timestamp. This rule prevents KCP taking actions while the cluster is in a transitional state. - if controlPlane.HasDeletingMachine() { - log.Info("A control plane machine needs remediation, but there are other control-plane machines being deleted. 
Skipping remediation") - conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP waiting for control plane machine deletion to complete before triggering remediation") - return ctrl.Result{}, nil - } + if controlPlane.KCP.Status.Initialized { + // Executes checks that apply only if the control plane is already initialized; in this case KCP can + // remediate only if it can safely assume that the operation preserves the operation state of the + // existing cluster (or at least it doesn't make it worse). - // Remediation MUST preserve etcd quorum. This rule ensures that we will not remove a member that would result in etcd - // losing a majority of members and thus become unable to field new requests. - if controlPlane.IsEtcdManaged() { - canSafelyRemediate, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, machineToBeRemediated) - if err != nil { - conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationFailedReason, clusterv1.ConditionSeverityError, err.Error()) - return ctrl.Result{}, err - } - if !canSafelyRemediate { - log.Info("A control plane machine needs remediation, but removing this machine could result in etcd quorum loss. Skipping remediation") - conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate this machine because this could result in etcd loosing quorum") + // The cluster MUST have more than one replica, because this is the smallest cluster size that allows any etcd failure tolerance. + if controlPlane.Machines.Len() <= 1 { + log.Info("A control plane machine needs remediation, but the number of current replicas is less or equal to 1. 
Skipping remediation", "Replicas", controlPlane.Machines.Len()) + conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate if current replicas are less or equal to 1") return ctrl.Result{}, nil } - } - - workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(controlPlane.Cluster)) - if err != nil { - log.Error(err, "Failed to create client to workload cluster") - return ctrl.Result{}, errors.Wrapf(err, "failed to create client to workload cluster") - } - // If the machine that is about to be deleted is the etcd leader, move it to the newest member available. - if controlPlane.IsEtcdManaged() { - etcdLeaderCandidate := controlPlane.HealthyMachines().Newest() - if etcdLeaderCandidate == nil { - log.Info("A control plane machine needs remediation, but there is no healthy machine to forward etcd leadership to") - conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationFailedReason, clusterv1.ConditionSeverityWarning, - "A control plane machine needs remediation, but there is no healthy machine to forward etcd leadership to. Skipping remediation") + // The cluster MUST have no machines with a deletion timestamp. This rule prevents KCP taking actions while the cluster is in a transitional state. + if controlPlane.HasDeletingMachine() { + log.Info("A control plane machine needs remediation, but there are other control-plane machines being deleted. 
Skipping remediation") + conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP waiting for control plane machine deletion to complete before triggering remediation") return ctrl.Result{}, nil } - if err := workloadCluster.ForwardEtcdLeadership(ctx, machineToBeRemediated, etcdLeaderCandidate); err != nil { - log.Error(err, "Failed to move etcd leadership to candidate machine", "candidate", klog.KObj(etcdLeaderCandidate)) - conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationFailedReason, clusterv1.ConditionSeverityError, err.Error()) - return ctrl.Result{}, err + + // Remediation MUST preserve etcd quorum. This rule ensures that KCP will not remove a member that would result in etcd + // losing a majority of members and thus become unable to field new requests. + if controlPlane.IsEtcdManaged() { + canSafelyRemediate, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, machineToBeRemediated) + if err != nil { + conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return ctrl.Result{}, err + } + if !canSafelyRemediate { + log.Info("A control plane machine needs remediation, but removing this machine could result in etcd quorum loss. 
Skipping remediation") + conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate this machine because this could result in etcd loosing quorum") + return ctrl.Result{}, nil + } } - if err := workloadCluster.RemoveEtcdMemberForMachine(ctx, machineToBeRemediated); err != nil { - log.Error(err, "Failed to remove etcd member for machine") - conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationFailedReason, clusterv1.ConditionSeverityError, err.Error()) - return ctrl.Result{}, err + + // Start remediating the unhealthy control plane machine by deleting it. + // A new machine will come up completing the operation as part of the regular reconcile. + + // If the control plane is initialized, before deleting the machine: + // - if the machine hosts the etcd leader, forward etcd leadership to another machine. + // - delete the etcd member hosted on the machine being deleted. + // - remove the etcd member from the kubeadm config map (only for kubernetes version older than v1.22.0) + workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(controlPlane.Cluster)) + if err != nil { + log.Error(err, "Failed to create client to workload cluster") + return ctrl.Result{}, errors.Wrapf(err, "failed to create client to workload cluster") } - } - parsedVersion, err := semver.ParseTolerant(controlPlane.KCP.Spec.Version) - if err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", controlPlane.KCP.Spec.Version) - } + // If the machine that is about to be deleted is the etcd leader, move it to the newest member available. 
+ if controlPlane.IsEtcdManaged() { + etcdLeaderCandidate := controlPlane.HealthyMachines().Newest() + if etcdLeaderCandidate == nil { + log.Info("A control plane machine needs remediation, but there is no healthy machine to forward etcd leadership to") + conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationFailedReason, clusterv1.ConditionSeverityWarning, + "A control plane machine needs remediation, but there is no healthy machine to forward etcd leadership to. Skipping remediation") + return ctrl.Result{}, nil + } + if err := workloadCluster.ForwardEtcdLeadership(ctx, machineToBeRemediated, etcdLeaderCandidate); err != nil { + log.Error(err, "Failed to move etcd leadership to candidate machine", "candidate", klog.KObj(etcdLeaderCandidate)) + conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return ctrl.Result{}, err + } + if err := workloadCluster.RemoveEtcdMemberForMachine(ctx, machineToBeRemediated); err != nil { + log.Error(err, "Failed to remove etcd member for machine") + conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return ctrl.Result{}, err + } + } - if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToBeRemediated, parsedVersion); err != nil { - log.Error(err, "Failed to remove machine from kubeadm ConfigMap") - return ctrl.Result{}, err + parsedVersion, err := semver.ParseTolerant(controlPlane.KCP.Spec.Version) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", controlPlane.KCP.Spec.Version) + } + + if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToBeRemediated, parsedVersion); err != nil { + log.Error(err, "Failed to remove machine from kubeadm ConfigMap") + 
return ctrl.Result{}, err + } } + // Delete the machine if err := r.Client.Delete(ctx, machineToBeRemediated); err != nil { conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationFailedReason, clusterv1.ConditionSeverityError, err.Error()) return ctrl.Result{}, errors.Wrapf(err, "failed to delete unhealthy machine %s", machineToBeRemediated.Name) } + // Surface the operation is in progress. log.Info("Remediating unhealthy machine") conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + + // Prepare the info for tracking the remediation progress into the RemediationInProgressAnnotation. + remediationInProgressValue, err := remediationInProgressData.Marshal() + if err != nil { + return ctrl.Result{}, err + } + + // Set annotations tracking remediation details so they can be picked up by the machine + // that will be created as part of the scale up action that completes the remediation. + annotations.AddAnnotations(controlPlane.KCP, map[string]string{ + controlplanev1.RemediationInProgressAnnotation: remediationInProgressValue, + }) + return ctrl.Result{Requeue: true}, nil } +// checkRetryLimits checks if KCP is allowed to remediate considering retry limits: +// - Remediation cannot happen because retryPeriod is not yet expired. +// - KCP already reached the maximum number of retries for a machine. +// NOTE: Counting the number of retries is required In order to prevent infinite remediation e.g. in case the +// first Control Plane machine is failing due to quota issue. +func (r *KubeadmControlPlaneReconciler) checkRetryLimits(log logr.Logger, machineToBeRemediated *clusterv1.Machine, controlPlane *internal.ControlPlane, reconciliationTime time.Time) (*RemediationData, bool, error) { + // Get last remediation info from the machine. 
+ var lastRemediationData *RemediationData + if value, ok := machineToBeRemediated.Annotations[controlplanev1.RemediationForAnnotation]; ok { + l, err := RemediationDataFromAnnotation(value) + if err != nil { + return nil, false, err + } + lastRemediationData = l + } + + remediationInProgressData := &RemediationData{ + Machine: machineToBeRemediated.Name, + Timestamp: metav1.Time{Time: reconciliationTime}, + RetryCount: 0, + } + + // If there is no last remediation, this is the first try of a new retry sequence. + if lastRemediationData == nil { + return remediationInProgressData, true, nil + } + + // Gets MinHealthyPeriod and RetryPeriod from the remediation strategy, or use defaults. + minHealthyPeriod := controlplanev1.DefaultMinHealthyPeriod + if controlPlane.KCP.Spec.RemediationStrategy != nil && controlPlane.KCP.Spec.RemediationStrategy.MinHealthyPeriod != nil { + minHealthyPeriod = controlPlane.KCP.Spec.RemediationStrategy.MinHealthyPeriod.Duration + } + retryPeriod := time.Duration(0) + if controlPlane.KCP.Spec.RemediationStrategy != nil { + retryPeriod = controlPlane.KCP.Spec.RemediationStrategy.RetryPeriod.Duration + } + + // Gets the timestamp of the last remediation; if missing, default to a value + // that ensures both MinHealthyPeriod and RetryPeriod are expired. + // NOTE: this could potentially lead to executing more retries than expected or to executing retries before than + // expected, but this is considered acceptable when the system recovers from someone/something changes or deletes + // the RemediationForAnnotation on Machines. + lastRemediationTime := reconciliationTime.Add(-2 * max(minHealthyPeriod, retryPeriod)) + if !lastRemediationData.Timestamp.IsZero() { + lastRemediationTime = lastRemediationData.Timestamp.Time + } + + // Once we get here we already know that there was a last remediation for the Machine. 
+ // If the current remediation is happening before minHealthyPeriod is expired, then KCP considers this + // as a remediation for the same previously unhealthy machine. + // NOTE: If someone/something changes the RemediationForAnnotation on Machines (e.g. changes the Timestamp), + // this could potentially lead to executing more retries than expected, but this is considered acceptable in such a case. + var retryForSameMachineInProgress bool + if lastRemediationTime.Add(minHealthyPeriod).After(reconciliationTime) { + retryForSameMachineInProgress = true + log = log.WithValues("RemediationRetryFor", klog.KRef(machineToBeRemediated.Namespace, lastRemediationData.Machine)) + } + + // If the retry for the same machine is not in progress, this is the first try of a new retry sequence. + if !retryForSameMachineInProgress { + return remediationInProgressData, true, nil + } + + // If the remediation is for the same machine, carry over the retry count. + remediationInProgressData.RetryCount = lastRemediationData.RetryCount + + // Check if remediation can happen because retryPeriod has passed. + if lastRemediationTime.Add(retryPeriod).After(reconciliationTime) { + log.Info(fmt.Sprintf("A control plane machine needs remediation, but the operation already failed in the latest %s. Skipping remediation", retryPeriod)) + conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate this machine because the operation already failed in the latest %s (RetryPeriod)", retryPeriod) + return remediationInProgressData, false, nil + } + + // Check if remediation can happen because maxRetry is not reached yet, if defined. 
+ if controlPlane.KCP.Spec.RemediationStrategy != nil && controlPlane.KCP.Spec.RemediationStrategy.MaxRetry != nil { + maxRetry := int(*controlPlane.KCP.Spec.RemediationStrategy.MaxRetry) + if remediationInProgressData.RetryCount >= maxRetry { + log.Info(fmt.Sprintf("A control plane machine needs remediation, but the operation already failed %d times (MaxRetry %d). Skipping remediation", remediationInProgressData.RetryCount, maxRetry)) + conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate this machine because the operation already failed %d times (MaxRetry)", maxRetry) + return remediationInProgressData, false, nil + } + } + + // All the check passed, increase the remediation retry count. + remediationInProgressData.RetryCount++ + + return remediationInProgressData, true, nil +} + +// max calculates the maximum duration. +func max(x, y time.Duration) time.Duration { + if x < y { + return y + } + return x +} + // canSafelyRemoveEtcdMember assess if it is possible to remove the member hosted on the machine to be remediated // without loosing etcd quorum. // @@ -208,7 +351,7 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C // - etc. // // NOTE: this func assumes the list of members in sync with the list of machines/nodes, it is required to call reconcileEtcdMembers -// ans well as reconcileControlPlaneConditions before this. +// as well as reconcileControlPlaneConditions before this. 
func (r *KubeadmControlPlaneReconciler) canSafelyRemoveEtcdMember(ctx context.Context, controlPlane *internal.ControlPlane, machineToBeRemediated *clusterv1.Machine) (bool, error) { log := ctrl.LoggerFrom(ctx) @@ -258,10 +401,10 @@ func (r *KubeadmControlPlaneReconciler) canSafelyRemoveEtcdMember(ctx context.Co } } - // If an etcd member does not have a corresponding machine, it is not possible to retrieve etcd member health - // so we are assuming the worst scenario and considering the member unhealthy. + // If an etcd member does not have a corresponding machine it is not possible to retrieve etcd member health, + // so KCP is assuming the worst scenario and considering the member unhealthy. // - // NOTE: This should not happen given that we are running reconcileEtcdMembers before calling this method. + // NOTE: This should not happen given that KCP is running reconcileEtcdMembers before calling this method. if machine == nil { log.Info("An etcd member does not have a corresponding machine, assuming this member is unhealthy", "MemberName", etcdMember) targetUnhealthyMembers++ @@ -293,3 +436,44 @@ func (r *KubeadmControlPlaneReconciler) canSafelyRemoveEtcdMember(ctx context.Co return canSafelyRemediate, nil } + +// RemediationData struct is used to keep track of information stored in the RemediationInProgressAnnotation in KCP +// during remediation and then into the RemediationForAnnotation on the replacement machine once it is created. +type RemediationData struct { + // Machine is the machine name of the latest machine being remediated. + Machine string `json:"machine"` + + // Timestamp is when last remediation happened. It is represented in RFC3339 form and is in UTC. + Timestamp metav1.Time `json:"timestamp"` + + // RetryCount used to keep track of remediation retry for the last remediated machine. + // A retry happens when a machine that was created as a replacement for an unhealthy machine also fails. 
+ RetryCount int `json:"retryCount"` +} + +// RemediationDataFromAnnotation gets RemediationData from an annotation value. +func RemediationDataFromAnnotation(value string) (*RemediationData, error) { + ret := &RemediationData{} + if err := json.Unmarshal([]byte(value), ret); err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal value %s for %s annotation", value, clusterv1.RemediationInProgressReason) + } + return ret, nil +} + +// Marshal an RemediationData into an annotation value. +func (r *RemediationData) Marshal() (string, error) { + b, err := json.Marshal(r) + if err != nil { + return "", errors.Wrapf(err, "failed to marshal value for %s annotation", clusterv1.RemediationInProgressReason) + } + return string(b), nil +} + +// ToStatus converts a RemediationData into a LastRemediationStatus struct. +func (r *RemediationData) ToStatus() *controlplanev1.LastRemediationStatus { + return &controlplanev1.LastRemediationStatus{ + Machine: r.Machine, + Timestamp: r.Timestamp, + RetryCount: int32(r.RetryCount), + } +} diff --git a/controlplane/kubeadm/internal/controllers/remediation_test.go b/controlplane/kubeadm/internal/controllers/remediation_test.go index e6c13e1691b3..26183baa1cc7 100644 --- a/controlplane/kubeadm/internal/controllers/remediation_test.go +++ b/controlplane/kubeadm/internal/controllers/remediation_test.go @@ -42,7 +42,7 @@ import ( func TestReconcileUnhealthyMachines(t *testing.T) { g := NewWithT(t) - ctx := context.TODO() + r := &KubeadmControlPlaneReconciler{ Client: env.GetClient(), recorder: record.NewFakeRecorder(32), @@ -53,7 +53,14 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) }() - t.Run("Remediation cleans up stuck remediation on previously unhealthy machines", func(t *testing.T) { + var removeFinalizer = func(g *WithT, m *clusterv1.Machine) { + patchHelper, err := patch.NewHelper(m, env.GetClient()) + g.Expect(err).ToNot(HaveOccurred()) + m.ObjectMeta.Finalizers = nil + 
g.Expect(patchHelper.Patch(ctx, m)) + } + + t.Run("It cleans up stuck remediation on previously unhealthy machines", func(t *testing.T) { g := NewWithT(t) m := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withStuckRemediation()) @@ -63,7 +70,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { Cluster: &clusterv1.Cluster{}, Machines: collections.FromMachines(m), } - ret, err := r.reconcileUnhealthyMachines(context.TODO(), controlPlane) + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped g.Expect(err).ToNot(HaveOccurred()) @@ -79,6 +86,10 @@ func TestReconcileUnhealthyMachines(t *testing.T) { return errors.Errorf("condition %s still exists", clusterv1.MachineOwnerRemediatedCondition) }, 10*time.Second).Should(Succeed()) }) + + // Generic preflight checks + // Those are preflight checks that happen no matter if the control plane has already been initialized or not. + t.Run("Remediation does not happen if there are no unhealthy machines", func(t *testing.T) { g := NewWithT(t) @@ -87,12 +98,38 @@ func TestReconcileUnhealthyMachines(t *testing.T) { Cluster: &clusterv1.Cluster{}, Machines: collections.New(), } - ret, err := r.reconcileUnhealthyMachines(context.TODO(), controlPlane) + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped g.Expect(err).ToNot(HaveOccurred()) }) - t.Run("reconcileUnhealthyMachines return early if the machine to be remediated is marked for deletion", func(t *testing.T) { + t.Run("reconcileUnhealthyMachines return early if another remediation is in progress", func(t *testing.T) { + g := NewWithT(t) + + m := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withStuckRemediation()) + conditions.MarkFalse(m, clusterv1.MachineHealthCheckSucceededCondition, clusterv1.MachineHasFailureReason, clusterv1.ConditionSeverityWarning, "") + conditions.MarkFalse(m, clusterv1.MachineOwnerRemediatedCondition, 
clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "") + controlPlane := &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + controlplanev1.RemediationInProgressAnnotation: MustMarshalRemediationData(&RemediationData{ + Machine: "foo", + Timestamp: metav1.Time{Time: time.Now().UTC()}, + RetryCount: 0, + }), + }, + }, + }, + Cluster: &clusterv1.Cluster{}, + Machines: collections.FromMachines(m), + } + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped + g.Expect(err).ToNot(HaveOccurred()) + }) + t.Run("reconcileUnhealthyMachines return early if the machine to be remediated is already being deleted", func(t *testing.T) { g := NewWithT(t) m := getDeletingMachine(ns.Name, "m1-unhealthy-deleting-", withMachineHealthCheckFailed()) @@ -103,92 +140,644 @@ func TestReconcileUnhealthyMachines(t *testing.T) { Cluster: &clusterv1.Cluster{}, Machines: collections.FromMachines(m), } - ret, err := r.reconcileUnhealthyMachines(context.TODO(), controlPlane) + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(controlPlane.KCP.Annotations).ToNot(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + + g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped + g.Expect(err).ToNot(HaveOccurred()) + }) + t.Run("Remediation does not happen if MaxRetry is reached", func(t *testing.T) { + g := NewWithT(t) + + m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer(), withRemediateForAnnotation(MustMarshalRemediationData(&RemediationData{ + Machine: "m0", + Timestamp: metav1.Time{Time: time.Now().Add(-controlplanev1.DefaultMinHealthyPeriod / 2).UTC()}, // minHealthy not expired yet. 
+ RetryCount: 3, + }))) + m2 := createMachine(ctx, g, ns.Name, "m2-healthy-", withHealthyEtcdMember()) + m3 := createMachine(ctx, g, ns.Name, "m3-healthy-", withHealthyEtcdMember()) + + controlPlane := &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(3), + Version: "v1.19.1", + RemediationStrategy: &controlplanev1.RemediationStrategy{ + MaxRetry: utilpointer.Int32(3), + }, + }, + }, + Cluster: &clusterv1.Cluster{}, + Machines: collections.FromMachines(m1, m2, m3), + } + + r := &KubeadmControlPlaneReconciler{ + Client: env.GetClient(), + recorder: record.NewFakeRecorder(32), + managementCluster: &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + EtcdMembersResult: nodes(controlPlane.Machines), + }, + }, + } + + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(controlPlane.KCP.Annotations).ToNot(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate this machine because the operation already failed 3 times (MaxRetry)") + + err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(m1.ObjectMeta.DeletionTimestamp.IsZero()).To(BeTrue()) + + removeFinalizer(g, m1) + g.Expect(env.Cleanup(ctx, m1, m2, m3)).To(Succeed()) }) + t.Run("Retry history is ignored if min healthy period is expired, default min healthy period", func(t *testing.T) { + g := NewWithT(t) + + m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer(), withRemediateForAnnotation(MustMarshalRemediationData(&RemediationData{ + Machine: "m0", + Timestamp: 
metav1.Time{Time: time.Now().Add(-2 * controlplanev1.DefaultMinHealthyPeriod).UTC()}, // minHealthyPeriod already expired. + RetryCount: 3, + }))) + m2 := createMachine(ctx, g, ns.Name, "m2-healthy-", withHealthyEtcdMember()) + m3 := createMachine(ctx, g, ns.Name, "m3-healthy-", withHealthyEtcdMember()) + + controlPlane := &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(3), + Version: "v1.19.1", + RemediationStrategy: &controlplanev1.RemediationStrategy{ + MaxRetry: utilpointer.Int32(3), + }, + }, + }, + Cluster: &clusterv1.Cluster{}, + Machines: collections.FromMachines(m1, m2, m3), + } + + r := &KubeadmControlPlaneReconciler{ + Client: env.GetClient(), + recorder: record.NewFakeRecorder(32), + managementCluster: &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + EtcdMembersResult: nodes(controlPlane.Machines), + }, + }, + } + + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeFalse()) // Remediation completed, requeue + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(controlPlane.KCP.Annotations).To(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + remediationData, err := RemediationDataFromAnnotation(controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(remediationData.Machine).To(Equal(m1.Name)) + g.Expect(remediationData.RetryCount).To(Equal(0)) + + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + + err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(m1.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) + + removeFinalizer(g, m1) + g.Expect(env.Cleanup(ctx, m1, m2, m3)).To(Succeed()) + }) + t.Run("Retry history is ignored 
if min healthy period is expired", func(t *testing.T) { + g := NewWithT(t) + + minHealthyPeriod := 4 * controlplanev1.DefaultMinHealthyPeriod // big min healthy period, so we are user that we are not using DefaultMinHealthyPeriod. + + m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer(), withRemediateForAnnotation(MustMarshalRemediationData(&RemediationData{ + Machine: "m0", + Timestamp: metav1.Time{Time: time.Now().Add(-2 * minHealthyPeriod).UTC()}, // minHealthyPeriod already expired. + RetryCount: 3, + }))) + m2 := createMachine(ctx, g, ns.Name, "m2-healthy-", withHealthyEtcdMember()) + m3 := createMachine(ctx, g, ns.Name, "m3-healthy-", withHealthyEtcdMember()) + + controlPlane := &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(3), + Version: "v1.19.1", + RemediationStrategy: &controlplanev1.RemediationStrategy{ + MaxRetry: utilpointer.Int32(3), + MinHealthyPeriod: &metav1.Duration{Duration: minHealthyPeriod}, + }, + }, + }, + Cluster: &clusterv1.Cluster{}, + Machines: collections.FromMachines(m1, m2, m3), + } + + r := &KubeadmControlPlaneReconciler{ + Client: env.GetClient(), + recorder: record.NewFakeRecorder(32), + managementCluster: &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + EtcdMembersResult: nodes(controlPlane.Machines), + }, + }, + } + + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeFalse()) // Remediation completed, requeue + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(controlPlane.KCP.Annotations).To(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + remediationData, err := RemediationDataFromAnnotation(controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(remediationData.Machine).To(Equal(m1.Name)) + 
g.Expect(remediationData.RetryCount).To(Equal(0)) + + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + + err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(m1.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) + + removeFinalizer(g, m1) + g.Expect(env.Cleanup(ctx, m1, m2, m3)).To(Succeed()) + }) + t.Run("Remediation does not happen if RetryPeriod is not yet passed", func(t *testing.T) { + g := NewWithT(t) + + m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer(), withRemediateForAnnotation(MustMarshalRemediationData(&RemediationData{ + Machine: "m0", + Timestamp: metav1.Time{Time: time.Now().Add(-controlplanev1.DefaultMinHealthyPeriod / 2).UTC()}, // minHealthyPeriod not yet expired. + RetryCount: 2, + }))) + m2 := createMachine(ctx, g, ns.Name, "m2-healthy-", withHealthyEtcdMember()) + m3 := createMachine(ctx, g, ns.Name, "m3-healthy-", withHealthyEtcdMember()) + + controlPlane := &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(3), + Version: "v1.19.1", + RemediationStrategy: &controlplanev1.RemediationStrategy{ + MaxRetry: utilpointer.Int32(3), + RetryPeriod: metav1.Duration{Duration: controlplanev1.DefaultMinHealthyPeriod}, // RetryPeriod not yet expired. 
+ }, + }, + }, + Cluster: &clusterv1.Cluster{}, + Machines: collections.FromMachines(m1, m2, m3), + } + + r := &KubeadmControlPlaneReconciler{ + Client: env.GetClient(), + recorder: record.NewFakeRecorder(32), + managementCluster: &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + EtcdMembersResult: nodes(controlPlane.Machines), + }, + }, + } + + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(controlPlane.KCP.Annotations).ToNot(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate this machine because the operation already failed in the latest 1h0m0s (RetryPeriod)") + + err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(m1.ObjectMeta.DeletionTimestamp.IsZero()).To(BeTrue()) + + removeFinalizer(g, m1) + g.Expect(env.Cleanup(ctx, m1, m2, m3)).To(Succeed()) + }) + + // There are no preflight checks for when control plane is not yet initialized + // (it is the first CP, we can nuke it). + + // Preflight checks for when control plane is already initialized. 
+ t.Run("Remediation does not happen if desired replicas <= 1", func(t *testing.T) { g := NewWithT(t) m := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed()) controlPlane := &internal.ControlPlane{ - KCP: &controlplanev1.KubeadmControlPlane{Spec: controlplanev1.KubeadmControlPlaneSpec{ - Replicas: utilpointer.Int32(1), - RolloutStrategy: &controlplanev1.RolloutStrategy{ - RollingUpdate: &controlplanev1.RollingUpdate{ - MaxSurge: &intstr.IntOrString{ - IntVal: 1, + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(1), + RolloutStrategy: &controlplanev1.RolloutStrategy{ + RollingUpdate: &controlplanev1.RollingUpdate{ + MaxSurge: &intstr.IntOrString{ + IntVal: 1, + }, }, }, }, - }}, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + }, + }, Cluster: &clusterv1.Cluster{}, Machines: collections.FromMachines(m), } - ret, err := r.reconcileUnhealthyMachines(context.TODO(), controlPlane) + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(controlPlane.KCP.Annotations).ToNot(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + assertMachineCondition(ctx, g, m, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate if current replicas are less or equal to 1") g.Expect(env.Cleanup(ctx, m)).To(Succeed()) }) - t.Run("Remediation does not happen if number of machines lower than desired", func(t *testing.T) { + t.Run("Remediation does not happen if there is another machine being deleted (not the one to be remediated)", func(t *testing.T) { g := NewWithT(t) m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed()) m2 := createMachine(ctx, g, ns.Name, "m2-healthy-") + m3 := getDeletingMachine(ns.Name, 
"m3-deleting") // NB. This machine is not created, it gets only added to control plane controlPlane := &internal.ControlPlane{ - KCP: &controlplanev1.KubeadmControlPlane{Spec: controlplanev1.KubeadmControlPlaneSpec{ - Replicas: utilpointer.Int32(3), - RolloutStrategy: &controlplanev1.RolloutStrategy{}, - }}, + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(3), + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + }, + }, + Cluster: &clusterv1.Cluster{}, + Machines: collections.FromMachines(m1, m2, m3), + } + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(controlPlane.KCP.Annotations).ToNot(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP waiting for control plane machine deletion to complete before triggering remediation") + + g.Expect(env.Cleanup(ctx, m1, m2)).To(Succeed()) + }) + t.Run("Remediation does not happen if there is at least one additional unhealthy etcd member on a 3 machine CP", func(t *testing.T) { + g := NewWithT(t) + + m1 := createMachine(ctx, g, ns.Name, "m1-mhc-unhealthy-", withMachineHealthCheckFailed()) + m2 := createMachine(ctx, g, ns.Name, "m2-etcd-unhealthy-", withUnhealthyEtcdMember()) + m3 := createMachine(ctx, g, ns.Name, "m3-etcd-healthy-", withHealthyEtcdMember()) + + controlPlane := &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(3), + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + }, + }, + Cluster: &clusterv1.Cluster{}, + Machines: collections.FromMachines(m1, m2, m3), + } + + r := 
&KubeadmControlPlaneReconciler{ + Client: env.GetClient(), + recorder: record.NewFakeRecorder(32), + managementCluster: &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + EtcdMembersResult: nodes(controlPlane.Machines), + }, + }, + } + + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(controlPlane.KCP.Annotations).ToNot(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate this machine because this could result in etcd loosing quorum") + + g.Expect(env.Cleanup(ctx, m1, m2, m3)).To(Succeed()) + }) + t.Run("Remediation does not happen if there are at least two additional unhealthy etcd member on a 5 machine CP", func(t *testing.T) { + g := NewWithT(t) + + m1 := createMachine(ctx, g, ns.Name, "m1-mhc-unhealthy-", withMachineHealthCheckFailed()) + m2 := createMachine(ctx, g, ns.Name, "m2-etcd-unhealthy-", withUnhealthyEtcdMember()) + m3 := createMachine(ctx, g, ns.Name, "m3-etcd-unhealthy-", withUnhealthyEtcdMember()) + m4 := createMachine(ctx, g, ns.Name, "m4-etcd-healthy-", withHealthyEtcdMember()) + m5 := createMachine(ctx, g, ns.Name, "m5-etcd-healthy-", withHealthyEtcdMember()) + + controlPlane := &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(5), + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + }, + }, + Cluster: &clusterv1.Cluster{}, + Machines: collections.FromMachines(m1, m2, m3, m4, m5), + } + + r := &KubeadmControlPlaneReconciler{ + Client: env.GetClient(), + recorder: record.NewFakeRecorder(32), + managementCluster: &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + EtcdMembersResult: 
nodes(controlPlane.Machines), + }, + }, + } + + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(controlPlane.KCP.Annotations).ToNot(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate this machine because this could result in etcd loosing quorum") + + g.Expect(env.Cleanup(ctx, m1, m2, m3, m4, m5)).To(Succeed()) + }) + + // Remediation for when control plane is not yet initialized + + t.Run("Remediation deletes unhealthy machine - 1 CP not initialized", func(t *testing.T) { + g := NewWithT(t) + + m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer()) + + controlPlane := &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(1), + Version: "v1.19.1", + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: false, + }, + }, + Cluster: &clusterv1.Cluster{}, + Machines: collections.FromMachines(m1), + } + + r := &KubeadmControlPlaneReconciler{ + Client: env.GetClient(), + recorder: record.NewFakeRecorder(32), + managementCluster: &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + EtcdMembersResult: nodes(controlPlane.Machines), + }, + }, + } + + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeFalse()) // Remediation completed, requeue + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(controlPlane.KCP.Annotations).To(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + remediationData, err := RemediationDataFromAnnotation(controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]) + 
g.Expect(err).ToNot(HaveOccurred()) + g.Expect(remediationData.Machine).To(Equal(m1.Name)) + g.Expect(remediationData.RetryCount).To(Equal(0)) + + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + + err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(m1.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) + + removeFinalizer(g, m1) + g.Expect(env.Cleanup(ctx, m1)).To(Succeed()) + }) + t.Run("Subsequent remediation of the same machine increase retry count - 1 CP not initialized", func(t *testing.T) { + g := NewWithT(t) + + m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer()) + + controlPlane := &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(1), + Version: "v1.19.1", + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: false, + }, + }, + Cluster: &clusterv1.Cluster{}, + Machines: collections.FromMachines(m1), + } + + // First reconcile, remediate machine m1 for the first time + r := &KubeadmControlPlaneReconciler{ + Client: env.GetClient(), + recorder: record.NewFakeRecorder(32), + managementCluster: &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + EtcdMembersResult: nodes(controlPlane.Machines), + }, + }, + } + + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeFalse()) // Remediation completed, requeue + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(controlPlane.KCP.Annotations).To(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + remediationData, err := RemediationDataFromAnnotation(controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]) + g.Expect(err).ToNot(HaveOccurred()) + 
g.Expect(remediationData.Machine).To(Equal(m1.Name)) + g.Expect(remediationData.RetryCount).To(Equal(0)) + + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + + err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(m1.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) + + removeFinalizer(g, m1) + g.Expect(env.CleanupAndWait(ctx, m1)).To(Succeed()) + + for i := 2; i < 4; i++ { + // Simulate the creation of a replacement for 0. + mi := createMachine(ctx, g, ns.Name, fmt.Sprintf("m%d-unhealthy-", i), withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer(), withRemediateForAnnotation(MustMarshalRemediationData(remediationData))) + + // Simulate KCP dropping RemediationInProgressAnnotation after creating the replacement machine. + delete(controlPlane.KCP.Annotations, controlplanev1.RemediationInProgressAnnotation) + + controlPlane.Machines = collections.FromMachines(mi) + + // Reconcile unhealthy replacements for m1. 
+ r.managementCluster = &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + EtcdMembersResult: nodes(collections.FromMachines(mi)), + }, + } + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeFalse()) // Remediation completed, requeue + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(controlPlane.KCP.Annotations).To(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + remediationData, err = RemediationDataFromAnnotation(controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(remediationData.Machine).To(Equal(mi.Name)) + g.Expect(remediationData.RetryCount).To(Equal(i - 1)) + + assertMachineCondition(ctx, g, mi, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + + err = env.Get(ctx, client.ObjectKey{Namespace: mi.Namespace, Name: mi.Name}, mi) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(mi.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) + + removeFinalizer(g, mi) + g.Expect(env.CleanupAndWait(ctx, mi)).To(Succeed()) + } + }) + + // Remediation for when control plane is already initialized + + t.Run("Remediation deletes unhealthy machine - 2 CP (during 1 CP rolling upgrade)", func(t *testing.T) { + g := NewWithT(t) + + m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer()) + m2 := createMachine(ctx, g, ns.Name, "m2-healthy-", withHealthyEtcdMember()) + + controlPlane := &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(2), + Version: "v1.19.1", + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + }, + }, Cluster: &clusterv1.Cluster{}, Machines: collections.FromMachines(m1, m2), } - ret, err := r.reconcileUnhealthyMachines(context.TODO(), controlPlane) - 
g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped + r := &KubeadmControlPlaneReconciler{ + Client: env.GetClient(), + recorder: record.NewFakeRecorder(32), + managementCluster: &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + EtcdMembersResult: nodes(controlPlane.Machines), + }, + }, + } + + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeFalse()) // Remediation completed, requeue + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(controlPlane.KCP.Annotations).To(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + remediationData, err := RemediationDataFromAnnotation(controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]) g.Expect(err).ToNot(HaveOccurred()) - assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP waiting for having at least 3 control plane machines before triggering remediation") + g.Expect(remediationData.Machine).To(Equal(m1.Name)) + g.Expect(remediationData.RetryCount).To(Equal(0)) + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + + err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(m1.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) + + removeFinalizer(g, m1) g.Expect(env.Cleanup(ctx, m1, m2)).To(Succeed()) }) - t.Run("Remediation does not happen if there is a deleting machine", func(t *testing.T) { + t.Run("Remediation deletes unhealthy machine - 3 CP", func(t *testing.T) { g := NewWithT(t) - m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed()) - m2 := createMachine(ctx, g, ns.Name, "m2-healthy-") - m3 := getDeletingMachine(ns.Name, "m3-deleting") // NB. 
This machine is not created, it gets only added to control plane + m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer()) + m2 := createMachine(ctx, g, ns.Name, "m2-healthy-", withHealthyEtcdMember()) + m3 := createMachine(ctx, g, ns.Name, "m3-healthy-", withHealthyEtcdMember()) + controlPlane := &internal.ControlPlane{ - KCP: &controlplanev1.KubeadmControlPlane{Spec: controlplanev1.KubeadmControlPlaneSpec{ - Replicas: utilpointer.Int32(3), - }}, + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(3), + Version: "v1.19.1", + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + }, + }, Cluster: &clusterv1.Cluster{}, Machines: collections.FromMachines(m1, m2, m3), } - ret, err := r.reconcileUnhealthyMachines(context.TODO(), controlPlane) - g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped + r := &KubeadmControlPlaneReconciler{ + Client: env.GetClient(), + recorder: record.NewFakeRecorder(32), + managementCluster: &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + EtcdMembersResult: nodes(controlPlane.Machines), + }, + }, + } + + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeFalse()) // Remediation completed, requeue g.Expect(err).ToNot(HaveOccurred()) - assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP waiting for control plane machine deletion to complete before triggering remediation") - g.Expect(env.Cleanup(ctx, m1, m2)).To(Succeed()) + g.Expect(controlPlane.KCP.Annotations).To(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + remediationData, err := RemediationDataFromAnnotation(controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]) + g.Expect(err).ToNot(HaveOccurred()) + 
g.Expect(remediationData.Machine).To(Equal(m1.Name)) + g.Expect(remediationData.RetryCount).To(Equal(0)) + + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + + err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(m1.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) + + removeFinalizer(g, m1) + g.Expect(env.Cleanup(ctx, m1, m2, m3)).To(Succeed()) }) - t.Run("Remediation does not happen if there is at least one additional unhealthy etcd member on a 3 machine CP", func(t *testing.T) { + t.Run("Remediation deletes unhealthy machine - 4 CP (during 3 CP rolling upgrade)", func(t *testing.T) { g := NewWithT(t) - m1 := createMachine(ctx, g, ns.Name, "m1-mhc-unhealthy-", withMachineHealthCheckFailed()) - m2 := createMachine(ctx, g, ns.Name, "m2-etcd-unhealthy-", withUnhealthyEtcdMember()) - m3 := createMachine(ctx, g, ns.Name, "m3-etcd-healthy-", withHealthyEtcdMember()) + m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer()) + m2 := createMachine(ctx, g, ns.Name, "m2-healthy-", withHealthyEtcdMember()) + m3 := createMachine(ctx, g, ns.Name, "m3-healthy-", withHealthyEtcdMember()) + m4 := createMachine(ctx, g, ns.Name, "m4-healthy-", withHealthyEtcdMember()) controlPlane := &internal.ControlPlane{ - KCP: &controlplanev1.KubeadmControlPlane{Spec: controlplanev1.KubeadmControlPlaneSpec{ - Replicas: utilpointer.Int32(3), - }}, + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(4), + Version: "v1.19.1", + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + }, + }, Cluster: &clusterv1.Cluster{}, - Machines: collections.FromMachines(m1, m2, m3), + Machines: collections.FromMachines(m1, m2, m3, m4), } r := 
&KubeadmControlPlaneReconciler{ @@ -201,29 +790,46 @@ func TestReconcileUnhealthyMachines(t *testing.T) { }, } - ret, err := r.reconcileUnhealthyMachines(context.TODO(), controlPlane) + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) - g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped + g.Expect(ret.IsZero()).To(BeFalse()) // Remediation completed, requeue g.Expect(err).ToNot(HaveOccurred()) - assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate this machine because this could result in etcd loosing quorum") - g.Expect(env.Cleanup(ctx, m1, m2, m3)).To(Succeed()) + g.Expect(controlPlane.KCP.Annotations).To(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + remediationData, err := RemediationDataFromAnnotation(controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(remediationData.Machine).To(Equal(m1.Name)) + g.Expect(remediationData.RetryCount).To(Equal(0)) + + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + + err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(m1.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) + + removeFinalizer(g, m1) + g.Expect(env.Cleanup(ctx, m1, m2, m3, m4)).To(Succeed()) }) - t.Run("Remediation does not happen if there is at least two additional unhealthy etcd member on a 5 machine CP", func(t *testing.T) { + t.Run("Remediation fails gracefully if no healthy Control Planes are available to become etcd leader", func(t *testing.T) { g := NewWithT(t) - m1 := createMachine(ctx, g, ns.Name, "m1-mhc-unhealthy-", withMachineHealthCheckFailed()) - m2 := createMachine(ctx, g, ns.Name, 
"m2-etcd-unhealthy-", withUnhealthyEtcdMember()) - m3 := createMachine(ctx, g, ns.Name, "m3-etcd-unhealthy-", withUnhealthyEtcdMember()) - m4 := createMachine(ctx, g, ns.Name, "m4-etcd-healthy-", withHealthyEtcdMember()) - m5 := createMachine(ctx, g, ns.Name, "m5-etcd-healthy-", withHealthyEtcdMember()) + m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer()) + m2 := createMachine(ctx, g, ns.Name, "m2-healthy-", withMachineHealthCheckFailed(), withHealthyEtcdMember()) + m3 := createMachine(ctx, g, ns.Name, "m3-healthy-", withMachineHealthCheckFailed(), withHealthyEtcdMember()) + m4 := createMachine(ctx, g, ns.Name, "m4-healthy-", withMachineHealthCheckFailed(), withHealthyEtcdMember()) controlPlane := &internal.ControlPlane{ - KCP: &controlplanev1.KubeadmControlPlane{Spec: controlplanev1.KubeadmControlPlaneSpec{ - Replicas: utilpointer.Int32(5), - }}, + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(4), + Version: "v1.19.1", + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + }, + }, Cluster: &clusterv1.Cluster{}, - Machines: collections.FromMachines(m1, m2, m3, m4, m5), + Machines: collections.FromMachines(m1, m2, m3, m4), } r := &KubeadmControlPlaneReconciler{ @@ -235,35 +841,39 @@ func TestReconcileUnhealthyMachines(t *testing.T) { }, }, } + _, err = r.reconcileUnhealthyMachines(ctx, controlPlane) + g.Expect(err).ToNot(HaveOccurred()) - ret, err := r.reconcileUnhealthyMachines(context.TODO(), controlPlane) + g.Expect(controlPlane.KCP.Annotations).ToNot(HaveKey(controlplanev1.RemediationInProgressAnnotation)) - g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped - g.Expect(err).ToNot(HaveOccurred()) - assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate 
this machine because this could result in etcd loosing quorum") + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationFailedReason, clusterv1.ConditionSeverityWarning, + "A control plane machine needs remediation, but there is no healthy machine to forward etcd leadership to. Skipping remediation") - g.Expect(env.Cleanup(ctx, m1, m2, m3, m4, m5)).To(Succeed()) + removeFinalizer(g, m1) + g.Expect(env.Cleanup(ctx, m1, m2, m3, m4)).To(Succeed()) }) - t.Run("Remediation deletes unhealthy machine - 2 CP (during 1 CP rolling upgrade)", func(t *testing.T) { + t.Run("Subsequent remediation of the same machine increase retry count - 3 CP", func(t *testing.T) { g := NewWithT(t) - m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed()) - patchHelper, err := patch.NewHelper(m1, env.GetClient()) - g.Expect(err).ToNot(HaveOccurred()) - m1.ObjectMeta.Finalizers = []string{"wait-before-delete"} - g.Expect(patchHelper.Patch(ctx, m1)) - + m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer()) m2 := createMachine(ctx, g, ns.Name, "m2-healthy-", withHealthyEtcdMember()) + m3 := createMachine(ctx, g, ns.Name, "m3-healthy-", withHealthyEtcdMember()) controlPlane := &internal.ControlPlane{ - KCP: &controlplanev1.KubeadmControlPlane{Spec: controlplanev1.KubeadmControlPlaneSpec{ - Replicas: utilpointer.Int32(2), - Version: "v1.19.1", - }}, + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(1), + Version: "v1.19.1", + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: false, + }, + }, Cluster: &clusterv1.Cluster{}, - Machines: collections.FromMachines(m1, m2), + Machines: collections.FromMachines(m1, m2, m3), } + // First reconcile, remediate machine m1 for the first time r := &KubeadmControlPlaneReconciler{ Client: env.GetClient(), 
recorder: record.NewFakeRecorder(32), @@ -274,43 +884,99 @@ func TestReconcileUnhealthyMachines(t *testing.T) { }, } - ret, err := r.reconcileUnhealthyMachines(context.TODO(), controlPlane) + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) g.Expect(ret.IsZero()).To(BeFalse()) // Remediation completed, requeue g.Expect(err).ToNot(HaveOccurred()) + g.Expect(controlPlane.KCP.Annotations).To(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + remediationData, err := RemediationDataFromAnnotation(controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(remediationData.Machine).To(Equal(m1.Name)) + g.Expect(remediationData.RetryCount).To(Equal(0)) + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) g.Expect(m1.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) - patchHelper, err = patch.NewHelper(m1, env.GetClient()) - g.Expect(err).ToNot(HaveOccurred()) - m1.ObjectMeta.Finalizers = nil - g.Expect(patchHelper.Patch(ctx, m1)) + removeFinalizer(g, m1) + g.Expect(env.CleanupAndWait(ctx, m1)).To(Succeed()) - g.Expect(env.Cleanup(ctx, m1, m2)).To(Succeed()) + for i := 5; i < 6; i++ { + // Simulate the creation of a replacement for m1. + mi := createMachine(ctx, g, ns.Name, fmt.Sprintf("m%d-unhealthy-", i), withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer(), withRemediateForAnnotation(MustMarshalRemediationData(remediationData))) + + // Simulate KCP dropping RemediationInProgressAnnotation after creating the replacement machine. + delete(controlPlane.KCP.Annotations, controlplanev1.RemediationInProgressAnnotation) + controlPlane.Machines = collections.FromMachines(mi, m2, m3) + + // Reconcile unhealthy replacements for m1. 
+ r.managementCluster = &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + EtcdMembersResult: nodes(collections.FromMachines(mi, m2, m3)), + }, + } + + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeFalse()) // Remediation completed, requeue + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(controlPlane.KCP.Annotations).To(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + remediationData, err := RemediationDataFromAnnotation(controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(remediationData.Machine).To(Equal(mi.Name)) + g.Expect(remediationData.RetryCount).To(Equal(i - 4)) + + assertMachineCondition(ctx, g, mi, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + + err = env.Get(ctx, client.ObjectKey{Namespace: mi.Namespace, Name: mi.Name}, mi) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(mi.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) + + removeFinalizer(g, mi) + g.Expect(env.CleanupAndWait(ctx, mi)).To(Succeed()) + } + + g.Expect(env.CleanupAndWait(ctx, m2, m3)).To(Succeed()) }) - t.Run("Remediation deletes unhealthy machine - 3 CP", func(t *testing.T) { +} + +func TestReconcileUnhealthyMachinesSequences(t *testing.T) { + var removeFinalizer = func(g *WithT, m *clusterv1.Machine) { + patchHelper, err := patch.NewHelper(m, env.GetClient()) + g.Expect(err).ToNot(HaveOccurred()) + m.ObjectMeta.Finalizers = nil + g.Expect(patchHelper.Patch(ctx, m)) + } + + t.Run("Remediates the first CP machine having problems to come up", func(t *testing.T) { g := NewWithT(t) - m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed()) - patchHelper, err := patch.NewHelper(m1, env.GetClient()) + ns, err := env.CreateNamespace(ctx, "ns1") g.Expect(err).ToNot(HaveOccurred()) - m1.ObjectMeta.Finalizers = 
[]string{"wait-before-delete"} - g.Expect(patchHelper.Patch(ctx, m1)) + defer func() { + g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) + }() - m2 := createMachine(ctx, g, ns.Name, "m2-healthy-", withHealthyEtcdMember()) - m3 := createMachine(ctx, g, ns.Name, "m3-healthy-", withHealthyEtcdMember()) + // Control plane not initialized yet, First CP is unhealthy and gets remediated: + + m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer()) controlPlane := &internal.ControlPlane{ - KCP: &controlplanev1.KubeadmControlPlane{Spec: controlplanev1.KubeadmControlPlaneSpec{ - Replicas: utilpointer.Int32(3), - Version: "v1.19.1", - }}, + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(3), + Version: "v1.19.1", + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: false, + }, + }, Cluster: &clusterv1.Cluster{}, - Machines: collections.FromMachines(m1, m2, m3), + Machines: collections.FromMachines(m1), } r := &KubeadmControlPlaneReconciler{ @@ -323,44 +989,103 @@ func TestReconcileUnhealthyMachines(t *testing.T) { }, } - ret, err := r.reconcileUnhealthyMachines(context.TODO(), controlPlane) + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) g.Expect(ret.IsZero()).To(BeFalse()) // Remediation completed, requeue g.Expect(err).ToNot(HaveOccurred()) + g.Expect(controlPlane.KCP.Annotations).To(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + remediationData, err := RemediationDataFromAnnotation(controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(remediationData.Machine).To(Equal(m1.Name)) + g.Expect(remediationData.RetryCount).To(Equal(0)) + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") err = env.Get(ctx, 
client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) g.Expect(m1.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) - patchHelper, err = patch.NewHelper(m1, env.GetClient()) + removeFinalizer(g, m1) + g.Expect(env.Cleanup(ctx, m1)).To(Succeed()) + + // Fake scaling up, which creates a remediation machine, fast forwards to when also the replacement machine is marked unhealthy. + // NOTE: scale up also resets remediation in progress and remediation counts. + + m2 := createMachine(ctx, g, ns.Name, "m2-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer(), withRemediateForAnnotation(MustMarshalRemediationData(remediationData))) + delete(controlPlane.KCP.Annotations, controlplanev1.RemediationInProgressAnnotation) + + // Control plane not initialized yet, Second CP is unhealthy and gets remediated (retry 2) + + controlPlane.Machines = collections.FromMachines(m2) + r.managementCluster = &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + EtcdMembersResult: nodes(controlPlane.Machines), + }, + } + + ret, err = r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeFalse()) // Remediation completed, requeue g.Expect(err).ToNot(HaveOccurred()) - m1.ObjectMeta.Finalizers = nil - g.Expect(patchHelper.Patch(ctx, m1)) - g.Expect(env.Cleanup(ctx, m1, m2, m3)).To(Succeed()) + g.Expect(controlPlane.KCP.Annotations).To(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + remediationData, err = RemediationDataFromAnnotation(controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(remediationData.Machine).To(Equal(m2.Name)) + g.Expect(remediationData.RetryCount).To(Equal(1)) + + assertMachineCondition(ctx, g, m2, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + + err = env.Get(ctx, client.ObjectKey{Namespace: 
m2.Namespace, Name: m2.Name}, m2) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(m2.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) + + removeFinalizer(g, m2) + g.Expect(env.Cleanup(ctx, m2)).To(Succeed()) + + // Fake scaling up, which creates a remediation machine, which is healthy. + // NOTE: scale up also resets remediation in progress and remediation counts. + + m3 := createMachine(ctx, g, ns.Name, "m3-healthy-", withHealthyEtcdMember(), withRemediateForAnnotation(MustMarshalRemediationData(remediationData))) + delete(controlPlane.KCP.Annotations, controlplanev1.RemediationInProgressAnnotation) + + g.Expect(env.Cleanup(ctx, m3)).To(Succeed()) }) - t.Run("Remediation deletes unhealthy machine - 4 CP (during 3 CP rolling upgrade)", func(t *testing.T) { + + t.Run("Remediates the second CP machine having problems to come up", func(t *testing.T) { g := NewWithT(t) - m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed()) - patchHelper, err := patch.NewHelper(m1, env.GetClient()) + ns, err := env.CreateNamespace(ctx, "ns1") g.Expect(err).ToNot(HaveOccurred()) - m1.ObjectMeta.Finalizers = []string{"wait-before-delete"} - g.Expect(patchHelper.Patch(ctx, m1)) + defer func() { + g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) + }() - m2 := createMachine(ctx, g, ns.Name, "m2-healthy-", withHealthyEtcdMember()) - m3 := createMachine(ctx, g, ns.Name, "m3-healthy-", withHealthyEtcdMember()) - m4 := createMachine(ctx, g, ns.Name, "m4-healthy-", withHealthyEtcdMember()) + // Control plane initialized, First CP healthy, second CP is unhealthy and gets remediated: + + m1 := createMachine(ctx, g, ns.Name, "m1-healthy-", withHealthyEtcdMember()) + m2 := createMachine(ctx, g, ns.Name, "m2-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer()) controlPlane := &internal.ControlPlane{ - KCP: &controlplanev1.KubeadmControlPlane{Spec: controlplanev1.KubeadmControlPlaneSpec{ - Replicas: utilpointer.Int32(4), - Version: 
"v1.19.1", - }}, + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(3), + Version: "v1.19.1", + RolloutStrategy: &controlplanev1.RolloutStrategy{ + RollingUpdate: &controlplanev1.RollingUpdate{ + MaxSurge: &intstr.IntOrString{ + IntVal: 1, + }, + }, + }, + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + }, + }, Cluster: &clusterv1.Cluster{}, - Machines: collections.FromMachines(m1, m2, m3, m4), + Machines: collections.FromMachines(m1, m2), } r := &KubeadmControlPlaneReconciler{ @@ -373,44 +1098,104 @@ func TestReconcileUnhealthyMachines(t *testing.T) { }, } - ret, err := r.reconcileUnhealthyMachines(context.TODO(), controlPlane) + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) g.Expect(ret.IsZero()).To(BeFalse()) // Remediation completed, requeue g.Expect(err).ToNot(HaveOccurred()) - assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + g.Expect(controlPlane.KCP.Annotations).To(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + remediationData, err := RemediationDataFromAnnotation(controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(remediationData.Machine).To(Equal(m2.Name)) + g.Expect(remediationData.RetryCount).To(Equal(0)) - err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) + assertMachineCondition(ctx, g, m2, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + + err = env.Get(ctx, client.ObjectKey{Namespace: m2.Namespace, Name: m2.Name}, m2) g.Expect(err).ToNot(HaveOccurred()) - g.Expect(m1.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) + g.Expect(m2.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) + + 
removeFinalizer(g, m2) + g.Expect(env.Cleanup(ctx, m2)).To(Succeed()) + + // Fake scaling up, which creates a remediation machine, fast forwards to when also the replacement machine is marked unhealthy. + // NOTE: scale up also resets remediation in progress and remediation counts. + + m3 := createMachine(ctx, g, ns.Name, "m3-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer(), withRemediateForAnnotation(MustMarshalRemediationData(remediationData))) + delete(controlPlane.KCP.Annotations, controlplanev1.RemediationInProgressAnnotation) + + // Control plane initialized, replacement CP is unhealthy and gets remediated (retry 1) + + controlPlane.Machines = collections.FromMachines(m1, m3) + r.managementCluster = &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + EtcdMembersResult: nodes(controlPlane.Machines), + }, + } - patchHelper, err = patch.NewHelper(m1, env.GetClient()) + ret, err = r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeFalse()) // Remediation completed, requeue g.Expect(err).ToNot(HaveOccurred()) - m1.ObjectMeta.Finalizers = nil - g.Expect(patchHelper.Patch(ctx, m1)) - g.Expect(env.Cleanup(ctx, m1, m2, m3, m4)).To(Succeed()) + g.Expect(controlPlane.KCP.Annotations).To(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + remediationData, err = RemediationDataFromAnnotation(controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(remediationData.Machine).To(Equal(m3.Name)) + g.Expect(remediationData.RetryCount).To(Equal(1)) + + assertMachineCondition(ctx, g, m3, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + + err = env.Get(ctx, client.ObjectKey{Namespace: m3.Namespace, Name: m3.Name}, m3) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(m3.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) + + removeFinalizer(g, 
m3) + g.Expect(env.Cleanup(ctx, m3)).To(Succeed()) + + // Fake scaling up, which creates a remediation machine, which is healthy. + // NOTE: scale up also resets remediation in progress and remediation counts. + + m4 := createMachine(ctx, g, ns.Name, "m4-healthy-", withHealthyEtcdMember(), withRemediateForAnnotation(MustMarshalRemediationData(remediationData))) + delete(controlPlane.KCP.Annotations, controlplanev1.RemediationInProgressAnnotation) + + g.Expect(env.Cleanup(ctx, m1, m4)).To(Succeed()) }) - t.Run("Remediation does not happen if no healthy Control Planes are available to become etcd leader", func(t *testing.T) { + + t.Run("Remediates only one CP machine in case of multiple failures", func(t *testing.T) { g := NewWithT(t) - m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed()) - patchHelper, err := patch.NewHelper(m1, env.GetClient()) + ns, err := env.CreateNamespace(ctx, "ns1") g.Expect(err).ToNot(HaveOccurred()) - m1.ObjectMeta.Finalizers = []string{"wait-before-delete"} - g.Expect(patchHelper.Patch(ctx, m1)) + defer func() { + g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) + }() - m2 := createMachine(ctx, g, ns.Name, "m2-healthy-", withMachineHealthCheckFailed(), withHealthyEtcdMember()) - m3 := createMachine(ctx, g, ns.Name, "m3-healthy-", withMachineHealthCheckFailed(), withHealthyEtcdMember()) - m4 := createMachine(ctx, g, ns.Name, "m4-healthy-", withMachineHealthCheckFailed(), withHealthyEtcdMember()) + // Control plane initialized yet, First CP healthy, second and third CP are unhealthy. 
second gets remediated: + + m1 := createMachine(ctx, g, ns.Name, "m1-healthy-", withHealthyEtcdMember()) + m2 := createMachine(ctx, g, ns.Name, "m2-unhealthy-", withHealthyEtcdMember(), withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer()) + m3 := createMachine(ctx, g, ns.Name, "m3-unhealthy-", withHealthyEtcdMember(), withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer()) controlPlane := &internal.ControlPlane{ - KCP: &controlplanev1.KubeadmControlPlane{Spec: controlplanev1.KubeadmControlPlaneSpec{ - Replicas: utilpointer.Int32(4), - Version: "v1.19.1", - }}, + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: utilpointer.Int32(3), + Version: "v1.19.1", + RolloutStrategy: &controlplanev1.RolloutStrategy{ + RollingUpdate: &controlplanev1.RollingUpdate{ + MaxSurge: &intstr.IntOrString{ + IntVal: 1, + }, + }, + }, + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + }, + }, Cluster: &clusterv1.Cluster{}, - Machines: collections.FromMachines(m1, m2, m3, m4), + Machines: collections.FromMachines(m1, m2, m3), } r := &KubeadmControlPlaneReconciler{ @@ -422,24 +1207,48 @@ func TestReconcileUnhealthyMachines(t *testing.T) { }, }, } - _, err = r.reconcileUnhealthyMachines(context.TODO(), controlPlane) + + ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeFalse()) // Remediation completed, requeue g.Expect(err).ToNot(HaveOccurred()) - assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationFailedReason, clusterv1.ConditionSeverityWarning, - "A control plane machine needs remediation, but there is no healthy machine to forward etcd leadership to. 
Skipping remediation") + g.Expect(controlPlane.KCP.Annotations).To(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + remediationData, err := RemediationDataFromAnnotation(controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(remediationData.Machine).To(Equal(m2.Name)) + g.Expect(remediationData.RetryCount).To(Equal(0)) - patchHelper, err = patch.NewHelper(m1, env.GetClient()) + assertMachineCondition(ctx, g, m2, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + assertMachineCondition(ctx, g, m3, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "") + + err = env.Get(ctx, client.ObjectKey{Namespace: m2.Namespace, Name: m2.Name}, m2) g.Expect(err).ToNot(HaveOccurred()) - m1.ObjectMeta.Finalizers = nil - g.Expect(patchHelper.Patch(ctx, m1)) + g.Expect(m2.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) - g.Expect(env.Cleanup(ctx, m1, m2, m3, m4)).To(Succeed()) + removeFinalizer(g, m2) + g.Expect(env.Cleanup(ctx, m2)).To(Succeed()) + + // Check next reconcile does not further remediate + + controlPlane.Machines = collections.FromMachines(m1, m3) + r.managementCluster = &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + EtcdMembersResult: nodes(controlPlane.Machines), + }, + } + + ret, err = r.reconcileUnhealthyMachines(ctx, controlPlane) + + g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(env.Cleanup(ctx, m1)).To(Succeed()) }) } func TestCanSafelyRemoveEtcdMember(t *testing.T) { g := NewWithT(t) - ctx := context.TODO() ns, err := env.CreateNamespace(ctx, "ns1") g.Expect(err).ToNot(HaveOccurred()) @@ -470,7 +1279,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, } - ret, err := r.canSafelyRemoveEtcdMember(context.TODO(), 
controlPlane, m1) + ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeFalse()) g.Expect(err).ToNot(HaveOccurred()) @@ -501,7 +1310,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, } - ret, err := r.canSafelyRemoveEtcdMember(context.TODO(), controlPlane, m1) + ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeTrue()) g.Expect(err).ToNot(HaveOccurred()) @@ -538,7 +1347,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, } - ret, err := r.canSafelyRemoveEtcdMember(context.TODO(), controlPlane, m1) + ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeTrue()) g.Expect(err).ToNot(HaveOccurred()) @@ -568,7 +1377,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, } - ret, err := r.canSafelyRemoveEtcdMember(context.TODO(), controlPlane, m1) + ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeFalse()) g.Expect(err).ToNot(HaveOccurred()) @@ -599,7 +1408,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, } - ret, err := r.canSafelyRemoveEtcdMember(context.TODO(), controlPlane, m1) + ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeTrue()) g.Expect(err).ToNot(HaveOccurred()) @@ -637,7 +1446,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, } - ret, err := r.canSafelyRemoveEtcdMember(context.TODO(), controlPlane, m1) + ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeTrue()) g.Expect(err).ToNot(HaveOccurred()) @@ -668,7 +1477,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, } - ret, err := r.canSafelyRemoveEtcdMember(context.TODO(), controlPlane, m1) + ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeFalse()) g.Expect(err).ToNot(HaveOccurred()) @@ -701,7 +1510,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, } - ret, err := r.canSafelyRemoveEtcdMember(context.TODO(), controlPlane, m1) + ret, err 
:= r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeTrue()) g.Expect(err).ToNot(HaveOccurred()) @@ -734,7 +1543,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, } - ret, err := r.canSafelyRemoveEtcdMember(context.TODO(), controlPlane, m1) + ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeFalse()) g.Expect(err).ToNot(HaveOccurred()) @@ -769,7 +1578,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, } - ret, err := r.canSafelyRemoveEtcdMember(context.TODO(), controlPlane, m1) + ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeTrue()) g.Expect(err).ToNot(HaveOccurred()) @@ -804,7 +1613,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, } - ret, err := r.canSafelyRemoveEtcdMember(context.TODO(), controlPlane, m1) + ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeFalse()) g.Expect(err).ToNot(HaveOccurred()) @@ -859,6 +1668,21 @@ func withNodeRef(ref string) machineOption { } } +func withRemediateForAnnotation(remediatedFor string) machineOption { + return func(machine *clusterv1.Machine) { + if machine.Annotations == nil { + machine.Annotations = map[string]string{} + } + machine.Annotations[controlplanev1.RemediationForAnnotation] = remediatedFor + } +} + +func withWaitBeforeDeleteFinalizer() machineOption { + return func(machine *clusterv1.Machine) { + machine.Finalizers = []string{"wait-before-delete"} + } +} + func createMachine(ctx context.Context, g *WithT, namespace, name string, options ...machineOption) *clusterv1.Machine { m := &clusterv1.Machine{ ObjectMeta: metav1.ObjectMeta{ @@ -931,3 +1755,11 @@ func assertMachineCondition(ctx context.Context, g *WithT, m *clusterv1.Machine, return nil }, 10*time.Second).Should(Succeed()) } + +func MustMarshalRemediationData(r *RemediationData) string { + s, err := r.Marshal() + if err != nil { + panic("failed to marshal remediation data") + } + return s +} diff --git 
a/controlplane/kubeadm/internal/controllers/status.go b/controlplane/kubeadm/internal/controllers/status.go index c17d62c0cf7c..3d742bdac7f4 100644 --- a/controlplane/kubeadm/internal/controllers/status.go +++ b/controlplane/kubeadm/internal/controllers/status.go @@ -116,5 +116,33 @@ func (r *KubeadmControlPlaneReconciler) updateStatus(ctx context.Context, kcp *c kcp.Status.Ready = true } + // Surface lastRemediation data in status. + // LastRemediation is the remediation currently in progress, if any, or the + // most recent of the remediations we are keeping track of on machines. + var lastRemediation *RemediationData + + if v, ok := kcp.Annotations[controlplanev1.RemediationInProgressAnnotation]; ok { + remediationData, err := RemediationDataFromAnnotation(v) + if err != nil { + return err + } + lastRemediation = remediationData + } else { + for _, m := range ownedMachines.UnsortedList() { + if v, ok := m.Annotations[controlplanev1.RemediationForAnnotation]; ok { + remediationData, err := RemediationDataFromAnnotation(v) + if err != nil { + return err + } + if lastRemediation == nil || lastRemediation.Timestamp.Time.Before(remediationData.Timestamp.Time) { + lastRemediation = remediationData + } + } + } + } + + if lastRemediation != nil { + kcp.Status.LastRemediation = lastRemediation.ToStatus() + } return nil } diff --git a/docs/book/src/developer/providers/v1.3-to-v1.4.md b/docs/book/src/developer/providers/v1.3-to-v1.4.md index 5bd68da0e3fc..5873897d4627 100644 --- a/docs/book/src/developer/providers/v1.3-to-v1.4.md +++ b/docs/book/src/developer/providers/v1.3-to-v1.4.md @@ -67,6 +67,8 @@ maintainers of providers and consumers of our Go API. 
Please note that the following logging flags have been removed: (in `component-base`, but this affects all CAPI controllers): `--add-dir-header`, `--alsologtostderr`, `--log-backtrace-at`, `--log-dir`, `--log-file`, `--log-file-max-size`, `--logtostderr`, `--one-output`, `--skip-headers`, `--skip-log-headers` and `--stderrthreshold`. For more information, please see: https://github.com/kubernetes/enhancements/issues/2845 +- A new `KCPRemediationSpec` test has been added providing better test coverage for KCP remediation most common use cases. As a consequence `MachineRemediationSpec` now only tests remediation of + worker machines (NOTE: we plan to improve this test as well in a future iteration). ### Suggested changes for providers diff --git a/docs/book/src/reference/labels_and_annotations.md b/docs/book/src/reference/labels_and_annotations.md index 78ef2fcbbbfd..a8a2172c6a27 100644 --- a/docs/book/src/reference/labels_and_annotations.md +++ b/docs/book/src/reference/labels_and_annotations.md @@ -49,3 +49,5 @@ | controlplane.cluster.x-k8s.io/skip-coredns | It explicitly skips reconciling CoreDNS if set. | | controlplane.cluster.x-k8s.io/skip-kube-proxy | It explicitly skips reconciling kube-proxy if set. | | controlplane.cluster.x-k8s.io/kubeadm-cluster-configuration | It is a machine annotation that stores the json-marshalled string of KCP ClusterConfiguration. This annotation is used to detect any changes in ClusterConfiguration and trigger machine rollout in KCP. | +| controlplane.cluster.x-k8s.io/remediation-in-progress | It is a KCP annotation that tracks that the system is in between having deleted an unhealthy machine and recreating its replacement. | +| controlplane.cluster.x-k8s.io/remediation-for | It is a machine annotation that links a new machine to the unhealthy machine it is replacing. 
| diff --git a/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go b/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go index f2b98f34b581..2d0557132958 100644 --- a/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go +++ b/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go @@ -199,15 +199,19 @@ func (r *Reconciler) reconcile(ctx context.Context, logger logr.Logger, cluster UID: cluster.UID, }) - // Get the remote cluster cache to use as a client.Reader. - remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster)) - if err != nil { - logger.Error(err, "error creating remote cluster cache") - return ctrl.Result{}, err - } + // If the cluster is already initialized, get the remote cluster cache to use as a client.Reader. + var remoteClient client.Client + if conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) { + var err error + remoteClient, err = r.Tracker.GetClient(ctx, util.ObjectKey(cluster)) + if err != nil { + logger.Error(err, "error creating remote cluster cache") + return ctrl.Result{}, err + } - if err := r.watchClusterNodes(ctx, cluster); err != nil { - return ctrl.Result{}, err + if err := r.watchClusterNodes(ctx, cluster); err != nil { + return ctrl.Result{}, err + } } // fetch all targets diff --git a/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go b/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go index 97eb0659bf0c..5d174fb63155 100644 --- a/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go +++ b/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go @@ -31,6 +31,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" @@ -111,8 +112,9 @@ func (t 
*healthCheckTarget) needsRemediation(logger logr.Logger, timeoutForMachi return true, time.Duration(0) } - // Don't penalize any Machine/Node if the control plane has not been initialized. - if !conditions.IsTrue(t.Cluster, clusterv1.ControlPlaneInitializedCondition) { + // Don't penalize any Machine/Node if the control plane has not been initialized. + // An exception to this rule is the control plane machine itself, so the first control plane machine can be remediated. + if !conditions.IsTrue(t.Cluster, clusterv1.ControlPlaneInitializedCondition) && !util.IsControlPlaneMachine(t.Machine) { logger.V(3).Info("Not evaluating target health because the control plane has not yet been initialized") // Return a nextCheck time of 0 because we'll get requeued when the Cluster is updated. return false, 0 @@ -133,18 +135,18 @@ func (t *healthCheckTarget) needsRemediation(logger logr.Logger, timeoutForMachi return false, 0 } - controlPlaneInitializedTime := conditions.GetLastTransitionTime(t.Cluster, clusterv1.ControlPlaneInitializedCondition).Time - clusterInfraReadyTime := conditions.GetLastTransitionTime(t.Cluster, clusterv1.InfrastructureReadyCondition).Time + controlPlaneInitialized := conditions.GetLastTransitionTime(t.Cluster, clusterv1.ControlPlaneInitializedCondition) + clusterInfraReady := conditions.GetLastTransitionTime(t.Cluster, clusterv1.InfrastructureReadyCondition) machineCreationTime := t.Machine.CreationTimestamp.Time // Use the latest of the 3 times comparisonTime := machineCreationTime - logger.V(3).Info("Determining comparison time", "machineCreationTime", machineCreationTime, "clusterInfraReadyTime", clusterInfraReadyTime, "controlPlaneInitializedTime", controlPlaneInitializedTime) + logger.V(3).Info("Determining comparison time", "machineCreationTime", machineCreationTime, "clusterInfraReadyTime", clusterInfraReady, "controlPlaneInitializedTime", 
controlPlaneInitialized) + if conditions.IsTrue(t.Cluster, clusterv1.ControlPlaneInitializedCondition) && controlPlaneInitialized != nil && controlPlaneInitialized.Time.After(comparisonTime) { + comparisonTime = controlPlaneInitialized.Time } - if clusterInfraReadyTime.After(comparisonTime) { - comparisonTime = clusterInfraReadyTime + if conditions.IsTrue(t.Cluster, clusterv1.InfrastructureReadyCondition) && clusterInfraReady != nil && clusterInfraReady.Time.After(comparisonTime) { + comparisonTime = clusterInfraReady.Time } logger.V(3).Info("Using comparison time", "time", comparisonTime) @@ -218,16 +220,18 @@ func (r *Reconciler) getTargetsFromMHC(ctx context.Context, logger logr.Logger, Machine: &machines[k], patchHelper: patchHelper, } - node, err := r.getNodeFromMachine(ctx, clusterClient, target.Machine) - if err != nil { - if !apierrors.IsNotFound(err) { - return nil, errors.Wrap(err, "error getting node") + if clusterClient != nil { + node, err := r.getNodeFromMachine(ctx, clusterClient, target.Machine) + if err != nil { + if !apierrors.IsNotFound(err) { + return nil, errors.Wrap(err, "error getting node") + } + + // A node has been seen for this machine, but it no longer exists + target.nodeMissing = true } - - // A node has been seen for this machine, but it no longer exists - target.nodeMissing = true + target.Node = node } - target.Node = node targets = append(targets, target) } return targets, nil diff --git a/test/e2e/config/docker.yaml b/test/e2e/config/docker.yaml index 708ecef3b2e0..74a52cd3647e 100644 --- a/test/e2e/config/docker.yaml +++ b/test/e2e/config/docker.yaml @@ -290,3 +290,6 @@ intervals: node-drain/wait-deployment-available: ["3m", "10s"] node-drain/wait-control-plane: ["15m", "10s"] node-drain/wait-machine-deleted: ["2m", "10s"] + kcp-remediation/wait-machines: ["5m", "10s"] + kcp-remediation/check-machines-stable: ["30s", "5s"] + kcp-remediation/wait-machine-provisioned: ["5m", "10s"] diff --git 
a/test/e2e/data/infrastructure-docker/main/cluster-template-kcp-remediation/cluster-with-kcp.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-kcp-remediation/cluster-with-kcp.yaml new file mode 100644 index 000000000000..12516bab5ccd --- /dev/null +++ b/test/e2e/data/infrastructure-docker/main/cluster-template-kcp-remediation/cluster-with-kcp.yaml @@ -0,0 +1,98 @@ +--- +# DockerCluster object referenced by the Cluster object +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: DockerCluster +metadata: + name: '${CLUSTER_NAME}' +--- +# Cluster object with +# - Reference to the KubeadmControlPlane object +# - the label cni=${CLUSTER_NAME}-crs-0, so the cluster can be selected by the ClusterResourceSet. +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + name: '${CLUSTER_NAME}' + labels: + cni: "${CLUSTER_NAME}-crs-0" +spec: + clusterNetwork: + services: + cidrBlocks: ['${DOCKER_SERVICE_CIDRS}'] + pods: + cidrBlocks: ['${DOCKER_POD_CIDRS}'] + serviceDomain: '${DOCKER_SERVICE_DOMAIN}' + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: DockerCluster + name: '${CLUSTER_NAME}' + controlPlaneRef: + kind: KubeadmControlPlane + apiVersion: controlplane.cluster.x-k8s.io/v1beta1 + name: "${CLUSTER_NAME}-control-plane" +--- +# DockerMachineTemplate object referenced by the KubeadmControlPlane object +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: DockerMachineTemplate +metadata: + name: "${CLUSTER_NAME}-control-plane" +spec: + template: + spec: {} +--- +# KubeadmControlPlane referenced by the Cluster +kind: KubeadmControlPlane +apiVersion: controlplane.cluster.x-k8s.io/v1beta1 +metadata: + name: "${CLUSTER_NAME}-control-plane" +spec: + replicas: ${CONTROL_PLANE_MACHINE_COUNT} + machineTemplate: + infrastructureRef: + kind: DockerMachineTemplate + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + name: "${CLUSTER_NAME}-control-plane" + kubeadmConfigSpec: + clusterConfiguration: + 
controllerManager: + extraArgs: {enable-hostpath-provisioner: 'true'} + apiServer: + # host.docker.internal is required by kubetest when running on MacOS because of the way ports are proxied. + certSANs: [localhost, 127.0.0.1, 0.0.0.0, host.docker.internal] + initConfiguration: + nodeRegistration: + kubeletExtraArgs: + eviction-hard: 'nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0%' + joinConfiguration: + nodeRegistration: + kubeletExtraArgs: + eviction-hard: 'nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0%' + files: + - path: /wait-signal.sh + content: | + #!/bin/bash + + set -o errexit + set -o pipefail + + echo "Waiting for signal..." + + TOKEN=$1 + SERVER=$2 + NAMESPACE=$3 + + while true; + do + sleep 1s + + signal=$(curl -k -s --header "Authorization: Bearer $TOKEN" $SERVER/api/v1/namespaces/$NAMESPACE/configmaps/mhc-test | jq -r .data.signal?) + echo "signal $signal" + + if [ "$signal" == "pass" ]; then + curl -k -s --header "Authorization: Bearer $TOKEN" -XPATCH -H "Content-Type: application/strategic-merge-patch+json" --data '{"data": {"signal": "ack-pass"}}' $SERVER/api/v1/namespaces/$NAMESPACE/configmaps/mhc-test + exit 0 + fi + done + permissions: "0777" + preKubeadmCommands: + - ./wait-signal.sh "${TOKEN}" "${SERVER}" "${NAMESPACE}" + version: "${KUBERNETES_VERSION}" diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-kcp-remediation/kustomization.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-kcp-remediation/kustomization.yaml index e234e37be1b2..92761f83d820 100644 --- a/test/e2e/data/infrastructure-docker/main/cluster-template-kcp-remediation/kustomization.yaml +++ b/test/e2e/data/infrastructure-docker/main/cluster-template-kcp-remediation/kustomization.yaml @@ -1,5 +1,5 @@ bases: - - ../bases/cluster-with-kcp.yaml + - cluster-with-kcp.yaml - ../bases/md.yaml - ../bases/crs.yaml - mhc.yaml diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-kcp-remediation/mhc.yaml 
b/test/e2e/data/infrastructure-docker/main/cluster-template-kcp-remediation/mhc.yaml index 3ed3e0a9473a..39187cec0a40 100644 --- a/test/e2e/data/infrastructure-docker/main/cluster-template-kcp-remediation/mhc.yaml +++ b/test/e2e/data/infrastructure-docker/main/cluster-template-kcp-remediation/mhc.yaml @@ -1,7 +1,8 @@ --- # MachineHealthCheck object with -# - a selector that targets all the machines with label cluster.x-k8s.io/control-plane="" -# - unhealthyConditions triggering remediation after 10s the condition is set +# - a selector that targets all the machines with label cluster.x-k8s.io/control-plane="" and the mhc-test: "fail" (the label is used to trigger remediation in a controlled way - by adding CP under MHC control intentionally -) +# - nodeStartupTimeout: 30s (to force remediation on nodes still provisioning) +# - unhealthyConditions triggering remediation after 10s the e2e.remediation.condition condition is set to false (to force remediation on nodes already provisioned) apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineHealthCheck metadata: @@ -12,6 +13,8 @@ spec: selector: matchLabels: cluster.x-k8s.io/control-plane: "" + mhc-test: "fail" + nodeStartupTimeout: 30s unhealthyConditions: - type: e2e.remediation.condition status: "False" diff --git a/test/e2e/kcp_remediations.go b/test/e2e/kcp_remediations.go new file mode 100644 index 000000000000..64fc229b0b59 --- /dev/null +++ b/test/e2e/kcp_remediations.go @@ -0,0 +1,687 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "context" + "fmt" + "os" + "path/filepath" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + authenticationv1 "k8s.io/api/authentication/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/tools/clientcmd" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/test/e2e/internal/log" + "sigs.k8s.io/cluster-api/test/framework" + "sigs.k8s.io/cluster-api/test/framework/clusterctl" + "sigs.k8s.io/cluster-api/util" +) + +const ( + configMapName = "mhc-test" + configMapDataKey = "signal" + + failLabelValue = "fail" +) + +// KCPRemediationSpecInput is the input for KCPRemediationSpec. +type KCPRemediationSpecInput struct { + // This spec requires the following intervals to be defined in order to work: + // - wait-cluster, used when waiting for the cluster infrastructure to be provisioned. + // - wait-machines, used when waiting for an old machine to be remediated and a new one provisioned. + // - check-machines-stable, used when checking that the current list of machines is stable. + // - wait-machine-provisioned, used when waiting for a machine to be provisioned after unblocking bootstrap. + E2EConfig *clusterctl.E2EConfig + ClusterctlConfigPath string + BootstrapClusterProxy framework.ClusterProxy + ArtifactFolder string + SkipCleanup bool + + // Flavor, if specified, must refer to a template that has a MachineHealthCheck + // - 3 node CP, no workers + // - Control plane machines having a pre-kubeadm command that queries for a well-known ConfigMap on the management cluster, + // holding up bootstrap until a signal is passed via the config map. 
+ // NOTE: In order for this to work communications from workload cluster to management cluster must be enabled. + // - An MHC targeting control plane machines with the mhc-test=fail labels and + // nodeStartupTimeout: 30s + // unhealthyConditions: + // - type: e2e.remediation.condition + // status: "False" + // timeout: 10s + // If not specified, "kcp-remediation" is used. + Flavor *string +} + +// KCPRemediationSpec implements a test that verifies that Machines are remediated by MHC during unhealthy conditions. +func KCPRemediationSpec(ctx context.Context, inputGetter func() KCPRemediationSpecInput) { + var ( + specName = "kcp-remediation" + input KCPRemediationSpecInput + namespace *corev1.Namespace + cancelWatches context.CancelFunc + clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult + ) + + BeforeEach(func() { + Expect(ctx).NotTo(BeNil(), "ctx is required for %s spec", specName) + input = inputGetter() + Expect(input.E2EConfig).ToNot(BeNil(), "Invalid argument. input.E2EConfig can't be nil when calling %s spec", specName) + Expect(input.ClusterctlConfigPath).To(BeAnExistingFile(), "Invalid argument. input.ClusterctlConfigPath must be an existing file when calling %s spec", specName) + Expect(input.BootstrapClusterProxy).ToNot(BeNil(), "Invalid argument. input.BootstrapClusterProxy can't be nil when calling %s spec", specName) + Expect(os.MkdirAll(input.ArtifactFolder, 0750)).To(Succeed(), "Invalid argument. input.ArtifactFolder can't be created for %s spec", specName) + Expect(input.E2EConfig.Variables).To(HaveKey(KubernetesVersion)) + + // Setup a Namespace where to host objects for this spec and create a watcher for the namespace events. 
+ namespace, cancelWatches = setupSpecNamespace(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder) + }) + + It("Should replace unhealthy machines", func() { + By("Creating a workload cluster") + + // NOTE: This test is quite different from other tests, because it has to trigger failures on machines in a controlled ways. + + // creates the mhc-test ConfigMap that will be used to control machines bootstrap during the remediation tests. + createConfigMapForMachinesBootstrapSignal(ctx, input.BootstrapClusterProxy.GetClient(), namespace.Name) + + // Creates the workload cluster. + clusterResources = createWorkloadClusterAndWait(ctx, createWorkloadClusterAndWaitInput{ + E2EConfig: input.E2EConfig, + ClusterctlConfigPath: input.ClusterctlConfigPath, + Proxy: input.BootstrapClusterProxy, + ArtifactFolder: input.ArtifactFolder, + SpecName: specName, + Flavor: pointer.StringDeref(input.Flavor, "kcp-remediation"), + + // values to be injected in the template + + Namespace: namespace.Name, + // Token with credentials to use for accessing the ConfigMap on managements cluster from the workload cluster. + // NOTE: this func also setups credentials/RBAC rules and everything necessary to get the authenticationToken. + AuthenticationToken: getAuthenticationToken(ctx, input.BootstrapClusterProxy, namespace.Name), + // Address to be used for accessing the management cluster from a workload cluster. 
+ ServerAddr: getServerAddr(ctx, input.BootstrapClusterProxy), + }) + + // The first CP machine comes up but it does not complete bootstrap + + By("FIRST CONTROL PLANE MACHINE") + + By("Wait for the cluster to get stuck with the first CP machine not completing the bootstrap") + allMachines, newMachines := waitForMachines(ctx, waitForMachinesInput{ + Lister: input.BootstrapClusterProxy.GetClient(), + Namespace: namespace.Name, + ClusterName: clusterResources.Cluster.Name, + ExpectedReplicas: 1, + WaitForMachinesIntervals: input.E2EConfig.GetIntervals(specName, "wait-machines"), + CheckMachineListStableIntervals: input.E2EConfig.GetIntervals(specName, "check-machines-stable"), + }) + Expect(allMachines).To(HaveLen(1)) + Expect(newMachines).To(HaveLen(1)) + firstMachineName := newMachines[0] + firstMachine := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: firstMachineName, + Namespace: namespace.Name, + }, + } + Expect(input.BootstrapClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(firstMachine), firstMachine)).To(Succeed(), "Failed to get machine %d", firstMachineName) + Expect(firstMachine.Status.NodeRef).To(BeNil()) + log.Logf("Machine %s is up but still bootstrapping", firstMachineName) + + // Intentionally trigger remediation on the first CP, and validate the first machine is deleted and a replacement should come up. 
+ + By("REMEDIATING FIRST CONTROL PLANE MACHINE") + + Byf("Add mhc-test:fail label to machine %s so it will be immediately remediated", firstMachineName) + firstMachineWithLabel := firstMachine.DeepCopy() + firstMachineWithLabel.Labels["mhc-test"] = failLabelValue + Expect(input.BootstrapClusterProxy.GetClient().Patch(ctx, firstMachineWithLabel, client.MergeFrom(firstMachine))).To(Succeed(), "Failed to patch machine %d", firstMachineName) + + log.Logf("Wait for the first CP machine to be remediated, and the replacement machine to come up, but again get stuck with the Machine not completing the bootstrap") + allMachines, newMachines = waitForMachines(ctx, waitForMachinesInput{ + Lister: input.BootstrapClusterProxy.GetClient(), + Namespace: namespace.Name, + ClusterName: clusterResources.Cluster.Name, + ExpectedReplicas: 1, + ExpectedDeletedMachines: []string{firstMachineName}, + WaitForMachinesIntervals: input.E2EConfig.GetIntervals(specName, "wait-machines"), + CheckMachineListStableIntervals: input.E2EConfig.GetIntervals(specName, "check-machines-stable"), + }) + Expect(allMachines).To(HaveLen(1)) + Expect(newMachines).To(HaveLen(1)) + firstMachineReplacementName := newMachines[0] + firstMachineReplacement := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: firstMachineReplacementName, + Namespace: namespace.Name, + }, + } + Expect(input.BootstrapClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(firstMachineReplacement), firstMachineReplacement)).To(Succeed(), "Failed to get machine %d", firstMachineReplacementName) + Expect(firstMachineReplacement.Status.NodeRef).To(BeNil()) + log.Logf("Machine %s is up but still bootstrapping", firstMachineReplacementName) + + // The firstMachine replacement is up, meaning that the test validated that remediation of the first CP machine works (note: first CP is a special case because the cluster is not initialized yet). 
+ // In order to test remediation of other machine while provisioning we unblock bootstrap of the first CP replacement + // and wait for the second cp machine to come up. + + By("FIRST CONTROL PLANE MACHINE SUCCESSFULLY REMEDIATED!") + + Byf("Unblock bootstrap for Machine %s and wait for it to be provisioned", firstMachineReplacementName) + sendSignalToBootstrappingMachine(ctx, sendSignalToBootstrappingMachineInput{ + Client: input.BootstrapClusterProxy.GetClient(), + Namespace: namespace.Name, + Machine: firstMachineReplacementName, + Signal: "pass", + }) + log.Logf("Waiting for Machine %s to be provisioned", firstMachineReplacementName) + Eventually(func() bool { + if err := input.BootstrapClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(firstMachineReplacement), firstMachineReplacement); err != nil { + return false + } + return firstMachineReplacement.Status.NodeRef != nil + }, input.E2EConfig.GetIntervals(specName, "wait-machine-provisioned")...).Should(BeTrue(), "Machine %s failed to be provisioned", firstMachineReplacementName) + + By("FIRST CONTROL PLANE MACHINE UP AND RUNNING!") + By("START PROVISIONING OF SECOND CONTROL PLANE MACHINE!") + + By("Wait for the cluster to get stuck with the second CP machine not completing the bootstrap") + allMachines, newMachines = waitForMachines(ctx, waitForMachinesInput{ + Lister: input.BootstrapClusterProxy.GetClient(), + Namespace: namespace.Name, + ClusterName: clusterResources.Cluster.Name, + ExpectedReplicas: 2, + ExpectedDeletedMachines: []string{}, + ExpectedOldMachines: []string{firstMachineReplacementName}, + WaitForMachinesIntervals: input.E2EConfig.GetIntervals(specName, "wait-machines"), + CheckMachineListStableIntervals: input.E2EConfig.GetIntervals(specName, "check-machines-stable"), + }) + Expect(allMachines).To(HaveLen(2)) + Expect(newMachines).To(HaveLen(1)) + secondMachineName := newMachines[0] + secondMachine := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: secondMachineName, 
+ Namespace: namespace.Name, + }, + } + Expect(input.BootstrapClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(secondMachine), secondMachine)).To(Succeed(), "Failed to get machine %s", secondMachineName) + Expect(secondMachine.Status.NodeRef).To(BeNil()) + log.Logf("Machine %s is up but still bootstrapping", secondMachineName) + + // Intentionally trigger remediation on the second CP and validate that also this one is deleted and a replacement should come up. + + By("REMEDIATING SECOND CONTROL PLANE MACHINE") + + Byf("Add mhc-test:fail label to machine %s so it will be immediately remediated", secondMachineName) + secondMachineWithLabel := secondMachine.DeepCopy() + secondMachineWithLabel.Labels["mhc-test"] = failLabelValue + Expect(input.BootstrapClusterProxy.GetClient().Patch(ctx, secondMachineWithLabel, client.MergeFrom(secondMachine))).To(Succeed(), "Failed to patch machine %s", secondMachineName) + + log.Logf("Wait for the second CP machine to be remediated, and the replacement machine to come up, but again get stuck with the Machine not completing the bootstrap") + allMachines, newMachines = waitForMachines(ctx, waitForMachinesInput{ + Lister: input.BootstrapClusterProxy.GetClient(), + Namespace: namespace.Name, + ClusterName: clusterResources.Cluster.Name, + ExpectedReplicas: 2, + ExpectedDeletedMachines: []string{secondMachineName}, + ExpectedOldMachines: []string{firstMachineReplacementName}, + WaitForMachinesIntervals: input.E2EConfig.GetIntervals(specName, "wait-machines"), + CheckMachineListStableIntervals: input.E2EConfig.GetIntervals(specName, "check-machines-stable"), + }) + Expect(allMachines).To(HaveLen(2)) + Expect(newMachines).To(HaveLen(1)) + secondMachineReplacementName := newMachines[0] + secondMachineReplacement := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: secondMachineReplacementName, + Namespace: namespace.Name, + }, + } + Expect(input.BootstrapClusterProxy.GetClient().Get(ctx, 
client.ObjectKeyFromObject(secondMachineReplacement), secondMachineReplacement)).To(Succeed(), "Failed to get machine %d", secondMachineReplacementName) + Expect(secondMachineReplacement.Status.NodeRef).To(BeNil()) + log.Logf("Machine %s is up but still bootstrapping", secondMachineReplacementName) + + // The secondMachine replacement is up, meaning that the test validated that remediation of the second CP machine works (note: this test remediation after the cluster is initialized, but not yet fully provisioned). + // In order to test remediation after provisioning we unblock bootstrap of the second CP replacement as well as for the third CP machine. + // and wait for the second cp machine to come up. + + By("SECOND CONTROL PLANE MACHINE SUCCESSFULLY REMEDIATED!") + + Byf("Unblock bootstrap for Machine %s and wait for it to be provisioned", secondMachineReplacementName) + sendSignalToBootstrappingMachine(ctx, sendSignalToBootstrappingMachineInput{ + Client: input.BootstrapClusterProxy.GetClient(), + Namespace: namespace.Name, + Machine: secondMachineReplacementName, + Signal: "pass", + }) + log.Logf("Waiting for Machine %s to be provisioned", secondMachineReplacementName) + Eventually(func() bool { + if err := input.BootstrapClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(secondMachineReplacement), secondMachineReplacement); err != nil { + return false + } + return secondMachineReplacement.Status.NodeRef != nil + }, input.E2EConfig.GetIntervals(specName, "wait-machine-provisioned")...).Should(BeTrue(), "Machine %s failed to be provisioned", secondMachineReplacementName) + + By("SECOND CONTROL PLANE MACHINE UP AND RUNNING!") + By("START PROVISIONING OF THIRD CONTROL PLANE MACHINE!") + + By("Wait for the cluster to get stuck with the third CP machine not completing the bootstrap") + allMachines, newMachines = waitForMachines(ctx, waitForMachinesInput{ + Lister: input.BootstrapClusterProxy.GetClient(), + Namespace: namespace.Name, + ClusterName: 
clusterResources.Cluster.Name, + ExpectedReplicas: 3, + ExpectedDeletedMachines: []string{}, + ExpectedOldMachines: []string{firstMachineReplacementName, secondMachineReplacementName}, + WaitForMachinesIntervals: input.E2EConfig.GetIntervals(specName, "wait-machines"), + CheckMachineListStableIntervals: input.E2EConfig.GetIntervals(specName, "check-machines-stable"), + }) + Expect(allMachines).To(HaveLen(3)) + Expect(newMachines).To(HaveLen(1)) + thirdMachineName := newMachines[0] + thirdMachine := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: thirdMachineName, + Namespace: namespace.Name, + }, + } + Expect(input.BootstrapClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(thirdMachine), thirdMachine)).To(Succeed(), "Failed to get machine %d", thirdMachineName) + Expect(thirdMachine.Status.NodeRef).To(BeNil()) + log.Logf("Machine %s is up but still bootstrapping", thirdMachineName) + + Byf("Unblock bootstrap for Machine %s and wait for it to be provisioned", thirdMachineName) + sendSignalToBootstrappingMachine(ctx, sendSignalToBootstrappingMachineInput{ + Client: input.BootstrapClusterProxy.GetClient(), + Namespace: namespace.Name, + Machine: thirdMachineName, + Signal: "pass", + }) + log.Logf("Waiting for Machine %s to be provisioned", thirdMachineName) + Eventually(func() bool { + if err := input.BootstrapClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(thirdMachine), thirdMachine); err != nil { + return false + } + return thirdMachine.Status.NodeRef != nil + }, input.E2EConfig.GetIntervals(specName, "wait-machine-provisioned")...).Should(BeTrue(), "Machine %s failed to be provisioned", thirdMachineName) + + // All three CP machines are up. + + By("ALL THE CONTROL PLANE MACHINES SUCCESSFULLY PROVISIONED!") + + // We now want to test remediation of a CP machine already provisioned. 
+ // In order to do so we need to apply both mhc-test:fail as well as setting an unhealthy condition in order to trigger remediation + + By("REMEDIATING THIRD CP") + + Byf("Add mhc-test:fail label to machine %s and set an unhealthy condition on the node so it will be immediately remediated", thirdMachineName) + thirdMachineWithLabel := thirdMachine.DeepCopy() + thirdMachineWithLabel.Labels["mhc-test"] = failLabelValue + Expect(input.BootstrapClusterProxy.GetClient().Patch(ctx, thirdMachineWithLabel, client.MergeFrom(thirdMachine))).To(Succeed(), "Failed to patch machine %s", thirdMachineName) + + unhealthyNodeCondition := corev1.NodeCondition{ + Type: "e2e.remediation.condition", + Status: "False", + LastTransitionTime: metav1.Time{Time: time.Now()}, + } + framework.PatchNodeCondition(ctx, framework.PatchNodeConditionInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: clusterResources.Cluster, + NodeCondition: unhealthyNodeCondition, + Machine: *thirdMachine, // TODO: make this a pointer. 
+ }) + + log.Logf("Wait for the third CP machine to be remediated, and the replacement machine to come up, but again get stuck with the Machine not completing the bootstrap") + allMachines, newMachines = waitForMachines(ctx, waitForMachinesInput{ + Lister: input.BootstrapClusterProxy.GetClient(), + Namespace: namespace.Name, + ClusterName: clusterResources.Cluster.Name, + ExpectedReplicas: 3, + ExpectedDeletedMachines: []string{thirdMachineName}, + ExpectedOldMachines: []string{firstMachineReplacementName, secondMachineReplacementName}, + WaitForMachinesIntervals: input.E2EConfig.GetIntervals(specName, "wait-machines"), + CheckMachineListStableIntervals: input.E2EConfig.GetIntervals(specName, "check-machines-stable"), + }) + Expect(allMachines).To(HaveLen(3)) + Expect(newMachines).To(HaveLen(1)) + thirdMachineReplacementName := newMachines[0] + thirdMachineReplacement := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: thirdMachineReplacementName, + Namespace: namespace.Name, + }, + } + Expect(input.BootstrapClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(thirdMachineReplacement), thirdMachineReplacement)).To(Succeed(), "Failed to get machine %d", thirdMachineReplacementName) + Expect(thirdMachineReplacement.Status.NodeRef).To(BeNil()) + log.Logf("Machine %s is up but still bootstrapping", thirdMachineReplacementName) + + // The thirdMachine replacement is up, meaning that the test validated that remediation of the third CP machine works (note: this test remediation after the cluster is fully provisioned). 
+ + By("THIRD CP SUCCESSFULLY REMEDIATED!") + + Byf("Unblock bootstrap for Machine %s and wait for it to be provisioned", thirdMachineReplacementName) + sendSignalToBootstrappingMachine(ctx, sendSignalToBootstrappingMachineInput{ + Client: input.BootstrapClusterProxy.GetClient(), + Namespace: namespace.Name, + Machine: thirdMachineReplacementName, + Signal: "pass", + }) + log.Logf("Waiting for Machine %s to be provisioned", thirdMachineReplacementName) + Eventually(func() bool { + if err := input.BootstrapClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(thirdMachineReplacement), thirdMachineReplacement); err != nil { + return false + } + return thirdMachineReplacement.Status.NodeRef != nil + }, input.E2EConfig.GetIntervals(specName, "wait-machine-provisioned")...).Should(BeTrue(), "Machine %s failed to be provisioned", thirdMachineReplacementName) + + // All three CP machines are up again. + + By("CP BACK TO FULL OPERATIONAL STATE!") + + By("PASSED!") + }) + + AfterEach(func() { + // Dumps all the resources in the spec namespace, then cleanups the cluster object and the spec namespace itself. 
+ dumpSpecResourcesAndCleanup(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder, namespace, cancelWatches, clusterResources.Cluster, input.E2EConfig.GetIntervals, input.SkipCleanup) + }) +} + +func createConfigMapForMachinesBootstrapSignal(ctx context.Context, writer client.Writer, namespace string) { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: configMapName, + Namespace: namespace, + }, + Data: map[string]string{ + configMapDataKey: "hold", + }, + } + Expect(writer.Create(ctx, cm)).To(Succeed(), "failed to create mhc-test config map") +} + +type createWorkloadClusterAndWaitInput struct { + E2EConfig *clusterctl.E2EConfig + ClusterctlConfigPath string + Proxy framework.ClusterProxy + ArtifactFolder string + SpecName string + Flavor string + Namespace string + AuthenticationToken string + ServerAddr string +} + +// createWorkloadClusterAndWait creates a workload cluster and return as soon as the cluster infrastructure is ready. +// NOTE: we are not using the same func used by other tests because it would fail if the control plane doesn't come up, +// which instead is expected in this case. +// NOTE: clusterResources is filled only partially. 
+func createWorkloadClusterAndWait(ctx context.Context, input createWorkloadClusterAndWaitInput) (clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult) { + clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult) + + // gets the cluster template + log.Logf("Getting the cluster template yaml") + clusterName := fmt.Sprintf("%s-%s", input.SpecName, util.RandomString(6)) + workloadClusterTemplate := clusterctl.ConfigCluster(ctx, clusterctl.ConfigClusterInput{ + // pass the clusterctl config file that points to the local provider repository created for this test, + ClusterctlConfigPath: input.ClusterctlConfigPath, + // pass reference to the management cluster hosting this test + KubeconfigPath: input.Proxy.GetKubeconfigPath(), + + // select template + Flavor: input.Flavor, + // define template variables + Namespace: input.Namespace, + ClusterName: clusterName, + KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersion), + ControlPlaneMachineCount: pointer.Int64(3), + WorkerMachineCount: pointer.Int64(0), + InfrastructureProvider: clusterctl.DefaultInfrastructureProvider, + // setup clusterctl logs folder + LogFolder: filepath.Join(input.ArtifactFolder, "clusters", input.Proxy.GetName()), + // Adds authenticationToken, server address and namespace variables to be injected in the cluster template. 
+ ClusterctlVariables: map[string]string{ + "TOKEN": input.AuthenticationToken, + "SERVER": input.ServerAddr, + "NAMESPACE": input.Namespace, + }, + }) + Expect(workloadClusterTemplate).ToNot(BeNil(), "Failed to get the cluster template") + + Eventually(func() error { + return input.Proxy.Apply(ctx, workloadClusterTemplate) + }, 10*time.Second).Should(Succeed(), "Failed to apply the cluster template") + + log.Logf("Waiting for the cluster infrastructure to be provisioned") + clusterResources.Cluster = framework.DiscoveryAndWaitForCluster(ctx, framework.DiscoveryAndWaitForClusterInput{ + Getter: input.Proxy.GetClient(), + Namespace: input.Namespace, + Name: clusterName, + }, input.E2EConfig.GetIntervals(input.SpecName, "wait-cluster")...) + + return clusterResources +} + +type sendSignalToBootstrappingMachineInput struct { + Client client.Client + Namespace string + Machine string + Signal string +} + +// sendSignalToBootstrappingMachine sends a signal to a machine stuck during bootstrap. 
+func sendSignalToBootstrappingMachine(ctx context.Context, input sendSignalToBootstrappingMachineInput) { + log.Logf("Sending bootstrap signal %s to Machine %s", input.Signal, input.Machine) + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: configMapName, + Namespace: input.Namespace, + }, + } + Expect(input.Client.Get(ctx, client.ObjectKeyFromObject(cm), cm)).To(Succeed(), "failed to get mhc-test config map") + + cmWithSignal := cm.DeepCopy() + cmWithSignal.Data[configMapDataKey] = input.Signal + Expect(input.Client.Patch(ctx, cmWithSignal, client.MergeFrom(cm))).To(Succeed(), "failed to patch mhc-test config map") + + log.Logf("Waiting for Machine %s to acknowledge signal %s has been received", input.Machine, input.Signal) + Eventually(func() string { + _ = input.Client.Get(ctx, client.ObjectKeyFromObject(cmWithSignal), cmWithSignal) + return cmWithSignal.Data[configMapDataKey] + }, "1m", "10s").Should(Equal(fmt.Sprintf("ack-%s", input.Signal)), "Failed to get ack signal from machine %s", input.Machine) + + machine := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: input.Machine, + Namespace: input.Namespace, + }, + } + Expect(input.Client.Get(ctx, client.ObjectKeyFromObject(machine), machine)).To(Succeed()) + + // Resetting the signal in the config map + cmWithSignal.Data[configMapDataKey] = "hold" + Expect(input.Client.Patch(ctx, cmWithSignal, client.MergeFrom(cm))).To(Succeed(), "failed to patch mhc-test config map") +} + +type waitForMachinesInput struct { + Lister framework.Lister + Namespace string + ClusterName string + ExpectedReplicas int + ExpectedOldMachines []string + ExpectedDeletedMachines []string + WaitForMachinesIntervals []interface{} + CheckMachineListStableIntervals []interface{} +} + +// waitForMachines waits for machines to reach a well known state defined by number of replicas, a list of machines to exists, +// a list of machines to not exist anymore. 
The func also check that the state is stable for some time before +// returning the list of new machines. +func waitForMachines(ctx context.Context, input waitForMachinesInput) (allMachineNames, newMachineNames []string) { + inClustersNamespaceListOption := client.InNamespace(input.Namespace) + matchClusterListOption := client.MatchingLabels{ + clusterv1.ClusterNameLabel: input.ClusterName, + clusterv1.MachineControlPlaneLabel: "", + } + + expectedOldMachines := sets.Set[string]{}.Insert(input.ExpectedOldMachines...) + expectedDeletedMachines := sets.Set[string]{}.Insert(input.ExpectedDeletedMachines...) + allMachines := sets.Set[string]{} + newMachines := sets.Set[string]{} + machineList := &clusterv1.MachineList{} + + // Waits for the desired set of machines to exist. + log.Logf("Waiting for %d machines, must have %s, must not have %s", input.ExpectedReplicas, expectedOldMachines.UnsortedList(), expectedDeletedMachines.UnsortedList()) + Eventually(func(g Gomega) { + // Gets the list of machines + g.Expect(input.Lister.List(ctx, machineList, inClustersNamespaceListOption, matchClusterListOption)).To(Succeed()) + + allMachines = sets.Set[string]{} + for i := range machineList.Items { + allMachines.Insert(machineList.Items[i].Name) + } + + // Compute new machines (all - old - to be deleted) + newMachines = allMachines.Clone() + newMachines.Delete(expectedOldMachines.UnsortedList()...) + newMachines.Delete(expectedDeletedMachines.UnsortedList()...) + + log.Logf(" - expected %d, got %d: %s, of which new %s, must have check: %t, must not have check: %t", input.ExpectedReplicas, allMachines.Len(), allMachines.UnsortedList(), newMachines.UnsortedList(), allMachines.HasAll(expectedOldMachines.UnsortedList()...), !allMachines.HasAny(expectedDeletedMachines.UnsortedList()...)) + + // Ensures all the expected old machines are still there. 
+ g.Expect(allMachines.HasAll(expectedOldMachines.UnsortedList()...)).To(BeTrue(), + "Got machines: %s, must contain all of: %s", allMachines.UnsortedList(), expectedOldMachines.UnsortedList()) + + // Ensures none of the machines to be deleted is still there. + g.Expect(!allMachines.HasAny(expectedDeletedMachines.UnsortedList()...)).To(BeTrue(), + "Got machines: %s, must not contain any of: %s", allMachines.UnsortedList(), expectedDeletedMachines.UnsortedList()) + + g.Expect(allMachines).To(HaveLen(input.ExpectedReplicas), "Got %d machines, must be %d", len(allMachines), input.ExpectedReplicas) + }, input.WaitForMachinesIntervals...).Should(Succeed(), + "Failed to get the expected list of machines: got %s (expected %d machines, must have %s, must not have %s)", + allMachines.UnsortedList(), input.ExpectedReplicas, expectedOldMachines.UnsortedList(), expectedDeletedMachines.UnsortedList()) + log.Logf("Got %d machines: %s", allMachines.Len(), allMachines.UnsortedList()) + + // Ensures the desired set of machines is stable (no further machines are created or deleted). + log.Logf("Checking the list of machines is stable") + allMachinesNow := sets.Set[string]{} + Consistently(func() bool { + // Gets the list of machines + if err := input.Lister.List(ctx, machineList, inClustersNamespaceListOption, matchClusterListOption); err != nil { + return false + } + allMachinesNow = sets.Set[string]{} + for i := range machineList.Items { + allMachinesNow.Insert(machineList.Items[i].Name) + } + + return allMachines.Equal(allMachinesNow) + }, input.CheckMachineListStableIntervals...).Should(BeTrue(), "Expected list of machines is not stable: got %s, expected %s", allMachinesNow.UnsortedList(), allMachines.UnsortedList()) + + return allMachines.UnsortedList(), newMachines.UnsortedList() +} + +// getServerAddr returns the address to be used for accessing the management cluster from a workload cluster. 
+func getServerAddr(ctx context.Context, clusterProxy framework.ClusterProxy) string { + // With CAPD, we can't just access the bootstrap cluster via 127.0.0.1: from the + // workload cluster. Instead we retrieve the server name from the cluster-info ConfigMap in the bootstrap + // cluster (e.g. "https://test-z45p9k-control-plane:6443") + // Note: This has been tested with MacOS,Linux and Prow. + clusterInfoCM := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster-info", + Namespace: metav1.NamespacePublic, + }, + } + Expect(clusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(clusterInfoCM), clusterInfoCM)).To(Succeed()) + Expect(clusterInfoCM.Data).To(HaveKey("kubeconfig")) + + kubeConfigString := clusterInfoCM.Data["kubeconfig"] + + kubeConfig, err := clientcmd.Load([]byte(kubeConfigString)) + Expect(err).ToNot(HaveOccurred()) + + return kubeConfig.Clusters[""].Server +} + +// getAuthenticationToken returns a bearer authenticationToken with minimal RBAC permissions to access the mhc-test ConfigMap that will be used +// to control machines bootstrap during the remediation tests. 
+func getAuthenticationToken(ctx context.Context, managementClusterProxy framework.ClusterProxy, namespace string) string { + sa := &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mhc-test", + Namespace: namespace, + }, + } + Expect(managementClusterProxy.GetClient().Create(ctx, sa)).To(Succeed(), "failed to create mhc-test service account") + + role := &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mhc-test", + Namespace: namespace, + }, + Rules: []rbacv1.PolicyRule{ + { + Verbs: []string{"get", "list", "patch"}, + APIGroups: []string{""}, + Resources: []string{"configmaps"}, + ResourceNames: []string{"mhc-test"}, + }, + }, + } + Expect(managementClusterProxy.GetClient().Create(ctx, role)).To(Succeed(), "failed to create mhc-test role") + + roleBinding := &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mhc-test", + Namespace: namespace, + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + APIGroup: "", + Name: "mhc-test", + Namespace: namespace, + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: rbacv1.SchemeGroupVersion.Group, + Kind: "Role", + Name: "mhc-test", + }, + } + Expect(managementClusterProxy.GetClient().Create(ctx, roleBinding)).To(Succeed(), "failed to create mhc-test role binding") + + tokenRequest := &authenticationv1.TokenRequest{ + Spec: authenticationv1.TokenRequestSpec{ + ExpirationSeconds: pointer.Int64(2 * 60 * 60), // 2 hours. 
+ }, + } + tokenRequest, err := managementClusterProxy.GetClientSet().CoreV1().ServiceAccounts(namespace).CreateToken(ctx, "mhc-test", tokenRequest, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + return tokenRequest.Status.Token +} diff --git a/test/e2e/mhc_remediations_test.go b/test/e2e/kcp_remediations_test.go similarity index 83% rename from test/e2e/mhc_remediations_test.go rename to test/e2e/kcp_remediations_test.go index 66471901d939..cdd9fd50efc4 100644 --- a/test/e2e/mhc_remediations_test.go +++ b/test/e2e/kcp_remediations_test.go @@ -23,9 +23,9 @@ import ( . "github.com/onsi/ginkgo/v2" ) -var _ = Describe("When testing unhealthy machines remediation", func() { - MachineRemediationSpec(ctx, func() MachineRemediationSpecInput { - return MachineRemediationSpecInput{ +var _ = Describe("When testing KCP remediation", func() { + KCPRemediationSpec(ctx, func() KCPRemediationSpecInput { + return KCPRemediationSpecInput{ E2EConfig: e2eConfig, ClusterctlConfigPath: clusterctlConfigPath, BootstrapClusterProxy: bootstrapClusterProxy, diff --git a/test/e2e/mhc_remediations.go b/test/e2e/md_remediations.go similarity index 61% rename from test/e2e/mhc_remediations.go rename to test/e2e/md_remediations.go index b1c3989d4f61..7d95a032518f 100644 --- a/test/e2e/mhc_remediations.go +++ b/test/e2e/md_remediations.go @@ -32,8 +32,8 @@ import ( "sigs.k8s.io/cluster-api/util" ) -// MachineRemediationSpecInput is the input for MachineRemediationSpec. -type MachineRemediationSpecInput struct { +// MachineDeploymentRemediationSpecInput is the input for MachineDeploymentRemediationSpec. 
+type MachineDeploymentRemediationSpecInput struct { E2EConfig *clusterctl.E2EConfig ClusterctlConfigPath string BootstrapClusterProxy framework.ClusterProxy @@ -41,26 +41,19 @@ type MachineRemediationSpecInput struct { SkipCleanup bool ControlPlaneWaiters clusterctl.ControlPlaneWaiters - // KCPFlavor, if specified, must refer to a template that has a MachineHealthCheck - // resource configured to match the control plane Machines (cluster.x-k8s.io/controlplane: "" label) - // and be configured to treat "e2e.remediation.condition" "False" as an unhealthy - // condition with a short timeout. - // If not specified, "kcp-remediation" is used. - KCPFlavor *string - - // MDFlavor, if specified, must refer to a template that has a MachineHealthCheck + // Flavor, if specified, must refer to a template that has a MachineHealthCheck // resource configured to match the MachineDeployment managed Machines and be // configured to treat "e2e.remediation.condition" "False" as an unhealthy // condition with a short timeout. // If not specified, "md-remediation" is used. - MDFlavor *string + Flavor *string } -// MachineRemediationSpec implements a test that verifies that Machines are remediated by MHC during unhealthy conditions. -func MachineRemediationSpec(ctx context.Context, inputGetter func() MachineRemediationSpecInput) { +// MachineDeploymentRemediationSpec implements a test that verifies that Machines are remediated by MHC during unhealthy conditions. 
+func MachineDeploymentRemediationSpec(ctx context.Context, inputGetter func() MachineDeploymentRemediationSpecInput) { var ( - specName = "mhc-remediation" - input MachineRemediationSpecInput + specName = "md-remediation" + input MachineDeploymentRemediationSpecInput namespace *corev1.Namespace cancelWatches context.CancelFunc clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult @@ -80,7 +73,7 @@ func MachineRemediationSpec(ctx context.Context, inputGetter func() MachineRemed clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult) }) - It("Should successfully trigger machine deployment remediation", func() { + It("Should replace unhealthy machines", func() { By("Creating a workload cluster") clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ @@ -90,7 +83,7 @@ func MachineRemediationSpec(ctx context.Context, inputGetter func() MachineRemed ClusterProxy: input.BootstrapClusterProxy, ConfigCluster: clusterctl.ConfigClusterInput{ LogFolder: filepath.Join(input.ArtifactFolder, "clusters", input.BootstrapClusterProxy.GetName()), ClusterctlConfigPath: input.ClusterctlConfigPath, KubeconfigPath: input.BootstrapClusterProxy.GetKubeconfigPath(), InfrastructureProvider: clusterctl.DefaultInfrastructureProvider, - Flavor: pointer.StringDeref(input.MDFlavor, "md-remediation"), + Flavor: pointer.StringDeref(input.Flavor, "md-remediation"), Namespace: namespace.Name, ClusterName: fmt.Sprintf("%s-%s", specName, util.RandomString(6)), KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersion), @@ -103,6 +96,8 @@ func MachineRemediationSpec(ctx context.Context, inputGetter func() MachineRemed WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), }, clusterResources) + // TODO: this should be re-written like the KCP remediation test, because the current implementation + // only tests that MHC applies the unhealthy condition but it doesn't test that the unhealthy machine is deleted and a replacement machine comes up. 
By("Setting a machine unhealthy and wait for MachineDeployment remediation") framework.DiscoverMachineHealthChecksAndWaitForRemediation(ctx, framework.DiscoverMachineHealthCheckAndWaitForRemediationInput{ ClusterProxy: input.BootstrapClusterProxy, @@ -113,39 +108,6 @@ func MachineRemediationSpec(ctx context.Context, inputGetter func() MachineRemed By("PASSED!") }) - It("Should successfully trigger KCP remediation", func() { - By("Creating a workload cluster") - - clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ - ClusterProxy: input.BootstrapClusterProxy, - ConfigCluster: clusterctl.ConfigClusterInput{ - LogFolder: filepath.Join(input.ArtifactFolder, "clusters", input.BootstrapClusterProxy.GetName()), - ClusterctlConfigPath: input.ClusterctlConfigPath, - KubeconfigPath: input.BootstrapClusterProxy.GetKubeconfigPath(), - InfrastructureProvider: clusterctl.DefaultInfrastructureProvider, - Flavor: pointer.StringDeref(input.KCPFlavor, "kcp-remediation"), - Namespace: namespace.Name, - ClusterName: fmt.Sprintf("%s-%s", specName, util.RandomString(6)), - KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersion), - ControlPlaneMachineCount: pointer.Int64(3), - WorkerMachineCount: pointer.Int64(1), - }, - ControlPlaneWaiters: input.ControlPlaneWaiters, - WaitForClusterIntervals: input.E2EConfig.GetIntervals(specName, "wait-cluster"), - WaitForControlPlaneIntervals: input.E2EConfig.GetIntervals(specName, "wait-control-plane"), - WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), - }, clusterResources) - - By("Setting a machine unhealthy and wait for KubeadmControlPlane remediation") - framework.DiscoverMachineHealthChecksAndWaitForRemediation(ctx, framework.DiscoverMachineHealthCheckAndWaitForRemediationInput{ - ClusterProxy: input.BootstrapClusterProxy, - Cluster: clusterResources.Cluster, - WaitForMachineRemediation: input.E2EConfig.GetIntervals(specName, "wait-machine-remediation"), - }) 
- - By("PASSED!") - }) - AfterEach(func() { // Dumps all the resources in the spec namespace, then cleanups the cluster object and the spec namespace itself. dumpSpecResourcesAndCleanup(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder, namespace, cancelWatches, clusterResources.Cluster, input.E2EConfig.GetIntervals, input.SkipCleanup) diff --git a/test/e2e/md_remediations_test.go b/test/e2e/md_remediations_test.go new file mode 100644 index 000000000000..c35b60938d82 --- /dev/null +++ b/test/e2e/md_remediations_test.go @@ -0,0 +1,36 @@ +//go:build e2e +// +build e2e + +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + . 
"github.com/onsi/ginkgo/v2" +) + +var _ = Describe("When testing MachineDeployment remediation", func() { + MachineDeploymentRemediationSpec(ctx, func() MachineDeploymentRemediationSpecInput { + return MachineDeploymentRemediationSpecInput{ + E2EConfig: e2eConfig, + ClusterctlConfigPath: clusterctlConfigPath, + BootstrapClusterProxy: bootstrapClusterProxy, + ArtifactFolder: artifactFolder, + SkipCleanup: skipCleanup, + } + }) +}) diff --git a/test/infrastructure/container/docker.go b/test/infrastructure/container/docker.go index 204ec75ee7a3..a790f881b782 100644 --- a/test/infrastructure/container/docker.go +++ b/test/infrastructure/container/docker.go @@ -161,8 +161,7 @@ func (d *dockerRuntime) GetHostPort(ctx context.Context, containerName, portAndP } // ExecContainer executes a command in a running container and writes any output to the provided writer. -func (d *dockerRuntime) ExecContainer(_ context.Context, containerName string, config *ExecContainerInput, command string, args ...string) error { - ctx := context.Background() // Let the command finish, even if it takes longer than the default timeout +func (d *dockerRuntime) ExecContainer(ctx context.Context, containerName string, config *ExecContainerInput, command string, args ...string) error { execConfig := types.ExecConfig{ // Run with privileges so we can remount etc.. 
// This might not make sense in the most general sense, but it is diff --git a/test/infrastructure/docker/internal/controllers/dockermachine_controller.go b/test/infrastructure/docker/internal/controllers/dockermachine_controller.go index edc90b4232ee..7920065ae40f 100644 --- a/test/infrastructure/docker/internal/controllers/dockermachine_controller.go +++ b/test/infrastructure/docker/internal/controllers/dockermachine_controller.go @@ -305,10 +305,31 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * if err := externalMachine.CheckForBootstrapSuccess(timeoutCtx, false); err != nil { bootstrapData, format, err := r.getBootstrapData(timeoutCtx, machine) if err != nil { - log.Error(err, "failed to get bootstrap data") return ctrl.Result{}, err } + // Set up a goroutine to check for the machine being deleted while running bootstrap as a + // synchronous process, e.g. due to remediation. The routine stops when timeoutCtx is Done + // (either because canceled intentionally due to machine deletion or canceled by the defer cancel() + // call when exiting from this func). + go func() { + for { + select { + case <-timeoutCtx.Done(): + return + default: + updatedDockerMachine := &infrav1.DockerMachine{} + if err := r.Client.Get(ctx, client.ObjectKeyFromObject(dockerMachine), updatedDockerMachine); err == nil && + !updatedDockerMachine.DeletionTimestamp.IsZero() { + log.Info("Cancelling Bootstrap because the underlying machine has been deleted") + cancel() + return + } + time.Sleep(5 * time.Second) + } + } + }() + // Run the bootstrap script. Simulates cloud-init/Ignition. if err := externalMachine.ExecBootstrap(timeoutCtx, bootstrapData, format); err != nil { conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap")