From 2e64a16506df7ad95349916997bd464af733b295 Mon Sep 17 00:00:00 2001 From: Andy Goldstein Date: Tue, 1 Dec 2020 17:27:11 -0500 Subject: [PATCH 1/4] Add ControlPlaneInitializedCondition Signed-off-by: Andy Goldstein --- api/v1alpha4/condition_consts.go | 10 ++++++++++ controllers/cluster_controller.go | 16 ++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/api/v1alpha4/condition_consts.go b/api/v1alpha4/condition_consts.go index 7aca8adc6167..455d89893ff0 100644 --- a/api/v1alpha4/condition_consts.go +++ b/api/v1alpha4/condition_consts.go @@ -55,6 +55,16 @@ const ( // Conditions and condition Reasons for the Cluster object const ( + // ControlPlaneInitializedCondition reports if the cluster's control plane has been initialized such that the + // cluster's apiserver is reachable and at least one control plane Machine has a node reference. Once this + // condition is marked true, its value is never changed. See the ControlPlaneReady condition for an indication of + // the current readiness of the cluster's control plane. + ControlPlaneInitializedCondition ConditionType = "ControlPlaneInitialized" + + // MissingNodeRefReason (Severity=Info) documents a cluster waiting for at least one control plane Machine to have + // its node reference populated. + MissingNodeRefReason = "MissingNodeRef" + // ControlPlaneReady reports the ready condition from the control plane object defined for this cluster. // This condition is mirrored from the Ready condition in the control plane ref object, and // the absence of this condition might signal problems in the reconcile external loops or the fact that diff --git a/controllers/cluster_controller.go b/controllers/cluster_controller.go index 385dbe2df0bd..bac33948cadb 100644 --- a/controllers/cluster_controller.go +++ b/controllers/cluster_controller.go @@ -463,30 +463,26 @@ func splitMachineList(list *clusterv1.MachineList) (*clusterv1.MachineList, *clu } func (r *ClusterReconciler) reconcileControlPlaneInitialized(ctx context.Context, cluster *clusterv1.Cluster) (ctrl.Result, error) { - log := ctrl.LoggerFrom(ctx) - - // Skip checking if the control plane is initialized when using a Control Plane Provider - if cluster.Spec.ControlPlaneRef != nil { - return ctrl.Result{}, nil - } - - if cluster.Status.ControlPlaneInitialized { + if conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) { return ctrl.Result{}, nil } machines, err := getActiveMachinesInCluster(ctx, r.Client, cluster.Namespace, cluster.Name) if err != nil { - log.Error(err, "Error getting machines in cluster") + log := ctrl.LoggerFrom(ctx) + log.Error(err, "unable to determine ControlPlaneInitialized") return ctrl.Result{}, err } for _, m := range machines { if util.IsControlPlaneMachine(m) && m.Status.NodeRef != nil { - cluster.Status.ControlPlaneInitialized = true + conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition) return ctrl.Result{}, nil } } + conditions.MarkFalse(cluster, clusterv1.ControlPlaneInitializedCondition, clusterv1.MissingNodeRefReason, clusterv1.ConditionSeverityInfo, "Waiting for the first control plane machine to have its status.nodeRef set") + return ctrl.Result{}, nil } From d1032c65d858c2bd2b7cf5e6235ec0b8eb163a43 Mon Sep 17 00:00:00 2001 From: Andy Goldstein Date: Thu, 3 Dec 2020 09:56:39 -0500 Subject: [PATCH 2/4] remove cluster.status.controlPlaneInitialized Signed-off-by: Andy Goldstein --- api/v1alpha3/conversion.go | 28 +++++++++- api/v1alpha3/zz_generated.conversion.go | 42 ++++++++++----- 
api/v1alpha4/cluster_types.go | 4 -- api/v1alpha4/condition_consts.go | 4 ++ .../controllers/kubeadmconfig_controller.go | 3 +- .../kubeadmconfig_controller_test.go | 19 ++++--- cmd/clusterctl/client/cluster/mover.go | 4 +- cmd/clusterctl/client/cluster/mover_test.go | 54 +++++++++++++++---- .../crd/bases/cluster.x-k8s.io_clusters.yaml | 3 -- controllers/cluster_controller.go | 8 ++- controllers/cluster_controller_phases.go | 8 ++- controllers/cluster_controller_test.go | 7 +-- controllers/machine_controller.go | 2 +- controllers/machine_controller_test.go | 2 +- .../machinehealthcheck_controller_test.go | 3 +- controllers/remote/cluster_cache.go | 3 +- .../remote/cluster_cache_healthcheck_test.go | 3 +- .../remote/cluster_cache_tracker_test.go | 3 +- .../controllers/dockermachine_controller.go | 2 +- 19 files changed, 143 insertions(+), 59 deletions(-) diff --git a/api/v1alpha3/conversion.go b/api/v1alpha3/conversion.go index 87b9251b61ea..19dbb65cb083 100644 --- a/api/v1alpha3/conversion.go +++ b/api/v1alpha3/conversion.go @@ -19,6 +19,7 @@ package v1alpha3 import ( apiconversion "k8s.io/apimachinery/pkg/conversion" "sigs.k8s.io/cluster-api/api/v1alpha4" + "sigs.k8s.io/cluster-api/util/conditions" utilconversion "sigs.k8s.io/cluster-api/util/conversion" "sigs.k8s.io/controller-runtime/pkg/conversion" ) @@ -26,13 +27,32 @@ import ( func (src *Cluster) ConvertTo(dstRaw conversion.Hub) error { dst := dstRaw.(*v1alpha4.Cluster) - return Convert_v1alpha3_Cluster_To_v1alpha4_Cluster(src, dst, nil) + if err := Convert_v1alpha3_Cluster_To_v1alpha4_Cluster(src, dst, nil); err != nil { + return err + } + + // Given this is a bool and there is no timestamp associated with it, when this condition is set, its timestamp + // will be "now". See https://github.com/kubernetes-sigs/cluster-api/issues/3798#issuecomment-708619826 for more + // discussion. 
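// Editor's sketch (not part of the patch, assumes the upstream util/conditions helpers): the
// check that follows can only restore the *value* of the old boolean. Because v1alpha3 carried
// no timestamp, the condition it creates gets LastTransitionTime == "now", i.e. the time the
// conversion ran rather than the time the control plane actually initialized:
if c := conditions.Get(dst, v1alpha4.ControlPlaneInitializedCondition); c != nil {
	_ = c.LastTransitionTime // reflects when the conversion ran, not the original init time
}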
+ if src.Status.ControlPlaneInitialized { + conditions.MarkTrue(dst, v1alpha4.ControlPlaneInitializedCondition) + } + + return nil } func (dst *Cluster) ConvertFrom(srcRaw conversion.Hub) error { src := srcRaw.(*v1alpha4.Cluster) - return Convert_v1alpha4_Cluster_To_v1alpha3_Cluster(src, dst, nil) + if err := Convert_v1alpha4_Cluster_To_v1alpha3_Cluster(src, dst, nil); err != nil { + return err + } + + if conditions.IsTrue(src, v1alpha4.ControlPlaneInitializedCondition) { + dst.Status.ControlPlaneInitialized = true + } + + return nil } func (src *ClusterList) ConvertTo(dstRaw conversion.Hub) error { @@ -208,3 +228,7 @@ func Convert_v1alpha4_MachineRollingUpdateDeployment_To_v1alpha3_MachineRollingU func Convert_v1alpha4_MachineHealthCheckSpec_To_v1alpha3_MachineHealthCheckSpec(in *v1alpha4.MachineHealthCheckSpec, out *MachineHealthCheckSpec, s apiconversion.Scope) error { return autoConvert_v1alpha4_MachineHealthCheckSpec_To_v1alpha3_MachineHealthCheckSpec(in, out, s) } + +func Convert_v1alpha3_ClusterStatus_To_v1alpha4_ClusterStatus(in *ClusterStatus, out *v1alpha4.ClusterStatus, s apiconversion.Scope) error { + return autoConvert_v1alpha3_ClusterStatus_To_v1alpha4_ClusterStatus(in, out, s) +} diff --git a/api/v1alpha3/zz_generated.conversion.go b/api/v1alpha3/zz_generated.conversion.go index 3000514fced1..8122dd24a13a 100644 --- a/api/v1alpha3/zz_generated.conversion.go +++ b/api/v1alpha3/zz_generated.conversion.go @@ -94,11 +94,6 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } - if err := s.AddGeneratedConversionFunc((*ClusterStatus)(nil), (*v1alpha4.ClusterStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha3_ClusterStatus_To_v1alpha4_ClusterStatus(a.(*ClusterStatus), b.(*v1alpha4.ClusterStatus), scope) - }); err != nil { - return err - } if err := s.AddGeneratedConversionFunc((*v1alpha4.ClusterStatus)(nil), (*ClusterStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha4_ClusterStatus_To_v1alpha3_ClusterStatus(a.(*v1alpha4.ClusterStatus), b.(*ClusterStatus), scope) }); err != nil { @@ -349,6 +344,11 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddConversionFunc((*ClusterStatus)(nil), (*v1alpha4.ClusterStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha3_ClusterStatus_To_v1alpha4_ClusterStatus(a.(*ClusterStatus), b.(*v1alpha4.ClusterStatus), scope) + }); err != nil { + return err + } if err := s.AddConversionFunc((*v1alpha4.MachineHealthCheckSpec)(nil), (*MachineHealthCheckSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha4_MachineHealthCheckSpec_To_v1alpha3_MachineHealthCheckSpec(a.(*v1alpha4.MachineHealthCheckSpec), b.(*MachineHealthCheckSpec), scope) }); err != nil { @@ -436,7 +436,17 @@ func Convert_v1alpha4_Cluster_To_v1alpha3_Cluster(in *v1alpha4.Cluster, out *Clu func autoConvert_v1alpha3_ClusterList_To_v1alpha4_ClusterList(in *ClusterList, out *v1alpha4.ClusterList, s conversion.Scope) error { out.ListMeta = in.ListMeta - out.Items = *(*[]v1alpha4.Cluster)(unsafe.Pointer(&in.Items)) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]v1alpha4.Cluster, len(*in)) + for i := range *in { + if err := Convert_v1alpha3_Cluster_To_v1alpha4_Cluster(&(*in)[i], &(*out)[i], s); err != nil { + return err + } + } + } else { + out.Items = nil + } return nil } @@ -447,7 +457,17 @@ func 
Convert_v1alpha3_ClusterList_To_v1alpha4_ClusterList(in *ClusterList, out * func autoConvert_v1alpha4_ClusterList_To_v1alpha3_ClusterList(in *v1alpha4.ClusterList, out *ClusterList, s conversion.Scope) error { out.ListMeta = in.ListMeta - out.Items = *(*[]Cluster)(unsafe.Pointer(&in.Items)) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]Cluster, len(*in)) + for i := range *in { + if err := Convert_v1alpha4_Cluster_To_v1alpha3_Cluster(&(*in)[i], &(*out)[i], s); err != nil { + return err + } + } + } else { + out.Items = nil + } return nil } @@ -520,25 +540,19 @@ func autoConvert_v1alpha3_ClusterStatus_To_v1alpha4_ClusterStatus(in *ClusterSta out.FailureMessage = (*string)(unsafe.Pointer(in.FailureMessage)) out.Phase = in.Phase out.InfrastructureReady = in.InfrastructureReady - out.ControlPlaneInitialized = in.ControlPlaneInitialized + // WARNING: in.ControlPlaneInitialized requires manual conversion: does not exist in peer-type out.ControlPlaneReady = in.ControlPlaneReady out.Conditions = *(*v1alpha4.Conditions)(unsafe.Pointer(&in.Conditions)) out.ObservedGeneration = in.ObservedGeneration return nil } -// Convert_v1alpha3_ClusterStatus_To_v1alpha4_ClusterStatus is an autogenerated conversion function. -func Convert_v1alpha3_ClusterStatus_To_v1alpha4_ClusterStatus(in *ClusterStatus, out *v1alpha4.ClusterStatus, s conversion.Scope) error { - return autoConvert_v1alpha3_ClusterStatus_To_v1alpha4_ClusterStatus(in, out, s) -} - func autoConvert_v1alpha4_ClusterStatus_To_v1alpha3_ClusterStatus(in *v1alpha4.ClusterStatus, out *ClusterStatus, s conversion.Scope) error { out.FailureDomains = *(*FailureDomains)(unsafe.Pointer(&in.FailureDomains)) out.FailureReason = (*errors.ClusterStatusError)(unsafe.Pointer(in.FailureReason)) out.FailureMessage = (*string)(unsafe.Pointer(in.FailureMessage)) out.Phase = in.Phase out.InfrastructureReady = in.InfrastructureReady - out.ControlPlaneInitialized = in.ControlPlaneInitialized out.ControlPlaneReady = in.ControlPlaneReady out.Conditions = *(*Conditions)(unsafe.Pointer(&in.Conditions)) out.ObservedGeneration = in.ObservedGeneration diff --git a/api/v1alpha4/cluster_types.go b/api/v1alpha4/cluster_types.go index b41a77c1e30e..51d1893a2b06 100644 --- a/api/v1alpha4/cluster_types.go +++ b/api/v1alpha4/cluster_types.go @@ -128,10 +128,6 @@ type ClusterStatus struct { // +optional InfrastructureReady bool `json:"infrastructureReady"` - // ControlPlaneInitialized defines if the control plane has been initialized. - // +optional - ControlPlaneInitialized bool `json:"controlPlaneInitialized"` - // ControlPlaneReady defines if the control plane is ready. // +optional ControlPlaneReady bool `json:"controlPlaneReady,omitempty"` diff --git a/api/v1alpha4/condition_consts.go b/api/v1alpha4/condition_consts.go index 455d89893ff0..cd2f3898842e 100644 --- a/api/v1alpha4/condition_consts.go +++ b/api/v1alpha4/condition_consts.go @@ -65,6 +65,10 @@ const ( // its node reference populated. MissingNodeRefReason = "MissingNodeRef" + // WaitingForControlPlaneProviderInitializedReason (Severity=Info) documents a cluster waiting for the control plane + // provider to report successful control plane initialization. + WaitingForControlPlaneProviderInitializedReason = "WaitingForControlPlaneProviderInitialized" + // ControlPlaneReady reports the ready condition from the control plane object defined for this cluster. 
// This condition is mirrored from the Ready condition in the control plane ref object, and // the absence of this condition might signal problems in the reconcile external loops or the fact that diff --git a/bootstrap/kubeadm/controllers/kubeadmconfig_controller.go b/bootstrap/kubeadm/controllers/kubeadmconfig_controller.go index f3355a268b30..01987cc37d9e 100644 --- a/bootstrap/kubeadm/controllers/kubeadmconfig_controller.go +++ b/bootstrap/kubeadm/controllers/kubeadmconfig_controller.go @@ -242,7 +242,8 @@ func (r *KubeadmConfigReconciler) Reconcile(ctx context.Context, req ctrl.Reques return ctrl.Result{}, nil } - if !cluster.Status.ControlPlaneInitialized { + // Note: can't use IsFalse here because we need to handle the absence of the condition as well as false. + if !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) { return r.handleClusterNotInitialized(ctx, scope) } diff --git a/bootstrap/kubeadm/controllers/kubeadmconfig_controller_test.go b/bootstrap/kubeadm/controllers/kubeadmconfig_controller_test.go index bfdf03972e66..17261fbf4611 100644 --- a/bootstrap/kubeadm/controllers/kubeadmconfig_controller_test.go +++ b/bootstrap/kubeadm/controllers/kubeadmconfig_controller_test.go @@ -21,7 +21,6 @@ import ( "context" "fmt" "reflect" - "sigs.k8s.io/cluster-api/util/patch" "testing" "time" @@ -41,6 +40,7 @@ import ( "sigs.k8s.io/cluster-api/test/helpers" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/cluster-api/util/secret" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -423,7 +423,7 @@ func TestKubeadmConfigReconciler_Reconcile_ErrorIfJoiningControlPlaneHasInvalidC // TODO: extract this kind of code into a setup function that puts the state of objects into an initialized controlplane (implies secrets exist) cluster := newCluster("cluster") cluster.Status.InfrastructureReady = true - cluster.Status.ControlPlaneInitialized = true + conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition) cluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{Host: "100.105.150.1", Port: 6443} controlPlaneInitMachine := newControlPlaneMachine(cluster, "control-plane-init-machine") controlPlaneInitConfig := newControlPlaneInitKubeadmConfig(controlPlaneInitMachine, "control-plane-init-cfg") @@ -462,7 +462,7 @@ func TestKubeadmConfigReconciler_Reconcile_RequeueIfControlPlaneIsMissingAPIEndp cluster := newCluster("cluster") cluster.Status.InfrastructureReady = true - cluster.Status.ControlPlaneInitialized = true + conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition) controlPlaneInitMachine := newControlPlaneMachine(cluster, "control-plane-init-machine") controlPlaneInitConfig := newControlPlaneInitKubeadmConfig(controlPlaneInitMachine, "control-plane-init-cfg") @@ -498,7 +498,7 @@ func TestKubeadmConfigReconciler_Reconcile_RequeueIfControlPlaneIsMissingAPIEndp func TestReconcileIfJoinNodesAndControlPlaneIsReady(t *testing.T) { cluster := newCluster("cluster") cluster.Status.InfrastructureReady = true - cluster.Status.ControlPlaneInitialized = true + conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition) cluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{Host: "100.105.150.1", Port: 6443} var useCases = []struct { @@ -587,7 +587,7 @@ func TestReconcileIfJoinNodePoolsAndControlPlaneIsReady(t *testing.T) { cluster := newCluster("cluster") cluster.Status.InfrastructureReady = true - 
cluster.Status.ControlPlaneInitialized = true + conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition) cluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{Host: "100.105.150.1", Port: 6443} var useCases = []struct { @@ -666,7 +666,7 @@ func TestKubeadmConfigSecretCreatedStatusNotPatched(t *testing.T) { cluster := newCluster("cluster") cluster.Status.InfrastructureReady = true - cluster.Status.ControlPlaneInitialized = true + conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition) cluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{Host: "100.105.150.1", Port: 6443} controlPlaneInitMachine := newControlPlaneMachine(cluster, "control-plane-init-machine") @@ -735,7 +735,7 @@ func TestBootstrapTokenTTLExtension(t *testing.T) { cluster := newCluster("cluster") cluster.Status.InfrastructureReady = true - cluster.Status.ControlPlaneInitialized = true + conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition) cluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{Host: "100.105.150.1", Port: 6443} controlPlaneInitMachine := newControlPlaneMachine(cluster, "control-plane-init-machine") @@ -887,7 +887,7 @@ func TestBootstrapTokenRotationMachinePool(t *testing.T) { cluster := newCluster("cluster") cluster.Status.InfrastructureReady = true - cluster.Status.ControlPlaneInitialized = true + conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition) cluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{Host: "100.105.150.1", Port: 6443} controlPlaneInitMachine := newControlPlaneMachine(cluster, "control-plane-init-machine") @@ -1305,7 +1305,7 @@ func TestKubeadmConfigReconciler_Reconcile_AlwaysCheckCAVerificationUnlessReques // Setup work for an initialized cluster clusterName := "my-cluster" cluster := newCluster(clusterName) - cluster.Status.ControlPlaneInitialized = true + conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition) cluster.Status.InfrastructureReady = true cluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{ Host: "example.com", @@ -1433,7 +1433,6 @@ func TestKubeadmConfigReconciler_Reconcile_DoesNotFailIfCASecretsAlreadyExist(t cluster := newCluster("my-cluster") cluster.Status.InfrastructureReady = true - cluster.Status.ControlPlaneInitialized = false m := newControlPlaneMachine(cluster, "control-plane-machine") configName := "my-config" c := newControlPlaneInitKubeadmConfig(m, configName) diff --git a/cmd/clusterctl/client/cluster/mover.go b/cmd/clusterctl/client/cluster/mover.go index a4d40be34dd3..a44f7b48cb2d 100644 --- a/cmd/clusterctl/client/cluster/mover.go +++ b/cmd/clusterctl/client/cluster/mover.go @@ -30,6 +30,7 @@ import ( "k8s.io/apimachinery/pkg/util/version" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" logf "sigs.k8s.io/cluster-api/cmd/clusterctl/log" + "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -137,7 +138,8 @@ func (o *objectMover) checkProvisioningCompleted(graph *objectGraph) error { continue } - if !clusterObj.Status.ControlPlaneInitialized { + // Note: can't use IsFalse here because we need to handle the absence of the condition as well as false. 
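// Editor's sketch (illustrative only, assumes the upstream util/conditions helpers): a
// condition has three effective states: missing, False, and True. IsFalse returns false when
// the condition is missing, so it would treat a Cluster that has never been reconciled as
// already initialized; negating IsTrue covers both the missing and the False cases.
fresh := &clusterv1.Cluster{}                                             // no conditions set yet
_ = conditions.IsTrue(fresh, clusterv1.ControlPlaneInitializedCondition)  // false
_ = conditions.IsFalse(fresh, clusterv1.ControlPlaneInitializedCondition) // also false: the condition is absent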
+ if !conditions.IsTrue(clusterObj, clusterv1.ControlPlaneInitializedCondition) { errList = append(errList, errors.Errorf("cannot start the move operation while the control plane for %q %s/%s is not yet initialized", clusterObj.GroupVersionKind(), clusterObj.GetNamespace(), clusterObj.GetName())) continue } diff --git a/cmd/clusterctl/client/cluster/mover_test.go b/cmd/clusterctl/client/cluster/mover_test.go index 5718d4fa0700..07fc152cedbd 100644 --- a/cmd/clusterctl/client/cluster/mover_test.go +++ b/cmd/clusterctl/client/cluster/mover_test.go @@ -27,6 +27,7 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" clusterctlv1 "sigs.k8s.io/cluster-api/cmd/clusterctl/api/v1alpha3" "sigs.k8s.io/cluster-api/cmd/clusterctl/internal/test" + "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -643,8 +644,10 @@ func Test_objectMover_checkProvisioningCompleted(t *testing.T) { Name: "cluster1", }, Status: clusterv1.ClusterStatus{ - InfrastructureReady: false, - ControlPlaneInitialized: true, + InfrastructureReady: false, + Conditions: clusterv1.Conditions{ + *conditions.TrueCondition(clusterv1.ControlPlaneInitializedCondition), + }, }, }, }, @@ -665,8 +668,31 @@ func Test_objectMover_checkProvisioningCompleted(t *testing.T) { Name: "cluster1", }, Status: clusterv1.ClusterStatus{ - InfrastructureReady: true, - ControlPlaneInitialized: false, + InfrastructureReady: true, + }, + }, + }, + }, + wantErr: true, + }, + { + name: "Blocks with a cluster with ControlPlaneInitialized=False", + fields: fields{ + objs: []client.Object{ + &clusterv1.Cluster{ + TypeMeta: metav1.TypeMeta{ + Kind: "Cluster", + APIVersion: clusterv1.GroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: "ns1", + Name: "cluster1", + }, + Status: clusterv1.ClusterStatus{ + InfrastructureReady: true, + Conditions: clusterv1.Conditions{ + *conditions.FalseCondition(clusterv1.ControlPlaneInitializedCondition, "", clusterv1.ConditionSeverityInfo, ""), + }, }, }, }, @@ -690,9 +716,11 @@ func Test_objectMover_checkProvisioningCompleted(t *testing.T) { ControlPlaneRef: &corev1.ObjectReference{}, }, Status: clusterv1.ClusterStatus{ - InfrastructureReady: true, - ControlPlaneInitialized: true, - ControlPlaneReady: false, + InfrastructureReady: true, + Conditions: clusterv1.Conditions{ + *conditions.TrueCondition(clusterv1.ControlPlaneInitializedCondition), + }, + ControlPlaneReady: false, }, }, }, @@ -714,8 +742,10 @@ func Test_objectMover_checkProvisioningCompleted(t *testing.T) { UID: "cluster1", }, Status: clusterv1.ClusterStatus{ - InfrastructureReady: true, - ControlPlaneInitialized: true, + InfrastructureReady: true, + Conditions: clusterv1.Conditions{ + *conditions.TrueCondition(clusterv1.ControlPlaneInitializedCondition), + }, }, }, &clusterv1.Machine{ @@ -758,8 +788,10 @@ func Test_objectMover_checkProvisioningCompleted(t *testing.T) { UID: "cluster1", }, Status: clusterv1.ClusterStatus{ - InfrastructureReady: true, - ControlPlaneInitialized: true, + InfrastructureReady: true, + Conditions: clusterv1.Conditions{ + *conditions.TrueCondition(clusterv1.ControlPlaneInitializedCondition), + }, }, }, &clusterv1.Machine{ diff --git a/config/crd/bases/cluster.x-k8s.io_clusters.yaml b/config/crd/bases/cluster.x-k8s.io_clusters.yaml index 8d4b3d5177c4..c800cdcb4ae4 100644 --- a/config/crd/bases/cluster.x-k8s.io_clusters.yaml +++ b/config/crd/bases/cluster.x-k8s.io_clusters.yaml @@ -367,9 +367,6 @@ spec: - type type: object type: array - controlPlaneInitialized: - description: 
ControlPlaneInitialized defines if the control plane has been initialized. - type: boolean controlPlaneReady: description: ControlPlaneReady defines if the control plane is ready. type: boolean diff --git a/controllers/cluster_controller.go b/controllers/cluster_controller.go index bac33948cadb..59d16d420b34 100644 --- a/controllers/cluster_controller.go +++ b/controllers/cluster_controller.go @@ -463,6 +463,12 @@ func splitMachineList(list *clusterv1.MachineList) (*clusterv1.MachineList, *clu } func (r *ClusterReconciler) reconcileControlPlaneInitialized(ctx context.Context, cluster *clusterv1.Cluster) (ctrl.Result, error) { + // Skip checking if the control plane is initialized when using a Control Plane Provider (this is reconciled in + // reconcileControlPlane instead). + if cluster.Spec.ControlPlaneRef != nil { + return ctrl.Result{}, nil + } + if conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) { return ctrl.Result{}, nil } @@ -505,7 +511,7 @@ func (r *ClusterReconciler) controlPlaneMachineToCluster(o client.Object) []ctrl return nil } - if cluster.Status.ControlPlaneInitialized { + if conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) { return nil } diff --git a/controllers/cluster_controller_phases.go b/controllers/cluster_controller_phases.go index 0167b5425909..234bc854fdaa 100644 --- a/controllers/cluster_controller_phases.go +++ b/controllers/cluster_controller_phases.go @@ -237,12 +237,16 @@ func (r *ClusterReconciler) reconcileControlPlane(ctx context.Context, cluster * // Update cluster.Status.ControlPlaneInitialized if it hasn't already been set // Determine if the control plane provider is initialized. - if !cluster.Status.ControlPlaneInitialized { + if !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) { initialized, err := external.IsInitialized(controlPlaneConfig) if err != nil { return ctrl.Result{}, err } - cluster.Status.ControlPlaneInitialized = initialized + if initialized { + conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition) + } else { + conditions.MarkFalse(cluster, clusterv1.ControlPlaneInitializedCondition, clusterv1.WaitingForControlPlaneProviderInitializedReason, clusterv1.ConditionSeverityInfo, "Waiting for control plane provider to indicate the control plane has been initialized") + } } return ctrl.Result{}, nil diff --git a/controllers/cluster_controller_test.go b/controllers/cluster_controller_test.go index 4636d03e5a3b..bb30a4f0ab7e 100644 --- a/controllers/cluster_controller_test.go +++ b/controllers/cluster_controller_test.go @@ -21,6 +21,7 @@ import ( . "github.com/onsi/ginkgo" . 
"github.com/onsi/gomega" + "sigs.k8s.io/cluster-api/util/conditions" corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" @@ -240,7 +241,7 @@ var _ = Describe("Cluster Reconciler", func() { }, timeout).ShouldNot(BeEmpty()) }) - It("Should successfully set Status.ControlPlaneInitialized on the cluster object if controlplane is ready", func() { + It("Should successfully set ControlPlaneInitialized on the cluster object if controlplane is ready", func() { cluster := &clusterv1.Cluster{ ObjectMeta: metav1.ObjectMeta{ GenerateName: "test6-", @@ -321,7 +322,7 @@ var _ = Describe("Cluster Reconciler", func() { if err := testEnv.Get(ctx, key, cluster); err != nil { return false } - return cluster.Status.ControlPlaneInitialized + return conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) }, timeout).Should(BeTrue()) }) }) @@ -687,5 +688,5 @@ func TestReconcileControlPlaneInitializedControlPlaneRef(t *testing.T) { res, err := r.reconcileControlPlaneInitialized(ctx, c) g.Expect(res.IsZero()).To(BeTrue()) g.Expect(err).ToNot(HaveOccurred()) - g.Expect(c.Status.ControlPlaneInitialized).To(BeFalse()) + g.Expect(conditions.Has(c, clusterv1.ControlPlaneInitializedCondition)).To(BeFalse()) } diff --git a/controllers/machine_controller.go b/controllers/machine_controller.go index 86dabb571aa2..370c018b4f7a 100644 --- a/controllers/machine_controller.go +++ b/controllers/machine_controller.go @@ -251,7 +251,7 @@ func patchMachine(ctx context.Context, patchHelper *patch.Helper, machine *clust func (r *MachineReconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) { log := ctrl.LoggerFrom(ctx) - if cluster.Status.ControlPlaneInitialized { + if conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) { if err := r.watchClusterNodes(ctx, cluster); err != nil { log.Error(err, "error watching nodes on target cluster") return ctrl.Result{}, err diff --git a/controllers/machine_controller_test.go b/controllers/machine_controller_test.go index d0ca798f50d8..3bc8b4aca355 100644 --- a/controllers/machine_controller_test.go +++ b/controllers/machine_controller_test.go @@ -112,7 +112,7 @@ func TestWatches(t *testing.T) { // Patch cluster control plane initialized (this is required to start node watch) patchHelper, err := patch.NewHelper(testCluster, testEnv) g.Expect(err).ShouldNot(HaveOccurred()) - testCluster.Status.ControlPlaneInitialized = true + conditions.MarkTrue(testCluster, clusterv1.ControlPlaneInitializedCondition) g.Expect(patchHelper.Patch(ctx, testCluster, patch.WithStatusObservedGeneration{})).To(Succeed()) // Patch infra machine ready diff --git a/controllers/machinehealthcheck_controller_test.go b/controllers/machinehealthcheck_controller_test.go index a353438ac578..34c950e76cb0 100644 --- a/controllers/machinehealthcheck_controller_test.go +++ b/controllers/machinehealthcheck_controller_test.go @@ -19,7 +19,7 @@ import ( "context" "errors" "fmt" - "sigs.k8s.io/controller-runtime/pkg/log" + "sort" "testing" "time" @@ -43,6 +43,7 @@ import ( "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) diff --git a/controllers/remote/cluster_cache.go b/controllers/remote/cluster_cache.go index b6c402a9a85e..9c45ae68069c 100644 --- a/controllers/remote/cluster_cache.go +++ b/controllers/remote/cluster_cache.go @@ -32,6 +32,7 @@ import ( 
"k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" + "sigs.k8s.io/cluster-api/util/conditions" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" @@ -313,7 +314,7 @@ func (t *ClusterCacheTracker) healthCheckCluster(ctx context.Context, in *health return false, nil } - if !cluster.Status.InfrastructureReady || !cluster.Status.ControlPlaneInitialized { + if !cluster.Status.InfrastructureReady || !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) { // If the infrastructure or control plane aren't marked as ready, we should requeue and wait. return false, nil } diff --git a/controllers/remote/cluster_cache_healthcheck_test.go b/controllers/remote/cluster_cache_healthcheck_test.go index 77fb2a541693..37212e2b5b24 100644 --- a/controllers/remote/cluster_cache_healthcheck_test.go +++ b/controllers/remote/cluster_cache_healthcheck_test.go @@ -31,6 +31,7 @@ import ( "k8s.io/klog/klogr" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/manager" ) @@ -85,7 +86,7 @@ var _ = Describe("ClusterCache HealthCheck suite", func() { }, } Expect(k8sClient.Create(ctx, testCluster)).To(Succeed()) - testCluster.Status.ControlPlaneInitialized = true + conditions.MarkTrue(testCluster, clusterv1.ControlPlaneInitializedCondition) testCluster.Status.InfrastructureReady = true Expect(k8sClient.Status().Update(ctx, testCluster)).To(Succeed()) diff --git a/controllers/remote/cluster_cache_tracker_test.go b/controllers/remote/cluster_cache_tracker_test.go index 5b67cc09a995..3daf58205ca2 100644 --- a/controllers/remote/cluster_cache_tracker_test.go +++ b/controllers/remote/cluster_cache_tracker_test.go @@ -27,6 +27,7 @@ import ( "k8s.io/client-go/kubernetes/scheme" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/conditions" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/handler" @@ -101,7 +102,7 @@ var _ = Describe("ClusterCache Tracker suite", func() { }, } Expect(k8sClient.Create(ctx, clusterA)).To(Succeed()) - clusterA.Status.ControlPlaneInitialized = true + conditions.MarkTrue(clusterA, clusterv1.ControlPlaneInitializedCondition) clusterA.Status.InfrastructureReady = true Expect(k8sClient.Status().Update(ctx, clusterA)).To(Succeed()) diff --git a/test/infrastructure/docker/controllers/dockermachine_controller.go b/test/infrastructure/docker/controllers/dockermachine_controller.go index a96334e87eae..980007ed2dc8 100644 --- a/test/infrastructure/docker/controllers/dockermachine_controller.go +++ b/test/infrastructure/docker/controllers/dockermachine_controller.go @@ -191,7 +191,7 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * // Make sure bootstrap data is available and populated. 
if machine.Spec.Bootstrap.DataSecretName == nil { - if !util.IsControlPlaneMachine(machine) && !cluster.Status.ControlPlaneInitialized { + if !util.IsControlPlaneMachine(machine) && !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) { log.Info("Waiting for the control plane to be initialized") conditions.MarkFalse(dockerMachine, infrav1.ContainerProvisionedCondition, clusterv1.WaitingForControlPlaneAvailableReason, clusterv1.ConditionSeverityInfo, "") return ctrl.Result{}, nil From ca9dd7e1da84e8133f9379a5b8dd3bfa8c0443ad Mon Sep 17 00:00:00 2001 From: Andy Goldstein Date: Fri, 25 Sep 2020 09:50:40 -0400 Subject: [PATCH 3/4] MHC: control plane init / cluster infra ready Change the MachineHealthCheck logic to require the Cluster's "control plane initialized" and "infrastructure ready" conditions to be true before proceeding to determine if a target is unhealhty. We can't look just at a Machine's last updated time when determining if the Machine has exceeded the node startup timeout. A node can't bootstrap until after the cluster's infrastructure is ready and the control plane is initialized, and we need to base node startup timeout on the latter of machine creation time, control plane initialized time, and cluster infrastructure ready time. Signed-off-by: Andy Goldstein --- controllers/cluster_controller.go | 7 +- controllers/machine_controller_test.go | 11 +- .../machinehealthcheck_controller_test.go | 439 +++++++++++++++--- controllers/machinehealthcheck_targets.go | 46 +- .../machinehealthcheck_targets_test.go | 41 +- 5 files changed, 453 insertions(+), 91 deletions(-) diff --git a/controllers/cluster_controller.go b/controllers/cluster_controller.go index 59d16d420b34..12ea00e5b7b2 100644 --- a/controllers/cluster_controller.go +++ b/controllers/cluster_controller.go @@ -463,19 +463,24 @@ func splitMachineList(list *clusterv1.MachineList) (*clusterv1.MachineList, *clu } func (r *ClusterReconciler) reconcileControlPlaneInitialized(ctx context.Context, cluster *clusterv1.Cluster) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + // Skip checking if the control plane is initialized when using a Control Plane Provider (this is reconciled in // reconcileControlPlane instead). 
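// Editor's sketch, for orientation (the helper name hasInitializedControlPlaneMachine is
// illustrative, not part of the patch): for clusters without a control plane provider, the
// loop below marks ControlPlaneInitialized once any control plane Machine has a node
// reference, which boils down to a predicate like this; clusters with spec.controlPlaneRef
// get the equivalent signal from external.IsInitialized in reconcileControlPlane (patch 2).
func hasInitializedControlPlaneMachine(machines []*clusterv1.Machine) bool {
	for _, m := range machines {
		if util.IsControlPlaneMachine(m) && m.Status.NodeRef != nil {
			return true
		}
	}
	return false
}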
if cluster.Spec.ControlPlaneRef != nil { + log.V(4).Info("Skipping reconcileControlPlaneInitialized because cluster has a controlPlaneRef") return ctrl.Result{}, nil } if conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) { + log.V(4).Info("Skipping reconcileControlPlaneInitialized because control plane already initialized") return ctrl.Result{}, nil } + log.V(4).Info("Checking for control plane initialization") + machines, err := getActiveMachinesInCluster(ctx, r.Client, cluster.Namespace, cluster.Name) if err != nil { - log := ctrl.LoggerFrom(ctx) log.Error(err, "unable to determine ControlPlaneInitialized") return ctrl.Result{}, err } diff --git a/controllers/machine_controller_test.go b/controllers/machine_controller_test.go index 3bc8b4aca355..4c060aa3ad88 100644 --- a/controllers/machine_controller_test.go +++ b/controllers/machine_controller_test.go @@ -109,14 +109,8 @@ func TestWatches(t *testing.T) { g.Expect(testEnv.Cleanup(ctx, do...)).To(Succeed()) }(ns, testCluster, defaultBootstrap) - // Patch cluster control plane initialized (this is required to start node watch) - patchHelper, err := patch.NewHelper(testCluster, testEnv) - g.Expect(err).ShouldNot(HaveOccurred()) - conditions.MarkTrue(testCluster, clusterv1.ControlPlaneInitializedCondition) - g.Expect(patchHelper.Patch(ctx, testCluster, patch.WithStatusObservedGeneration{})).To(Succeed()) - // Patch infra machine ready - patchHelper, err = patch.NewHelper(infraMachine, testEnv) + patchHelper, err := patch.NewHelper(infraMachine, testEnv) g.Expect(err).ShouldNot(HaveOccurred()) g.Expect(unstructured.SetNestedField(infraMachine.Object, true, "status", "ready")).To(Succeed()) g.Expect(patchHelper.Patch(ctx, infraMachine, patch.WithStatusObservedGeneration{})).To(Succeed()) @@ -132,6 +126,9 @@ func TestWatches(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ GenerateName: "machine-created-", Namespace: ns.Name, + Labels: map[string]string{ + clusterv1.MachineControlPlaneLabelName: "", + }, }, Spec: clusterv1.MachineSpec{ ClusterName: testCluster.Name, diff --git a/controllers/machinehealthcheck_controller_test.go b/controllers/machinehealthcheck_controller_test.go index 34c950e76cb0..42b74b737192 100644 --- a/controllers/machinehealthcheck_controller_test.go +++ b/controllers/machinehealthcheck_controller_test.go @@ -38,6 +38,7 @@ import ( "k8s.io/utils/pointer" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" "sigs.k8s.io/cluster-api/controllers/remote" + capierrors "sigs.k8s.io/cluster-api/errors" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" @@ -174,6 +175,115 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { )) }) + t.Run("it ignores Machines not matching the label selector", func(t *testing.T) { + g := NewWithT(t) + cluster := createNamespaceAndCluster(g) + + mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name) + + g.Expect(testEnv.Create(ctx, mhc)).To(Succeed()) + defer func(do ...client.Object) { + g.Expect(testEnv.Cleanup(ctx, do...)).To(Succeed()) + }(cluster, mhc) + + // Healthy nodes and machines matching the MHC's label selector. 
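// Editor's sketch (assumes clusterv1.ClusterLabelName and the apimachinery selector helpers,
// which getMachinesFromMHC later in this diff appears to build on): the controller only counts
// Machines matching the MHC's spec.selector combined with the cluster name label, which is
// why the second, unlabelled batch created below never shows up in the MHC status.
selector, err := metav1.LabelSelectorAsSelector(
	metav1.CloneSelectorAndAddLabel(&mhc.Spec.Selector, clusterv1.ClusterLabelName, mhc.Spec.ClusterName),
)
g.Expect(err).ToNot(HaveOccurred())
_ = selector // only Machines matching this combined selector are considered MHC targets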
+ _, machines, cleanup := createMachinesWithNodes(g, cluster, + count(2), + firstMachineAsControlPlane(), + createNodeRefForMachine(true), + nodeStatus(corev1.ConditionTrue), + machineLabels(mhc.Spec.Selector.MatchLabels), + ) + defer cleanup() + targetMachines := make([]string, len(machines)) + for i, m := range machines { + targetMachines[i] = m.Name + } + sort.Strings(targetMachines) + + // Healthy nodes and machines NOT matching the MHC's label selector. + _, _, cleanup2 := createMachinesWithNodes(g, cluster, + count(2), + createNodeRefForMachine(true), + nodeStatus(corev1.ConditionTrue), + ) + defer cleanup2() + + // Make sure the status matches. + g.Eventually(func() *clusterv1.MachineHealthCheckStatus { + err := testEnv.Get(ctx, util.ObjectKey(mhc), mhc) + if err != nil { + return nil + } + return &mhc.Status + }, 5*time.Second, 100*time.Millisecond).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{ + ExpectedMachines: 2, + CurrentHealthy: 2, + RemediationsAllowed: 2, + ObservedGeneration: 1, + Targets: targetMachines, + Conditions: clusterv1.Conditions{ + { + Type: clusterv1.RemediationAllowedCondition, + Status: corev1.ConditionTrue, + }, + }, + })) + }) + + t.Run("it doesn't mark anything unhealthy when cluster infrastructure is not ready", func(t *testing.T) { + g := NewWithT(t) + cluster := createNamespaceAndCluster(g) + + patchHelper, err := patch.NewHelper(cluster, testEnv.Client) + g.Expect(err).To(BeNil()) + + conditions.MarkFalse(cluster, clusterv1.InfrastructureReadyCondition, "SomeReason", clusterv1.ConditionSeverityError, "") + g.Expect(patchHelper.Patch(ctx, cluster)).To(Succeed()) + + mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name) + + g.Expect(testEnv.Create(ctx, mhc)).To(Succeed()) + defer func(do ...client.Object) { + g.Expect(testEnv.Cleanup(ctx, do...)).To(Succeed()) + }(cluster, mhc) + + // Healthy nodes and machines. + _, machines, cleanup := createMachinesWithNodes(g, cluster, + count(2), + firstMachineAsControlPlane(), + createNodeRefForMachine(true), + machineLabels(mhc.Spec.Selector.MatchLabels), + ) + defer cleanup() + targetMachines := make([]string, len(machines)) + for i, m := range machines { + targetMachines[i] = m.Name + } + sort.Strings(targetMachines) + + // Make sure the status matches. + g.Eventually(func() *clusterv1.MachineHealthCheckStatus { + err := testEnv.Get(ctx, util.ObjectKey(mhc), mhc) + if err != nil { + return nil + } + return &mhc.Status + }).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{ + ExpectedMachines: 2, + CurrentHealthy: 2, + RemediationsAllowed: 2, + ObservedGeneration: 1, + Targets: targetMachines, + Conditions: clusterv1.Conditions{ + { + Type: clusterv1.RemediationAllowedCondition, + Status: corev1.ConditionTrue, + }, + }, + })) + }) + t.Run("it doesn't mark anything unhealthy when all Machines are healthy", func(t *testing.T) { g := NewWithT(t) cluster := createNamespaceAndCluster(g) @@ -188,8 +298,9 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { // Healthy nodes and machines. _, machines, cleanup := createMachinesWithNodes(g, cluster, count(2), + firstMachineAsControlPlane(), createNodeRefForMachine(true), - markNodeAsHealthy(true), + nodeStatus(corev1.ConditionTrue), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup() @@ -235,8 +346,9 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { // Healthy nodes and machines. 
_, machines, cleanup1 := createMachinesWithNodes(g, cluster, count(2), + firstMachineAsControlPlane(), createNodeRefForMachine(true), - markNodeAsHealthy(true), + nodeStatus(corev1.ConditionTrue), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup1() @@ -244,8 +356,124 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { _, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster, count(1), createNodeRefForMachine(true), - markNodeAsHealthy(false), + nodeStatus(corev1.ConditionUnknown), + machineLabels(mhc.Spec.Selector.MatchLabels), + ) + defer cleanup2() + machines = append(machines, unhealthyMachines...) + targetMachines := make([]string, len(machines)) + for i, m := range machines { + targetMachines[i] = m.Name + } + sort.Strings(targetMachines) + + // Make sure the status matches. + g.Eventually(func() *clusterv1.MachineHealthCheckStatus { + err := testEnv.Get(ctx, util.ObjectKey(mhc), mhc) + if err != nil { + return nil + } + return &mhc.Status + }).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{ + ExpectedMachines: 3, + CurrentHealthy: 2, + RemediationsAllowed: 2, + ObservedGeneration: 1, + Targets: targetMachines, + Conditions: clusterv1.Conditions{ + { + Type: clusterv1.RemediationAllowedCondition, + Status: corev1.ConditionTrue, + }, + }, + })) + }) + + t.Run("it marks unhealthy machines for remediation when there a Machine has a failure reason", func(t *testing.T) { + g := NewWithT(t) + cluster := createNamespaceAndCluster(g) + + mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name) + + g.Expect(testEnv.Create(ctx, mhc)).To(Succeed()) + defer func(do ...client.Object) { + g.Expect(testEnv.Cleanup(ctx, do...)).To(Succeed()) + }(cluster, mhc) + + // Healthy nodes and machines. + _, machines, cleanup1 := createMachinesWithNodes(g, cluster, + count(2), + firstMachineAsControlPlane(), + createNodeRefForMachine(true), + nodeStatus(corev1.ConditionTrue), + machineLabels(mhc.Spec.Selector.MatchLabels), + ) + defer cleanup1() + // Machine with failure reason. + _, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster, + count(1), + createNodeRefForMachine(true), + nodeStatus(corev1.ConditionTrue), machineLabels(mhc.Spec.Selector.MatchLabels), + machineFailureReason("some failure"), + ) + defer cleanup2() + machines = append(machines, unhealthyMachines...) + targetMachines := make([]string, len(machines)) + for i, m := range machines { + targetMachines[i] = m.Name + } + sort.Strings(targetMachines) + + // Make sure the status matches. + g.Eventually(func() *clusterv1.MachineHealthCheckStatus { + err := testEnv.Get(ctx, util.ObjectKey(mhc), mhc) + if err != nil { + return nil + } + return &mhc.Status + }).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{ + ExpectedMachines: 3, + CurrentHealthy: 2, + RemediationsAllowed: 2, + ObservedGeneration: 1, + Targets: targetMachines, + Conditions: clusterv1.Conditions{ + { + Type: clusterv1.RemediationAllowedCondition, + Status: corev1.ConditionTrue, + }, + }, + })) + }) + + t.Run("it marks unhealthy machines for remediation when there a Machine has a failure message", func(t *testing.T) { + g := NewWithT(t) + cluster := createNamespaceAndCluster(g) + + mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name) + + g.Expect(testEnv.Create(ctx, mhc)).To(Succeed()) + defer func(do ...client.Object) { + g.Expect(testEnv.Cleanup(ctx, do...)).To(Succeed()) + }(cluster, mhc) + + // Healthy nodes and machines. 
+ _, machines, cleanup1 := createMachinesWithNodes(g, cluster, + count(2), + firstMachineAsControlPlane(), + createNodeRefForMachine(true), + nodeStatus(corev1.ConditionTrue), + machineLabels(mhc.Spec.Selector.MatchLabels), + ) + defer cleanup1() + // Machine with failure message. + _, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster, + count(1), + createNodeRefForMachine(true), + nodeStatus(corev1.ConditionTrue), + machineLabels(mhc.Spec.Selector.MatchLabels), + machineFailureMessage("some failure"), ) defer cleanup2() machines = append(machines, unhealthyMachines...) @@ -293,8 +521,9 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { // Healthy nodes and machines. _, machines, cleanup1 := createMachinesWithNodes(g, cluster, count(1), + firstMachineAsControlPlane(), createNodeRefForMachine(true), - markNodeAsHealthy(true), + nodeStatus(corev1.ConditionTrue), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup1() @@ -302,7 +531,7 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { _, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster, count(2), createNodeRefForMachine(true), - markNodeAsHealthy(false), + nodeStatus(corev1.ConditionUnknown), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup2() @@ -366,7 +595,7 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { } for i := range machines.Items { - if conditions.Get(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) != nil { + if conditions.IsTrue(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) { remediated++ } } @@ -390,8 +619,9 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { // Healthy nodes and machines. _, machines, cleanup1 := createMachinesWithNodes(g, cluster, count(2), + firstMachineAsControlPlane(), createNodeRefForMachine(true), - markNodeAsHealthy(true), + nodeStatus(corev1.ConditionTrue), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup1() @@ -399,7 +629,7 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { _, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster, count(1), createNodeRefForMachine(true), - markNodeAsHealthy(false), + nodeStatus(corev1.ConditionUnknown), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup2() @@ -448,8 +678,9 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { // Healthy nodes and machines. _, machines, cleanup1 := createMachinesWithNodes(g, cluster, count(1), + firstMachineAsControlPlane(), createNodeRefForMachine(true), - markNodeAsHealthy(true), + nodeStatus(corev1.ConditionTrue), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup1() @@ -457,7 +688,7 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { _, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster, count(2), createNodeRefForMachine(true), - markNodeAsHealthy(false), + nodeStatus(corev1.ConditionUnknown), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup2() @@ -533,6 +764,14 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { g := NewWithT(t) cluster := createNamespaceAndCluster(g) + // After the cluster exists, we have to set the infrastructure ready condition; otherwise, MachineHealthChecks + // will never fail when nodeStartupTimeout is exceeded. 
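// Editor's sketch of the startup-timeout reasoning in this patch's commit message (the helper
// name nodeStartupDeadline is illustrative; the real logic is in needsRemediation in
// machinehealthcheck_targets.go further down): a Machine with no Node yet is only considered
// late relative to the latest of machine creation, control plane initialized, and cluster
// infrastructure ready times, because a node cannot register before the latter two.
func nodeStartupDeadline(cluster *clusterv1.Cluster, m *clusterv1.Machine, timeout time.Duration) time.Time {
	// The controller only reaches this computation after verifying both conditions are True,
	// so their last transition times are available.
	cpInitialized := conditions.GetLastTransitionTime(cluster, clusterv1.ControlPlaneInitializedCondition).Time
	infraReady := conditions.GetLastTransitionTime(cluster, clusterv1.InfrastructureReadyCondition).Time

	comparison := m.CreationTimestamp.Time
	if cpInitialized.After(comparison) {
		comparison = cpInitialized
	}
	if infraReady.After(comparison) {
		comparison = infraReady
	}
	// The Machine only fails the health check for a missing node once this deadline passes.
	return comparison.Add(timeout)
}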
+ patchHelper, err := patch.NewHelper(cluster, testEnv.GetClient()) + g.Expect(err).ToNot(HaveOccurred()) + + conditions.MarkTrue(cluster, clusterv1.InfrastructureReadyCondition) + g.Expect(patchHelper.Patch(ctx, cluster)).To(Succeed()) + mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name) mhc.Spec.NodeStartupTimeout = &metav1.Duration{Duration: 5 * time.Hour} @@ -544,8 +783,9 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { // Healthy nodes and machines. _, machines, cleanup1 := createMachinesWithNodes(g, cluster, count(2), + firstMachineAsControlPlane(), createNodeRefForMachine(true), - markNodeAsHealthy(true), + nodeStatus(corev1.ConditionTrue), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup1() @@ -553,7 +793,7 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { _, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster, count(1), createNodeRefForMachine(false), - markNodeAsHealthy(false), + nodeStatus(corev1.ConditionUnknown), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup2() @@ -614,7 +854,7 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { } for i := range machines.Items { - if conditions.Get(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) != nil { + if conditions.IsTrue(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) { remediated++ } } @@ -639,8 +879,9 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { // Healthy nodes and machines. _, machines, cleanup1 := createMachinesWithNodes(g, cluster, count(2), + firstMachineAsControlPlane(), createNodeRefForMachine(true), - markNodeAsHealthy(true), + nodeStatus(corev1.ConditionTrue), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup1() @@ -648,7 +889,7 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { _, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster, count(1), createNodeRefForMachine(false), - markNodeAsHealthy(false), + nodeStatus(corev1.ConditionUnknown), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup2() @@ -737,8 +978,9 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { // Healthy nodes and machines. nodes, machines, cleanup := createMachinesWithNodes(g, cluster, count(3), + firstMachineAsControlPlane(), createNodeRefForMachine(true), - markNodeAsHealthy(true), + nodeStatus(corev1.ConditionTrue), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup() @@ -829,8 +1071,9 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { // Healthy nodes and machines. nodes, machines, cleanup := createMachinesWithNodes(g, cluster, count(1), + firstMachineAsControlPlane(), createNodeRefForMachine(true), - markNodeAsHealthy(true), + nodeStatus(corev1.ConditionTrue), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup() @@ -911,7 +1154,7 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { return }).Should(Equal(1)) - // Calculate how many Machines have been remediated. 
+ // Calculate how many Machines have been marked for remediation g.Eventually(func() (remediated int) { machines := &clusterv1.MachineList{} err := testEnv.List(ctx, machines, client.MatchingLabels{ @@ -922,7 +1165,7 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { } for i := range machines.Items { - if conditions.Get(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) != nil { + if conditions.IsFalse(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) { remediated++ } } @@ -934,6 +1177,15 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { g := NewWithT(t) cluster := createNamespaceAndCluster(g) + // Create 1 control plane machine so MHC can proceed + _, _, cleanup := createMachinesWithNodes(g, cluster, + count(1), + firstMachineAsControlPlane(), + createNodeRefForMachine(true), + nodeStatus(corev1.ConditionTrue), + ) + defer cleanup() + mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name) // Create infrastructure template resource. infraResource := map[string]interface{}{ @@ -1085,8 +1337,9 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { // Healthy nodes and machines. nodes, machines, cleanup := createMachinesWithNodes(g, cluster, count(1), + firstMachineAsControlPlane(), createNodeRefForMachine(true), - markNodeAsHealthy(true), + nodeStatus(corev1.ConditionTrue), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup() @@ -1235,8 +1488,9 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { // Healthy nodes and machines. nodes, machines, cleanup := createMachinesWithNodes(g, cluster, count(1), + firstMachineAsControlPlane(), createNodeRefForMachine(true), - markNodeAsHealthy(true), + nodeStatus(corev1.ConditionTrue), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup() @@ -1382,8 +1636,9 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { // Healthy nodes and machines. 
nodes, machines, cleanup := createMachinesWithNodes(g, cluster, count(1), + firstMachineAsControlPlane(), createNodeRefForMachine(true), - markNodeAsHealthy(true), + nodeStatus(corev1.ConditionTrue), machineLabels(mhc.Spec.Selector.MatchLabels), ) defer cleanup() @@ -1996,18 +2251,38 @@ func ownerReferenceForCluster(ctx context.Context, g *WithT, c *clusterv1.Cluste func createNamespaceAndCluster(g *WithT) *clusterv1.Cluster { ns, err := testEnv.CreateNamespace(ctx, "test-mhc") g.Expect(err).ToNot(HaveOccurred()) + cluster := &clusterv1.Cluster{ ObjectMeta: metav1.ObjectMeta{ GenerateName: "test-cluster-", Namespace: ns.Name, }, } + g.Expect(testEnv.Create(ctx, cluster)).To(Succeed()) + + // Make sure the cluster is in the cache before proceeding g.Eventually(func() error { var cl clusterv1.Cluster return testEnv.Get(ctx, util.ObjectKey(cluster), &cl) }, timeout, 100*time.Millisecond).Should(Succeed()) + // This is required for MHC to perform checks + patchHelper, err := patch.NewHelper(cluster, testEnv.Client) + g.Expect(err).To(BeNil()) + conditions.MarkTrue(cluster, clusterv1.InfrastructureReadyCondition) + g.Expect(patchHelper.Patch(ctx, cluster)).To(Succeed()) + + // Wait for cluster in cache to be updated post-patch + g.Eventually(func() bool { + err := testEnv.Get(ctx, util.ObjectKey(cluster), cluster) + if err != nil { + return false + } + + return conditions.IsTrue(cluster, clusterv1.InfrastructureReadyCondition) + }, timeout, 100*time.Millisecond).Should(BeTrue()) + g.Expect(testEnv.CreateKubeconfigSecret(ctx, cluster)).To(Succeed()) return cluster @@ -2040,28 +2315,6 @@ func newRunningMachine(c *clusterv1.Cluster, labels map[string]string) *clusterv } } -// newNode creaetes a Node object with node condition Ready == True -func newNode() *corev1.Node { - return &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "test-mhc-node-", - }, - Status: corev1.NodeStatus{ - Conditions: []corev1.NodeCondition{ - {Type: corev1.NodeReady, Status: corev1.ConditionTrue}, - }, - }, - } -} - -func setNodeUnhealthy(node *corev1.Node) { - node.Status.Conditions[0] = corev1.NodeCondition{ - Type: corev1.NodeReady, - Status: corev1.ConditionUnknown, - LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)), - } -} - func newInfraMachine(machine *clusterv1.Machine) (*unstructured.Unstructured, string) { providerID := fmt.Sprintf("test:////%v", uuid.NewUUID()) return &unstructured.Unstructured{ @@ -2080,10 +2333,13 @@ func newInfraMachine(machine *clusterv1.Machine) (*unstructured.Unstructured, st } type machinesWithNodes struct { - count int - markNodeAsHealthy bool - createNodeRefForMachine bool - labels map[string]string + count int + nodeStatus corev1.ConditionStatus + createNodeRefForMachine bool + firstMachineAsControlPlane bool + labels map[string]string + failureReason string + failureMessage string } type machineWithNodesOption func(m *machinesWithNodes) @@ -2094,9 +2350,15 @@ func count(n int) machineWithNodesOption { } } -func markNodeAsHealthy(b bool) machineWithNodesOption { +func firstMachineAsControlPlane() machineWithNodesOption { + return func(m *machinesWithNodes) { + m.firstMachineAsControlPlane = true + } +} + +func nodeStatus(s corev1.ConditionStatus) machineWithNodesOption { return func(m *machinesWithNodes) { - m.markNodeAsHealthy = b + m.nodeStatus = s } } @@ -2112,6 +2374,18 @@ func machineLabels(l map[string]string) machineWithNodesOption { } } +func machineFailureReason(s string) machineWithNodesOption { + return func(m *machinesWithNodes) { + 
m.failureReason = s + } +} + +func machineFailureMessage(s string) machineWithNodesOption { + return func(m *machinesWithNodes) { + m.failureMessage = s + } +} + func createMachinesWithNodes( g *WithT, c *clusterv1.Cluster, @@ -2131,6 +2405,12 @@ func createMachinesWithNodes( for i := 0; i < o.count; i++ { machine := newRunningMachine(c, o.labels) + if i == 0 && o.firstMachineAsControlPlane { + if machine.Labels == nil { + machine.Labels = make(map[string]string) + } + machine.Labels[clusterv1.MachineControlPlaneLabelName] = "" + } infraMachine, providerID := newInfraMachine(machine) g.Expect(testEnv.Create(ctx, infraMachine)).To(Succeed()) infraMachines = append(infraMachines, infraMachine) @@ -2164,35 +2444,60 @@ func createMachinesWithNodes( return machine.Status.LastUpdated }, timeout, 100*time.Millisecond).ShouldNot(BeNil()) + machinePatchHelper, err := patch.NewHelper(machine, testEnv.Client) + g.Expect(err).To(BeNil()) + if o.createNodeRefForMachine { - node := newNode() - if !o.markNodeAsHealthy { - setNodeUnhealthy(node) + // Create node + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "test-mhc-node-", + }, + Spec: corev1.NodeSpec{ + ProviderID: providerID, + }, } - machineStatus := machine.Status - node.Spec.ProviderID = providerID - nodeStatus := node.Status + g.Expect(testEnv.Create(ctx, node)).To(Succeed()) fmt.Printf("node created: %s\n", node.GetName()) - nodePatch := client.MergeFrom(node.DeepCopy()) - node.Status = nodeStatus - g.Expect(testEnv.Status().Patch(ctx, node, nodePatch)).To(Succeed()) + // Patch node status + nodePatchHelper, err := patch.NewHelper(node, testEnv.Client) + g.Expect(err).To(BeNil()) + + node.Status.Conditions = []corev1.NodeCondition{ + { + Type: corev1.NodeReady, + Status: o.nodeStatus, + LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)), + }, + } + + g.Expect(nodePatchHelper.Patch(ctx, node)).To(Succeed()) + nodes = append(nodes, node) - machinePatch := client.MergeFrom(machine.DeepCopy()) - machine.Status = machineStatus machine.Status.NodeRef = &corev1.ObjectReference{ Name: node.Name, } + } - // Adding one second to ensure there is a difference from the - // original time so that the patch works. That is, ensure the - // precision isn't lost during conversions. - lastUp := metav1.NewTime(machine.Status.LastUpdated.Add(time.Second)) - machine.Status.LastUpdated = &lastUp - g.Expect(testEnv.Status().Patch(ctx, machine, machinePatch)).To(Succeed()) + if o.failureReason != "" { + failureReason := capierrors.MachineStatusError(o.failureReason) + machine.Status.FailureReason = &failureReason } + if o.failureMessage != "" { + machine.Status.FailureMessage = pointer.StringPtr(o.failureMessage) + } + + // Adding one second to ensure there is a difference from the + // original time so that the patch works. That is, ensure the + // precision isn't lost during conversions. 
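// Editor's note (illustrative): metav1.Time marshals to RFC3339 at one-second granularity, so
// a sub-second change to LastUpdated can be lost when the object round-trips through the API
// server; bumping by a full second below guarantees the status patch is a visible change.
now := metav1.NewTime(time.Now())
_ = now.Rfc3339Copy() // fractional seconds are dropped at serialization precision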
+ lastUp := metav1.NewTime(machine.Status.LastUpdated.Add(time.Second)) + machine.Status.LastUpdated = &lastUp + + // Patch the machine to record the status changes + g.Expect(machinePatchHelper.Patch(ctx, machine)).To(Succeed()) machines = append(machines, machine) } diff --git a/controllers/machinehealthcheck_targets.go b/controllers/machinehealthcheck_targets.go index 5493b54ef2e7..95cc1bf46289 100644 --- a/controllers/machinehealthcheck_targets.go +++ b/controllers/machinehealthcheck_targets.go @@ -59,6 +59,7 @@ const ( // healthCheckTarget contains the information required to perform a health check // on the node to determine if any remediation is required. type healthCheckTarget struct { + Cluster *clusterv1.Cluster Machine *clusterv1.Machine Node *corev1.Node MHC *clusterv1.MachineHealthCheck @@ -115,19 +116,46 @@ func (t *healthCheckTarget) needsRemediation(logger logr.Logger, timeoutForMachi return true, time.Duration(0) } + // Don't penalize any Machine/Node if the control plane has not been initialized. + if !conditions.IsTrue(t.Cluster, clusterv1.ControlPlaneInitializedCondition) { + logger.V(3).Info("Not evaluating target health because the control plane has not yet been initialized") + // Return a nextCheck time of 0 because we'll get requeued when the Cluster is updated. + return false, 0 + } + + // Don't penalize any Machine/Node if the cluster infrastructure is not ready. + if !conditions.IsTrue(t.Cluster, clusterv1.InfrastructureReadyCondition) { + logger.V(3).Info("Not evaluating target health because the cluster infrastructure is not ready") + // Return a nextCheck time of 0 because we'll get requeued when the Cluster is updated. + return false, 0 + } + // the node has not been set yet if t.Node == nil { - // status not updated yet - if t.Machine.Status.LastUpdated == nil { - return false, timeoutForMachineToHaveNode + controlPlaneInitializedTime := conditions.GetLastTransitionTime(t.Cluster, clusterv1.ControlPlaneInitializedCondition).Time + clusterInfraReadyTime := conditions.GetLastTransitionTime(t.Cluster, clusterv1.InfrastructureReadyCondition).Time + machineCreationTime := t.Machine.CreationTimestamp.Time + + // Use the latest of the 3 times + comparisonTime := machineCreationTime + logger.V(3).Info("Determining comparison time", "machineCreationTime", machineCreationTime, "clusterInfraReadyTime", clusterInfraReadyTime, "controlPlaneInitializedTime", controlPlaneInitializedTime) + if controlPlaneInitializedTime.After(comparisonTime) { + comparisonTime = controlPlaneInitializedTime + } + if clusterInfraReadyTime.After(comparisonTime) { + comparisonTime = clusterInfraReadyTime } - if t.Machine.Status.LastUpdated.Add(timeoutForMachineToHaveNode).Before(now) { + logger.V(3).Info("Using comparison time", "time", comparisonTime) + + if comparisonTime.Add(timeoutForMachineToHaveNode).Before(now) { conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSuccededCondition, clusterv1.NodeStartupTimeoutReason, clusterv1.ConditionSeverityWarning, "Node failed to report startup in %s", timeoutForMachineToHaveNode.String()) logger.V(3).Info("Target is unhealthy: machine has no node", "duration", timeoutForMachineToHaveNode.String()) return true, time.Duration(0) } - durationUnhealthy := now.Sub(t.Machine.Status.LastUpdated.Time) + + durationUnhealthy := now.Sub(comparisonTime) nextCheck := timeoutForMachineToHaveNode - durationUnhealthy + time.Second + return false, nextCheck } @@ -169,6 +197,11 @@ func (r *MachineHealthCheckReconciler) getTargetsFromMHC(ctx 
context.Context, lo return nil, nil } + var cluster clusterv1.Cluster + if err := clusterClient.Get(ctx, client.ObjectKey{Namespace: mhc.Namespace, Name: mhc.Spec.ClusterName}, &cluster); err != nil { + return nil, errors.Wrapf(err, "error getting Cluster %s/%s for MachineHealthCheck %s", mhc.Namespace, mhc.Spec.ClusterName, mhc.Name) + } + targets := []healthCheckTarget{} for k := range machines { skip, reason := shouldSkipRemediation(&machines[k]) @@ -182,6 +215,7 @@ func (r *MachineHealthCheckReconciler) getTargetsFromMHC(ctx context.Context, lo return nil, errors.Wrap(err, "unable to initialize patch helper") } target := healthCheckTarget{ + Cluster: &cluster, MHC: mhc, Machine: &machines[k], patchHelper: patchHelper, @@ -201,7 +235,7 @@ func (r *MachineHealthCheckReconciler) getTargetsFromMHC(ctx context.Context, lo return targets, nil } -//getMachinesFromMHC fetches Machines matched by the MachineHealthCheck's +// getMachinesFromMHC fetches Machines matched by the MachineHealthCheck's // label selector func (r *MachineHealthCheckReconciler) getMachinesFromMHC(ctx context.Context, mhc *clusterv1.MachineHealthCheck) ([]clusterv1.Machine, error) { selector, err := metav1.LabelSelectorAsSelector(metav1.CloneSelectorAndAddLabel( diff --git a/controllers/machinehealthcheck_targets_test.go b/controllers/machinehealthcheck_targets_test.go index a0a8cb9d0b33..caab2448f59d 100644 --- a/controllers/machinehealthcheck_targets_test.go +++ b/controllers/machinehealthcheck_targets_test.go @@ -21,12 +21,12 @@ import ( "time" . "github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/record" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" + "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -36,6 +36,14 @@ import ( func TestGetTargetsFromMHC(t *testing.T) { namespace := "test-mhc" clusterName := "test-cluster" + + cluster := &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: clusterName, + }, + } + mhcSelector := map[string]string{"cluster": clusterName, "machine-group": "foo"} // Create a namespace for the tests @@ -62,7 +70,7 @@ func TestGetTargetsFromMHC(t *testing.T) { }, } - baseObjects := []client.Object{testNS, testMHC} + baseObjects := []client.Object{testNS, cluster, testMHC} // Initialise some test machines and nodes for use in the test cases @@ -179,8 +187,20 @@ func TestGetTargetsFromMHC(t *testing.T) { func TestHealthCheckTargets(t *testing.T) { namespace := "test-mhc" clusterName := "test-cluster" + + cluster := &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: clusterName, + }, + } + conditions.MarkTrue(cluster, clusterv1.InfrastructureReadyCondition) + conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition) + mhcSelector := map[string]string{"cluster": clusterName, "machine-group": "foo"} + timeoutForMachineToHaveNode := 10 * time.Minute + // Create a test MHC testMHC := &clusterv1.MachineHealthCheck{ ObjectMeta: metav1.ObjectMeta{ @@ -215,6 +235,7 @@ func TestHealthCheckTargets(t *testing.T) { testMachineLastUpdated400s.Status.LastUpdated = &nowMinus400s nodeNotYetStartedTarget := healthCheckTarget{ + Cluster: cluster, MHC: testMHC, Machine: testMachineLastUpdated400s, Node: nil, @@ -222,6 +243,7 @@ func TestHealthCheckTargets(t *testing.T) { // Target for when the Node has been seen, 
but has now gone nodeGoneAway := healthCheckTarget{ + Cluster: cluster, MHC: testMHC, Machine: testMachine, Node: &corev1.Node{}, @@ -231,6 +253,7 @@ func TestHealthCheckTargets(t *testing.T) { // Target for when the node has been in an unknown state for shorter than the timeout testNodeUnknown200 := newTestUnhealthyNode("node1", corev1.NodeReady, corev1.ConditionUnknown, 200*time.Second) nodeUnknown200 := healthCheckTarget{ + Cluster: cluster, MHC: testMHC, Machine: testMachine, Node: testNodeUnknown200, @@ -240,6 +263,7 @@ func TestHealthCheckTargets(t *testing.T) { // Second Target for when the node has been in an unknown state for shorter than the timeout testNodeUnknown100 := newTestUnhealthyNode("node1", corev1.NodeReady, corev1.ConditionUnknown, 100*time.Second) nodeUnknown100 := healthCheckTarget{ + Cluster: cluster, MHC: testMHC, Machine: testMachine, Node: testNodeUnknown100, @@ -249,6 +273,7 @@ func TestHealthCheckTargets(t *testing.T) { // Target for when the node has been in an unknown state for longer than the timeout testNodeUnknown400 := newTestUnhealthyNode("node1", corev1.NodeReady, corev1.ConditionUnknown, 400*time.Second) nodeUnknown400 := healthCheckTarget{ + Cluster: cluster, MHC: testMHC, Machine: testMachine, Node: testNodeUnknown400, @@ -259,6 +284,7 @@ func TestHealthCheckTargets(t *testing.T) { testNodeHealthy := newTestNode("node1") testNodeHealthy.UID = "12345" nodeHealthy := healthCheckTarget{ + Cluster: cluster, MHC: testMHC, Machine: testMachine, Node: testNodeHealthy, @@ -277,7 +303,7 @@ func TestHealthCheckTargets(t *testing.T) { targets: []healthCheckTarget{nodeNotYetStartedTarget}, expectedHealthy: []healthCheckTarget{}, expectedNeedsRemediation: []healthCheckTarget{}, - expectedNextCheckTimes: []time.Duration{200 * time.Second}, + expectedNextCheckTimes: []time.Duration{timeoutForMachineToHaveNode}, }, { desc: "when the node has gone away", @@ -318,18 +344,13 @@ func TestHealthCheckTargets(t *testing.T) { for _, tc := range testCases { t.Run(tc.desc, func(t *testing.T) { - gs := NewGomegaWithT(t) - - gs.Expect(clusterv1.AddToScheme(scheme.Scheme)).To(Succeed()) - k8sClient := fake.NewClientBuilder().WithScheme(scheme.Scheme).Build() + gs := NewWithT(t) - // Create a test reconciler + // Create a test reconciler. reconciler := &MachineHealthCheckReconciler{ - Client: k8sClient, recorder: record.NewFakeRecorder(5), } - timeoutForMachineToHaveNode := 10 * time.Minute healthy, unhealthy, nextCheckTimes := reconciler.healthCheckTargets(tc.targets, ctrl.LoggerFrom(ctx), timeoutForMachineToHaveNode) // Round durations down to nearest second account for minute differences From ff383eca185911abf5078eface4dc74bf71f6e8b Mon Sep 17 00:00:00 2001 From: Andy Goldstein Date: Fri, 5 Mar 2021 12:27:07 -0500 Subject: [PATCH 4/4] Add mutations to conversion tests Add optional "hub after" and "spoke after" mutation functions to the conversion tests to support things like removing fields that were added during the conversion process that will cause the equality check to fail. 
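For example, the Cluster fuzz test opts in through the new FuzzTestFuncInput
struct; this is a sketch of the wiring (see the diff below for the full
change — clusterSpokeAfterMutation is the helper added in this patch):

    t.Run("for Cluster", utilconversion.FuzzTestFunc(utilconversion.FuzzTestFuncInput{
        Scheme: scheme,
        Hub:    &v1alpha4.Cluster{},
        Spoke:  &Cluster{},
        // Remove the ControlPlaneInitialized condition that the round trip
        // adds, so the spoke-hub-spoke equality check can still pass.
        SpokeAfterMutation: clusterSpokeAfterMutation,
    }))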
Signed-off-by: Andy Goldstein --- api/v1alpha3/conversion.go | 1 + api/v1alpha3/conversion_test.go | 62 +++++++++++++++++-- .../kubeadm/api/v1alpha3/conversion_test.go | 14 ++++- .../kubeadm/api/v1alpha3/conversion_test.go | 45 ++++++++------ util/conversion/conversion.go | 38 +++++++++--- 5 files changed, 124 insertions(+), 36 deletions(-) diff --git a/api/v1alpha3/conversion.go b/api/v1alpha3/conversion.go index 19dbb65cb083..c693abd720a5 100644 --- a/api/v1alpha3/conversion.go +++ b/api/v1alpha3/conversion.go @@ -48,6 +48,7 @@ func (dst *Cluster) ConvertFrom(srcRaw conversion.Hub) error { return err } + // Set the v1alpha3 boolean status field if the v1alpha4 condition was true if conditions.IsTrue(src, v1alpha4.ControlPlaneInitializedCondition) { dst.Status.ControlPlaneInitialized = true } diff --git a/api/v1alpha3/conversion_test.go b/api/v1alpha3/conversion_test.go index 6db7519188d4..c55680620371 100644 --- a/api/v1alpha3/conversion_test.go +++ b/api/v1alpha3/conversion_test.go @@ -21,6 +21,8 @@ import ( fuzz "github.com/google/gofuzz" . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/api/apitesting/fuzzer" + "sigs.k8s.io/controller-runtime/pkg/conversion" "k8s.io/apimachinery/pkg/runtime" runtimeserializer "k8s.io/apimachinery/pkg/runtime/serializer" @@ -34,11 +36,39 @@ func TestFuzzyConversion(t *testing.T) { g.Expect(AddToScheme(scheme)).To(Succeed()) g.Expect(v1alpha4.AddToScheme(scheme)).To(Succeed()) - t.Run("for Cluster", utilconversion.FuzzTestFunc(scheme, &v1alpha4.Cluster{}, &Cluster{})) - t.Run("for Machine", utilconversion.FuzzTestFunc(scheme, &v1alpha4.Machine{}, &Machine{}, BootstrapFuzzFuncs)) - t.Run("for MachineSet", utilconversion.FuzzTestFunc(scheme, &v1alpha4.MachineSet{}, &MachineSet{}, BootstrapFuzzFuncs)) - t.Run("for MachineDeployment", utilconversion.FuzzTestFunc(scheme, &v1alpha4.MachineDeployment{}, &MachineDeployment{}, BootstrapFuzzFuncs)) - t.Run("for MachineHealthCheckSpec", utilconversion.FuzzTestFunc(scheme, &v1alpha4.MachineHealthCheck{}, &MachineHealthCheck{})) + t.Run("for Cluster", utilconversion.FuzzTestFunc(utilconversion.FuzzTestFuncInput{ + Scheme: scheme, + Hub: &v1alpha4.Cluster{}, + Spoke: &Cluster{}, + SpokeAfterMutation: clusterSpokeAfterMutation, + })) + + t.Run("for Machine", utilconversion.FuzzTestFunc(utilconversion.FuzzTestFuncInput{ + Scheme: scheme, + Hub: &v1alpha4.Machine{}, + Spoke: &Machine{}, + FuzzerFuncs: []fuzzer.FuzzerFuncs{BootstrapFuzzFuncs}, + })) + + t.Run("for MachineSet", utilconversion.FuzzTestFunc(utilconversion.FuzzTestFuncInput{ + Scheme: scheme, + Hub: &v1alpha4.MachineSet{}, + Spoke: &MachineSet{}, + FuzzerFuncs: []fuzzer.FuzzerFuncs{BootstrapFuzzFuncs}, + })) + + t.Run("for MachineDeployment", utilconversion.FuzzTestFunc(utilconversion.FuzzTestFuncInput{ + Scheme: scheme, + Hub: &v1alpha4.MachineDeployment{}, + Spoke: &MachineDeployment{}, + FuzzerFuncs: []fuzzer.FuzzerFuncs{BootstrapFuzzFuncs}, + })) + + t.Run("for MachineHealthCheckSpec", utilconversion.FuzzTestFunc(utilconversion.FuzzTestFuncInput{ + Scheme: scheme, + Hub: &v1alpha4.MachineHealthCheck{}, + Spoke: &MachineHealthCheck{}, + })) } func BootstrapFuzzFuncs(_ runtimeserializer.CodecFactory) []interface{} { @@ -53,3 +83,25 @@ func BootstrapFuzzer(obj *Bootstrap, c fuzz.Continue) { // Bootstrap.Data has been removed in v1alpha4, so setting it to nil in order to avoid v1alpha3 --> v1alpha4 --> v1alpha3 round trip errors. 
obj.Data = nil } + +// clusterSpokeAfterMutation modifies the spoke version of the Cluster such that it can pass an equality test in the +// spoke-hub-spoke conversion scenario. +func clusterSpokeAfterMutation(c conversion.Convertible) { + cluster := c.(*Cluster) + + // Create a temporary 0-length slice using the same underlying array as cluster.Status.Conditions to avoid + // allocations. + tmp := cluster.Status.Conditions[:0] + + for i := range cluster.Status.Conditions { + condition := cluster.Status.Conditions[i] + + // Keep everything that is not ControlPlaneInitializedCondition + if condition.Type != ConditionType(v1alpha4.ControlPlaneInitializedCondition) { + tmp = append(tmp, condition) + } + } + + // Point cluster.Status.Conditions and our slice that does not have ControlPlaneInitializedCondition + cluster.Status.Conditions = tmp +} diff --git a/bootstrap/kubeadm/api/v1alpha3/conversion_test.go b/bootstrap/kubeadm/api/v1alpha3/conversion_test.go index 300b9fd50de4..88a6228317ec 100644 --- a/bootstrap/kubeadm/api/v1alpha3/conversion_test.go +++ b/bootstrap/kubeadm/api/v1alpha3/conversion_test.go @@ -21,6 +21,7 @@ import ( fuzz "github.com/google/gofuzz" . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/api/apitesting/fuzzer" runtimeserializer "k8s.io/apimachinery/pkg/runtime/serializer" "k8s.io/apimachinery/pkg/runtime" @@ -34,8 +35,17 @@ func TestFuzzyConversion(t *testing.T) { g.Expect(AddToScheme(scheme)).To(Succeed()) g.Expect(v1alpha4.AddToScheme(scheme)).To(Succeed()) - t.Run("for KubeadmConfig", utilconversion.FuzzTestFunc(scheme, &v1alpha4.KubeadmConfig{}, &KubeadmConfig{}, KubeadmConfigStatusFuzzFuncs)) - t.Run("for KubeadmConfigTemplate", utilconversion.FuzzTestFunc(scheme, &v1alpha4.KubeadmConfigTemplate{}, &KubeadmConfigTemplate{})) + t.Run("for KubeadmConfig", utilconversion.FuzzTestFunc(utilconversion.FuzzTestFuncInput{ + Scheme: scheme, + Hub: &v1alpha4.KubeadmConfig{}, + Spoke: &KubeadmConfig{}, + FuzzerFuncs: []fuzzer.FuzzerFuncs{KubeadmConfigStatusFuzzFuncs}, + })) + t.Run("for KubeadmConfigTemplate", utilconversion.FuzzTestFunc(utilconversion.FuzzTestFuncInput{ + Scheme: scheme, + Hub: &v1alpha4.KubeadmConfigTemplate{}, + Spoke: &KubeadmConfigTemplate{}, + })) } func KubeadmConfigStatusFuzzFuncs(_ runtimeserializer.CodecFactory) []interface{} { diff --git a/controlplane/kubeadm/api/v1alpha3/conversion_test.go b/controlplane/kubeadm/api/v1alpha3/conversion_test.go index 8de6c65b0960..62d6e0f8f32a 100644 --- a/controlplane/kubeadm/api/v1alpha3/conversion_test.go +++ b/controlplane/kubeadm/api/v1alpha3/conversion_test.go @@ -21,6 +21,7 @@ import ( fuzz "github.com/google/gofuzz" . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/api/apitesting/fuzzer" "k8s.io/apimachinery/pkg/runtime" runtimeserializer "k8s.io/apimachinery/pkg/runtime/serializer" @@ -35,25 +36,29 @@ func TestFuzzyConversion(t *testing.T) { g.Expect(AddToScheme(scheme)).To(Succeed()) g.Expect(v1alpha4.AddToScheme(scheme)).To(Succeed()) - t.Run("for KubeadmControlPLane", utilconversion.FuzzTestFunc( - scheme, &v1alpha4.KubeadmControlPlane{}, &KubeadmControlPlane{}, - func(codecs runtimeserializer.CodecFactory) []interface{} { - return []interface{}{ - // This custom function is needed when ConvertTo/ConvertFrom functions - // uses the json package to unmarshal the bootstrap token string. 
- // - // The Kubeadm v1beta1.BootstrapTokenString type ships with a custom - // json string representation, in particular it supplies a customized - // UnmarshalJSON function that can return an error if the string - // isn't in the correct form. - // - // This function effectively disables any fuzzing for the token by setting - // the values for ID and Secret to working alphanumeric values. - func(in *kubeadmv1.BootstrapTokenString, c fuzz.Continue) { - in.ID = "abcdef" - in.Secret = "abcdef0123456789" - }, - } + t.Run("for KubeadmControlPLane", utilconversion.FuzzTestFunc(utilconversion.FuzzTestFuncInput{ + Scheme: scheme, + Hub: &v1alpha4.KubeadmControlPlane{}, + Spoke: &KubeadmControlPlane{}, + FuzzerFuncs: []fuzzer.FuzzerFuncs{ + func(codecs runtimeserializer.CodecFactory) []interface{} { + return []interface{}{ + // This custom function is needed when ConvertTo/ConvertFrom functions + // uses the json package to unmarshal the bootstrap token string. + // + // The Kubeadm v1beta1.BootstrapTokenString type ships with a custom + // json string representation, in particular it supplies a customized + // UnmarshalJSON function that can return an error if the string + // isn't in the correct form. + // + // This function effectively disables any fuzzing for the token by setting + // the values for ID and Secret to working alphanumeric values. + func(in *kubeadmv1.BootstrapTokenString, c fuzz.Continue) { + in.ID = "abcdef" + in.Secret = "abcdef0123456789" + }, + } + }, }, - )) + })) } diff --git a/util/conversion/conversion.go b/util/conversion/conversion.go index 278a1861ae53..152eb9624de4 100644 --- a/util/conversion/conversion.go +++ b/util/conversion/conversion.go @@ -136,51 +136,71 @@ func GetFuzzer(scheme *runtime.Scheme, funcs ...fuzzer.FuzzerFuncs) *fuzz.Fuzzer ) } +type FuzzTestFuncInput struct { + Scheme *runtime.Scheme + + Hub conversion.Hub + HubAfterMutation func(conversion.Hub) + + Spoke conversion.Convertible + SpokeAfterMutation func(convertible conversion.Convertible) + + FuzzerFuncs []fuzzer.FuzzerFuncs +} + // FuzzTestFunc returns a new testing function to be used in tests to make sure conversions between // the Hub version of an object and an older version aren't lossy. -func FuzzTestFunc(scheme *runtime.Scheme, hub conversion.Hub, dst conversion.Convertible, funcs ...fuzzer.FuzzerFuncs) func(*testing.T) { +func FuzzTestFunc(input FuzzTestFuncInput) func(*testing.T) { return func(t *testing.T) { t.Run("spoke-hub-spoke", func(t *testing.T) { g := gomega.NewWithT(t) - fuzzer := GetFuzzer(scheme, funcs...) + fuzzer := GetFuzzer(input.Scheme, input.FuzzerFuncs...) 
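			// Each iteration fuzzes a fresh spoke, converts it to the hub type and
			// back, strips the conversion data annotation, applies the optional
			// SpokeAfterMutation hook, and then requires semantic equality with the
			// original fuzzed spoke.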
for i := 0; i < 10000; i++ { // Create the spoke and fuzz it - spokeBefore := dst.DeepCopyObject().(conversion.Convertible) + spokeBefore := input.Spoke.DeepCopyObject().(conversion.Convertible) fuzzer.Fuzz(spokeBefore) // First convert spoke to hub - hubCopy := hub.DeepCopyObject().(conversion.Hub) + hubCopy := input.Hub.DeepCopyObject().(conversion.Hub) g.Expect(spokeBefore.ConvertTo(hubCopy)).To(gomega.Succeed()) // Convert hub back to spoke and check if the resulting spoke is equal to the spoke before the round trip - spokeAfter := dst.DeepCopyObject().(conversion.Convertible) + spokeAfter := input.Spoke.DeepCopyObject().(conversion.Convertible) g.Expect(spokeAfter.ConvertFrom(hubCopy)).To(gomega.Succeed()) // Remove data annotation eventually added by ConvertFrom for avoiding data loss in hub-spoke-hub round trips metaAfter := spokeAfter.(metav1.Object) delete(metaAfter.GetAnnotations(), DataAnnotation) + if input.SpokeAfterMutation != nil { + input.SpokeAfterMutation(spokeAfter) + } + g.Expect(apiequality.Semantic.DeepEqual(spokeBefore, spokeAfter)).To(gomega.BeTrue(), cmp.Diff(spokeBefore, spokeAfter)) } }) t.Run("hub-spoke-hub", func(t *testing.T) { g := gomega.NewWithT(t) - fuzzer := GetFuzzer(scheme, funcs...) + fuzzer := GetFuzzer(input.Scheme, input.FuzzerFuncs...) for i := 0; i < 10000; i++ { // Create the hub and fuzz it - hubBefore := hub.DeepCopyObject().(conversion.Hub) + hubBefore := input.Hub.DeepCopyObject().(conversion.Hub) fuzzer.Fuzz(hubBefore) // First convert hub to spoke - dstCopy := dst.DeepCopyObject().(conversion.Convertible) + dstCopy := input.Spoke.DeepCopyObject().(conversion.Convertible) g.Expect(dstCopy.ConvertFrom(hubBefore)).To(gomega.Succeed()) // Convert spoke back to hub and check if the resulting hub is equal to the hub before the round trip - hubAfter := hub.DeepCopyObject().(conversion.Hub) + hubAfter := input.Hub.DeepCopyObject().(conversion.Hub) g.Expect(dstCopy.ConvertTo(hubAfter)).To(gomega.Succeed()) + if input.HubAfterMutation != nil { + input.HubAfterMutation(hubAfter) + } + g.Expect(apiequality.Semantic.DeepEqual(hubBefore, hubAfter)).To(gomega.BeTrue(), cmp.Diff(hubBefore, hubAfter)) } })
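// HubAfterMutation is the symmetric hook on the hub-spoke-hub leg: it runs on
// hubAfter immediately before the equality check. A hypothetical caller
// (Widget and HubOnlyField below are illustrative names, not real API types
// in this series) could use it to clear a hub-only field that the spoke
// cannot round-trip:
//
//	t.Run("for Widget", utilconversion.FuzzTestFunc(utilconversion.FuzzTestFuncInput{
//		Scheme: scheme,
//		Hub:    &v1alpha4.Widget{},
//		Spoke:  &Widget{},
//		HubAfterMutation: func(h conversion.Hub) {
//			h.(*v1alpha4.Widget).Status.HubOnlyField = ""
//		},
//	}))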