diff --git a/api/v1beta1/machinehealthcheck_webhook.go b/api/v1beta1/machinehealthcheck_webhook.go index ce27eba4ab6f..87e7e1ad9b58 100644 --- a/api/v1beta1/machinehealthcheck_webhook.go +++ b/api/v1beta1/machinehealthcheck_webhook.go @@ -136,37 +136,53 @@ func (m *MachineHealthCheck) validate(old *MachineHealthCheck) error { ) } + allErrs = append(allErrs, m.ValidateCommonFields(specPath)...) + + if len(allErrs) == 0 { + return nil + } + return apierrors.NewInvalid(GroupVersion.WithKind("MachineHealthCheck").GroupKind(), m.Name, allErrs) +} + +// ValidateCommonFields validates UnhealthyConditions NodeStartupTimeout, MaxUnhealthy, and RemediationTemplate of the MHC. +// It is also used to help validate other types which define MachineHealthChecks such as MachineHealthCheckClass and MachineHealthCheckTopology. +// Note: this function is used for internal validation and is not intended for external consumption. +func (m *MachineHealthCheck) ValidateCommonFields(fldPath *field.Path) field.ErrorList { + var allErrs field.ErrorList + if m.Spec.NodeStartupTimeout != nil && m.Spec.NodeStartupTimeout.Seconds() != disabledNodeStartupTimeout.Seconds() && m.Spec.NodeStartupTimeout.Seconds() < minNodeStartupTimeout.Seconds() { allErrs = append( allErrs, - field.Invalid(specPath.Child("nodeStartupTimeout"), m.Spec.NodeStartupTimeout.Seconds(), "must be at least 30s"), + field.Invalid(fldPath.Child("nodeStartupTimeout"), m.Spec.NodeStartupTimeout.String(), "must be at least 30s"), ) } - if m.Spec.MaxUnhealthy != nil { if _, err := intstr.GetScaledValueFromIntOrPercent(m.Spec.MaxUnhealthy, 0, false); err != nil { allErrs = append( allErrs, - field.Invalid(specPath.Child("maxUnhealthy"), m.Spec.MaxUnhealthy, fmt.Sprintf("must be either an int or a percentage: %v", err.Error())), + field.Invalid(fldPath.Child("maxUnhealthy"), m.Spec.MaxUnhealthy, fmt.Sprintf("must be either an int or a percentage: %v", err.Error())), ) } } - if m.Spec.RemediationTemplate != nil && m.Spec.RemediationTemplate.Namespace != m.Namespace { allErrs = append( allErrs, field.Invalid( - specPath.Child("remediationTemplate", "namespace"), + fldPath.Child("remediationTemplate", "namespace"), m.Spec.RemediationTemplate.Namespace, "must match metadata.namespace", ), ) } - if len(allErrs) == 0 { - return nil + if len(m.Spec.UnhealthyConditions) == 0 { + allErrs = append(allErrs, field.Forbidden( + fldPath.Child("unhealthyConditions"), + "must have at least one entry", + )) } - return apierrors.NewInvalid(GroupVersion.WithKind("MachineHealthCheck").GroupKind(), m.Name, allErrs) + + return allErrs } diff --git a/api/v1beta1/machinehealthcheck_webhook_test.go b/api/v1beta1/machinehealthcheck_webhook_test.go index 15f7cb8acaef..e560efe969c2 100644 --- a/api/v1beta1/machinehealthcheck_webhook_test.go +++ b/api/v1beta1/machinehealthcheck_webhook_test.go @@ -39,6 +39,12 @@ func TestMachineHealthCheckDefault(t *testing.T) { MatchLabels: map[string]string{"foo": "bar"}, }, RemediationTemplate: &corev1.ObjectReference{}, + UnhealthyConditions: []UnhealthyCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionFalse, + }, + }, }, } t.Run("for MachineHealthCheck", utildefaulting.DefaultValidateTest(mhc)) @@ -77,6 +83,12 @@ func TestMachineHealthCheckLabelSelectorAsSelectorValidation(t *testing.T) { Selector: metav1.LabelSelector{ MatchLabels: tt.selectors, }, + UnhealthyConditions: []UnhealthyCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionFalse, + }, + }, }, } if tt.expectErr { @@ -123,6 +135,12 @@ func TestMachineHealthCheckClusterNameImmutable(t *testing.T) { "test": "test", }, }, + UnhealthyConditions: []UnhealthyCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionFalse, + }, + }, }, } oldMHC := &MachineHealthCheck{ @@ -133,6 +151,12 @@ func TestMachineHealthCheckClusterNameImmutable(t *testing.T) { "test": "test", }, }, + UnhealthyConditions: []UnhealthyCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionFalse, + }, + }, }, } @@ -145,6 +169,58 @@ func TestMachineHealthCheckClusterNameImmutable(t *testing.T) { } } +func TestMachineHealthCheckUnhealthyConditions(t *testing.T) { + tests := []struct { + name string + unhealthConditions []UnhealthyCondition + expectErr bool + }{ + { + name: "pass with correctly defined unhealthyConditions", + unhealthConditions: []UnhealthyCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionFalse, + }, + }, + expectErr: false, + }, + { + name: "fail if the UnhealthCondition array is nil", + unhealthConditions: nil, + expectErr: true, + }, + { + name: "fail if the UnhealthCondition array is empty", + unhealthConditions: []UnhealthyCondition{}, + expectErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + mhc := &MachineHealthCheck{ + Spec: MachineHealthCheckSpec{ + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "test": "test", + }, + }, + UnhealthyConditions: tt.unhealthConditions, + }, + } + if tt.expectErr { + g.Expect(mhc.ValidateCreate()).NotTo(Succeed()) + g.Expect(mhc.ValidateUpdate(mhc)).NotTo(Succeed()) + } else { + g.Expect(mhc.ValidateCreate()).To(Succeed()) + g.Expect(mhc.ValidateUpdate(mhc)).To(Succeed()) + } + }) + } +} + func TestMachineHealthCheckNodeStartupTimeout(t *testing.T) { zero := metav1.Duration{Duration: 0} twentyNineSeconds := metav1.Duration{Duration: 29 * time.Second} @@ -200,6 +276,12 @@ func TestMachineHealthCheckNodeStartupTimeout(t *testing.T) { "test": "test", }, }, + UnhealthyConditions: []UnhealthyCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionFalse, + }, + }, }, } @@ -253,6 +335,12 @@ func TestMachineHealthCheckMaxUnhealthy(t *testing.T) { "test": "test", }, }, + UnhealthyConditions: []UnhealthyCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionFalse, + }, + }, }, } @@ -268,7 +356,16 @@ func TestMachineHealthCheckMaxUnhealthy(t *testing.T) { func TestMachineHealthCheckSelectorValidation(t *testing.T) { g := NewWithT(t) - mhc := &MachineHealthCheck{} + mhc := &MachineHealthCheck{ + Spec: MachineHealthCheckSpec{ + UnhealthyConditions: []UnhealthyCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionFalse, + }, + }, + }, + } err := mhc.validate(nil) g.Expect(err).ToNot(BeNil()) g.Expect(err.Error()).To(ContainSubstring("selector must not be empty")) @@ -285,6 +382,12 @@ func TestMachineHealthCheckClusterNameSelectorValidation(t *testing.T) { "baz": "qux", }, }, + UnhealthyConditions: []UnhealthyCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionFalse, + }, + }, }, } err := mhc.validate(nil) @@ -305,6 +408,12 @@ func TestMachineHealthCheckRemediationTemplateNamespaceValidation(t *testing.T) Spec: MachineHealthCheckSpec{ Selector: metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, RemediationTemplate: &corev1.ObjectReference{Namespace: "foo"}, + UnhealthyConditions: []UnhealthyCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionFalse, + }, + }, }, } invalid := valid.DeepCopy() diff --git a/internal/webhooks/cluster.go b/internal/webhooks/cluster.go index a4b66481c80e..9dc499b2acc0 100644 --- a/internal/webhooks/cluster.go +++ b/internal/webhooks/cluster.go @@ -366,64 +366,64 @@ func (webhook *Cluster) getClusterClassForCluster(ctx context.Context, cluster * func validateMachineHealthChecks(cluster *clusterv1.Cluster, clusterClass *clusterv1.ClusterClass) field.ErrorList { var allErrs field.ErrorList - // Validate ControlPlane MachineHealthCheck if defined. - if cluster.Spec.Topology.ControlPlane.MachineHealthCheck != nil && !cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass.IsZero() { - // Ensure ControlPlane does not define a MachineHealthCheck if the ClusterClass does not define MachineInfrastructure. - if clusterClass.Spec.ControlPlane.MachineInfrastructure == nil { - allErrs = append(allErrs, field.Forbidden( - field.NewPath("spec", "topology", "controlPlane", "machineHealthCheck"), - "can be set only if spec.controlPlane.machineInfrastructure is set in ClusterClass", - )) - } - // Ensure ControlPlane MachineHealthCheck defines UnhealthyConditions. - if len(cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass.UnhealthyConditions) == 0 { - allErrs = append(allErrs, field.Forbidden( - field.NewPath("spec", "topology", "controlPlane", "machineHealthCheck", "unhealthyConditions"), - "must have at least one value", - )) + if cluster.Spec.Topology.ControlPlane.MachineHealthCheck != nil { + fldPath := field.NewPath("spec", "topology", "controlPlane", "machineHealthCheck") + + // Validate ControlPlane MachineHealthCheck if defined. + if !cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass.IsZero() { + // Ensure ControlPlane does not define a MachineHealthCheck if the ClusterClass does not define MachineInfrastructure. + if clusterClass.Spec.ControlPlane.MachineInfrastructure == nil { + allErrs = append(allErrs, field.Forbidden( + fldPath, + "can be set only if spec.controlPlane.machineInfrastructure is set in ClusterClass", + )) + } + allErrs = append(allErrs, validateMachineHealthCheckClass(fldPath, cluster.Namespace, + &cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass)...) } - } - // If MachineHealthCheck is explicitly enabled then make sure that a MachineHealthCheck definition is - // available either in the Cluster topology or in the ClusterClass. - // (One of these definitions will be used in the controller to create the MachineHealthCheck) - if cluster.Spec.Topology.ControlPlane.MachineHealthCheck != nil && - cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable != nil && - *cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable && - cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass.IsZero() && - clusterClass.Spec.ControlPlane.MachineHealthCheck == nil { - allErrs = append(allErrs, field.Forbidden( - field.NewPath("spec", "topology", "controlPlane", "machineHealthCheck", "enable"), - fmt.Sprintf("cannot be set to %t as MachineHealthCheck definition is not available in the Cluster topology or the ClusterClass", *cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable), - )) + // If MachineHealthCheck is explicitly enabled then make sure that a MachineHealthCheck definition is + // available either in the Cluster topology or in the ClusterClass. + // (One of these definitions will be used in the controller to create the MachineHealthCheck) + + // Check if the machineHealthCheck is explicitly enabled in the ControlPlaneTopology. + if cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable != nil && *cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable { + // Ensure the MHC is defined in at least one of the ControlPlaneTopology of the Cluster or the ControlPlaneClass of the ClusterClass. + if cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass.IsZero() && clusterClass.Spec.ControlPlane.MachineHealthCheck == nil { + allErrs = append(allErrs, field.Forbidden( + fldPath.Child("enable"), + fmt.Sprintf("cannot be set to %t as MachineHealthCheck definition is not available in the Cluster topology or the ClusterClass", *cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable), + )) + } + } } if cluster.Spec.Topology.Workers != nil { for i, md := range cluster.Spec.Topology.Workers.MachineDeployments { - // If MachineHealthCheck is defined ensure it defines UnhealthyConditions. - if md.MachineHealthCheck != nil && !md.MachineHealthCheck.MachineHealthCheckClass.IsZero() { - if len(md.MachineHealthCheck.MachineHealthCheckClass.UnhealthyConditions) == 0 { - allErrs = append(allErrs, field.Forbidden( - field.NewPath("spec", "topology", "workers", "machineDeployments", "machineHealthCheck").Index(i).Child("unhealthyConditions"), - "must have at least one value", - )) + if md.MachineHealthCheck != nil { + fldPath := field.NewPath("spec", "topology", "workers", "machineDeployments", "machineHealthCheck").Index(i) + + // Validate the MachineDeployment MachineHealthCheck if defined. + if !md.MachineHealthCheck.MachineHealthCheckClass.IsZero() { + allErrs = append(allErrs, validateMachineHealthCheckClass(fldPath, cluster.Namespace, + &md.MachineHealthCheck.MachineHealthCheckClass)...) } - } - // If MachineHealthCheck is explicitly enabled then make sure that a MachineHealthCheck definition is - // available either in the Cluster topology or in the ClusterClass. - // (One of these definitions will be used in the controller to create the MachineHealthCheck) - mdClass := machineDeploymentClassOfName(clusterClass, md.Class) - if mdClass != nil { // Note: we skip handling the nil case here as it is already handled in previous validations. - if md.MachineHealthCheck != nil && - md.MachineHealthCheck.Enable != nil && - *md.MachineHealthCheck.Enable && - md.MachineHealthCheck.MachineHealthCheckClass.IsZero() && - mdClass.MachineHealthCheck == nil { - allErrs = append(allErrs, field.Forbidden( - field.NewPath("spec", "topology", "workers", "machineDeployments", "machineHealthCheck").Index(i).Child("enable"), - fmt.Sprintf("cannot be set to %t as MachineHealthCheck definition is not available in the Cluster topology or the ClusterClass", *md.MachineHealthCheck.Enable), - )) + // If MachineHealthCheck is explicitly enabled then make sure that a MachineHealthCheck definition is + // available either in the Cluster topology or in the ClusterClass. + // (One of these definitions will be used in the controller to create the MachineHealthCheck) + mdClass := machineDeploymentClassOfName(clusterClass, md.Class) + if mdClass != nil { // Note: we skip handling the nil case here as it is already handled in previous validations. + // Check if the machineHealthCheck is explicitly enabled in the machineDeploymentTopology. + if md.MachineHealthCheck.Enable != nil && *md.MachineHealthCheck.Enable { + // Ensure the MHC is defined in at least one of the MachineDeploymentTopology of the Cluster or the MachineDeploymentClass of the ClusterClass. + if md.MachineHealthCheck.MachineHealthCheckClass.IsZero() && mdClass.MachineHealthCheck == nil { + allErrs = append(allErrs, field.Forbidden( + fldPath.Child("enable"), + fmt.Sprintf("cannot be set to %t as MachineHealthCheck definition is not available in the Cluster topology or the ClusterClass", *md.MachineHealthCheck.Enable), + )) + } + } } } } @@ -433,7 +433,7 @@ func validateMachineHealthChecks(cluster *clusterv1.Cluster, clusterClass *clust } // machineDeploymentClassOfName find a MachineDeploymentClass of the given name in the provided ClusterClass. -// Returns nill if can not find one. +// Returns nil if it can not find one. // TODO: Check if there is already a helper function that can do this. func machineDeploymentClassOfName(clusterClass *clusterv1.ClusterClass, name string) *clusterv1.MachineDeploymentClass { for _, mdClass := range clusterClass.Spec.Workers.MachineDeployments { diff --git a/internal/webhooks/clusterclass.go b/internal/webhooks/clusterclass.go index 470a3b4238ca..1eac14104346 100644 --- a/internal/webhooks/clusterclass.go +++ b/internal/webhooks/clusterclass.go @@ -25,6 +25,7 @@ import ( "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/validation/field" @@ -538,20 +539,18 @@ func validateMachineHealthCheckClasses(clusterClass *clusterv1.ClusterClass) fie // Validate ControlPlane MachineHealthCheck if defined. if clusterClass.Spec.ControlPlane.MachineHealthCheck != nil { + fldPath := field.NewPath("spec", "controlPlane", "machineHealthCheck") + + allErrs = append(allErrs, validateMachineHealthCheckClass(fldPath, clusterClass.Namespace, + clusterClass.Spec.ControlPlane.MachineHealthCheck)...) + // Ensure ControlPlane does not define a MachineHealthCheck if it does not define MachineInfrastructure. if clusterClass.Spec.ControlPlane.MachineInfrastructure == nil { allErrs = append(allErrs, field.Forbidden( - field.NewPath("spec", "controlPlane", "machineHealthCheck"), + fldPath.Child("machineInfrastructure"), "can be set only if spec.controlPlane.machineInfrastructure is set", )) } - // Ensure ControlPlane MachineHealthCheck defines UnhealthyConditions. - if len(clusterClass.Spec.ControlPlane.MachineHealthCheck.UnhealthyConditions) == 0 { - allErrs = append(allErrs, field.Forbidden( - field.NewPath("spec", "controlPlane", "machineHealthCheck", "unhealthyConditions"), - "must be defined and have at least one value", - )) - } } // Ensure MachineDeployment MachineHealthChecks define UnhealthyConditions. @@ -559,12 +558,26 @@ func validateMachineHealthCheckClasses(clusterClass *clusterv1.ClusterClass) fie if md.MachineHealthCheck == nil { continue } - if len(md.MachineHealthCheck.UnhealthyConditions) == 0 { - allErrs = append(allErrs, field.Forbidden( - field.NewPath("spec", "workers", "machineDeployments", "machineHealthCheck").Index(i).Child("unhealthyConditions"), - "must be defined and have at least one value", - )) - } + fldPath := field.NewPath("spec", "workers", "machineDeployments", "machineHealthCheck").Index(i) + + allErrs = append(allErrs, validateMachineHealthCheckClass(fldPath, clusterClass.Namespace, md.MachineHealthCheck)...) } return allErrs } + +// validateMachineHealthCheckClass validates the MachineHealthCheckSpec fields defined in a MachineHealthCheckClass. +func validateMachineHealthCheckClass(fldPath *field.Path, namepace string, m *clusterv1.MachineHealthCheckClass) field.ErrorList { + mhc := clusterv1.MachineHealthCheck{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namepace, + }, + Spec: clusterv1.MachineHealthCheckSpec{ + NodeStartupTimeout: m.NodeStartupTimeout, + MaxUnhealthy: m.MaxUnhealthy, + UnhealthyConditions: m.UnhealthyConditions, + UnhealthyRange: m.UnhealthyRange, + RemediationTemplate: m.RemediationTemplate, + }} + + return mhc.ValidateCommonFields(fldPath) +} diff --git a/internal/webhooks/clusterclass_test.go b/internal/webhooks/clusterclass_test.go index 555e3fe900ed..7ac6f6a1e9ec 100644 --- a/internal/webhooks/clusterclass_test.go +++ b/internal/webhooks/clusterclass_test.go @@ -643,7 +643,7 @@ func TestClusterClassValidation(t *testing.T) { }, }, NodeStartupTimeout: &metav1.Duration{ - Duration: time.Duration(1)}}). + Duration: time.Duration(6000000000000)}}). Build(), }, { @@ -657,7 +657,7 @@ func TestClusterClassValidation(t *testing.T) { // No ControlPlaneMachineInfrastructure makes this an invalid creation request. WithControlPlaneMachineHealthCheck(&clusterv1.MachineHealthCheckClass{ NodeStartupTimeout: &metav1.Duration{ - Duration: time.Duration(1)}}). + Duration: time.Duration(6000000000000)}}). Build(), expectErr: true, }, @@ -674,7 +674,7 @@ func TestClusterClassValidation(t *testing.T) { Build()). WithControlPlaneMachineHealthCheck(&clusterv1.MachineHealthCheckClass{ NodeStartupTimeout: &metav1.Duration{ - Duration: time.Duration(1)}}). + Duration: time.Duration(6000000000000)}}). Build(), expectErr: true, }, @@ -701,10 +701,39 @@ func TestClusterClassValidation(t *testing.T) { }, }, NodeStartupTimeout: &metav1.Duration{ - Duration: time.Duration(1)}}). + Duration: time.Duration(6000000000000)}}). Build()). Build(), }, + { + name: "create fail if MachineDeployment MachineHealthCheck NodeStartUpTimeout is too short", + in: builder.ClusterClass(metav1.NamespaceDefault, "class1"). + WithInfrastructureClusterTemplate( + builder.InfrastructureClusterTemplate(metav1.NamespaceDefault, "infra1").Build()). + WithControlPlaneTemplate( + builder.ControlPlaneTemplate(metav1.NamespaceDefault, "cp1"). + Build()). + WithWorkerMachineDeploymentClasses( + *builder.MachineDeploymentClass("aa"). + WithInfrastructureTemplate( + builder.InfrastructureMachineTemplate(metav1.NamespaceDefault, "infra1").Build()). + WithBootstrapTemplate( + builder.BootstrapTemplate(metav1.NamespaceDefault, "bootstrap1").Build()). + WithMachineHealthCheckClass(&clusterv1.MachineHealthCheckClass{ + UnhealthyConditions: []clusterv1.UnhealthyCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionUnknown, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, + NodeStartupTimeout: &metav1.Duration{ + // nodeStartupTimeout is too short here - 600ns. + Duration: time.Duration(600)}}). + Build()). + Build(), + expectErr: true, + }, { name: "create fail if MachineDeployment MachineHealthCheck does not define UnhealthyConditions", in: builder.ClusterClass(metav1.NamespaceDefault, "class1"). @@ -721,7 +750,7 @@ func TestClusterClassValidation(t *testing.T) { builder.BootstrapTemplate(metav1.NamespaceDefault, "bootstrap1").Build()). WithMachineHealthCheckClass(&clusterv1.MachineHealthCheckClass{ NodeStartupTimeout: &metav1.Duration{ - Duration: time.Duration(1)}}). + Duration: time.Duration(6000000000000)}}). Build()). Build(), expectErr: true,