From d9226f22de28ce8b0e7c97d1d33f7c15eb37f514 Mon Sep 17 00:00:00 2001 From: killianmuldoon Date: Mon, 20 Feb 2023 13:36:21 +0000 Subject: [PATCH] Make Cluster webhook less strict for out of date ClusterClasses --- cmd/clusterctl/client/cluster/mover.go | 11 +-- internal/webhooks/cluster.go | 58 +++++++----- internal/webhooks/cluster_test.go | 123 +++++++++++++++++++++++++ 3 files changed, 162 insertions(+), 30 deletions(-) diff --git a/cmd/clusterctl/client/cluster/mover.go b/cmd/clusterctl/client/cluster/mover.go index 6c8b01c4c867..6d88b589197d 100644 --- a/cmd/clusterctl/client/cluster/mover.go +++ b/cmd/clusterctl/client/cluster/mover.go @@ -36,7 +36,6 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" logf "sigs.k8s.io/cluster-api/cmd/clusterctl/log" - "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/cluster-api/util/yaml" @@ -639,7 +638,7 @@ func pauseClusterClass(proxy Proxy, n *node, pause bool) error { // Update the annotation to the desired state ccAnnotations := clusterClass.GetAnnotations() if ccAnnotations == nil { - ccAnnotations = make(map[string]string) + ccAnnotations = map[string]string{} } if pause { // Set the pause annotation. @@ -649,12 +648,8 @@ func pauseClusterClass(proxy Proxy, n *node, pause bool) error { delete(ccAnnotations, clusterv1.PausedAnnotation) } - // If the ClusterClass is already at desired state return early. - if !annotations.AddAnnotations(clusterClass, ccAnnotations) { - return nil - } - - // Update the cluster class with the new annotations. + // Update the ClusterClass with the new annotations. + clusterClass.SetAnnotations(ccAnnotations) if err := patchHelper.Patch(ctx, clusterClass); err != nil { return errors.Wrapf(err, "error patching ClusterClass %s/%s", n.identity.Namespace, n.identity.Name) } diff --git a/internal/webhooks/cluster.go b/internal/webhooks/cluster.go index 2b4ba8f67cae..c0d812a6ecc9 100644 --- a/internal/webhooks/cluster.go +++ b/internal/webhooks/cluster.go @@ -62,6 +62,8 @@ type Cluster struct { var _ webhook.CustomDefaulter = &Cluster{} var _ webhook.CustomValidator = &Cluster{} +var errClusterClassNotReconciled = errors.New("ClusterClass is not up to date") + // Default satisfies the defaulting webhook interface. func (webhook *Cluster) Default(ctx context.Context, obj runtime.Object) error { // We gather all defaulting errors and return them together. @@ -88,11 +90,11 @@ func (webhook *Cluster) Default(ctx context.Context, obj runtime.Object) error { } clusterClass, err := webhook.pollClusterClassForCluster(ctx, cluster) if err != nil { - // If the ClusterClass can't be found ignore the error. - if apierrors.IsNotFound(err) { + // If the ClusterClass can't be found or is not up to date ignore the error. + if apierrors.IsNotFound(err) || errors.Is(err, errClusterClassNotReconciled) { return nil } - return apierrors.NewInternalError(errors.Wrapf(err, "Cluster %s can't be defaulted. Valid ClusterClass %s can not be retrieved", cluster.Name, cluster.Spec.Topology.Class)) + return apierrors.NewInternalError(errors.Wrapf(err, "Cluster %s can't be defaulted. ClusterClass %s can not be retrieved", cluster.Name, cluster.Spec.Topology.Class)) } allErrs = append(allErrs, DefaultVariables(cluster, clusterClass)...) @@ -242,27 +244,28 @@ func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newClu } // Get the ClusterClass referenced in the Cluster. - clusterClass, clusterClassGetErr := webhook.pollClusterClassForCluster(ctx, newCluster) - if clusterClassGetErr != nil && !apierrors.IsNotFound(clusterClassGetErr) { - // If the error is anything other than "Not Found" return all errors at this point. + clusterClass, clusterClassPollErr := webhook.pollClusterClassForCluster(ctx, newCluster) + if clusterClassPollErr != nil && + // If the error is anything other than "NotFound" or "NotReconciled" return all errors at this point. + !(apierrors.IsNotFound(clusterClassPollErr) || errors.Is(clusterClassPollErr, errClusterClassNotReconciled)) { allErrs = append( allErrs, field.InternalError( fldPath.Child("class"), - clusterClassGetErr)) + clusterClassPollErr)) return allErrs } - if clusterClassGetErr == nil { + if clusterClassPollErr == nil { // If there's no error validate the Cluster based on the ClusterClass. allErrs = append(allErrs, ValidateClusterForClusterClass(newCluster, clusterClass, fldPath)...) } if oldCluster != nil { // On update // The ClusterClass must exist to proceed with update validation. Return an error if the ClusterClass was // not found. - if clusterClassGetErr != nil { + if apierrors.IsNotFound(clusterClassPollErr) { allErrs = append( allErrs, field.InternalError( fldPath.Child("class"), - clusterClassGetErr)) + clusterClassPollErr)) return allErrs } @@ -341,7 +344,7 @@ func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newClu allErrs = append( allErrs, field.Forbidden( fldPath.Child("class"), - fmt.Sprintf("ClusterClass with name %q could not be found, change from class %[1]q to class %q cannot be validated", + fmt.Sprintf("valid ClusterClass with name %q could not be found, change from class %[1]q to class %q cannot be validated", oldCluster.Spec.Topology.Class, newCluster.Spec.Topology.Class))) // Return early with errors if the ClusterClass can't be retrieved. @@ -529,24 +532,35 @@ func ValidateClusterForClusterClass(cluster *clusterv1.Cluster, clusterClass *cl // pollClusterClassForCluster will retry getting the ClusterClass referenced in the Cluster for two seconds. func (webhook *Cluster) pollClusterClassForCluster(ctx context.Context, cluster *clusterv1.Cluster) (*clusterv1.ClusterClass, error) { clusterClass := &clusterv1.ClusterClass{} - var clusterClassGetErr error + var clusterClassPollErr error + // TODO: Add a webhook warning if the ClusterClass is not up to date or not found. _ = util.PollImmediate(200*time.Millisecond, 2*time.Second, func() (bool, error) { - if clusterClassGetErr = webhook.Client.Get(ctx, client.ObjectKey{Namespace: cluster.Namespace, Name: cluster.Spec.Topology.Class}, clusterClass); clusterClassGetErr != nil { + if clusterClassPollErr = webhook.Client.Get(ctx, client.ObjectKey{Namespace: cluster.Namespace, Name: cluster.Spec.Topology.Class}, clusterClass); clusterClassPollErr != nil { return false, nil //nolint:nilerr } - // Return an error if the ClusterClass has not successfully reconciled because variables aren't correctly - // reconciled. - // TODO: Decide whether to check generation here. This requires creating templates before creating the Cluster and - // may interfere with the way clusterctl move works. - if !conditions.Has(clusterClass, clusterv1.ClusterClassVariablesReconciledCondition) || - conditions.IsFalse(clusterClass, clusterv1.ClusterClassVariablesReconciledCondition) { - clusterClassGetErr = errors.New("ClusterClass is not up to date. If this persists check ClusterClass status") - return false, nil + if clusterClassPollErr = clusterClassIsReconciled(clusterClass); clusterClassPollErr != nil { + return false, nil //nolint:nilerr } + clusterClassPollErr = nil return true, nil }) - return clusterClass, clusterClassGetErr + return clusterClass, clusterClassPollErr +} + +// clusterClassIsReconciled returns errClusterClassNotReconciled if the ClusterClass has not successfully reconciled or if the +// ClusterClass variables have not been successfully reconciled. +func clusterClassIsReconciled(clusterClass *clusterv1.ClusterClass) error { + // If the clusterClass metadata generation does not match the status observed generation, the ClusterClass has not been successfully reconciled. + if clusterClass.Generation != clusterClass.Status.ObservedGeneration { + return errClusterClassNotReconciled + } + // If the clusterClass does not have ClusterClassVariablesReconciled==True, the ClusterClass has not been successfully reconciled. + if !conditions.Has(clusterClass, clusterv1.ClusterClassVariablesReconciledCondition) || + conditions.IsFalse(clusterClass, clusterv1.ClusterClassVariablesReconciledCondition) { + return errClusterClassNotReconciled + } + return nil } // TODO: This function will not be needed once per-definition defaulting and validation is implemented. diff --git a/internal/webhooks/cluster_test.go b/internal/webhooks/cluster_test.go index dfbd3efc3039..f204026696f1 100644 --- a/internal/webhooks/cluster_test.go +++ b/internal/webhooks/cluster_test.go @@ -1833,6 +1833,129 @@ func TestMovingBetweenManagedAndUnmanaged(t *testing.T) { } } +// TestClusterClassPollingErrors tests when a Cluster can be reconciled given different reconcile states of the ClusterClass. +func TestClusterClassPollingErrors(t *testing.T) { + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.ClusterTopology, true)() + g := NewWithT(t) + ref := &corev1.ObjectReference{ + APIVersion: "group.test.io/foo", + Kind: "barTemplate", + Name: "baz", + Namespace: "default", + } + + topology := builder.ClusterTopology().WithClass("class1").WithVersion("v1.24.3").Build() + secondTopology := builder.ClusterTopology().WithClass("class2").WithVersion("v1.24.3").Build() + notFoundTopology := builder.ClusterTopology().WithClass("doesnotexist").WithVersion("v1.24.3").Build() + + baseClusterClass := builder.ClusterClass(metav1.NamespaceDefault, "class1"). + WithInfrastructureClusterTemplate(refToUnstructured(ref)). + WithControlPlaneTemplate(refToUnstructured(ref)). + WithControlPlaneInfrastructureMachineTemplate(refToUnstructured(ref)) + + // ccFullyReconciled is a ClusterClass with a matching generation and observed generation, and VariablesReconciled=True. + ccFullyReconciled := baseClusterClass.DeepCopy().Build() + ccFullyReconciled.Generation = 1 + ccFullyReconciled.Status.ObservedGeneration = 1 + conditions.MarkTrue(ccFullyReconciled, clusterv1.ClusterClassVariablesReconciledCondition) + + // secondFullyReconciled is a second ClusterClass with a matching generation and observed generation, and VariablesReconciled=True. + secondFullyReconciled := ccFullyReconciled.DeepCopy() + secondFullyReconciled.SetName("class2") + + // ccGenerationMismatch is a ClusterClass with a mismatched generation and observed generation, but VariablesReconciledCondition=True. + ccGenerationMismatch := baseClusterClass.DeepCopy().Build() + ccGenerationMismatch.Generation = 999 + ccGenerationMismatch.Status.ObservedGeneration = 1 + conditions.MarkTrue(ccGenerationMismatch, clusterv1.ClusterClassVariablesReconciledCondition) + + // ccVariablesReconciledFalse with VariablesReconciled=False. + ccVariablesReconciledFalse := baseClusterClass.DeepCopy().Build() + conditions.MarkFalse(ccGenerationMismatch, clusterv1.ClusterClassVariablesReconciledCondition, "", clusterv1.ConditionSeverityError, "") + + tests := []struct { + name string + cluster *clusterv1.Cluster + oldCluster *clusterv1.Cluster + clusterClasses []*clusterv1.ClusterClass + wantErr bool + }{ + { + name: "Pass on create if ClusterClass is fully reconciled", + cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(), + clusterClasses: []*clusterv1.ClusterClass{ccFullyReconciled}, + wantErr: false, + }, + { + name: "Pass on create if ClusterClass generation does not match observedGeneration", + cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(), + clusterClasses: []*clusterv1.ClusterClass{ccGenerationMismatch}, + wantErr: false, + }, + { + name: "Pass on create if ClusterClass generation matches observedGeneration but VariablesReconciled=False", + cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(), + clusterClasses: []*clusterv1.ClusterClass{ccVariablesReconciledFalse}, + wantErr: false, + }, + { + name: "Pass on create if ClusterClass is not found", + cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(notFoundTopology).Build(), + clusterClasses: []*clusterv1.ClusterClass{ccFullyReconciled}, + wantErr: false, + }, + { + name: "Pass on update if oldCluster ClusterClass is fully reconciled", + cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(secondTopology).Build(), + oldCluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(), + clusterClasses: []*clusterv1.ClusterClass{ccFullyReconciled, secondFullyReconciled}, + wantErr: false, + }, + { + name: "Fail on update if oldCluster ClusterClass generation does not match observedGeneration", + cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(secondTopology).Build(), + oldCluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(), + clusterClasses: []*clusterv1.ClusterClass{ccGenerationMismatch, secondFullyReconciled}, + wantErr: true, + }, + { + name: "Fail on update if old Cluster ClusterClass is not found", + cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(), + oldCluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(notFoundTopology).Build(), + clusterClasses: []*clusterv1.ClusterClass{ccFullyReconciled}, + wantErr: true, + }, + { + name: "Fail on update if new Cluster ClusterClass is not found", + cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(notFoundTopology).Build(), + oldCluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(), + clusterClasses: []*clusterv1.ClusterClass{ccFullyReconciled}, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Sets up a reconcile with a fakeClient for the test case. + objs := []client.Object{} + for _, cc := range tt.clusterClasses { + objs = append(objs, cc) + } + c := &Cluster{Client: fake.NewClientBuilder(). + WithObjects(objs...). + WithScheme(fakeScheme). + Build()} + + // Checks the return error. + if tt.wantErr { + g.Expect(c.validate(ctx, tt.oldCluster, tt.cluster)).NotTo(Succeed()) + } else { + g.Expect(c.validate(ctx, tt.oldCluster, tt.cluster)).To(Succeed()) + } + }) + } +} + func refToUnstructured(ref *corev1.ObjectReference) *unstructured.Unstructured { gvk := ref.GetObjectKind().GroupVersionKind() output := &unstructured.Unstructured{}