diff --git a/pkg/controllers/updaterun/controller.go b/pkg/controllers/updaterun/controller.go index 51857af0e..253815aaf 100644 --- a/pkg/controllers/updaterun/controller.go +++ b/pkg/controllers/updaterun/controller.go @@ -40,6 +40,10 @@ var ( // errInitializedFailed is the error when the ClusterStagedUpdateRun fails to initialize. // It is a wrapped error of errStagedUpdatedAborted, because some initialization functions are reused in the validation step. errInitializedFailed = fmt.Errorf("%w: failed to initialize the clusterStagedUpdateRun", errStagedUpdatedAborted) + + // stageUpdatingWaitTime is the time to wait before rechecking the stage update status. + // Put it as a variable for convenient testing. + stageUpdatingWaitTime = 60 * time.Second ) // Reconciler reconciles a ClusterStagedUpdateRun object. @@ -108,16 +112,25 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim // Check if the clusterStagedUpdateRun is finished. finishedCond := meta.FindStatusCondition(updateRun.Status.Conditions, string(placementv1alpha1.StagedUpdateRunConditionSucceeded)) if condition.IsConditionStatusTrue(finishedCond, updateRun.Generation) || condition.IsConditionStatusFalse(finishedCond, updateRun.Generation) { - klog.V(2).InfoS("The clusterStagedUpdateRun is finished", "clusterStagedUpdateRun", runObjRef) + klog.V(2).InfoS("The clusterStagedUpdateRun is finished", "finishedSuccessfully", finishedCond.Status, "clusterStagedUpdateRun", runObjRef) return runtime.Result{}, nil } - // TODO(wantjian): validate the clusterStagedUpdateRun and generate the updatingStage etc. + + // Validate the clusterStagedUpdateRun status to ensure the update can be continued and get the updating stage index and cluster indices. + if updatingStageIndex, toBeUpdatedBindings, toBeDeletedBindings, err = r.validate(ctx, &updateRun); err != nil { + // errStagedUpdatedAborted cannot be retried. + if errors.Is(err, errStagedUpdatedAborted) { + return runtime.Result{}, r.recordUpdateRunFailed(ctx, &updateRun, err.Error()) + } + return runtime.Result{}, err + } + klog.V(2).InfoS("The clusterStagedUpdateRun is validated", "clusterStagedUpdateRun", runObjRef) } - // TODO(wantjian): execute the clusterStagedUpdateRun. + // TODO(wantjian): execute the clusterStagedUpdateRun and fix the requeue time. klog.V(2).InfoS("Executing the clusterStagedUpdateRun", "clusterStagedUpdateRun", runObjRef, "updatingStageIndex", updatingStageIndex, "toBeUpdatedBindings count", len(toBeUpdatedBindings), "toBeDeletedBindings count", len(toBeDeletedBindings)) - return runtime.Result{}, nil + return runtime.Result{RequeueAfter: stageUpdatingWaitTime}, nil } // handleDelete handles the deletion of the clusterStagedUpdateRun object. 
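The Reconcile change above hinges on the errStagedUpdatedAborted sentinel: every unrecoverable validation problem is wrapped with %w, so the reconciler can tell an abort (record the failed condition and stop) apart from a transient error (return it so the request is retried). A minimal standalone sketch of that wrapping pattern, assuming only the sentinel name from this diff; the validate helper below is illustrative, not code from the PR:

```go
package main

import (
	"errors"
	"fmt"
)

// Sentinel error: anything wrapping it means the update run cannot be continued.
var errStagedUpdatedAborted = fmt.Errorf("cannot continue the ClusterStagedUpdateRun")

// validate stands in for r.validate; unrecoverable problems wrap the sentinel via %w.
func validate(ok bool) error {
	if !ok {
		return fmt.Errorf("%w: the policy snapshot index is outdated", errStagedUpdatedAborted)
	}
	return nil
}

func main() {
	if err := validate(false); err != nil {
		if errors.Is(err, errStagedUpdatedAborted) {
			// In the controller this is where recordUpdateRunFailed marks the run as failed.
			fmt.Println("abort:", err)
			return
		}
		// A non-sentinel error would be returned so the request gets requeued and retried.
		fmt.Println("retryable:", err)
	}
}
```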
diff --git a/pkg/controllers/updaterun/controller_integration_test.go b/pkg/controllers/updaterun/controller_integration_test.go index fb1d04662..d546a3905 100644 --- a/pkg/controllers/updaterun/controller_integration_test.go +++ b/pkg/controllers/updaterun/controller_integration_test.go @@ -35,6 +35,8 @@ const ( timeout = time.Second * 10 // interval is the time to wait between retries for Eventually and Consistently interval = time.Millisecond * 250 + // duration is the duration to check for Consistently + duration = time.Second * 20 // numTargetClusters is the number of scheduled clusters numTargetClusters = 10 @@ -107,8 +109,8 @@ var _ = Describe("Test the clusterStagedUpdateRun controller", func() { validateUpdateRunHasFinalizer(ctx, updateRun) By("Updating the clusterStagedUpdateRun to failed") - startedcond := getTrueCondition(updateRun, string(placementv1alpha1.StagedUpdateRunConditionProgressing)) - finishedcond := getFalseCondition(updateRun, string(placementv1alpha1.StagedUpdateRunConditionSucceeded)) + startedcond := generateTrueCondition(updateRun, placementv1alpha1.StagedUpdateRunConditionProgressing) + finishedcond := generateFalseCondition(updateRun, placementv1alpha1.StagedUpdateRunConditionSucceeded) meta.SetStatusCondition(&updateRun.Status.Conditions, startedcond) meta.SetStatusCondition(&updateRun.Status.Conditions, finishedcond) Expect(k8sClient.Status().Update(ctx, updateRun)).Should(Succeed(), "failed to update the clusterStagedUpdateRun") @@ -136,7 +138,7 @@ var _ = Describe("Test the clusterStagedUpdateRun controller", func() { validateUpdateRunHasFinalizer(ctx, updateRun) By("Updating the clusterStagedUpdateRun status to processing") - startedcond := getTrueCondition(updateRun, string(placementv1alpha1.StagedUpdateRunConditionProgressing)) + startedcond := generateTrueCondition(updateRun, placementv1alpha1.StagedUpdateRunConditionProgressing) meta.SetStatusCondition(&updateRun.Status.Conditions, startedcond) Expect(k8sClient.Status().Update(ctx, updateRun)).Should(Succeed(), "failed to add condition to the clusterStagedUpdateRun") @@ -467,35 +469,71 @@ func validateApprovalRequestCount(ctx context.Context, count int) { }, timeout, interval).Should(Equal(count), "approval requests count mismatch") } -func getTrueCondition(updateRun *placementv1alpha1.ClusterStagedUpdateRun, condType string) metav1.Condition { - reason := "" - switch condType { - case string(placementv1alpha1.StagedUpdateRunConditionInitialized): - reason = condition.UpdateRunInitializeSucceededReason - case string(placementv1alpha1.StagedUpdateRunConditionProgressing): - reason = condition.UpdateRunStartedReason - case string(placementv1alpha1.StagedUpdateRunConditionSucceeded): - reason = condition.UpdateRunSucceededReason +func generateTrueCondition(updateRun *placementv1alpha1.ClusterStagedUpdateRun, condType any) metav1.Condition { + reason, typeStr := "", "" + switch cond := condType.(type) { + case placementv1alpha1.StagedUpdateRunConditionType: + switch cond { + case placementv1alpha1.StagedUpdateRunConditionInitialized: + reason = condition.UpdateRunInitializeSucceededReason + case placementv1alpha1.StagedUpdateRunConditionProgressing: + reason = condition.UpdateRunStartedReason + case placementv1alpha1.StagedUpdateRunConditionSucceeded: + reason = condition.UpdateRunSucceededReason + } + typeStr = string(cond) + case placementv1alpha1.StageUpdatingConditionType: + switch cond { + case placementv1alpha1.StageUpdatingConditionProgressing: + reason =
condition.StageUpdatingStartedReason + case placementv1alpha1.StageUpdatingConditionSucceeded: + reason = condition.StageUpdatingSucceededReason + } + typeStr = string(cond) + case placementv1alpha1.ClusterUpdatingStatusConditionType: + switch cond { + case placementv1alpha1.ClusterUpdatingConditionStarted: + reason = condition.ClusterUpdatingStartedReason + case placementv1alpha1.ClusterUpdatingConditionSucceeded: + reason = condition.ClusterUpdatingSucceededReason + } + typeStr = string(cond) } return metav1.Condition{ Status: metav1.ConditionTrue, - Type: condType, + Type: typeStr, ObservedGeneration: updateRun.Generation, Reason: reason, } } -func getFalseCondition(updateRun *placementv1alpha1.ClusterStagedUpdateRun, condType string) metav1.Condition { - reason := "" - switch condType { - case string(placementv1alpha1.StagedUpdateRunConditionInitialized): - reason = condition.UpdateRunInitializeFailedReason - case string(placementv1alpha1.StagedUpdateRunConditionSucceeded): - reason = condition.UpdateRunFailedReason +func generateFalseCondition(updateRun *placementv1alpha1.ClusterStagedUpdateRun, condType any) metav1.Condition { + reason, typeStr := "", "" + switch cond := condType.(type) { + case placementv1alpha1.StagedUpdateRunConditionType: + switch cond { + case placementv1alpha1.StagedUpdateRunConditionInitialized: + reason = condition.UpdateRunInitializeFailedReason + case placementv1alpha1.StagedUpdateRunConditionSucceeded: + reason = condition.UpdateRunFailedReason + } + typeStr = string(cond) + case placementv1alpha1.StageUpdatingConditionType: + switch cond { + case placementv1alpha1.StageUpdatingConditionSucceeded: + reason = condition.StageUpdatingFailedReason + } + typeStr = string(cond) + case placementv1alpha1.ClusterUpdatingStatusConditionType: + switch cond { + case placementv1alpha1.ClusterUpdatingConditionSucceeded: + reason = condition.ClusterUpdatingFailedReason + } + typeStr = string(cond) } return metav1.Condition{ Status: metav1.ConditionFalse, - Type: condType, + Type: typeStr, ObservedGeneration: updateRun.Generation, Reason: reason, } diff --git a/pkg/controllers/updaterun/initialization.go b/pkg/controllers/updaterun/initialization.go index 2ccf8e81a..5353ab008 100644 --- a/pkg/controllers/updaterun/initialization.go +++ b/pkg/controllers/updaterun/initialization.go @@ -175,7 +175,7 @@ func (r *Reconciler) collectScheduledClusters( for i, binding := range bindingList.Items { if binding.Spec.SchedulingPolicySnapshotName == latestPolicySnapshot.Name { if binding.Spec.State != placementv1beta1.BindingStateScheduled && binding.Spec.State != placementv1beta1.BindingStateBound { - stateErr := fmt.Errorf("binding `%s`'s state %s is not scheduled or bound", binding.Name, binding.Spec.State) + stateErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("binding `%s`'s state %s is not scheduled or bound", binding.Name, binding.Spec.State)) klog.ErrorS(stateErr, "Failed to collect clusterResourceBindings", "clusterResourcePlacement", placementName, "latestPolicySnapshot", latestPolicySnapshot.Name, "clusterStagedUpdateRun", updateRunRef) // no more retries here. 
return nil, nil, fmt.Errorf("%w: %s", errInitializedFailed, stateErr.Error()) @@ -255,6 +255,7 @@ func (r *Reconciler) computeRunStageStatus( for _, binding := range scheduledBindings { allSelectedClusters[binding.Spec.TargetCluster] = struct{}{} } + stagesStatus := make([]placementv1alpha1.StageUpdatingStatus, 0, len(updateRun.Status.StagedUpdateStrategySnapshot.Stages)) // Apply the label selectors from the ClusterStagedUpdateStrategy to filter the clusters. for _, stage := range updateRun.Status.StagedUpdateStrategySnapshot.Stages { @@ -272,7 +273,7 @@ func (r *Reconciler) computeRunStageStatus( klog.ErrorS(err, "Failed to convert label selector", "clusterStagedUpdateStrategy", updateStrategyName, "stage name", stage.Name, "labelSelector", stage.LabelSelector, "clusterStagedUpdateRun", updateRunRef) // no more retries here. invalidLabelErr := controller.NewUserError(fmt.Errorf("the stage label selector is invalid, clusterStagedUpdateStrategy: %s, stage: %s, err: %s", updateStrategyName, stage.Name, err.Error())) - return controller.NewUserError(fmt.Errorf("%w: %s", errInitializedFailed, invalidLabelErr.Error())) + return fmt.Errorf("%w: %s", errInitializedFailed, invalidLabelErr.Error()) } // List all the clusters that match the label selector. var clusterList clusterv1beta1.MemberClusterList @@ -340,8 +341,9 @@ func (r *Reconciler) computeRunStageStatus( curStageUpdatingStatus.AfterStageTaskStatus[i].ApprovalRequestName = fmt.Sprintf(placementv1alpha1.ApprovalTaskNameFmt, updateRun.Name, stage.Name) } } - updateRun.Status.StagesStatus = append(updateRun.Status.StagesStatus, curStageUpdatingStatus) + stagesStatus = append(stagesStatus, curStageUpdatingStatus) } + updateRun.Status.StagesStatus = stagesStatus // Check if the clusters are all placed. 
if len(allPlacedClusters) != len(allSelectedClusters) { diff --git a/pkg/controllers/updaterun/initialization_integration_test.go b/pkg/controllers/updaterun/initialization_integration_test.go index ca7825288..97b931818 100644 --- a/pkg/controllers/updaterun/initialization_integration_test.go +++ b/pkg/controllers/updaterun/initialization_integration_test.go @@ -95,6 +95,9 @@ var _ = Describe("Updaterun initialization tests", func() { }) Expect(err).To(Succeed()) resourceSnapshot = generateTestClusterResourceSnapshot() + + // Set smaller wait time for testing + stageUpdatingWaitTime = time.Second * 2 }) AfterEach(func() { @@ -249,7 +252,7 @@ var _ = Describe("Updaterun initialization tests", func() { By("Creating scheduling policy snapshot with pickN policy") Expect(k8sClient.Create(ctx, policySnapshot)).To(Succeed()) - By("Set the latest policy snapshot condition as fully scheduled") + By("Setting the latest policy snapshot condition as fully scheduled") meta.SetStatusCondition(&policySnapshot.Status.Conditions, metav1.Condition{ Type: string(placementv1beta1.PolicySnapshotScheduled), Status: metav1.ConditionTrue, @@ -282,7 +285,7 @@ var _ = Describe("Updaterun initialization tests", func() { policySnapshot.Spec.Policy.ClusterNames = []string{"cluster-0", "cluster-1"} Expect(k8sClient.Create(ctx, policySnapshot)).To(Succeed()) - By("Set the latest policy snapshot condition as fully scheduled") + By("Setting the latest policy snapshot condition as fully scheduled") meta.SetStatusCondition(&policySnapshot.Status.Conditions, metav1.Condition{ Type: string(placementv1beta1.PolicySnapshotScheduled), Status: metav1.ConditionTrue, @@ -314,7 +317,7 @@ var _ = Describe("Updaterun initialization tests", func() { policySnapshot.Spec.Policy.PlacementType = placementv1beta1.PickAllPlacementType Expect(k8sClient.Create(ctx, policySnapshot)).To(Succeed()) - By("Set the latest policy snapshot condition as fully scheduled") + By("Setting the latest policy snapshot condition as fully scheduled") meta.SetStatusCondition(&policySnapshot.Status.Conditions, metav1.Condition{ Type: string(placementv1beta1.PolicySnapshotScheduled), Status: metav1.ConditionTrue, @@ -350,7 +353,7 @@ var _ = Describe("Updaterun initialization tests", func() { By("Creating scheduling policy snapshot") Expect(k8sClient.Create(ctx, policySnapshot)).To(Succeed()) - By("Set the latest policy snapshot condition as fully scheduled") + By("Setting the latest policy snapshot condition as fully scheduled") meta.SetStatusCondition(&policySnapshot.Status.Conditions, metav1.Condition{ Type: string(placementv1beta1.PolicySnapshotScheduled), Status: metav1.ConditionTrue, @@ -406,7 +409,7 @@ var _ = Describe("Updaterun initialization tests", func() { By("Creating scheduling policy snapshot") Expect(k8sClient.Create(ctx, policySnapshot)).To(Succeed()) - By("Set the latest policy snapshot condition as fully scheduled") + By("Setting the latest policy snapshot condition as fully scheduled") meta.SetStatusCondition(&policySnapshot.Status.Conditions, metav1.Condition{ Type: string(placementv1beta1.PolicySnapshotScheduled), Status: metav1.ConditionTrue, @@ -522,59 +525,19 @@ var _ = Describe("Updaterun initialization tests", func() { return err } - want := placementv1alpha1.StagedUpdateRunStatus{ - PolicySnapshotIndexUsed: policySnapshot.Name, - PolicyObservedClusterCount: numberOfClustersAnnotation, - ApplyStrategy: crp.Spec.Strategy.ApplyStrategy, - StagedUpdateStrategySnapshot: &updateStrategy.Spec, - StagesStatus: 
[]placementv1alpha1.StageUpdatingStatus{ - { - StageName: "stage1", - Clusters: []placementv1alpha1.ClusterUpdatingStatus{ - {ClusterName: "cluster-9"}, - {ClusterName: "cluster-7"}, - {ClusterName: "cluster-5"}, - {ClusterName: "cluster-3"}, - {ClusterName: "cluster-1"}, - }, - AfterStageTaskStatus: []placementv1alpha1.AfterStageTaskStatus{ - {Type: placementv1alpha1.AfterStageTaskTypeTimedWait}, - }, - }, - { - StageName: "stage2", - Clusters: []placementv1alpha1.ClusterUpdatingStatus{ - {ClusterName: "cluster-0"}, - {ClusterName: "cluster-2"}, - {ClusterName: "cluster-4"}, - {ClusterName: "cluster-6"}, - {ClusterName: "cluster-8"}, - }, - AfterStageTaskStatus: []placementv1alpha1.AfterStageTaskStatus{ - { - Type: placementv1alpha1.AfterStageTaskTypeApproval, - ApprovalRequestName: updateRun.Name + "-stage2", - }, - }, - }, - }, - DeletionStageStatus: &placementv1alpha1.StageUpdatingStatus{ - StageName: "kubernetes-fleet.io/deleteStage", - Clusters: []placementv1alpha1.ClusterUpdatingStatus{ - {ClusterName: "unscheduled-cluster-0"}, - {ClusterName: "unscheduled-cluster-1"}, - {ClusterName: "unscheduled-cluster-2"}, - }, - }, - Conditions: []metav1.Condition{ - getFalseCondition(updateRun, string(placementv1alpha1.StagedUpdateRunConditionInitialized)), - }, + want := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) + for i := range want.StagesStatus[0].Clusters { + // Remove the CROs, as they are not added in this test. + want.StagesStatus[0].Clusters[i].ClusterResourceOverrideSnapshots = nil + } + // initialization should fail. + want.Conditions = []metav1.Condition{ + generateFalseCondition(updateRun, placementv1alpha1.StagedUpdateRunConditionInitialized), } - if diff := cmp.Diff(want, updateRun.Status, cmpOptions...); diff != "" { + if diff := cmp.Diff(*want, updateRun.Status, cmpOptions...); diff != "" { return fmt.Errorf("status mismatch: (-want +got):\n%s", diff) } - return nil }, timeout, interval).Should(Succeed(), "failed to validate the clusterStagedUpdateRun in the status") }) @@ -588,7 +551,7 @@ var _ = Describe("Updaterun initialization tests", func() { By("Creating scheduling policy snapshot") Expect(k8sClient.Create(ctx, policySnapshot)).To(Succeed()) - By("Set the latest policy snapshot condition as fully scheduled") + By("Setting the latest policy snapshot condition as fully scheduled") meta.SetStatusCondition(&policySnapshot.Status.Conditions, metav1.Condition{ Type: string(placementv1beta1.PolicySnapshotScheduled), Status: metav1.ConditionTrue, @@ -657,67 +620,29 @@ var _ = Describe("Updaterun initialization tests", func() { Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) By("Validating the clusterStagedUpdateRun stats") + want := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) Eventually(func() error { if err := k8sClient.Get(ctx, updateRunNamespacedName, updateRun); err != nil { return err } - want := placementv1alpha1.StagedUpdateRunStatus{ - PolicySnapshotIndexUsed: policySnapshot.Name, - PolicyObservedClusterCount: numberOfClustersAnnotation, - ApplyStrategy: crp.Spec.Strategy.ApplyStrategy, - StagedUpdateStrategySnapshot: &updateStrategy.Spec, - StagesStatus: []placementv1alpha1.StageUpdatingStatus{ - { - StageName: "stage1", - Clusters: []placementv1alpha1.ClusterUpdatingStatus{ - {ClusterName: "cluster-9", ClusterResourceOverrideSnapshots: []string{clusterResourceOverride.Name}}, - {ClusterName: "cluster-7", 
ClusterResourceOverrideSnapshots: []string{clusterResourceOverride.Name}}, - {ClusterName: "cluster-5", ClusterResourceOverrideSnapshots: []string{clusterResourceOverride.Name}}, - {ClusterName: "cluster-3", ClusterResourceOverrideSnapshots: []string{clusterResourceOverride.Name}}, - {ClusterName: "cluster-1", ClusterResourceOverrideSnapshots: []string{clusterResourceOverride.Name}}, - }, - AfterStageTaskStatus: []placementv1alpha1.AfterStageTaskStatus{ - {Type: placementv1alpha1.AfterStageTaskTypeTimedWait}, - }, - }, - { - StageName: "stage2", - Clusters: []placementv1alpha1.ClusterUpdatingStatus{ - {ClusterName: "cluster-0"}, - {ClusterName: "cluster-2"}, - {ClusterName: "cluster-4"}, - {ClusterName: "cluster-6"}, - {ClusterName: "cluster-8"}, - }, - AfterStageTaskStatus: []placementv1alpha1.AfterStageTaskStatus{ - { - Type: placementv1alpha1.AfterStageTaskTypeApproval, - ApprovalRequestName: updateRun.Name + "-stage2", - }, - }, - }, - }, - DeletionStageStatus: &placementv1alpha1.StageUpdatingStatus{ - StageName: "kubernetes-fleet.io/deleteStage", - Clusters: []placementv1alpha1.ClusterUpdatingStatus{ - {ClusterName: "unscheduled-cluster-0"}, - {ClusterName: "unscheduled-cluster-1"}, - {ClusterName: "unscheduled-cluster-2"}, - }, - }, - Conditions: []metav1.Condition{ - // initialization should succeed! - getTrueCondition(updateRun, string(placementv1alpha1.StagedUpdateRunConditionInitialized)), - }, + if diff := cmp.Diff(*want, updateRun.Status, cmpOptions...); diff != "" { + return fmt.Errorf("status mismatch: (-want +got):\n%s", diff) } - if diff := cmp.Diff(want, updateRun.Status, cmpOptions...); diff != "" { + return nil + }, timeout, interval).Should(Succeed(), "failed to validate the clusterStagedUpdateRun initialized successfully") + + By("Validating the clusterStagedUpdateRun initialized consistently") + Consistently(func() error { + if err := k8sClient.Get(ctx, updateRunNamespacedName, updateRun); err != nil { + return err + } + if diff := cmp.Diff(*want, updateRun.Status, cmpOptions...); diff != "" { return fmt.Errorf("status mismatch: (-want +got):\n%s", diff) } - return nil - }, timeout, interval).Should(Succeed(), "failed to validate the clusterStagedUpdateRun in the status") + }, duration, interval).Should(Succeed(), "failed to validate the clusterStagedUpdateRun initialized consistently") }) }) }) @@ -727,7 +652,7 @@ func validateFailedInitCondition(ctx context.Context, updateRun *placementv1alph if err := k8sClient.Get(ctx, updateRunNamespacedName, updateRun); err != nil { return err } - wantConditions := []metav1.Condition{getFalseCondition(updateRun, string(placementv1alpha1.StagedUpdateRunConditionInitialized))} + wantConditions := []metav1.Condition{generateFalseCondition(updateRun, placementv1alpha1.StagedUpdateRunConditionInitialized)} if diff := cmp.Diff(wantConditions, updateRun.Status.Conditions, cmpOptions...); diff != "" { return fmt.Errorf("condition mismatch: (-want +got):\n%s", diff) } @@ -738,3 +663,61 @@ func validateFailedInitCondition(ctx context.Context, updateRun *placementv1alph return nil }, timeout, interval).Should(Succeed(), "failed to validate the failed initialization condition") } + +func generateSucceededInitializationStatus( + crp *placementv1beta1.ClusterResourcePlacement, + updateRun *placementv1alpha1.ClusterStagedUpdateRun, + policySnapshot *placementv1beta1.ClusterSchedulingPolicySnapshot, + updateStrategy *placementv1alpha1.ClusterStagedUpdateStrategy, + clusterResourceOverride *placementv1alpha1.ClusterResourceOverrideSnapshot, +) 
*placementv1alpha1.StagedUpdateRunStatus { + return &placementv1alpha1.StagedUpdateRunStatus{ + PolicySnapshotIndexUsed: policySnapshot.Name, + PolicyObservedClusterCount: numberOfClustersAnnotation, + ApplyStrategy: crp.Spec.Strategy.ApplyStrategy.DeepCopy(), + StagedUpdateStrategySnapshot: &updateStrategy.Spec, + StagesStatus: []placementv1alpha1.StageUpdatingStatus{ + { + StageName: "stage1", + Clusters: []placementv1alpha1.ClusterUpdatingStatus{ + {ClusterName: "cluster-9", ClusterResourceOverrideSnapshots: []string{clusterResourceOverride.Name}}, + {ClusterName: "cluster-7", ClusterResourceOverrideSnapshots: []string{clusterResourceOverride.Name}}, + {ClusterName: "cluster-5", ClusterResourceOverrideSnapshots: []string{clusterResourceOverride.Name}}, + {ClusterName: "cluster-3", ClusterResourceOverrideSnapshots: []string{clusterResourceOverride.Name}}, + {ClusterName: "cluster-1", ClusterResourceOverrideSnapshots: []string{clusterResourceOverride.Name}}, + }, + AfterStageTaskStatus: []placementv1alpha1.AfterStageTaskStatus{ + {Type: placementv1alpha1.AfterStageTaskTypeTimedWait}, + }, + }, + { + StageName: "stage2", + Clusters: []placementv1alpha1.ClusterUpdatingStatus{ + {ClusterName: "cluster-0"}, + {ClusterName: "cluster-2"}, + {ClusterName: "cluster-4"}, + {ClusterName: "cluster-6"}, + {ClusterName: "cluster-8"}, + }, + AfterStageTaskStatus: []placementv1alpha1.AfterStageTaskStatus{ + { + Type: placementv1alpha1.AfterStageTaskTypeApproval, + ApprovalRequestName: updateRun.Name + "-stage2", + }, + }, + }, + }, + DeletionStageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "kubernetes-fleet.io/deleteStage", + Clusters: []placementv1alpha1.ClusterUpdatingStatus{ + {ClusterName: "unscheduled-cluster-0"}, + {ClusterName: "unscheduled-cluster-1"}, + {ClusterName: "unscheduled-cluster-2"}, + }, + }, + Conditions: []metav1.Condition{ + // initialization should succeed! + generateTrueCondition(updateRun, placementv1alpha1.StagedUpdateRunConditionInitialized), + }, + } +} diff --git a/pkg/controllers/updaterun/validation.go b/pkg/controllers/updaterun/validation.go new file mode 100644 index 000000000..97e7f2c3c --- /dev/null +++ b/pkg/controllers/updaterun/validation.go @@ -0,0 +1,319 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. +*/ + +package updaterun + +import ( + "context" + "fmt" + "reflect" + + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" + + placementv1alpha1 "go.goms.io/fleet/apis/placement/v1alpha1" + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/utils/condition" + "go.goms.io/fleet/pkg/utils/controller" +) + +// validate validates the clusterStagedUpdateRun status and ensures the update can be continued. +// The function returns the index of the stage that is updating, and the list of clusters that are scheduled to be deleted. +// If the updating stage index is -1, it means all stages are finished, and the clusterStageUpdateRun should be marked as finished. +// If the updating stage index is 0, the next stage to be updated is the first stage. +// If the updating stage index is len(updateRun.Status.StagesStatus), the next stage to be updated will be the delete stage. 
+func (r *Reconciler) validate( + ctx context.Context, + updateRun *placementv1alpha1.ClusterStagedUpdateRun, +) (int, []*placementv1beta1.ClusterResourceBinding, []*placementv1beta1.ClusterResourceBinding, error) { + // Some of the validating functions change the object, so we need to make a copy of it. + updateRunRef := klog.KObj(updateRun) + updateRunCopy := updateRun.DeepCopy() + klog.V(2).InfoS("Start to validate the clusterStagedUpdateRun", "clusterStagedUpdateRun", updateRunRef) + + // Validate the ClusterResourcePlacement object referenced by the ClusterStagedUpdateRun. + placementName, err := r.validateCRP(ctx, updateRunCopy) + if err != nil { + return -1, nil, nil, err + } + // Validate the applyStrategy. + if !reflect.DeepEqual(updateRun.Status.ApplyStrategy, updateRunCopy.Status.ApplyStrategy) { + mismatchErr := fmt.Errorf("the applyStrategy in the clusterStagedUpdateRun is outdated, latest: %v, recorded: %v", updateRunCopy.Status.ApplyStrategy, updateRun.Status.ApplyStrategy) + klog.ErrorS(mismatchErr, "the applyStrategy in the clusterResourcePlacement has changed", "clusterResourcePlacement", placementName, "clusterStagedUpdateRun", updateRunRef) + return -1, nil, nil, fmt.Errorf("%w: %s", errStagedUpdatedAborted, mismatchErr.Error()) + } + + // Retrieve the latest policy snapshot. + latestPolicySnapshot, clusterCount, err := r.determinePolicySnapshot(ctx, placementName, updateRunCopy) + if err != nil { + return -1, nil, nil, err + } + // Make sure the latestPolicySnapshot has not changed. + if updateRun.Status.PolicySnapshotIndexUsed != latestPolicySnapshot.Name { + mismatchErr := fmt.Errorf("the policy snapshot index used in the clusterStagedUpdateRun is outdated, latest: %s, recorded: %s", latestPolicySnapshot.Name, updateRun.Status.PolicySnapshotIndexUsed) + klog.ErrorS(mismatchErr, "there's a new latest policy snapshot", "clusterResourcePlacement", placementName, "clusterStagedUpdateRun", updateRunRef) + return -1, nil, nil, fmt.Errorf("%w: %s", errStagedUpdatedAborted, mismatchErr.Error()) + } + // Make sure the cluster count in the policy snapshot has not changed. + if updateRun.Status.PolicyObservedClusterCount != clusterCount { + mismatchErr := fmt.Errorf("the cluster count initialized in the clusterStagedUpdateRun is outdated, latest: %d, recorded: %d", clusterCount, updateRun.Status.PolicyObservedClusterCount) + klog.ErrorS(mismatchErr, "the cluster count in the policy snapshot has changed", "clusterResourcePlacement", placementName, "clusterStagedUpdateRun", updateRunRef) + return -1, nil, nil, fmt.Errorf("%w: %s", errStagedUpdatedAborted, mismatchErr.Error()) + } + + // Collect the clusters by the corresponding ClusterResourcePlacement with the latest policy snapshot. + scheduledBindings, toBeDeletedBindings, err := r.collectScheduledClusters(ctx, placementName, latestPolicySnapshot, updateRunCopy) + if err != nil { + return -1, nil, nil, err + } + + // if condition.IsConditionStatusFalse(meta.FindStatusCondition(updateRun.Status.Conditions, string(placementv1alpha1.StagedUpdateRunConditionProgressing)), updateRun.Generation) { + // // The clusterStagedUpdateRun has not started yet. + // klog.V(2).InfoS("Starting the staged update run from the beginning", "clusterStagedUpdateRun", updateRunRef) + // return 0, scheduledBindings, toBeDeletedBindings, nil + // } + + // Validate the stages and return the updating stage index. + updatingStageIndex, err := r.validateStagesStatus(ctx, scheduledBindings, updateRun, updateRunCopy) + if err != nil { + return -1, nil, nil, err + } + return updatingStageIndex, scheduledBindings, toBeDeletedBindings, nil +} + +// validateStagesStatus validates both the update and delete stages of the ClusterStagedUpdateRun. +// The function returns the stage index that is updating, and any error encountered. +// If the updating stage index is -1, it means all stages are finished, and the clusterStageUpdateRun should be marked as finished. +// If the updating stage index is 0, the next stage to be updated will be the first stage. +// If the updating stage index is len(updateRun.Status.StagesStatus), the next stage to be updated will be the delete stage. +func (r *Reconciler) validateStagesStatus( + ctx context.Context, + scheduledBindings []*placementv1beta1.ClusterResourceBinding, + updateRun, updateRunCopy *placementv1alpha1.ClusterStagedUpdateRun, +) (int, error) { + updateRunRef := klog.KObj(updateRun) + + // Recompute the stage status which does not include the delete stage. + // Note that the compute process uses the StagedUpdateStrategySnapshot in status, + // so it won't affect anything if the actual updateStrategy has changed. + if updateRun.Status.StagedUpdateStrategySnapshot == nil { + unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the clusterStagedUpdateRun has nil stagedUpdateStrategySnapshot")) + klog.ErrorS(unexpectedErr, "Failed to find the stagedUpdateStrategySnapshot in the clusterStagedUpdateRun", "clusterStagedUpdateRun", updateRunRef) + return -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error()) + } + if err := r.computeRunStageStatus(ctx, scheduledBindings, updateRunCopy); err != nil { + return -1, err + } + + // Validate the stages in the updateRun and return the updating stage index. + existingStageStatus := updateRun.Status.StagesStatus + if existingStageStatus == nil { + unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the clusterStagedUpdateRun has nil stagesStatus")) + klog.ErrorS(unexpectedErr, "Failed to find the stagesStatus in the clusterStagedUpdateRun", "clusterStagedUpdateRun", updateRunRef) + return -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error()) + } + updatingStageIndex, lastFinishedStageIndex, validateErr := validateUpdateStageStatus(existingStageStatus, updateRunCopy) + if validateErr != nil { + return -1, validateErr + } + + return validateDeleteStageStatus(updatingStageIndex, lastFinishedStageIndex, len(existingStageStatus), updateRunCopy) +} + +// validateUpdateStageStatus is a helper function to validate the updating stages in the clusterStagedUpdateRun. +// It compares the existing stage status with the latest list of clusters to be updated. +// It returns the index of the updating stage, the index of the last finished stage and any error encountered. +func validateUpdateStageStatus(existingStageStatus []placementv1alpha1.StageUpdatingStatus, updateRun *placementv1alpha1.ClusterStagedUpdateRun) (int, int, error) { + updatingStageIndex := -1 + lastFinishedStageIndex := -1 + // Remember the newly computed stage status. + newStageStatus := updateRun.Status.StagesStatus + // Make sure the number of stages in the clusterStagedUpdateRun is still the same.
+ if len(existingStageStatus) != len(newStageStatus) { + mismatchErr := fmt.Errorf("the number of stages in the clusterStagedUpdateRun has changed, new: %d, existing: %d", len(newStageStatus), len(existingStageStatus)) + klog.ErrorS(mismatchErr, "The number of stages in the clusterStagedUpdateRun has changed", "clusterStagedUpdateRun", klog.KObj(updateRun)) + return -1, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, mismatchErr.Error()) + } + // Make sure the stages in the updateRun are still the same. + for curStage := range existingStageStatus { + if existingStageStatus[curStage].StageName != newStageStatus[curStage].StageName { + mismatchErr := fmt.Errorf("index `%d` stage name in the clusterStagedUpdateRun has changed, new: %s, existing: %s", curStage, newStageStatus[curStage].StageName, existingStageStatus[curStage].StageName) + klog.ErrorS(mismatchErr, "The stage name in the clusterStagedUpdateRun has changed", "clusterStagedUpdateRun", klog.KObj(updateRun)) + return -1, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, mismatchErr.Error()) + } + if len(existingStageStatus[curStage].Clusters) != len(newStageStatus[curStage].Clusters) { + mismatchErr := fmt.Errorf("the number of clusters in index `%d` stage has changed, new: %d, existing: %d", curStage, len(newStageStatus[curStage].Clusters), len(existingStageStatus[curStage].Clusters)) + klog.ErrorS(mismatchErr, "The number of clusters in the stage has changed", "clusterStagedUpdateRun", klog.KObj(updateRun)) + return -1, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, mismatchErr.Error()) + } + // Check that the clusters in the stage are still the same. + for j := range existingStageStatus[curStage].Clusters { + if existingStageStatus[curStage].Clusters[j].ClusterName != newStageStatus[curStage].Clusters[j].ClusterName { + mismatchErr := fmt.Errorf("the `%d` cluster in the `%d` stage has changed, new: %s, existing: %s", j, curStage, newStageStatus[curStage].Clusters[j].ClusterName, existingStageStatus[curStage].Clusters[j].ClusterName) + klog.ErrorS(mismatchErr, "The cluster in the stage has changed", "clusterStagedUpdateRun", klog.KObj(updateRun)) + return -1, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, mismatchErr.Error()) + } + } + + var err error + updatingStageIndex, lastFinishedStageIndex, err = determineUpdatingStage(curStage, updatingStageIndex, lastFinishedStageIndex, &existingStageStatus[curStage], updateRun) + if err != nil { + return -1, -1, err + } + } + return updatingStageIndex, lastFinishedStageIndex, nil +} + +// determineUpdatingStage validates a stage status and looks for the updating stage and last finished stage. +// It returns latest updatingStageIndex, lastFinishedStageIndex and any error encountered. +func determineUpdatingStage( + curStage, updatingStageIndex, lastFinishedStageIndex int, + stageStatus *placementv1alpha1.StageUpdatingStatus, + updateRun *placementv1alpha1.ClusterStagedUpdateRun, +) (int, int, error) { + stageSucceedCond := meta.FindStatusCondition(stageStatus.Conditions, string(placementv1alpha1.StageUpdatingConditionSucceeded)) + stageStartedCond := meta.FindStatusCondition(stageStatus.Conditions, string(placementv1alpha1.StageUpdatingConditionProgressing)) + if condition.IsConditionStatusTrue(stageSucceedCond, updateRun.Generation) { + // The stage has finished. + if updatingStageIndex != -1 && curStage > updatingStageIndex { + // The finished stage is after the updating stage. 
+ unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the finished stage `%d` is after the updating stage `%d`", curStage, updatingStageIndex)) + klog.ErrorS(unexpectedErr, "The finished stage is after the updating stage", "clusterStagedUpdateRun", klog.KObj(updateRun)) + return -1, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error()) + } + // Make sure that all the clusters are updated. + for curCluster := range stageStatus.Clusters { + // Check if the cluster is still updating. + if !condition.IsConditionStatusTrue(meta.FindStatusCondition( + stageStatus.Clusters[curCluster].Conditions, + string(placementv1alpha1.ClusterUpdatingConditionSucceeded)), + updateRun.Generation) { + // The clusters in the finished stage should all have finished too. + unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("cluster `%s` in the finished stage `%s` has not succeeded", stageStatus.Clusters[curCluster].ClusterName, stageStatus.StageName)) + klog.ErrorS(unexpectedErr, "The cluster in a finished stage is still updating", "clusterStagedUpdateRun", klog.KObj(updateRun)) + return -1, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error()) + } + } + if curStage != lastFinishedStageIndex+1 { + // The current finished stage is not right after the last finished stage. + unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the finished stage `%s` is not right after the last finished stage with index `%d`", stageStatus.StageName, lastFinishedStageIndex)) + klog.ErrorS(unexpectedErr, "There's not yet started stage before the finished stage", "clusterStagedUpdateRun", klog.KObj(updateRun)) + return -1, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error()) + } + // Record the last finished stage so we can continue from the next stage if no stage is updating. + lastFinishedStageIndex = curStage + } else if condition.IsConditionStatusFalse(stageSucceedCond, updateRun.Generation) { + // The stage has failed. + failedErr := fmt.Errorf("the stage `%s` has failed, err: %s", stageStatus.StageName, stageSucceedCond.Message) + klog.ErrorS(failedErr, "The stage has failed", "stageCond", stageSucceedCond, "clusterStagedUpdateRun", klog.KObj(updateRun)) + return -1, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, failedErr.Error()) + } else if stageStartedCond != nil { + // The stage is still updating. + if updatingStageIndex != -1 { + // There should be only one stage updating at a time. + unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the stage `%s` is updating, but there is already a stage with index `%d` updating", stageStatus.StageName, updatingStageIndex)) + klog.ErrorS(unexpectedErr, "Detected more than one updating stages", "clusterStagedUpdateRun", klog.KObj(updateRun)) + return -1, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error()) + } + if curStage != lastFinishedStageIndex+1 { + // The current updating stage is not right after the last finished stage. + unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the updating stage `%s` is not right after the last finished stage with index `%d`", stageStatus.StageName, lastFinishedStageIndex)) + klog.ErrorS(unexpectedErr, "There's not yet started stage before the updating stage", "clusterStagedUpdateRun", klog.KObj(updateRun)) + return -1, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error()) + } + updatingStageIndex = curStage + // Collect the updating clusters. 
+ var updatingClusters []string + for j := range stageStatus.Clusters { + clusterStartedCond := meta.FindStatusCondition(stageStatus.Clusters[j].Conditions, string(placementv1alpha1.ClusterUpdatingConditionStarted)) + clusterFinishedCond := meta.FindStatusCondition(stageStatus.Clusters[j].Conditions, string(placementv1alpha1.ClusterUpdatingConditionSucceeded)) + if condition.IsConditionStatusTrue(clusterStartedCond, updateRun.Generation) && + !(condition.IsConditionStatusTrue(clusterFinishedCond, updateRun.Generation) || condition.IsConditionStatusFalse(clusterFinishedCond, updateRun.Generation)) { + updatingClusters = append(updatingClusters, stageStatus.Clusters[j].ClusterName) + } + } + // We don't allow more than one clusters to be updating at the same time. + if len(updatingClusters) > 1 { + unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("more than one cluster is updating in the stage `%s`, clusters: %v", stageStatus.StageName, updatingClusters)) + klog.ErrorS(unexpectedErr, "Detected more than one updating clusters in the stage", "clusterStagedUpdateRun", klog.KObj(updateRun)) + return -1, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error()) + } + } + return updatingStageIndex, lastFinishedStageIndex, nil +} + +// validateDeleteStageStatus validates the delete stage in the clusterStagedUpdateRun. +// It returns the updating stage index, or any error encountered. +func validateDeleteStageStatus( + updatingStageIndex, lastFinishedStageIndex, totalStages int, + updateRun *placementv1alpha1.ClusterStagedUpdateRun, +) (int, error) { + updateRunRef := klog.KObj(updateRun) + existingDeleteStageStatus := updateRun.Status.DeletionStageStatus + if existingDeleteStageStatus == nil { + unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the clusterStagedUpdateRun has nil deletionStageStatus")) + klog.ErrorS(unexpectedErr, "Failed to find the deletionStageStatus in the clusterStagedUpdateRun", "clusterStagedUpdateRun", updateRunRef) + return -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error()) + } + deleteStageFinishedCond := meta.FindStatusCondition(existingDeleteStageStatus.Conditions, string(placementv1alpha1.StagedUpdateRunConditionSucceeded)) + deleteStageProgressingCond := meta.FindStatusCondition(existingDeleteStageStatus.Conditions, string(placementv1alpha1.StagedUpdateRunConditionProgressing)) + // Check if there is any active updating stage + if updatingStageIndex != -1 || lastFinishedStageIndex < totalStages-1 { + // There are still stages updating before the delete stage, make sure the delete stage is not active/finished. + if condition.IsConditionStatusTrue(deleteStageFinishedCond, updateRun.Generation) || + condition.IsConditionStatusFalse(deleteStageFinishedCond, updateRun.Generation) || + condition.IsConditionStatusTrue(deleteStageProgressingCond, updateRun.Generation) { + unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the delete stage is active, but there are still stages updating, updatingStageIndex: %d, lastFinishedStageIndex: %d", updatingStageIndex, lastFinishedStageIndex)) + klog.ErrorS(unexpectedErr, "the delete stage is active, but there are still stages updating", "clusterStagedUpdateRun", updateRunRef) + return -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error()) + } + + // If no stage is updating, continue from the last finished stage (which will result it starting from 0). 
+ if updatingStageIndex == -1 { + updatingStageIndex = lastFinishedStageIndex + 1 + } + return updatingStageIndex, nil + } + + klog.InfoS("All stages are finished, continue from the delete stage", "clusterStagedUpdateRun", updateRunRef) + // Check if the delete stage has finished successfully. + if condition.IsConditionStatusTrue(deleteStageFinishedCond, updateRun.Generation) { + klog.InfoS("The delete stage has finished successfully, no more stages to update", "clusterStagedUpdateRun", updateRunRef) + return -1, nil + } + // Check if the delete stage has failed. + if condition.IsConditionStatusFalse(deleteStageFinishedCond, updateRun.Generation) { + failedErr := fmt.Errorf("the delete stage has failed, err: %s", deleteStageFinishedCond.Message) + klog.ErrorS(failedErr, "The delete stage has failed", "stageCond", deleteStageFinishedCond, "clusterStagedUpdateRun", updateRunRef) + return -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, failedErr.Error()) + } + // The delete stage is still updating. + if condition.IsConditionStatusTrue(deleteStageProgressingCond, updateRun.Generation) { + klog.InfoS("The delete stage is updating", "clusterStagedUpdateRun", updateRunRef) + return totalStages, nil + } + // All stages have finished, but the delete stage is not active or finished. + unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the delete stage is not active, but all stages finished")) + klog.ErrorS(unexpectedErr, "There is no stage active", "clusterStagedUpdateRun", updateRunRef) + return -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error()) +} + +// recordUpdateRunFailed records the failed condition in the ClusterStagedUpdateRun status. +func (r *Reconciler) recordUpdateRunFailed(ctx context.Context, updateRun *placementv1alpha1.ClusterStagedUpdateRun, message string) error { + meta.SetStatusCondition(&updateRun.Status.Conditions, metav1.Condition{ + Type: string(placementv1alpha1.StagedUpdateRunConditionSucceeded), + Status: metav1.ConditionFalse, + ObservedGeneration: updateRun.Generation, + Reason: condition.UpdateRunFailedReason, + Message: message, + }) + if updateErr := r.Client.Status().Update(ctx, updateRun); updateErr != nil { + klog.ErrorS(updateErr, "Failed to update the ClusterStagedUpdateRun status as failed", "clusterStagedUpdateRun", klog.KObj(updateRun)) + // updateErr can be retried. + return controller.NewAPIServerError(false, updateErr) + } + return nil +} diff --git a/pkg/controllers/updaterun/validation_integration_test.go b/pkg/controllers/updaterun/validation_integration_test.go new file mode 100644 index 000000000..a943e465a --- /dev/null +++ b/pkg/controllers/updaterun/validation_integration_test.go @@ -0,0 +1,485 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. +*/ + +package updaterun + +import ( + "context" + "encoding/json" + "fmt" + "strconv" + "strings" + "time" + + "github.com/google/go-cmp/cmp" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1" + placementv1alpha1 "go.goms.io/fleet/apis/placement/v1alpha1" + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/utils" +) + +var _ = Describe("UpdateRun validation tests", func() { + var updateRun *placementv1alpha1.ClusterStagedUpdateRun + var crp *placementv1beta1.ClusterResourcePlacement + var policySnapshot *placementv1beta1.ClusterSchedulingPolicySnapshot + var updateStrategy *placementv1alpha1.ClusterStagedUpdateStrategy + var resourceBindings []*placementv1beta1.ClusterResourceBinding + var targetClusters []*clusterv1beta1.MemberCluster + var unscheduledCluster []*clusterv1beta1.MemberCluster + var resourceSnapshot *placementv1beta1.ClusterResourceSnapshot + var clusterResourceOverride *placementv1alpha1.ClusterResourceOverrideSnapshot + + BeforeEach(func() { + testUpdateRunName = "updaterun-" + utils.RandStr() + testCRPName = "crp-" + utils.RandStr() + testResourceSnapshotName = "snapshot-" + utils.RandStr() + testUpdateStrategyName = "updatestrategy-" + utils.RandStr() + testCROName = "cro-" + utils.RandStr() + updateRunNamespacedName = types.NamespacedName{Name: testUpdateRunName} + + updateRun = generateTestClusterStagedUpdateRun() + crp = generateTestClusterResourcePlacement() + policySnapshot = generateTestClusterSchedulingPolicySnapshot(1) + updateStrategy = generateTestClusterStagedUpdateStrategy() + clusterResourceOverride = generateTestClusterResourceOverride() + + resourceBindings = make([]*placementv1beta1.ClusterResourceBinding, numTargetClusters+numUnscheduledClusters) + targetClusters = make([]*clusterv1beta1.MemberCluster, numTargetClusters) + for i := range targetClusters { + // split the clusters into 2 regions + region := "eastus" + if i%2 == 0 { + region = "westus" + } + // reserse the order of the clusters by index + targetClusters[i] = generateTestMemberCluster(numTargetClusters-1-i, "cluster-"+strconv.Itoa(i), map[string]string{"group": "prod", "region": region}) + resourceBindings[i] = generateTestClusterResourceBinding(policySnapshot.Name, targetClusters[i].Name) + } + + unscheduledCluster = make([]*clusterv1beta1.MemberCluster, numUnscheduledClusters) + for i := range unscheduledCluster { + unscheduledCluster[i] = generateTestMemberCluster(i, "unscheduled-cluster-"+strconv.Itoa(i), map[string]string{"group": "staging"}) + // update the policySnapshot name so that these clusters are considered to-be-deleted + resourceBindings[numTargetClusters+i] = generateTestClusterResourceBinding(policySnapshot.Name+"a", unscheduledCluster[i].Name) + } + + var err error + testNamespace, err = json.Marshal(corev1.Namespace{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Namespace", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-namespace", + Labels: map[string]string{ + "fleet.azure.com/name": "test-namespace", + }, + }, + }) + Expect(err).To(Succeed()) + resourceSnapshot = generateTestClusterResourceSnapshot() + + // Set smaller wait time for testing + stageUpdatingWaitTime = time.Second * 2 + + By("Creating a new clusterResourcePlacement") + Expect(k8sClient.Create(ctx, crp)).To(Succeed()) + + By("Creating scheduling policy snapshot") + Expect(k8sClient.Create(ctx, policySnapshot)).To(Succeed()) + + By("Setting the latest policy snapshot condition as fully scheduled") + 
meta.SetStatusCondition(&policySnapshot.Status.Conditions, metav1.Condition{ + Type: string(placementv1beta1.PolicySnapshotScheduled), + Status: metav1.ConditionTrue, + ObservedGeneration: policySnapshot.Generation, + Reason: "scheduled", + }) + Expect(k8sClient.Status().Update(ctx, policySnapshot)).Should(Succeed(), "failed to update the policy snapshot condition") + + By("Creating the member clusters") + for _, cluster := range targetClusters { + Expect(k8sClient.Create(ctx, cluster)).To(Succeed()) + } + for _, cluster := range unscheduledCluster { + Expect(k8sClient.Create(ctx, cluster)).To(Succeed()) + } + + By("Creating a bunch of ClusterResourceBindings") + for _, binding := range resourceBindings { + Expect(k8sClient.Create(ctx, binding)).To(Succeed()) + } + + By("Creating a clusterStagedUpdateStrategy") + Expect(k8sClient.Create(ctx, updateStrategy)).To(Succeed()) + + By("Creating a new resource snapshot") + Expect(k8sClient.Create(ctx, resourceSnapshot)).To(Succeed()) + + By("Creating a new cluster resource override") + Expect(k8sClient.Create(ctx, clusterResourceOverride)).To(Succeed()) + }) + + AfterEach(func() { + By("Deleting the clusterStagedUpdateRun") + Expect(k8sClient.Delete(ctx, updateRun)).Should(Succeed()) + updateRun = nil + + By("Deleting the clusterResourcePlacement") + Expect(k8sClient.Delete(ctx, crp)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) + crp = nil + + By("Deleting the clusterSchedulingPolicySnapshot") + Expect(k8sClient.Delete(ctx, policySnapshot)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) + policySnapshot = nil + + By("Deleting the clusterResourceBindings") + for _, binding := range resourceBindings { + Expect(k8sClient.Delete(ctx, binding)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) + } + resourceBindings = nil + + By("Deleting the member clusters") + for _, cluster := range targetClusters { + Expect(k8sClient.Delete(ctx, cluster)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) + } + for _, cluster := range unscheduledCluster { + Expect(k8sClient.Delete(ctx, cluster)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) + } + targetClusters, unscheduledCluster = nil, nil + + By("Deleting the clusterStagedUpdateStrategy") + Expect(k8sClient.Delete(ctx, updateStrategy)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) + updateStrategy = nil + + By("Deleting the clusterResourceSnapshot") + Expect(k8sClient.Delete(ctx, resourceSnapshot)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) + resourceSnapshot = nil + + By("Deleting the clusterResourceOverride") + Expect(k8sClient.Delete(ctx, clusterResourceOverride)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) + clusterResourceOverride = nil + }) + + Context("Test validateCRP", func() { + It("Should fail to validate if the CRP is not found", func() { + By("Creating a new clusterStagedUpdateRun") + Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) + + By("Validating the initialization succeeded") + want := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) + validateSucceededInitializationStatus(ctx, updateRun, want) + + By("Deleting the clusterResourcePlacement") + Expect(k8sClient.Delete(ctx, crp)).Should(Succeed()) + + By("Validating the validation failed") + want = generateFailedValidationStatus(updateRun, want) + validateFailedValidationStatus(ctx, updateRun, want, "parent clusterResourcePlacement not found") + }) + + It("Should fail to validate if CRP does not have external 
rollout strategy type", func() { + By("Creating a new clusterStagedUpdateRun") + Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) + + By("Validating the initialization succeeded") + want := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) + validateSucceededInitializationStatus(ctx, updateRun, want) + + By("Updating CRP's rollout strategy type") + crp.Spec.Strategy.Type = placementv1beta1.RollingUpdateRolloutStrategyType + Expect(k8sClient.Update(ctx, crp)).To(Succeed()) + + By("Validating the validation failed") + want = generateFailedValidationStatus(updateRun, want) + validateFailedValidationStatus(ctx, updateRun, want, + "parent clusterResourcePlacement does not have an external rollout strategy") + }) + + It("Should fail to valdiate if the ApplyStrategy in the CRP has changed", func() { + By("Creating a new clusterStagedUpdateRun") + Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) + + By("Validating the initialization succeeded") + want := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) + validateSucceededInitializationStatus(ctx, updateRun, want) + + By("Updating CRP's ApplyStrategy") + crp.Spec.Strategy.ApplyStrategy.Type = placementv1beta1.ApplyStrategyTypeClientSideApply + Expect(k8sClient.Update(ctx, crp)).To(Succeed()) + + By("Validating the validation failed") + want = generateFailedValidationStatus(updateRun, want) + validateFailedValidationStatus(ctx, updateRun, want, "the applyStrategy in the clusterStagedUpdateRun is outdated") + }) + }) + + Context("Test determinePolicySnapshot", func() { + It("Should fail to validate if the latest policySnapshot is not found", func() { + By("Creating a new clusterStagedUpdateRun") + Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) + + By("Validating the initialization succeeded") + want := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) + validateSucceededInitializationStatus(ctx, updateRun, want) + + By("Deleting the policySnapshot") + Expect(k8sClient.Delete(ctx, policySnapshot)).Should(Succeed()) + + By("Validating the validation failed") + want = generateFailedValidationStatus(updateRun, want) + validateFailedValidationStatus(ctx, updateRun, want, "no latest policy snapshot associated") + }) + + It("Should fail to validate if the latest policySnapshot has changed", func() { + By("Creating a new clusterStagedUpdateRun") + Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) + + By("Validating the initialization succeeded") + want := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) + validateSucceededInitializationStatus(ctx, updateRun, want) + + By("Deleting the old policySnapshot") + Expect(k8sClient.Delete(ctx, policySnapshot)).Should(Succeed()) + + By("Creating a new policySnapshot") + newPolicySnapshot := generateTestClusterSchedulingPolicySnapshot(2) + Expect(k8sClient.Create(ctx, newPolicySnapshot)).To(Succeed()) + + By("Setting the latest policy snapshot condition as fully scheduled") + meta.SetStatusCondition(&newPolicySnapshot.Status.Conditions, metav1.Condition{ + Type: string(placementv1beta1.PolicySnapshotScheduled), + Status: metav1.ConditionTrue, + ObservedGeneration: newPolicySnapshot.Generation, + Reason: "scheduled", + }) + Expect(k8sClient.Status().Update(ctx, newPolicySnapshot)).Should(Succeed(), "failed to update the policy snapshot condition") + + 
By("Validating the validation failed") + want = generateFailedValidationStatus(updateRun, want) + validateFailedValidationStatus(ctx, updateRun, want, + "the policy snapshot index used in the clusterStagedUpdateRun is outdated") + + By("Deleting the new policySnapshot") + Expect(k8sClient.Delete(ctx, newPolicySnapshot)).Should(Succeed()) + }) + + It("Should fail to validate if the cluster count has changed", func() { + By("Creating a new clusterStagedUpdateRun") + Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) + + By("Validating the initialization succeeded") + want := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) + validateSucceededInitializationStatus(ctx, updateRun, want) + + By("Updating the cluster count in the policySnapshot") + policySnapshot.Annotations["kubernetes-fleet.io/number-of-clusters"] = strconv.Itoa(numberOfClustersAnnotation + 1) + Expect(k8sClient.Update(ctx, policySnapshot)).Should(Succeed()) + + By("Validating the validation failed") + want = generateFailedValidationStatus(updateRun, want) + validateFailedValidationStatus(ctx, updateRun, want, + "the cluster count initialized in the clusterStagedUpdateRun is outdated") + }) + }) + + Context("Test validateStagesStatus", func() { + It("Should fail to validate if the StagedUpdateStrategySnapshot is nil", func() { + By("Creating a new clusterStagedUpdateRun") + Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) + + By("Validating the initialization succeeded") + want := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) + validateSucceededInitializationStatus(ctx, updateRun, want) + + By("Updating the status.StagedUpdateStrategySnapshot to nil") + updateRun.Status.StagedUpdateStrategySnapshot = nil + Expect(k8sClient.Status().Update(ctx, updateRun)).Should(Succeed()) + + By("Validating the validation failed") + want = generateFailedValidationStatus(updateRun, want) + want.StagedUpdateStrategySnapshot = nil + validateFailedValidationStatus(ctx, updateRun, want, "the clusterStagedUpdateRun has nil stagedUpdateStrategySnapshot") + }) + + It("Should fail to validate if the StagesStatus is nil", func() { + By("Creating a new clusterStagedUpdateRun") + Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) + + By("Validating the initialization succeeded") + want := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) + validateSucceededInitializationStatus(ctx, updateRun, want) + + By("Updating the status.StagesStatus to nil") + updateRun.Status.StagesStatus = nil + Expect(k8sClient.Status().Update(ctx, updateRun)).Should(Succeed()) + + By("Validating the validation failed") + want = generateFailedValidationStatus(updateRun, want) + want.StagesStatus = nil + validateFailedValidationStatus(ctx, updateRun, want, "the clusterStagedUpdateRun has nil stagesStatus") + }) + + It("Should fail to validate if the DeletionStageStatus is nil", func() { + By("Creating a new clusterStagedUpdateRun") + Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) + + By("Validating the initialization succeeded") + want := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) + validateSucceededInitializationStatus(ctx, updateRun, want) + + By("Updating the status.DeletionStageStatus to nil") + updateRun.Status.DeletionStageStatus = nil + Expect(k8sClient.Status().Update(ctx, 
updateRun)).Should(Succeed()) + + By("Validating the validation failed") + want = generateFailedValidationStatus(updateRun, want) + want.DeletionStageStatus = nil + validateFailedValidationStatus(ctx, updateRun, want, "the clusterStagedUpdateRun has nil deletionStageStatus") + }) + + It("Should fail to validate if the number of stages has changed", func() { + By("Creating a new clusterStagedUpdateRun") + Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) + + By("Validating the initialization succeeded") + want := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) + validateSucceededInitializationStatus(ctx, updateRun, want) + + By("Adding a stage to the updateRun status") + updateRun.Status.StagedUpdateStrategySnapshot.Stages = append(updateRun.Status.StagedUpdateStrategySnapshot.Stages, placementv1alpha1.StageConfig{ + Name: "stage3", + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "group": "dummy", + "region": "no-exist", + }, + }, + }) + Expect(k8sClient.Status().Update(ctx, updateRun)).Should(Succeed()) + + By("Validating the validation failed") + want = generateFailedValidationStatus(updateRun, want) + want.StagedUpdateStrategySnapshot.Stages = append(want.StagedUpdateStrategySnapshot.Stages, placementv1alpha1.StageConfig{ + Name: "stage3", + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "group": "dummy", + "region": "no-exist", + }, + }, + }) + validateFailedValidationStatus(ctx, updateRun, want, "the number of stages in the clusterStagedUpdateRun has changed") + }) + + It("Should fail to validate if stage name has changed", func() { + By("Creating a new clusterStagedUpdateRun") + Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) + + By("Validating the initialization succeeded") + want := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) + validateSucceededInitializationStatus(ctx, updateRun, want) + + By("Changing the name of a stage") + updateRun.Status.StagedUpdateStrategySnapshot.Stages[0].Name = "stage3" + Expect(k8sClient.Status().Update(ctx, updateRun)).Should(Succeed()) + + By("Validating the validation failed") + want = generateFailedValidationStatus(updateRun, want) + want.StagedUpdateStrategySnapshot.Stages[0].Name = "stage3" + validateFailedValidationStatus(ctx, updateRun, want, "index `0` stage name in the clusterStagedUpdateRun has changed") + }) + + It("Should fail to validate if the number of clusters has changed in a stage", func() { + By("Creating a new clusterStagedUpdateRun") + Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) + + By("Validating the initialization succeeded") + want := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) + validateSucceededInitializationStatus(ctx, updateRun, want) + + By("Deleting a cluster resource binding") + Expect(k8sClient.Delete(ctx, resourceBindings[0])).Should(Succeed()) + + By("Validating the validation failed") + want = generateFailedValidationStatus(updateRun, want) + validateFailedValidationStatus(ctx, updateRun, want, "the number of clusters in index `1` stage has changed") + }) + + It("Should fail to validate if the cluster name has changed in a stage", func() { + By("Creating a new clusterStagedUpdateRun") + Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) + + By("Validating the initialization succeeded") + want := generateSucceededInitializationStatus(crp, 
updateRun, policySnapshot, updateStrategy, clusterResourceOverride) + validateSucceededInitializationStatus(ctx, updateRun, want) + + By("Changing the sorting key value to reorder the clusters") + // Swap the index of cluster 1 and 3. + targetClusters[1].Labels["index"], targetClusters[3].Labels["index"] = + targetClusters[3].Labels["index"], targetClusters[1].Labels["index"] + Expect(k8sClient.Update(ctx, targetClusters[1])).Should(Succeed()) + Expect(k8sClient.Update(ctx, targetClusters[3])).Should(Succeed()) + + By("Validating the validation failed") + want = generateFailedValidationStatus(updateRun, want) + validateFailedValidationStatus(ctx, updateRun, want, "the `3` cluster in the `0` stage has changed") + }) + }) +}) + +func validateSucceededInitializationStatus( + ctx context.Context, + updateRun *placementv1alpha1.ClusterStagedUpdateRun, + want *placementv1alpha1.StagedUpdateRunStatus, +) { + Eventually(func() error { + if err := k8sClient.Get(ctx, updateRunNamespacedName, updateRun); err != nil { + return err + } + + if diff := cmp.Diff(*want, updateRun.Status, cmpOptions...); diff != "" { + return fmt.Errorf("status mismatch: (-want +got):\n%s", diff) + } + return nil + }, timeout, interval).Should(Succeed(), "failed to validate the clusterStagedUpdateRun initialized successfully") +} + +func validateFailedValidationStatus( + ctx context.Context, + updateRun *placementv1alpha1.ClusterStagedUpdateRun, + want *placementv1alpha1.StagedUpdateRunStatus, + message string, +) { + Eventually(func() error { + if err := k8sClient.Get(ctx, updateRunNamespacedName, updateRun); err != nil { + return err + } + + if diff := cmp.Diff(*want, updateRun.Status, cmpOptions...); diff != "" { + return fmt.Errorf("status mismatch: (-want +got):\n%s", diff) + } + succeedCond := meta.FindStatusCondition(updateRun.Status.Conditions, string(placementv1alpha1.StagedUpdateRunConditionSucceeded)) + if !strings.Contains(succeedCond.Message, message) { + return fmt.Errorf("condition message mismatch: got %s, want %s", succeedCond.Message, message) + } + return nil + }, timeout, interval).Should(Succeed(), "failed to validate that the clusterStagedUpdateRun validation failed") +} + +func generateFailedValidationStatus( + updateRun *placementv1alpha1.ClusterStagedUpdateRun, + initialized *placementv1alpha1.StagedUpdateRunStatus, +) *placementv1alpha1.StagedUpdateRunStatus { + initialized.Conditions = append(initialized.Conditions, generateFalseCondition(updateRun, placementv1alpha1.StagedUpdateRunConditionSucceeded)) + return initialized +} diff --git a/pkg/controllers/updaterun/validation_test.go b/pkg/controllers/updaterun/validation_test.go new file mode 100644 index 000000000..9d156e2a4 --- /dev/null +++ b/pkg/controllers/updaterun/validation_test.go @@ -0,0 +1,358 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. 
+*/ + +package updaterun + +import ( + "fmt" + "testing" + + placementv1alpha1 "go.goms.io/fleet/apis/placement/v1alpha1" + "go.goms.io/fleet/pkg/utils/controller" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestDetermineUpdatingStage(t *testing.T) { + updateRun := &placementv1alpha1.ClusterStagedUpdateRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-run", + Generation: 1, + }, + } + + tests := []struct { + name string + curStage int + updatingStageIndex int + lastFinishedStageIndex int + stageStatus *placementv1alpha1.StageUpdatingStatus + wantErr error + wantUpdatingStageIndex int + wantLastFinishedStageIndex int + }{ + { + name: "determineUpdatingStage should return error if some stage finished after the updating stage", + curStage: 2, + updatingStageIndex: 1, + lastFinishedStageIndex: -1, + stageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "test-stage", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1alpha1.StageUpdatingConditionSucceeded)}, + }, + wantErr: wrapErr(true, fmt.Errorf("the finished stage `2` is after the updating stage `1`")), + wantUpdatingStageIndex: -1, + wantLastFinishedStageIndex: -1, + }, + { + name: "determineUpdatingStage should return error if some cluster has not succeeded but the stage has succeeded", + curStage: 0, + updatingStageIndex: -1, + lastFinishedStageIndex: -1, + stageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "test-stage", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1alpha1.StageUpdatingConditionSucceeded)}, + Clusters: []placementv1alpha1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-1", + Conditions: []metav1.Condition{generateFalseCondition(updateRun, placementv1alpha1.ClusterUpdatingConditionSucceeded)}, + }, + }, + }, + wantErr: wrapErr(true, fmt.Errorf("cluster `cluster-1` in the finished stage `test-stage` has not succeeded")), + wantUpdatingStageIndex: -1, + wantLastFinishedStageIndex: -1, + }, + { + name: "determineUpdatingStage should return error if some cluster has not finished but the stage has succeeded", + curStage: 0, + updatingStageIndex: -1, + lastFinishedStageIndex: -1, + stageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "test-stage", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1alpha1.StageUpdatingConditionSucceeded)}, + Clusters: []placementv1alpha1.ClusterUpdatingStatus{ + {ClusterName: "cluster-1"}, + }, + }, + wantErr: wrapErr(true, fmt.Errorf("cluster `cluster-1` in the finished stage `test-stage` has not succeeded")), + wantUpdatingStageIndex: -1, + wantLastFinishedStageIndex: -1, + }, + { + name: "determineUpdatingStage should return error if the finished stage is not right after the last finished stage", + curStage: 2, + updatingStageIndex: -1, + lastFinishedStageIndex: 0, + stageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "test-stage", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1alpha1.StageUpdatingConditionSucceeded)}, + }, + wantErr: wrapErr(true, fmt.Errorf("the finished stage `test-stage` is not right after the last finished stage with index `0`")), + wantUpdatingStageIndex: -1, + wantLastFinishedStageIndex: -1, + }, + { + name: "determineUpdatingStage should return error if some stage has failed", + curStage: 0, + updatingStageIndex: -1, + lastFinishedStageIndex: -1, + stageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "test-stage", + Conditions: 
[]metav1.Condition{generateFalseCondition(updateRun, placementv1alpha1.StageUpdatingConditionSucceeded)}, + }, + wantErr: wrapErr(false, fmt.Errorf("the stage `test-stage` has failed, err: ")), + wantUpdatingStageIndex: -1, + wantLastFinishedStageIndex: -1, + }, + { + name: "determineUpdatingStage should return error if there are multiple stages updating", + curStage: 1, + updatingStageIndex: 0, + lastFinishedStageIndex: -1, + stageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "test-stage", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1alpha1.StageUpdatingConditionProgressing)}, + }, + wantErr: wrapErr(true, fmt.Errorf("the stage `test-stage` is updating, but there is already a stage with index `0` updating")), + wantUpdatingStageIndex: -1, + wantLastFinishedStageIndex: -1, + }, + { + name: "determineUpdatingStage should return error if the updating stage is not right after the last finished stage", + curStage: 1, + updatingStageIndex: -1, + lastFinishedStageIndex: -1, + stageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "test-stage", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1alpha1.StageUpdatingConditionProgressing)}, + }, + wantErr: wrapErr(true, fmt.Errorf("the updating stage `test-stage` is not right after the last finished stage with index `-1`")), + wantUpdatingStageIndex: -1, + wantLastFinishedStageIndex: -1, + }, + { + name: "determineUpdatingStage should return error if there are multiple clusters updating in an updating stage", + curStage: 0, + updatingStageIndex: -1, + lastFinishedStageIndex: -1, + stageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "test-stage", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1alpha1.StageUpdatingConditionProgressing)}, + Clusters: []placementv1alpha1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-1", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1alpha1.ClusterUpdatingConditionStarted)}, + }, + { + ClusterName: "cluster-2", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1alpha1.ClusterUpdatingConditionStarted)}, + }, + }, + }, + wantErr: wrapErr(true, fmt.Errorf("more than one cluster is updating in the stage `test-stage`, clusters: [cluster-1 cluster-2]")), + wantUpdatingStageIndex: -1, + wantLastFinishedStageIndex: -1, + }, + { + name: "determineUpdatingStage should return -1 as the updatingStageIndex if no stage is updating", + curStage: 0, + updatingStageIndex: -1, + lastFinishedStageIndex: -1, + stageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "test-stage", + }, + wantErr: nil, + wantUpdatingStageIndex: -1, + wantLastFinishedStageIndex: -1, + }, + { + name: "determineUpdatingStage should return the index of the updating stage in updatingStageIndex", + curStage: 2, + updatingStageIndex: -1, + lastFinishedStageIndex: 1, + stageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "test-stage", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1alpha1.StageUpdatingConditionProgressing)}, + }, + wantErr: nil, + wantUpdatingStageIndex: 2, + wantLastFinishedStageIndex: 1, + }, + { + name: "determineUpdatingStage should return the index of the succeeded stage in lastFinishedStageIndex", + curStage: 2, + updatingStageIndex: -1, + lastFinishedStageIndex: 1, + stageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "test-stage", + Conditions: []metav1.Condition{ + 
generateTrueCondition(updateRun, placementv1alpha1.StageUpdatingConditionProgressing), + generateTrueCondition(updateRun, placementv1alpha1.StageUpdatingConditionSucceeded), + }, + }, + wantErr: nil, + wantUpdatingStageIndex: -1, + wantLastFinishedStageIndex: 2, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + gotUpdatingStageIndex, gotLastFinishedStageIndex, err := + determineUpdatingStage(test.curStage, test.updatingStageIndex, test.lastFinishedStageIndex, test.stageStatus, updateRun) + if test.wantErr == nil { + if err != nil { + t.Fatalf("determineUpdatingStage() got error = %+v, want error = nil", err) + } + } else if err == nil || err.Error() != test.wantErr.Error() { + t.Fatalf("determineUpdatingStage() got error = %+v, want error = %+v", err, test.wantErr) + } + if gotUpdatingStageIndex != test.wantUpdatingStageIndex { + t.Fatalf("determineUpdatingStage() got updatingStageIndex = %d, want updatingStageIndex = %d", gotUpdatingStageIndex, test.wantUpdatingStageIndex) + } + if gotLastFinishedStageIndex != test.wantLastFinishedStageIndex { + t.Fatalf("determineUpdatingStage() got lastFinishedStageIndex = %d, want lastFinishedStageIndex = %d", gotLastFinishedStageIndex, test.wantLastFinishedStageIndex) + } + }) + } +} + +func TestValidateDeleteStageStatus(t *testing.T) { + totalStages := 3 + updateRun := &placementv1alpha1.ClusterStagedUpdateRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-run", + Generation: 1, + }, + } + + tests := []struct { + name string + updatingStageIndex int + lastFinishedStageIndex int + deleteStageStatus *placementv1alpha1.StageUpdatingStatus + wantErr error + wantUpdatingStageIndex int + }{ + { + name: "validateDeleteStageStatus should return error if delete stage status is nil", + deleteStageStatus: nil, + wantErr: wrapErr(true, fmt.Errorf("the clusterStagedUpdateRun has nil deletionStageStatus")), + wantUpdatingStageIndex: -1, + }, + { + name: "validateDeleteStageStatus should return error if there's a stage updating but the delete stage has started", + updatingStageIndex: 2, + lastFinishedStageIndex: 1, + deleteStageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "delete-stage", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1alpha1.StageUpdatingConditionProgressing)}, + }, + wantErr: wrapErr(true, fmt.Errorf("the delete stage is active, but there are still stages updating, updatingStageIndex: 2, lastFinishedStageIndex: 1")), + wantUpdatingStageIndex: -1, + }, + { + name: "validateDeleteStageStatus should return error if there's a stage not started yet but the delete stage has finished", + updatingStageIndex: -1, + lastFinishedStageIndex: 1, // < totalStages - 1 + deleteStageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "delete-stage", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1alpha1.StageUpdatingConditionSucceeded)}, + }, + wantErr: wrapErr(true, fmt.Errorf("the delete stage is active, but there are still stages updating, updatingStageIndex: -1, lastFinishedStageIndex: 1")), + wantUpdatingStageIndex: -1, + }, + { + name: "validateDeleteStageStatus should return error if there's a stage not started yet but the delete stage has failed", + updatingStageIndex: -1, + lastFinishedStageIndex: 1, // < totalStages - 1 + deleteStageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "delete-stage", + Conditions: []metav1.Condition{generateFalseCondition(updateRun, placementv1alpha1.StageUpdatingConditionSucceeded)}, + }, + wantErr: 
wrapErr(true, fmt.Errorf("the delete stage is active, but there are still stages updating, updatingStageIndex: -1, lastFinishedStageIndex: 1")), + wantUpdatingStageIndex: -1, + }, + { + name: "validateDeleteStageStatus should return the updatingStageIndex if there's still a stage updating", + updatingStageIndex: 2, + deleteStageStatus: &placementv1alpha1.StageUpdatingStatus{StageName: "delete-stage"}, + wantErr: nil, + wantUpdatingStageIndex: 2, + }, + { + name: "validateDeleteStageStatus should return the next stage after lastFinishedStageIndex if there's no stage updating but some stages have not started yet", + updatingStageIndex: -1, + lastFinishedStageIndex: 0, + deleteStageStatus: &placementv1alpha1.StageUpdatingStatus{StageName: "delete-stage"}, + wantErr: nil, + wantUpdatingStageIndex: 1, + }, + { + name: "validateDeleteStageStatus should return -1 if all stages have finished", + updatingStageIndex: -1, + lastFinishedStageIndex: totalStages - 1, + deleteStageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "delete-stage", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1alpha1.StageUpdatingConditionSucceeded)}, + }, + wantErr: nil, + wantUpdatingStageIndex: -1, + }, + { + name: "validateDeleteStageStatus should return error if the delete stage has failed", + updatingStageIndex: -1, + lastFinishedStageIndex: totalStages - 1, + deleteStageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "delete-stage", + Conditions: []metav1.Condition{generateFalseCondition(updateRun, placementv1alpha1.StageUpdatingConditionSucceeded)}, + }, + wantErr: wrapErr(false, fmt.Errorf("the delete stage has failed, err: ")), + wantUpdatingStageIndex: -1, + }, + { + name: "validateDeleteStageStatus should return totalStages if the delete stage is still running", + updatingStageIndex: -1, + lastFinishedStageIndex: totalStages - 1, + deleteStageStatus: &placementv1alpha1.StageUpdatingStatus{ + StageName: "delete-stage", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1alpha1.StageUpdatingConditionProgressing)}, + }, + wantErr: nil, + wantUpdatingStageIndex: totalStages, + }, + { + name: "validateDeleteStageStatus should return error if all updating stages have finished but the delete stage is not active or finished", + updatingStageIndex: -1, + lastFinishedStageIndex: totalStages - 1, + deleteStageStatus: &placementv1alpha1.StageUpdatingStatus{StageName: "delete-stage"}, + wantErr: wrapErr(true, fmt.Errorf("the delete stage is not active, but all stages finished")), + wantUpdatingStageIndex: -1, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + updateRun.Status.DeletionStageStatus = test.deleteStageStatus + gotUpdatingStageIndex, err := validateDeleteStageStatus(test.updatingStageIndex, test.lastFinishedStageIndex, totalStages, updateRun) + if test.wantErr == nil { + if err != nil { + t.Fatalf("validateDeleteStageStatus() got error = %+v, want error = nil", err) + } + } else if err == nil || err.Error() != test.wantErr.Error() { + t.Fatalf("validateDeleteStageStatus() got error = %+v, want error = %+v", err, test.wantErr) + } + if gotUpdatingStageIndex != test.wantUpdatingStageIndex { + t.Fatalf("validateDeleteStageStatus() got updatingStageIndex = %d, want updatingStageIndex = %d", gotUpdatingStageIndex, test.wantUpdatingStageIndex) + } + }) + } +} + +func wrapErr(unexpected bool, err error) error { + if unexpected { + err = controller.NewUnexpectedBehaviorError(err) + } + return fmt.Errorf("%w: %s", 
errStagedUpdatedAborted, err.Error()) +}
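
Reviewer note: the wrapErr helper above reconstructs the error shape these tests expect from the validation code. Below is a minimal, hypothetical sketch of a check producing such an error; checkStageCount and its message are illustrative only and not part of this change, while controller.NewUnexpectedBehaviorError and errStagedUpdatedAborted are the existing package symbols used in the tests.

package updaterun

import (
	"fmt"

	"go.goms.io/fleet/pkg/utils/controller"
)

// checkStageCount is a hypothetical example. It classifies a status mismatch as
// unexpected behavior and wraps it with the package-level errStagedUpdatedAborted,
// so errors.Is(err, errStagedUpdatedAborted) reports true for callers; this is the
// same shape that wrapErr rebuilds in the table-driven tests above.
func checkStageCount(got, want int) error {
	if got != want {
		unexpectedErr := controller.NewUnexpectedBehaviorError(
			fmt.Errorf("the number of stages in the clusterStagedUpdateRun has changed"))
		return fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error())
	}
	return nil
}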