diff --git a/apis/placement/v1beta1/commons.go b/apis/placement/v1beta1/commons.go index ed5542c4b..908b730a1 100644 --- a/apis/placement/v1beta1/commons.go +++ b/apis/placement/v1beta1/commons.go @@ -57,4 +57,8 @@ const ( // EnvelopeConfigMapAnnotation is the annotation that indicates the configmap is an envelope configmap that contains resources // we need to apply to the member cluster instead of the configMap itself. EnvelopeConfigMapAnnotation = fleetPrefix + "EnvelopeConfigMap" + + // PreviousBindingStateAnnotation is the annotation that records the previous state of a binding. + // This is used to remember if an "unscheduled" binding was moved from a "bound" state or a "scheduled" state. + PreviousBindingStateAnnotation = fleetPrefix + "PreviousBindingState" ) diff --git a/apis/placement/v1beta1/zz_generated.deepcopy.go b/apis/placement/v1beta1/zz_generated.deepcopy.go index c55bb5975..acfc9cb73 100644 --- a/apis/placement/v1beta1/zz_generated.deepcopy.go +++ b/apis/placement/v1beta1/zz_generated.deepcopy.go @@ -11,7 +11,7 @@ Licensed under the MIT license. package v1beta1 import ( - "k8s.io/apimachinery/pkg/apis/meta/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/intstr" ) diff --git a/go.mod b/go.mod index a604d02e1..2101fb089 100644 --- a/go.mod +++ b/go.mod @@ -16,6 +16,7 @@ require ( github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.1 go.uber.org/atomic v1.11.0 + go.uber.org/zap v1.24.0 golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e golang.org/x/sync v0.3.0 golang.org/x/time v0.3.0 @@ -71,7 +72,6 @@ require ( github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.10.0 // indirect go.uber.org/multierr v1.11.0 // indirect - go.uber.org/zap v1.24.0 // indirect golang.org/x/crypto v0.11.0 // indirect golang.org/x/net v0.12.0 // indirect golang.org/x/oauth2 v0.8.0 // indirect diff --git a/pkg/controllers/rollout/controller.go b/pkg/controllers/rollout/controller.go index 1f29dc05f..4c52817e7 100644 --- a/pkg/controllers/rollout/controller.go +++ b/pkg/controllers/rollout/controller.go @@ -92,6 +92,17 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu allBindings = append(allBindings, binding.DeepCopy()) } + // handle the case that a cluster was unselected by the scheduler and then selected again but the unselected binding is not completely deleted yet + wait, err := waitForResourcesToCleanUp(allBindings, &crp) + if err != nil { + return ctrl.Result{}, err + } + if wait { + // wait for the deletion to finish + klog.V(2).InfoS("Found multiple bindings pointing to the same cluster, wait for the deletion to finish", "clusterResourcePlacement", crpName) + return ctrl.Result{RequeueAfter: 5 * time.Second}, nil + } + // find the latest resource resourceBinding latestResourceSnapshotName, err := r.fetchLatestResourceSnapshot(ctx, crpName) if err != nil { @@ -101,18 +112,16 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu } klog.V(2).InfoS("Found the latest resourceSnapshot for the clusterResourcePlacement", "clusterResourcePlacement", crpName, "latestResourceSnapshotName", latestResourceSnapshotName) - //TODO: handle the case that a cluster was unselected by the scheduler and then selected again but the unselected binding is not deleted yet - // fill out all the default values for CRP just in case the mutation webhook is not enabled. 
- crpCopy := crp.DeepCopy() - fleetv1beta1.SetDefaultsClusterResourcePlacement(crpCopy) + fleetv1beta1.SetDefaultsClusterResourcePlacement(&crp) // validate the clusterResourcePlacement just in case the validation webhook is not enabled - if err = validator.ValidateClusterResourcePlacement(crpCopy); err != nil { + if err = validator.ValidateClusterResourcePlacement(&crp); err != nil { klog.ErrorS(err, "Encountered an invalid clusterResourcePlacement", "clusterResourcePlacement", crpName) return ctrl.Result{}, controller.NewUnexpectedBehaviorError(err) } + // pick the bindings to be updated according to the rollout plan - toBeUpdatedBindings, needRoll := pickBindingsToRoll(allBindings, latestResourceSnapshotName, crpCopy) + toBeUpdatedBindings, needRoll := pickBindingsToRoll(allBindings, latestResourceSnapshotName, &crp) if !needRoll { klog.V(2).InfoS("No bindings are out of date, stop rolling", "clusterResourcePlacement", crpName) return ctrl.Result{}, nil @@ -124,7 +133,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu // to avoid the case that the rollout process stalling because the time based binding readiness does not trigger any event. // We wait for 1/5 of the UnavailablePeriodSeconds so we can catch the next ready one early. // TODO: only wait the time we need to wait for the first applied but not ready binding to be ready - return ctrl.Result{RequeueAfter: time.Duration(*crpCopy.Spec.Strategy.RollingUpdate.UnavailablePeriodSeconds) * time.Second / 5}, + return ctrl.Result{RequeueAfter: time.Duration(*crp.Spec.Strategy.RollingUpdate.UnavailablePeriodSeconds) * time.Second / 5}, r.updateBindings(ctx, latestResourceSnapshotName, toBeUpdatedBindings) } @@ -160,6 +169,54 @@ func (r *Reconciler) fetchLatestResourceSnapshot(ctx context.Context, crpName st return latestResourceSnapshotName, nil } +// waitForResourcesToCleanUp checks if there are any cluster that has a binding that is both being deleted and another one that needs rollout. +// We currently just wait for those cluster to be cleanup so that we can have a clean slate to start compute the rollout plan. +// TODO (rzhang): group all bindings pointing to the same cluster together when we calculate the rollout plan so that we can avoid this. 
+func waitForResourcesToCleanUp(allBindings []*fleetv1beta1.ClusterResourceBinding, crp *fleetv1beta1.ClusterResourcePlacement) (bool, error) { + crpObj := klog.KObj(crp) + deletingBinding := make(map[string]bool) + bindingMap := make(map[string]*fleetv1beta1.ClusterResourceBinding) + // separate deleting bindings from the rest of the bindings + for _, binding := range allBindings { + if !binding.DeletionTimestamp.IsZero() { + deletingBinding[binding.Spec.TargetCluster] = true + klog.V(2).InfoS("Found a binding that is being deleted", "clusterResourcePlacement", crpObj, "binding", klog.KObj(binding)) + } else { + if _, exist := bindingMap[binding.Spec.TargetCluster]; !exist { + bindingMap[binding.Spec.TargetCluster] = binding + } else { + return false, controller.NewUnexpectedBehaviorError(fmt.Errorf("the same cluster `%s` has bindings `%s` and `%s` pointing to it", + binding.Spec.TargetCluster, bindingMap[binding.Spec.TargetCluster].Name, binding.Name)) + } + } + } + // check if there are any cluster that has a binding that is both being deleted and scheduled + for cluster, binding := range bindingMap { + // check if there is a deleting binding on the same cluster + if deletingBinding[cluster] { + klog.V(2).InfoS("Find a binding assigned to a cluster with another deleting binding", "clusterResourcePlacement", crpObj, "binding", binding) + if binding.Spec.State == fleetv1beta1.BindingStateBound { + // the rollout controller won't move a binding from scheduled state to bound if there is a deleting binding on the same cluster. + return false, controller.NewUnexpectedBehaviorError(fmt.Errorf( + "find a cluster `%s` that has a bound binding `%s` and a deleting binding point to it", binding.Spec.TargetCluster, binding.Name)) + } + if binding.Spec.State == fleetv1beta1.BindingStateUnscheduled { + // this is a very rare case that the resource was in the middle of being removed from a member cluster after it is unselected. + // then the cluster get selected and unselected in two scheduling before the member agent is able to clean up all the resources. + if binding.GetAnnotations()[fleetv1beta1.PreviousBindingStateAnnotation] == string(fleetv1beta1.BindingStateBound) { + // its previous state can not be bound as rollout won't roll a binding with a deleting binding pointing to the same cluster. + return false, controller.NewUnexpectedBehaviorError(fmt.Errorf( + "find a cluster `%s` that has a unscheduled binding `%+s` with previous state is `bound` and a deleting binding point to it", binding.Spec.TargetCluster, binding.Name)) + } + return true, nil + } + // there is a scheduled binding on the same cluster, we need to wait for the deletion to finish + return true, nil + } + } + return false, nil +} + // pickBindingsToRoll go through all bindings associated with a CRP and returns the bindings that are ready to be updated. // There could be cases that no bindings are ready to be updated because of the maxSurge/maxUnavailable constraints even if there are out of sync bindings. // Thus, it also returns a bool indicating whether there are out of sync bindings to be rolled to differentiate those two cases. 
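Editorial note (not part of the patch): the newly added waitForResourcesToCleanUp above pauses the rollout whenever a cluster still has a deleting binding next to a freshly scheduled one, and treats a bound binding (or an unscheduled binding whose PreviousBindingStateAnnotation says it was bound) alongside a deleting binding as an unexpected state. The following is a minimal, self-contained sketch of that decision table using simplified placeholder types (binding, bindingState, shouldWait are illustrative names, not the real fleet API types).

// Sketch only: simplified stand-ins for the fleet API types, illustrating the
// wait/error decision table of waitForResourcesToCleanUp above.
package main

import "fmt"

type bindingState string

const (
	stateScheduled   bindingState = "Scheduled"
	stateBound       bindingState = "Bound"
	stateUnscheduled bindingState = "Unscheduled"
)

type binding struct {
	cluster       string
	state         bindingState
	deleting      bool         // true when DeletionTimestamp is set
	previousState bindingState // value recorded in the PreviousBindingState annotation, if any
}

// shouldWait mirrors the controller logic: wait while a deleting binding shares a
// cluster with a (previously) scheduled binding, and surface an error for
// combinations the rollout controller should never produce.
func shouldWait(all []binding) (bool, error) {
	deleting := map[string]bool{}
	live := map[string]binding{}
	for _, b := range all {
		if b.deleting {
			deleting[b.cluster] = true
			continue
		}
		if _, dup := live[b.cluster]; dup {
			return false, fmt.Errorf("two non-deleting bindings target cluster %q", b.cluster)
		}
		live[b.cluster] = b
	}
	for cluster, b := range live {
		if !deleting[cluster] {
			continue
		}
		switch {
		case b.state == stateBound:
			return false, fmt.Errorf("bound binding coexists with a deleting binding on cluster %q", cluster)
		case b.state == stateUnscheduled && b.previousState == stateBound:
			return false, fmt.Errorf("previously bound binding coexists with a deleting binding on cluster %q", cluster)
		default:
			// Scheduled, or unscheduled coming from a scheduled state: wait for the cleanup to finish.
			return true, nil
		}
	}
	return false, nil
}

func main() {
	wait, err := shouldWait([]binding{
		{cluster: "member-1", state: stateScheduled},
		{cluster: "member-1", state: stateUnscheduled, deleting: true},
	})
	fmt.Println(wait, err) // true <nil>
}
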
@@ -203,11 +260,11 @@ func pickBindingsToRoll(allBindings []*fleetv1beta1.ClusterResourceBinding, late klog.V(8).InfoS("Found a ready unscheduled binding", "clusterResourcePlacement", klog.KObj(crp), "binding", klog.KObj(binding)) readyBindings = append(readyBindings, binding) } - if binding.DeletionTimestamp == nil { - // it's not deleted yet, so it is a removal candidate + if binding.DeletionTimestamp.IsZero() { + // it's not been deleted yet, so it is a removal candidate removeCandidates = append(removeCandidates, binding) } else if bindingReady { - // it can be deleted at any time, so it can be unavailable at any time + // it is being deleted, it can be removed from the cluster at any time, so it can be unavailable at any time canBeUnavailableBindings = append(canBeUnavailableBindings, binding) } @@ -242,8 +299,14 @@ func pickBindingsToRoll(allBindings []*fleetv1beta1.ClusterResourceBinding, late case crp.Spec.Policy.PlacementType == fleetv1beta1.PickFixedPlacementType: // we use the length of the given cluster names are targets targetNumber = len(crp.Spec.Policy.ClusterNames) - default: + case crp.Spec.Policy.PlacementType == fleetv1beta1.PickNPlacementType: + // we use the given number as the target targetNumber = int(*crp.Spec.Policy.NumberOfClusters) + default: + // should never happen + klog.ErrorS(controller.NewUnexpectedBehaviorError(fmt.Errorf("unknown placement type")), + "Encountered an invalid placementType", "clusterResourcePlacement", klog.KObj(crp)) + targetNumber = 0 } klog.V(2).InfoS("Calculated the targetNumber", "clusterResourcePlacement", klog.KObj(crp), "targetNumber", targetNumber, "readyBindingNumber", len(readyBindings), "canBeUnavailableBindingNumber", len(canBeUnavailableBindings), diff --git a/pkg/controllers/rollout/controller_integration_test.go b/pkg/controllers/rollout/controller_integration_test.go index 119d10a1a..68441c6ad 100644 --- a/pkg/controllers/rollout/controller_integration_test.go +++ b/pkg/controllers/rollout/controller_integration_test.go @@ -23,8 +23,11 @@ import ( ) const ( - timeout = time.Second * 5 - interval = time.Millisecond * 250 + timeout = time.Second * 5 + interval = time.Millisecond * 250 + consistentTimeout = time.Second * 60 + consistentInterval = time.Second * 5 + customBindingFinalizer = "custom-binding-finalizer" ) var testCRPName string @@ -46,10 +49,12 @@ var _ = Describe("Test the rollout Controller", func() { for _, binding := range bindings { Expect(k8sClient.Delete(ctx, binding)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) } + bindings = nil By("Deleting ClusterResourceSnapshots") for _, resourceSnapshot := range resourceSnapshots { Expect(k8sClient.Delete(ctx, resourceSnapshot)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) } + resourceSnapshots = nil By("Deleting ClusterResourcePlacement") Expect(k8sClient.Delete(ctx, rolloutCRP)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) }) @@ -346,6 +351,99 @@ var _ = Describe("Test the rollout Controller", func() { }, timeout, interval).Should(BeTrue(), "rollout controller should roll all the bindings to use the latest resource snapshot") }) + It("Should wait for deleting binding delete before we rollout", func() { + // create CRP + var targetCluster int32 = 5 + rolloutCRP = clusterResourcePlacementForTest(testCRPName, createPlacementPolicyForTest(fleetv1beta1.PickNPlacementType, targetCluster)) + Expect(k8sClient.Create(ctx, rolloutCRP)).Should(Succeed()) + // create master resource snapshot that is latest + latestSnapshot := 
generateResourceSnapshot(rolloutCRP.Name, 1, true) + Expect(k8sClient.Create(ctx, latestSnapshot)).Should(Succeed()) + By(fmt.Sprintf("resource snapshot %s created", latestSnapshot.Name)) + // generate scheduled bindings for master snapshot on target clusters + clusters := make([]string, targetCluster) + for i := 0; i < int(targetCluster); i++ { + clusters[i] = "cluster-" + utils.RandStr() + binding := generateClusterResourceBinding(fleetv1beta1.BindingStateScheduled, latestSnapshot.Name, clusters[i]) + bindings = append(bindings, binding) + } + // create two unscheduled bindings and delete them + firstDeleteBinding := generateClusterResourceBinding(fleetv1beta1.BindingStateUnscheduled, latestSnapshot.Name, clusters[0]) + firstDeleteBinding.Name = "delete-" + firstDeleteBinding.Name + firstDeleteBinding.SetFinalizers([]string{customBindingFinalizer}) + Expect(k8sClient.Create(ctx, firstDeleteBinding)).Should(Succeed()) + Expect(k8sClient.Delete(ctx, firstDeleteBinding)).Should(Succeed()) + secondDeleteBinding := generateClusterResourceBinding(fleetv1beta1.BindingStateUnscheduled, latestSnapshot.Name, clusters[2]) + secondDeleteBinding.Name = "delete-" + secondDeleteBinding.Name + secondDeleteBinding.SetFinalizers([]string{customBindingFinalizer}) + Expect(k8sClient.Create(ctx, secondDeleteBinding)).Should(Succeed()) + Expect(k8sClient.Delete(ctx, secondDeleteBinding)).Should(Succeed()) + By("Created 2 deleting bindings") + // create the normal binding after the deleting one + for _, binding := range bindings { + Expect(k8sClient.Create(ctx, binding)).Should(Succeed()) + By(fmt.Sprintf("resource binding %s created", binding.Name)) + } + // check that no bindings are rolled out + Consistently(func() bool { + for _, binding := range bindings { + err := k8sClient.Get(ctx, types.NamespacedName{Name: binding.GetName()}, binding) + if err != nil { + return false + } + if binding.Spec.State == fleetv1beta1.BindingStateBound { + return false + } + } + return true + }, consistentTimeout, consistentInterval).Should(BeTrue(), "rollout controller should not roll the bindings") + By("Verified that the rollout is blocked") + // now we remove the finalizer of the first deleting binding + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: firstDeleteBinding.GetName()}, firstDeleteBinding)).Should(Succeed()) + firstDeleteBinding.SetFinalizers([]string{}) + Expect(k8sClient.Update(ctx, firstDeleteBinding)).Should(Succeed()) + Eventually(func() bool { + return apierrors.IsNotFound(k8sClient.Get(ctx, types.NamespacedName{Name: firstDeleteBinding.GetName()}, firstDeleteBinding)) + }, timeout, interval).Should(BeTrue(), "the first deleting binding should now be deleted") + By("Verified that the first deleting binding is deleted") + // check that no bindings are rolled out + Consistently(func() bool { + for _, binding := range bindings { + err := k8sClient.Get(ctx, types.NamespacedName{Name: binding.GetName()}, binding) + if err != nil { + return false + } + if binding.Spec.State == fleetv1beta1.BindingStateBound { + return false + } + } + return true + }, consistentTimeout, consistentInterval).Should(BeTrue(), "rollout controller should not roll the bindings") + By("Verified that the rollout is still blocked") + // now we remove the finalizer of the second deleting binding + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: secondDeleteBinding.GetName()}, secondDeleteBinding)).Should(Succeed()) + secondDeleteBinding.SetFinalizers([]string{}) + Expect(k8sClient.Update(ctx, 
secondDeleteBinding)).Should(Succeed()) + Eventually(func() bool { + return apierrors.IsNotFound(k8sClient.Get(ctx, types.NamespacedName{Name: secondDeleteBinding.GetName()}, secondDeleteBinding)) + }, timeout, interval).Should(BeTrue(), "the second deleting binding should now be deleted") + By("Verified that the second deleting binding is deleted") + // check that the bindings are rolledout + Eventually(func() bool { + for _, binding := range bindings { + err := k8sClient.Get(ctx, types.NamespacedName{Name: binding.GetName()}, binding) + if err != nil { + return false + } + if binding.Spec.State != fleetv1beta1.BindingStateBound { + return false + } + } + return true + }, consistentTimeout, consistentInterval).Should(BeTrue(), "rollout controller should roll all the bindings to Bound state") + By("Verified that the rollout is finally unblocked") + }) + // TODO: should update scheduled bindings to the latest snapshot when it is updated to bound state. // TODO: should count the deleting bindings as can be Unavailable. @@ -353,6 +451,8 @@ var _ = Describe("Test the rollout Controller", func() { }) func markBindingApplied(binding *fleetv1beta1.ClusterResourceBinding) { + // get the binding again to avoid conflict + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: binding.GetName()}, binding)).Should(Succeed()) binding.SetConditions(metav1.Condition{ Status: metav1.ConditionTrue, Type: string(fleetv1beta1.ResourceBindingApplied), @@ -407,3 +507,11 @@ func generateResourceSnapshot(testCRPName string, resourceIndex int, isLatest bo } return clusterResourceSnapshot } + +func generateDeletingClusterResourceBinding(targetCluster string) *fleetv1beta1.ClusterResourceBinding { + binding := generateClusterResourceBinding(fleetv1beta1.BindingStateUnscheduled, "anything", targetCluster) + binding.DeletionTimestamp = &metav1.Time{ + Time: now, + } + return binding +} diff --git a/pkg/controllers/rollout/controller_test.go b/pkg/controllers/rollout/controller_test.go index 94d7a5965..14c6b2555 100644 --- a/pkg/controllers/rollout/controller_test.go +++ b/pkg/controllers/rollout/controller_test.go @@ -7,6 +7,7 @@ package rollout import ( "context" + "errors" "reflect" "testing" "time" @@ -20,9 +21,16 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllertest" fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/utils/controller" ) -var now = time.Now() +var ( + now = time.Now() + + cluster1 = "cluster-1" + cluster2 = "cluster-2" + cluster3 = "cluster-3" +) func TestReconciler_handleResourceSnapshot(t *testing.T) { tests := map[string]struct { @@ -141,6 +149,72 @@ func TestReconciler_handleResourceBinding(t *testing.T) { } } +func Test_waitForResourcesToCleanUp(t *testing.T) { + tests := map[string]struct { + allBindings []*fleetv1beta1.ClusterResourceBinding + wantWait bool + wantErr bool + }{ + "test deleting binding block schedule binding on the same cluster": { + allBindings: []*fleetv1beta1.ClusterResourceBinding{ + generateClusterResourceBinding(fleetv1beta1.BindingStateScheduled, "snapshot-1", cluster1), + generateDeletingClusterResourceBinding(cluster1), + }, + wantWait: true, + wantErr: false, + }, + "test deleting binding not block binding on different cluster": { + allBindings: []*fleetv1beta1.ClusterResourceBinding{ + generateClusterResourceBinding(fleetv1beta1.BindingStateScheduled, "snapshot-1", cluster1), + generateDeletingClusterResourceBinding(cluster2), + generateClusterResourceBinding(fleetv1beta1.BindingStateBound, "snapshot-1", cluster3), + 
}, + wantWait: false, + wantErr: false, + }, + "test deleting binding cannot co-exsit with a bound binding on same cluster": { + allBindings: []*fleetv1beta1.ClusterResourceBinding{ + generateDeletingClusterResourceBinding(cluster1), + generateClusterResourceBinding(fleetv1beta1.BindingStateBound, "snapshot-1", cluster1), + }, + wantWait: false, + wantErr: true, + }, + "test no two non-deleting binding can co-exsit on same cluster, case one": { + allBindings: []*fleetv1beta1.ClusterResourceBinding{ + generateClusterResourceBinding(fleetv1beta1.BindingStateScheduled, "snapshot-2", cluster1), + generateClusterResourceBinding(fleetv1beta1.BindingStateBound, "snapshot-1", cluster1), + }, + wantWait: false, + wantErr: true, + }, + "test no two non-deleting binding can co-exsit on same cluster, case two": { + allBindings: []*fleetv1beta1.ClusterResourceBinding{ + generateClusterResourceBinding(fleetv1beta1.BindingStateScheduled, "snapshot-2", cluster1), + generateClusterResourceBinding(fleetv1beta1.BindingStateUnscheduled, "snapshot-1", cluster1), + }, + wantWait: false, + wantErr: true, + }, + } + for name, tt := range tests { + crp := &fleetv1beta1.ClusterResourcePlacement{} + t.Run(name, func(t *testing.T) { + gotWait, err := waitForResourcesToCleanUp(tt.allBindings, crp) + if (err != nil) != tt.wantErr { + t.Errorf("waitForResourcesToCleanUp test `%s` error = %v, wantErr %v", name, err, tt.wantErr) + return + } + if err != nil && !errors.Is(err, controller.ErrUnexpectedBehavior) { + t.Errorf("waitForResourcesToCleanUp test `%s` get an unexpected error = %v", name, err) + } + if gotWait != tt.wantWait { + t.Errorf("waitForResourcesToCleanUp test `%s` gotWait = %v, wantWait %v", name, gotWait, tt.wantWait) + } + }) + } +} + func Test_pickBindingsToRoll(t *testing.T) { tests := map[string]struct { allBindings []*fleetv1beta1.ClusterResourceBinding @@ -152,7 +226,7 @@ func Test_pickBindingsToRoll(t *testing.T) { // TODO: add more tests "test bound with out dated bindings": { allBindings: []*fleetv1beta1.ClusterResourceBinding{ - generateClusterResourceBinding(fleetv1beta1.BindingStateScheduled, "snapshot-1", "cluster-1"), + generateClusterResourceBinding(fleetv1beta1.BindingStateScheduled, "snapshot-1", cluster1), }, latestResourceSnapshotName: "snapshot-2", crp: clusterResourcePlacementForTest("test", @@ -169,10 +243,10 @@ func Test_pickBindingsToRoll(t *testing.T) { tobeUpdatedBindings = append(tobeUpdatedBindings, tt.allBindings[index]) } if !reflect.DeepEqual(gotUpdatedBindings, tobeUpdatedBindings) { - t.Errorf("pickBindingsToRoll() gotUpdatedBindings = %v, wantReady %v", gotUpdatedBindings, tt.tobeUpdatedBindings) + t.Errorf("pickBindingsToRoll test `%s` gotUpdatedBindings = %v, wantReady %v", name, gotUpdatedBindings, tt.tobeUpdatedBindings) } if gotNeedRoll != tt.needRoll { - t.Errorf("pickBindingsToRoll() gotNeedRoll = %v, wantReady %v", gotNeedRoll, tt.needRoll) + t.Errorf("pickBindingsToRoll test `%s` gotNeedRoll = %v, wantReady %v", name, gotNeedRoll, tt.needRoll) } }) } @@ -345,10 +419,10 @@ func Test_isBindingReady(t *testing.T) { t.Run(name, func(t *testing.T) { gotWaitTime, gotReady := isBindingReady(tt.binding, tt.readyTimeCutOff) if gotReady != tt.wantReady { - t.Errorf("gotReady = %v, wantReady %v", gotReady, tt.wantReady) + t.Errorf("isBindingReady test `%s` gotReady = %v, wantReady %v", name, gotReady, tt.wantReady) } if gotWaitTime != tt.wantWaitTime { - t.Errorf("gotWaitTime = %v, wantWaitTime %v", gotWaitTime, tt.wantWaitTime) + t.Errorf("isBindingReady test `%s` 
gotWaitTime = %v, wantWaitTime %v", name, gotWaitTime, tt.wantWaitTime) } }) } diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go index f344bf13e..b62c27b02 100644 --- a/pkg/scheduler/framework/framework.go +++ b/pkg/scheduler/framework/framework.go @@ -39,6 +39,7 @@ const ( pickedByPolicyReason = "picked by scheduling policy" pickFixedInvalidClusterReasonTemplate = "cluster is not eligible for resource placement yet: %s" pickFixedNotFoundClusterReason = "specified cluster is not found" + notPickedByScoreReason = "cluster does not score high enough" // The reasons and messages for scheduled conditions. fullyScheduledReason = "SchedulingPolicyFulfilled" @@ -422,7 +423,7 @@ func (f *framework) runSchedulingCycleForPickAllPlacementType( // With the PickAll placement type, the desired number of clusters to select always matches // with the count of scheduled + bound bindings. numOfClusters := len(toCreate) + len(patched) + len(scheduled) + len(bound) - if err := f.updatePolicySnapshotStatusFromBindings(ctx, policy, numOfClusters, filtered, toCreate, patched, scheduled, bound); err != nil { + if err := f.updatePolicySnapshotStatusFromBindings(ctx, policy, numOfClusters, nil, filtered, toCreate, patched, scheduled, bound); err != nil { klog.ErrorS(err, "Failed to update latest scheduling decisions and condition", "clusterSchedulingPolicySnapshot", policyRef) return ctrl.Result{}, err } @@ -665,13 +666,14 @@ func (f *framework) updatePolicySnapshotStatusFromBindings( ctx context.Context, policy *placementv1beta1.ClusterSchedulingPolicySnapshot, numOfClusters int, + notPicked ScoredClusters, filtered []*filteredClusterWithStatus, existing ...[]*placementv1beta1.ClusterResourceBinding, ) error { policyRef := klog.KObj(policy) // Prepare new scheduling decisions. - newDecisions := newSchedulingDecisionsFromBindings(f.maxUnselectedClusterDecisionCount, filtered, existing...) + newDecisions := newSchedulingDecisionsFromBindings(f.maxUnselectedClusterDecisionCount, notPicked, filtered, existing...) // Prepare new scheduling condition. newCondition := newScheduledConditionFromBindings(policy, numOfClusters, existing...) @@ -763,7 +765,7 @@ func (f *framework) runSchedulingCycleForPickNPlacementType( // Note that since there is no reliable way to determine the validity of old decisions added // to the policy snapshot status, we will only update the status with the known facts, i.e., // the clusters that are currently selected. - if err := f.updatePolicySnapshotStatusFromBindings(ctx, policy, numOfClusters, nil, scheduled, bound); err != nil { + if err := f.updatePolicySnapshotStatusFromBindings(ctx, policy, numOfClusters, nil, nil, scheduled, bound); err != nil { klog.ErrorS(err, "Failed to update latest scheduling decisions and condition when downscaling", "clusterSchedulingPolicySnapshot", policyRef) return ctrl.Result{}, err } @@ -783,7 +785,7 @@ func (f *framework) runSchedulingCycleForPickNPlacementType( // Note that since there is no reliable way to determine the validity of old decisions added // to the policy snapshot status, we will only update the status with the known facts, i.e., // the clusters that are currently selected. 
- if err := f.updatePolicySnapshotStatusFromBindings(ctx, policy, numOfClusters, nil, bound, scheduled); err != nil { + if err := f.updatePolicySnapshotStatusFromBindings(ctx, policy, numOfClusters, nil, nil, bound, scheduled); err != nil { klog.ErrorS(err, "Failed to update latest scheduling decisions and condition when no scheduling run is needed", "clusterSchedulingPolicySnapshot", policyRef) return ctrl.Result{}, err } @@ -826,7 +828,7 @@ func (f *framework) runSchedulingCycleForPickNPlacementType( // // Note that at this point of the scheduling cycle, any cluster associated with a currently // bound or scheduled binding should be filtered out already. - picked := pickTopNScoredClusters(scored, numOfClustersToPick) + picked, notPicked := pickTopNScoredClusters(scored, numOfClustersToPick) // Cross-reference the newly picked clusters with obsolete bindings; find out // @@ -876,7 +878,7 @@ func (f *framework) runSchedulingCycleForPickNPlacementType( // Update policy snapshot status with the latest scheduling decisions and condition. klog.V(2).InfoS("Updating policy snapshot status", "clusterSchedulingPolicySnapshot", policyRef) - if err := f.updatePolicySnapshotStatusFromBindings(ctx, policy, numOfClusters, filtered, toCreate, patched, scheduled, bound); err != nil { + if err := f.updatePolicySnapshotStatusFromBindings(ctx, policy, numOfClusters, notPicked, filtered, toCreate, patched, scheduled, bound); err != nil { klog.ErrorS(err, "Failed to update latest scheduling decisions and condition", "clusterSchedulingPolicySnapshot", policyRef) return ctrl.Result{}, err } diff --git a/pkg/scheduler/framework/framework_test.go b/pkg/scheduler/framework/framework_test.go index 959d80827..10d22d475 100644 --- a/pkg/scheduler/framework/framework_test.go +++ b/pkg/scheduler/framework/framework_test.go @@ -46,13 +46,14 @@ const ( ) var ( - ignoreObjectMetaResourceVersionField = cmpopts.IgnoreFields(metav1.ObjectMeta{}, "ResourceVersion") - ignoreObjectMetaNameField = cmpopts.IgnoreFields(metav1.ObjectMeta{}, "Name") - ignoreTypeMetaAPIVersionKindFields = cmpopts.IgnoreFields(metav1.TypeMeta{}, "APIVersion", "Kind") - ignoredStatusFields = cmpopts.IgnoreFields(Status{}, "reasons", "err") - ignoredBindingWithPatchFields = cmpopts.IgnoreFields(bindingWithPatch{}, "patch") - ignoredCondFields = cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime") - ignoreCycleStateFields = cmpopts.IgnoreFields(CycleState{}, "store", "clusters", "scheduledOrBoundBindings", "obsoleteBindings") + ignoreObjectMetaResourceVersionField = cmpopts.IgnoreFields(metav1.ObjectMeta{}, "ResourceVersion") + ignoreObjectMetaNameField = cmpopts.IgnoreFields(metav1.ObjectMeta{}, "Name") + ignoreTypeMetaAPIVersionKindFields = cmpopts.IgnoreFields(metav1.TypeMeta{}, "APIVersion", "Kind") + ignoredStatusFields = cmpopts.IgnoreFields(Status{}, "reasons", "err") + ignoredBindingWithPatchFields = cmpopts.IgnoreFields(bindingWithPatch{}, "patch") + ignoredCondFields = cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime") + ignoreCycleStateFields = cmpopts.IgnoreFields(CycleState{}, "store", "clusters", "scheduledOrBoundBindings", "obsoleteBindings") + ignoreClusterDecisionScoreAndReasonFields = cmpopts.IgnoreFields(placementv1beta1.ClusterDecision{}, "ClusterScore", "Reason") lessFuncCluster = func(cluster1, cluster2 *clusterv1beta1.MemberCluster) bool { return cluster1.Name < cluster2.Name @@ -97,15 +98,27 @@ var ( Selected: selected, } - if !selected { - newDecision.Reason = defaultFilteredStatus.String() - } - decisions 
= append(decisions, newDecision) } return decisions } + generateNotPickedScoredClusters = func(count int, startIdx int) ScoredClusters { + notPicked := make(ScoredClusters, 0, count) + + for i := 0; i < count; i++ { + notPicked = append(notPicked, &ScoredCluster{ + Cluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(clusterNameTemplate, i+startIdx), + }, + }, + Score: &ClusterScore{}, + }) + } + return notPicked + } + generatedFilterdClusterWithStatus = func(count int, startIdx int) []*filteredClusterWithStatus { filtered := make([]*filteredClusterWithStatus, 0, count) @@ -1780,6 +1793,8 @@ func TestUpdatePolicySnapshotStatusFromBindings(t *testing.T) { topologySpreadScore1 := int32(10) affinityScore2 := int32(0) topologySpreadScore2 := int32(20) + affinityScore3 := int32(-1) + topologySpreadScore3 := int32(0) filteredStatus := NewNonErrorStatus(ClusterUnschedulable, dummyPluginName, "filtered") @@ -1796,13 +1811,14 @@ func TestUpdatePolicySnapshotStatusFromBindings(t *testing.T) { testCases := []struct { name string maxUnselectedClusterDecisionCount int + notPicked ScoredClusters filtered []*filteredClusterWithStatus existing [][]*placementv1beta1.ClusterResourceBinding wantDecisions []placementv1beta1.ClusterDecision wantCondition metav1.Condition }{ { - name: "no filtered", + name: "no filtered/not picked clusters", maxUnselectedClusterDecisionCount: defaultMaxUnselectedClusterDecisionCount, existing: [][]*placementv1beta1.ClusterResourceBinding{ { @@ -1863,7 +1879,7 @@ func TestUpdatePolicySnapshotStatusFromBindings(t *testing.T) { wantCondition: newScheduledCondition(policy, metav1.ConditionTrue, fullyScheduledReason, fullyScheduledMessage), }, { - name: "filtered and existing", + name: "with filtered clusters and existing bindings", maxUnselectedClusterDecisionCount: defaultMaxUnselectedClusterDecisionCount, existing: [][]*placementv1beta1.ClusterResourceBinding{ { @@ -1938,6 +1954,162 @@ func TestUpdatePolicySnapshotStatusFromBindings(t *testing.T) { }, wantCondition: newScheduledCondition(policy, metav1.ConditionTrue, fullyScheduledReason, fullyScheduledMessage), }, + { + name: "with not picked clusters and existing bindings", + maxUnselectedClusterDecisionCount: defaultMaxUnselectedClusterDecisionCount, + existing: [][]*placementv1beta1.ClusterResourceBinding{ + { + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + ClusterDecision: placementv1beta1.ClusterDecision{ + ClusterName: clusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + AffinityScore: &affinityScore1, + TopologySpreadScore: &topologySpreadScore1, + }, + Reason: pickedByPolicyReason, + }, + }, + }, + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: altBindingName, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + ClusterDecision: placementv1beta1.ClusterDecision{ + ClusterName: altClusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + AffinityScore: &affinityScore2, + TopologySpreadScore: &topologySpreadScore2, + }, + Reason: pickedByPolicyReason, + }, + }, + }, + }, + }, + notPicked: ScoredClusters{ + { + Cluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: anotherClusterName, + }, + }, + Score: &ClusterScore{ + TopologySpreadScore: int(topologySpreadScore3), + AffinityScore: int(affinityScore3), + }, + }, + }, + wantDecisions: []placementv1beta1.ClusterDecision{ + 
{ + ClusterName: clusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + AffinityScore: &affinityScore1, + TopologySpreadScore: &topologySpreadScore1, + }, + Reason: pickedByPolicyReason, + }, + { + ClusterName: altClusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + AffinityScore: &affinityScore2, + TopologySpreadScore: &topologySpreadScore2, + }, + Reason: pickedByPolicyReason, + }, + { + ClusterName: anotherClusterName, + Selected: false, + Reason: notPickedByScoreReason, + ClusterScore: &placementv1beta1.ClusterScore{ + AffinityScore: &affinityScore3, + TopologySpreadScore: &topologySpreadScore3, + }, + }, + }, + wantCondition: newScheduledCondition(policy, metav1.ConditionTrue, fullyScheduledReason, fullyScheduledMessage), + }, + { + name: "with both filtered/not picked clusters and existing bindings", + maxUnselectedClusterDecisionCount: defaultMaxUnselectedClusterDecisionCount, + existing: [][]*placementv1beta1.ClusterResourceBinding{ + { + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + ClusterDecision: placementv1beta1.ClusterDecision{ + ClusterName: clusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + AffinityScore: &affinityScore1, + TopologySpreadScore: &topologySpreadScore1, + }, + Reason: pickedByPolicyReason, + }, + }, + }, + }, + }, + notPicked: ScoredClusters{ + { + Cluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: altClusterName, + }, + }, + Score: &ClusterScore{ + TopologySpreadScore: int(topologySpreadScore3), + AffinityScore: int(affinityScore3), + }, + }, + }, + filtered: []*filteredClusterWithStatus{ + { + cluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: anotherClusterName, + }, + }, + status: filteredStatus, + }, + }, + wantDecisions: []placementv1beta1.ClusterDecision{ + { + ClusterName: clusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + AffinityScore: &affinityScore1, + TopologySpreadScore: &topologySpreadScore1, + }, + Reason: pickedByPolicyReason, + }, + { + ClusterName: altClusterName, + Selected: false, + ClusterScore: &placementv1beta1.ClusterScore{ + AffinityScore: &affinityScore3, + TopologySpreadScore: &topologySpreadScore3, + }, + Reason: notPickedByScoreReason, + }, + { + ClusterName: anotherClusterName, + Selected: false, + Reason: filteredStatus.String(), + }, + }, + wantCondition: newScheduledCondition(policy, metav1.ConditionTrue, fullyScheduledReason, fullyScheduledMessage), + }, { name: "none", maxUnselectedClusterDecisionCount: defaultMaxUnselectedClusterDecisionCount, @@ -2002,6 +2174,75 @@ func TestUpdatePolicySnapshotStatusFromBindings(t *testing.T) { }, wantCondition: newScheduledCondition(policy, metav1.ConditionTrue, fullyScheduledReason, fullyScheduledMessage), }, + { + name: "too many not picked", + maxUnselectedClusterDecisionCount: 1, + existing: [][]*placementv1beta1.ClusterResourceBinding{ + { + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + ClusterDecision: placementv1beta1.ClusterDecision{ + ClusterName: clusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + AffinityScore: &affinityScore1, + TopologySpreadScore: &topologySpreadScore1, + }, + Reason: pickedByPolicyReason, + }, + }, + }, + }, + }, + notPicked: ScoredClusters{ + { + Cluster: 
&clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: altClusterName, + }, + }, + Score: &ClusterScore{ + AffinityScore: int(affinityScore2), + TopologySpreadScore: int(topologySpreadScore2), + }, + }, + { + Cluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: anotherClusterName, + }, + }, + Score: &ClusterScore{ + AffinityScore: int(affinityScore3), + TopologySpreadScore: int(topologySpreadScore3), + }, + }, + }, + wantDecisions: []placementv1beta1.ClusterDecision{ + { + ClusterName: clusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + AffinityScore: &affinityScore1, + TopologySpreadScore: &topologySpreadScore1, + }, + Reason: pickedByPolicyReason, + }, + { + ClusterName: altClusterName, + Selected: false, + Reason: notPickedByScoreReason, + ClusterScore: &placementv1beta1.ClusterScore{ + AffinityScore: &affinityScore2, + TopologySpreadScore: &topologySpreadScore2, + }, + }, + }, + wantCondition: newScheduledCondition(policy, metav1.ConditionTrue, fullyScheduledReason, fullyScheduledMessage), + }, } for _, tc := range testCases { @@ -2021,7 +2262,7 @@ func TestUpdatePolicySnapshotStatusFromBindings(t *testing.T) { for _, bindingSet := range tc.existing { numOfClusters += len(bindingSet) } - if err := f.updatePolicySnapshotStatusFromBindings(ctx, policy, numOfClusters, tc.filtered, tc.existing...); err != nil { + if err := f.updatePolicySnapshotStatusFromBindings(ctx, policy, numOfClusters, tc.notPicked, tc.filtered, tc.existing...); err != nil { t.Fatalf("updatePolicySnapshotStatusFrom() = %v, want no error", err) } @@ -2436,19 +2677,95 @@ func TestNewSchedulingDecisionsFromBindings(t *testing.T) { affinityScore1 := int32(10) topologySpreadScore2 := int32(0) affinityScore2 := int32(20) + topologySpreadScore3 := int32(2) + affinityScore3 := int32(5) filteredStatus := NewNonErrorStatus(ClusterUnschedulable, dummyPlugin, dummyReasons...) 
testCases := []struct { name string maxUnselectedClusterDecisionCount int + notPicked ScoredClusters filtered []*filteredClusterWithStatus existing [][]*placementv1beta1.ClusterResourceBinding want []placementv1beta1.ClusterDecision }{ { - name: "no filtered clusters, small number of existing bindings", + name: "no not picked/filtered clusters, small number of existing bindings", + maxUnselectedClusterDecisionCount: 20, + existing: [][]*placementv1beta1.ClusterResourceBinding{ + { + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + ClusterDecision: placementv1beta1.ClusterDecision{ + ClusterName: clusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + TopologySpreadScore: &topologySpreadScore1, + AffinityScore: &affinityScore1, + }, + Reason: pickedByPolicyReason, + }, + }, + }, + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: altBindingName, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + ClusterDecision: placementv1beta1.ClusterDecision{ + ClusterName: altClusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + TopologySpreadScore: &topologySpreadScore2, + AffinityScore: &affinityScore2, + }, + Reason: pickedByPolicyReason, + }, + }, + }, + }, + }, + want: []placementv1beta1.ClusterDecision{ + { + ClusterName: clusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + TopologySpreadScore: &topologySpreadScore1, + AffinityScore: &affinityScore1, + }, + Reason: pickedByPolicyReason, + }, + { + ClusterName: altClusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + TopologySpreadScore: &topologySpreadScore2, + AffinityScore: &affinityScore2, + }, + Reason: pickedByPolicyReason, + }, + }, + }, + { + name: "with not picked clusters, small number of existing bindings", maxUnselectedClusterDecisionCount: 20, + notPicked: ScoredClusters{ + { + Cluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: anotherClusterName, + }, + }, + Score: &ClusterScore{ + AffinityScore: int(affinityScore3), + TopologySpreadScore: int(topologySpreadScore3), + }, + }, + }, existing: [][]*placementv1beta1.ClusterResourceBinding{ { &placementv1beta1.ClusterResourceBinding{ @@ -2504,6 +2821,87 @@ func TestNewSchedulingDecisionsFromBindings(t *testing.T) { }, Reason: pickedByPolicyReason, }, + { + ClusterName: anotherClusterName, + Selected: false, + ClusterScore: &placementv1beta1.ClusterScore{ + TopologySpreadScore: &topologySpreadScore3, + AffinityScore: &affinityScore3, + }, + Reason: notPickedByScoreReason, + }, + }, + }, + { + name: "with both not picked and filtered clusters, small number of existing bindings", + maxUnselectedClusterDecisionCount: 20, + notPicked: ScoredClusters{ + { + Cluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: anotherClusterName, + }, + }, + Score: &ClusterScore{ + AffinityScore: int(affinityScore3), + TopologySpreadScore: int(topologySpreadScore3), + }, + }, + }, + filtered: []*filteredClusterWithStatus{ + { + cluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: altClusterName, + }, + }, + status: filteredStatus, + }, + }, + existing: [][]*placementv1beta1.ClusterResourceBinding{ + { + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + ClusterDecision: 
placementv1beta1.ClusterDecision{ + ClusterName: clusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + TopologySpreadScore: &topologySpreadScore1, + AffinityScore: &affinityScore1, + }, + Reason: pickedByPolicyReason, + }, + }, + }, + }, + }, + want: []placementv1beta1.ClusterDecision{ + { + ClusterName: clusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + TopologySpreadScore: &topologySpreadScore1, + AffinityScore: &affinityScore1, + }, + Reason: pickedByPolicyReason, + }, + { + ClusterName: anotherClusterName, + Selected: false, + ClusterScore: &placementv1beta1.ClusterScore{ + TopologySpreadScore: &topologySpreadScore3, + AffinityScore: &affinityScore3, + }, + Reason: notPickedByScoreReason, + }, + { + ClusterName: altClusterName, + Selected: false, + Reason: filteredStatus.String(), + }, }, }, { @@ -2651,11 +3049,84 @@ func TestNewSchedulingDecisionsFromBindings(t *testing.T) { }, }, }, + { + name: "with not picked clusters (count exceeding limit), small number of existing bindings", + maxUnselectedClusterDecisionCount: 0, + notPicked: ScoredClusters{ + { + Cluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: anotherClusterName, + }, + }, + Score: &ClusterScore{ + AffinityScore: int(affinityScore3), + TopologySpreadScore: int(topologySpreadScore3), + }, + }, + }, + existing: [][]*placementv1beta1.ClusterResourceBinding{ + { + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + ClusterDecision: placementv1beta1.ClusterDecision{ + ClusterName: clusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + TopologySpreadScore: &topologySpreadScore1, + AffinityScore: &affinityScore1, + }, + Reason: pickedByPolicyReason, + }, + }, + }, + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: altBindingName, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + ClusterDecision: placementv1beta1.ClusterDecision{ + ClusterName: altClusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + TopologySpreadScore: &topologySpreadScore2, + AffinityScore: &affinityScore2, + }, + Reason: pickedByPolicyReason, + }, + }, + }, + }, + }, + want: []placementv1beta1.ClusterDecision{ + { + ClusterName: clusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + TopologySpreadScore: &topologySpreadScore1, + AffinityScore: &affinityScore1, + }, + Reason: pickedByPolicyReason, + }, + { + ClusterName: altClusterName, + Selected: true, + ClusterScore: &placementv1beta1.ClusterScore{ + TopologySpreadScore: &topologySpreadScore2, + AffinityScore: &affinityScore2, + }, + Reason: pickedByPolicyReason, + }, + }, + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - decisions := newSchedulingDecisionsFromBindings(tc.maxUnselectedClusterDecisionCount, tc.filtered, tc.existing...) + decisions := newSchedulingDecisionsFromBindings(tc.maxUnselectedClusterDecisionCount, tc.notPicked, tc.filtered, tc.existing...) if diff := cmp.Diff(tc.want, decisions); diff != "" { t.Errorf("newSchedulingDecisionsFrom() decisions diff (-got, +want): %s", diff) } @@ -2666,15 +3137,18 @@ func TestNewSchedulingDecisionsFromBindings(t *testing.T) { // TestNewSchedulingDecisionsFrom tests a special case in the newSchedulingDecisionsFrom function, // specifically the case where the number of new decisions exceeds the API limit. 
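Editorial note (not part of the patch): the test cases above, and the oversized-decision test that follows, exercise the new picked/notPicked split returned by pickTopNScoredClusters (see the frameworkutils.go hunk further down). As an illustration only, a hypothetical generic helper with the same slicing behavior, assuming the input slice is already sorted by score (requires Go 1.18+ generics):

// Hypothetical sketch, not the fleet implementation: split a score-sorted slice
// into the top-N picked entries and the remainder that was scored but not picked.
func splitTopN[T any](sorted []T, n int) (picked, notPicked []T) {
	if len(sorted) == 0 || n <= 0 {
		// Nothing can be picked; everything that was scored goes to notPicked.
		return []T{}, sorted
	}
	if len(sorted) <= n {
		// Fewer candidates than requested: pick them all.
		return sorted, []T{}
	}
	return sorted[:n], sorted[n:]
}
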
func TestNewSchedulingDecisionsFromOversized(t *testing.T) { - wantSelectedAndUnselectedDecisons := make([]placementv1beta1.ClusterDecision, 0, 1000) - wantSelectedDecisions := generateClusterDecisions(980, 0, true) - wantUnselectedDecisions := generateClusterDecisions(20, 980, false) - wantSelectedAndUnselectedDecisons = append(wantSelectedAndUnselectedDecisons, wantSelectedDecisions...) - wantSelectedAndUnselectedDecisons = append(wantSelectedAndUnselectedDecisons, wantUnselectedDecisions...) + wantDecisions1 := generateClusterDecisions(1000, 0, true) + + wantDecisions2 := generateClusterDecisions(980, 0, true) + wantDecisions2 = append(wantDecisions2, generateClusterDecisions(20, 980, false)...) + + wantDecisions3 := generateClusterDecisions(10, 0, true) + wantDecisions3 = append(wantDecisions3, generateClusterDecisions(20, 10, false)...) testCases := []struct { name string maxUnselectedClusterDecisionCount int + notPicked ScoredClusters filtered []*filteredClusterWithStatus bindingSets [][]*placementv1beta1.ClusterResourceBinding wantDecisions []placementv1beta1.ClusterDecision @@ -2686,24 +3160,54 @@ func TestNewSchedulingDecisionsFromOversized(t *testing.T) { generateResourceBindings(550, 0), generateResourceBindings(550, 550), }, - wantDecisions: generateClusterDecisions(1000, 0, true), + wantDecisions: wantDecisions1, + }, + { + name: "count of not picked clusters exceeding API limit", + maxUnselectedClusterDecisionCount: 50, + bindingSets: [][]*placementv1beta1.ClusterResourceBinding{ + generateResourceBindings(490, 0), + generateResourceBindings(490, 490), + }, + notPicked: generateNotPickedScoredClusters(50, 980), + wantDecisions: wantDecisions2, + }, + { + name: "count of not picked clusters exceeding custom limit", + maxUnselectedClusterDecisionCount: 20, + bindingSets: [][]*placementv1beta1.ClusterResourceBinding{ + generateResourceBindings(10, 0), + }, + notPicked: generateNotPickedScoredClusters(50, 10), + wantDecisions: wantDecisions3, }, { - name: "too many selected + unselected clusters", + name: "count of filtered clusters exceeding API limit", maxUnselectedClusterDecisionCount: 50, - filtered: generatedFilterdClusterWithStatus(60, 980), bindingSets: [][]*placementv1beta1.ClusterResourceBinding{ generateResourceBindings(490, 0), generateResourceBindings(490, 490), }, - wantDecisions: wantSelectedAndUnselectedDecisons, + notPicked: nil, + filtered: generatedFilterdClusterWithStatus(50, 980), + wantDecisions: wantDecisions2, + }, + { + name: "count of filtered clusters exceeding custom limit", + maxUnselectedClusterDecisionCount: 20, + bindingSets: [][]*placementv1beta1.ClusterResourceBinding{ + generateResourceBindings(10, 0), + }, + notPicked: nil, + filtered: generatedFilterdClusterWithStatus(50, 10), + wantDecisions: wantDecisions3, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - decisions := newSchedulingDecisionsFromBindings(tc.maxUnselectedClusterDecisionCount, tc.filtered, tc.bindingSets...) - if diff := cmp.Diff(decisions, tc.wantDecisions); diff != "" { + decisions := newSchedulingDecisionsFromBindings(tc.maxUnselectedClusterDecisionCount, tc.notPicked, tc.filtered, tc.bindingSets...) 
+ if diff := cmp.Diff(decisions, tc.wantDecisions, ignoreClusterDecisionScoreAndReasonFields); diff != "" { t.Errorf("newSchedulingDecisionsFrom() decisions diff (-got, +want): %s", diff) } }) @@ -4259,28 +4763,31 @@ func TestPickTopNScoredClusters(t *testing.T) { } testCases := []struct { - name string - scoredClusters ScoredClusters - picks int - wantScoredClusters ScoredClusters + name string + scoredClusters ScoredClusters + picks int + wantPicked ScoredClusters + wantNotPicked ScoredClusters }{ { - name: "no scored clusters", - scoredClusters: ScoredClusters{}, - picks: 1, - wantScoredClusters: ScoredClusters{}, + name: "no scored clusters", + scoredClusters: ScoredClusters{}, + picks: 1, + wantPicked: ScoredClusters{}, + wantNotPicked: ScoredClusters{}, }, { - name: "zero to pick", - scoredClusters: scs, - picks: 0, - wantScoredClusters: ScoredClusters{}, + name: "zero to pick", + scoredClusters: scs, + picks: 0, + wantPicked: ScoredClusters{}, + wantNotPicked: scs, }, { name: "not enough to pick", scoredClusters: scs, picks: 10, - wantScoredClusters: ScoredClusters{ + wantPicked: ScoredClusters{ { Cluster: &clusterv1beta1.MemberCluster{ ObjectMeta: metav1.ObjectMeta{ @@ -4306,12 +4813,13 @@ func TestPickTopNScoredClusters(t *testing.T) { }, }, }, + wantNotPicked: ScoredClusters{}, }, { name: "enough to pick", scoredClusters: scs, picks: 1, - wantScoredClusters: ScoredClusters{ + wantPicked: ScoredClusters{ { Cluster: &clusterv1beta1.MemberCluster{ ObjectMeta: metav1.ObjectMeta{ @@ -4325,15 +4833,33 @@ func TestPickTopNScoredClusters(t *testing.T) { }, }, }, + wantNotPicked: ScoredClusters{ + { + Cluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + }, + }, + Score: &ClusterScore{ + TopologySpreadScore: 1, + AffinityScore: 20, + ObsoletePlacementAffinityScore: 0, + }, + }, + }, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - picked := pickTopNScoredClusters(tc.scoredClusters, tc.picks) - if diff := cmp.Diff(picked, tc.wantScoredClusters); diff != "" { + picked, notPicked := pickTopNScoredClusters(tc.scoredClusters, tc.picks) + if diff := cmp.Diff(picked, tc.wantPicked); diff != "" { t.Errorf("pickTopNScoredClusters() picked diff (-got, +want): %s", diff) } + + if diff := cmp.Diff(notPicked, tc.wantNotPicked); diff != "" { + t.Errorf("pickTopNScoredClusters() not picked diff (-got, +want): %s", diff) + } }) } } diff --git a/pkg/scheduler/framework/frameworkutils.go b/pkg/scheduler/framework/frameworkutils.go index e9f04be71..722f68ddf 100644 --- a/pkg/scheduler/framework/frameworkutils.go +++ b/pkg/scheduler/framework/frameworkutils.go @@ -12,6 +12,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/klog/v2" + "k8s.io/utils/pointer" "sigs.k8s.io/controller-runtime/pkg/client" clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1" @@ -205,7 +206,12 @@ func crossReferencePickedCustersAndObsoleteBindings( // newSchedulingDecisionsFromBindings returns a list of scheduling decisions, based on the newly manipulated list of // bindings and (if applicable) a list of filtered clusters. 
-func newSchedulingDecisionsFromBindings(maxUnselectedClusterDecisionCount int, filtered []*filteredClusterWithStatus, existing ...[]*placementv1beta1.ClusterResourceBinding) []placementv1beta1.ClusterDecision { +func newSchedulingDecisionsFromBindings( + maxUnselectedClusterDecisionCount int, + notPicked ScoredClusters, + filtered []*filteredClusterWithStatus, + existing ...[]*placementv1beta1.ClusterResourceBinding, +) []placementv1beta1.ClusterDecision { // Pre-allocate with a reasonable capacity. newDecisions := make([]placementv1beta1.ClusterDecision, 0, maxUnselectedClusterDecisionCount) @@ -224,6 +230,27 @@ func newSchedulingDecisionsFromBindings(maxUnselectedClusterDecisionCount int, f } } + // Add decisions for clusters that have been scored, but are not picked, if there are still + // enough room. + for _, sc := range notPicked { + if slotsLeft == 0 || maxUnselectedClusterDecisionCount == 0 { + break + } + + newDecisions = append(newDecisions, placementv1beta1.ClusterDecision{ + ClusterName: sc.Cluster.Name, + Selected: false, + ClusterScore: &placementv1beta1.ClusterScore{ + AffinityScore: pointer.Int32(int32(sc.Score.AffinityScore)), + TopologySpreadScore: pointer.Int32(int32(sc.Score.TopologySpreadScore)), + }, + Reason: notPickedByScoreReason, + }) + + slotsLeft-- + maxUnselectedClusterDecisionCount-- + } + // Move some decisions from unbound clusters, if there are still enough room. for i := 0; i < maxUnselectedClusterDecisionCount && i < len(filtered) && i < slotsLeft; i++ { clusterWithStatus := filtered[i] @@ -421,7 +448,7 @@ func calcNumOfClustersToSelect(desired, limit, scored int) int { // // Note that this function assumes that the list of clusters have been sorted by their scores, // and the N count is no greater than the length of the list. -func pickTopNScoredClusters(scoredClusters ScoredClusters, N int) ScoredClusters { +func pickTopNScoredClusters(scoredClusters ScoredClusters, N int) (picked, notPicked ScoredClusters) { // Sort the clusters by their scores in reverse order. // // Note that when two clusters have the same score, they are sorted by their names in @@ -431,15 +458,15 @@ func pickTopNScoredClusters(scoredClusters ScoredClusters, N int) ScoredClusters // No need to pick if there is no scored cluster or the number to pick is zero. if len(scoredClusters) == 0 || N == 0 { - return make(ScoredClusters, 0) + return make(ScoredClusters, 0), scoredClusters } // No need to pick if the number of scored clusters is less than or equal to N. if len(scoredClusters) <= N { - return scoredClusters + return scoredClusters, make(ScoredClusters, 0) } - return scoredClusters[:N] + return scoredClusters[:N], scoredClusters[N:] } // shouldRequeue determines if the scheduler should start another scheduling cycle on the same diff --git a/test/scheduler/actuals_test.go b/test/scheduler/actuals_test.go index 422f2e563..8eb69dc1c 100644 --- a/test/scheduler/actuals_test.go +++ b/test/scheduler/actuals_test.go @@ -14,19 +14,23 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" ) // This file features common actuals (and utilities for generating actuals) in the test suites. 
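Editorial note (not part of the patch): the actuals rewrite below scopes every List call to a single CRP via the CRPTrackingLabel, so suites that create several placements no longer observe each other's bindings. A minimal, hypothetical helper showing the same label-filtered list with controller-runtime; listBindingsForCRP is an assumed name, and the imports ("context", "sigs.k8s.io/controller-runtime/pkg/client", and the placement v1beta1 API package) are taken from the surrounding test files.

// Hypothetical helper: list only the ClusterResourceBindings that carry the
// tracking label of the given CRP, instead of listing every binding in the hub.
func listBindingsForCRP(ctx context.Context, c client.Client, crpName string) ([]placementv1beta1.ClusterResourceBinding, error) {
	bindingList := &placementv1beta1.ClusterResourceBindingList{}
	if err := c.List(ctx, bindingList, client.MatchingLabels{placementv1beta1.CRPTrackingLabel: crpName}); err != nil {
		return nil, err
	}
	return bindingList.Items, nil
}
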
-var ( - noBindingCreatedActual = func() error { - // List all bindings. - bindingList := &fleetv1beta1.ClusterResourceBindingList{} - if err := hubClient.List(ctx, bindingList); err != nil { +func noBindingsCreatedForCRPActual(crpName string) func() error { + return func() error { + // List all bindings associated with the given CRP. + bindingList := &placementv1beta1.ClusterResourceBindingList{} + labelSelector := labels.SelectorFromSet(labels.Set{placementv1beta1.CRPTrackingLabel: crpName}) + listOptions := &client.ListOptions{LabelSelector: labelSelector} + if err := hubClient.List(ctx, bindingList, listOptions); err != nil { return err } @@ -37,18 +41,18 @@ var ( return nil } -) +} func crpSchedulerFinalizerAddedActual(crpName string) func() error { return func() error { // Retrieve the CRP. - crp := &fleetv1beta1.ClusterResourcePlacement{} + crp := &placementv1beta1.ClusterResourcePlacement{} if err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil { return err } // Check that the scheduler finalizer has been added. - if !controllerutil.ContainsFinalizer(crp, fleetv1beta1.SchedulerCRPCleanupFinalizer) { + if !controllerutil.ContainsFinalizer(crp, placementv1beta1.SchedulerCRPCleanupFinalizer) { return fmt.Errorf("scheduler cleanup finalizer has not been added") } @@ -59,13 +63,13 @@ func crpSchedulerFinalizerAddedActual(crpName string) func() error { func crpSchedulerFinalizerRemovedActual(crpName string) func() error { return func() error { // Retrieve the CRP. - crp := &fleetv1beta1.ClusterResourcePlacement{} + crp := &placementv1beta1.ClusterResourcePlacement{} if err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil { return err } // Check that the scheduler finalizer has been added. - if controllerutil.ContainsFinalizer(crp, fleetv1beta1.SchedulerCRPCleanupFinalizer) { + if controllerutil.ContainsFinalizer(crp, placementv1beta1.SchedulerCRPCleanupFinalizer) { return fmt.Errorf("scheduler cleanup finalizer is still present") } @@ -73,38 +77,44 @@ func crpSchedulerFinalizerRemovedActual(crpName string) func() error { } } -func scheduledBindingsCreatedForClustersActual(clusters []string, scoreByCluster map[string]*fleetv1beta1.ClusterScore, crpName, policySnapshotName string) func() error { +func scheduledBindingsCreatedOrUpdatedForClustersActual(clusters []string, scoreByCluster map[string]*placementv1beta1.ClusterScore, crpName, policySnapshotName string) func() error { return func() error { // List all bindings. - bindingList := &fleetv1beta1.ClusterResourceBindingList{} - if err := hubClient.List(ctx, bindingList); err != nil { + bindingList := &placementv1beta1.ClusterResourceBindingList{} + labelSelector := labels.SelectorFromSet(labels.Set{placementv1beta1.CRPTrackingLabel: crpName}) + listOptions := &client.ListOptions{LabelSelector: labelSelector} + if err := hubClient.List(ctx, bindingList, listOptions); err != nil { return err } // Find all the scheduled bindings. - scheduled := []fleetv1beta1.ClusterResourceBinding{} + scheduled := []placementv1beta1.ClusterResourceBinding{} + clusterMap := make(map[string]bool) + for _, name := range clusters { + clusterMap[name] = true + } for _, binding := range bindingList.Items { - if binding.Spec.State == fleetv1beta1.BindingStateScheduled { + if _, ok := clusterMap[binding.Spec.TargetCluster]; ok && binding.Spec.State == placementv1beta1.BindingStateScheduled { scheduled = append(scheduled, binding) } } // Verify that scheduled bindings are created as expected. 
- wantScheduled := []fleetv1beta1.ClusterResourceBinding{} + wantScheduled := []placementv1beta1.ClusterResourceBinding{} for _, name := range clusters { score := scoreByCluster[name] - binding := fleetv1beta1.ClusterResourceBinding{ + binding := placementv1beta1.ClusterResourceBinding{ ObjectMeta: metav1.ObjectMeta{ Name: bindingNamePlaceholder, Labels: map[string]string{ - fleetv1beta1.CRPTrackingLabel: crpName, + placementv1beta1.CRPTrackingLabel: crpName, }, }, - Spec: fleetv1beta1.ResourceBindingSpec{ - State: fleetv1beta1.BindingStateScheduled, + Spec: placementv1beta1.ResourceBindingSpec{ + State: placementv1beta1.BindingStateScheduled, SchedulingPolicySnapshotName: policySnapshotName, TargetCluster: name, - ClusterDecision: fleetv1beta1.ClusterDecision{ + ClusterDecision: placementv1beta1.ClusterDecision{ ClusterName: name, Selected: true, ClusterScore: score, @@ -130,35 +140,41 @@ func scheduledBindingsCreatedForClustersActual(clusters []string, scoreByCluster } } -func boundBindingsUpdatedForClustersActual(clusters []string, scoreByCluster map[string]*fleetv1beta1.ClusterScore, crpName, policySnapshotName string) func() error { +func boundBindingsCreatedOrUpdatedForClustersActual(clusters []string, scoreByCluster map[string]*placementv1beta1.ClusterScore, crpName, policySnapshotName string) func() error { return func() error { - bindingList := &fleetv1beta1.ClusterResourceBindingList{} - if err := hubClient.List(ctx, bindingList); err != nil { + bindingList := &placementv1beta1.ClusterResourceBindingList{} + labelSelector := labels.SelectorFromSet(labels.Set{placementv1beta1.CRPTrackingLabel: crpName}) + listOptions := &client.ListOptions{LabelSelector: labelSelector} + if err := hubClient.List(ctx, bindingList, listOptions); err != nil { return err } - bound := []fleetv1beta1.ClusterResourceBinding{} + bound := []placementv1beta1.ClusterResourceBinding{} + clusterMap := make(map[string]bool) + for _, name := range clusters { + clusterMap[name] = true + } for _, binding := range bindingList.Items { - if binding.Spec.State == fleetv1beta1.BindingStateBound { + if _, ok := clusterMap[binding.Spec.TargetCluster]; ok && binding.Spec.State == placementv1beta1.BindingStateBound { bound = append(bound, binding) } } - wantBound := []fleetv1beta1.ClusterResourceBinding{} + wantBound := []placementv1beta1.ClusterResourceBinding{} for _, name := range clusters { score := scoreByCluster[name] - binding := fleetv1beta1.ClusterResourceBinding{ + binding := placementv1beta1.ClusterResourceBinding{ ObjectMeta: metav1.ObjectMeta{ Name: bindingNamePlaceholder, Labels: map[string]string{ - fleetv1beta1.CRPTrackingLabel: crpName, + placementv1beta1.CRPTrackingLabel: crpName, }, }, - Spec: fleetv1beta1.ResourceBindingSpec{ - State: fleetv1beta1.BindingStateBound, + Spec: placementv1beta1.ResourceBindingSpec{ + State: placementv1beta1.BindingStateBound, SchedulingPolicySnapshotName: policySnapshotName, TargetCluster: name, - ClusterDecision: fleetv1beta1.ClusterDecision{ + ClusterDecision: placementv1beta1.ClusterDecision{ ClusterName: name, Selected: true, ClusterScore: score, @@ -184,89 +200,41 @@ func boundBindingsUpdatedForClustersActual(clusters []string, scoreByCluster map } } -func scheduledBindingsUpdatedForClustersActual(clusters []string, scoreByCluster map[string]*fleetv1beta1.ClusterScore, crpName, policySnapshotName string) func() error { +func unscheduledBindingsCreatedOrUpdatedForClustersActual(clusters []string, scoreByCluster map[string]*placementv1beta1.ClusterScore, crpName string, 
policySnapshotName string) func() error { return func() error { - bindingList := &fleetv1beta1.ClusterResourceBindingList{} - if err := hubClient.List(ctx, bindingList); err != nil { + bindingList := &placementv1beta1.ClusterResourceBindingList{} + labelSelector := labels.SelectorFromSet(labels.Set{placementv1beta1.CRPTrackingLabel: crpName}) + listOptions := &client.ListOptions{LabelSelector: labelSelector} + if err := hubClient.List(ctx, bindingList, listOptions); err != nil { return err } - scheduled := []fleetv1beta1.ClusterResourceBinding{} - for _, binding := range bindingList.Items { - if binding.Spec.State == fleetv1beta1.BindingStateScheduled { - scheduled = append(scheduled, binding) - } - } - - wantScheduled := []fleetv1beta1.ClusterResourceBinding{} + unscheduled := []placementv1beta1.ClusterResourceBinding{} + clusterMap := make(map[string]bool) for _, name := range clusters { - score := scoreByCluster[name] - binding := fleetv1beta1.ClusterResourceBinding{ - ObjectMeta: metav1.ObjectMeta{ - Name: bindingNamePlaceholder, - Labels: map[string]string{ - fleetv1beta1.CRPTrackingLabel: crpName, - }, - }, - Spec: fleetv1beta1.ResourceBindingSpec{ - State: fleetv1beta1.BindingStateScheduled, - SchedulingPolicySnapshotName: policySnapshotName, - TargetCluster: name, - ClusterDecision: fleetv1beta1.ClusterDecision{ - ClusterName: name, - Selected: true, - ClusterScore: score, - }, - }, - } - wantScheduled = append(wantScheduled, binding) - } - - if diff := cmp.Diff(scheduled, wantScheduled, ignoreResourceBindingFields...); diff != "" { - return fmt.Errorf("scheduled bindings are not updated as expected; diff (-got, +want): %s", diff) + clusterMap[name] = true } - - // Verify that binding names are formatted correctly. for _, binding := range bindingList.Items { - wantPrefix := fmt.Sprintf("%s-%s", crpName, binding.Spec.TargetCluster) - if !strings.HasPrefix(binding.Name, wantPrefix) { - return fmt.Errorf("binding name %s is not formatted correctly; want prefix %s", binding.Name, wantPrefix) - } - } - - return nil - } -} - -func unscheduledBindingsCreatedForClustersActual(clusters []string, scoreByCluster map[string]*fleetv1beta1.ClusterScore, crpName string, policySnapshotName string) func() error { - return func() error { - bindingList := &fleetv1beta1.ClusterResourceBindingList{} - if err := hubClient.List(ctx, bindingList); err != nil { - return err - } - - unscheduled := []fleetv1beta1.ClusterResourceBinding{} - for _, binding := range bindingList.Items { - if binding.Spec.State == fleetv1beta1.BindingStateUnscheduled { + if _, ok := clusterMap[binding.Spec.TargetCluster]; ok && binding.Spec.State == placementv1beta1.BindingStateUnscheduled { unscheduled = append(unscheduled, binding) } } - wantUnscheduled := []fleetv1beta1.ClusterResourceBinding{} + wantUnscheduled := []placementv1beta1.ClusterResourceBinding{} for _, name := range clusters { score := scoreByCluster[name] - binding := fleetv1beta1.ClusterResourceBinding{ + binding := placementv1beta1.ClusterResourceBinding{ ObjectMeta: metav1.ObjectMeta{ Name: bindingNamePlaceholder, Labels: map[string]string{ - fleetv1beta1.CRPTrackingLabel: crpName, + placementv1beta1.CRPTrackingLabel: crpName, }, }, - Spec: fleetv1beta1.ResourceBindingSpec{ - State: fleetv1beta1.BindingStateUnscheduled, + Spec: placementv1beta1.ResourceBindingSpec{ + State: placementv1beta1.BindingStateUnscheduled, SchedulingPolicySnapshotName: policySnapshotName, TargetCluster: name, - ClusterDecision: fleetv1beta1.ClusterDecision{ + ClusterDecision: 
placementv1beta1.ClusterDecision{ ClusterName: name, Selected: true, ClusterScore: score, @@ -292,7 +260,7 @@ func unscheduledBindingsCreatedForClustersActual(clusters []string, scoreByClust } } -func noBindingsCreatedForClustersActual(clusters []string) func() error { +func noBindingsCreatedForClustersActual(clusters []string, crpName string) func() error { // Build a map for clusters for quicker lookup. clusterMap := map[string]bool{} for _, name := range clusters { @@ -300,8 +268,10 @@ func noBindingsCreatedForClustersActual(clusters []string) func() error { } return func() error { - bindingList := &fleetv1beta1.ClusterResourceBindingList{} - if err := hubClient.List(ctx, bindingList); err != nil { + bindingList := &placementv1beta1.ClusterResourceBindingList{} + labelSelector := labels.SelectorFromSet(labels.Set{placementv1beta1.CRPTrackingLabel: crpName}) + listOptions := &client.ListOptions{LabelSelector: labelSelector} + if err := hubClient.List(ctx, bindingList, listOptions); err != nil { return err } @@ -318,28 +288,28 @@ func noBindingsCreatedForClustersActual(clusters []string) func() error { func pickFixedPolicySnapshotStatusUpdatedActual(valid, invalidOrNotFound []string, policySnapshotName string) func() error { return func() error { - policySnapshot := &fleetv1beta1.ClusterSchedulingPolicySnapshot{} + policySnapshot := &placementv1beta1.ClusterSchedulingPolicySnapshot{} if err := hubClient.Get(ctx, types.NamespacedName{Name: policySnapshotName}, policySnapshot); err != nil { return err } // Verify that the observed CRP generation field is populated correctly. - wantCRPGeneration := policySnapshot.Annotations[fleetv1beta1.CRPGenerationAnnotation] + wantCRPGeneration := policySnapshot.Annotations[placementv1beta1.CRPGenerationAnnotation] observedCRPGeneration := policySnapshot.Status.ObservedCRPGeneration if strconv.FormatInt(observedCRPGeneration, 10) != wantCRPGeneration { return fmt.Errorf("policy snapshot observed CRP generation not match: want %s, got %d", wantCRPGeneration, observedCRPGeneration) } // Verify that cluster decisions are populated correctly. - wantClusterDecisions := []fleetv1beta1.ClusterDecision{} + wantClusterDecisions := []placementv1beta1.ClusterDecision{} for _, clusterName := range valid { - wantClusterDecisions = append(wantClusterDecisions, fleetv1beta1.ClusterDecision{ + wantClusterDecisions = append(wantClusterDecisions, placementv1beta1.ClusterDecision{ ClusterName: clusterName, Selected: true, }) } for _, clusterName := range invalidOrNotFound { - wantClusterDecisions = append(wantClusterDecisions, fleetv1beta1.ClusterDecision{ + wantClusterDecisions = append(wantClusterDecisions, placementv1beta1.ClusterDecision{ ClusterName: clusterName, Selected: false, }) @@ -349,17 +319,17 @@ func pickFixedPolicySnapshotStatusUpdatedActual(valid, invalidOrNotFound []strin } // Verify that the scheduled condition is added correctly. 
- scheduledCondition := meta.FindStatusCondition(policySnapshot.Status.Conditions, string(fleetv1beta1.PolicySnapshotScheduled)) + scheduledCondition := meta.FindStatusCondition(policySnapshot.Status.Conditions, string(placementv1beta1.PolicySnapshotScheduled)) var wantScheduledCondition *metav1.Condition if len(invalidOrNotFound) == 0 { wantScheduledCondition = &metav1.Condition{ - Type: string(fleetv1beta1.PolicySnapshotScheduled), + Type: string(placementv1beta1.PolicySnapshotScheduled), Status: metav1.ConditionTrue, ObservedGeneration: policySnapshot.Generation, } } else { wantScheduledCondition = &metav1.Condition{ - Type: string(fleetv1beta1.PolicySnapshotScheduled), + Type: string(placementv1beta1.PolicySnapshotScheduled), Status: metav1.ConditionFalse, ObservedGeneration: policySnapshot.Generation, } @@ -371,3 +341,52 @@ func pickFixedPolicySnapshotStatusUpdatedActual(valid, invalidOrNotFound []strin return nil } } + +func pickAllPolicySnapshotStatusUpdatedActual(scored, filtered []string, policySnapshotName string) func() error { + return func() error { + policySnapshot := &placementv1beta1.ClusterSchedulingPolicySnapshot{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: policySnapshotName}, policySnapshot); err != nil { + return err + } + + // Verify that the observed CRP generation field is populated correctly. + wantCRPGeneration := policySnapshot.Annotations[placementv1beta1.CRPGenerationAnnotation] + observedCRPGeneration := policySnapshot.Status.ObservedCRPGeneration + if strconv.FormatInt(observedCRPGeneration, 10) != wantCRPGeneration { + return fmt.Errorf("policy snapshot observed CRP generation not match: want %s, got %d", wantCRPGeneration, observedCRPGeneration) + } + + // Verify that cluster decisions are populated correctly. + wantClusterDecisions := []placementv1beta1.ClusterDecision{} + for _, clusterName := range scored { + wantClusterDecisions = append(wantClusterDecisions, placementv1beta1.ClusterDecision{ + ClusterName: clusterName, + Selected: true, + ClusterScore: &zeroScore, + }) + } + for _, clusterName := range filtered { + wantClusterDecisions = append(wantClusterDecisions, placementv1beta1.ClusterDecision{ + ClusterName: clusterName, + Selected: false, + }) + } + if diff := cmp.Diff(policySnapshot.Status.ClusterDecisions, wantClusterDecisions, ignoreClusterDecisionReasonField, cmpopts.SortSlices(lessFuncClusterDecision)); diff != "" { + return fmt.Errorf("policy snapshot status cluster decisions (-got, +want): %s", diff) + } + + // Verify that the scheduled condition is added correctly. + scheduledCondition := meta.FindStatusCondition(policySnapshot.Status.Conditions, string(placementv1beta1.PolicySnapshotScheduled)) + wantScheduledCondition := &metav1.Condition{ + Type: string(placementv1beta1.PolicySnapshotScheduled), + Status: metav1.ConditionTrue, + ObservedGeneration: policySnapshot.Generation, + } + + if diff := cmp.Diff(scheduledCondition, wantScheduledCondition, ignoreConditionTimeReasonAndMessageFields); diff != "" { + return fmt.Errorf("policy snapshot status scheduled condition (-got, +want): %s", diff) + } + + return nil + } +} diff --git a/test/scheduler/pickall_integration_test.go b/test/scheduler/pickall_integration_test.go new file mode 100644 index 000000000..e7b87e383 --- /dev/null +++ b/test/scheduler/pickall_integration_test.go @@ -0,0 +1,164 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. 
+*/ + +package tests + +// This test suite features a number of test cases which cover the workflow of scheduling CRPs +// of the PickAll placement type. + +import ( + "fmt" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" +) + +var _ = Describe("scheduling CRPs with no scheduling policy specified", func() { + Context("pick all valid clusters", Ordered, func() { + crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) + policySnapshotName := fmt.Sprintf(policySnapshotNameTemplate, crpName, 1) + + BeforeAll(func() { + // Ensure that no bindings have been created so far. + noBindingsCreatedActual := noBindingsCreatedForCRPActual(crpName) + Consistently(noBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Some bindings have been created unexpectedly") + + // Create a CRP with no scheduling policy specified, along with its associated policy snapshot. + createNilSchedulingPolicyCRPWithPolicySnapshot(crpName, policySnapshotName) + }) + + It("should add scheduler cleanup finalizer to the CRP", func() { + finalizerAddedActual := crpSchedulerFinalizerAddedActual(crpName) + Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add scheduler cleanup finalizer to CRP") + }) + + It("should create scheduled bindings for all healthy clusters", func() { + scheduledBindingsCreatedActual := scheduledBindingsCreatedOrUpdatedForClustersActual(healthyClusters, zeroScoreByCluster, crpName, policySnapshotName) + Eventually(scheduledBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to create the expected set of bindings") + Consistently(scheduledBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to create the expected set of bindings") + }) + + It("should not create any binding for unhealthy clusters", func() { + noBindingsCreatedActual := noBindingsCreatedForClustersActual(unhealthyClusters, crpName) + Eventually(noBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Some bindings have been created unexpectedly") + Consistently(noBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Some bindings have been created unexpectedly") + }) + + It("should report status correctly", func() { + statusUpdatedActual := pickAllPolicySnapshotStatusUpdatedActual(healthyClusters, unhealthyClusters, policySnapshotName) + Eventually(statusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update status") + Consistently(statusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update status") + }) + + AfterAll(func() { + // Delete the CRP. + ensureCRPAndAllRelatedResourcesDeletion(crpName) + }) + }) + + // This is a serial test as adding a new member cluster may interrupt other test cases. + Context("add a new healthy cluster", Serial, Ordered, func() { + crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) + policySnapshotName := fmt.Sprintf(policySnapshotNameTemplate, crpName, 1) + + // Prepare a new cluster to avoid interrupting other concurrently running test cases. 
+ newUnhealthyMemberClusterName := fmt.Sprintf(provisionalClusterNameTemplate, GinkgoParallelProcess()) + updatedHealthyClusters := healthyClusters + updatedHealthyClusters = append(updatedHealthyClusters, newUnhealthyMemberClusterName) + + // Copy the map to avoid interrupting other concurrently running test cases. + updatedZeroScoreByCluster := make(map[string]*placementv1beta1.ClusterScore) + for k, v := range zeroScoreByCluster { + updatedZeroScoreByCluster[k] = v + } + updatedZeroScoreByCluster[newUnhealthyMemberClusterName] = &zeroScore + + BeforeAll(func() { + // Ensure that no bindings have been created so far. + noBindingsCreatedActual := noBindingsCreatedForCRPActual(crpName) + Consistently(noBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Some bindings have been created unexpectedly") + + // Create a CRP with no scheduling policy specified, along with its associated policy snapshot. + createNilSchedulingPolicyCRPWithPolicySnapshot(crpName, policySnapshotName) + + // Create a new member cluster. + createMemberCluster(newUnhealthyMemberClusterName) + + // Mark this cluster as healthy. + markClusterAsHealthy(newUnhealthyMemberClusterName) + }) + + It("should create scheduled bindings for the newly recovered cluster", func() { + scheduledBindingsCreatedActual := scheduledBindingsCreatedOrUpdatedForClustersActual(updatedHealthyClusters, updatedZeroScoreByCluster, crpName, policySnapshotName) + Eventually(scheduledBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to create the expected set of bindings") + Consistently(scheduledBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to create the expected set of bindings") + }) + + AfterAll(func() { + // Delete the CRP. + ensureCRPAndAllRelatedResourcesDeletion(crpName) + + // Delete the provisional cluster. + ensureProvisionalClusterDeletion(newUnhealthyMemberClusterName) + }) + }) + + // This is a serial test as adding a new member cluster may interrupt other test cases. + Context("a healthy cluster becomes unhealthy", Serial, Ordered, func() { + crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) + policySnapshotName := fmt.Sprintf(policySnapshotNameTemplate, crpName, 1) + + // Prepare a new cluster to avoid interrupting other concurrently running test cases. + newUnhealthyMemberClusterName := fmt.Sprintf(provisionalClusterNameTemplate, GinkgoParallelProcess()) + updatedHealthyClusters := healthyClusters + updatedHealthyClusters = append(updatedHealthyClusters, newUnhealthyMemberClusterName) + + // Copy the map to avoid interrupting other concurrently running test cases. + updatedZeroScoreByCluster := make(map[string]*placementv1beta1.ClusterScore) + for k, v := range zeroScoreByCluster { + updatedZeroScoreByCluster[k] = v + } + updatedZeroScoreByCluster[newUnhealthyMemberClusterName] = &zeroScore + + BeforeAll(func() { + // Ensure that no bindings have been created so far. + noBindingsCreatedActual := noBindingsCreatedForCRPActual(crpName) + Consistently(noBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Some bindings have been created unexpectedly") + + // Create a CRP with no scheduling policy specified, along with its associated policy snapshot. + createNilSchedulingPolicyCRPWithPolicySnapshot(crpName, policySnapshotName) + + // Create a new member cluster. + createMemberCluster(newUnhealthyMemberClusterName) + + // Mark this cluster as healthy. 
+ markClusterAsHealthy(newUnhealthyMemberClusterName) + + // Verify that a binding has been created for the cluster. + scheduledBindingsCreatedActual := scheduledBindingsCreatedOrUpdatedForClustersActual(updatedHealthyClusters, updatedZeroScoreByCluster, crpName, policySnapshotName) + Eventually(scheduledBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to create the expected set of bindings") + Consistently(scheduledBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to create the expected set of bindings") + + // Mark the cluster as unhealthy. + markClusterAsUnhealthy(newUnhealthyMemberClusterName) + }) + + It("should not remove binding for the cluster that just becomes unhealthy", func() { + scheduledBindingsCreatedActual := scheduledBindingsCreatedOrUpdatedForClustersActual(updatedHealthyClusters, updatedZeroScoreByCluster, crpName, policySnapshotName) + Eventually(scheduledBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to create the expected set of bindings") + Consistently(scheduledBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to create the expected set of bindings") + }) + + AfterAll(func() { + // Delete the CRP. + ensureCRPAndAllRelatedResourcesDeletion(crpName) + + // Delete the provisional cluster. + ensureProvisionalClusterDeletion(newUnhealthyMemberClusterName) + }) + }) +}) diff --git a/test/scheduler/pickfixed_integration_test.go b/test/scheduler/pickfixed_integration_test.go index 87b6ff326..14d8e22b4 100644 --- a/test/scheduler/pickfixed_integration_test.go +++ b/test/scheduler/pickfixed_integration_test.go @@ -5,76 +5,35 @@ Licensed under the MIT license. package tests -// This test suite features a number of test cases which cover the happy paths of the scheduler -// workflow. +// This test suite features a number of test cases which cover the workflow of scheduling CRPs +// of the PickFixed placement type. import ( "fmt" - "strconv" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - - fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" ) -var _ = Describe("scheduling CRPs of the PickFixed placement type", Ordered, func() { - crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) +var _ = Describe("scheduling CRPs of the PickFixed placement type", func() { + Context("with valid target clusters", Ordered, func() { + crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) - Context("create a CRP with some valid target clusters", func() { targetClusters := []string{ memberCluster1EastProd, memberCluster4CentralProd, memberCluster6WestProd, } - policySnapshotIdx := 1 - policySnapshotName := fmt.Sprintf(policySnapshotNameTemplate, crpName, policySnapshotIdx) + policySnapshotName := fmt.Sprintf(policySnapshotNameTemplate, crpName, 1) BeforeAll(func() { // Ensure that no bindings have been created so far. - Consistently(noBindingCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Some bindings have been created unexpectedly") - - policy := &fleetv1beta1.PlacementPolicy{ - PlacementType: fleetv1beta1.PickFixedPlacementType, - ClusterNames: targetClusters, - } - - // Create the CRP. 
- crp := &fleetv1beta1.ClusterResourcePlacement{ - ObjectMeta: metav1.ObjectMeta{ - Name: crpName, - }, - Spec: fleetv1beta1.ClusterResourcePlacementSpec{ - ResourceSelectors: defaultResourceSelectors, - Policy: policy, - }, - } - Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP") - - crpGeneration := crp.Generation - - // Create the associated policy snapshot. - policySnapshot := &fleetv1beta1.ClusterSchedulingPolicySnapshot{ - ObjectMeta: metav1.ObjectMeta{ - Name: policySnapshotName, - Labels: map[string]string{ - fleetv1beta1.IsLatestSnapshotLabel: strconv.FormatBool(true), - fleetv1beta1.CRPTrackingLabel: crpName, - }, - Annotations: map[string]string{ - fleetv1beta1.CRPGenerationAnnotation: strconv.FormatInt(crpGeneration, 10), - }, - }, - Spec: fleetv1beta1.SchedulingPolicySnapshotSpec{ - Policy: policy, - PolicyHash: []byte(policyHash), - }, - } - Expect(hubClient.Create(ctx, policySnapshot)).To(Succeed(), "Failed to create policy snapshot") + noBindingsCreatedActual := noBindingsCreatedForCRPActual(crpName) + Consistently(noBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Some bindings have been created unexpectedly") + + // Create the CRP and its associated policy snapshot. + createPickFixedCRPWithPolicySnapshot(crpName, targetClusters, policySnapshotName) }) It("should add scheduler cleanup finalizer to the CRP", func() { @@ -83,7 +42,7 @@ var _ = Describe("scheduling CRPs of the PickFixed placement type", Ordered, fun }) It("should create scheduled bindings for valid target clusters", func() { - scheduledBindingsCreatedActual := scheduledBindingsCreatedForClustersActual(targetClusters, nilScoreByCluster, crpName, policySnapshotName) + scheduledBindingsCreatedActual := scheduledBindingsCreatedOrUpdatedForClustersActual(targetClusters, nilScoreByCluster, crpName, policySnapshotName) Eventually(scheduledBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to create the expected set of bindings") Consistently(scheduledBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to create the expected set of bindings") }) @@ -93,67 +52,15 @@ var _ = Describe("scheduling CRPs of the PickFixed placement type", Ordered, fun Eventually(statusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to report correct policy snapshot status") Consistently(statusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to report correct policy snapshot status") }) - }) - - Context("add additional valid target clusters", func() { - targetClusters := []string{ - memberCluster1EastProd, - memberCluster2EastProd, - memberCluster4CentralProd, - memberCluster5CentralProd, - memberCluster6WestProd, - } - - boundClusters := []string{ - memberCluster1EastProd, - memberCluster4CentralProd, - memberCluster6WestProd, - } - scheduledClusters := []string{ - memberCluster2EastProd, - memberCluster5CentralProd, - } - - policySnapshotIdx := 2 - oldPolicySnapshotName := fmt.Sprintf(policySnapshotNameTemplate, crpName, policySnapshotIdx-1) - newPolicySnapshotName := fmt.Sprintf(policySnapshotNameTemplate, crpName, policySnapshotIdx) - BeforeAll(func() { - // Mark all previously created bindings as bound. 
- bindingList := &fleetv1beta1.ClusterResourceBindingList{} - Expect(hubClient.List(ctx, bindingList)).To(Succeed(), "Failed to list bindings") - for idx := range bindingList.Items { - binding := bindingList.Items[idx] - if binding.Spec.State == fleetv1beta1.BindingStateScheduled { - binding.Spec.State = fleetv1beta1.BindingStateBound - Expect(hubClient.Update(ctx, &binding)).To(Succeed(), "Failed to update binding") - } - } - - // Update the CRP with new target clusters and refresh scheduling policy snapshots. - updatePickedFixedCRPWithNewTargetClustersAndRefreshSnapshots(crpName, targetClusters, oldPolicySnapshotName, newPolicySnapshotName) - }) - - It("should create scheduled bindings for newly added valid target clusters", func() { - scheduledBindingsCreatedActual := scheduledBindingsCreatedForClustersActual(scheduledClusters, nilScoreByCluster, crpName, newPolicySnapshotName) - Eventually(scheduledBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to create the expected set of bindings") - Consistently(scheduledBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to create the expected set of bindings") - }) - - It("should update bound bindings for previously added valid target clusters", func() { - boundBindingsUpdatedActual := boundBindingsUpdatedForClustersActual(boundClusters, nilScoreByCluster, crpName, newPolicySnapshotName) - Eventually(boundBindingsUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update the expected set of bindings") - Consistently(boundBindingsUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update the expected set of bindings") - }) - - It("should report status correctly", func() { - statusUpdatedActual := pickFixedPolicySnapshotStatusUpdatedActual(targetClusters, []string{}, newPolicySnapshotName) - Eventually(statusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update policy snapshot status") - Consistently(statusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update policy snapshot status") + AfterAll(func() { + ensureCRPAndAllRelatedResourcesDeletion(crpName) }) }) - Context("add invalid (unhealthy, or left) and not found target clusters", func() { + Context("with both valid and invalid/non-existent target clusters", Ordered, func() { + crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) + targetClusters := []string{ memberCluster1EastProd, memberCluster2EastProd, @@ -164,7 +71,6 @@ var _ = Describe("scheduling CRPs of the PickFixed placement type", Ordered, fun memberCluster9LeftCentralProd, // An invalid cluster (left). memberCluster10NonExistent, // A cluster that cannot be found in the fleet. 
} - validClusters := []string{ memberCluster1EastProd, memberCluster2EastProd, @@ -172,223 +78,199 @@ var _ = Describe("scheduling CRPs of the PickFixed placement type", Ordered, fun memberCluster5CentralProd, memberCluster6WestProd, } - boundClusters := []string{ - memberCluster1EastProd, - memberCluster4CentralProd, - memberCluster6WestProd, - } - scheduledClusters := []string{ - memberCluster2EastProd, - memberCluster5CentralProd, - } - invalidClusters := []string{ memberCluster8UnhealthyEastProd, memberCluster9LeftCentralProd, memberCluster10NonExistent, } - policySnapshotIdx := 3 - oldPolicySnapshotName := fmt.Sprintf(policySnapshotNameTemplate, crpName, policySnapshotIdx-1) - newPolicySnapshotName := fmt.Sprintf(policySnapshotNameTemplate, crpName, policySnapshotIdx) + policySnapshotName := fmt.Sprintf(policySnapshotNameTemplate, crpName, 1) BeforeAll(func() { - // Update the CRP with new target clusters and refresh scheduling policy snapshots. - updatePickedFixedCRPWithNewTargetClustersAndRefreshSnapshots(crpName, targetClusters, oldPolicySnapshotName, newPolicySnapshotName) + // Ensure that no bindings have been created so far. + noBindingsCreatedActual := noBindingsCreatedForCRPActual(crpName) + Consistently(noBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Some bindings have been created unexpectedly") + + // Create the CRP and its associated policy snapshot. + createPickFixedCRPWithPolicySnapshot(crpName, targetClusters, policySnapshotName) }) - It("should update bound bindings for previously added valid target clusters", func() { - boundBindingsUpdatedActual := boundBindingsUpdatedForClustersActual(boundClusters, nilScoreByCluster, crpName, newPolicySnapshotName) - Eventually(boundBindingsUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update the expected set of bindings") - Consistently(boundBindingsUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update the expected set of bindings") + It("should add scheduler cleanup finalizer to the CRP", func() { + finalizerAddedActual := crpSchedulerFinalizerAddedActual(crpName) + Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add scheduler cleanup finalizer to CRP") }) - It("should update scheduled bindings for previously added valid target clusters", func() { - scheduledBindingsUpdatedActual := scheduledBindingsUpdatedForClustersActual(scheduledClusters, nilScoreByCluster, crpName, newPolicySnapshotName) - Eventually(scheduledBindingsUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update the expected set of bindings") - Consistently(scheduledBindingsUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update the expected set of bindings") + It("should create scheduled bindings for valid target clusters", func() { + scheduledBindingsCreatedActual := scheduledBindingsCreatedOrUpdatedForClustersActual(validClusters, nilScoreByCluster, crpName, policySnapshotName) + Eventually(scheduledBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to create the expected set of bindings") + Consistently(scheduledBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to create the expected set of bindings") }) It("should not create bindings for invalid target clusters", func() { - noBindingsCreatedActual := 
noBindingsCreatedForClustersActual(invalidClusters) + noBindingsCreatedActual := noBindingsCreatedForClustersActual(invalidClusters, crpName) Eventually(noBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Created a binding for invalid or not found cluster") Consistently(noBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Created a binding for invalid or not found cluster") }) It("should report status correctly", func() { - statusUpdatedActual := pickFixedPolicySnapshotStatusUpdatedActual(validClusters, invalidClusters, newPolicySnapshotName) + statusUpdatedActual := pickFixedPolicySnapshotStatusUpdatedActual(validClusters, invalidClusters, policySnapshotName) Eventually(statusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update policy snapshot status") Consistently(statusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update policy snapshot status") }) + + AfterAll(func() { + ensureCRPAndAllRelatedResourcesDeletion(crpName) + }) }) - Context("remove some target clusters (valid + invalid)", func() { - targetClusters := []string{ + Context("policy snapshot refresh with added clusters", Ordered, func() { + crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) + + targetClusters1 := []string{ + memberCluster1EastProd, memberCluster2EastProd, - memberCluster5CentralProd, - memberCluster6WestProd, - memberCluster8UnhealthyEastProd, - memberCluster10NonExistent, + memberCluster4CentralProd, } - - validClusters := []string{ + targetClusters2 := []string{ + memberCluster1EastProd, memberCluster2EastProd, + memberCluster4CentralProd, memberCluster5CentralProd, memberCluster6WestProd, } - boundClusters := []string{ - memberCluster6WestProd, - } - scheduledClusters := []string{ + previouslyBoundClusters := []string{ + memberCluster1EastProd, memberCluster2EastProd, - memberCluster5CentralProd, - } - - invalidClusters := []string{ - memberCluster8UnhealthyEastProd, - memberCluster10NonExistent, } - - unscheduledClusters := []string{ - memberCluster1EastProd, + previouslyScheduledClusters := []string{ memberCluster4CentralProd, } + newScheduledClusters := []string{ + memberCluster5CentralProd, + memberCluster6WestProd, + } - policySnapshotIdx := 4 - oldPolicySnapshotName := fmt.Sprintf(policySnapshotNameTemplate, crpName, policySnapshotIdx-1) - newPolicySnapshotName := fmt.Sprintf(policySnapshotNameTemplate, crpName, policySnapshotIdx) + policySnapshotName1 := fmt.Sprintf(policySnapshotNameTemplate, crpName, 1) + policySnapshotName2 := fmt.Sprintf(policySnapshotNameTemplate, crpName, 2) BeforeAll(func() { + // Ensure that no bindings have been created so far. + noBindingsCreatedActual := noBindingsCreatedForCRPActual(crpName) + Consistently(noBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Some bindings have been created unexpectedly") + + // Create the CRP and its associated policy snapshot. + createPickFixedCRPWithPolicySnapshot(crpName, targetClusters1, policySnapshotName1) + + // Make sure that the bindings have been created. 
+ scheduledBindingsCreatedActual := scheduledBindingsCreatedOrUpdatedForClustersActual(targetClusters1, nilScoreByCluster, crpName, policySnapshotName1) + Eventually(scheduledBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to create the expected set of bindings") + Consistently(scheduledBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to create the expected set of bindings") + + // Mark all previously created bindings as bound. + markBindingsAsBoundForClusters(crpName, previouslyBoundClusters) + // Update the CRP with new target clusters and refresh scheduling policy snapshots. - updatePickedFixedCRPWithNewTargetClustersAndRefreshSnapshots(crpName, targetClusters, oldPolicySnapshotName, newPolicySnapshotName) + updatePickedFixedCRPWithNewTargetClustersAndRefreshSnapshots(crpName, targetClusters2, policySnapshotName1, policySnapshotName2) + }) + + It("should create scheduled bindings for newly added valid target clusters", func() { + scheduledBindingsCreatedActual := scheduledBindingsCreatedOrUpdatedForClustersActual(newScheduledClusters, nilScoreByCluster, crpName, policySnapshotName2) + Eventually(scheduledBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to create the expected set of bindings") + Consistently(scheduledBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to create the expected set of bindings") }) It("should update bound bindings for previously added valid target clusters", func() { - boundBindingsUpdatedActual := boundBindingsUpdatedForClustersActual(boundClusters, nilScoreByCluster, crpName, newPolicySnapshotName) + boundBindingsUpdatedActual := boundBindingsCreatedOrUpdatedForClustersActual(previouslyBoundClusters, nilScoreByCluster, crpName, policySnapshotName2) Eventually(boundBindingsUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update the expected set of bindings") Consistently(boundBindingsUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update the expected set of bindings") }) It("should update scheduled bindings for previously added valid target clusters", func() { - scheduledBindingsUpdatedActual := scheduledBindingsUpdatedForClustersActual(scheduledClusters, nilScoreByCluster, crpName, newPolicySnapshotName) + scheduledBindingsUpdatedActual := scheduledBindingsCreatedOrUpdatedForClustersActual(previouslyScheduledClusters, nilScoreByCluster, crpName, policySnapshotName2) Eventually(scheduledBindingsUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update the expected set of bindings") Consistently(scheduledBindingsUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update the expected set of bindings") }) - It("should mark bindings as unscheduled for removed valid target clusters", func() { - unscheduledBindingsCreatedActual := unscheduledBindingsCreatedForClustersActual(unscheduledClusters, nilScoreByCluster, crpName, oldPolicySnapshotName) - Eventually(unscheduledBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to mark bindings as unscheduled") - Consistently(unscheduledBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to mark bindings as unscheduled") - }) - - It("should not create bindings for invalid target clusters", func() { - noBindingsCreatedActual := 
noBindingsCreatedForClustersActual(invalidClusters) - Eventually(noBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Created a binding for invalid or not found cluster") - Consistently(noBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Created a binding for invalid or not found cluster") - }) - It("should report status correctly", func() { - statusUpdatedActual := pickFixedPolicySnapshotStatusUpdatedActual(validClusters, invalidClusters, newPolicySnapshotName) + statusUpdatedActual := pickFixedPolicySnapshotStatusUpdatedActual(targetClusters2, []string{}, policySnapshotName2) Eventually(statusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update policy snapshot status") Consistently(statusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update policy snapshot status") }) AfterAll(func() { - clearUnscheduledBindings() + ensureCRPAndAllRelatedResourcesDeletion(crpName) }) }) - Context("pick a different set of clusters", func() { - targetClusters := []string{ - memberCluster3EastCanary, - memberCluster7WestCanary, - } + Context("policy snapshot refresh with removed clusters", Ordered, func() { + crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) - validClusters := []string{ - memberCluster3EastCanary, - memberCluster7WestCanary, + targetClusters1 := []string{ + memberCluster1EastProd, + memberCluster2EastProd, + memberCluster4CentralProd, } - scheduledClusters := []string{ - memberCluster3EastCanary, - memberCluster7WestCanary, + targetClusters2 := []string{ + memberCluster5CentralProd, + memberCluster6WestProd, } - - unscheduledClusters := []string{ + previouslyBoundClusters := []string{ + memberCluster1EastProd, memberCluster2EastProd, + } + scheduledClusters := []string{ memberCluster5CentralProd, memberCluster6WestProd, } + unscheduledClusters := []string{ + memberCluster1EastProd, + memberCluster2EastProd, + memberCluster4CentralProd, + } - policySnapshotIdx := 5 - oldPolicySnapshotName := fmt.Sprintf(policySnapshotNameTemplate, crpName, policySnapshotIdx-1) - newPolicySnapshotName := fmt.Sprintf(policySnapshotNameTemplate, crpName, policySnapshotIdx) + policySnapshotName1 := fmt.Sprintf(policySnapshotNameTemplate, crpName, 1) + policySnapshotName2 := fmt.Sprintf(policySnapshotNameTemplate, crpName, 2) BeforeAll(func() { + // Ensure that no bindings have been created so far. + noBindingsCreatedActual := noBindingsCreatedForCRPActual(crpName) + Consistently(noBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Some bindings have been created unexpectedly") + + // Create the CRP and its associated policy snapshot. + createPickFixedCRPWithPolicySnapshot(crpName, targetClusters1, policySnapshotName1) + + // Make sure that the bindings have been created. + scheduledBindingsCreatedActual := scheduledBindingsCreatedOrUpdatedForClustersActual(targetClusters1, nilScoreByCluster, crpName, policySnapshotName1) + Eventually(scheduledBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to create the expected set of bindings") + Consistently(scheduledBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to create the expected set of bindings") + + // Mark some previously created bindings as bound. 
+ markBindingsAsBoundForClusters(crpName, previouslyBoundClusters) + // Update the CRP with new target clusters and refresh scheduling policy snapshots. - updatePickedFixedCRPWithNewTargetClustersAndRefreshSnapshots(crpName, targetClusters, oldPolicySnapshotName, newPolicySnapshotName) + updatePickedFixedCRPWithNewTargetClustersAndRefreshSnapshots(crpName, targetClusters2, policySnapshotName1, policySnapshotName2) }) It("should create scheduled bindings for newly added valid target clusters", func() { - scheduledBindingsCreatedActual := scheduledBindingsCreatedForClustersActual(scheduledClusters, nilScoreByCluster, crpName, newPolicySnapshotName) + scheduledBindingsCreatedActual := scheduledBindingsCreatedOrUpdatedForClustersActual(scheduledClusters, nilScoreByCluster, crpName, policySnapshotName2) Eventually(scheduledBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to create the expected set of bindings") Consistently(scheduledBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to create the expected set of bindings") }) - It("should not have any bound bindings", func() { - noBoundBindingsActual := boundBindingsUpdatedForClustersActual([]string{}, nilScoreByCluster, crpName, newPolicySnapshotName) - Eventually(noBoundBindingsActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Unexpected bound bindings are present") - Consistently(noBoundBindingsActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Unexpected bound bindings are present") - }) - - It("should mark bindings as unscheduled for removed valid target clusters", func() { - unscheduledBindingsCreatedActual := unscheduledBindingsCreatedForClustersActual(unscheduledClusters, nilScoreByCluster, crpName, oldPolicySnapshotName) + It("should mark bindings as unscheduled for removed target clusters", func() { + unscheduledBindingsCreatedActual := unscheduledBindingsCreatedOrUpdatedForClustersActual(unscheduledClusters, nilScoreByCluster, crpName, policySnapshotName1) Eventually(unscheduledBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to mark bindings as unscheduled") Consistently(unscheduledBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to mark bindings as unscheduled") }) It("should report status correctly", func() { - statusUpdatedActual := pickFixedPolicySnapshotStatusUpdatedActual(validClusters, []string{}, newPolicySnapshotName) + statusUpdatedActual := pickFixedPolicySnapshotStatusUpdatedActual(scheduledClusters, []string{}, policySnapshotName2) Eventually(statusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update policy snapshot status") Consistently(statusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update policy snapshot status") }) - }) - - Context("delete the CRP", func() { - additionalFinalizer := "test-purpose-finalizer" - - BeforeAll(func() { - // Retrieve the CRP. - crp := &fleetv1beta1.ClusterResourcePlacement{} - Expect(hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp)).To(Succeed(), "Failed to get CRP") - - // Ensure that the CRP has the scheduler cleanup finalizer. 
- Expect(controllerutil.ContainsFinalizer(crp, fleetv1beta1.SchedulerCRPCleanupFinalizer)).To(BeTrue(), "CRP does not have the scheduler cleanup finalizer") - - // Add an additional finalizer to the CRP to block its deletion; this helps to better - // observe the scheduler's behavior. - controllerutil.AddFinalizer(crp, additionalFinalizer) - Expect(hubClient.Update(ctx, crp)).To(Succeed(), "Failed to update CRP") - - // Delete the CRP. - Expect(hubClient.Delete(ctx, crp)).To(Succeed(), "Failed to delete CRP") - }) - - It("should clear all bindings", func() { - Eventually(noBindingCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to clear all bindings") - Consistently(noBindingCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to clear all bindings") - }) - - It("should remove the scheduler cleanup finalizer from the CRP", func() { - finalizerRemovedActual := crpSchedulerFinalizerRemovedActual(crpName) - Eventually(finalizerRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove scheduler cleanup finalizer from CRP") - }) AfterAll(func() { - // Delete the CRP. - ensureCRPDeletion(crpName) - - // Remove all policy snapshots. - clearPolicySnapshots() + ensureCRPAndAllRelatedResourcesDeletion(crpName) }) }) }) diff --git a/test/scheduler/suite_test.go b/test/scheduler/suite_test.go index d046a9cfe..f132253a2 100644 --- a/test/scheduler/suite_test.go +++ b/test/scheduler/suite_test.go @@ -47,8 +47,8 @@ const ( dummyReason = "dummyReason" - eventuallyDuration = time.Second * 5 - eventuallyInterval = time.Millisecond * 500 + eventuallyDuration = time.Second * 30 + eventuallyInterval = time.Second * 2 consistentlyDuration = time.Second * 1 consistentlyInterval = time.Millisecond * 200 @@ -100,6 +100,15 @@ var ( memberCluster1EastProd, memberCluster2EastProd, memberCluster3EastCanary, memberCluster4CentralProd, memberCluster5CentralProd, memberCluster6WestProd, memberCluster7WestCanary, memberCluster8UnhealthyEastProd, memberCluster9LeftCentralProd, } + healthyClusters = []string{ + memberCluster1EastProd, memberCluster2EastProd, memberCluster3EastCanary, + memberCluster4CentralProd, memberCluster5CentralProd, memberCluster6WestProd, + memberCluster7WestCanary, + } + unhealthyClusters = []string{ + memberCluster8UnhealthyEastProd, + memberCluster9LeftCentralProd, + } labelsByCluster = map[string]map[string]string{ memberCluster1EastProd: { diff --git a/test/scheduler/utils_test.go b/test/scheduler/utils_test.go index f31a96790..ddaa92500 100644 --- a/test/scheduler/utils_test.go +++ b/test/scheduler/utils_test.go @@ -9,19 +9,25 @@ package tests import ( "strconv" + "time" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" . 
"github.com/onsi/gomega" + rbacv1 "k8s.io/api/rbac/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" clientcmdapi "k8s.io/client-go/tools/clientcmd/api" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/manager" - fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1" + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" "go.goms.io/fleet/pkg/scheduler/clustereligibilitychecker" "go.goms.io/fleet/pkg/scheduler/framework" "go.goms.io/fleet/pkg/scheduler/framework/plugins/clusteraffinity" @@ -33,9 +39,9 @@ import ( // This file features some utilities used in the test suites. const ( - crpNameTemplate = "crp-%d" - - policySnapshotNameTemplate = "%s-policy-snapshot-%d" + crpNameTemplate = "crp-%d" + policySnapshotNameTemplate = "%s-policy-snapshot-%d" + provisionalClusterNameTemplate = "provisional-cluster-%d" policyHash = "policy-hash" @@ -43,7 +49,11 @@ const ( ) var ( - defaultResourceSelectors = []fleetv1beta1.ClusterResourceSelector{ + // Note that for the scheduler integration tests, since no actual resources are collected + // by any controller (the scheduler cares only about policy snapshots and manipulates + // bindings accordingly), it is safe for all suites to select the same set of resources + // (which is not even provisioned in the environment). + defaultResourceSelectors = []placementv1beta1.ClusterResourceSelector{ { Group: "core", Kind: "Namespace", @@ -52,21 +62,36 @@ var ( }, } - nilScoreByCluster = map[string]*fleetv1beta1.ClusterScore{} + nilScoreByCluster = map[string]*placementv1beta1.ClusterScore{} + zeroScore = placementv1beta1.ClusterScore{ + AffinityScore: pointer.Int32(0), + TopologySpreadScore: pointer.Int32(0), + } + zeroScoreByCluster = map[string]*placementv1beta1.ClusterScore{ + memberCluster1EastProd: &zeroScore, + memberCluster2EastProd: &zeroScore, + memberCluster3EastCanary: &zeroScore, + memberCluster4CentralProd: &zeroScore, + memberCluster5CentralProd: &zeroScore, + memberCluster6WestProd: &zeroScore, + memberCluster7WestCanary: &zeroScore, + memberCluster8UnhealthyEastProd: &zeroScore, + memberCluster9LeftCentralProd: &zeroScore, + } ) var ( - lessFuncBinding = func(binding1, binding2 fleetv1beta1.ClusterResourceBinding) bool { + lessFuncBinding = func(binding1, binding2 placementv1beta1.ClusterResourceBinding) bool { return binding1.Spec.TargetCluster < binding2.Spec.TargetCluster } - lessFuncClusterDecision = func(decision1, decision2 fleetv1beta1.ClusterDecision) bool { + lessFuncClusterDecision = func(decision1, decision2 placementv1beta1.ClusterDecision) bool { return decision1.ClusterName < decision2.ClusterName } - ignoreClusterDecisionReasonField = cmpopts.IgnoreFields(fleetv1beta1.ClusterDecision{}, "Reason") + ignoreClusterDecisionReasonField = cmpopts.IgnoreFields(placementv1beta1.ClusterDecision{}, "Reason") ignoreObjectMetaNameField = cmpopts.IgnoreFields(metav1.ObjectMeta{}, "Name") ignoreObjectMetaAutoGeneratedFields = cmpopts.IgnoreFields(metav1.ObjectMeta{}, "UID", "CreationTimestamp", "ResourceVersion", "Generation", "ManagedFields") - ignoreResourceBindingTypeMetaField = cmpopts.IgnoreFields(fleetv1beta1.ClusterResourceBinding{}, "TypeMeta") + ignoreResourceBindingTypeMetaField = cmpopts.IgnoreFields(placementv1beta1.ClusterResourceBinding{}, "TypeMeta") 
ignoreConditionTimeReasonAndMessageFields = cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime", "Reason", "Message") ignoreResourceBindingFields = []cmp.Option{ @@ -156,9 +181,154 @@ func loadRestConfigFrom(apiCfgBytes []byte) *rest.Config { return restCfg } +func createMemberCluster(name string) { + memberCluster := clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: clusterv1beta1.MemberClusterSpec{ + Identity: rbacv1.Subject{ + Kind: "ServiceAccount", + APIGroup: "", + Name: "admin", + }, + }, + } + Expect(hubClient.Create(ctx, &memberCluster)).To(Succeed(), "Failed to create member cluster") +} + +func markClusterAsHealthy(name string) { + memberCluster := clusterv1beta1.MemberCluster{} + Expect(hubClient.Get(ctx, types.NamespacedName{Name: name}, &memberCluster)).To(Succeed(), "Failed to get member cluster") + memberCluster.Status.AgentStatus = []clusterv1beta1.AgentStatus{ + { + Type: clusterv1beta1.MemberAgent, + Conditions: []metav1.Condition{ + { + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.NewTime(time.Now()), + Reason: dummyReason, + }, + { + Type: string(clusterv1beta1.AgentHealthy), + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.NewTime(time.Now()), + Reason: dummyReason, + }, + }, + LastReceivedHeartbeat: metav1.NewTime(time.Now()), + }, + } + Expect(hubClient.Status().Update(ctx, &memberCluster)).To(Succeed(), "Failed to update member cluster status") +} + +func markClusterAsUnhealthy(name string) { + memberCluster := clusterv1beta1.MemberCluster{} + Expect(hubClient.Get(ctx, types.NamespacedName{Name: name}, &memberCluster)).To(Succeed(), "Failed to get member cluster") + memberCluster.Status.AgentStatus = []clusterv1beta1.AgentStatus{ + { + Type: clusterv1beta1.MemberAgent, + Conditions: []metav1.Condition{ + { + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.NewTime(time.Now().Add(-time.Hour * 25)), + Reason: dummyReason, + }, + { + Type: string(clusterv1beta1.AgentHealthy), + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.NewTime(time.Now().Add(-time.Hour * 25)), + Reason: dummyReason, + }, + }, + LastReceivedHeartbeat: metav1.NewTime(time.Now().Add(-time.Hour * 25)), + }, + } + Expect(hubClient.Status().Update(ctx, &memberCluster)).To(Succeed(), "Failed to update member cluster status") +} + +func createPickFixedCRPWithPolicySnapshot(crpName string, targetClusters []string, policySnapshotName string) { + policy := &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickFixedPlacementType, + ClusterNames: targetClusters, + } + + // Create the CRP. + crp := &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName, + Finalizers: []string{customDeletionBlockerFinalizer}, + }, + Spec: placementv1beta1.ClusterResourcePlacementSpec{ + ResourceSelectors: defaultResourceSelectors, + Policy: policy, + }, + } + Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP") + + crpGeneration := crp.Generation + + // Create the associated policy snapshot. 
+ policySnapshot := &placementv1beta1.ClusterSchedulingPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policySnapshotName, + Labels: map[string]string{ + placementv1beta1.IsLatestSnapshotLabel: strconv.FormatBool(true), + placementv1beta1.CRPTrackingLabel: crpName, + }, + Annotations: map[string]string{ + placementv1beta1.CRPGenerationAnnotation: strconv.FormatInt(crpGeneration, 10), + }, + }, + Spec: placementv1beta1.SchedulingPolicySnapshotSpec{ + Policy: policy, + PolicyHash: []byte(policyHash), + }, + } + Expect(hubClient.Create(ctx, policySnapshot)).To(Succeed(), "Failed to create policy snapshot") +} + +func createNilSchedulingPolicyCRPWithPolicySnapshot(crpName string, policySnapshotName string) { + // Create a CRP with no scheduling policy specified. + crp := placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName, + Finalizers: []string{customDeletionBlockerFinalizer}, + }, + Spec: placementv1beta1.ClusterResourcePlacementSpec{ + ResourceSelectors: defaultResourceSelectors, + Policy: nil, + }, + } + Expect(hubClient.Create(ctx, &crp)).Should(Succeed(), "Failed to create CRP") + + crpGeneration := crp.Generation + + // Create the associated policy snapshot. + policySnapshot := &placementv1beta1.ClusterSchedulingPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policySnapshotName, + Labels: map[string]string{ + placementv1beta1.IsLatestSnapshotLabel: strconv.FormatBool(true), + placementv1beta1.CRPTrackingLabel: crpName, + }, + Annotations: map[string]string{ + placementv1beta1.CRPGenerationAnnotation: strconv.FormatInt(crpGeneration, 10), + }, + }, + Spec: placementv1beta1.SchedulingPolicySnapshotSpec{ + Policy: nil, + PolicyHash: []byte(policyHash), + }, + } + Expect(hubClient.Create(ctx, policySnapshot)).Should(Succeed(), "Failed to create policy snapshot") +} + func updatePickedFixedCRPWithNewTargetClustersAndRefreshSnapshots(crpName string, targetClusters []string, oldPolicySnapshotName, newPolicySnapshotName string) { // Update the CRP. - crp := &fleetv1beta1.ClusterResourcePlacement{} + crp := &placementv1beta1.ClusterResourcePlacement{} Expect(hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp)).To(Succeed(), "Failed to get CRP") policy := crp.Spec.Policy.DeepCopy() @@ -169,24 +339,24 @@ func updatePickedFixedCRPWithNewTargetClustersAndRefreshSnapshots(crpName string crpGeneration := crp.Generation // Mark the old policy snapshot as inactive. - policySnapshot := &fleetv1beta1.ClusterSchedulingPolicySnapshot{} + policySnapshot := &placementv1beta1.ClusterSchedulingPolicySnapshot{} Expect(hubClient.Get(ctx, types.NamespacedName{Name: oldPolicySnapshotName}, policySnapshot)).To(Succeed(), "Failed to get policy snapshot") - policySnapshot.Labels[fleetv1beta1.IsLatestSnapshotLabel] = strconv.FormatBool(false) + policySnapshot.Labels[placementv1beta1.IsLatestSnapshotLabel] = strconv.FormatBool(false) Expect(hubClient.Update(ctx, policySnapshot)).To(Succeed(), "Failed to update policy snapshot") // Create a new policy snapshot. 
-	policySnapshot = &fleetv1beta1.ClusterSchedulingPolicySnapshot{
+	policySnapshot = &placementv1beta1.ClusterSchedulingPolicySnapshot{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: newPolicySnapshotName,
 			Labels: map[string]string{
-				fleetv1beta1.IsLatestSnapshotLabel: strconv.FormatBool(true),
-				fleetv1beta1.CRPTrackingLabel:      crpName,
+				placementv1beta1.IsLatestSnapshotLabel: strconv.FormatBool(true),
+				placementv1beta1.CRPTrackingLabel:      crpName,
 			},
 			Annotations: map[string]string{
-				fleetv1beta1.CRPGenerationAnnotation: strconv.FormatInt(crpGeneration, 10),
+				placementv1beta1.CRPGenerationAnnotation: strconv.FormatInt(crpGeneration, 10),
 			},
 		},
-		Spec: fleetv1beta1.SchedulingPolicySnapshotSpec{
+		Spec: placementv1beta1.SchedulingPolicySnapshotSpec{
 			Policy:     policy,
 			PolicyHash: []byte(policyHash),
 		},
@@ -194,40 +364,76 @@ func updatePickedFixedCRPWithNewTargetClustersAndRefreshSnapshots(crpName string
 	Expect(hubClient.Create(ctx, policySnapshot)).To(Succeed(), "Failed to create policy snapshot")
 }
 
-func clearUnscheduledBindings() {
-	// List all bindings.
-	bindingList := &fleetv1beta1.ClusterResourceBindingList{}
-	Expect(hubClient.List(ctx, bindingList)).To(Succeed(), "Failed to list bindings")
-
-	// Delete all unscheduled bindings.
+func markBindingsAsBoundForClusters(crpName string, boundClusters []string) {
+	bindingList := &placementv1beta1.ClusterResourceBindingList{}
+	labelSelector := labels.SelectorFromSet(labels.Set{placementv1beta1.CRPTrackingLabel: crpName})
+	listOptions := &client.ListOptions{LabelSelector: labelSelector}
+	Expect(hubClient.List(ctx, bindingList, listOptions)).To(Succeed(), "Failed to list bindings")
+	boundClusterMap := make(map[string]bool)
+	for _, cluster := range boundClusters {
+		boundClusterMap[cluster] = true
+	}
 	for idx := range bindingList.Items {
 		binding := bindingList.Items[idx]
-		if binding.Spec.State == fleetv1beta1.BindingStateUnscheduled {
-			Expect(hubClient.Delete(ctx, &binding)).To(Succeed(), "Failed to delete binding")
-
-			Eventually(func() error {
-				err := hubClient.Get(ctx, types.NamespacedName{Name: binding.Name}, &fleetv1beta1.ClusterResourceBinding{})
-				if errors.IsNotFound(err) {
-					return nil
-				}
-
-				return err
-			}, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to delete binding")
+		if _, ok := boundClusterMap[binding.Spec.TargetCluster]; ok && binding.Spec.State == placementv1beta1.BindingStateScheduled {
+			binding.Spec.State = placementv1beta1.BindingStateBound
+			Expect(hubClient.Update(ctx, &binding)).To(Succeed(), "Failed to update binding")
 		}
 	}
 }
 
-func clearPolicySnapshots() {
+func ensureCRPAndAllRelatedResourcesDeletion(crpName string) {
+	// Delete the CRP.
+	crp := &placementv1beta1.ClusterResourcePlacement{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: crpName,
+		},
+	}
+	Expect(hubClient.Delete(ctx, crp)).To(Succeed(), "Failed to delete CRP")
+
+	// Ensure that all the bindings are deleted.
+	noBindingsCreatedActual := noBindingsCreatedForCRPActual(crpName)
+	Eventually(noBindingsCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to clear all bindings")
+	Consistently(noBindingsCreatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to clear all bindings")
+
+	// Ensure that the scheduler finalizer is removed.
+	finalizerRemovedActual := crpSchedulerFinalizerRemovedActual(crpName)
+	Eventually(finalizerRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove scheduler cleanup finalizer from CRP")
+
+	// Remove all the other finalizers from the CRP.
+	Eventually(func() error {
+		crp := &placementv1beta1.ClusterResourcePlacement{}
+		if err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil {
+			return err
+		}
+
+		crp.Finalizers = []string{}
+		return hubClient.Update(ctx, crp)
+	}, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove all finalizers from CRP")
+
+	// Ensure that the CRP is deleted.
+	Eventually(func() error {
+		err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, &placementv1beta1.ClusterResourcePlacement{})
+		if errors.IsNotFound(err) {
+			return nil
+		}

+		return err
+	}, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to delete CRP")
+
 	// List all policy snapshots.
-	policySnapshotList := &fleetv1beta1.ClusterSchedulingPolicySnapshotList{}
-	Expect(hubClient.List(ctx, policySnapshotList)).To(Succeed(), "Failed to list policy snapshots")
+	policySnapshotList := &placementv1beta1.ClusterSchedulingPolicySnapshotList{}
+	labelSelector := labels.SelectorFromSet(labels.Set{placementv1beta1.CRPTrackingLabel: crpName})
+	listOptions := &client.ListOptions{LabelSelector: labelSelector}
+	Expect(hubClient.List(ctx, policySnapshotList, listOptions)).To(Succeed(), "Failed to list policy snapshots")
 
-	// Delete all policy snapshots.
+	// Delete all policy snapshots and ensure their deletion.
 	for idx := range policySnapshotList.Items {
 		policySnapshot := policySnapshotList.Items[idx]
 		Expect(hubClient.Delete(ctx, &policySnapshot)).To(Succeed(), "Failed to delete policy snapshot")
+
 		Eventually(func() error {
-			err := hubClient.Get(ctx, types.NamespacedName{Name: policySnapshot.Name}, &fleetv1beta1.ClusterSchedulingPolicySnapshot{})
+			err := hubClient.Get(ctx, types.NamespacedName{Name: policySnapshot.Name}, &placementv1beta1.ClusterSchedulingPolicySnapshot{})
 			if errors.IsNotFound(err) {
 				return nil
 			}
@@ -237,22 +443,22 @@ func clearPolicySnapshots() {
 	}
 }
 
-func ensureCRPDeletion(crpName string) {
-	// Retrieve the CRP.
-	crp := &fleetv1beta1.ClusterResourcePlacement{}
-	Expect(hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp)).To(Succeed(), "Failed to get CRP")
-
-	// Remove all finalizers from the CRP.
-	crp.Finalizers = []string{}
-	Expect(hubClient.Update(ctx, crp)).To(Succeed(), "Failed to update CRP")
+func ensureProvisionalClusterDeletion(clusterName string) {
+	// Delete the provisional cluster.
+	memberCluster := &clusterv1beta1.MemberCluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: clusterName,
+		},
+	}
+	Expect(hubClient.Delete(ctx, memberCluster)).To(Succeed(), "Failed to delete member cluster")
 
-	// Ensure that the CRP is deleted.
+	// Ensure that the provisional cluster is deleted.
 	Eventually(func() error {
-		err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, &fleetv1beta1.ClusterResourcePlacement{})
+		err := hubClient.Get(ctx, types.NamespacedName{Name: clusterName}, &clusterv1beta1.MemberCluster{})
 		if errors.IsNotFound(err) {
 			return nil
 		}
 
 		return err
-	}, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to delete CRP")
+	}, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to delete member cluster")
 }
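
Editor's note: the sketch below is illustrative only and is not part of the patch. It shows one way the new suite helpers could be composed in a Ginkgo spec within this test package; the spec description, cluster name, CRP name, and policy snapshot name are hypothetical, and the assertion step is left as a comment.

// Illustrative sketch (assumed names, not part of the patch): composing the
// new helpers in a Ginkgo spec inside this suite package.
var _ = Describe("rollout against a provisional cluster (sketch)", func() {
	// Hypothetical names; real specs define their own constants.
	clusterName := "provisional-cluster-1"
	crpName := "crp-sketch"
	policySnapshotName := "crp-sketch-snapshot-1"

	BeforeEach(func() {
		createMemberCluster(clusterName)
		markClusterAsHealthy(clusterName)
		createPickFixedCRPWithPolicySnapshot(crpName, []string{clusterName}, policySnapshotName)
	})

	AfterEach(func() {
		ensureCRPAndAllRelatedResourcesDeletion(crpName)
		ensureProvisionalClusterDeletion(clusterName)
	})

	It("simulates a cluster that stops reporting heartbeats mid-rollout", func() {
		// Flip scheduled bindings to bound, then age the cluster's heartbeat
		// and condition timestamps so controllers treat it as unhealthy.
		markBindingsAsBoundForClusters(crpName, []string{clusterName})
		markClusterAsUnhealthy(clusterName)
		// Assertions on binding state would follow here.
	})
})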