From 8037ba18f69144d81c6cd2364e8da995c481166d Mon Sep 17 00:00:00 2001 From: haijianyang Date: Thu, 20 Jul 2023 03:07:17 -0400 Subject: [PATCH 1/8] Refactor available hosts and vm migration code --- controllers/elfmachine_controller.go | 15 +- .../elfmachine_controller_placement_group.go | 187 ++++++++---------- controllers/elfmachine_controller_test.go | 121 +++++------- controllers/vm_limiter.go | 28 --- controllers/vm_limiter_test.go | 21 -- pkg/service/collections.go | 147 ++++++++++++++ pkg/service/collections_test.go | 69 +++++++ pkg/service/mock_services/vm_mock.go | 15 +- pkg/service/util.go | 86 +------- pkg/service/util_test.go | 18 -- pkg/service/vm.go | 6 +- pkg/util/kcp/kcp.go | 15 +- 12 files changed, 381 insertions(+), 347 deletions(-) create mode 100644 pkg/service/collections.go create mode 100644 pkg/service/collections_test.go diff --git a/controllers/elfmachine_controller.go b/controllers/elfmachine_controller.go index a83ae456..4373b613 100644 --- a/controllers/elfmachine_controller.go +++ b/controllers/elfmachine_controller.go @@ -842,12 +842,6 @@ func (r *ElfMachineReconciler) reconcileVMTask(ctx *context.MachineContext, vm * switch { case service.IsCloneVMTask(task): releaseTicketForCreateVM(ctx.ElfMachine.Name) - case service.IsVMMigrationTask(task): - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctx.Client, ctx.Machine, ctx.Cluster) - if err != nil { - return false, err - } - releaseTicketForPlacementGroupVMMigration(placementGroupName) case service.IsMemoryInsufficientError(errorMessage): setElfClusterMemoryInsufficient(ctx.ElfCluster.Spec.Cluster, true) message := fmt.Sprintf("Insufficient memory detected for ELF cluster %s", ctx.ElfCluster.Spec.Cluster) @@ -858,16 +852,9 @@ func (r *ElfMachineReconciler) reconcileVMTask(ctx *context.MachineContext, vm * case models.TaskStatusSUCCESSED: ctx.Logger.Info("VM task succeeded", "vmRef", vmRef, "taskRef", taskRef, "taskDescription", service.GetTowerString(task.Description)) - switch { - case service.IsCloneVMTask(task) || service.IsPowerOnVMTask(task): + if service.IsCloneVMTask(task) || service.IsPowerOnVMTask(task) { setElfClusterMemoryInsufficient(ctx.ElfCluster.Spec.Cluster, false) releaseTicketForCreateVM(ctx.ElfMachine.Name) - case service.IsVMMigrationTask(task): - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctx.Client, ctx.Machine, ctx.Cluster) - if err != nil { - return false, err - } - releaseTicketForPlacementGroupVMMigration(placementGroupName) } default: ctx.Logger.Info("Waiting for VM task done", "vmRef", vmRef, "taskRef", taskRef, "taskStatus", service.GetTowerTaskStatus(task.Status), "taskDescription", service.GetTowerString(task.Description)) diff --git a/controllers/elfmachine_controller_placement_group.go b/controllers/elfmachine_controller_placement_group.go index 2e27f4ae..5f249a3b 100644 --- a/controllers/elfmachine_controller_placement_group.go +++ b/controllers/elfmachine_controller_placement_group.go @@ -145,7 +145,7 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex return nil, err } - usedHostsByPG, err := r.getHostsInPlacementGroup(ctx, placementGroup) + usedHostSetByPG, err := r.getHostsInPlacementGroup(ctx, placementGroup) if err != nil { return nil, err } @@ -155,10 +155,10 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex return nil, err } + usedHostsByPG := hosts.Find(usedHostSetByPG) availableHosts := r.getAvailableHostsForVM(ctx, hosts, usedHostsByPG, nil) - 
availableHostSet := service.HostsToSet(availableHosts) - if availableHostSet.Len() != 0 { - ctx.Logger.V(1).Info("The placement group still has capacity", "placementGroup", *placementGroup.Name, "availableHosts", availableHostSet.UnsortedList()) + if !availableHosts.IsEmpty() { + ctx.Logger.V(1).Info("The placement group still has capacity", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String()) return pointer.String(""), nil } @@ -170,7 +170,7 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex // KCP is not in scaling down/rolling update. if !(kcputil.IsKCPRollingUpdateFirstMachine(kcp) || kcputil.IsKCPInScalingDown(kcp)) { - ctx.Logger.V(1).Info("The placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHostSet.UnsortedList(), "usedHostsByPG", usedHostsByPG.UnsortedList()) + ctx.Logger.V(1).Info("The placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) return nil, nil } @@ -192,7 +192,7 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex return nil, errors.Wrapf(err, "failed to init patch helper for %s %s/%s", ctx.Machine.GroupVersionKind(), ctx.Machine.Namespace, ctx.Machine.Name) } - ctx.Logger.Info("Add the delete machine annotation on KCP Machine in order to delete it, because KCP is being scaled down after a failed scaling up", "placementGroup", *placementGroup.Name, "availableHosts", availableHostSet.UnsortedList()) + ctx.Logger.Info("Add the delete machine annotation on KCP Machine in order to delete it, because KCP is being scaled down after a failed scaling up", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String()) // Allow scaling down of KCP with the possibility of marking specific control plane machine(s) to be deleted with delete annotation key. // The presence of the annotation will affect the rollout strategy in a way that, it implements the following prioritization logic in descending order, @@ -213,7 +213,7 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex // KCP is in rolling update. - if !service.ContainsUnavailableHost(hosts, usedHostsByPG.UnsortedList(), *service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) && + if usedHostsByPG.Len() == usedHostsByPG.Available(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)).Len() && int(*kcp.Spec.Replicas) == usedHostsByPG.Len() { // Only when KCP is in rolling update and the placement group is full // does it need to get the latest created machine. 
@@ -225,7 +225,7 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex return pointer.String(hostID), err } - ctx.Logger.V(1).Info("The placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHostSet.UnsortedList(), "usedHostsByPG", usedHostsByPG.UnsortedList()) + ctx.Logger.V(1).Info("The placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) return nil, nil } @@ -235,7 +235,7 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex // Find the latest created machine in the placement group, // and set the host where the machine is located to the first machine created by KCP rolling update. // This prevents migration of virtual machine during KCP rolling update when using a placement group. -func (r *ElfMachineReconciler) getVMHostForRollingUpdate(ctx *context.MachineContext, placementGroup *models.VMPlacementGroup, hosts []*models.Host) (string, error) { +func (r *ElfMachineReconciler) getVMHostForRollingUpdate(ctx *context.MachineContext, placementGroup *models.VMPlacementGroup, hosts service.Hosts) (string, error) { elfMachines, err := machineutil.GetControlPlaneElfMachinesInCluster(ctx, ctx.Client, ctx.Cluster.Namespace, ctx.Cluster.Name) if err != nil { return "", err @@ -267,7 +267,7 @@ func (r *ElfMachineReconciler) getVMHostForRollingUpdate(ctx *context.MachineCon if vm, err := ctx.VMService.Get(vmMap[machine.Name]); err != nil { return "", err } else { - host := service.GetHostFromList(*vm.Host.ID, hosts) + host := hosts.Get(*vm.Host.ID) if host == nil { ctx.Logger.Info("Host not found, skip selecting host for VM", "hostID", *vm.Host.ID, "vmRef", ctx.ElfMachine.Status.VMRef) } else { @@ -307,33 +307,22 @@ func (r *ElfMachineReconciler) getHostsInPlacementGroup(ctx *context.MachineCont // The 'Available' means that the specified VM can run on these hosts. // It returns hosts that are not in faulted state, not in the given 'usedHostsByPG', // and have sufficient memory for running this VM. -func (r *ElfMachineReconciler) getAvailableHostsForVM(ctx *context.MachineContext, hosts []*models.Host, usedHostsByPG sets.Set[string], vm *models.VM) []*models.Host { +func (r *ElfMachineReconciler) getAvailableHostsForVM(ctx *context.MachineContext, hosts service.Hosts, usedHostsByPG service.Hosts, vm *models.VM) service.Hosts { + availableHosts := hosts.Available(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)).Difference(usedHostsByPG) + // If the VM is running, and the host where the VM is located // is not used by the placement group, then it is not necessary to // check the memory is sufficient to determine whether the host is available. - // Otherwise, the VM may need to be migrated to another host, - // and need to check whether the memory is sufficient. 
- if vm != nil && (*vm.Status == models.VMStatusRUNNING || *vm.Status == models.VMStatusSUSPENDED) { - availableHosts := service.GetAvailableHosts(hosts, 0) - unusedHostsByPG := service.HostsToSet(availableHosts).Difference(usedHostsByPG) - - if unusedHostsByPG.Has(*vm.Host.ID) { - return service.FilterHosts(availableHosts, usedHostsByPG) + if vm != nil && + (*vm.Status == models.VMStatusRUNNING || *vm.Status == models.VMStatusSUSPENDED) && + !availableHosts.Contains(*vm.Host.ID) { + host := hosts.Get(*vm.Host.ID) + available, _ := service.IsAvailableHost(host, 0) + if available && !usedHostsByPG.Contains(*host.ID) { + availableHosts.Insert(host) } - - // The virtual machine is not on an unused host and may need to be migrated, - // so need to check whether the host memory is sufficient. - unusedHosts := service.FilterHosts(availableHosts, usedHostsByPG) - availableHosts = service.GetAvailableHosts(unusedHosts, *service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) - - return availableHosts } - // The virtual machine has not been created or is not powered on. - // Need to check whether the memory of the host is sufficient. - availableHosts := service.GetAvailableHosts(hosts, *service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) - availableHosts = service.FilterHosts(availableHosts, usedHostsByPG) - return availableHosts } @@ -360,6 +349,8 @@ func (r *ElfMachineReconciler) getPlacementGroup(ctx *context.MachineContext, pl // For example, the virtual machine has joined the placement group. // 2. false and error is nil means the virtual machine has not joined the placement group. // For example, the placement group is full or the virtual machine is being migrated. +// +//nolint:gocyclo func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, vm *models.VM) (ret bool, reterr error) { if !version.IsCompatibleWithPlacementGroup(ctx.ElfMachine) { ctx.Logger.V(1).Info(fmt.Sprintf("The capeVersion of ElfMachine is lower than %s, skip adding VM to the placement group", version.CAPEVersion1_2_0), "capeVersion", version.GetCAPEVersion(ctx.ElfMachine)) @@ -401,19 +392,20 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v } if machineutil.IsControlPlaneMachine(ctx.Machine) { - usedHostsByPG, err := r.getHostsInPlacementGroup(ctx, placementGroup) + hosts, err := ctx.VMService.GetHostsByCluster(ctx.ElfCluster.Spec.Cluster) if err != nil { return false, err } - hosts, err := ctx.VMService.GetHostsByCluster(ctx.ElfCluster.Spec.Cluster) + usedHostSetByPG, err := r.getHostsInPlacementGroup(ctx, placementGroup) if err != nil { return false, err } + usedHostsByPG := hosts.Find(usedHostSetByPG) + availableHosts := r.getAvailableHostsForVM(ctx, hosts, usedHostsByPG, vm) - availableHostSet := service.HostsToSet(availableHosts) - if availableHostSet.Len() == 0 { + if availableHosts.IsEmpty() { kcp, err := machineutil.GetKCPByMachine(ctx, ctx.Client, ctx.Machine) if err != nil { return false, err @@ -424,21 +416,21 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v // In this case first machine created by KCP rolling update can be powered on without being added to the placement group. 
if kcputil.IsKCPRollingUpdateFirstMachine(kcp) && *vm.Status == models.VMStatusSTOPPED && - !service.ContainsUnavailableHost(hosts, usedHostsByPG.UnsortedList(), *service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) && + usedHostsByPG.Len() == usedHostsByPG.Available(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)).Len() && int(*kcp.Spec.Replicas) == usedHostsByPG.Len() { - ctx.Logger.Info("The placement group is full and KCP is in rolling update, skip adding VM to the placement group", "placementGroup", *placementGroup.Name, "availableHosts", availableHostSet.UnsortedList(), "usedHostsByPG", usedHostsByPG.UnsortedList(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) + ctx.Logger.Info("The placement group is full and KCP is in rolling update, skip adding VM to the placement group", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) return true, nil } if *vm.Status == models.VMStatusRUNNING || *vm.Status == models.VMStatusSUSPENDED { - ctx.Logger.V(1).Info(fmt.Sprintf("The placement group is full and VM is in %s status, skip adding VM to the placement group", *vm.Status), "placementGroup", *placementGroup.Name, "availableHosts", availableHostSet.UnsortedList(), "usedHostsByPG", usedHostsByPG.UnsortedList(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) + ctx.Logger.V(1).Info(fmt.Sprintf("The placement group is full and VM is in %s status, skip adding VM to the placement group", *vm.Status), "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) return true, nil } // KCP is scaling out or being created. - ctx.Logger.V(1).Info("The placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHostSet.UnsortedList(), "usedHostsByPG", usedHostsByPG.UnsortedList(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) + ctx.Logger.V(1).Info("The placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) return false, nil } @@ -447,9 +439,56 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v // and the virtual machine is not STOPPED, we need to migrate the virtual machine to a host that // is not used by the placement group before adding the virtual machine to the placement group. // Otherwise, just add the virtual machine to the placement group directly. 
- ctx.Logger.V(1).Info("The availableHosts for migrating the VM", "hosts", availableHostSet, "vmHost", *vm.Host.ID) - if !availableHostSet.Has(*vm.Host.ID) && *vm.Status != models.VMStatusSTOPPED { - return r.migrateVMForJoiningPlacementGroup(ctx, vm, placementGroup, availableHostSet.UnsortedList()[0]) + ctx.Logger.V(1).Info("The availableHosts for migrating the VM", "hosts", availableHosts.String(), "vmHost", *vm.Host.ID) + if !availableHosts.Contains(*vm.Host.ID) && *vm.Status != models.VMStatusSTOPPED { + ctx.Logger.V(1).Info("Try to migrate the virtual machine to the specified target host if needed") + + kcp, err := machineutil.GetKCPByMachine(ctx, ctx.Client, ctx.Machine) + if err != nil { + return false, err + } + + if kcputil.IsKCPRollingUpdate(kcp) { + ctx.Logger.Info("KCP rolling update in progress, skip migrating VM", "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) + return true, nil + } + + // The 1st new CP ElfMachine should wait for other new CP ElfMachines to join the target PlacementGroup. + // The code below double checks the recommended target host for migration is valid. + cpElfMachines, err := machineutil.GetControlPlaneElfMachinesInCluster(ctx, ctx.Client, ctx.Cluster.Namespace, ctx.Cluster.Name) + if err != nil { + return false, err + } + + targetHost := availableHosts.UnsortedList()[0] + usedHostsByPG := sets.Set[string]{} + for i := 0; i < len(cpElfMachines); i++ { + if ctx.ElfMachine.Name != cpElfMachines[i].Name && + cpElfMachines[i].Status.PlacementGroupRef == *placementGroup.ID && + cpElfMachines[i].CreationTimestamp.After(ctx.ElfMachine.CreationTimestamp.Time) { + usedHostsByPG.Insert(cpElfMachines[i].Status.HostServerRef) + } + } + + usedHostsCount := usedHostsByPG.Len() + ctx.Logger.V(1).Info("The hosts used by the PlacementGroup", "usedHosts", usedHostsByPG, "count", usedHostsCount, "targetHost", targetHost) + if usedHostsCount < int(*kcp.Spec.Replicas-1) { + ctx.Logger.V(1).Info("Not all other CPs joined the PlacementGroup, skip migrating VM") + return true, nil + } + + if usedHostsByPG.Has(*targetHost.ID) { + ctx.Logger.V(1).Info("The recommended target host for VM migration is used by the PlacementGroup, skip migrating VM") + return true, nil + } + + // KCP is not in rolling update process. + // This is the last CP ElfMachine (i.e. the 1st new CP ElfMachine) which has not been added into the target PlacementGroup. + // Migrate this VM to the target host, then it will be added into the target PlacementGroup. + + ctx.Logger.V(1).Info("Start migrateVM since KCP is not in rolling update process", "targetHost", targetHost) + + return r.migrateVM(ctx, vm, *targetHost.ID) } } @@ -463,79 +502,17 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v return true, nil } -// migrateVMForJoiningPlacementGroup migrates the virtual machine to the specified target host -// for joining placement group. -// -// The return value: -// 1. true means that the virtual machine does not need to be migrated. -// 2. false and error is nil means the virtual machine is being migrated. 
-func (r *ElfMachineReconciler) migrateVMForJoiningPlacementGroup(ctx *context.MachineContext, vm *models.VM, placementGroup *models.VMPlacementGroup, targetHost string) (bool, error) { - ctx.Logger.V(1).Info("Try to migrate the virtual machine to the specified target host if needed") - kcp, err := machineutil.GetKCPByMachine(ctx, ctx.Client, ctx.Machine) - if err != nil { - return false, err - } - - // When *kcp.Spec.Replicas != kcp.Status.UpdatedReplicas, it must be in a KCP rolling update process. - // When *kcp.Spec.Replicas == kcp.Status.UpdatedReplicas, it could be in one of the following cases: - // 1) It's not in a KCP rolling update process. So kcp.Spec.Replicas == kcp.Status.Replicas. - // 2) It's at the end of a KCP rolling update process, and the last KCP replica (i.e the last KCP ElfMachine) is created just now. - // There is still an old KCP ElfMachine, so kcp.Spec.Replicas + 1 == kcp.Status.Replicas. - - if *kcp.Spec.Replicas != kcp.Status.UpdatedReplicas || *kcp.Spec.Replicas != kcp.Status.Replicas { - ctx.Logger.Info("KCP rolling update in progress, skip migrating VM", "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) - return true, nil - } - - // The 1st new CP ElfMachine should wait for other new CP ElfMachines to join the target PlacementGroup. - // The code below double checks the recommended target host for migration is valid. - cpElfMachines, err := machineutil.GetControlPlaneElfMachinesInCluster(ctx, ctx.Client, ctx.Cluster.Namespace, ctx.Cluster.Name) - if err != nil { - return false, err - } - usedHostsByPG := sets.Set[string]{} - for i := 0; i < len(cpElfMachines); i++ { - if ctx.ElfMachine.Name != cpElfMachines[i].Name && - cpElfMachines[i].Status.PlacementGroupRef == *placementGroup.ID && - cpElfMachines[i].CreationTimestamp.After(ctx.ElfMachine.CreationTimestamp.Time) { - usedHostsByPG.Insert(cpElfMachines[i].Status.HostServerRef) - } - } - usedHostsCount := usedHostsByPG.Len() - ctx.Logger.V(1).Info("The hosts used by the PlacementGroup", "usedHosts", usedHostsByPG, "count", usedHostsCount, "targetHost", targetHost) - if usedHostsCount < int(*kcp.Spec.Replicas-1) { - ctx.Logger.V(1).Info("Not all other CPs joined the PlacementGroup, skip migrating VM") - return true, nil - } - if usedHostsByPG.Has(targetHost) { - ctx.Logger.V(1).Info("The recommended target host for VM migration is used by the PlacementGroup, skip migrating VM") - return true, nil - } - - // KCP is not in rolling update process. - // This is the last CP ElfMachine (i.e. the 1st new CP ElfMachine) which has not been added into the target PlacementGroup. - // Migrate this VM to the target host, then it will be added into the target PlacementGroup. - ctx.Logger.V(1).Info("Start migrateVM since KCP is not in rolling update process", "targetHost", targetHost) - return r.migrateVM(ctx, vm, placementGroup, targetHost) -} - // migrateVM migrates the virtual machine to the specified target host. // // The return value: // 1. true means that the virtual machine does not need to be migrated. // 2. false and error is nil means the virtual machine is being migrated. 
-func (r *ElfMachineReconciler) migrateVM(ctx *context.MachineContext, vm *models.VM, placementGroup *models.VMPlacementGroup, targetHost string) (bool, error) { +func (r *ElfMachineReconciler) migrateVM(ctx *context.MachineContext, vm *models.VM, targetHost string) (bool, error) { if *vm.Host.ID == targetHost { ctx.Logger.V(1).Info(fmt.Sprintf("The VM is already on the recommended target host %s, skip migrating VM", targetHost)) return true, nil } - if ok := acquireTicketForPlacementGroupVMMigration(*placementGroup.Name); !ok { - ctx.Logger.V(1).Info("The placement group is performing another VM migration, skip migrating VM", "placementGroup", service.GetTowerString(placementGroup.Name), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) - - return false, nil - } - withTaskVM, err := ctx.VMService.Migrate(service.GetTowerString(vm.ID), targetHost) if err != nil { return false, err diff --git a/controllers/elfmachine_controller_test.go b/controllers/elfmachine_controller_test.go index 5bd4867d..c74c0ca1 100644 --- a/controllers/elfmachine_controller_test.go +++ b/controllers/elfmachine_controller_test.go @@ -35,7 +35,6 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" "k8s.io/utils/pointer" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" @@ -955,7 +954,7 @@ var _ = Describe("ElfMachineReconciler", func() { fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().FindByIDs([]string{}).Return([]*models.VM{}, nil) mockVMService.EXPECT().AddVMsToPlacementGroup(placementGroup, []string{*vm.ID}).Return(task, nil) mockVMService.EXPECT().WaitTask(*task.ID, config.WaitTaskTimeout, config.WaitTaskInterval).Return(task, nil) @@ -980,12 +979,13 @@ var _ = Describe("ElfMachineReconciler", func() { kcp.Spec.Replicas = pointer.Int32(1) kcp.Status.Replicas = 2 kcp.Status.UpdatedReplicas = 1 + conditions.MarkFalse(kcp, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "") ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kcp) machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().FindByIDs([]string{*placementGroup.Vms[0].ID}).Return([]*models.VM{vm2}, nil) reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} @@ -998,7 +998,7 @@ var _ = Describe("ElfMachineReconciler", func() { klog.SetOutput(logBuffer) host.HostState = &models.NestedMaintenanceHostState{State: models.NewMaintenanceModeEnum(models.MaintenanceModeEnumMAINTENANCEMODE)} mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) - 
mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().FindByIDs([]string{*placementGroup.Vms[0].ID}).Return([]*models.VM{}, nil) reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} @@ -1012,7 +1012,7 @@ var _ = Describe("ElfMachineReconciler", func() { vm.Status = models.NewVMStatus(models.VMStatusRUNNING) host.HostState = &models.NestedMaintenanceHostState{State: models.NewMaintenanceModeEnum(models.MaintenanceModeEnumMAINTENANCEMODE)} mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().FindByIDs([]string{*placementGroup.Vms[0].ID}).Return([]*models.VM{}, nil) reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} @@ -1042,7 +1042,7 @@ var _ = Describe("ElfMachineReconciler", func() { fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host1, host2, host3}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1, host2, host3), nil) mockVMService.EXPECT().FindByIDs([]string{*vm2.ID}).Return([]*models.VM{vm2}, nil) mockVMService.EXPECT().AddVMsToPlacementGroup(placementGroup, gomock.Any()).Return(task, nil) mockVMService.EXPECT().WaitTask(*task.ID, config.WaitTaskTimeout, config.WaitTaskInterval).Return(task, nil) @@ -1073,12 +1073,13 @@ var _ = Describe("ElfMachineReconciler", func() { kcp.Spec.Replicas = pointer.Int32(3) kcp.Status.UpdatedReplicas = 3 kcp.Status.Replicas = 4 + conditions.MarkFalse(kcp, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "") ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kcp) machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host1, host2, host3}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1, host2, host3), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*newCP2.ID, *oldCP3.ID})).Return([]*models.VM{newCP2, oldCP3}, nil) reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} @@ -1138,7 +1139,7 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID, *vm2.ID})).Return([]*models.VM{vm1, vm2}, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host0, host1, host2}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host0, host1, 
host2), nil) mockVMService.EXPECT().Migrate(*vm0.ID, *host0.ID).Return(withTaskVM, nil) reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} @@ -1155,7 +1156,7 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine1.Status.HostServerRef = *host0.ID mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID, *vm2.ID})).Return([]*models.VM{vm1, vm2}, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host0, host1, host2}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host0, host1, host2), nil) ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp, elfMachine1, machine1, elfMachine2, machine2) machineContext = newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -1172,13 +1173,12 @@ var _ = Describe("ElfMachineReconciler", func() { host := fake.NewTowerHost() vm := fake.NewTowerVMFromElfMachine(elfMachine) vm.Host = &models.NestedHost{ID: service.TowerString(*host.ID)} - placementGroup := fake.NewVMPlacementGroup([]string{}) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp) machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.migrateVM(machineContext, vm, placementGroup, *host.ID) + ok, err := reconciler.migrateVM(machineContext, vm, *host.ID) Expect(ok).To(BeTrue()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("The VM is already on the recommended target host")) @@ -1241,7 +1241,7 @@ var _ = Describe("ElfMachineReconciler", func() { logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) mockVMService.EXPECT().GetVMPlacementGroup(placementGroupName).Return(placementGroup, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{})).Return([]*models.VM{}, nil) reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} @@ -1253,7 +1253,7 @@ var _ = Describe("ElfMachineReconciler", func() { klog.SetOutput(logBuffer) placementGroup.Vms = []*models.NestedVM{{ID: vm1.ID, Name: vm1.Name}} mockVMService.EXPECT().GetVMPlacementGroup(placementGroupName).Return(placementGroup, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID})).Return([]*models.VM{vm1}, nil) reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} @@ -1263,7 +1263,7 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(logBuffer.String()).To(ContainSubstring("The placement group is full, wait for enough available hosts")) }) - It("when placement group is full", func() { + It("when placement group is full and KCP 
rolling update in progress", func() { host1 := fake.NewTowerHost() host2 := fake.NewTowerHost() host3 := fake.NewTowerHost() @@ -1299,6 +1299,7 @@ var _ = Describe("ElfMachineReconciler", func() { kcp.Spec.Replicas = pointer.Int32(3) kcp.Status.Replicas = 4 kcp.Status.UpdatedReplicas = 1 + conditions.MarkFalse(kcp, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "") ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp, elfMachine1, machine1, elfMachine2, machine2, elfMachine3, machine3) machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -1312,7 +1313,7 @@ var _ = Describe("ElfMachineReconciler", func() { klog.SetOutput(logBuffer) mockVMService.EXPECT().Get(*vm3.ID).Return(vm3, nil) mockVMService.EXPECT().GetVMPlacementGroup(placementGroupName).Return(placementGroup, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host1, host2, host3}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1, host2, host3), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID, *vm2.ID, *vm3.ID})).Return([]*models.VM{vm1, vm2, vm3}, nil) reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} @@ -1326,7 +1327,7 @@ var _ = Describe("ElfMachineReconciler", func() { klog.SetOutput(logBuffer) host1.Status = models.NewHostStatus(models.HostStatusCONNECTEDERROR) mockVMService.EXPECT().GetVMPlacementGroup(placementGroupName).Return(placementGroup, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host1, host2, host3}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1, host2, host3), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID, *vm2.ID, *vm3.ID})).Return([]*models.VM{vm1, vm2, vm3}, nil) reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} @@ -1340,7 +1341,7 @@ var _ = Describe("ElfMachineReconciler", func() { host1.Status = models.NewHostStatus(models.HostStatusCONNECTEDHEALTHY) host2.Status = models.NewHostStatus(models.HostStatusCONNECTEDERROR) mockVMService.EXPECT().GetVMPlacementGroup(placementGroupName).Return(placementGroup, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host1, host2, host3}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1, host2, host3), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID, *vm2.ID, *vm3.ID})).Return([]*models.VM{vm1, vm2, vm3}, nil) reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} @@ -1354,7 +1355,7 @@ var _ = Describe("ElfMachineReconciler", func() { host2.Status = models.NewHostStatus(models.HostStatusCONNECTEDHEALTHY) host3.Status = models.NewHostStatus(models.HostStatusCONNECTEDERROR) mockVMService.EXPECT().GetVMPlacementGroup(placementGroupName).Return(placementGroup, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host1, host2, host3}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1, host2, host3), nil) 
mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID, *vm2.ID, *vm3.ID})).Return([]*models.VM{vm1, vm2, vm3}, nil) reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} @@ -1370,7 +1371,7 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().Get(*vm3.ID).Return(vm3, nil) reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, err := reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, hosts) + hostID, err := reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, service.NewHostsFromList(hosts)) Expect(err).To(BeZero()) Expect(hostID).To(Equal("")) Expect(logBuffer.String()).To(ContainSubstring("Host is unavailable: host is in CONNECTED_ERROR status, skip selecting host for VM")) @@ -1382,7 +1383,7 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().Get(*vm3.ID).Return(vm3, nil) reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, err = reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, hosts) + hostID, err = reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, service.NewHostsFromList(hosts)) Expect(err).To(BeZero()) Expect(hostID).To(Equal("")) Expect(logBuffer.String()).To(ContainSubstring("Host not found, skip selecting host for VM")) @@ -1399,7 +1400,7 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().Get(*vm3.ID).Return(vm3, nil) reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, err = reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, hosts) + hostID, err = reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, service.NewHostsFromList(hosts)) Expect(err).To(BeZero()) Expect(hostID).To(Equal(*vm3.Host.ID)) Expect(logBuffer.String()).To(ContainSubstring("Selected the host server for VM since the placement group is full")) @@ -1411,7 +1412,7 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().Get(*vm3.ID).Return(vm3, nil) reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, err = reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, hosts) + hostID, err = reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, service.NewHostsFromList(hosts)) Expect(err).To(BeZero()) Expect(hostID).To(Equal(*vm3.Host.ID)) Expect(logBuffer.String()).To(ContainSubstring("Selected the host server for VM since the placement group is full")) @@ -1450,7 +1451,7 @@ var _ = Describe("ElfMachineReconciler", func() { placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlContext.Client, machine, cluster) Expect(err).NotTo(HaveOccurred()) mockVMService.EXPECT().GetVMPlacementGroup(placementGroupName).Return(placementGroup, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host1}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID})).Return([]*models.VM{vm1}, nil) reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} @@ -1462,7 +1463,7 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.Conditions = nil 
mockVMService.EXPECT().GetVMPlacementGroup(placementGroupName).Return(placementGroup, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host1, host2}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1, host2), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID})).Return([]*models.VM{vm1}, nil) reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} @@ -1473,7 +1474,7 @@ var _ = Describe("ElfMachineReconciler", func() { placementGroup.Vms = []*models.NestedVM{} mockVMService.EXPECT().GetVMPlacementGroup(placementGroupName).Return(placementGroup, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host1}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{})).Return([]*models.VM{}, nil) reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} @@ -1505,7 +1506,7 @@ var _ = Describe("ElfMachineReconciler", func() { placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlContext.Client, machine, cluster) Expect(err).NotTo(HaveOccurred()) mockVMService.EXPECT().GetVMPlacementGroup(placementGroupName).Return(placementGroup, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return([]*models.Host{host}, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID})).Return([]*models.VM{vm1}, nil) reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} @@ -1520,9 +1521,7 @@ var _ = Describe("ElfMachineReconciler", func() { }) Context("Get Available Hosts For VM", func() { - It("should return the available hosts when the virtual machine has not been created or is not powered on", func() { - vm := fake.NewTowerVMFromElfMachine(elfMachine) - vm.Status = models.NewVMStatus(models.VMStatusSTOPPED) + It("should return the available hosts", func() { host1 := fake.NewTowerHost() host1.AllocatableMemoryBytes = service.TowerMemory(0) host2 := fake.NewTowerHost() @@ -1531,69 +1530,47 @@ var _ = Describe("ElfMachineReconciler", func() { ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp) machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + // virtual machine has not been created + reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - availableHosts := reconciler.getAvailableHostsForVM(machineContext, nil, sets.Set[string]{}, nil) + availableHosts := reconciler.getAvailableHostsForVM(machineContext, nil, service.NewHosts(), nil) Expect(availableHosts).To(BeEmpty()) - availableHosts = reconciler.getAvailableHostsForVM(machineContext, nil, sets.Set[string]{}.Insert(*host2.ID), nil) + availableHosts = reconciler.getAvailableHostsForVM(machineContext, nil, service.NewHosts(host2), nil) Expect(availableHosts).To(BeEmpty()) - availableHosts = reconciler.getAvailableHostsForVM(machineContext, []*models.Host{host1, host2, host3}, sets.Set[string]{}.Insert(*host3.ID), vm) + availableHosts = reconciler.getAvailableHostsForVM(machineContext, service.NewHosts(host1, host2, host3), service.NewHosts(host3), nil) 
Expect(availableHosts).To(ContainElements(host2)) - availableHosts = reconciler.getAvailableHostsForVM(machineContext, []*models.Host{host1, host2, host3}, sets.Set[string]{}.Insert(*host1.ID, *host2.ID, *host3.ID), vm) + availableHosts = reconciler.getAvailableHostsForVM(machineContext, service.NewHosts(host1, host2, host3), service.NewHosts(host1, host2, host3), nil) Expect(availableHosts).To(BeEmpty()) - }) - It("should return the available hosts when the virtual machine is running", func() { - vm := fake.NewTowerVMFromElfMachine(elfMachine) - vm.Status = models.NewVMStatus(models.VMStatusRUNNING) - vm.Host = &models.NestedHost{ID: service.TowerString(fake.ID())} - host1 := fake.NewTowerHost() - host1.Status = models.NewHostStatus(models.HostStatusSESSIONEXPIRED) - host2 := fake.NewTowerHost() - host3 := fake.NewTowerHost() + // virtual machine is not powered on - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - - vm.Host.ID = host2.ID - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - availableHosts := reconciler.getAvailableHostsForVM(machineContext, []*models.Host{host1, host2, host3}, sets.Set[string]{}, vm) - Expect(availableHosts).To(ContainElements(host2, host3)) - - availableHosts = reconciler.getAvailableHostsForVM(machineContext, []*models.Host{host1, host2, host3}, sets.Set[string]{}.Insert(*host1.ID), vm) - Expect(availableHosts).To(ContainElements(host2, host3)) - - availableHosts = reconciler.getAvailableHostsForVM(machineContext, []*models.Host{host1, host2, host3}, sets.Set[string]{}.Insert(*host2.ID), vm) - Expect(availableHosts).To(ContainElements(host3)) - - availableHosts = reconciler.getAvailableHostsForVM(machineContext, []*models.Host{host1, host2, host3}, sets.Set[string]{}.Insert(*host3.ID), vm) - Expect(availableHosts).To(ContainElements(host2)) - - availableHosts = reconciler.getAvailableHostsForVM(machineContext, []*models.Host{host1, host2, host3}, sets.Set[string]{}.Insert(*host1.ID, *host2.ID), vm) - Expect(availableHosts).To(ContainElements(host3)) - - availableHosts = reconciler.getAvailableHostsForVM(machineContext, []*models.Host{host1, host2, host3}, sets.Set[string]{}.Insert(*host1.ID, *host3.ID), vm) - Expect(availableHosts).To(ContainElements(host2)) + vm := fake.NewTowerVMFromElfMachine(elfMachine) + vm.Status = models.NewVMStatus(models.VMStatusSTOPPED) - availableHosts = reconciler.getAvailableHostsForVM(machineContext, []*models.Host{host1, host2, host3}, sets.Set[string]{}.Insert(*host2.ID, *host3.ID), vm) + availableHosts = reconciler.getAvailableHostsForVM(machineContext, nil, service.NewHosts(), vm) Expect(availableHosts).To(BeEmpty()) - availableHosts = reconciler.getAvailableHostsForVM(machineContext, []*models.Host{host1, host2, host3}, sets.Set[string]{}.Insert(*host1.ID, *host2.ID, *host3.ID), vm) + availableHosts = reconciler.getAvailableHostsForVM(machineContext, nil, service.NewHosts(host2), vm) Expect(availableHosts).To(BeEmpty()) - availableHosts = reconciler.getAvailableHostsForVM(machineContext, []*models.Host{host1, host2}, sets.Set[string]{}.Insert(*host2.ID, *host3.ID), vm) - Expect(availableHosts).To(BeEmpty()) + availableHosts = reconciler.getAvailableHostsForVM(machineContext, service.NewHosts(host1, host2, host3), service.NewHosts(host3), vm) + Expect(availableHosts).To(ContainElements(host2)) - availableHosts = 
reconciler.getAvailableHostsForVM(machineContext, []*models.Host{host2, host3}, sets.Set[string]{}.Insert(*host2.ID, *host3.ID), vm) + availableHosts = reconciler.getAvailableHostsForVM(machineContext, service.NewHosts(host1, host2, host3), service.NewHosts(host1, host2, host3), vm) Expect(availableHosts).To(BeEmpty()) - availableHosts = reconciler.getAvailableHostsForVM(machineContext, nil, sets.Set[string]{}, vm) - Expect(availableHosts).To(BeEmpty()) + // virtual machine is powered on + vm.Status = models.NewVMStatus(models.VMStatusRUNNING) + vm.Host = &models.NestedHost{ID: host1.ID} - availableHosts = reconciler.getAvailableHostsForVM(machineContext, nil, sets.Set[string]{}.Insert(*host3.ID), vm) + availableHosts = reconciler.getAvailableHostsForVM(machineContext, service.NewHosts(host1, host2, host3), service.NewHosts(host1, host2, host3), vm) Expect(availableHosts).To(BeEmpty()) + + availableHosts = reconciler.getAvailableHostsForVM(machineContext, service.NewHosts(host1, host2, host3), service.NewHosts(host2, host3), vm) + Expect(availableHosts).To(ContainElements(host1)) }) }) diff --git a/controllers/vm_limiter.go b/controllers/vm_limiter.go index a8c593d1..e0efcc99 100644 --- a/controllers/vm_limiter.go +++ b/controllers/vm_limiter.go @@ -27,7 +27,6 @@ import ( const ( creationTimeout = time.Minute * 6 vmOperationRateLimit = time.Second * 6 - vmMigrationTimeout = time.Minute * 20 placementGroupSilenceTime = time.Minute * 30 placementGroupCreationLockKey = "%s:creation" ) @@ -35,7 +34,6 @@ const ( var vmStatusMap = make(map[string]time.Time) var limiterLock sync.Mutex var vmOperationMap = make(map[string]time.Time) -var placementGroupVMMigrationMap = make(map[string]time.Time) var vmOperationLock sync.Mutex var placementGroupOperationMap = make(map[string]time.Time) @@ -91,32 +89,6 @@ func acquireTicketForUpdatingVM(vmName string) bool { return true } -// acquireTicketForPlacementGroupVMMigration returns whether virtual machine migration -// of placement group operation can be performed. -func acquireTicketForPlacementGroupVMMigration(groupName string) bool { - vmOperationLock.Lock() - defer vmOperationLock.Unlock() - - if status, ok := placementGroupVMMigrationMap[groupName]; ok { - if !time.Now().After(status.Add(vmMigrationTimeout)) { - return false - } - } - - placementGroupVMMigrationMap[groupName] = time.Now() - - return true -} - -// releaseTicketForPlacementGroupVMMigration releases the virtual machine migration -// of placement group being operated. -func releaseTicketForPlacementGroupVMMigration(groupName string) { - vmOperationLock.Lock() - defer vmOperationLock.Unlock() - - delete(placementGroupVMMigrationMap, groupName) -} - // acquireTicketForPlacementGroupOperation returns whether placement group operation // can be performed. 
func acquireTicketForPlacementGroupOperation(groupName string) bool { diff --git a/controllers/vm_limiter_test.go b/controllers/vm_limiter_test.go index 81badabd..37b259c5 100644 --- a/controllers/vm_limiter_test.go +++ b/controllers/vm_limiter_test.go @@ -85,27 +85,6 @@ var _ = Describe("VM Operation Limiter", func() { }) }) -var _ = Describe("Placement Group VM Migration Limiter", func() { - var groupName string - - BeforeEach(func() { - groupName = fake.UUID() - }) - - It("acquireTicketForPlacementGroupVMMigration", func() { - Expect(acquireTicketForPlacementGroupVMMigration(groupName)).To(BeTrue()) - Expect(placementGroupVMMigrationMap).To(HaveKey(groupName)) - - Expect(acquireTicketForPlacementGroupVMMigration(groupName)).To(BeFalse()) - releaseTicketForPlacementGroupVMMigration(groupName) - Expect(placementGroupVMMigrationMap).NotTo(HaveKey(groupName)) - - placementGroupVMMigrationMap[groupName] = time.Now().Add(-vmMigrationTimeout) - Expect(acquireTicketForPlacementGroupVMMigration(groupName)).To(BeTrue()) - Expect(placementGroupVMMigrationMap).To(HaveKey(groupName)) - }) -}) - var _ = Describe("Placement Group Operation Limiter", func() { var groupName string diff --git a/pkg/service/collections.go b/pkg/service/collections.go new file mode 100644 index 00000000..05c78d8a --- /dev/null +++ b/pkg/service/collections.go @@ -0,0 +1,147 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package service + +import ( + "fmt" + + "github.com/smartxworks/cloudtower-go-sdk/v2/models" + "k8s.io/apimachinery/pkg/util/sets" +) + +// Hosts is a set of hosts. +type Hosts map[string]*models.Host + +// NewHosts creates a Hosts. from a list of values. +func NewHosts(hosts ...*models.Host) Hosts { + ss := make(Hosts, len(hosts)) + ss.Insert(hosts...) + return ss +} + +// NewHostsFromList creates a Hosts from the given host slice. +func NewHostsFromList(hosts []*models.Host) Hosts { + ss := make(Hosts, len(hosts)) + for i := range hosts { + ss.Insert(hosts[i]) + } + return ss +} + +func (s Hosts) Insert(hosts ...*models.Host) { + for i := range hosts { + if hosts[i] != nil { + h := hosts[i] + s[*h.ID] = h + } + } +} + +func (s Hosts) Contains(hostID string) bool { + _, ok := s[hostID] + return ok +} + +// Len returns the size of the set. +func (s Hosts) Len() int { + return len(s) +} + +func (s Hosts) IsEmpty() bool { + return len(s) == 0 +} + +func (s Hosts) String() string { + str := "" + for _, host := range s { + str += fmt.Sprintf("{id: %s,name: %s},", GetTowerString(host.ID), GetTowerString(host.Name)) + } + + return fmt.Sprintf("[%s]", str) +} + +// Available returns a Hosts with available hosts. +func (s Hosts) Available(memory int64) Hosts { + return s.Filter(func(h *models.Host) bool { + ok, _ := IsAvailableHost(h, memory) + return ok + }) +} + +// Get returns a Host of the specified host. +func (s Hosts) Get(hostID string) *models.Host { + if host, ok := s[hostID]; ok { + return host + } + return nil +} + +// Find returns a Hosts of the specified hosts. 
+func (s Hosts) Find(targetHosts sets.Set[string]) Hosts {
+	return s.Filter(func(h *models.Host) bool {
+		return targetHosts.Has(*h.ID)
+	})
+}
+
+// UnsortedList returns the slice with contents in random order.
+func (s Hosts) UnsortedList() []*models.Host {
+	res := make([]*models.Host, 0, len(s))
+	for _, value := range s {
+		res = append(res, value)
+	}
+	return res
+}
+
+// Difference returns a copy without hosts that are in the given collection.
+func (s Hosts) Difference(hosts Hosts) Hosts {
+	return s.Filter(func(h *models.Host) bool {
+		_, found := hosts[*h.ID]
+		return !found
+	})
+}
+
+// newFilteredHostCollection creates a Hosts from a filtered list of values.
+func newFilteredHostCollection(filter Func, hosts ...*models.Host) Hosts {
+	ss := make(Hosts, len(hosts))
+	for i := range hosts {
+		h := hosts[i]
+		if filter(h) {
+			ss.Insert(h)
+		}
+	}
+	return ss
+}
+
+// Filter returns a Hosts containing only the Hosts that match all of the given HostFilters.
+func (s Hosts) Filter(filters ...Func) Hosts {
+	return newFilteredHostCollection(And(filters...), s.UnsortedList()...)
+}
+
+// Func is the function definition for a filter.
+type Func func(host *models.Host) bool
+
+// And returns a filter that returns true if all of the given filters return true.
+func And(filters ...Func) Func {
+	return func(host *models.Host) bool {
+		for _, f := range filters {
+			if !f(host) {
+				return false
+			}
+		}
+		return true
+	}
+}
diff --git a/pkg/service/collections_test.go b/pkg/service/collections_test.go
new file mode 100644
index 00000000..cf74e046
--- /dev/null
+++ b/pkg/service/collections_test.go
@@ -0,0 +1,69 @@
+/*
+Copyright 2023.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package service + +import ( + "testing" + + "github.com/onsi/gomega" + "github.com/smartxworks/cloudtower-go-sdk/v2/models" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/utils/pointer" +) + +func TestHostCollection(t *testing.T) { + g := gomega.NewGomegaWithT(t) + + t.Run("Find", func(t *testing.T) { + host1 := &models.Host{ID: TowerString("1"), Name: TowerString("host1")} + host2 := &models.Host{ID: TowerString("2"), Name: TowerString("host2")} + + hosts := NewHosts() + g.Expect(hosts.Find(sets.Set[string]{}.Insert(*host1.ID)).Len()).To(gomega.Equal(0)) + + hosts = NewHostsFromList([]*models.Host{host1, host2}) + g.Expect(hosts.Get(*host1.ID)).To(gomega.Equal(host1)) + g.Expect(hosts.Get(*TowerString("404"))).To(gomega.BeNil()) + g.Expect(hosts.Find(sets.Set[string]{}.Insert(*host1.ID)).Contains(*host1.ID)).To(gomega.BeTrue()) + g.Expect(hosts.Find(sets.Set[string]{}.Insert(*host1.ID)).Len()).To(gomega.Equal(1)) + }) + + t.Run("Available", func(t *testing.T) { + host1 := &models.Host{ID: TowerString("1"), Name: TowerString("host1"), AllocatableMemoryBytes: pointer.Int64(1), Status: models.NewHostStatus(models.HostStatusCONNECTEDHEALTHY)} + host2 := &models.Host{ID: TowerString("2"), Name: TowerString("host2"), AllocatableMemoryBytes: pointer.Int64(2), Status: models.NewHostStatus(models.HostStatusCONNECTEDHEALTHY)} + + hosts := NewHosts() + g.Expect(hosts.Available(0).Len()).To(gomega.Equal(0)) + + hosts = NewHostsFromList([]*models.Host{host1, host2}) + availableHosts := hosts.Available(2) + g.Expect(availableHosts.Len()).To(gomega.Equal(1)) + g.Expect(availableHosts.Contains(*host2.ID)).To(gomega.BeTrue()) + }) + + t.Run("Difference", func(t *testing.T) { + host1 := &models.Host{ID: TowerString("1"), Name: TowerString("host1")} + host2 := &models.Host{ID: TowerString("2"), Name: TowerString("host2")} + + g.Expect(NewHosts().Difference(NewHosts()).Len()).To(gomega.Equal(0)) + g.Expect(NewHosts().Difference(NewHosts(host1)).Len()).To(gomega.Equal(0)) + g.Expect(NewHosts(host1).Difference(NewHosts(host1)).Len()).To(gomega.Equal(0)) + g.Expect(NewHosts(host1).Difference(NewHosts()).Contains(*host1.ID)).To(gomega.BeTrue()) + g.Expect(NewHosts(host1).Difference(NewHosts(host2)).Contains(*host1.ID)).To(gomega.BeTrue()) + g.Expect(NewHosts(host1, host2).Difference(NewHosts(host2)).Contains(*host1.ID)).To(gomega.BeTrue()) + }) +} diff --git a/pkg/service/mock_services/vm_mock.go b/pkg/service/mock_services/vm_mock.go index ffed9efd..968f2293 100644 --- a/pkg/service/mock_services/vm_mock.go +++ b/pkg/service/mock_services/vm_mock.go @@ -1,5 +1,5 @@ // Code generated by MockGen. DO NOT EDIT. -// Source: github.com/smartxworks/cluster-api-provider-elf/pkg/service/vm.go +// Source: /root/github/cluster-api-provider-elf/pkg/service/vm.go // Package mock_services is a generated GoMock package. package mock_services @@ -11,6 +11,7 @@ import ( gomock "github.com/golang/mock/gomock" models "github.com/smartxworks/cloudtower-go-sdk/v2/models" v1beta1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" + service "github.com/smartxworks/cluster-api-provider-elf/pkg/service" v1beta10 "sigs.k8s.io/cluster-api/api/v1beta1" ) @@ -217,10 +218,10 @@ func (mr *MockVMServiceMockRecorder) GetHost(id interface{}) *gomock.Call { } // GetHostsByCluster mocks base method. 
-func (m *MockVMService) GetHostsByCluster(clusterID string) ([]*models.Host, error) { +func (m *MockVMService) GetHostsByCluster(clusterID string) (service.Hosts, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "GetHostsByCluster", clusterID) - ret0, _ := ret[0].([]*models.Host) + ret0, _ := ret[0].(service.Hosts) ret1, _ := ret[1].(error) return ret0, ret1 } @@ -277,18 +278,18 @@ func (mr *MockVMServiceMockRecorder) GetVMPlacementGroup(name interface{}) *gomo } // GetVMTemplate mocks base method. -func (m *MockVMService) GetVMTemplate(id string) (*models.ContentLibraryVMTemplate, error) { +func (m *MockVMService) GetVMTemplate(template string) (*models.ContentLibraryVMTemplate, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetVMTemplate", id) + ret := m.ctrl.Call(m, "GetVMTemplate", template) ret0, _ := ret[0].(*models.ContentLibraryVMTemplate) ret1, _ := ret[1].(error) return ret0, ret1 } // GetVMTemplate indicates an expected call of GetVMTemplate. -func (mr *MockVMServiceMockRecorder) GetVMTemplate(id interface{}) *gomock.Call { +func (mr *MockVMServiceMockRecorder) GetVMTemplate(template interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetVMTemplate", reflect.TypeOf((*MockVMService)(nil).GetVMTemplate), id) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetVMTemplate", reflect.TypeOf((*MockVMService)(nil).GetVMTemplate), template) } // GetVlan mocks base method. diff --git a/pkg/service/util.go b/pkg/service/util.go index 4aa78163..0190deab 100644 --- a/pkg/service/util.go +++ b/pkg/service/util.go @@ -77,84 +77,6 @@ func IsAvailableHost(host *models.Host, memory int64) (bool, string) { return true, "" } -// GetAvailableHosts returns the available hosts. -func GetAvailableHosts(hosts []*models.Host, memory int64) []*models.Host { - var availableHosts []*models.Host - for i := 0; i < len(hosts); i++ { - if ok, _ := IsAvailableHost(hosts[i], memory); ok { - availableHosts = append(availableHosts, hosts[i]) - } - } - - return availableHosts -} - -func GetUnavailableHostInfo(hosts []*models.Host, memory int64) map[string]string { - info := make(map[string]string) - for i := 0; i < len(hosts); i++ { - ok, message := IsAvailableHost(hosts[i], memory) - if !ok { - info[*hosts[i].Name] = message - } - } - - return info -} - -func ContainsUnavailableHost(hosts []*models.Host, hostIDs []string, memory int64) bool { - if len(hosts) == 0 || len(hostIDs) == 0 { - return true - } - - hostMap := make(map[string]*models.Host) - for i := 0; i < len(hosts); i++ { - hostMap[*hosts[i].ID] = hosts[i] - } - - for i := 0; i < len(hostIDs); i++ { - host, ok := hostMap[hostIDs[i]] - if !ok { - return true - } - - if ok, _ := IsAvailableHost(host, memory); !ok { - return true - } - } - - return false -} - -func GetHostFromList(hostID string, hosts []*models.Host) *models.Host { - for i := 0; i < len(hosts); i++ { - if *hosts[i].ID == hostID { - return hosts[i] - } - } - - return nil -} - -func HostsToSet(hosts []*models.Host) sets.Set[string] { - hostSet := sets.Set[string]{} - for i := 0; i < len(hosts); i++ { - hostSet.Insert(*hosts[i].ID) - } - - return hostSet -} - -func FilterHosts(hosts []*models.Host, needFilteredHosts sets.Set[string]) []*models.Host { - var filteredHosts []*models.Host - for i := 0; i < len(hosts); i++ { - if !needFilteredHosts.Has(*hosts[i].ID) { - filteredHosts = append(filteredHosts, hosts[i]) - } - } - - return filteredHosts -} - // GetVMsInPlacementGroup returns a Set of IDs of the virtual machines in 
the placement group. func GetVMsInPlacementGroup(placementGroup *models.VMPlacementGroup) sets.Set[string] { placementGroupVMSet := sets.Set[string]{} @@ -243,6 +165,14 @@ func GetTowerInt32(ptr *int32) int32 { return *ptr } +func GetTowerInt64(ptr *int64) int64 { + if ptr == nil { + return 0 + } + + return *ptr +} + func GetTowerTaskStatus(ptr *models.TaskStatus) string { if ptr == nil { return "" diff --git a/pkg/service/util_test.go b/pkg/service/util_test.go index d743cc2f..28253a6b 100644 --- a/pkg/service/util_test.go +++ b/pkg/service/util_test.go @@ -100,21 +100,3 @@ func TestIsAvailableHost(t *testing.T) { g.Expect(message).To(gomega.ContainSubstring("3")) }) } - -func TestContainsUnavailableHost(t *testing.T) { - g := gomega.NewGomegaWithT(t) - - t.Run("should return false when has unavailable host", func(t *testing.T) { - hosts := []*models.Host{{ID: pointer.String("1"), AllocatableMemoryBytes: pointer.Int64(1), Status: models.NewHostStatus(models.HostStatusCONNECTEDHEALTHY)}} - - g.Expect(ContainsUnavailableHost(nil, []string{"0"}, 2)).To(gomega.BeTrue()) - - g.Expect(ContainsUnavailableHost(hosts, nil, 2)).To(gomega.BeTrue()) - - g.Expect(ContainsUnavailableHost(hosts, []string{"0"}, 2)).To(gomega.BeTrue()) - - g.Expect(ContainsUnavailableHost(hosts, []string{"1"}, 2)).To(gomega.BeTrue()) - - g.Expect(ContainsUnavailableHost(hosts, []string{"1"}, 1)).To(gomega.BeFalse()) - }) -} diff --git a/pkg/service/vm.go b/pkg/service/vm.go index 2421259f..dffd738e 100644 --- a/pkg/service/vm.go +++ b/pkg/service/vm.go @@ -60,7 +60,7 @@ type VMService interface { WaitTask(id string, timeout, interval time.Duration) (*models.Task, error) GetCluster(id string) (*models.Cluster, error) GetHost(id string) (*models.Host, error) - GetHostsByCluster(clusterID string) ([]*models.Host, error) + GetHostsByCluster(clusterID string) (Hosts, error) GetVlan(id string) (*models.Vlan, error) UpsertLabel(key, value string) (*models.Label, error) DeleteLabel(key, value string, strict bool) (string, error) @@ -488,7 +488,7 @@ func (svr *TowerVMService) GetHost(id string) (*models.Host, error) { return getHostsResp.Payload[0], nil } -func (svr *TowerVMService) GetHostsByCluster(clusterID string) ([]*models.Host, error) { +func (svr *TowerVMService) GetHostsByCluster(clusterID string) (Hosts, error) { getHostsParams := clienthost.NewGetHostsParams() getHostsParams.RequestBody = &models.GetHostsRequestBody{ Where: &models.HostWhereInput{ @@ -507,7 +507,7 @@ func (svr *TowerVMService) GetHostsByCluster(clusterID string) ([]*models.Host, return nil, errors.New(HostNotFound) } - return getHostsResp.Payload, nil + return NewHostsFromList(getHostsResp.Payload), nil } // GetVlan searches for a vlan. diff --git a/pkg/util/kcp/kcp.go b/pkg/util/kcp/kcp.go index 770297fe..ae0654c2 100644 --- a/pkg/util/kcp/kcp.go +++ b/pkg/util/kcp/kcp.go @@ -22,6 +22,19 @@ import ( "sigs.k8s.io/cluster-api/util/conditions" ) +// IsKCPRollingUpdate returns whether KCP is in scaling down. +// +// When KCP is in rolling update, KCP controller marks +// MachinesSpecUpToDateCondition to false and RollingUpdateInProgressReason as Reason. +// +// When all machines are up to date, KCP controller marks MachinesSpecUpToDateCondition to true. 
+// +// For more information about KCP MachinesSpecUpToDateCondition and RollingUpdateInProgressReason, refer to https://github.com/kubernetes-sigs/cluster-api/blob/main/api/v1beta1/condition_consts.go +func IsKCPRollingUpdate(kcp *controlplanev1.KubeadmControlPlane) bool { + return conditions.IsFalse(kcp, controlplanev1.MachinesSpecUpToDateCondition) && + conditions.GetReason(kcp, controlplanev1.MachinesSpecUpToDateCondition) == controlplanev1.RollingUpdateInProgressReason +} + // IsKCPRollingUpdateFirstMachine returns true if KCP is in rolling update and creating the first CP Machine. // // KCP rollout algorithm is as follows: @@ -38,7 +51,7 @@ import ( // For more information about KCP replicas, refer to https://github.com/kubernetes-sigs/cluster-api/blob/main/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_types.go // For more information about KCP rollout, refer to https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20191017-kubeadm-based-control-plane.md#kubeadmcontrolplane-rollout func IsKCPRollingUpdateFirstMachine(kcp *controlplanev1.KubeadmControlPlane) bool { - return *kcp.Spec.Replicas < kcp.Status.Replicas && kcp.Status.UpdatedReplicas == 1 + return IsKCPRollingUpdate(kcp) && kcp.Status.UpdatedReplicas == 1 } // IsKCPInScalingDown returns whether KCP is in scaling down. From d881627f99bf42da8849e9279028db7b4a2a5932 Mon Sep 17 00:00:00 2001 From: haijianyang Date: Mon, 31 Jul 2023 23:28:34 -0400 Subject: [PATCH 2/8] Fix comment --- controllers/elfmachine_controller_placement_group.go | 12 +++++++----- controllers/elfmachine_controller_test.go | 10 +++++----- pkg/service/mock_services/vm_mock.go | 2 +- pkg/util/kcp/kcp.go | 6 +++--- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/controllers/elfmachine_controller_placement_group.go b/controllers/elfmachine_controller_placement_group.go index 5f249a3b..6a13b896 100644 --- a/controllers/elfmachine_controller_placement_group.go +++ b/controllers/elfmachine_controller_placement_group.go @@ -168,9 +168,9 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex return nil, err } - // KCP is not in scaling down/rolling update. + // When KCP is not in scaling down and it's not rolling update the 1st CP Machine, just return since the placement group is full. if !(kcputil.IsKCPRollingUpdateFirstMachine(kcp) || kcputil.IsKCPInScalingDown(kcp)) { - ctx.Logger.V(1).Info("The placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) + ctx.Logger.V(1).Info("KCP is not in scaling down and it's not rolling update the 1st CP Machine, the placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) return nil, nil } @@ -211,7 +211,9 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex return nil, nil } - // KCP is in rolling update. + // When the PlacementGroup is full and it's rolling update the 1st CP Machine and, + // place the VM on the target host without joining the PlacementGroup and power it on. + // After other CP Machine are rolled out successfully, add this 1st CP VM into the PlacementGroup. 
if usedHostsByPG.Len() == usedHostsByPG.Available(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)).Len() && int(*kcp.Spec.Replicas) == usedHostsByPG.Len() { @@ -225,7 +227,7 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex return pointer.String(hostID), err } - ctx.Logger.V(1).Info("The placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) + ctx.Logger.V(1).Info("KCP is rolling update the 1st CP Machine, the placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) return nil, nil } @@ -448,7 +450,7 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v return false, err } - if kcputil.IsKCPRollingUpdate(kcp) { + if kcputil.IsKCPInRollingUpdate(kcp) { ctx.Logger.Info("KCP rolling update in progress, skip migrating VM", "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) return true, nil } diff --git a/controllers/elfmachine_controller_test.go b/controllers/elfmachine_controller_test.go index c74c0ca1..61da905c 100644 --- a/controllers/elfmachine_controller_test.go +++ b/controllers/elfmachine_controller_test.go @@ -1260,7 +1260,7 @@ var _ = Describe("ElfMachineReconciler", func() { hostID, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(hostID).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("The placement group is full, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is not in scaling down and it's not rolling update the 1st CP Machine, the placement group is full, wait for enough available hosts")) }) It("when placement group is full and KCP rolling update in progress", func() { @@ -1334,7 +1334,7 @@ var _ = Describe("ElfMachineReconciler", func() { host, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("The placement group is full, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is rolling update the 1st CP Machine, the placement group is full, wait for enough available hosts")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1348,7 +1348,7 @@ var _ = Describe("ElfMachineReconciler", func() { host, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("The placement group is full, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is rolling update the 1st CP Machine, the placement group is full, wait for enough available hosts")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1362,7 +1362,7 @@ var _ = Describe("ElfMachineReconciler", func() { host, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("The placement group is full, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is rolling update the 1st CP Machine, the placement group is full, wait for enough available hosts")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1458,7 +1458,7 @@ var _ = Describe("ElfMachineReconciler", func() { hostID, err := 
reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(hostID).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("The placement group is full, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is not in scaling down and it's not rolling update the 1st CP Machine, the placement group is full, wait for enough available hosts")) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForAvailableHostRequiredByPlacementGroupReason}}) elfMachine.Status.Conditions = nil diff --git a/pkg/service/mock_services/vm_mock.go b/pkg/service/mock_services/vm_mock.go index 968f2293..a0567d38 100644 --- a/pkg/service/mock_services/vm_mock.go +++ b/pkg/service/mock_services/vm_mock.go @@ -1,5 +1,5 @@ // Code generated by MockGen. DO NOT EDIT. -// Source: /root/github/cluster-api-provider-elf/pkg/service/vm.go +// Source: github.com/smartxworks/cluster-api-provider-elf/pkg/service/vm.go // Package mock_services is a generated GoMock package. package mock_services diff --git a/pkg/util/kcp/kcp.go b/pkg/util/kcp/kcp.go index ae0654c2..0c9a33e8 100644 --- a/pkg/util/kcp/kcp.go +++ b/pkg/util/kcp/kcp.go @@ -22,7 +22,7 @@ import ( "sigs.k8s.io/cluster-api/util/conditions" ) -// IsKCPRollingUpdate returns whether KCP is in scaling down. +// IsKCPInRollingUpdate returns whether KCP is in scaling down. // // When KCP is in rolling update, KCP controller marks // MachinesSpecUpToDateCondition to false and RollingUpdateInProgressReason as Reason. @@ -30,7 +30,7 @@ import ( // When all machines are up to date, KCP controller marks MachinesSpecUpToDateCondition to true. // // For more information about KCP MachinesSpecUpToDateCondition and RollingUpdateInProgressReason, refer to https://github.com/kubernetes-sigs/cluster-api/blob/main/api/v1beta1/condition_consts.go -func IsKCPRollingUpdate(kcp *controlplanev1.KubeadmControlPlane) bool { +func IsKCPInRollingUpdate(kcp *controlplanev1.KubeadmControlPlane) bool { return conditions.IsFalse(kcp, controlplanev1.MachinesSpecUpToDateCondition) && conditions.GetReason(kcp, controlplanev1.MachinesSpecUpToDateCondition) == controlplanev1.RollingUpdateInProgressReason } @@ -51,7 +51,7 @@ func IsKCPRollingUpdate(kcp *controlplanev1.KubeadmControlPlane) bool { // For more information about KCP replicas, refer to https://github.com/kubernetes-sigs/cluster-api/blob/main/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_types.go // For more information about KCP rollout, refer to https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20191017-kubeadm-based-control-plane.md#kubeadmcontrolplane-rollout func IsKCPRollingUpdateFirstMachine(kcp *controlplanev1.KubeadmControlPlane) bool { - return IsKCPRollingUpdate(kcp) && kcp.Status.UpdatedReplicas == 1 + return IsKCPInRollingUpdate(kcp) && kcp.Status.UpdatedReplicas == 1 } // IsKCPInScalingDown returns whether KCP is in scaling down. 
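The Hosts collection added in the first patch is what the rest of the series builds on, so a short usage sketch may help while reviewing it. The sketch is illustrative only and is not part of any patch: the host fixtures and memory figures are invented, and it uses the Available/Difference names as they stand after patch 2 (patch 3 later renames Available to FilterAvailableHostsWithEnoughMemory).

package main

import (
	"fmt"

	"github.com/smartxworks/cloudtower-go-sdk/v2/models"
	"k8s.io/utils/pointer"

	"github.com/smartxworks/cluster-api-provider-elf/pkg/service"
)

func main() {
	// Two made-up hosts: host1 is short on memory, host2 has plenty.
	host1 := &models.Host{ID: pointer.String("1"), Name: pointer.String("host1"),
		AllocatableMemoryBytes: pointer.Int64(1 << 30), Status: models.NewHostStatus(models.HostStatusCONNECTEDHEALTHY)}
	host2 := &models.Host{ID: pointer.String("2"), Name: pointer.String("host2"),
		AllocatableMemoryBytes: pointer.Int64(8 << 30), Status: models.NewHostStatus(models.HostStatusCONNECTEDHEALTHY)}

	hosts := service.NewHostsFromList([]*models.Host{host1, host2})

	// Pretend the placement group already occupies host1.
	usedHostsByPG := service.NewHosts(host1)

	// Same shape as getAvailableHostsForVM: keep healthy hosts with enough
	// allocatable memory, then drop the hosts the placement group already uses.
	requiredMemory := int64(4 << 30)
	availableHosts := hosts.Available(requiredMemory).Difference(usedHostsByPG)

	fmt.Println(availableHosts.Contains(*host2.ID)) // true: only host2 remains
	fmt.Println(availableHosts.String())
}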
From b2d4621639a3f5681de15652e9dd4d7cd182788d Mon Sep 17 00:00:00 2001 From: haijianyang Date: Wed, 2 Aug 2023 23:47:05 -0400 Subject: [PATCH 3/8] Update --- .../elfmachine_controller_placement_group.go | 94 ++++++++++--------- controllers/elfmachine_controller_test.go | 26 +++-- pkg/service/collections.go | 20 +++- pkg/service/collections_test.go | 17 +++- pkg/util/kcp/kcp.go | 39 +++----- 5 files changed, 113 insertions(+), 83 deletions(-) diff --git a/controllers/elfmachine_controller_placement_group.go b/controllers/elfmachine_controller_placement_group.go index 6a13b896..f17a9803 100644 --- a/controllers/elfmachine_controller_placement_group.go +++ b/controllers/elfmachine_controller_placement_group.go @@ -168,9 +168,9 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex return nil, err } - // When KCP is not in scaling down and it's not rolling update the 1st CP Machine, just return since the placement group is full. - if !(kcputil.IsKCPRollingUpdateFirstMachine(kcp) || kcputil.IsKCPInScalingDown(kcp)) { - ctx.Logger.V(1).Info("KCP is not in scaling down and it's not rolling update the 1st CP Machine, the placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) + // When KCP is not in rolling update and it's not in scaling down, just return since the placement group is full. + if !kcputil.IsKCPInRollingUpdate(kcp) && !kcputil.IsKCPInScalingDown(kcp) { + ctx.Logger.V(1).Info("KCP is not in rolling update and it's not in scaling down, the placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) return nil, nil } @@ -211,31 +211,30 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex return nil, nil } - // When the PlacementGroup is full and it's rolling update the 1st CP Machine and, - // place the VM on the target host without joining the PlacementGroup and power it on. - // After other CP Machine are rolled out successfully, add this 1st CP VM into the PlacementGroup. + // Now since the PlacementGroup is full and KCP is in rolling update, + // we will place the target CP VM on a selected host without joining the PlacementGroup and power it on. + // After other CP VMs are rolled out successfully, these VMs must already join the PlacementGroup + // since there is always an empty seat in the PlacementGroup, we will add the target CP VM into the PlacementGroup. - if usedHostsByPG.Len() == usedHostsByPG.Available(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)).Len() && - int(*kcp.Spec.Replicas) == usedHostsByPG.Len() { - // Only when KCP is in rolling update and the placement group is full - // does it need to get the latest created machine. 
- hostID, err := r.getVMHostForRollingUpdate(ctx, placementGroup, hosts) - if err != nil || hostID == "" { - return nil, err - } + usedAndUnavailableHostsByPG := usedHostsByPG.FilterUnavailableHostsWithoutEnoughMemory(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) + if !usedAndUnavailableHostsByPG.IsEmpty() { + ctx.Logger.V(1).Info("KCP is in rolling update, the placement group is full and has unavailable hosts, wait for enough available hosts", "placementGroup", *placementGroup.Name, "usedAndUnavailableHostsByPG", usedAndUnavailableHostsByPG.String(), "usedHostsByPG", usedHostsByPG.String()) - return pointer.String(hostID), err + return nil, nil } - ctx.Logger.V(1).Info("KCP is rolling update the 1st CP Machine, the placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) + hostID, err := r.getVMHostForRollingUpdate(ctx, placementGroup, hosts) + if err != nil || hostID == "" { + return nil, err + } - return nil, nil + return pointer.String(hostID), err } // getVMHostForRollingUpdate returns the target host server id for a virtual machine during rolling update. // During KCP rolling update, machines will be deleted in the order of creation. // Find the latest created machine in the placement group, -// and set the host where the machine is located to the first machine created by KCP rolling update. +// and set the host where the machine is located to the machine created by KCP rolling update. // This prevents migration of virtual machine during KCP rolling update when using a placement group. func (r *ElfMachineReconciler) getVMHostForRollingUpdate(ctx *context.MachineContext, placementGroup *models.VMPlacementGroup, hosts service.Hosts) (string, error) { elfMachines, err := machineutil.GetControlPlaneElfMachinesInCluster(ctx, ctx.Client, ctx.Cluster.Namespace, ctx.Cluster.Name) @@ -265,27 +264,31 @@ func (r *ElfMachineReconciler) getVMHostForRollingUpdate(ctx *context.MachineCon } machines := collections.FromMachines(placementGroupMachines...) 
- if machine := machines.Newest(); machine != nil { - if vm, err := ctx.VMService.Get(vmMap[machine.Name]); err != nil { - return "", err - } else { - host := hosts.Get(*vm.Host.ID) - if host == nil { - ctx.Logger.Info("Host not found, skip selecting host for VM", "hostID", *vm.Host.ID, "vmRef", ctx.ElfMachine.Status.VMRef) - } else { - ok, message := service.IsAvailableHost(host, *service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) - if ok { - ctx.Logger.Info("Selected the host server for VM since the placement group is full", "hostID", *vm.Host.ID, "vmRef", ctx.ElfMachine.Status.VMRef) - - return *vm.Host.ID, nil - } + newestMachine := machines.Newest() + if newestMachine == nil { + ctx.Logger.Info("Newest machine not found, skip selecting host for VM", "vmRef", ctx.ElfMachine.Status.VMRef) + return "", nil + } - ctx.Logger.Info(fmt.Sprintf("Host is unavailable: %s, skip selecting host for VM", message), "hostID", *vm.Host.ID, "vmRef", ctx.ElfMachine.Status.VMRef) - } - } + vm, err := ctx.VMService.Get(vmMap[newestMachine.Name]) + if err != nil { + return "", err } - return "", nil + host := hosts.Get(*vm.Host.ID) + if host == nil { + ctx.Logger.Info("Host not found, skip selecting host for VM", "hostID", *vm.Host.ID, "vmRef", ctx.ElfMachine.Status.VMRef) + return "", err + } + + ok, message := service.IsAvailableHost(host, *service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) + if ok { + ctx.Logger.Info("Selected the host server for VM since the placement group is full", "hostID", *vm.Host.ID, "vmRef", ctx.ElfMachine.Status.VMRef) + return *vm.Host.ID, nil + } + + ctx.Logger.Info(fmt.Sprintf("Host is unavailable: %s, skip selecting host for VM", message), "hostID", *vm.Host.ID, "vmRef", ctx.ElfMachine.Status.VMRef) + return "", err } // getHostsInPlacementGroup returns the hosts where all virtual machines of placement group located. @@ -310,7 +313,7 @@ func (r *ElfMachineReconciler) getHostsInPlacementGroup(ctx *context.MachineCont // It returns hosts that are not in faulted state, not in the given 'usedHostsByPG', // and have sufficient memory for running this VM. func (r *ElfMachineReconciler) getAvailableHostsForVM(ctx *context.MachineContext, hosts service.Hosts, usedHostsByPG service.Hosts, vm *models.VM) service.Hosts { - availableHosts := hosts.Available(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)).Difference(usedHostsByPG) + availableHosts := hosts.FilterAvailableHostsWithEnoughMemory(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)).Difference(usedHostsByPG) // If the VM is running, and the host where the VM is located // is not used by the placement group, then it is not necessary to @@ -415,14 +418,17 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v // Only when the KCP is in rolling update, the VM is stopped, and all the hosts used by the placement group are available, // will the upgrade be allowed with the same number of hosts and CP nodes. - // In this case first machine created by KCP rolling update can be powered on without being added to the placement group. 
- if kcputil.IsKCPRollingUpdateFirstMachine(kcp) && - *vm.Status == models.VMStatusSTOPPED && - usedHostsByPG.Len() == usedHostsByPG.Available(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)).Len() && - int(*kcp.Spec.Replicas) == usedHostsByPG.Len() { - ctx.Logger.Info("The placement group is full and KCP is in rolling update, skip adding VM to the placement group", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) + // In this case machine created by KCP rolling update can be powered on without being added to the placement group. + if kcputil.IsKCPInRollingUpdate(kcp) && *vm.Status == models.VMStatusSTOPPED { + usedAndUnavailableHostsByPG := usedHostsByPG.FilterUnavailableHostsWithoutEnoughMemory(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) + if usedAndUnavailableHostsByPG.IsEmpty() { + ctx.Logger.Info("KCP is in rolling update, the placement group is full and has no unavailable hosts, skip adding VM to the placement group", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) + return true, nil + } - return true, nil + ctx.Logger.Info("KCP is in rolling update, the placement group is full and has unavailable hosts, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) + + return false, nil } if *vm.Status == models.VMStatusRUNNING || *vm.Status == models.VMStatusSUSPENDED { diff --git a/controllers/elfmachine_controller_test.go b/controllers/elfmachine_controller_test.go index 61da905c..72343811 100644 --- a/controllers/elfmachine_controller_test.go +++ b/controllers/elfmachine_controller_test.go @@ -992,20 +992,20 @@ var _ = Describe("ElfMachineReconciler", func() { ok, err := reconciler.joinPlacementGroup(machineContext, vm) Expect(ok).To(BeTrue()) Expect(err).To(BeZero()) - Expect(logBuffer.String()).To(ContainSubstring("The placement group is full and KCP is in rolling update, skip adding VM to the placement group")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has no unavailable hosts, skip adding VM to the placement group")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) host.HostState = &models.NestedMaintenanceHostState{State: models.NewMaintenanceModeEnum(models.MaintenanceModeEnumMAINTENANCEMODE)} mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) - mockVMService.EXPECT().FindByIDs([]string{*placementGroup.Vms[0].ID}).Return([]*models.VM{}, nil) + mockVMService.EXPECT().FindByIDs([]string{*placementGroup.Vms[0].ID}).Return([]*models.VM{vm2}, nil) reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} ok, err = reconciler.joinPlacementGroup(machineContext, vm) Expect(ok).To(BeFalse()) Expect(err).To(BeZero()) - Expect(logBuffer.String()).To(ContainSubstring("The placement group is full, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unavailable hosts, wait for enough available hosts")) logBuffer = new(bytes.Buffer) 
klog.SetOutput(logBuffer) @@ -1229,6 +1229,7 @@ var _ = Describe("ElfMachineReconciler", func() { placementGroup.Vms = []*models.NestedVM{} kcp.Spec.Replicas = pointer.Int32(3) kcp.Status.Replicas = 3 + kcp.Status.UpdatedReplicas = 3 ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp, elfMachine1, machine1, elfMachine2, machine2, elfMachine3, machine3) machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -1248,6 +1249,7 @@ var _ = Describe("ElfMachineReconciler", func() { hostID, err := reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(*hostID).To(Equal("")) + Expect(logBuffer.String()).To(ContainSubstring("The placement group still has capacity")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1260,7 +1262,7 @@ var _ = Describe("ElfMachineReconciler", func() { hostID, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(hostID).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is not in scaling down and it's not rolling update the 1st CP Machine, the placement group is full, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is not in rolling update and it's not in scaling down, the placement group is full, wait for enough available hosts")) }) It("when placement group is full and KCP rolling update in progress", func() { @@ -1334,7 +1336,7 @@ var _ = Describe("ElfMachineReconciler", func() { host, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is rolling update the 1st CP Machine, the placement group is full, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unavailable hosts")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1348,7 +1350,7 @@ var _ = Describe("ElfMachineReconciler", func() { host, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is rolling update the 1st CP Machine, the placement group is full, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unavailable hosts, wait for enough available hosts")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1362,7 +1364,7 @@ var _ = Describe("ElfMachineReconciler", func() { host, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is rolling update the 1st CP Machine, the placement group is full, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unavailable hosts, wait for enough available hosts")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1416,6 +1418,14 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(err).To(BeZero()) Expect(hostID).To(Equal(*vm3.Host.ID)) Expect(logBuffer.String()).To(ContainSubstring("Selected the host server for VM since the placement group is full")) + + ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp) + 
machineContext = newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + hostID, err = reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, service.NewHostsFromList(hosts)) + Expect(err).To(BeZero()) + Expect(hostID).To(Equal("")) + Expect(logBuffer.String()).To(ContainSubstring("Newest machine not found, skip selecting host for VM")) }) }) @@ -1458,7 +1468,7 @@ var _ = Describe("ElfMachineReconciler", func() { hostID, err := reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(hostID).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is not in scaling down and it's not rolling update the 1st CP Machine, the placement group is full, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is not in rolling update and it's not in scaling down, the placement group is full, wait for enough available hosts")) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForAvailableHostRequiredByPlacementGroupReason}}) elfMachine.Status.Conditions = nil diff --git a/pkg/service/collections.go b/pkg/service/collections.go index 05c78d8a..98680733 100644 --- a/pkg/service/collections.go +++ b/pkg/service/collections.go @@ -67,21 +67,35 @@ func (s Hosts) IsEmpty() bool { func (s Hosts) String() string { str := "" + for _, host := range s { - str += fmt.Sprintf("{id: %s,name: %s},", GetTowerString(host.ID), GetTowerString(host.Name)) + state := "" + if host.HostState != nil { + state = string(*host.HostState.State) + } + + str += fmt.Sprintf("{id: %s,name: %s,memory: %d,status: %s,state: %s},", GetTowerString(host.ID), GetTowerString(host.Name), GetTowerInt64(host.AllocatableMemoryBytes), string(*host.Status), state) } return fmt.Sprintf("[%s]", str) } -// Available returns a Hosts with available hosts. -func (s Hosts) Available(memory int64) Hosts { +// FilterAvailableHostsWithEnoughMemory returns a Hosts containing the available host which has allocatable memory no less than the specified memory. +func (s Hosts) FilterAvailableHostsWithEnoughMemory(memory int64) Hosts { return s.Filter(func(h *models.Host) bool { ok, _ := IsAvailableHost(h, memory) return ok }) } +// FilterUnavailableHostsWithoutEnoughMemory returns a Hosts containing the unavailable host which has allocatable memory less than the specified memory. +func (s Hosts) FilterUnavailableHostsWithoutEnoughMemory(memory int64) Hosts { + return s.Filter(func(h *models.Host) bool { + ok, _ := IsAvailableHost(h, memory) + return !ok + }) +} + // Get returns a Host of the specified host. func (s Hosts) Get(hostID string) *models.Host { if host, ok := s[hostID]; ok { diff --git a/pkg/service/collections_test.go b/pkg/service/collections_test.go index cf74e046..dabb92a9 100644 --- a/pkg/service/collections_test.go +++ b/pkg/service/collections_test.go @@ -17,6 +17,7 @@ limitations under the License. 
package service import ( + "fmt" "testing" "github.com/onsi/gomega" @@ -47,12 +48,24 @@ func TestHostCollection(t *testing.T) { host2 := &models.Host{ID: TowerString("2"), Name: TowerString("host2"), AllocatableMemoryBytes: pointer.Int64(2), Status: models.NewHostStatus(models.HostStatusCONNECTEDHEALTHY)} hosts := NewHosts() - g.Expect(hosts.Available(0).Len()).To(gomega.Equal(0)) + g.Expect(hosts.FilterAvailableHostsWithEnoughMemory(0).Len()).To(gomega.Equal(0)) hosts = NewHostsFromList([]*models.Host{host1, host2}) - availableHosts := hosts.Available(2) + availableHosts := hosts.FilterAvailableHostsWithEnoughMemory(2) g.Expect(availableHosts.Len()).To(gomega.Equal(1)) g.Expect(availableHosts.Contains(*host2.ID)).To(gomega.BeTrue()) + + hosts = NewHosts() + unavailableHosts := hosts.FilterUnavailableHostsWithoutEnoughMemory(0) + g.Expect(unavailableHosts.IsEmpty()).To(gomega.BeTrue()) + g.Expect(unavailableHosts.Len()).To(gomega.Equal(0)) + g.Expect(unavailableHosts.String()).To(gomega.Equal("[]")) + + hosts = NewHostsFromList([]*models.Host{host1, host2}) + unavailableHosts = hosts.FilterUnavailableHostsWithoutEnoughMemory(2) + g.Expect(unavailableHosts.Len()).To(gomega.Equal(1)) + g.Expect(unavailableHosts.Contains(*host1.ID)).To(gomega.BeTrue()) + g.Expect(unavailableHosts.String()).To(gomega.Equal(fmt.Sprintf("[{id: %s,name: %s,memory: %d,status: %s,state: %s},]", *host1.ID, *host1.Name, *host1.AllocatableMemoryBytes, string(*host1.Status), ""))) }) t.Run("Difference", func(t *testing.T) { diff --git a/pkg/util/kcp/kcp.go b/pkg/util/kcp/kcp.go index 0c9a33e8..afbaffd3 100644 --- a/pkg/util/kcp/kcp.go +++ b/pkg/util/kcp/kcp.go @@ -22,36 +22,23 @@ import ( "sigs.k8s.io/cluster-api/util/conditions" ) -// IsKCPInRollingUpdate returns whether KCP is in scaling down. +// IsKCPInRollingUpdate returns whether KCP is in rolling update. // -// When KCP is in rolling update, KCP controller marks -// MachinesSpecUpToDateCondition to false and RollingUpdateInProgressReason as Reason. -// -// When all machines are up to date, KCP controller marks MachinesSpecUpToDateCondition to true. -// -// For more information about KCP MachinesSpecUpToDateCondition and RollingUpdateInProgressReason, refer to https://github.com/kubernetes-sigs/cluster-api/blob/main/api/v1beta1/condition_consts.go -func IsKCPInRollingUpdate(kcp *controlplanev1.KubeadmControlPlane) bool { - return conditions.IsFalse(kcp, controlplanev1.MachinesSpecUpToDateCondition) && - conditions.GetReason(kcp, controlplanev1.MachinesSpecUpToDateCondition) == controlplanev1.RollingUpdateInProgressReason -} - -// IsKCPRollingUpdateFirstMachine returns true if KCP is in rolling update and creating the first CP Machine. -// -// KCP rollout algorithm is as follows: -// Find Machines that have an outdated spec, If there is a machine requiring rollout -// 1.Scale up control plane creating a machine with the new spec -// 2.Scale down control plane by removing one of the machine that needs rollout (the oldest out-of date machine in the failure domain that has the most control-plane machines on it) -// -// kcp.Status.UpdatedReplicas is the total number of machines that are up to date with the control -// plane's configuration and therefore do not require rollout. -// -// So when KCP is in rolling update and creating the first CP Machine, -// kcp.Status.Replicas is greater than kcp.Spec.Replicas and kcp.Status.UpdatedReplicas equals 1. +// When *kcp.Spec.Replicas > kcp.Status.UpdatedReplicas, it must be in a KCP rolling update process. 
+// When *kcp.Spec.Replicas == kcp.Status.UpdatedReplicas, it could be in one of the following cases: +// 1. It's not in a KCP rolling update process. So kcp.Spec.Replicas == kcp.Status.Replicas. +// 2. It's at the end of a KCP rolling update process, and the last KCP replica (i.e the last KCP ElfMachine) is created just now. +// There is still an old KCP ElfMachine, so kcp.Spec.Replicas + 1 == kcp.Status.Replicas. // // For more information about KCP replicas, refer to https://github.com/kubernetes-sigs/cluster-api/blob/main/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_types.go // For more information about KCP rollout, refer to https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20191017-kubeadm-based-control-plane.md#kubeadmcontrolplane-rollout -func IsKCPRollingUpdateFirstMachine(kcp *controlplanev1.KubeadmControlPlane) bool { - return IsKCPInRollingUpdate(kcp) && kcp.Status.UpdatedReplicas == 1 +func IsKCPInRollingUpdate(kcp *controlplanev1.KubeadmControlPlane) bool { + if (*kcp.Spec.Replicas > kcp.Status.UpdatedReplicas && *kcp.Spec.Replicas <= kcp.Status.Replicas) || + (*kcp.Spec.Replicas == kcp.Status.UpdatedReplicas && *kcp.Spec.Replicas < kcp.Status.Replicas) { + return true + } + + return false } // IsKCPInScalingDown returns whether KCP is in scaling down. From 8c664ed476a04d391ce3a5469cbcbc594e170c83 Mon Sep 17 00:00:00 2001 From: haijianyang Date: Thu, 3 Aug 2023 05:21:57 -0400 Subject: [PATCH 4/8] Fix comment --- .../elfmachine_controller_placement_group.go | 44 ++++++++++++------- controllers/elfmachine_controller_test.go | 38 ++++++++++++---- pkg/service/collections.go | 4 +- pkg/service/collections_test.go | 4 +- 4 files changed, 60 insertions(+), 30 deletions(-) diff --git a/controllers/elfmachine_controller_placement_group.go b/controllers/elfmachine_controller_placement_group.go index f17a9803..cb8e9018 100644 --- a/controllers/elfmachine_controller_placement_group.go +++ b/controllers/elfmachine_controller_placement_group.go @@ -168,9 +168,9 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex return nil, err } - // When KCP is not in rolling update and it's not in scaling down, just return since the placement group is full. + // When KCP is not in rolling update and not in scaling down, just return since the placement group is full. if !kcputil.IsKCPInRollingUpdate(kcp) && !kcputil.IsKCPInScalingDown(kcp) { - ctx.Logger.V(1).Info("KCP is not in rolling update and it's not in scaling down, the placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) + ctx.Logger.V(1).Info("KCP is not in rolling update and not in scaling down, the placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) return nil, nil } @@ -216,9 +216,9 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex // After other CP VMs are rolled out successfully, these VMs must already join the PlacementGroup // since there is always an empty seat in the PlacementGroup, we will add the target CP VM into the PlacementGroup. 
- usedAndUnavailableHostsByPG := usedHostsByPG.FilterUnavailableHostsWithoutEnoughMemory(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) - if !usedAndUnavailableHostsByPG.IsEmpty() { - ctx.Logger.V(1).Info("KCP is in rolling update, the placement group is full and has unavailable hosts, wait for enough available hosts", "placementGroup", *placementGroup.Name, "usedAndUnavailableHostsByPG", usedAndUnavailableHostsByPG.String(), "usedHostsByPG", usedHostsByPG.String()) + unusableHosts := usedHostsByPG.FilterUnavailableHostsOrWithoutEnoughMemory(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) + if !unusableHosts.IsEmpty() { + ctx.Logger.V(1).Info("KCP is in rolling update, the placement group is full and has unusable hosts, will wait for enough available hosts", "placementGroup", *placementGroup.Name, "unusableHosts", unusableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) return nil, nil } @@ -277,17 +277,17 @@ func (r *ElfMachineReconciler) getVMHostForRollingUpdate(ctx *context.MachineCon host := hosts.Get(*vm.Host.ID) if host == nil { - ctx.Logger.Info("Host not found, skip selecting host for VM", "hostID", *vm.Host.ID, "vmRef", ctx.ElfMachine.Status.VMRef) + ctx.Logger.Info("Host not found, skip selecting host for VM", "host", formatNestedHost(vm.Host), "vmRef", ctx.ElfMachine.Status.VMRef) return "", err } ok, message := service.IsAvailableHost(host, *service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) if ok { - ctx.Logger.Info("Selected the host server for VM since the placement group is full", "hostID", *vm.Host.ID, "vmRef", ctx.ElfMachine.Status.VMRef) + ctx.Logger.Info("Select a host to power on the VM since the placement group is full", "host", formatNestedHost(vm.Host), "vmRef", ctx.ElfMachine.Status.VMRef) return *vm.Host.ID, nil } - ctx.Logger.Info(fmt.Sprintf("Host is unavailable: %s, skip selecting host for VM", message), "hostID", *vm.Host.ID, "vmRef", ctx.ElfMachine.Status.VMRef) + ctx.Logger.Info(fmt.Sprintf("Host is unavailable: %s, skip selecting host for VM", message), "host", formatNestedHost(vm.Host), "vmRef", ctx.ElfMachine.Status.VMRef) return "", err } @@ -417,16 +417,17 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v } // Only when the KCP is in rolling update, the VM is stopped, and all the hosts used by the placement group are available, - // will the upgrade be allowed with the same number of hosts and CP nodes. - // In this case machine created by KCP rolling update can be powered on without being added to the placement group. + // will the upgrade be allowed. + // In this case the machine created by KCP rolling update can be powered on without being added to the placement group, + // so return true and nil to let reconcileVMStatus() power it on. 
if kcputil.IsKCPInRollingUpdate(kcp) && *vm.Status == models.VMStatusSTOPPED { - usedAndUnavailableHostsByPG := usedHostsByPG.FilterUnavailableHostsWithoutEnoughMemory(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) - if usedAndUnavailableHostsByPG.IsEmpty() { - ctx.Logger.Info("KCP is in rolling update, the placement group is full and has no unavailable hosts, skip adding VM to the placement group", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) + unusablehosts := usedHostsByPG.FilterUnavailableHostsOrWithoutEnoughMemory(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) + if unusablehosts.IsEmpty() { + ctx.Logger.Info("KCP is in rolling update, the placement group is full and has no unusable hosts, so skip adding VM to the placement group and power it on", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) return true, nil } - ctx.Logger.Info("KCP is in rolling update, the placement group is full and has unavailable hosts, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) + ctx.Logger.Info("KCP is in rolling update, the placement group is full and has unusable hosts, so wait for enough available hosts", "placementGroup", *placementGroup.Name, "unusablehosts", unusablehosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) return false, nil } @@ -438,7 +439,7 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v } // KCP is scaling out or being created. - ctx.Logger.V(1).Info("The placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) + ctx.Logger.V(1).Info("KCP is in scaling up or being created, the placement group is full, so wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) return false, nil } @@ -447,7 +448,7 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v // and the virtual machine is not STOPPED, we need to migrate the virtual machine to a host that // is not used by the placement group before adding the virtual machine to the placement group. // Otherwise, just add the virtual machine to the placement group directly. 
- ctx.Logger.V(1).Info("The availableHosts for migrating the VM", "hosts", availableHosts.String(), "vmHost", *vm.Host.ID) + ctx.Logger.V(1).Info("The availableHosts for migrating the VM", "hosts", availableHosts.String(), "vmHost", formatNestedHost(vm.Host)) if !availableHosts.Contains(*vm.Host.ID) && *vm.Status != models.VMStatusSTOPPED { ctx.Logger.V(1).Info("Try to migrate the virtual machine to the specified target host if needed") @@ -528,7 +529,7 @@ func (r *ElfMachineReconciler) migrateVM(ctx *context.MachineContext, vm *models ctx.ElfMachine.SetTask(*withTaskVM.TaskID) - ctx.Logger.Info(fmt.Sprintf("Waiting for the VM to be migrated from %s to %s", *vm.Host.ID, targetHost), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID, "taskRef", ctx.ElfMachine.Status.TaskRef) + ctx.Logger.Info(fmt.Sprintf("Waiting for the VM to be migrated from %s to %s", formatNestedHost(vm.Host), targetHost), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID, "taskRef", ctx.ElfMachine.Status.TaskRef) return false, nil } @@ -614,3 +615,12 @@ func (r *ElfMachineReconciler) deletePlacementGroup(ctx *context.MachineContext) return true, nil } + +// formatNestedHost returns the basic information of the host (ID, name). +func formatNestedHost(host *models.NestedHost) string { + if host == nil { + return "{}" + } + + return fmt.Sprintf("{id: %s,name: %s}", service.GetTowerString(host.ID), service.GetTowerString(host.Name)) +} diff --git a/controllers/elfmachine_controller_test.go b/controllers/elfmachine_controller_test.go index 72343811..9f6e83e8 100644 --- a/controllers/elfmachine_controller_test.go +++ b/controllers/elfmachine_controller_test.go @@ -992,7 +992,7 @@ var _ = Describe("ElfMachineReconciler", func() { ok, err := reconciler.joinPlacementGroup(machineContext, vm) Expect(ok).To(BeTrue()) Expect(err).To(BeZero()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has no unavailable hosts, skip adding VM to the placement group")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has no unusable hosts, so skip adding VM to the placement group and power it on")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1005,7 +1005,7 @@ var _ = Describe("ElfMachineReconciler", func() { ok, err = reconciler.joinPlacementGroup(machineContext, vm) Expect(ok).To(BeFalse()) Expect(err).To(BeZero()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unavailable hosts, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unusable hosts, so wait for enough available hosts")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1020,6 +1020,26 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(ok).To(BeTrue()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring(fmt.Sprintf("The placement group is full and VM is in %s status, skip adding VM to the placement group", *vm.Status))) + + logBuffer = new(bytes.Buffer) + klog.SetOutput(logBuffer) + kcp.Spec.Replicas = pointer.Int32(1) + kcp.Status.Replicas = 1 + kcp.Status.UpdatedReplicas = 1 + ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kcp) + machineContext = newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, 
machine) + vm.Status = models.NewVMStatus(models.VMStatusSTOPPED) + host.HostState = &models.NestedMaintenanceHostState{State: models.NewMaintenanceModeEnum(models.MaintenanceModeEnumMAINTENANCEMODE)} + mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) + mockVMService.EXPECT().FindByIDs([]string{*placementGroup.Vms[0].ID}).Return([]*models.VM{}, nil) + + reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ok, err = reconciler.joinPlacementGroup(machineContext, vm) + Expect(ok).To(BeFalse()) + Expect(err).To(BeZero()) + Expect(logBuffer.String()).To(ContainSubstring("KCP is in scaling up or being created, the placement group is full, so wait for enough available hosts")) }) It("should add VM to placement group when VM is not in placement group and the host where VM in is not in placement group", func() { @@ -1262,7 +1282,7 @@ var _ = Describe("ElfMachineReconciler", func() { hostID, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(hostID).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is not in rolling update and it's not in scaling down, the placement group is full, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is not in rolling update and not in scaling down, the placement group is full, wait for enough available hosts")) }) It("when placement group is full and KCP rolling update in progress", func() { @@ -1336,7 +1356,7 @@ var _ = Describe("ElfMachineReconciler", func() { host, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unavailable hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unusable hosts, will wait for enough available hosts")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1350,7 +1370,7 @@ var _ = Describe("ElfMachineReconciler", func() { host, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unavailable hosts, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unusable hosts, will wait for enough available hosts")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1364,7 +1384,7 @@ var _ = Describe("ElfMachineReconciler", func() { host, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unavailable hosts, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unusable hosts, will wait for enough available hosts")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1405,7 +1425,7 @@ var _ = Describe("ElfMachineReconciler", func() { hostID, err = reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, service.NewHostsFromList(hosts)) Expect(err).To(BeZero()) Expect(hostID).To(Equal(*vm3.Host.ID)) 
- Expect(logBuffer.String()).To(ContainSubstring("Selected the host server for VM since the placement group is full")) + Expect(logBuffer.String()).To(ContainSubstring("Select a host to power on the VM since the placement group is full")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1417,7 +1437,7 @@ var _ = Describe("ElfMachineReconciler", func() { hostID, err = reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, service.NewHostsFromList(hosts)) Expect(err).To(BeZero()) Expect(hostID).To(Equal(*vm3.Host.ID)) - Expect(logBuffer.String()).To(ContainSubstring("Selected the host server for VM since the placement group is full")) + Expect(logBuffer.String()).To(ContainSubstring("Select a host to power on the VM since the placement group is full")) ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp) machineContext = newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) @@ -1468,7 +1488,7 @@ var _ = Describe("ElfMachineReconciler", func() { hostID, err := reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(hostID).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is not in rolling update and it's not in scaling down, the placement group is full, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is not in rolling update and not in scaling down, the placement group is full, wait for enough available hosts")) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForAvailableHostRequiredByPlacementGroupReason}}) elfMachine.Status.Conditions = nil diff --git a/pkg/service/collections.go b/pkg/service/collections.go index 98680733..ff35b4fc 100644 --- a/pkg/service/collections.go +++ b/pkg/service/collections.go @@ -88,8 +88,8 @@ func (s Hosts) FilterAvailableHostsWithEnoughMemory(memory int64) Hosts { }) } -// FilterUnavailableHostsWithoutEnoughMemory returns a Hosts containing the unavailable host which has allocatable memory less than the specified memory. -func (s Hosts) FilterUnavailableHostsWithoutEnoughMemory(memory int64) Hosts { +// FilterUnavailableHostsOrWithoutEnoughMemory returns a Hosts containing the unavailable hosts or available hosts whose available memory is less than the specified memory. 
+func (s Hosts) FilterUnavailableHostsOrWithoutEnoughMemory(memory int64) Hosts { return s.Filter(func(h *models.Host) bool { ok, _ := IsAvailableHost(h, memory) return !ok diff --git a/pkg/service/collections_test.go b/pkg/service/collections_test.go index dabb92a9..3dc8ebc8 100644 --- a/pkg/service/collections_test.go +++ b/pkg/service/collections_test.go @@ -56,13 +56,13 @@ func TestHostCollection(t *testing.T) { g.Expect(availableHosts.Contains(*host2.ID)).To(gomega.BeTrue()) hosts = NewHosts() - unavailableHosts := hosts.FilterUnavailableHostsWithoutEnoughMemory(0) + unavailableHosts := hosts.FilterUnavailableHostsOrWithoutEnoughMemory(0) g.Expect(unavailableHosts.IsEmpty()).To(gomega.BeTrue()) g.Expect(unavailableHosts.Len()).To(gomega.Equal(0)) g.Expect(unavailableHosts.String()).To(gomega.Equal("[]")) hosts = NewHostsFromList([]*models.Host{host1, host2}) - unavailableHosts = hosts.FilterUnavailableHostsWithoutEnoughMemory(2) + unavailableHosts = hosts.FilterUnavailableHostsOrWithoutEnoughMemory(2) g.Expect(unavailableHosts.Len()).To(gomega.Equal(1)) g.Expect(unavailableHosts.Contains(*host1.ID)).To(gomega.BeTrue()) g.Expect(unavailableHosts.String()).To(gomega.Equal(fmt.Sprintf("[{id: %s,name: %s,memory: %d,status: %s,state: %s},]", *host1.ID, *host1.Name, *host1.AllocatableMemoryBytes, string(*host1.Status), ""))) From 7b337922d4b327b925616b8822e4ac147ce96a66 Mon Sep 17 00:00:00 2001 From: haijianyang Date: Thu, 3 Aug 2023 07:40:28 -0400 Subject: [PATCH 5/8] Fix comment --- controllers/elfmachine_controller_placement_group.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/controllers/elfmachine_controller_placement_group.go b/controllers/elfmachine_controller_placement_group.go index cb8e9018..d01d0d56 100644 --- a/controllers/elfmachine_controller_placement_group.go +++ b/controllers/elfmachine_controller_placement_group.go @@ -354,8 +354,6 @@ func (r *ElfMachineReconciler) getPlacementGroup(ctx *context.MachineContext, pl // For example, the virtual machine has joined the placement group. // 2. false and error is nil means the virtual machine has not joined the placement group. // For example, the placement group is full or the virtual machine is being migrated. 
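(Aside, not part of the patch: FilterUnavailableHostsOrWithoutEnoughMemory, renamed above, composes Hosts.Filter with the IsAvailableHost predicate. A simplified, self-contained sketch of that collection-filter style follows; the types and field names are stand-ins, not the Tower models.)

package main

import "fmt"

type host struct {
	name              string
	available         bool  // e.g. connected and not in maintenance mode
	allocatableMemory int64 // bytes
}

type hosts []host

// filter returns the hosts matching the predicate, mirroring Hosts.Filter.
func (s hosts) filter(pred func(host) bool) hosts {
	out := make(hosts, 0, len(s))
	for _, h := range s {
		if pred(h) {
			out = append(out, h)
		}
	}
	return out
}

// unusable mirrors FilterUnavailableHostsOrWithoutEnoughMemory: hosts that are
// unavailable, or available but without enough allocatable memory.
func (s hosts) unusable(memory int64) hosts {
	return s.filter(func(h host) bool {
		return !h.available || h.allocatableMemory < memory
	})
}

func main() {
	all := hosts{
		{name: "host1", available: true, allocatableMemory: 1},
		{name: "host2", available: true, allocatableMemory: 4},
		{name: "host3", available: false, allocatableMemory: 8},
	}
	// host1 (too little memory) and host3 (unavailable) are unusable.
	fmt.Println(len(all.unusable(2))) // 2
}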
-// -//nolint:gocyclo func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, vm *models.VM) (ret bool, reterr error) { if !version.IsCompatibleWithPlacementGroup(ctx.ElfMachine) { ctx.Logger.V(1).Info(fmt.Sprintf("The capeVersion of ElfMachine is lower than %s, skip adding VM to the placement group", version.CAPEVersion1_2_0), "capeVersion", version.GetCAPEVersion(ctx.ElfMachine)) @@ -432,7 +430,7 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v return false, nil } - if *vm.Status == models.VMStatusRUNNING || *vm.Status == models.VMStatusSUSPENDED { + if *vm.Status != models.VMStatusSTOPPED { ctx.Logger.V(1).Info(fmt.Sprintf("The placement group is full and VM is in %s status, skip adding VM to the placement group", *vm.Status), "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) return true, nil @@ -473,8 +471,7 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v usedHostsByPG := sets.Set[string]{} for i := 0; i < len(cpElfMachines); i++ { if ctx.ElfMachine.Name != cpElfMachines[i].Name && - cpElfMachines[i].Status.PlacementGroupRef == *placementGroup.ID && - cpElfMachines[i].CreationTimestamp.After(ctx.ElfMachine.CreationTimestamp.Time) { + cpElfMachines[i].Status.PlacementGroupRef == *placementGroup.ID { usedHostsByPG.Insert(cpElfMachines[i].Status.HostServerRef) } } From 52dd4ad1027ac496f4796786810259c6d5123ece Mon Sep 17 00:00:00 2001 From: haijianyang Date: Thu, 3 Aug 2023 08:33:00 -0400 Subject: [PATCH 6/8] Add formatHost --- .../elfmachine_controller_placement_group.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/controllers/elfmachine_controller_placement_group.go b/controllers/elfmachine_controller_placement_group.go index d01d0d56..85f34efc 100644 --- a/controllers/elfmachine_controller_placement_group.go +++ b/controllers/elfmachine_controller_placement_group.go @@ -477,7 +477,7 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v } usedHostsCount := usedHostsByPG.Len() - ctx.Logger.V(1).Info("The hosts used by the PlacementGroup", "usedHosts", usedHostsByPG, "count", usedHostsCount, "targetHost", targetHost) + ctx.Logger.V(1).Info("The hosts used by the PlacementGroup", "usedHosts", usedHostsByPG, "count", usedHostsCount, "targetHost", formatHost(targetHost)) if usedHostsCount < int(*kcp.Spec.Replicas-1) { ctx.Logger.V(1).Info("Not all other CPs joined the PlacementGroup, skip migrating VM") return true, nil @@ -492,7 +492,7 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v // This is the last CP ElfMachine (i.e. the 1st new CP ElfMachine) which has not been added into the target PlacementGroup. // Migrate this VM to the target host, then it will be added into the target PlacementGroup. - ctx.Logger.V(1).Info("Start migrateVM since KCP is not in rolling update process", "targetHost", targetHost) + ctx.Logger.V(1).Info("Start migrateVM since KCP is not in rolling update process", "targetHost", formatHost(targetHost)) return r.migrateVM(ctx, vm, *targetHost.ID) } @@ -613,7 +613,7 @@ func (r *ElfMachineReconciler) deletePlacementGroup(ctx *context.MachineContext) return true, nil } -// formatNestedHost returns the basic information of the host (ID, name). +// formatNestedHost returns the basic information of the NestedHost (ID, name). 
func formatNestedHost(host *models.NestedHost) string { if host == nil { return "{}" @@ -621,3 +621,12 @@ func formatNestedHost(host *models.NestedHost) string { return fmt.Sprintf("{id: %s,name: %s}", service.GetTowerString(host.ID), service.GetTowerString(host.Name)) } + +// formatHost returns the basic information of the Host (ID, name). +func formatHost(host *models.Host) string { + if host == nil { + return "{}" + } + + return fmt.Sprintf("{id: %s,name: %s}", service.GetTowerString(host.ID), service.GetTowerString(host.Name)) +} From 9cd49cb28261fa7aaded9bdc7d2b56943a4dc9a5 Mon Sep 17 00:00:00 2001 From: haijianyang Date: Fri, 4 Aug 2023 03:49:51 -0400 Subject: [PATCH 7/8] Fix migration --- .../elfmachine_controller_placement_group.go | 33 ++++++++++++++++--- controllers/elfmachine_controller_test.go | 10 +++--- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/controllers/elfmachine_controller_placement_group.go b/controllers/elfmachine_controller_placement_group.go index 85f34efc..a5db0459 100644 --- a/controllers/elfmachine_controller_placement_group.go +++ b/controllers/elfmachine_controller_placement_group.go @@ -26,6 +26,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/utils/pointer" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" capiutil "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/collections" @@ -170,7 +171,7 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex // When KCP is not in rolling update and not in scaling down, just return since the placement group is full. if !kcputil.IsKCPInRollingUpdate(kcp) && !kcputil.IsKCPInScalingDown(kcp) { - ctx.Logger.V(1).Info("KCP is not in rolling update and not in scaling down, the placement group is full, wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) + ctx.Logger.V(1).Info("KCP is not in rolling update and not in scaling down, the placement group is full, so wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) return nil, nil } @@ -218,7 +219,7 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex unusableHosts := usedHostsByPG.FilterUnavailableHostsOrWithoutEnoughMemory(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) if !unusableHosts.IsEmpty() { - ctx.Logger.V(1).Info("KCP is in rolling update, the placement group is full and has unusable hosts, will wait for enough available hosts", "placementGroup", *placementGroup.Name, "unusableHosts", unusableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) + ctx.Logger.V(1).Info("KCP is in rolling update, the placement group is full and has unusable hosts, so wait for enough available hosts", "placementGroup", *placementGroup.Name, "unusableHosts", unusableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) return nil, nil } @@ -354,6 +355,8 @@ func (r *ElfMachineReconciler) getPlacementGroup(ctx *context.MachineContext, pl // For example, the virtual machine has joined the placement group. // 2. false and error is nil means the virtual machine has not joined the placement group. // For example, the placement group is full or the virtual machine is being migrated. 
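(Aside, not part of the patch: formatHost, added above alongside formatNestedHost, exists so that log lines carry a compact "{id, name}" summary instead of a full Tower model dump. Below is a self-contained sketch of the same nil-safe pattern, using a stand-in Host type rather than the generated models package.)

package main

import "fmt"

// Host is a stand-in for the generated Tower model, whose fields are pointers.
type Host struct {
	ID   *string
	Name *string
}

// deref tolerates nil pointers, in the spirit of service.GetTowerString.
func deref(s *string) string {
	if s == nil {
		return ""
	}
	return *s
}

// formatHost returns a compact, nil-safe summary suitable as a log value.
func formatHost(h *Host) string {
	if h == nil {
		return "{}"
	}
	return fmt.Sprintf("{id: %s,name: %s}", deref(h.ID), deref(h.Name))
}

func main() {
	id, name := "host-123", "node-1"
	fmt.Println(formatHost(&Host{ID: &id, Name: &name})) // {id: host-123,name: node-1}
	fmt.Println(formatHost(nil))                         // {}
}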
+// +//nolint:gocyclo func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, vm *models.VM) (ret bool, reterr error) { if !version.IsCompatibleWithPlacementGroup(ctx.ElfMachine) { ctx.Logger.V(1).Info(fmt.Sprintf("The capeVersion of ElfMachine is lower than %s, skip adding VM to the placement group", version.CAPEVersion1_2_0), "capeVersion", version.GetCAPEVersion(ctx.ElfMachine)) @@ -460,24 +463,35 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v return true, nil } - // The 1st new CP ElfMachine should wait for other new CP ElfMachines to join the target PlacementGroup. + // The new CP ElfMachine should wait for other new CP ElfMachines to join the target PlacementGroup. // The code below double checks the recommended target host for migration is valid. cpElfMachines, err := machineutil.GetControlPlaneElfMachinesInCluster(ctx, ctx.Client, ctx.Cluster.Namespace, ctx.Cluster.Name) if err != nil { return false, err } - targetHost := availableHosts.UnsortedList()[0] usedHostsByPG := sets.Set[string]{} + cpElfMachineNames := make([]string, 0, len(cpElfMachines)) for i := 0; i < len(cpElfMachines); i++ { + cpElfMachineNames = append(cpElfMachineNames, cpElfMachines[i].Name) if ctx.ElfMachine.Name != cpElfMachines[i].Name && cpElfMachines[i].Status.PlacementGroupRef == *placementGroup.ID { usedHostsByPG.Insert(cpElfMachines[i].Status.HostServerRef) } } + // During KCP rolling update, when the last new CP is just created, + // it may happen that kcp.Spec.Replicas == kcp.Status.Replicas == kcp.Status.UpdatedReplicas + // and kcp.Status.UnavailableReplicas == 0. + // So we need to check if the number of CP ElfMachine is equal to kcp.Spec.Replicas. + if len(cpElfMachines) != int(*kcp.Spec.Replicas) { + ctx.Logger.Info("The number of CP ElfMachine does not match the expected", "kcp", formatKCP(kcp), "cpElfMachines", cpElfMachineNames) + return true, nil + } + + targetHost := availableHosts.UnsortedList()[0] usedHostsCount := usedHostsByPG.Len() - ctx.Logger.V(1).Info("The hosts used by the PlacementGroup", "usedHosts", usedHostsByPG, "count", usedHostsCount, "targetHost", formatHost(targetHost)) + ctx.Logger.V(1).Info("The hosts used by the PlacementGroup", "usedHosts", usedHostsByPG, "count", usedHostsCount, "targetHost", formatHost(targetHost), "kcp", formatKCP(kcp), "cpElfMachines", cpElfMachineNames) if usedHostsCount < int(*kcp.Spec.Replicas-1) { ctx.Logger.V(1).Info("Not all other CPs joined the PlacementGroup, skip migrating VM") return true, nil @@ -630,3 +644,12 @@ func formatHost(host *models.Host) string { return fmt.Sprintf("{id: %s,name: %s}", service.GetTowerString(host.ID), service.GetTowerString(host.Name)) } + +// formatKCP returns the basic information of the KCP (name, namespace, replicas). 
+func formatKCP(kcp *controlplanev1.KubeadmControlPlane) string { + if kcp == nil { + return "{}" + } + + return fmt.Sprintf("{name:%s,namespace:%s,spec:{replicas:%d},status:{replicas:%d,readyReplicas:%d,updatedReplicas:%d,unavailableReplicas:%d}}", kcp.Name, kcp.Namespace, *kcp.Spec.Replicas, kcp.Status.Replicas, kcp.Status.ReadyReplicas, kcp.Status.UpdatedReplicas, kcp.Status.UnavailableReplicas) +} diff --git a/controllers/elfmachine_controller_test.go b/controllers/elfmachine_controller_test.go index 9f6e83e8..f34e640f 100644 --- a/controllers/elfmachine_controller_test.go +++ b/controllers/elfmachine_controller_test.go @@ -1282,7 +1282,7 @@ var _ = Describe("ElfMachineReconciler", func() { hostID, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(hostID).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is not in rolling update and not in scaling down, the placement group is full, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is not in rolling update and not in scaling down, the placement group is full, so wait for enough available hosts")) }) It("when placement group is full and KCP rolling update in progress", func() { @@ -1356,7 +1356,7 @@ var _ = Describe("ElfMachineReconciler", func() { host, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unusable hosts, will wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unusable hosts, so wait for enough available hosts")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1370,7 +1370,7 @@ var _ = Describe("ElfMachineReconciler", func() { host, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unusable hosts, will wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unusable hosts, so wait for enough available hosts")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1384,7 +1384,7 @@ var _ = Describe("ElfMachineReconciler", func() { host, err = reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unusable hosts, will wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unusable hosts, so wait for enough available hosts")) logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) @@ -1488,7 +1488,7 @@ var _ = Describe("ElfMachineReconciler", func() { hostID, err := reconciler.preCheckPlacementGroup(machineContext) Expect(err).To(BeZero()) Expect(hostID).To(BeNil()) - Expect(logBuffer.String()).To(ContainSubstring("KCP is not in rolling update and not in scaling down, the placement group is full, wait for enough available hosts")) + Expect(logBuffer.String()).To(ContainSubstring("KCP is not in rolling update and not in scaling down, the placement group is full, so wait for enough available hosts")) expectConditions(elfMachine, 
[]conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForAvailableHostRequiredByPlacementGroupReason}}) elfMachine.Status.Conditions = nil From a18cc5405b7da6ecaac245dc4197ce2ad4164f54 Mon Sep 17 00:00:00 2001 From: haijianyang Date: Fri, 4 Aug 2023 04:55:35 -0400 Subject: [PATCH 8/8] Fix comment --- controllers/elfmachine_controller_placement_group.go | 6 +++--- controllers/elfmachine_controller_test.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/controllers/elfmachine_controller_placement_group.go b/controllers/elfmachine_controller_placement_group.go index a5db0459..accb4652 100644 --- a/controllers/elfmachine_controller_placement_group.go +++ b/controllers/elfmachine_controller_placement_group.go @@ -463,7 +463,7 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v return true, nil } - // The new CP ElfMachine should wait for other new CP ElfMachines to join the target PlacementGroup. + // The powered on CP ElfMachine which is not in the PlacementGroup should wait for other new CP ElfMachines to join the target PlacementGroup. // The code below double checks the recommended target host for migration is valid. cpElfMachines, err := machineutil.GetControlPlaneElfMachinesInCluster(ctx, ctx.Client, ctx.Cluster.Namespace, ctx.Cluster.Name) if err != nil { @@ -485,7 +485,7 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v // and kcp.Status.UnavailableReplicas == 0. // So we need to check if the number of CP ElfMachine is equal to kcp.Spec.Replicas. if len(cpElfMachines) != int(*kcp.Spec.Replicas) { - ctx.Logger.Info("The number of CP ElfMachine does not match the expected", "kcp", formatKCP(kcp), "cpElfMachines", cpElfMachineNames) + ctx.Logger.V(1).Info("The number of CP ElfMachine does not match the expected", "kcp", formatKCP(kcp), "cpElfMachines", cpElfMachineNames) return true, nil } @@ -506,7 +506,7 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v // This is the last CP ElfMachine (i.e. the 1st new CP ElfMachine) which has not been added into the target PlacementGroup. // Migrate this VM to the target host, then it will be added into the target PlacementGroup. - ctx.Logger.V(1).Info("Start migrateVM since KCP is not in rolling update process", "targetHost", formatHost(targetHost)) + ctx.Logger.V(1).Info("Start migrating VM since KCP is not in rolling update process", "targetHost", formatHost(targetHost)) return r.migrateVM(ctx, vm, *targetHost.ID) } diff --git a/controllers/elfmachine_controller_test.go b/controllers/elfmachine_controller_test.go index f34e640f..e1ad0198 100644 --- a/controllers/elfmachine_controller_test.go +++ b/controllers/elfmachine_controller_test.go @@ -1167,7 +1167,7 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(ok).To(BeFalse()) Expect(err).To(BeZero()) Expect(elfMachine.Status.TaskRef).To(Equal(*task.ID)) - Expect(logBuffer.String()).To(ContainSubstring("Start migrateVM since KCP is not in rolling update process")) + Expect(logBuffer.String()).To(ContainSubstring("Start migrating VM since KCP is not in rolling update process")) Expect(logBuffer.String()).To(ContainSubstring("Waiting for the VM to be migrated from")) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.JoiningPlacementGroupReason}})
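(Aside, not part of the patch: patches 7 and 8 above gate the VM migration on two counts: the number of control-plane ElfMachines must equal kcp.Spec.Replicas, and all other CPs must already be placed in the placement group. A condensed, hypothetical version of that decision, using simplified stand-in types rather than the CAPI/CAPE objects:)

package main

import "fmt"

type cpMachine struct {
	name              string
	placementGroupRef string
	hostServerRef     string
}

// shouldMigrate reports whether the VM of the current control-plane machine
// should be migrated to the host recommended by the placement group pre-check.
func shouldMigrate(current string, cps []cpMachine, pgID string, replicas int) bool {
	// Guard added by the patch: during a rolling update the ElfMachine count can
	// differ from spec.replicas even when KCP status looks settled, so skip
	// migration until they match.
	if len(cps) != replicas {
		return false
	}
	usedHosts := map[string]struct{}{}
	for _, m := range cps {
		if m.name != current && m.placementGroupRef == pgID {
			usedHosts[m.hostServerRef] = struct{}{}
		}
	}
	// Only the last CP still outside the placement group gets migrated.
	return len(usedHosts) >= replicas-1
}

func main() {
	cps := []cpMachine{
		{name: "cp-1", placementGroupRef: "pg-1", hostServerRef: "h1"},
		{name: "cp-2", placementGroupRef: "pg-1", hostServerRef: "h2"},
		{name: "cp-3"}, // the machine being reconciled; not yet in the group
	}
	fmt.Println(shouldMigrate("cp-3", cps, "pg-1", 3)) // true
}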