diff --git a/api/v1beta1/elfmachine_types.go b/api/v1beta1/elfmachine_types.go index 0c046f4b..600e6f5a 100644 --- a/api/v1beta1/elfmachine_types.go +++ b/api/v1beta1/elfmachine_types.go @@ -30,6 +30,11 @@ const ( // API Server. MachineFinalizer = "elfmachine.infrastructure.cluster.x-k8s.io" + // MachineStaticIPFinalizer allows ReconcileElfMachine to clean up static ip + // resources associated with ElfMachine before removing it from the + // API Server. + MachineStaticIPFinalizer = "elfmachinestaticip.infrastructure.cluster.x-k8s.io" + // VMDisconnectionTimestampAnnotation is the annotation identifying the VM of ElfMachine disconnection time. VMDisconnectionTimestampAnnotation = "cape.infrastructure.cluster.x-k8s.io/vm-disconnection-timestamp" ) diff --git a/api/v1beta1/types.go b/api/v1beta1/types.go index 23633bbd..8f98f83f 100644 --- a/api/v1beta1/types.go +++ b/api/v1beta1/types.go @@ -99,6 +99,16 @@ type NetworkSpec struct { PreferredAPIServerCIDR string `json:"preferredAPIServerCidr,omitempty"` } +func (n *NetworkSpec) RequiresStaticIPs() bool { + for i := 0; i < len(n.Devices); i++ { + if n.Devices[i].NetworkType == NetworkTypeIPV4 && len(n.Devices[i].IPAddrs) == 0 { + return true + } + } + + return false +} + // NetworkDeviceSpec defines the network configuration for a virtual machine's // network device. type NetworkDeviceSpec struct { diff --git a/controllers/elfmachine_controller.go b/controllers/elfmachine_controller.go index 65bcbfd2..ccb80351 100644 --- a/controllers/elfmachine_controller.go +++ b/controllers/elfmachine_controller.go @@ -56,6 +56,7 @@ import ( "github.com/smartxworks/cluster-api-provider-elf/pkg/util" labelsutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/labels" machineutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/machine" + patchutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/patch" ) //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=elfmachines,verbs=get;list;watch;create;update;patch;delete @@ -377,7 +378,18 @@ func (r *ElfMachineReconciler) reconcileNormal(ctx *context.MachineContext) (rec } // If the ElfMachine doesn't have our finalizer, add it. - ctrlutil.AddFinalizer(ctx.ElfMachine, infrav1.MachineFinalizer) + if !ctrlutil.ContainsFinalizer(ctx.ElfMachine, infrav1.MachineFinalizer) { + return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, patchutil.AddFinalizerWithOptimisticLock(ctx, r.Client, ctx.ElfMachine, infrav1.MachineFinalizer) + } + + // If ElfMachine requires static IPs for devices, should wait for CAPE-IP to set MachineStaticIPFinalizer first + // to prevent CAPE from overwriting MachineStaticIPFinalizer when setting MachineFinalizer. + // If ElfMachine happens to be deleted at this time, CAPE-IP may not have time to release the IPs. + if ctx.ElfMachine.Spec.Network.RequiresStaticIPs() && !ctrlutil.ContainsFinalizer(ctx.ElfMachine, infrav1.MachineStaticIPFinalizer) { + r.Logger.V(2).Info("Waiting for CAPE-IP to set MachineStaticIPFinalizer on ElfMachine") + + return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil + } if !ctx.Cluster.Status.InfrastructureReady { ctx.Logger.Info("Cluster infrastructure is not ready yet", diff --git a/controllers/elfmachine_controller_test.go b/controllers/elfmachine_controller_test.go index 9ec3e728..a37c827b 100644 --- a/controllers/elfmachine_controller_test.go +++ b/controllers/elfmachine_controller_test.go @@ -54,6 +54,7 @@ import ( "github.com/smartxworks/cluster-api-provider-elf/pkg/service" "github.com/smartxworks/cluster-api-provider-elf/pkg/service/mock_services" machineutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/machine" + patchutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/patch" "github.com/smartxworks/cluster-api-provider-elf/test/fake" "github.com/smartxworks/cluster-api-provider-elf/test/helpers" ) @@ -164,6 +165,7 @@ var _ = Describe("ElfMachineReconciler", func() { }) It("should exit immediately if cluster infra isn't ready", func() { + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) cluster.Status.InfrastructureReady = false ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -179,6 +181,7 @@ var _ = Describe("ElfMachineReconciler", func() { }) It("should exit immediately if bootstrap data secret reference isn't available", func() { + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) cluster.Status.InfrastructureReady = true conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) @@ -195,6 +198,7 @@ var _ = Describe("ElfMachineReconciler", func() { }) It("should wait cluster ControlPlaneInitialized true when create worker machine", func() { + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) cluster.Status.InfrastructureReady = true ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -210,6 +214,7 @@ var _ = Describe("ElfMachineReconciler", func() { }) It("should not wait cluster ControlPlaneInitialized true when create master machine", func() { + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) cluster.Status.InfrastructureReady = true elfMachine.Labels[clusterv1.MachineControlPlaneLabel] = "" ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) @@ -238,6 +243,7 @@ var _ = Describe("ElfMachineReconciler", func() { }) It("should set CloningFailedReason condition when failed to retrieve bootstrap data", func() { + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) machine.Spec.Bootstrap.DataSecretName = pointer.String("notfound") ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -259,6 +265,7 @@ var _ = Describe("ElfMachineReconciler", func() { elfCluster.Spec.Cluster = clusterKey task := fake.NewTowerTask() withTaskVM := fake.NewWithTaskVM(vm, task) + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -297,6 +304,7 @@ var _ = Describe("ElfMachineReconciler", func() { vm := fake.NewTowerVM() vm.Name = &elfMachine.Name vm.LocalID = pointer.String("placeholder-%s" + *vm.LocalID) + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -317,6 +325,7 @@ var _ = Describe("ElfMachineReconciler", func() { }) It("should handle clone error", func() { + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -343,6 +352,7 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.SetVMDisconnectionTimestamp(&now) nic := fake.NewTowerVMNic(0) placementGroup := fake.NewVMPlacementGroup([]string{*vm.ID}) + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -372,6 +382,7 @@ var _ = Describe("ElfMachineReconciler", func() { vm := fake.NewTowerVM() vm.EntityAsyncStatus = nil elfMachine.Status.VMRef = *vm.LocalID + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -406,6 +417,7 @@ var _ = Describe("ElfMachineReconciler", func() { vm.EntityAsyncStatus = nil vm.InRecycleBin = pointer.Bool(true) elfMachine.Status.VMRef = *vm.LocalID + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -429,6 +441,7 @@ var _ = Describe("ElfMachineReconciler", func() { task.Status = &status elfMachine.Status.VMRef = *vm.ID elfMachine.Status.TaskRef = *task.ID + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -457,6 +470,7 @@ var _ = Describe("ElfMachineReconciler", func() { task.ErrorMessage = service.TowerString("Cannot unwrap Ok value of Result.Err.\r\ncode: CREATE_VM_FORM_TEMPLATE_FAILED\r\nmessage: {\"data\":{},\"ec\":\"VM_CLOUD_INIT_CONFIG_ERROR\",\"error\":{\"msg\":\"[VM_CLOUD_INIT_CONFIG_ERROR]The gateway [192.168.31.215] is unreachable. \"}}") elfMachine.Status.VMRef = *vm.ID elfMachine.Status.TaskRef = *task.ID + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -489,6 +503,7 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.VMRef = *vm.ID elfMachine.Status.TaskRef = *task1.ID placementGroup := fake.NewVMPlacementGroup([]string{*vm.ID}) + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -520,6 +535,7 @@ var _ = Describe("ElfMachineReconciler", func() { task.Status = &taskStatus elfMachine.Status.VMRef = *vm.ID elfMachine.Status.TaskRef = *task.ID + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -550,6 +566,7 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.VMRef = *vm.ID elfMachine.Status.TaskRef = *task1.ID placementGroup := fake.NewVMPlacementGroup([]string{*vm.ID}) + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -582,6 +599,7 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.VMRef = *vm.LocalID elfMachine.Status.TaskRef = *task1.ID placementGroup := fake.NewVMPlacementGroup([]string{*vm.ID}) + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -616,6 +634,7 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.VMRef = *vm.ID elfMachine.Status.TaskRef = *task1.ID placementGroup := fake.NewVMPlacementGroup([]string{*vm.ID}) + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -648,6 +667,7 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.VMRef = *vm.ID elfMachine.Status.TaskRef = *task1.ID placementGroup := fake.NewVMPlacementGroup([]string{*vm.ID}) + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -926,6 +946,7 @@ var _ = Describe("ElfMachineReconciler", func() { placementGroup1 := fake.NewVMPlacementGroup(nil) placementGroup2 := fake.NewVMPlacementGroup(nil) placementGroup2.EntityAsyncStatus = models.EntityAsyncStatusUPDATING.Pointer() + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -951,6 +972,7 @@ var _ = Describe("ElfMachineReconciler", func() { vm.Status = &status elfMachine.Status.VMRef = *vm.LocalID placementGroup := fake.NewVMPlacementGroup(nil) + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -1665,6 +1687,7 @@ var _ = Describe("ElfMachineReconciler", func() { }) It("should set providerID to ElfMachine when VM is created", func() { + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) cluster.Status.InfrastructureReady = true conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition) machine.Spec.Bootstrap = clusterv1.Bootstrap{DataSecretName: &secret.Name} @@ -1710,6 +1733,8 @@ var _ = Describe("ElfMachineReconciler", func() { }) It("should wait VM network ready", func() { + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineStaticIPFinalizer) vm := fake.NewTowerVMFromElfMachine(elfMachine) vm.EntityAsyncStatus = nil elfMachine.Status.VMRef = *vm.LocalID @@ -2137,6 +2162,7 @@ var _ = Describe("ElfMachineReconciler", func() { nic := fake.NewTowerVMNic(0) nic.IPAddress = service.TowerString("127.0.0.1") placementGroup := fake.NewVMPlacementGroup([]string{*vm.ID}) + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) @@ -2903,7 +2929,43 @@ var _ = Describe("ElfMachineReconciler", func() { machine.Spec.Bootstrap = clusterv1.Bootstrap{DataSecretName: &secret.Name} }) + It("should wait for MachineFinalizer", func() { + ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + originalElfMachine := elfMachine.DeepCopy() + + reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + elfMachineKey := capiutil.ObjectKey(elfMachine) + result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) + Expect(result.RequeueAfter).NotTo(BeZero()) + Expect(err).ShouldNot(HaveOccurred()) + Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(elfMachine.Finalizers).To(Equal([]string{infrav1.MachineFinalizer})) + + err = patchutil.AddFinalizerWithOptimisticLock(ctx, reconciler.Client, originalElfMachine, infrav1.MachineFinalizer) + Expect(err).To(HaveOccurred()) + Expect(apierrors.IsConflict(err)).To(BeTrue()) + }) + + It("should wait for MachineStaticIPFinalizer", func() { + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) + elfMachine.Spec.Network.Devices = []infrav1.NetworkDeviceSpec{ + {NetworkType: infrav1.NetworkTypeIPV4}, + } + ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + + reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + elfMachineKey := capiutil.ObjectKey(elfMachine) + result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) + Expect(result.RequeueAfter).NotTo(BeZero()) + Expect(err).ShouldNot(HaveOccurred()) + Expect(logBuffer.String()).To(ContainSubstring("Waiting for CAPE-IP to set MachineStaticIPFinalizer on ElfMachine")) + }) + It("should wait for IP allocation", func() { + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineStaticIPFinalizer) placementGroup := fake.NewVMPlacementGroup([]string{fake.ID()}) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Times(3).Return(placementGroup, nil) @@ -2929,6 +2991,8 @@ var _ = Describe("ElfMachineReconciler", func() { }) It("should not wait for IP allocation", func() { + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) + ctrlutil.AddFinalizer(elfMachine, infrav1.MachineStaticIPFinalizer) placementGroup := fake.NewVMPlacementGroup([]string{fake.ID()}) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) diff --git a/pkg/util/patch/finalizer.go b/pkg/util/patch/finalizer.go new file mode 100644 index 00000000..845568ce --- /dev/null +++ b/pkg/util/patch/finalizer.go @@ -0,0 +1,32 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package patch + +import ( + goctx "context" + + "sigs.k8s.io/controller-runtime/pkg/client" + ctrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +// AddFinalizerWithOptimisticLock adds a finalizer to the specified object +// and saves it by using resourceVersion optimistic lock. +func AddFinalizerWithOptimisticLock(ctx goctx.Context, c client.Client, obj client.Object, finalizer string) error { + modifiedObj := obj.DeepCopyObject().(client.Object) + ctrlutil.AddFinalizer(modifiedObj, finalizer) + return c.Patch(ctx, modifiedObj, client.MergeFromWithOptions(obj, client.MergeFromWithOptimisticLock{})) +}