Skip to content

Commit

Permalink
Wait and requeue if LB is in PENDING_DELETE
Browse files Browse the repository at this point in the history
If the LB is still being deleted when a cluster is deleted, it'll go
through the PENDING_DELETE state and at this stage there is nothing we
can do but wait for the LB to be actually deleted.

If the LB is in that state during the deletion, let's just return no
error but request a reconcile after some time.
  • Loading branch information
EmilienM committed Oct 16, 2024
1 parent 218212a commit 7ef114b
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 14 deletions.
9 changes: 7 additions & 2 deletions controllers/openstackcluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ import (
)

const (
waitForBastionToReconcile = 15 * time.Second
waitForBastionToReconcile = 15 * time.Second
waitForOctaviaPortsCleanup = 15 * time.Second
)

// OpenStackClusterReconciler reconciles a OpenStackCluster object.
Expand Down Expand Up @@ -183,10 +184,14 @@ func (r *OpenStackClusterReconciler) reconcileDelete(ctx context.Context, scope
return reconcile.Result{}, err
}

if err = loadBalancerService.DeleteLoadBalancer(openStackCluster, clusterResourceName); err != nil {
result, err := loadBalancerService.DeleteLoadBalancer(openStackCluster, clusterResourceName)
if err != nil {
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to delete load balancer: %w", err), false)
return reconcile.Result{}, fmt.Errorf("failed to delete load balancer: %w", err)
}
if result != nil {
return *result, nil
}
}

// if ManagedSubnets was not set, no network was created.
Expand Down
40 changes: 28 additions & 12 deletions pkg/cloud/services/loadbalancer/loadbalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
utilsnet "k8s.io/utils/net"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"

infrav1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1beta1"
"sigs.k8s.io/cluster-api-provider-openstack/pkg/record"
Expand All @@ -41,12 +42,16 @@ import (
)

const (
networkPrefix string = "k8s-clusterapi"
kubeapiLBSuffix string = "kubeapi"
resolvedMsg string = "ControlPlaneEndpoint.Host is not an IP address, using the first resolved IP address"
networkPrefix string = "k8s-clusterapi"
kubeapiLBSuffix string = "kubeapi"
resolvedMsg string = "ControlPlaneEndpoint.Host is not an IP address, using the first resolved IP address"
waitForOctaviaLBCleanup = 15 * time.Second
)

const loadBalancerProvisioningStatusActive = "ACTIVE"
const (
loadBalancerProvisioningStatusActive = "ACTIVE"
loadBalancerProvisioningStatusPendingDelete = "PENDING_DELETE"
)

// We wrap the LookupHost function in a variable to allow overriding it in unit tests.
//
Expand Down Expand Up @@ -658,32 +663,40 @@ func (s *Service) ReconcileLoadBalancerMember(openStackCluster *infrav1.OpenStac
return nil
}

func (s *Service) DeleteLoadBalancer(openStackCluster *infrav1.OpenStackCluster, clusterResourceName string) error {
func (s *Service) DeleteLoadBalancer(openStackCluster *infrav1.OpenStackCluster, clusterResourceName string) (result *ctrl.Result, reterr error) {
loadBalancerName := getLoadBalancerName(clusterResourceName)
lb, err := s.checkIfLbExists(loadBalancerName)
if err != nil {
return err
return nil, err
}

if lb == nil {
return nil
return nil, nil
}

// If the load balancer is already in PENDING_DELETE state, there is no further
// deletion work for us to trigger. However we should still wait for the load
// balancer to actually be deleted, which is why we request a new reconcile
// after a certain amount of time.
if lb.ProvisioningStatus == loadBalancerProvisioningStatusPendingDelete {
s.scope.Logger().Info("Load balancer is already in PENDING_DELETE state", "name", loadBalancerName)
return &ctrl.Result{RequeueAfter: waitForOctaviaLBCleanup}, nil
}

if lb.VipPortID != "" {
fip, err := s.networkingService.GetFloatingIPByPortID(lb.VipPortID)
if err != nil {
return err
return nil, err
}

if fip != nil && fip.FloatingIP != "" {
if err = s.networkingService.DisassociateFloatingIP(openStackCluster, fip.FloatingIP); err != nil {
return err
return nil, err
}

// If the floating IP is user-provided (BYO floating IP), don't delete it.
if openStackCluster.Spec.APIServerFloatingIP == nil || *openStackCluster.Spec.APIServerFloatingIP != fip.FloatingIP {
if err = s.networkingService.DeleteFloatingIP(openStackCluster, fip.FloatingIP); err != nil {
return err
return nil, err
}
} else {
s.scope.Logger().Info("Skipping load balancer floating IP deletion as it's a user-provided resource", "name", loadBalancerName, "fip", fip.FloatingIP)
Expand All @@ -698,11 +711,14 @@ func (s *Service) DeleteLoadBalancer(openStackCluster *infrav1.OpenStackCluster,
err = s.loadbalancerClient.DeleteLoadBalancer(lb.ID, deleteOpts)
if err != nil && !capoerrors.IsNotFound(err) {
record.Warnf(openStackCluster, "FailedDeleteLoadBalancer", "Failed to delete load balancer %s with id %s: %v", lb.Name, lb.ID, err)
return err
return nil, err
}

record.Eventf(openStackCluster, "SuccessfulDeleteLoadBalancer", "Deleted load balancer %s with id %s", lb.Name, lb.ID)
return nil

// If we have reached this point, that means that the load balancer wasn't initially deleted but the request to delete it didn't return an error.
// So we want to requeue until the load balancer and its associated ports are actually deleted.
return &ctrl.Result{RequeueAfter: waitForOctaviaLBCleanup}, nil
}

func (s *Service) DeleteLoadBalancerMember(openStackCluster *infrav1.OpenStackCluster, openStackMachine *infrav1.OpenStackMachine, clusterResourceName string) error {
Expand Down

0 comments on commit 7ef114b

Please sign in to comment.