Refactor infra creation to improve the overall infra setup time #1869

Merged
20 changes: 10 additions & 10 deletions cloud/scope/powervs_cluster.go
@@ -790,13 +790,13 @@ func (s *PowerVSClusterScope) createServiceInstance() (*resourcecontrollerv2.Res
func (s *PowerVSClusterScope) ReconcileNetwork() (bool, error) {
if s.GetDHCPServerID() != nil {
s.V(3).Info("DHCP server ID is set, fetching details", "id", s.GetDHCPServerID())
requeue, err := s.isDHCPServerActive()
active, err := s.isDHCPServerActive()
if err != nil {
return false, err
}
// if the DHCP server exists and is in an active state, it is assumed that the DHCP network also exists
// TODO(Phase 2): Verify that the DHCP network exists.
return requeue, nil
return active, nil
// TODO(karthik-k-n): If needed set dhcp status here
}
// check whether the network exists in the cloud
@@ -818,7 +818,7 @@ func (s *PowerVSClusterScope) ReconcileNetwork() (bool, error) {

s.Info("Created DHCP Server", "id", *dhcpServer)
s.SetStatus(infrav1beta2.ResourceTypeDHCPServer, infrav1beta2.ResourceReference{ID: dhcpServer, ControllerCreated: ptr.To(true)})
return true, nil
return false, nil
}

// checkNetwork checks whether the network exists in the cloud.
@@ -880,25 +880,25 @@ func (s *PowerVSClusterScope) isDHCPServerActive() (bool, error) {
return false, err
}

requeue, err := s.checkDHCPServerStatus(*dhcpServer)
active, err := s.checkDHCPServerStatus(*dhcpServer)
if err != nil {
return false, err
}
return requeue, nil
return active, nil
}

// checkDHCPServerStatus checks the state of a DHCP server.
// If state is BUILD, true is returned indicating a requeue for reconciliation.
// If state is active, true is returned.
// In all other cases, it returns false.
func (s *PowerVSClusterScope) checkDHCPServerStatus(dhcpServer models.DHCPServerDetail) (bool, error) {
s.V(3).Info("Checking the status of DHCP server", "id", *dhcpServer.ID)
switch *dhcpServer.Status {
case string(infrav1beta2.DHCPServerStateActive):
s.V(3).Info("DHCP server is in active state")
return false, nil
return true, nil
case string(infrav1beta2.DHCPServerStateBuild):
s.V(3).Info("DHCP server is in build state")
return true, nil
return false, nil
case string(infrav1beta2.DHCPServerStateError):
return false, fmt.Errorf("DHCP server creation failed and is in error state")
}
@@ -1884,9 +1884,9 @@ func (s *PowerVSClusterScope) ReconcileLoadBalancers() (bool, error) {
}
s.Info("Created VPC load balancer", "id", loadBalancerStatus.ID)
s.SetLoadBalancerStatus(loadBalancer.Name, *loadBalancerStatus)
return true, nil
return false, nil
}
return false, nil
return true, nil
}

// checkLoadBalancerStatus checks the state of a VPC load balancer.
232 changes: 162 additions & 70 deletions controllers/ibmpowervscluster_controller.go
@@ -20,10 +20,12 @@ import (
"context"
"fmt"
"strings"
"sync"
"time"

"github.com/pkg/errors"

corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/runtime"
@@ -111,6 +113,116 @@ func (r *IBMPowerVSClusterReconciler) Reconcile(ctx context.Context, req ctrl.Re
return r.reconcile(clusterScope)
}

type powerVSCluster struct {
cluster *infrav1beta2.IBMPowerVSCluster
mu sync.Mutex
}

type reconcileResult struct {
reconcile.Result
error
}

func (update *powerVSCluster) updateCondition(condition bool, conditionArgs ...interface{}) {
update.mu.Lock()
defer update.mu.Unlock()
if condition {
conditions.MarkTrue(update.cluster, conditionArgs[0].(capiv1beta1.ConditionType))
return
}

conditions.MarkFalse(update.cluster, conditionArgs[0].(capiv1beta1.ConditionType), conditionArgs[1].(string), conditionArgs[2].(capiv1beta1.ConditionSeverity), conditionArgs[3].(string), conditionArgs[4:]...)
Contributor

Isn't there a better way than using interface{} types for conditionArgs? Also, since we are indexing into it, should we add a check verifying the minimum length to avoid an index out of range?

Contributor Author

If I pass them as concrete types, we would get a golint error for exceeding the maximum number of args, which is 5. I would also need to create separate funcs for the true and false conditions, which would complicate things IMO.

Should we add a check for verifying the min length to avoid index out of range?

Again, that check would need to be added in many places where we update the condition, and we would not be able to take any meaningful action if it returned an error. For example, here I want to return the main error rather than an error coming from this helper, which would not look good. The scope of this is limited to the reconcile func with only two possible flows, so I think the current approach is fine.

}
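For illustration only, here is a minimal sketch of the minimum-length guard discussed in the thread above, reusing the types and helpers this file already imports (conditions, capiv1beta1). It is not part of the PR, and the early returns are just one possible way to handle a short argument list.

// Sketch: a defensive variant of updateCondition with a length check on
// conditionArgs, as suggested by the reviewer. Hypothetical, not part of this PR.
func (update *powerVSCluster) updateConditionChecked(condition bool, conditionArgs ...interface{}) {
	update.mu.Lock()
	defer update.mu.Unlock()
	if len(conditionArgs) < 1 {
		return // nothing to mark without a condition type
	}
	if condition {
		conditions.MarkTrue(update.cluster, conditionArgs[0].(capiv1beta1.ConditionType))
		return
	}
	if len(conditionArgs) < 4 {
		return // the false path expects type, reason, severity and message
	}
	conditions.MarkFalse(update.cluster, conditionArgs[0].(capiv1beta1.ConditionType), conditionArgs[1].(string), conditionArgs[2].(capiv1beta1.ConditionSeverity), conditionArgs[3].(string), conditionArgs[4:]...)
}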

func (r *IBMPowerVSClusterReconciler) reconcilePowerVSResources(clusterScope *scope.PowerVSClusterScope, powerVSCluster *powerVSCluster, ch chan reconcileResult, wg *sync.WaitGroup) {
defer wg.Done()
powerVSLog := clusterScope.WithName("powervs")
// reconcile PowerVS service instance
powerVSLog.Info("Reconciling PowerVS service instance")
if requeue, err := clusterScope.ReconcilePowerVSServiceInstance(); err != nil {
powerVSLog.Error(err, "failed to reconcile PowerVS service instance")
powerVSCluster.updateCondition(false, infrav1beta2.ServiceInstanceReadyCondition, infrav1beta2.ServiceInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
ch <- reconcileResult{reconcile.Result{}, err}
return
} else if requeue {
powerVSLog.Info("PowerVS service instance creation is pending, requeuing")
ch <- reconcileResult{reconcile.Result{Requeue: true}, nil}
return
}
powerVSCluster.updateCondition(true, infrav1beta2.ServiceInstanceReadyCondition)

clusterScope.IBMPowerVSClient.WithClients(powervs.ServiceOptions{CloudInstanceID: clusterScope.GetServiceInstanceID()})

// reconcile network
powerVSLog.Info("Reconciling network")
if dhcpServerActive, err := clusterScope.ReconcileNetwork(); err != nil {
powerVSLog.Error(err, "failed to reconcile PowerVS network")
powerVSCluster.updateCondition(false, infrav1beta2.NetworkReadyCondition, infrav1beta2.NetworkReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
ch <- reconcileResult{reconcile.Result{}, err}
return
} else if dhcpServerActive {
powerVSCluster.updateCondition(true, infrav1beta2.NetworkReadyCondition)
return
}
// Do not want to block the reconciliation of other resources like setting up TG and COS, so skipping the requeue and only logging the info.
powerVSLog.Info("PowerVS network creation is pending")
}

func (r *IBMPowerVSClusterReconciler) reconcileVPCResources(clusterScope *scope.PowerVSClusterScope, powerVSCluster *powerVSCluster, ch chan reconcileResult, wg *sync.WaitGroup) {
defer wg.Done()
vpcLog := clusterScope.WithName("vpc")
vpcLog.Info("Reconciling VPC")
if requeue, err := clusterScope.ReconcileVPC(); err != nil {
clusterScope.Error(err, "failed to reconcile VPC")
powerVSCluster.updateCondition(false, infrav1beta2.VPCReadyCondition, infrav1beta2.VPCReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
ch <- reconcileResult{reconcile.Result{}, err}
return
} else if requeue {
vpcLog.Info("VPC creation is pending, requeuing")
ch <- reconcileResult{reconcile.Result{Requeue: true}, nil}
return
}
powerVSCluster.updateCondition(true, infrav1beta2.VPCReadyCondition)

// reconcile VPC Subnet
vpcLog.Info("Reconciling VPC subnets")
if requeue, err := clusterScope.ReconcileVPCSubnets(); err != nil {
vpcLog.Error(err, "failed to reconcile VPC subnets")
powerVSCluster.updateCondition(false, infrav1beta2.VPCSubnetReadyCondition, infrav1beta2.VPCSubnetReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
ch <- reconcileResult{reconcile.Result{}, err}
return
} else if requeue {
vpcLog.Info("VPC subnet creation is pending, requeuing")
ch <- reconcileResult{reconcile.Result{Requeue: true}, nil}
return
}
powerVSCluster.updateCondition(true, infrav1beta2.VPCSubnetReadyCondition)

// reconcile VPC security group
vpcLog.Info("Reconciling VPC security group")
if err := clusterScope.ReconcileVPCSecurityGroups(); err != nil {
vpcLog.Error(err, "failed to reconcile VPC security groups")
powerVSCluster.updateCondition(false, infrav1beta2.VPCSecurityGroupReadyCondition, infrav1beta2.VPCSecurityGroupReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
ch <- reconcileResult{reconcile.Result{}, err}
return
}
powerVSCluster.updateCondition(true, infrav1beta2.VPCSecurityGroupReadyCondition)

// reconcile LoadBalancer
vpcLog.Info("Reconciling VPC load balancers")
if loadBalancerReady, err := clusterScope.ReconcileLoadBalancers(); err != nil {
vpcLog.Error(err, "failed to reconcile VPC load balancers")
powerVSCluster.updateCondition(false, infrav1beta2.LoadBalancerReadyCondition, infrav1beta2.LoadBalancerReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
ch <- reconcileResult{reconcile.Result{}, err}
return
} else if loadBalancerReady {
powerVSCluster.updateCondition(true, infrav1beta2.LoadBalancerReadyCondition)
return
}
// Do not want to block the reconciliation of other resources like setting up TG and COS, so skipping the requeue and only logging the info.
vpcLog.Info("VPC load balancer creation is pending")
}

func (r *IBMPowerVSClusterReconciler) reconcile(clusterScope *scope.PowerVSClusterScope) (ctrl.Result, error) { //nolint:gocyclo
if controllerutil.AddFinalizer(clusterScope.IBMPowerVSCluster, infrav1beta2.IBMPowerVSClusterFinalizer) {
return ctrl.Result{}, nil
@@ -137,116 +249,96 @@ func (r *IBMPowerVSClusterReconciler) reconcile(clusterScope *scope.PowerVSClust
return reconcile.Result{}, err
}

powerVSCluster := clusterScope.IBMPowerVSCluster
// reconcile PowerVS service instance
clusterScope.Info("Reconciling PowerVS service instance")
if requeue, err := clusterScope.ReconcilePowerVSServiceInstance(); err != nil {
clusterScope.Error(err, "failed to reconcile PowerVS service instance")
conditions.MarkFalse(powerVSCluster, infrav1beta2.ServiceInstanceReadyCondition, infrav1beta2.ServiceInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
} else if requeue {
clusterScope.Info("PowerVS service instance creation is pending, requeuing")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
powerVSCluster := &powerVSCluster{
cluster: clusterScope.IBMPowerVSCluster,
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.ServiceInstanceReadyCondition)

clusterScope.IBMPowerVSClient.WithClients(powervs.ServiceOptions{CloudInstanceID: clusterScope.GetServiceInstanceID()})
var wg sync.WaitGroup
ch := make(chan reconcileResult)

// reconcile network
clusterScope.Info("Reconciling network")
if requeue, err := clusterScope.ReconcileNetwork(); err != nil {
clusterScope.Error(err, "failed to reconcile PowerVS network")
conditions.MarkFalse(powerVSCluster, infrav1beta2.NetworkReadyCondition, infrav1beta2.NetworkReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
} else if requeue {
clusterScope.Info("PowerVS network creation is pending, requeuing")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.NetworkReadyCondition)
// reconcile PowerVS resources
wg.Add(1)
go r.reconcilePowerVSResources(clusterScope, powerVSCluster, ch, &wg)

// reconcile VPC
clusterScope.Info("Reconciling VPC")
if requeue, err := clusterScope.ReconcileVPC(); err != nil {
clusterScope.Error(err, "failed to reconcile VPC")
conditions.MarkFalse(powerVSCluster, infrav1beta2.VPCReadyCondition, infrav1beta2.VPCReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
} else if requeue {
clusterScope.Info("VPC creation is pending, requeuing")
return reconcile.Result{RequeueAfter: 15 * time.Second}, nil
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.VPCReadyCondition)
wg.Add(1)
go r.reconcileVPCResources(clusterScope, powerVSCluster, ch, &wg)

// reconcile VPC Subnet
clusterScope.Info("Reconciling VPC subnets")
if requeue, err := clusterScope.ReconcileVPCSubnets(); err != nil {
clusterScope.Error(err, "failed to reconcile VPC subnets")
conditions.MarkFalse(powerVSCluster, infrav1beta2.VPCSubnetReadyCondition, infrav1beta2.VPCSubnetReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
} else if requeue {
clusterScope.Info("VPC subnet creation is pending, requeuing")
return reconcile.Result{RequeueAfter: 15 * time.Second}, nil
// wait for the above reconcile goroutines to complete, then close the channel
go func() {
wg.Wait()
close(ch)
}()

var requeue bool
var errList []error
// receive return values from the channel and decide the requeue
for val := range ch {
if val.Requeue {
requeue = true
}
if val.error != nil {
errList = append(errList, val.error)
}
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.VPCSubnetReadyCondition)

// reconcile VPC security group
clusterScope.Info("Reconciling VPC security group")
if err := clusterScope.ReconcileVPCSecurityGroups(); err != nil {
clusterScope.Error(err, "failed to reconcile VPC security groups")
conditions.MarkFalse(powerVSCluster, infrav1beta2.VPCSecurityGroupReadyCondition, infrav1beta2.VPCSecurityGroupReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
if requeue && len(errList) > 0 {
return ctrl.Result{RequeueAfter: 30 * time.Second}, kerrors.NewAggregate(errList)
} else if requeue {
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
} else if len(errList) > 0 {
return ctrl.Result{}, kerrors.NewAggregate(errList)
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.VPCSecurityGroupReadyCondition)

// reconcile Transit Gateway
clusterScope.Info("Reconciling Transit Gateway")
if requeue, err := clusterScope.ReconcileTransitGateway(); err != nil {
clusterScope.Error(err, "failed to reconcile transit gateway")
conditions.MarkFalse(powerVSCluster, infrav1beta2.TransitGatewayReadyCondition, infrav1beta2.TransitGatewayReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
conditions.MarkFalse(powerVSCluster.cluster, infrav1beta2.TransitGatewayReadyCondition, infrav1beta2.TransitGatewayReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
} else if requeue {
clusterScope.Info("Transit gateway creation is pending, requeuing")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.TransitGatewayReadyCondition)

// reconcile LoadBalancer
clusterScope.Info("Reconciling VPC load balancers")
if requeue, err := clusterScope.ReconcileLoadBalancers(); err != nil {
clusterScope.Error(err, "failed to reconcile VPC load balancers")
conditions.MarkFalse(powerVSCluster, infrav1beta2.LoadBalancerReadyCondition, infrav1beta2.LoadBalancerReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
} else if requeue {
clusterScope.Info("VPC load balancer creation is pending, requeuing")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
}
conditions.MarkTrue(powerVSCluster.cluster, infrav1beta2.TransitGatewayReadyCondition)

// reconcile COSInstance
if clusterScope.IBMPowerVSCluster.Spec.Ignition != nil {
clusterScope.Info("Reconciling COS service instance")
if err := clusterScope.ReconcileCOSInstance(); err != nil {
conditions.MarkFalse(powerVSCluster, infrav1beta2.COSInstanceReadyCondition, infrav1beta2.COSInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
conditions.MarkFalse(powerVSCluster.cluster, infrav1beta2.COSInstanceReadyCondition, infrav1beta2.COSInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.COSInstanceReadyCondition)
conditions.MarkTrue(powerVSCluster.cluster, infrav1beta2.COSInstanceReadyCondition)
}

var networkReady, loadBalancerReady bool
for _, cond := range clusterScope.IBMPowerVSCluster.Status.Conditions {
Contributor

By this time the network should be ready, right? When do you think it may not be ready? Also, if it is not ready, we cannot even attach it to the Transit Gateway, right?

Contributor

Should we consider moving this check before calling ReconcileTransitGateway to avoid a failure?

Contributor Author

With the transit gateway we attach PowerVS directly with its CRN, so it is not dependent on the network. I am also not requeuing when the network and load balancer are not ready, since that would block the TG creation even though TG does not depend on them. But before proceeding to retrieve the VPC LB details and set the overall Ready status to true, I wanted to validate that both resources have reached Ready, which is why I added the condition checking both of their Ready statuses.

if cond.Type == infrav1beta2.NetworkReadyCondition && cond.Status == corev1.ConditionTrue {
networkReady = true
}
if cond.Type == infrav1beta2.LoadBalancerReadyCondition && cond.Status == corev1.ConditionTrue {
loadBalancerReady = true
}
}

if !networkReady || !loadBalancerReady {
clusterScope.Info("Network or LoadBalancer still not ready, requeuing")
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}
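As an aside, the same readiness check could be expressed with the cluster-api conditions helper instead of iterating over Status.Conditions. A rough sketch, assuming conditions.IsTrue from sigs.k8s.io/cluster-api/util/conditions (the package this file already imports as conditions):

// Sketch: equivalent readiness check via conditions.IsTrue; not part of this PR.
networkReady := conditions.IsTrue(clusterScope.IBMPowerVSCluster, infrav1beta2.NetworkReadyCondition)
loadBalancerReady := conditions.IsTrue(clusterScope.IBMPowerVSCluster, infrav1beta2.LoadBalancerReadyCondition)
if !networkReady || !loadBalancerReady {
	clusterScope.Info("Network or LoadBalancer still not ready, requeuing")
	return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}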

// update cluster object with loadbalancer host
loadBalancer := clusterScope.PublicLoadBalancer()
if loadBalancer == nil {
return reconcile.Result{}, fmt.Errorf("failed to fetch public loadbalancer")
}
if clusterScope.GetLoadBalancerState(loadBalancer.Name) == nil || *clusterScope.GetLoadBalancerState(loadBalancer.Name) != infrav1beta2.VPCLoadBalancerStateActive {
clusterScope.Info("LoadBalancer state is not active")
return reconcile.Result{RequeueAfter: time.Minute}, nil
}

clusterScope.Info("Getting load balancer host")
hostName := clusterScope.GetLoadBalancerHostName(loadBalancer.Name)
if hostName == nil || *hostName == "" {
clusterScope.Info("LoadBalancer hostname is not yet available, requeuing")
return reconcile.Result{RequeueAfter: time.Minute}, nil
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.LoadBalancerReadyCondition)

clusterScope.IBMPowerVSCluster.Spec.ControlPlaneEndpoint.Host = *clusterScope.GetLoadBalancerHostName(loadBalancer.Name)
clusterScope.IBMPowerVSCluster.Spec.ControlPlaneEndpoint.Port = clusterScope.APIServerPort()