Refactor infra creation to improve the overall infra setup time #1869
@@ -20,10 +20,12 @@ import (
 	"context"
 	"fmt"
 	"strings"
+	"sync"
 	"time"

 	"github.com/pkg/errors"

+	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/api/meta"
 	"k8s.io/apimachinery/pkg/runtime"
@@ -111,6 +113,116 @@ func (r *IBMPowerVSClusterReconciler) Reconcile(ctx context.Context, req ctrl.Re
 	return r.reconcile(clusterScope)
 }

+// powerVSCluster wraps the IBMPowerVSCluster object with a mutex so that
+// conditions can be updated safely from concurrent reconcile goroutines.
+type powerVSCluster struct {
+	cluster *infrav1beta2.IBMPowerVSCluster
+	mu      sync.Mutex
+}
+
+// reconcileResult carries a goroutine's requeue decision and error back to
+// the main reconcile loop over a channel.
+type reconcileResult struct {
+	reconcile.Result
+	error
+}
+
+// updateCondition marks a condition true or false under the mutex. For the
+// false case, conditionArgs is expected to hold, in order: the condition
+// type, reason, severity, and message (plus any message format arguments).
+func (update *powerVSCluster) updateCondition(condition bool, conditionArgs ...interface{}) {
+	update.mu.Lock()
+	defer update.mu.Unlock()
+	if condition {
+		conditions.MarkTrue(update.cluster, conditionArgs[0].(capiv1beta1.ConditionType))
+		return
+	}
+	conditions.MarkFalse(update.cluster, conditionArgs[0].(capiv1beta1.ConditionType), conditionArgs[1].(string), conditionArgs[2].(capiv1beta1.ConditionSeverity), conditionArgs[3].(string), conditionArgs[4:]...)
+}
+
+func (r *IBMPowerVSClusterReconciler) reconcilePowerVSResources(clusterScope *scope.PowerVSClusterScope, powerVSCluster *powerVSCluster, ch chan reconcileResult, wg *sync.WaitGroup) {
+	defer wg.Done()
+	powerVSLog := clusterScope.WithName("powervs")
+	// reconcile PowerVS service instance
+	powerVSLog.Info("Reconciling PowerVS service instance")
+	if requeue, err := clusterScope.ReconcilePowerVSServiceInstance(); err != nil {
+		powerVSLog.Error(err, "failed to reconcile PowerVS service instance")
+		powerVSCluster.updateCondition(false, infrav1beta2.ServiceInstanceReadyCondition, infrav1beta2.ServiceInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
+		ch <- reconcileResult{reconcile.Result{}, err}
+		return
+	} else if requeue {
+		powerVSLog.Info("PowerVS service instance creation is pending, requeuing")
+		ch <- reconcileResult{reconcile.Result{Requeue: true}, nil}
+		return
+	}
+	powerVSCluster.updateCondition(true, infrav1beta2.ServiceInstanceReadyCondition)
+
+	clusterScope.IBMPowerVSClient.WithClients(powervs.ServiceOptions{CloudInstanceID: clusterScope.GetServiceInstanceID()})
+
+	// reconcile network
+	powerVSLog.Info("Reconciling network")
+	if dhcpServerActive, err := clusterScope.ReconcileNetwork(); err != nil {
+		powerVSLog.Error(err, "failed to reconcile PowerVS network")
+		powerVSCluster.updateCondition(false, infrav1beta2.NetworkReadyCondition, infrav1beta2.NetworkReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
+		ch <- reconcileResult{reconcile.Result{}, err}
+		return
+	} else if dhcpServerActive {
+		powerVSCluster.updateCondition(true, infrav1beta2.NetworkReadyCondition)
+		return
+	}
+	// We do not want to block reconciliation of other resources (e.g. Transit Gateway and COS setup), so skip the requeue and only log that network creation is pending.
+	powerVSLog.Info("PowerVS network creation is pending")
+}
+
+func (r *IBMPowerVSClusterReconciler) reconcileVPCResources(clusterScope *scope.PowerVSClusterScope, powerVSCluster *powerVSCluster, ch chan reconcileResult, wg *sync.WaitGroup) {
+	defer wg.Done()
+	vpcLog := clusterScope.WithName("vpc")
+	vpcLog.Info("Reconciling VPC")
+	if requeue, err := clusterScope.ReconcileVPC(); err != nil {
+		vpcLog.Error(err, "failed to reconcile VPC")
+		powerVSCluster.updateCondition(false, infrav1beta2.VPCReadyCondition, infrav1beta2.VPCReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
+		ch <- reconcileResult{reconcile.Result{}, err}
+		return
+	} else if requeue {
+		vpcLog.Info("VPC creation is pending, requeuing")
+		ch <- reconcileResult{reconcile.Result{Requeue: true}, nil}
+		return
+	}
+	powerVSCluster.updateCondition(true, infrav1beta2.VPCReadyCondition)
+
+	// reconcile VPC Subnet
+	vpcLog.Info("Reconciling VPC subnets")
+	if requeue, err := clusterScope.ReconcileVPCSubnets(); err != nil {
+		vpcLog.Error(err, "failed to reconcile VPC subnets")
+		powerVSCluster.updateCondition(false, infrav1beta2.VPCSubnetReadyCondition, infrav1beta2.VPCSubnetReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
+		ch <- reconcileResult{reconcile.Result{}, err}
+		return
+	} else if requeue {
+		vpcLog.Info("VPC subnet creation is pending, requeuing")
+		ch <- reconcileResult{reconcile.Result{Requeue: true}, nil}
+		return
+	}
+	powerVSCluster.updateCondition(true, infrav1beta2.VPCSubnetReadyCondition)
+
+	// reconcile VPC security group
+	vpcLog.Info("Reconciling VPC security group")
+	if err := clusterScope.ReconcileVPCSecurityGroups(); err != nil {
+		vpcLog.Error(err, "failed to reconcile VPC security groups")
+		powerVSCluster.updateCondition(false, infrav1beta2.VPCSecurityGroupReadyCondition, infrav1beta2.VPCSecurityGroupReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
+		ch <- reconcileResult{reconcile.Result{}, err}
+		return
+	}
+	powerVSCluster.updateCondition(true, infrav1beta2.VPCSecurityGroupReadyCondition)
+
+	// reconcile LoadBalancer
+	vpcLog.Info("Reconciling VPC load balancers")
+	if loadBalancerReady, err := clusterScope.ReconcileLoadBalancers(); err != nil {
+		vpcLog.Error(err, "failed to reconcile VPC load balancers")
+		powerVSCluster.updateCondition(false, infrav1beta2.LoadBalancerReadyCondition, infrav1beta2.LoadBalancerReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
+		ch <- reconcileResult{reconcile.Result{}, err}
+		return
+	} else if loadBalancerReady {
+		powerVSCluster.updateCondition(true, infrav1beta2.LoadBalancerReadyCondition)
+		return
+	}
+	// We do not want to block reconciliation of other resources (e.g. Transit Gateway and COS setup), so skip the requeue and only log that load balancer creation is pending.
+	vpcLog.Info("VPC load balancer creation is pending")
+}
+
 func (r *IBMPowerVSClusterReconciler) reconcile(clusterScope *scope.PowerVSClusterScope) (ctrl.Result, error) { //nolint:gocyclo
 	if controllerutil.AddFinalizer(clusterScope.IBMPowerVSCluster, infrav1beta2.IBMPowerVSClusterFinalizer) {
 		return ctrl.Result{}, nil

@@ -137,116 +249,96 @@ func (r *IBMPowerVSClusterReconciler) reconcile(clusterScope *scope.PowerVSClust
 	return reconcile.Result{}, err
 }

-	powerVSCluster := clusterScope.IBMPowerVSCluster
-	// reconcile PowerVS service instance
-	clusterScope.Info("Reconciling PowerVS service instance")
-	if requeue, err := clusterScope.ReconcilePowerVSServiceInstance(); err != nil {
-		clusterScope.Error(err, "failed to reconcile PowerVS service instance")
-		conditions.MarkFalse(powerVSCluster, infrav1beta2.ServiceInstanceReadyCondition, infrav1beta2.ServiceInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
-		return reconcile.Result{}, err
-	} else if requeue {
-		clusterScope.Info("PowerVS service instance creation is pending, requeuing")
-		return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
+	powerVSCluster := &powerVSCluster{
+		cluster: clusterScope.IBMPowerVSCluster,
 	}
-	conditions.MarkTrue(powerVSCluster, infrav1beta2.ServiceInstanceReadyCondition)

-	clusterScope.IBMPowerVSClient.WithClients(powervs.ServiceOptions{CloudInstanceID: clusterScope.GetServiceInstanceID()})
+	var wg sync.WaitGroup
+	ch := make(chan reconcileResult)

-	// reconcile network
-	clusterScope.Info("Reconciling network")
-	if requeue, err := clusterScope.ReconcileNetwork(); err != nil {
-		clusterScope.Error(err, "failed to reconcile PowerVS network")
-		conditions.MarkFalse(powerVSCluster, infrav1beta2.NetworkReadyCondition, infrav1beta2.NetworkReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
-		return reconcile.Result{}, err
-	} else if requeue {
-		clusterScope.Info("PowerVS network creation is pending, requeuing")
-		return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
-	}
-	conditions.MarkTrue(powerVSCluster, infrav1beta2.NetworkReadyCondition)
+	// reconcile PowerVS resources
+	wg.Add(1)
+	go r.reconcilePowerVSResources(clusterScope, powerVSCluster, ch, &wg)

-	// reconcile VPC
-	clusterScope.Info("Reconciling VPC")
-	if requeue, err := clusterScope.ReconcileVPC(); err != nil {
-		clusterScope.Error(err, "failed to reconcile VPC")
-		conditions.MarkFalse(powerVSCluster, infrav1beta2.VPCReadyCondition, infrav1beta2.VPCReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
-		return reconcile.Result{}, err
-	} else if requeue {
-		clusterScope.Info("VPC creation is pending, requeuing")
-		return reconcile.Result{RequeueAfter: 15 * time.Second}, nil
-	}
-	conditions.MarkTrue(powerVSCluster, infrav1beta2.VPCReadyCondition)
+	wg.Add(1)
+	go r.reconcileVPCResources(clusterScope, powerVSCluster, ch, &wg)

-	// reconcile VPC Subnet
-	clusterScope.Info("Reconciling VPC subnets")
-	if requeue, err := clusterScope.ReconcileVPCSubnets(); err != nil {
-		clusterScope.Error(err, "failed to reconcile VPC subnets")
-		conditions.MarkFalse(powerVSCluster, infrav1beta2.VPCSubnetReadyCondition, infrav1beta2.VPCSubnetReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
-		return reconcile.Result{}, err
-	} else if requeue {
-		clusterScope.Info("VPC subnet creation is pending, requeuing")
-		return reconcile.Result{RequeueAfter: 15 * time.Second}, nil
+	// wait for above reconcile to complete and close the channel
+	go func() {
+		wg.Wait()
+		close(ch)
+	}()
+
+	var requeue bool
+	var errList []error
+	// receive return values from the channel and decide the requeue
+	for val := range ch {
+		if val.Requeue {
+			requeue = true
+		}
+		if val.error != nil {
+			errList = append(errList, val.error)
+		}
 	}
-	conditions.MarkTrue(powerVSCluster, infrav1beta2.VPCSubnetReadyCondition)

-	// reconcile VPC security group
-	clusterScope.Info("Reconciling VPC security group")
-	if err := clusterScope.ReconcileVPCSecurityGroups(); err != nil {
-		clusterScope.Error(err, "failed to reconcile VPC security groups")
-		conditions.MarkFalse(powerVSCluster, infrav1beta2.VPCSecurityGroupReadyCondition, infrav1beta2.VPCSecurityGroupReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
-		return reconcile.Result{}, err
+	if requeue && len(errList) > 0 {
+		return ctrl.Result{RequeueAfter: 30 * time.Second}, kerrors.NewAggregate(errList)
+	} else if requeue {
+		return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
+	} else if len(errList) > 0 {
+		return ctrl.Result{}, kerrors.NewAggregate(errList)
 	}
-	conditions.MarkTrue(powerVSCluster, infrav1beta2.VPCSecurityGroupReadyCondition)

 	// reconcile Transit Gateway
 	clusterScope.Info("Reconciling Transit Gateway")
 	if requeue, err := clusterScope.ReconcileTransitGateway(); err != nil {
 		clusterScope.Error(err, "failed to reconcile transit gateway")
-		conditions.MarkFalse(powerVSCluster, infrav1beta2.TransitGatewayReadyCondition, infrav1beta2.TransitGatewayReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
+		conditions.MarkFalse(powerVSCluster.cluster, infrav1beta2.TransitGatewayReadyCondition, infrav1beta2.TransitGatewayReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
 		return reconcile.Result{}, err
 	} else if requeue {
 		clusterScope.Info("Transit gateway creation is pending, requeuing")
 		return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
 	}
-	conditions.MarkTrue(powerVSCluster, infrav1beta2.TransitGatewayReadyCondition)
-
-	// reconcile LoadBalancer
-	clusterScope.Info("Reconciling VPC load balancers")
-	if requeue, err := clusterScope.ReconcileLoadBalancers(); err != nil {
-		clusterScope.Error(err, "failed to reconcile VPC load balancers")
-		conditions.MarkFalse(powerVSCluster, infrav1beta2.LoadBalancerReadyCondition, infrav1beta2.LoadBalancerReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
-		return reconcile.Result{}, err
-	} else if requeue {
-		clusterScope.Info("VPC load balancer creation is pending, requeuing")
-		return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
-	}
+	conditions.MarkTrue(powerVSCluster.cluster, infrav1beta2.TransitGatewayReadyCondition)

 	// reconcile COSInstance
 	if clusterScope.IBMPowerVSCluster.Spec.Ignition != nil {
 		clusterScope.Info("Reconciling COS service instance")
 		if err := clusterScope.ReconcileCOSInstance(); err != nil {
-			conditions.MarkFalse(powerVSCluster, infrav1beta2.COSInstanceReadyCondition, infrav1beta2.COSInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
+			conditions.MarkFalse(powerVSCluster.cluster, infrav1beta2.COSInstanceReadyCondition, infrav1beta2.COSInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
 			return reconcile.Result{}, err
 		}
-		conditions.MarkTrue(powerVSCluster, infrav1beta2.COSInstanceReadyCondition)
+		conditions.MarkTrue(powerVSCluster.cluster, infrav1beta2.COSInstanceReadyCondition)
 	}

+	var networkReady, loadBalancerReady bool
+	for _, cond := range clusterScope.IBMPowerVSCluster.Status.Conditions {
+		if cond.Type == infrav1beta2.NetworkReadyCondition && cond.Status == corev1.ConditionTrue {
+			networkReady = true
+		}
+		if cond.Type == infrav1beta2.LoadBalancerReadyCondition && cond.Status == corev1.ConditionTrue {
+			loadBalancerReady = true
+		}
+	}
+
+	if !networkReady || !loadBalancerReady {
+		clusterScope.Info("Network or LoadBalancer still not ready, requeuing")
+		return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
+	}

Review thread on this check:

Reviewer: By this time the network should be ready, right? When do you think it might not be ready? Also, if it's not ready, we cannot even attach it to the Transit Gateway, right?

Reviewer: Should we consider moving this check before calling …?

Author: With the transit gateway we are directly attaching PowerVS via its CRN, so it is not dependent on the network. And I'm not requeuing when the network and load balancer are not ready, since that would block TG creation even though TG does not depend on them. But before proceeding to retrieve the VPC load balancer details and setting the overall Ready status to true, I wanted to validate that both of these resources have reached the Ready state; that's why I added this check.

 	// update cluster object with loadbalancer host
 	loadBalancer := clusterScope.PublicLoadBalancer()
 	if loadBalancer == nil {
 		return reconcile.Result{}, fmt.Errorf("failed to fetch public loadbalancer")
 	}
 	if clusterScope.GetLoadBalancerState(loadBalancer.Name) == nil || *clusterScope.GetLoadBalancerState(loadBalancer.Name) != infrav1beta2.VPCLoadBalancerStateActive {
 		clusterScope.Info("LoadBalancer state is not active")
 		return reconcile.Result{RequeueAfter: time.Minute}, nil
 	}

 	clusterScope.Info("Getting load balancer host")
 	hostName := clusterScope.GetLoadBalancerHostName(loadBalancer.Name)
 	if hostName == nil || *hostName == "" {
 		clusterScope.Info("LoadBalancer hostname is not yet available, requeuing")
 		return reconcile.Result{RequeueAfter: time.Minute}, nil
 	}
-	conditions.MarkTrue(powerVSCluster, infrav1beta2.LoadBalancerReadyCondition)

 	clusterScope.IBMPowerVSCluster.Spec.ControlPlaneEndpoint.Host = *clusterScope.GetLoadBalancerHostName(loadBalancer.Name)
 	clusterScope.IBMPowerVSCluster.Spec.ControlPlaneEndpoint.Port = clusterScope.APIServerPort()

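Stepping back from the diff: the core of this refactor is a standard fan-out/fan-in. Each resource group reconciles in its own goroutine, every goroutine reports a `reconcileResult` over a shared unbuffered channel, a helper goroutine closes the channel once the `WaitGroup` drains, and the caller ranges over the channel to aggregate requeue and error decisions. The sketch below shows the same shape in isolation; it is a minimal, runnable illustration with made-up names (`worker`, `sharedStatus`), not code from this PR.

```go
package main

import (
	"errors"
	"fmt"
	"sync"
)

// result mirrors the PR's reconcileResult: a requeue flag plus an error.
type result struct {
	requeue bool
	err     error
}

// sharedStatus mirrors the PR's powerVSCluster wrapper: a mutex guards
// updates made concurrently by the worker goroutines.
type sharedStatus struct {
	mu         sync.Mutex
	conditions map[string]bool
}

func (s *sharedStatus) set(name string, ready bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.conditions[name] = ready
}

// worker stands in for reconcilePowerVSResources/reconcileVPCResources:
// it records a condition, then reports its outcome on the channel.
func worker(name string, ready bool, err error, status *sharedStatus, ch chan<- result, wg *sync.WaitGroup) {
	defer wg.Done()
	if err != nil {
		status.set(name, false)
		ch <- result{err: err}
		return
	}
	status.set(name, ready)
	ch <- result{requeue: !ready}
}

func main() {
	status := &sharedStatus{conditions: map[string]bool{}}
	var wg sync.WaitGroup
	ch := make(chan result)

	wg.Add(2)
	go worker("powervs", true, nil, status, ch, &wg)
	go worker("vpc", false, errors.New("vpc quota exceeded"), status, ch, &wg)

	// Close the channel only after both workers have reported, so the
	// range loop below terminates.
	go func() {
		wg.Wait()
		close(ch)
	}()

	// Fan-in: aggregate requeue flags and errors, as the reconcile loop does.
	var requeue bool
	var errs []error
	for r := range ch {
		requeue = requeue || r.requeue
		if r.err != nil {
			errs = append(errs, r.err)
		}
	}
	fmt.Println("requeue:", requeue, "errors:", errs, "conditions:", status.conditions)
}
```

Note that the unbuffered channel only works because the aggregation loop runs concurrently with the workers; if the caller did `wg.Wait()` before ranging, every `ch <-` send would block and the goroutines would deadlock. The closer goroutine is what lets `range ch` double as the join point.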
Review thread on updateCondition:

Reviewer: Isn't there a better way than using interface{} types for conditionArgs? Also, since we are indexing into it, should we add a check verifying the minimum length, to avoid an index out of range?

Author: If I pass them as concrete types, we would get a golint error for exceeding the maximum number of arguments, which is 5. I would also need to create separate functions for the true and false conditions, which would complicate things IMO.
Again, we would need to add a check in the many places where we update the condition, and we couldn't take meaningful action if it errored; for example, here I want to return the main error instead of the error I'm getting from this check, which doesn't look good to me. The scope of this is within the reconcile func, with only two possible flows, so I think the current way is OK.
Again, need to add a check on many places where we are updating the condition and won't be able to take major action if it errored. Ex here I want to return the main error instead of the error I m getting from this, it does n't look good to me. Scope of this is within the reconcile func and only two possible flow. I think the current way is ok IMO.