AzureMachinePool conditions handler (#1123)
* Add SubnetReady condition to AzureMachinePool

* Add AzureMachinePool VMSSReady condition, remove conditions setting from nodepool handler
nprokopic authored Oct 30, 2020
1 parent f91b45f commit 3aa5431
Showing 15 changed files with 633 additions and 119 deletions.
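
After this change, every AzureMachinePool carries SubnetReady and VMSSReady conditions plus a summarized Ready condition. Downstream code can read them with the standard Cluster API condition helpers; a minimal consumer sketch (azureMachinePool is assumed to be a *capzexp.AzureMachinePool fetched elsewhere):

import (
	azureconditions "github.com/giantswarm/apiextensions/v3/pkg/conditions/azure"
	capi "sigs.k8s.io/cluster-api/api/v1alpha3"
	capiconditions "sigs.k8s.io/cluster-api/util/conditions"
)

// Ready is the summary of SubnetReady and VMSSReady, per the summary
// logic added in this commit.
ready := capiconditions.IsTrue(azureMachinePool, capi.ReadyCondition)

// The individual conditions can also be inspected directly.
subnetReady := capiconditions.IsTrue(azureMachinePool, azureconditions.SubnetReadyCondition)
vmssReady := capiconditions.IsTrue(azureMachinePool, azureconditions.VMSSReadyCondition)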
16 changes: 16 additions & 0 deletions service/controller/azure_machine_pool.go
@@ -27,6 +27,7 @@ import (
"github.com/giantswarm/azure-operator/v5/service/controller/debugger"
"github.com/giantswarm/azure-operator/v5/service/controller/internal/vmsku"
"github.com/giantswarm/azure-operator/v5/service/controller/resource/azureconfig"
"github.com/giantswarm/azure-operator/v5/service/controller/resource/azuremachinepoolconditions"
"github.com/giantswarm/azure-operator/v5/service/controller/resource/cloudconfigblob"
"github.com/giantswarm/azure-operator/v5/service/controller/resource/ipam"
"github.com/giantswarm/azure-operator/v5/service/controller/resource/nodepool"
@@ -137,6 +138,20 @@ func NewAzureMachinePoolResourceSet(config AzureMachinePoolConfig) ([]resource.Interface, error) {
organizationClientFactory = client.NewOrganizationFactory(c)
}

var azureMachinePoolConditionsResource resource.Interface
{
c := azuremachinepoolconditions.Config{
AzureClientsFactory: &organizationClientFactory,
CtrlClient: config.K8sClient.CtrlClient(),
Logger: config.Logger,
}

azureMachinePoolConditionsResource, err = azuremachinepoolconditions.New(c)
if err != nil {
return nil, microerror.Mask(err)
}
}

var newDebugger *debugger.Debugger
{
c := debugger.Config{
@@ -322,6 +337,7 @@ func NewAzureMachinePoolResourceSet(config AzureMachinePoolConfig) ([]resource.Interface, error) {
}

resources := []resource.Interface{
azureMachinePoolConditionsResource,
sparkResource,
cloudconfigblobResource,
ipamResource,
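
The azuremachinepoolconditions.Config and New used in this wiring are defined in the new resource's own file, which is among the 15 changed files but not shown in this excerpt. A hedged sketch of their likely shape, inferred from the fields set above and the r.azureClientsFactory, r.ctrlClient and r.logger usages below (the invalidConfigError value and the OrganizationFactory type name are assumptions; the resource.Interface methods Name, EnsureCreated and EnsureDeleted are omitted):

package azuremachinepoolconditions

import (
	"github.com/giantswarm/microerror"
	"github.com/giantswarm/micrologger"
	ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"

	azureclient "github.com/giantswarm/azure-operator/v5/client"
)

type Config struct {
	AzureClientsFactory *azureclient.OrganizationFactory
	CtrlClient          ctrlclient.Client
	Logger              micrologger.Logger
}

type Resource struct {
	azureClientsFactory *azureclient.OrganizationFactory
	ctrlClient          ctrlclient.Client
	logger              micrologger.Logger
}

func New(config Config) (*Resource, error) {
	// Guard against partially filled configs, following the pattern used
	// throughout the operator.
	if config.AzureClientsFactory == nil {
		return nil, microerror.Maskf(invalidConfigError, "%T.AzureClientsFactory must not be empty", config)
	}
	if config.CtrlClient == nil {
		return nil, microerror.Maskf(invalidConfigError, "%T.CtrlClient must not be empty", config)
	}
	if config.Logger == nil {
		return nil, microerror.Maskf(invalidConfigError, "%T.Logger must not be empty", config)
	}

	return &Resource{
		azureClientsFactory: config.AzureClientsFactory,
		ctrlClient:          config.CtrlClient,
		logger:              config.Logger,
	}, nil
}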
5 changes: 5 additions & 0 deletions service/controller/key/key.go
@@ -46,6 +46,7 @@ const (
masterNatGatewayName = "masters-nat-gw"
prefixMaster = "master"
prefixWorker = "worker"
subnetDeploymentPrefix = "subnet"
virtualNetworkSuffix = "VirtualNetwork"
vpnGatewaySubnet = "GatewaySubnet"
vpnGatewaySuffix = "VPNGateway"
@@ -629,6 +630,10 @@ func NodePoolDeploymentName(azureMachinePool *expcapzv1alpha3.AzureMachinePool) string {
return NodePoolVMSSName(azureMachinePool)
}

func SubnetDeploymentName(subnetName string) string {
return fmt.Sprintf("%s-%s", subnetDeploymentPrefix, subnetName)
}

func MachinePoolID(getter LabelsGetter) (string, error) {
machinePoolID, exists := getter.GetLabels()[apiextensionslabels.MachinePool]
if !exists {
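
SubnetDeploymentName simply prefixes the subnet name with "subnet". Since the node pool subnet is named after its AzureMachinePool (see subnetName := azureMachinePool.Name further below), a hypothetical machine pool named "t3st9" yields:

name := key.SubnetDeploymentName("t3st9")
// name == "subnet-t3st9"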
@@ -0,0 +1,66 @@
package azuremachinepoolconditions

import (
"context"

azureconditions "github.com/giantswarm/apiextensions/v3/pkg/conditions/azure"
"github.com/giantswarm/microerror"
corev1 "k8s.io/api/core/v1"
capzexp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1alpha3"
capi "sigs.k8s.io/cluster-api/api/v1alpha3"
capiconditions "sigs.k8s.io/cluster-api/util/conditions"
)

func (r *Resource) ensureReadyCondition(ctx context.Context, azureMachinePool *capzexp.AzureMachinePool) error {
r.logDebug(ctx, "ensuring condition Ready")
var err error

// Ensure SubnetReady condition
err = r.ensureSubnetReadyCondition(ctx, azureMachinePool)
if err != nil {
return microerror.Mask(err)
}

// Ensure VMSSReady condition
err = r.ensureVmssReadyCondition(ctx, azureMachinePool)
if err != nil {
return microerror.Mask(err)
}

// List of conditions that all need to be True for the Ready condition to
// be True:
// - VMSSReady: node pool VMSS is ready
// - SubnetReady: node pool subnet is ready
conditionsToSummarize := capiconditions.WithConditions(
azureconditions.SubnetReadyCondition,
azureconditions.VMSSReadyCondition)

// Update Ready condition
capiconditions.SetSummary(
azureMachinePool,
conditionsToSummarize,
capiconditions.AddSourceRef())

// Now check the current Ready condition so we can log its value
r.logConditionStatus(ctx, azureMachinePool, capi.ReadyCondition)
r.logDebug(ctx, "ensured condition Ready")
return nil
}

func (r *Resource) logConditionStatus(ctx context.Context, azureMachinePool *capzexp.AzureMachinePool, conditionType capi.ConditionType) {
condition := capiconditions.Get(azureMachinePool, conditionType)

if condition == nil {
r.logWarning(ctx, "condition %s not set", conditionType)
} else {
messageFormat := "condition %s set to %s"
messageArgs := []interface{}{conditionType, condition.Status}
if condition.Status != corev1.ConditionTrue {
messageFormat += ", Reason=%s, Severity=%s, Message=%s"
messageArgs = append(messageArgs, condition.Reason)
messageArgs = append(messageArgs, condition.Severity)
messageArgs = append(messageArgs, condition.Message)
}
r.logDebug(ctx, messageFormat, messageArgs...)
}
}
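
The logDebug and logWarning helpers used throughout this package are not part of the excerpt. A minimal sketch, assuming they are methods on the Resource type sketched earlier (with fmt imported) and wrap giantswarm's micrologger in the usual way; the exact implementation may differ:

func (r *Resource) logDebug(ctx context.Context, message string, messageArgs ...interface{}) {
	r.logger.LogCtx(ctx, "level", "debug", "message", fmt.Sprintf(message, messageArgs...))
}

func (r *Resource) logWarning(ctx context.Context, message string, messageArgs ...interface{}) {
	r.logger.LogCtx(ctx, "level", "warning", "message", fmt.Sprintf(message, messageArgs...))
}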
@@ -0,0 +1,105 @@
package azuremachinepoolconditions

import (
"context"

"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2019-11-01/network"
azureconditions "github.com/giantswarm/apiextensions/v3/pkg/conditions/azure"
"github.com/giantswarm/microerror"
capzexp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1alpha3"
capi "sigs.k8s.io/cluster-api/api/v1alpha3"
capiconditions "sigs.k8s.io/cluster-api/util/conditions"

"github.com/giantswarm/azure-operator/v5/pkg/helpers"
"github.com/giantswarm/azure-operator/v5/service/controller/key"
)

const (
SubnetNotFoundReason = "SubnetNotFound"
SubnetProvisioningStatePrefix = "SubnetProvisioningState"
)

func (r *Resource) ensureSubnetReadyCondition(ctx context.Context, azureMachinePool *capzexp.AzureMachinePool) error {
r.logDebug(ctx, "ensuring condition %s", azureconditions.SubnetReadyCondition)

deploymentsClient, err := r.azureClientsFactory.GetDeploymentsClient(ctx, azureMachinePool.ObjectMeta)
if err != nil {
return microerror.Mask(err)
}

// First, check the ARM deployment state
subnetDeploymentName := key.SubnetDeploymentName(azureMachinePool.Name)
isSubnetDeploymentSuccessful, err := r.checkIfDeploymentIsSuccessful(ctx, deploymentsClient, azureMachinePool, subnetDeploymentName, azureconditions.SubnetReadyCondition)
if err != nil {
return microerror.Mask(err)
} else if !isSubnetDeploymentSuccessful {
// checkIfDeploymentIsSuccessful, called above, already sets an
// appropriate condition value whenever the deployment has not succeeded,
// for whatever reason (a hedged sketch of that helper follows this
// file), so our job here is done.
return nil
}

// Deployment is successful, we proceed with checking the actual Azure
// subnet.
subnetsClient, err := r.azureClientsFactory.GetSubnetsClient(ctx, azureMachinePool.ObjectMeta)
if err != nil {
return microerror.Mask(err)
}

azureCluster, err := helpers.GetAzureClusterFromMetadata(ctx, r.ctrlClient, azureMachinePool.ObjectMeta)
if err != nil {
return microerror.Mask(err)
}

subnetName := azureMachinePool.Name
subnet, err := subnetsClient.Get(ctx, azureCluster.Name, azureCluster.Spec.NetworkSpec.Vnet.Name, subnetName, "")
if IsNotFound(err) {
r.setSubnetNotFound(ctx, azureMachinePool, subnetName, azureconditions.SubnetReadyCondition)
return nil
} else if err != nil {
return microerror.Mask(err)
}

// Note: Here we check that the subnet exists and that its provisioning
// state is Succeeded. It would be good to also check the network security
// group, route table and service endpoints.
if subnet.ProvisioningState == network.Succeeded {
capiconditions.MarkTrue(azureMachinePool, azureconditions.SubnetReadyCondition)
} else {
r.setSubnetProvisioningStateNotSuccessful(ctx, azureMachinePool, subnetName, subnet.ProvisioningState, azureconditions.SubnetReadyCondition)
}

r.logConditionStatus(ctx, azureMachinePool, azureconditions.SubnetReadyCondition)
r.logDebug(ctx, "ensured condition %s", azureconditions.SubnetReadyCondition)
return nil
}

func (r *Resource) setSubnetNotFound(ctx context.Context, cr capiconditions.Setter, subnetName string, condition capi.ConditionType) {
message := "Subnet %s is not found"
messageArgs := subnetName
capiconditions.MarkFalse(
cr,
condition,
SubnetNotFoundReason,
capi.ConditionSeverityError,
message,
messageArgs)

r.logWarning(ctx, message, messageArgs)
}

func (r *Resource) setSubnetProvisioningStateNotSuccessful(ctx context.Context, cr capiconditions.Setter, subnetName string, provisioningState network.ProvisioningState, condition capi.ConditionType) {
message := "Subnet %s provisioning state is %s"
messageArgs := []interface{}{subnetName, provisioningState}
reason := SubnetProvisioningStatePrefix + string(provisioningState)

capiconditions.MarkFalse(
cr,
condition,
reason,
capi.ConditionSeverityWarning,
message,
messageArgs...)

r.logWarning(ctx, message, messageArgs...)
}
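
Both condition handlers delegate the ARM deployment check to checkIfDeploymentIsSuccessful, which lives in another file of this commit and is not shown here. A hedged sketch of its likely shape, assuming the Azure SDK resources.DeploymentsClient (e.g. from github.com/Azure/azure-sdk-for-go/services/resources/mgmt/2019-05-01/resources) and hypothetical reason strings:

func (r *Resource) checkIfDeploymentIsSuccessful(ctx context.Context, deploymentsClient *resources.DeploymentsClient, azureMachinePool *capzexp.AzureMachinePool, deploymentName string, condition capi.ConditionType) (bool, error) {
	deployment, err := deploymentsClient.Get(ctx, key.ClusterName(azureMachinePool), deploymentName)
	if IsNotFound(err) {
		// "DeploymentNotFound" is a hypothetical reason string.
		capiconditions.MarkFalse(
			azureMachinePool,
			condition,
			"DeploymentNotFound",
			capi.ConditionSeverityWarning,
			"Deployment %s is not found",
			deploymentName)
		return false, nil
	} else if err != nil {
		return false, microerror.Mask(err)
	}

	// Defensive: the SDK models ProvisioningState as *string.
	if deployment.Properties == nil || deployment.Properties.ProvisioningState == nil {
		return false, nil
	}

	state := *deployment.Properties.ProvisioningState
	if state == "Succeeded" {
		return true, nil
	}

	// Any other state (Failed, Canceled, Running, ...) marks the condition
	// False so the caller can return early; "Deployment" is a hypothetical
	// reason prefix.
	capiconditions.MarkFalse(
		azureMachinePool,
		condition,
		"Deployment"+state,
		capi.ConditionSeverityWarning,
		"Deployment %s provisioning state is %s",
		deploymentName,
		state)
	return false, nil
}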
@@ -0,0 +1,153 @@
package azuremachinepoolconditions

import (
"context"

"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute"
azureconditions "github.com/giantswarm/apiextensions/v3/pkg/conditions/azure"
"github.com/giantswarm/microerror"
capzexp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1alpha3"
capi "sigs.k8s.io/cluster-api/api/v1alpha3"
capiconditions "sigs.k8s.io/cluster-api/util/conditions"

"github.com/giantswarm/azure-operator/v5/service/controller/key"
)

const (
VMSSNotFoundReason = "VMSSNotFound"
VMSSProvisioningStatePrefix = "VMSSProvisioningState"
VMSSProvisioningStateUnknownReason = "VMSSProvisioningStateUnknown"
VmssProvisioningStateSucceeded = string(compute.ProvisioningStateSucceeded)
VmssProvisioningStateFailed = string(compute.ProvisioningStateFailed)
)

func (r *Resource) ensureVmssReadyCondition(ctx context.Context, azureMachinePool *capzexp.AzureMachinePool) error {
r.logDebug(ctx, "ensuring condition %s", azureconditions.VMSSReadyCondition)

deploymentsClient, err := r.azureClientsFactory.GetDeploymentsClient(ctx, azureMachinePool.ObjectMeta)
if err != nil {
return microerror.Mask(err)
}

// First, check the ARM deployment state
deploymentName := key.NodePoolDeploymentName(azureMachinePool)
isDeploymentSuccessful, err := r.checkIfDeploymentIsSuccessful(ctx, deploymentsClient, azureMachinePool, deploymentName, azureconditions.VMSSReadyCondition)
if err != nil {
return microerror.Mask(err)
} else if !isDeploymentSuccessful {
// checkIfDeploymentIsSuccessful, called above, already sets an
// appropriate condition value whenever the deployment has not succeeded,
// for whatever reason, so our job here is done.
return nil
}

// Deployment is successful, we proceed with checking the actual Azure
// VMSS.
vmssClient, err := r.azureClientsFactory.GetVirtualMachineScaleSetsClient(ctx, azureMachinePool.ObjectMeta)
if err != nil {
return microerror.Mask(err)
}

resourceGroupName := key.ClusterName(azureMachinePool)
vmssName := key.NodePoolVMSSName(azureMachinePool)

vmss, err := vmssClient.Get(ctx, resourceGroupName, vmssName)
if IsNotFound(err) {
r.setVMSSNotFound(ctx, azureMachinePool, vmssName, azureconditions.VMSSReadyCondition)
return nil
} else if err != nil {
return microerror.Mask(err)
}

// Note: Here we are only checking the provisioning state of the VMSS.
// Ideally we would also check the provisioning and power state of all
// instances, but that would require additional VMSS instance API calls,
// which have very low throttling limits, so we will add that later, once
// the throttling situation improves.

// Check if the VMSS provisioning state is set. We expect that it is,
// since we already checked the deployment, but it is not impossible that
// the VMSS resource was changed for some reason.
if vmss.ProvisioningState == nil {
r.setVMSSProvisioningStateUnknown(ctx, azureMachinePool, deploymentName, azureconditions.VMSSReadyCondition)
return nil
}

switch *vmss.ProvisioningState {
// VMSS provisioning state is Succeeded, all good.
case VmssProvisioningStateSucceeded:
capiconditions.MarkTrue(azureMachinePool, azureconditions.VMSSReadyCondition)
// VMSS provisioning state is Failed, VMSS has some issues.
case VmssProvisioningStateFailed:
r.setVMSSProvisioningStateFailed(ctx, azureMachinePool, vmssName, azureconditions.VMSSReadyCondition)
default:
// VMSS provisioning state is neither Succeeded nor Failed; reflect the current state in the VMSSReady condition.
r.setVMSSProvisioningStateWarning(ctx, azureMachinePool, vmssName, *vmss.ProvisioningState, azureconditions.VMSSReadyCondition)
}

// Log current VMSSReady condition
r.logConditionStatus(ctx, azureMachinePool, azureconditions.VMSSReadyCondition)
r.logDebug(ctx, "ensured condition %s", azureconditions.VMSSReadyCondition)
return nil
}

func (r *Resource) setVMSSNotFound(ctx context.Context, cr capiconditions.Setter, vmssName string, condition capi.ConditionType) {
message := "VMSS %s is not found, which should not happen when the deployment is successful"
messageArgs := vmssName
capiconditions.MarkFalse(
cr,
condition,
VMSSNotFoundReason,
capi.ConditionSeverityError,
message,
messageArgs)

r.logWarning(ctx, message, messageArgs)
}

func (r *Resource) setVMSSProvisioningStateUnknown(ctx context.Context, cr capiconditions.Setter, deploymentName string, condition capi.ConditionType) {
message := "VMSS %s provisioning state not returned by Azure API, check back in few minutes"
messageArgs := deploymentName
capiconditions.MarkFalse(
cr,
condition,
VMSSProvisioningStateUnknownReason,
capi.ConditionSeverityWarning,
message,
messageArgs)

r.logWarning(ctx, message, messageArgs)
}

func (r *Resource) setVMSSProvisioningStateFailed(ctx context.Context, cr capiconditions.Setter, vmssName string, condition capi.ConditionType) {
message := "VMSS %s failed, it might succeed after retrying, see Azure portal for more details"
messageArgs := vmssName
reason := VMSSProvisioningStatePrefix + VmssProvisioningStateFailed

capiconditions.MarkFalse(
cr,
condition,
reason,
capi.ConditionSeverityError,
message,
messageArgs)

r.logWarning(ctx, message, messageArgs)
}

func (r *Resource) setVMSSProvisioningStateWarning(ctx context.Context, cr capiconditions.Setter, vmssName string, currentProvisioningState string, condition capi.ConditionType) {
message := "Deployment %s has not succeeded yet, current state is %s, " +
"check back in few minutes, see Azure portal for more details"
messageArgs := []interface{}{vmssName, currentProvisioningState}
reason := VMSSProvisioningStatePrefix + currentProvisioningState

capiconditions.MarkFalse(
cr,
condition,
reason,
capi.ConditionSeverityWarning,
message,
messageArgs...)

r.logWarning(ctx, message, messageArgs...)
}
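
The IsNotFound error matcher used by both handlers is likewise defined outside this excerpt. A plausible sketch following the common go-autorest pattern (an assumption, not necessarily the package's exact matcher):

import (
	"errors"
	"net/http"

	"github.com/Azure/go-autorest/autorest"
)

func IsNotFound(err error) bool {
	if err == nil {
		return false
	}
	var detailedError autorest.DetailedError
	if errors.As(err, &detailedError) {
		// DetailedError.StatusCode is an interface{}; the Azure SDK stores
		// the HTTP status code as an int.
		if code, ok := detailedError.StatusCode.(int); ok && code == http.StatusNotFound {
			return true
		}
	}
	return false
}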