This repository has been archived by the owner on Nov 30, 2023. It is now read-only.

AzureMachinePool conditions handler #1123

Merged
merged 18 commits into from Oct 30, 2020
Changes from 8 commits
16 changes: 16 additions & 0 deletions service/controller/azure_machine_pool.go
@@ -27,6 +27,7 @@ import (
"github.com/giantswarm/azure-operator/v5/service/controller/debugger"
"github.com/giantswarm/azure-operator/v5/service/controller/internal/vmsku"
"github.com/giantswarm/azure-operator/v5/service/controller/resource/azureconfig"
"github.com/giantswarm/azure-operator/v5/service/controller/resource/azuremachinepoolconditions"
"github.com/giantswarm/azure-operator/v5/service/controller/resource/cloudconfigblob"
"github.com/giantswarm/azure-operator/v5/service/controller/resource/ipam"
"github.com/giantswarm/azure-operator/v5/service/controller/resource/nodepool"
@@ -136,6 +137,20 @@ func NewAzureMachinePoolResourceSet(config AzureMachinePoolConfig) ([]resource.I
organizationClientFactory = client.NewOrganizationFactory(c)
}

var azureMachinePoolConditionsResource resource.Interface
{
c := azuremachinepoolconditions.Config{
AzureClientsFactory: &organizationClientFactory,
CtrlClient: config.K8sClient.CtrlClient(),
Logger: config.Logger,
}

azureMachinePoolConditionsResource, err = azuremachinepoolconditions.New(c)
if err != nil {
return nil, microerror.Mask(err)
}
}

var newDebugger *debugger.Debugger
{
c := debugger.Config{
@@ -320,6 +335,7 @@ func NewAzureMachinePoolResourceSet(config AzureMachinePoolConfig) ([]resource.I
}

resources := []resource.Interface{
azureMachinePoolConditionsResource,
sparkResource,
cloudconfigblobResource,
ipamResource,
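The hunks above only wire the new conditions handler into the node pool resource set; it is registered first in the slice, so its condition handling runs before the other node pool resources. The package's Config and Resource types are not part of this hunk. A minimal sketch of what they would need to look like to satisfy the Config literal above, assuming the operator's usual config-validation pattern (the import paths, the field types, and the invalidConfigError value are assumptions, not taken from this PR):

package azuremachinepoolconditions

import (
	"github.com/giantswarm/microerror"
	"github.com/giantswarm/micrologger"
	ctrl "sigs.k8s.io/controller-runtime/pkg/client"

	"github.com/giantswarm/azure-operator/v5/client"
)

// Hypothetical sketch, not part of this PR: the Config and Resource shapes
// implied by the wiring above.
var invalidConfigError = &microerror.Error{
	Kind: "invalidConfigError",
}

type Config struct {
	AzureClientsFactory *client.OrganizationFactory
	CtrlClient          ctrl.Client
	Logger              micrologger.Logger
}

type Resource struct {
	azureClientsFactory *client.OrganizationFactory
	ctrlClient          ctrl.Client
	logger              micrologger.Logger
}

// New validates the configuration and returns the conditions handler resource.
func New(config Config) (*Resource, error) {
	if config.AzureClientsFactory == nil {
		return nil, microerror.Maskf(invalidConfigError, "%T.AzureClientsFactory must not be empty", config)
	}
	if config.CtrlClient == nil {
		return nil, microerror.Maskf(invalidConfigError, "%T.CtrlClient must not be empty", config)
	}
	if config.Logger == nil {
		return nil, microerror.Maskf(invalidConfigError, "%T.Logger must not be empty", config)
	}

	r := &Resource{
		azureClientsFactory: config.AzureClientsFactory,
		ctrlClient:          config.CtrlClient,
		logger:              config.Logger,
	}

	return r, nil
}

The actual resource also has to implement the resource.Interface methods (Name, EnsureCreated, EnsureDeleted) and the logDebug/logWarning helpers used in the files below; those are omitted from this sketch.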
@@ -0,0 +1,66 @@
package azuremachinepoolconditions

import (
"context"

azureconditions "github.com/giantswarm/apiextensions/v3/pkg/conditions/azure"
"github.com/giantswarm/microerror"
corev1 "k8s.io/api/core/v1"
capzexp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1alpha3"
capi "sigs.k8s.io/cluster-api/api/v1alpha3"
capiconditions "sigs.k8s.io/cluster-api/util/conditions"
)

func (r *Resource) ensureReadyCondition(ctx context.Context, azureMachinePool *capzexp.AzureMachinePool) error {
r.logDebug(ctx, "ensuring condition Ready")
var err error

// Ensure SubnetReady condition
err = r.ensureSubnetReadyCondition(ctx, azureMachinePool)
if err != nil {
return microerror.Mask(err)
}

// Ensure VMSSReady condition
err = r.ensureVmssReadyCondition(ctx, azureMachinePool)
if err != nil {
return microerror.Mask(err)
}

// List of conditions that all need to be True for the Ready condition to
// be True:
// - VMSSReady: node pool VMSS is ready
// - SubnetReady: node pool subnet is ready
conditionsToSummarize := capiconditions.WithConditions(
azureconditions.SubnetReadyCondition,
azureconditions.VMSSReadyCondition)

// Update Ready condition
capiconditions.SetSummary(
azureMachinePool,
conditionsToSummarize,
capiconditions.AddSourceRef())

// Now check current Ready condition so we can log the value
r.logConditionStatus(ctx, azureMachinePool, capi.ReadyCondition)
r.logDebug(ctx, "ensured condition Ready")
return nil
}

func (r *Resource) logConditionStatus(ctx context.Context, azureMachinePool *capzexp.AzureMachinePool, conditionType capi.ConditionType) {
condition := capiconditions.Get(azureMachinePool, conditionType)

if condition == nil {
r.logWarning(ctx, "condition %s not set", conditionType)
} else {
messageFormat := "condition %s set to %s"
messageArgs := []interface{}{conditionType, condition.Status}
if condition.Status != corev1.ConditionTrue {
messageFormat += ", Reason=%s, Severity=%s, Message=%s"
messageArgs = append(messageArgs, condition.Reason)
messageArgs = append(messageArgs, condition.Severity)
messageArgs = append(messageArgs, condition.Message)
}
r.logDebug(ctx, messageFormat, messageArgs...)
}
}
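ensureReadyCondition never sets Ready directly; capiconditions.SetSummary derives it from SubnetReady and VMSSReady. A standalone, hypothetical illustration (not part of this PR; the VMSS name and reason are placeholder values) of how that summary behaves when one of the two source conditions is False:

package main

import (
	"fmt"

	azureconditions "github.com/giantswarm/apiextensions/v3/pkg/conditions/azure"
	capzexp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1alpha3"
	capi "sigs.k8s.io/cluster-api/api/v1alpha3"
	capiconditions "sigs.k8s.io/cluster-api/util/conditions"
)

func main() {
	amp := &capzexp.AzureMachinePool{}

	// The subnet check passed, but the VMSS check did not.
	capiconditions.MarkTrue(amp, azureconditions.SubnetReadyCondition)
	capiconditions.MarkFalse(amp, azureconditions.VMSSReadyCondition,
		"VMSSNotFound", capi.ConditionSeverityError, "VMSS %s is not found", "nodepool-abc12")

	// Same summary call as in ensureReadyCondition above.
	capiconditions.SetSummary(amp,
		capiconditions.WithConditions(
			azureconditions.SubnetReadyCondition,
			azureconditions.VMSSReadyCondition),
		capiconditions.AddSourceRef())

	// Ready is now False because one of the summarized conditions is False.
	ready := capiconditions.Get(amp, capi.ReadyCondition)
	fmt.Println(ready.Status, ready.Reason)
}

Because the summary is recomputed on every reconciliation of the AzureMachinePool, the Ready condition always reflects the latest SubnetReady and VMSSReady values.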
@@ -0,0 +1,111 @@
package azuremachinepoolconditions

import (
"context"
"fmt"

"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2019-11-01/network"
azureconditions "github.com/giantswarm/apiextensions/v3/pkg/conditions/azure"
"github.com/giantswarm/microerror"
capzexp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1alpha3"
capi "sigs.k8s.io/cluster-api/api/v1alpha3"
capiconditions "sigs.k8s.io/cluster-api/util/conditions"

"github.com/giantswarm/azure-operator/v5/pkg/helpers"
)

const (
subnetDeploymentPrefix = "subnet"
provisioningStateSucceeded = "Succeeded"

SubnetNotFoundReason = "SubnetNotFound"
SubnetProvisioningStatePrefix = "SubnetProvisioningState"
)

func (r *Resource) ensureSubnetReadyCondition(ctx context.Context, azureMachinePool *capzexp.AzureMachinePool) error {
r.logDebug(ctx, "ensuring condition %s", azureconditions.SubnetReadyCondition)

// Get Azure deployments client
deploymentsClient, err := r.azureClientsFactory.GetDeploymentsClient(ctx, azureMachinePool.ObjectMeta)
if err != nil {
return microerror.Mask(err)
}

// Now let's first check ARM deployment state
subnetDeploymentName := getSubnetDeploymentName(azureMachinePool.Name)
isSubnetDeploymentSuccessful, err := r.checkIfDeploymentIsSuccessful(ctx, deploymentsClient, azureMachinePool, subnetDeploymentName, azureconditions.SubnetReadyCondition)
if err != nil {
return microerror.Mask(err)
} else if !isSubnetDeploymentSuccessful {
// If the deployment is not yet successful, the check method will set the
// appropriate condition value.
return nil
}

// Deployment is successful, now let's check the actual resource.
subnetsClient, err := r.azureClientsFactory.GetSubnetsClient(ctx, azureMachinePool.ObjectMeta)
if err != nil {
return microerror.Mask(err)
}

azureCluster, err := helpers.GetAzureClusterFromMetadata(ctx, r.ctrlClient, azureMachinePool.ObjectMeta)
if err != nil {
return microerror.Mask(err)
}

subnetName := azureMachinePool.Name
subnet, err := subnetsClient.Get(ctx, azureCluster.Name, azureCluster.Spec.NetworkSpec.Vnet.Name, subnetName, "")
if IsNotFound(err) {
r.setSubnetNotFound(ctx, azureMachinePool, subnetName, azureconditions.SubnetReadyCondition)
return nil
} else if err != nil {
return microerror.Mask(err)
}

// Note: Here we check if the subnet exists and that its provisioning state
// is succeeded. It would be good to also check network security group,
// routing table and service endpoints.
if subnet.ProvisioningState == provisioningStateSucceeded {
capiconditions.MarkTrue(azureMachinePool, azureconditions.SubnetReadyCondition)
} else {
r.setSubnetProvisioningStateNotSuccessful(ctx, azureMachinePool, subnetName, subnet.ProvisioningState, azureconditions.SubnetReadyCondition)
}

r.logConditionStatus(ctx, azureMachinePool, azureconditions.SubnetReadyCondition)
r.logDebug(ctx, "ensured condition %s", azureconditions.SubnetReadyCondition)
return nil
}

func getSubnetDeploymentName(subnetName string) string {
return fmt.Sprintf("%s-%s", subnetDeploymentPrefix, subnetName)
}

func (r *Resource) setSubnetNotFound(ctx context.Context, cr capiconditions.Setter, subnetName string, condition capi.ConditionType) {
message := "Subnet %s is not found"
messageArgs := subnetName
capiconditions.MarkFalse(
cr,
condition,
SubnetNotFoundReason,
capi.ConditionSeverityError,
message,
messageArgs)

r.logWarning(ctx, message, messageArgs)
}

func (r *Resource) setSubnetProvisioningStateNotSuccessful(ctx context.Context, cr capiconditions.Setter, subnetName string, provisioningState network.ProvisioningState, condition capi.ConditionType) {
message := "Subnet %s provisioning state is %s"
messageArgs := []interface{}{subnetName, provisioningState}
reason := SubnetProvisioningStatePrefix + string(provisioningState)

capiconditions.MarkFalse(
cr,
condition,
reason,
capi.ConditionSeverityWarning,
message,
messageArgs...)

r.logWarning(ctx, message, messageArgs...)
}
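Both condition handlers call checkIfDeploymentIsSuccessful, which is not included in this diff. A rough sketch of what such a helper could do, assuming the clients factory returns a pointer to the ARM resources SDK DeploymentsClient; the reason strings are placeholders and the PR's actual implementation may differ:

// Hypothetical sketch, not included in this diff: check whether the ARM
// deployment backing a condition has completed, and mark the condition False
// with a placeholder reason when it has not.
func (r *Resource) checkIfDeploymentIsSuccessful(ctx context.Context, deploymentsClient *resources.DeploymentsClient, azureMachinePool *capzexp.AzureMachinePool, deploymentName string, condition capi.ConditionType) (bool, error) {
	// Assumption: node pool resources are deployed into the resource group
	// named after the cluster, as in ensureVmssReadyCondition below.
	resourceGroupName := key.ClusterName(azureMachinePool)

	deployment, err := deploymentsClient.Get(ctx, resourceGroupName, deploymentName)
	if IsNotFound(err) {
		capiconditions.MarkFalse(azureMachinePool, condition, "DeploymentNotFound",
			capi.ConditionSeverityWarning, "Deployment %s is not found", deploymentName)
		return false, nil
	} else if err != nil {
		return false, microerror.Mask(err)
	}

	// ProvisioningState is a *string in the ARM resources SDK version assumed here.
	if deployment.Properties == nil || deployment.Properties.ProvisioningState == nil {
		capiconditions.MarkFalse(azureMachinePool, condition, "DeploymentStateUnknown",
			capi.ConditionSeverityWarning, "Deployment %s provisioning state not returned by Azure API", deploymentName)
		return false, nil
	}

	if *deployment.Properties.ProvisioningState != provisioningStateSucceeded {
		capiconditions.MarkFalse(azureMachinePool, condition, "DeploymentNotSucceeded",
			capi.ConditionSeverityWarning, "Deployment %s provisioning state is %s", deploymentName, *deployment.Properties.ProvisioningState)
		return false, nil
	}

	return true, nil
}

Checking the ARM deployment first keeps the handlers cheap: the subnet and VMSS resources are only queried once their deployment has actually succeeded.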
@@ -0,0 +1,157 @@
package azuremachinepoolconditions

import (
"context"
"fmt"

azureconditions "github.com/giantswarm/apiextensions/v3/pkg/conditions/azure"
"github.com/giantswarm/microerror"
capzexp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1alpha3"
capi "sigs.k8s.io/cluster-api/api/v1alpha3"
capiconditions "sigs.k8s.io/cluster-api/util/conditions"

"github.com/giantswarm/azure-operator/v5/service/controller/key"
)

const (
vmssDeploymentPrefix = "nodepool-"
VMSSNotFoundReason = "VMSSNotFound"
VMSSIDNotSetReason = "VMSSIDNotSet"
VMSSProvisioningStatePrefix = "VMSSProvisioningState"
VMSSProvisioningStateUnknownReason = "VMSSProvisioningStateUnknown"
VmssProvisioningStateSucceeded = "Succeeded"
VmssProvisioningStateFailed = "Failed"
)

func (r *Resource) ensureVmssReadyCondition(ctx context.Context, azureMachinePool *capzexp.AzureMachinePool) error {
r.logDebug(ctx, "ensuring condition %s", azureconditions.VMSSReadyCondition)

// Get Azure deployments client
deploymentsClient, err := r.azureClientsFactory.GetDeploymentsClient(ctx, azureMachinePool.ObjectMeta)
if err != nil {
return microerror.Mask(err)
}

// Now let's first check ARM deployment state
deploymentName := getVMSSDeploymentName(azureMachinePool.Name)
isDeploymentSuccessful, err := r.checkIfDeploymentIsSuccessful(ctx, deploymentsClient, azureMachinePool, deploymentName, azureconditions.VMSSReadyCondition)
if err != nil {
return microerror.Mask(err)
} else if !isDeploymentSuccessful {
// If the deployment is not yet successful, the check method has set the
// appropriate condition value.
return nil
}

// Deployment is successful, now let's check the actual resource.
vmssClient, err := r.azureClientsFactory.GetVirtualMachineScaleSetsClient(ctx, azureMachinePool.ObjectMeta)
if err != nil {
return microerror.Mask(err)
}

// Get VMSS from Azure API.
resourceGroupName := key.ClusterName(azureMachinePool)
vmssName := key.NodePoolVMSSName(azureMachinePool)

vmss, err := vmssClient.Get(ctx, resourceGroupName, vmssName)
if IsNotFound(err) {
r.setVMSSNotFound(ctx, azureMachinePool, vmssName, azureconditions.VMSSReadyCondition)
return nil
} else if err != nil {
return microerror.Mask(err)
}

// Note: Here we are only checking the provisioning state of VMSS. Ideally
// we would check the provisioning and power state of all instances, but
// that would require more VMSS instance API calls that have very low
// throttling limits, so we will add that later, once throttling situation
// is better.

// Check if VMSS provisioning state is set.
if vmss.ProvisioningState == nil {
r.setVMSSProvisioningStateUnknown(ctx, azureMachinePool, deploymentName, azureconditions.VMSSReadyCondition)
return nil
}

switch *vmss.ProvisioningState {
// VMSS provisioning state is Succeeded, all good.
case VmssProvisioningStateSucceeded:
capiconditions.MarkTrue(azureMachinePool, azureconditions.VMSSReadyCondition)
// VMSS provisioning state is Failed, VMSS has some issues.
case VmssProvisioningStateFailed:
r.setVMSSProvisioningStateFailed(ctx, azureMachinePool, vmssName, azureconditions.VMSSReadyCondition)
default:
// VMSS provisioning state not Succeeded, set current state to VMSSReady condition.
r.setVMSSProvisioningStateWarning(ctx, azureMachinePool, vmssName, *vmss.ProvisioningState, azureconditions.VMSSReadyCondition)
}

// Log current VMSSReady condition
r.logConditionStatus(ctx, azureMachinePool, azureconditions.VMSSReadyCondition)
r.logDebug(ctx, "ensured condition %s", azureconditions.VMSSReadyCondition)
return nil
}

func getVMSSDeploymentName(nodepoolID string) string {
return fmt.Sprintf("%s%s", vmssDeploymentPrefix, nodepoolID)
}

func (r *Resource) setVMSSNotFound(ctx context.Context, cr capiconditions.Setter, vmssName string, condition capi.ConditionType) {
message := "VMSS %s is not found, which should not happen when the deployment is successful"
messageArgs := vmssName
capiconditions.MarkFalse(
cr,
condition,
VMSSNotFoundReason,
capi.ConditionSeverityError,
message,
messageArgs)

r.logWarning(ctx, message, messageArgs)
}

func (r *Resource) setVMSSProvisioningStateUnknown(ctx context.Context, cr capiconditions.Setter, deploymentName string, condition capi.ConditionType) {
message := "VMSS %s provisioning state not returned by Azure API, check back in few minutes"
messageArgs := deploymentName
capiconditions.MarkFalse(
cr,
condition,
VMSSProvisioningStateUnknownReason,
capi.ConditionSeverityWarning,
message,
messageArgs)

r.logWarning(ctx, message, messageArgs)
}

func (r *Resource) setVMSSProvisioningStateFailed(ctx context.Context, cr capiconditions.Setter, vmssName string, condition capi.ConditionType) {
message := "VMSS %s failed, it might succeed after retrying, see Azure portal for more details"
messageArgs := vmssName
reason := VMSSProvisioningStatePrefix + VmssProvisioningStateFailed

capiconditions.MarkFalse(
cr,
condition,
reason,
capi.ConditionSeverityError,
message,
messageArgs)

r.logWarning(ctx, message, messageArgs)
}

func (r *Resource) setVMSSProvisioningStateWarning(ctx context.Context, cr capiconditions.Setter, vmssName string, currentProvisioningState string, condition capi.ConditionType) {
message := "Deployment %s has not succeeded yet, current state is %s, " +
"check back in few minutes, see Azure portal for more details"
messageArgs := []interface{}{vmssName, currentProvisioningState}
reason := VMSSProvisioningStatePrefix + currentProvisioningState

capiconditions.MarkFalse(
cr,
condition,
reason,
capi.ConditionSeverityWarning,
message,
messageArgs...)

r.logWarning(ctx, message, messageArgs...)
}
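As the note in ensureVmssReadyCondition says, only the VMSS-level provisioning state is checked because per-instance API calls are heavily throttled. If that constraint is lifted, a per-instance check might look roughly like the sketch below (hypothetical: the GetVirtualMachineScaleSetVMsClient factory method and the helper name are assumptions, not part of this PR):

// Hypothetical sketch, not part of this PR: verify that every instance in the
// node pool VMSS has provisioning state Succeeded.
func (r *Resource) allInstancesSucceeded(ctx context.Context, azureMachinePool *capzexp.AzureMachinePool, resourceGroupName, vmssName string) (bool, error) {
	// Assumption: the clients factory exposes a VMSS VMs client; this PR only
	// uses the deployments and VMSS clients.
	vmssVMsClient, err := r.azureClientsFactory.GetVirtualMachineScaleSetVMsClient(ctx, azureMachinePool.ObjectMeta)
	if err != nil {
		return false, microerror.Mask(err)
	}

	result, err := vmssVMsClient.List(ctx, resourceGroupName, vmssName, "", "", "")
	if err != nil {
		return false, microerror.Mask(err)
	}

	for result.NotDone() {
		for _, instance := range result.Values() {
			// Assumes the API returns instance properties for every VM in the list.
			if instance.ProvisioningState == nil || *instance.ProvisioningState != VmssProvisioningStateSucceeded {
				return false, nil
			}
		}
		if err := result.NextWithContext(ctx); err != nil {
			return false, microerror.Mask(err)
		}
	}

	return true, nil
}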