Refresh custom data when the bootstrap token rotates.
BrennenMM7 committed Feb 22, 2023
1 parent cdd0ca2 commit dc43931
Showing 9 changed files with 183 additions and 2 deletions.
6 changes: 6 additions & 0 deletions azure/const.go
@@ -28,4 +28,10 @@ const (
// See https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
// for annotation formatting rules.
RGTagsLastAppliedAnnotation = "sigs.k8s.io/cluster-api-provider-azure-last-applied-tags-rg"

// CustomDataHashAnnotation is the key for the machine object annotation
// which tracks the hash of the custom data.
// See https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
// for annotation formatting rules.
CustomDataHashAnnotation = "sigs.k8s.io/cluster-api-provider-azure-vmss-custom-data-hash"
)
42 changes: 42 additions & 0 deletions azure/scope/machinepool.go
@@ -18,8 +18,10 @@ package scope

import (
"context"
"crypto/sha256"
"encoding/base64"
"fmt"
"io"
"strings"

azureautorest "github.com/Azure/go-autorest/autorest/azure"
@@ -535,6 +537,13 @@ func (m *MachinePoolScope) Close(ctx context.Context) error {
if err := m.updateReplicasAndProviderIDs(ctx); err != nil {
return errors.Wrap(err, "failed to update replicas and providerIDs")
}
if m.HasReplicasExternallyManaged(ctx) {
if err := m.updateCustomDataHash(ctx); err != nil {
// ignore errors calculating the custom data hash since it's not absolutely crucial.
log.V(4).Error(err, "unable to update custom data hash, ignoring.")
}
}

}

if err := m.PatchObject(ctx); err != nil {
@@ -568,6 +577,39 @@ func (m *MachinePoolScope) GetBootstrapData(ctx context.Context) (string, error)
return base64.StdEncoding.EncodeToString(value), nil
}

// calculateBootstrapDataHash calculates the sha256 hash of the bootstrap data.
func (m *MachinePoolScope) calculateBootstrapDataHash(ctx context.Context) (string, error) {
bootstrapData, err := m.GetBootstrapData(ctx)
if err != nil {
return "", err
}
h := sha256.New()
n, err := io.WriteString(h, bootstrapData)
if err != nil || n == 0 {
return "", fmt.Errorf("unable to write custom data (bytes written: %q): %w", n, err)
}
return fmt.Sprintf("%x", h.Sum(nil)), nil
}

// HasBootstrapDataChanges calculates the sha256 hash of the bootstrap data and compares it with the hash stored in the AzureMachinePool annotations.
func (m *MachinePoolScope) HasBootstrapDataChanges(ctx context.Context) (bool, error) {
newHash, err := m.calculateBootstrapDataHash(ctx)
if err != nil {
return false, err
}
return m.AzureMachinePool.GetAnnotations()[azure.CustomDataHashAnnotation] != newHash, nil
}

// updateCustomDataHash calculates the sha256 hash of the bootstrap data and stores it as an annotation on the AzureMachinePool.
func (m *MachinePoolScope) updateCustomDataHash(ctx context.Context) error {
newHash, err := m.calculateBootstrapDataHash(ctx)
if err != nil {
return err
}
m.SetAnnotation(azure.CustomDataHashAnnotation, newHash)
return nil
}

// GetVMImage picks an image from the AzureMachinePool configuration, or uses a default one.
func (m *MachinePoolScope) GetVMImage(ctx context.Context) (*infrav1.Image, error) {
ctx, log, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.GetVMImage")
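Taken together, the machinepool.go changes work like this: when replicas are externally managed, Close records a sha256 hash of the current bootstrap data under CustomDataHashAnnotation, and on later reconciles HasBootstrapDataChanges recomputes the hash and compares it against the stored value. Below is a minimal standalone sketch of that hash-and-compare flow; the hashBootstrapData helper and the annotations map are illustrative stand-ins, not the provider's API.

package main

import (
	"crypto/sha256"
	"fmt"
)

// Mirrors azure.CustomDataHashAnnotation from const.go above.
const customDataHashAnnotation = "sigs.k8s.io/cluster-api-provider-azure-vmss-custom-data-hash"

// hashBootstrapData mirrors calculateBootstrapDataHash: a sha256 digest of the
// bootstrap data, rendered as a lowercase hex string.
func hashBootstrapData(bootstrapData string) string {
	return fmt.Sprintf("%x", sha256.Sum256([]byte(bootstrapData)))
}

func main() {
	// Stands in for the AzureMachinePool's annotations.
	annotations := map[string]string{}

	// updateCustomDataHash: record the hash of the current bootstrap data.
	annotations[customDataHashAnnotation] = hashBootstrapData("bootstrap-data-with-old-token")

	// HasBootstrapDataChanges: after the bootstrap token rotates, the stored
	// hash no longer matches, so the VMSS custom data needs to be patched.
	changed := annotations[customDataHashAnnotation] != hashBootstrapData("bootstrap-data-with-new-token")
	fmt.Println("bootstrap data changed:", changed) // true
}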
29 changes: 29 additions & 0 deletions azure/services/scalesets/mock_scalesets/scalesets_mock.go

Some generated files are not rendered by default.

21 changes: 19 additions & 2 deletions azure/services/scalesets/scalesets.go
@@ -53,6 +53,8 @@ type (
SetProviderID(string)
SetVMSSState(*azure.VMSS)
ReconcileReplicas(context.Context, *azure.VMSS) error
HasReplicasExternallyManaged(context.Context) bool
HasBootstrapDataChanges(context.Context) (bool, error)
}

// Service provides operations on Azure resources.
@@ -276,6 +278,20 @@ func (s *Service) patchVMSSIfNeeded(ctx context.Context, infraVMSS *azure.VMSS)
return nil, errors.Wrap(err, "failed to calculate maxSurge")
}

// If the VMSS is managed by an external autoscaler, we should patch the VMSS if customData has changed.
shouldPatchCustomData := false
if s.Scope.HasReplicasExternallyManaged(ctx) {
shouldPatchCustomData, err = s.Scope.HasBootstrapDataChanges(ctx)
if err != nil {
return nil, errors.Wrap(err, "unable to calculate custom data hash")
}
if shouldPatchCustomData {
log.V(4).Info("custom data changed")
} else {
log.V(4).Info("custom data unchanged")
}
}

hasModelChanges := hasModelModifyingDifferences(infraVMSS, vmss)
var isFlex bool
for _, instance := range infraVMSS.Instances {
@@ -295,10 +311,11 @@ func (s *Service) patchVMSSIfNeeded(ctx context.Context, infraVMSS *azure.VMSS)
patch.Sku.Capacity = pointer.Int64(surge)
}

// If there are no model changes, no increase in the replica count, and no change in custom data, do not update the VMSS.
// Decreases in the replica count are handled by deleting AzureMachinePoolMachine instances in the MachinePoolScope.
if *patch.Sku.Capacity <= infraVMSS.Capacity && !hasModelChanges {
log.V(4).Info("nothing to update on vmss", "scale set", spec.Name, "newReplicas", *patch.Sku.Capacity, "oldReplicas", infraVMSS.Capacity, "hasChanges", hasModelChanges)
if *patch.Sku.Capacity <= infraVMSS.Capacity && !hasModelChanges && !shouldPatchCustomData {
log.V(4).Info("nothing to update on vmss", "scale set", spec.Name, "newReplicas", *patch.Sku.Capacity, "oldReplicas", infraVMSS.Capacity, "hasModelChanges", hasModelChanges, "shouldPatchCustomData", shouldPatchCustomData)
return nil, nil
}

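With the custom data check in place, the guard above reduces to a three-way predicate: patch when the capacity grows, when the scale set model changed, or when the bootstrap data hash changed. (Note the assignment in the externally-managed branch uses `=` rather than `:=`, so the outer shouldPatchCustomData is updated instead of shadowed.) A small sketch of the equivalent decision, with illustrative names rather than the provider's API:

package main

import "fmt"

// shouldPatchVMSS mirrors the guard in patchVMSSIfNeeded: skip the PATCH only
// when capacity is not increasing, the model is unchanged, and the custom data
// (bootstrap token) hash still matches.
func shouldPatchVMSS(newCapacity, oldCapacity int64, hasModelChanges, customDataChanged bool) bool {
	return newCapacity > oldCapacity || hasModelChanges || customDataChanged
}

func main() {
	// Externally managed replicas with a rotated bootstrap token: patch even
	// though capacity and model are unchanged.
	fmt.Println(shouldPatchVMSS(3, 3, false, true)) // true

	// Steady state: nothing to update on the VMSS.
	fmt.Println(shouldPatchVMSS(3, 3, false, false)) // false
}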
6 changes: 6 additions & 0 deletions azure/services/scalesets/scalesets_test.go
@@ -223,6 +223,7 @@ func TestReconcileVMSS(t *testing.T) {
s.DeleteLongRunningOperationState(defaultSpec.Name, serviceName, infrav1.PutFuture)
s.DeleteLongRunningOperationState(defaultSpec.Name, serviceName, infrav1.PatchFuture)
s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil)
s.HasReplicasExternallyManaged(gomockinternal.AContext()).Return(false)
},
},
{
@@ -238,6 +239,7 @@
s.DeleteLongRunningOperationState(defaultSpec.Name, serviceName, infrav1.PutFuture)
s.DeleteLongRunningOperationState(defaultSpec.Name, serviceName, infrav1.PatchFuture)
s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil)
s.HasReplicasExternallyManaged(gomockinternal.AContext()).Return(false)
},
},
{
@@ -582,6 +584,7 @@ func TestReconcileVMSS(t *testing.T) {
m.GetResultIfDone(gomockinternal.AContext(), patchFuture).Return(compute.VirtualMachineScaleSet{}, azure.NewOperationNotDoneError(patchFuture))
m.Get(gomockinternal.AContext(), defaultResourceGroup, defaultVMSSName).Return(clone, nil)
m.ListInstances(gomockinternal.AContext(), defaultResourceGroup, defaultVMSSName).Return(instances, nil)
s.HasReplicasExternallyManaged(gomockinternal.AContext()).Return(false)
},
},
{
@@ -741,6 +744,7 @@ func TestReconcileVMSS(t *testing.T) {
s.DeleteLongRunningOperationState(spec.Name, serviceName, infrav1.PatchFuture)
s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil)
s.Location().AnyTimes().Return("test-location")
s.HasReplicasExternallyManaged(gomockinternal.AContext()).Return(false)
},
},
{
@@ -767,6 +771,7 @@ func TestReconcileVMSS(t *testing.T) {
s.DeleteLongRunningOperationState(spec.Name, serviceName, infrav1.PatchFuture)
s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil)
s.Location().AnyTimes().Return("test-location")
s.HasReplicasExternallyManaged(gomockinternal.AContext()).Return(false)
},
},
{
@@ -792,6 +797,7 @@ func TestReconcileVMSS(t *testing.T) {
s.DeleteLongRunningOperationState(spec.Name, serviceName, infrav1.PatchFuture)
s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil)
s.Location().AnyTimes().Return("test-location")
s.HasReplicasExternallyManaged(gomockinternal.AContext()).Return(false)
},
},
}
9 changes: 9 additions & 0 deletions config/rbac/role.yaml
@@ -52,6 +52,15 @@ rules:
- get
- list
- watch
- apiGroups:
- bootstrap.cluster.x-k8s.io
resources:
- kubeadmconfigs
- kubeadmconfigs/status
verbs:
- get
- list
- watch
- apiGroups:
- cluster.x-k8s.io
resources:
10 changes: 10 additions & 0 deletions exp/controllers/azuremachinepool_controller.go
@@ -34,14 +34,17 @@ import (
"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
"sigs.k8s.io/cluster-api-provider-azure/util/tele"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
kubeadmv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/predicates"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"
)
@@ -113,6 +116,12 @@ func (ampr *AzureMachinePoolReconciler) SetupWithManager(ctx context.Context, mg
&source.Kind{Type: &infrav1.AzureCluster{}},
handler.EnqueueRequestsFromMapFunc(azureClusterMapper),
).
// watch for changes in KubeadmConfig to sync bootstrap token
Watches(
&source.Kind{Type: &kubeadmv1.KubeadmConfig{}},
handler.EnqueueRequestsFromMapFunc(KubeadmConfigToInfrastructureMapFunc(ctx, ampr.Client, log)),
builder.WithPredicates(predicate.ResourceVersionChangedPredicate{}),
).
Build(r)
if err != nil {
return errors.Wrap(err, "error creating controller")
@@ -147,6 +156,7 @@ func (ampr *AzureMachinePoolReconciler) SetupWithManager(ctx context.Context, mg

// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=kubeadmconfigs;kubeadmconfigs/status,verbs=get;list;watch
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines/status,verbs=get
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;update;patch
60 changes: 60 additions & 0 deletions exp/controllers/helpers.go
@@ -32,6 +32,7 @@ import (
infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1"
"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
kubeadmv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/util"
ctrl "sigs.k8s.io/controller-runtime"
@@ -317,3 +318,62 @@ func MachinePoolMachineHasStateOrVersionChange(logger logr.Logger) predicate.Fun
GenericFunc: func(e event.GenericEvent) bool { return false },
}
}

// KubeadmConfigToInfrastructureMapFunc returns a handler.MapFunc that maps KubeadmConfig events to reconcile requests for the machine pools that reference them.
func KubeadmConfigToInfrastructureMapFunc(ctx context.Context, c client.Client, log logr.Logger) handler.MapFunc {
return func(o client.Object) []reconcile.Request {
ctx, cancel := context.WithTimeout(ctx, reconciler.DefaultMappingTimeout)
defer cancel()

kc, ok := o.(*kubeadmv1.KubeadmConfig)
if !ok {
log.V(4).Info("attempt to map incorrect type", "type", fmt.Sprintf("%T", o))
return nil
}

mpKey := client.ObjectKey{
Namespace: kc.Namespace,
Name: kc.Name,
}

// fetch MachinePool to get reference
mp := &expv1.MachinePool{}
if err := c.Get(ctx, mpKey, mp); err != nil {
if !apierrors.IsNotFound(err) {
log.Error(err, "failed to fetch MachinePool for KubeadmConfig")
}
return []reconcile.Request{}
}

ref := mp.Spec.Template.Spec.Bootstrap.ConfigRef
if ref == nil {
log.V(4).Info("fetched MachinePool has no Bootstrap.ConfigRef")
return []reconcile.Request{}
}
sameKind := ref.Kind == o.GetObjectKind().GroupVersionKind().Kind
sameName := ref.Name == kc.Name
sameNamespace := ref.Namespace == kc.Namespace
if !sameKind || !sameName || !sameNamespace {
log.V(4).Info("Bootstrap.ConfigRef does not match",
"sameKind", sameKind,
"ref kind", ref.Kind,
"other kind", o.GetObjectKind().GroupVersionKind().Kind,
"sameName", sameName,
"sameNamespace", sameNamespace,
)
return []reconcile.Request{}
}

key := client.ObjectKey{
Namespace: kc.Namespace,
Name: kc.Name,
}
log.V(4).Info("adding KubeadmConfig to watch", "key", key)

return []reconcile.Request{
{
NamespacedName: key,
},
}
}
}
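The mapper enqueues a request only when the MachinePool's Bootstrap.ConfigRef points at exactly the KubeadmConfig that fired the event. A self-contained sketch of that matching rule; objectRef is an illustrative stand-in for the corev1.ObjectReference and the KubeadmConfig's metadata:

package main

import "fmt"

type objectRef struct{ Kind, Name, Namespace string }

// refMatches mirrors the guard in KubeadmConfigToInfrastructureMapFunc: kind,
// name, and namespace must all match before an event maps to a reconcile request.
func refMatches(ref, kc objectRef) bool {
	return ref.Kind == kc.Kind && ref.Name == kc.Name && ref.Namespace == kc.Namespace
}

func main() {
	ref := objectRef{Kind: "KubeadmConfig", Name: "pool0", Namespace: "default"}

	fmt.Println(refMatches(ref, objectRef{"KubeadmConfig", "pool0", "default"})) // true: enqueue pool0
	fmt.Println(refMatches(ref, objectRef{"KubeadmConfig", "pool1", "default"})) // false: ignore event
}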
2 changes: 2 additions & 0 deletions main.go
@@ -52,6 +52,7 @@ import (
"sigs.k8s.io/cluster-api-provider-azure/version"
clusterv1alpha4 "sigs.k8s.io/cluster-api/api/v1alpha4"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
kubeadmv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
expv1alpha4 "sigs.k8s.io/cluster-api/exp/api/v1alpha4"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
capifeature "sigs.k8s.io/cluster-api/feature"
@@ -80,6 +81,7 @@ func init() {
_ = expv1alpha4.AddToScheme(scheme)
_ = clusterv1.AddToScheme(scheme)
_ = expv1.AddToScheme(scheme)
_ = kubeadmv1.AddToScheme(scheme)
// +kubebuilder:scaffold:scheme

// Add aadpodidentity v1 to the scheme.
