From 9e26a5e3ffd978784f7a2698582248cb3dd8c02b Mon Sep 17 00:00:00 2001 From: Brennen Murray Date: Mon, 30 Jan 2023 14:21:33 -0800 Subject: [PATCH] Resolved refresh custom data when Bootstrap token rotates. --- azure/const.go | 6 ++ azure/scope/machinepool.go | 41 +++++++++++++ .../mock_scalesets/scalesets_mock.go | 29 +++++++++ azure/services/scalesets/scalesets.go | 21 ++++++- azure/services/scalesets/scalesets_test.go | 6 ++ config/rbac/role.yaml | 9 +++ .../azuremachinepool_controller.go | 10 ++++ exp/controllers/helpers.go | 60 +++++++++++++++++++ main.go | 2 + 9 files changed, 182 insertions(+), 2 deletions(-) diff --git a/azure/const.go b/azure/const.go index 9c4ac179ab2..ff655a4c5d7 100644 --- a/azure/const.go +++ b/azure/const.go @@ -28,4 +28,10 @@ const ( // See https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ // for annotation formatting rules. RGTagsLastAppliedAnnotation = "sigs.k8s.io/cluster-api-provider-azure-last-applied-tags-rg" + + // CustomDataHashAnnotation is the key for the machine object annotation + // which tracks the hash of the custom data. + // See https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ + // for annotation formatting rules. + CustomDataHashAnnotation = "sigs.k8s.io/cluster-api-provider-azure-vmss-custom-data-hash" ) diff --git a/azure/scope/machinepool.go b/azure/scope/machinepool.go index e427d4f086d..9286378872c 100644 --- a/azure/scope/machinepool.go +++ b/azure/scope/machinepool.go @@ -18,8 +18,10 @@ package scope import ( "context" + "crypto/sha256" "encoding/base64" "fmt" + "io" "strings" azureautorest "github.com/Azure/go-autorest/autorest/azure" @@ -535,6 +537,12 @@ func (m *MachinePoolScope) Close(ctx context.Context) error { if err := m.updateReplicasAndProviderIDs(ctx); err != nil { return errors.Wrap(err, "failed to update replicas and providerIDs") } + if m.HasReplicasExternallyManaged(ctx) { + if err := m.updateCustomDataHash(ctx); err != nil { + // ignore errors to calculating the custom data hash since it's not absolutely crucial. + log.V(4).Error(err, "unable to update custom data hash, ignoring.") + } + } } if err := m.PatchObject(ctx); err != nil { @@ -568,6 +576,39 @@ func (m *MachinePoolScope) GetBootstrapData(ctx context.Context) (string, error) return base64.StdEncoding.EncodeToString(value), nil } +// calculateBootstrapDataHash calculates the sha256 hash of the bootstrap data. +func (m *MachinePoolScope) calculateBootstrapDataHash(ctx context.Context) (string, error) { + bootstrapData, err := m.GetBootstrapData(ctx) + if err != nil { + return "", err + } + h := sha256.New() + n, err := io.WriteString(h, bootstrapData) + if err != nil || n == 0 { + return "", fmt.Errorf("unable to write custom data (bytes written: %q): %w", n, err) + } + return fmt.Sprintf("%x", h.Sum(nil)), nil +} + +// HasBootstrapDataChanges calculates the sha256 hash of the bootstrap data and compares it with the saved hash in AzureMachinePool.Status. +func (m *MachinePoolScope) HasBootstrapDataChanges(ctx context.Context) (bool, error) { + newHash, err := m.calculateBootstrapDataHash(ctx) + if err != nil { + return false, err + } + return m.AzureMachinePool.GetAnnotations()[azure.CustomDataHashAnnotation] != newHash, nil +} + +// updateCustomDataHash calculates the sha256 hash of the bootstrap data and saves it in AzureMachinePool.Status. +func (m *MachinePoolScope) updateCustomDataHash(ctx context.Context) error { + newHash, err := m.calculateBootstrapDataHash(ctx) + if err != nil { + return err + } + m.SetAnnotation(azure.CustomDataHashAnnotation, newHash) + return nil +} + // GetVMImage picks an image from the AzureMachinePool configuration, or uses a default one. func (m *MachinePoolScope) GetVMImage(ctx context.Context) (*infrav1.Image, error) { ctx, log, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.GetVMImage") diff --git a/azure/services/scalesets/mock_scalesets/scalesets_mock.go b/azure/services/scalesets/mock_scalesets/scalesets_mock.go index 88c8d86bcce..45022f6a4e5 100644 --- a/azure/services/scalesets/mock_scalesets/scalesets_mock.go +++ b/azure/services/scalesets/mock_scalesets/scalesets_mock.go @@ -250,6 +250,35 @@ func (mr *MockScaleSetScopeMockRecorder) GetVMImage(arg0 interface{}) *gomock.Ca return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetVMImage", reflect.TypeOf((*MockScaleSetScope)(nil).GetVMImage), arg0) } +// HasBootstrapDataChanges mocks base method. +func (m *MockScaleSetScope) HasBootstrapDataChanges(arg0 context.Context) (bool, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "HasBootstrapDataChanges", arg0) + ret0, _ := ret[0].(bool) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// HasBootstrapDataChanges indicates an expected call of HasBootstrapDataChanges. +func (mr *MockScaleSetScopeMockRecorder) HasBootstrapDataChanges(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "HasBootstrapDataChanges", reflect.TypeOf((*MockScaleSetScope)(nil).HasBootstrapDataChanges), arg0) +} + +// HasReplicasExternallyManaged mocks base method. +func (m *MockScaleSetScope) HasReplicasExternallyManaged(arg0 context.Context) bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "HasReplicasExternallyManaged", arg0) + ret0, _ := ret[0].(bool) + return ret0 +} + +// HasReplicasExternallyManaged indicates an expected call of HasReplicasExternallyManaged. +func (mr *MockScaleSetScopeMockRecorder) HasReplicasExternallyManaged(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "HasReplicasExternallyManaged", reflect.TypeOf((*MockScaleSetScope)(nil).HasReplicasExternallyManaged), arg0) +} + // HashKey mocks base method. func (m *MockScaleSetScope) HashKey() string { m.ctrl.T.Helper() diff --git a/azure/services/scalesets/scalesets.go b/azure/services/scalesets/scalesets.go index 5cc35d59a3c..e5a342cd3cf 100644 --- a/azure/services/scalesets/scalesets.go +++ b/azure/services/scalesets/scalesets.go @@ -53,6 +53,8 @@ type ( SetProviderID(string) SetVMSSState(*azure.VMSS) ReconcileReplicas(context.Context, *azure.VMSS) error + HasReplicasExternallyManaged(context.Context) bool + HasBootstrapDataChanges(context.Context) (bool, error) } // Service provides operations on Azure resources. @@ -276,6 +278,20 @@ func (s *Service) patchVMSSIfNeeded(ctx context.Context, infraVMSS *azure.VMSS) return nil, errors.Wrap(err, "failed to calculate maxSurge") } + // If the VMSS is managed by an external autoscaler, we should patch the VMSS if customData has changed. + shouldPatchCustomData := false + if s.Scope.HasReplicasExternallyManaged(ctx) { + shouldPatchCustomData, err := s.Scope.HasBootstrapDataChanges(ctx) + if err != nil { + return nil, errors.Wrap(err, "unable to calculate custom data hash") + } + if shouldPatchCustomData { + log.V(4).Info("custom data changed") + } else { + log.V(4).Info("custom data unchanged") + } + } + hasModelChanges := hasModelModifyingDifferences(infraVMSS, vmss) var isFlex bool for _, instance := range infraVMSS.Instances { @@ -295,10 +311,11 @@ func (s *Service) patchVMSSIfNeeded(ctx context.Context, infraVMSS *azure.VMSS) patch.Sku.Capacity = pointer.Int64(surge) } + // If the VMSS is managed by an external autoscaler, we should patch the VMSS if customData has changed. // If there are no model changes and no increase in the replica count, do not update the VMSS. // Decreases in replica count is handled by deleting AzureMachinePoolMachine instances in the MachinePoolScope - if *patch.Sku.Capacity <= infraVMSS.Capacity && !hasModelChanges { - log.V(4).Info("nothing to update on vmss", "scale set", spec.Name, "newReplicas", *patch.Sku.Capacity, "oldReplicas", infraVMSS.Capacity, "hasChanges", hasModelChanges) + if *patch.Sku.Capacity <= infraVMSS.Capacity && !hasModelChanges && !shouldPatchCustomData { + log.V(4).Info("nothing to update on vmss", "scale set", spec.Name, "newReplicas", *patch.Sku.Capacity, "oldReplicas", infraVMSS.Capacity, "hasModelChanges", hasModelChanges, "shouldPatchCustomData", shouldPatchCustomData) return nil, nil } diff --git a/azure/services/scalesets/scalesets_test.go b/azure/services/scalesets/scalesets_test.go index 70a0b25ff4c..2c1faef91e6 100644 --- a/azure/services/scalesets/scalesets_test.go +++ b/azure/services/scalesets/scalesets_test.go @@ -223,6 +223,7 @@ func TestReconcileVMSS(t *testing.T) { s.DeleteLongRunningOperationState(defaultSpec.Name, serviceName, infrav1.PutFuture) s.DeleteLongRunningOperationState(defaultSpec.Name, serviceName, infrav1.PatchFuture) s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil) + s.HasReplicasExternallyManaged(gomockinternal.AContext()).Return(false) }, }, { @@ -238,6 +239,7 @@ func TestReconcileVMSS(t *testing.T) { s.DeleteLongRunningOperationState(defaultSpec.Name, serviceName, infrav1.PutFuture) s.DeleteLongRunningOperationState(defaultSpec.Name, serviceName, infrav1.PatchFuture) s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil) + s.HasReplicasExternallyManaged(gomockinternal.AContext()).Return(false) }, }, { @@ -582,6 +584,7 @@ func TestReconcileVMSS(t *testing.T) { m.GetResultIfDone(gomockinternal.AContext(), patchFuture).Return(compute.VirtualMachineScaleSet{}, azure.NewOperationNotDoneError(patchFuture)) m.Get(gomockinternal.AContext(), defaultResourceGroup, defaultVMSSName).Return(clone, nil) m.ListInstances(gomockinternal.AContext(), defaultResourceGroup, defaultVMSSName).Return(instances, nil) + s.HasReplicasExternallyManaged(gomockinternal.AContext()).Return(false) }, }, { @@ -741,6 +744,7 @@ func TestReconcileVMSS(t *testing.T) { s.DeleteLongRunningOperationState(spec.Name, serviceName, infrav1.PatchFuture) s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil) s.Location().AnyTimes().Return("test-location") + s.HasReplicasExternallyManaged(gomockinternal.AContext()).Return(false) }, }, { @@ -767,6 +771,7 @@ func TestReconcileVMSS(t *testing.T) { s.DeleteLongRunningOperationState(spec.Name, serviceName, infrav1.PatchFuture) s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil) s.Location().AnyTimes().Return("test-location") + s.HasReplicasExternallyManaged(gomockinternal.AContext()).Return(false) }, }, { @@ -792,6 +797,7 @@ func TestReconcileVMSS(t *testing.T) { s.DeleteLongRunningOperationState(spec.Name, serviceName, infrav1.PatchFuture) s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil) s.Location().AnyTimes().Return("test-location") + s.HasReplicasExternallyManaged(gomockinternal.AContext()).Return(false) }, }, } diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 1983ac5bb8e..9bef000c89e 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -52,6 +52,15 @@ rules: - get - list - watch +- apiGroups: + - bootstrap.cluster.x-k8s.io + resources: + - kubeadmconfigs + - kubeadmconfigs/status + verbs: + - get + - list + - watch - apiGroups: - cluster.x-k8s.io resources: diff --git a/exp/controllers/azuremachinepool_controller.go b/exp/controllers/azuremachinepool_controller.go index 163326b4fb7..fbb9d9c90a8 100644 --- a/exp/controllers/azuremachinepool_controller.go +++ b/exp/controllers/azuremachinepool_controller.go @@ -34,14 +34,17 @@ import ( "sigs.k8s.io/cluster-api-provider-azure/util/reconciler" "sigs.k8s.io/cluster-api-provider-azure/util/tele" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + kubeadmv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/predicates" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" ) @@ -113,6 +116,12 @@ func (ampr *AzureMachinePoolReconciler) SetupWithManager(ctx context.Context, mg &source.Kind{Type: &infrav1.AzureCluster{}}, handler.EnqueueRequestsFromMapFunc(azureClusterMapper), ). + // watch for changes in KubeadmConfig to sync bootstrap token + Watches( + &source.Kind{Type: &kubeadmv1.KubeadmConfig{}}, + handler.EnqueueRequestsFromMapFunc(KubeadmConfigToInfrastructureMapFunc(ctx, ampr.Client, log)), + builder.WithPredicates(predicate.ResourceVersionChangedPredicate{}), + ). Build(r) if err != nil { return errors.Wrap(err, "error creating controller") @@ -147,6 +156,7 @@ func (ampr *AzureMachinePoolReconciler) SetupWithManager(ctx context.Context, mg // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=kubeadmconfigs;kubeadmconfigs/status,verbs=get;list;watch // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines/status,verbs=get // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;update;patch diff --git a/exp/controllers/helpers.go b/exp/controllers/helpers.go index fc96d07ac99..c1bc74885e9 100644 --- a/exp/controllers/helpers.go +++ b/exp/controllers/helpers.go @@ -32,6 +32,7 @@ import ( infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1" "sigs.k8s.io/cluster-api-provider-azure/util/reconciler" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + kubeadmv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/util" ctrl "sigs.k8s.io/controller-runtime" @@ -317,3 +318,62 @@ func MachinePoolMachineHasStateOrVersionChange(logger logr.Logger) predicate.Fun GenericFunc: func(e event.GenericEvent) bool { return false }, } } + +// KubeadmConfigToInfrastructureMapFunc returns a handler.ToRequestsFunc that watches for KubeadmConfig events and returns. +func KubeadmConfigToInfrastructureMapFunc(ctx context.Context, c client.Client, log logr.Logger) handler.MapFunc { + return func(o client.Object) []reconcile.Request { + ctx, cancel := context.WithTimeout(ctx, reconciler.DefaultMappingTimeout) + defer cancel() + + kc, ok := o.(*kubeadmv1.KubeadmConfig) + if !ok { + log.V(4).Info("attempt to map incorrect type", "type", fmt.Sprintf("%T", o)) + return nil + } + + mpKey := client.ObjectKey{ + Namespace: kc.Namespace, + Name: kc.Name, + } + + // fetch MachinePool to get reference + mp := &expv1.MachinePool{} + if err := c.Get(ctx, mpKey, mp); err != nil { + if !apierrors.IsNotFound(err) { + log.Error(err, "failed to fetch MachinePool for KubeadmConfig") + } + return []reconcile.Request{} + } + + ref := mp.Spec.Template.Spec.Bootstrap.ConfigRef + if ref == nil { + log.V(4).Info("fetched MachinePool has no Bootstrap.ConfigRef") + return []reconcile.Request{} + } + sameKind := ref.Kind != o.GetObjectKind().GroupVersionKind().Kind + sameName := ref.Name == kc.Name + sameNamespace := ref.Namespace == kc.Namespace + if !sameKind || !sameName || !sameNamespace { + log.V(4).Info("Bootstrap.ConfigRef does not match", + "sameKind", sameKind, + "ref kind", ref.Kind, + "other kind", o.GetObjectKind().GroupVersionKind().Kind, + "sameName", sameName, + "sameNamespace", sameNamespace, + ) + return []reconcile.Request{} + } + + key := client.ObjectKey{ + Namespace: kc.Namespace, + Name: kc.Name, + } + log.V(4).Info("adding KubeadmConfig to watch", "key", key) + + return []reconcile.Request{ + { + NamespacedName: key, + }, + } + } +} diff --git a/main.go b/main.go index eaced819303..6a16701a98b 100644 --- a/main.go +++ b/main.go @@ -52,6 +52,7 @@ import ( "sigs.k8s.io/cluster-api-provider-azure/version" clusterv1alpha4 "sigs.k8s.io/cluster-api/api/v1alpha4" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + kubeadmv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" expv1alpha4 "sigs.k8s.io/cluster-api/exp/api/v1alpha4" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" capifeature "sigs.k8s.io/cluster-api/feature" @@ -80,6 +81,7 @@ func init() { _ = expv1alpha4.AddToScheme(scheme) _ = clusterv1.AddToScheme(scheme) _ = expv1.AddToScheme(scheme) + _ = kubeadmv1.AddToScheme(scheme) // +kubebuilder:scaffold:scheme // Add aadpodidentity v1 to the scheme.