From 207a72f5435887bee7eab252e08fd639146bf653 Mon Sep 17 00:00:00 2001 From: Michael Weibel Date: Tue, 19 Nov 2024 17:18:44 +0100 Subject: [PATCH] fix(MachinePool): always update vmssState Updates ScaleSets service to always update the scope's vmss state with the latest data. Previously, a long running operation would cause the vmssState to not be updated and delayed creation of AzureMachinePoolMachines until much later when that long running operation completed. This change avoids this and updates the vmssState to what is retrieved from the API at all times. The call does not make more API requests because the VMSS GET request was done anyway but its result ignored. --- azure/services/scalesets/scalesets.go | 51 ++++++++++++++-------- azure/services/scalesets/scalesets_test.go | 28 ++++-------- 2 files changed, 42 insertions(+), 37 deletions(-) diff --git a/azure/services/scalesets/scalesets.go b/azure/services/scalesets/scalesets.go index 383965482e9..78500dc2444 100644 --- a/azure/services/scalesets/scalesets.go +++ b/azure/services/scalesets/scalesets.go @@ -96,7 +96,7 @@ func (s *Service) Reconcile(ctx context.Context) (retErr error) { return errors.Errorf("%T is not of type ScaleSetSpec", spec) } - _, err := s.Client.Get(ctx, spec) + result, err := s.Client.Get(ctx, spec) if err == nil { // We can only get the existing instances if the VMSS already exists scaleSetSpec.VMSSInstances, err = s.Client.ListInstances(ctx, spec.ResourceGroupName(), spec.ResourceName()) @@ -105,34 +105,51 @@ func (s *Service) Reconcile(ctx context.Context) (retErr error) { s.Scope.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, err) return err } + if result != nil { + if err := s.updateScopeState(ctx, result, scaleSetSpec); err != nil { + return err + } + } } else if !azure.ResourceNotFound(err) { return errors.Wrapf(err, "failed to get existing VMSS") } - result, err := s.CreateOrUpdateResource(ctx, scaleSetSpec, serviceName) + result, err = s.CreateOrUpdateResource(ctx, scaleSetSpec, serviceName) s.Scope.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, err) if err == nil && result != nil { - vmss, ok := result.(armcompute.VirtualMachineScaleSet) - if !ok { - return errors.Errorf("%T is not an armcompute.VirtualMachineScaleSet", result) + if err := s.updateScopeState(ctx, result, scaleSetSpec); err != nil { + return err } + } - fetchedVMSS := converters.SDKToVMSS(vmss, scaleSetSpec.VMSSInstances) - if err := s.Scope.ReconcileReplicas(ctx, &fetchedVMSS); err != nil { - return errors.Wrap(err, "unable to reconcile VMSS replicas") - } + return err +} - // Transform the VMSS resource representation to conform to the cloud-provider-azure representation - providerID, err := azprovider.ConvertResourceGroupNameToLower(azureutil.ProviderIDPrefix + fetchedVMSS.ID) - if err != nil { - return errors.Wrapf(err, "failed to parse VMSS ID %s", fetchedVMSS.ID) - } - s.Scope.SetProviderID(providerID) - s.Scope.SetVMSSState(&fetchedVMSS) +// updateScopeState updates the scope's VMSS state and provider ID +// +// Code later in the reconciler uses scope's VMSS state for determining scale status and whether to create/delete +// AzureMachinePoolMachines. +// N.B.: before calling this function, make sure scaleSetSpec.VMSSInstances is updated to the latest state. +func (s *Service) updateScopeState(ctx context.Context, result interface{}, scaleSetSpec *ScaleSetSpec) error { + vmss, ok := result.(armcompute.VirtualMachineScaleSet) + if !ok { + return errors.Errorf("%T is not an armcompute.VirtualMachineScaleSet", result) } - return err + fetchedVMSS := converters.SDKToVMSS(vmss, scaleSetSpec.VMSSInstances) + if err := s.Scope.ReconcileReplicas(ctx, &fetchedVMSS); err != nil { + return errors.Wrap(err, "unable to reconcile VMSS replicas") + } + + // Transform the VMSS resource representation to conform to the cloud-provider-azure representation + providerID, err := azprovider.ConvertResourceGroupNameToLower(azureutil.ProviderIDPrefix + fetchedVMSS.ID) + if err != nil { + return errors.Wrapf(err, "failed to parse VMSS ID %s", fetchedVMSS.ID) + } + s.Scope.SetProviderID(providerID) + s.Scope.SetVMSSState(&fetchedVMSS) + return nil } // Delete deletes a scale set asynchronously. Delete sends a DELETE request to Azure and if accepted without error, diff --git a/azure/services/scalesets/scalesets_test.go b/azure/services/scalesets/scalesets_test.go index c0b0ba4b1da..ebb64d30dc6 100644 --- a/azure/services/scalesets/scalesets_test.go +++ b/azure/services/scalesets/scalesets_test.go @@ -119,14 +119,14 @@ func TestReconcileVMSS(t *testing.T) { spec := getDefaultVMSSSpec() // Validate spec s.ScaleSetSpec(gomockinternal.AContext()).Return(spec).AnyTimes() - m.Get(gomockinternal.AContext(), &defaultSpec).Return(&resultVMSS, nil) + m.Get(gomockinternal.AContext(), &defaultSpec).Return(resultVMSS, nil) m.ListInstances(gomockinternal.AContext(), defaultSpec.ResourceGroup, defaultSpec.Name).Return(defaultInstances, nil) r.CreateOrUpdateResource(gomockinternal.AContext(), spec, serviceName).Return(getResultVMSS(), nil) s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil) - s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(nil) - s.SetProviderID(azureutil.ProviderIDPrefix + defaultVMSSID) - s.SetVMSSState(&fetchedVMSS) + s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(nil).Times(2) + s.SetProviderID(azureutil.ProviderIDPrefix + defaultVMSSID).Times(2) + s.SetVMSSState(&fetchedVMSS).Times(2) }, }, { @@ -177,28 +177,16 @@ func TestReconcileVMSS(t *testing.T) { s.DefaultedAzureServiceReconcileTimeout().Return(reconciler.DefaultAzureServiceReconcileTimeout) spec := getDefaultVMSSSpec() s.ScaleSetSpec(gomockinternal.AContext()).Return(spec).AnyTimes() - m.Get(gomockinternal.AContext(), &defaultSpec).Return(&resultVMSS, nil) + m.Get(gomockinternal.AContext(), &defaultSpec).Return(resultVMSS, nil) m.ListInstances(gomockinternal.AContext(), defaultSpec.ResourceGroup, defaultSpec.Name).Return(defaultInstances, nil) r.CreateOrUpdateResource(gomockinternal.AContext(), spec, serviceName). Return(nil, internalError()) s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, internalError()) - }, - }, - { - name: "failed to reconcile replicas", - expectedError: "unable to reconcile VMSS replicas:.*#: Internal Server Error: StatusCode=500", - expect: func(g *WithT, s *mock_scalesets.MockScaleSetScopeMockRecorder, r *mock_async.MockReconcilerMockRecorder, m *mock_scalesets.MockClientMockRecorder) { - s.DefaultedAzureServiceReconcileTimeout().Return(reconciler.DefaultAzureServiceReconcileTimeout) - spec := getDefaultVMSSSpec() - s.ScaleSetSpec(gomockinternal.AContext()).Return(spec).AnyTimes() - m.Get(gomockinternal.AContext(), &defaultSpec).Return(&resultVMSS, nil) - m.ListInstances(gomockinternal.AContext(), defaultSpec.ResourceGroup, defaultSpec.Name).Return(defaultInstances, nil) - r.CreateOrUpdateResource(gomockinternal.AContext(), spec, serviceName).Return(getResultVMSS(), nil) - s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil) - - s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(internalError()) + s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(nil) + s.SetProviderID(azureutil.ProviderIDPrefix + defaultVMSSID) + s.SetVMSSState(&fetchedVMSS) }, }, {